# AUTOGENERATED! DO NOT EDIT! File to edit: ../00_datasets.ipynb.

# %% auto 0
__all__ = ['load_BarroLee_1994', 'make_causal_effect']

# %% ../00_datasets.ipynb 5
#| include: false
import pandas as pd

def load_BarroLee_1994(
    return_tuple:bool=True # Whether to return the data in a tuple or jointly in a single pandas DataFrame
): # `X` and `y` in a tuple or in a single pandas DataFrame
    """Dataset used in R Barro and J-W Lee's *Sources of Economic Growth* (1994)

    The data is read from `gingado/dataset_BarroLee_1994.csv`; since this is a
    relative path, it is resolved against the current working directory.
    """
    data = pd.read_csv('gingado/dataset_BarroLee_1994.csv')

    # Joint format requested: hand back the frame untouched, `Outcome` column included.
    if not return_tuple:
        return data

    # `pop` removes the `Outcome` column from `data` in place, so what is left
    # in `data` are the covariates only.
    outcome = data.pop('Outcome')
    return data, outcome

# %% ../00_datasets.ipynb 11
#| include: false
import pandas as pd
import numpy as np
from inspect import signature
from sklearn.utils import check_random_state
from typing import Callable

def _accepts_param(func, name):
    "Whether the callable `func` declares a parameter called `name`"
    return name in signature(func).parameters

def make_causal_effect(
    n_samples:int=100, # Number of observations
    n_features:int=100, # Number of covariates
    # Function that generates the outcome variable before any treatment effects
    pretreatment_outcome=lambda X, bias, rng: X[:, 1] + np.maximum(X[:, 2], 0) + bias + rng.standard_normal(size=X.shape[0]),
    # Number between 0 and 1, or function that generates a treatment propensity for each observation
    treatment_propensity=lambda X: 0.4 + 0.2 * (X[:, 0] > 0),
    # Function that controls how treatment propensities actually result in observations being treated
    treatment_assignment=lambda propensity, rng: rng.binomial(1, propensity),
    # Function that determines the magnitude of the treatment for each observation, conditional on assignment
    treatment=lambda assignment: assignment,
    # Function that calculates the effect of a treatment to each treated observation
    treatment_effect=lambda treatment_value, X: np.maximum(X[:, 0], 0) * treatment_value,
    bias:float=0, # The value of the constant, passed as `bias` to `pretreatment_outcome`
    noise:float=0, # If > 0, a random draw of the normal distribution with scale `noise` is added to the pretreatment outcome; otherwise no extra random term is added
    random_state=None, # Seed for the random number generator
    return_propensity:bool=False, # Whether the treatment propensity of each observation is to be returned
    return_assignment:bool=False, # Whether the treatment assignment status of each observation is to be returned
    return_treatment_value:bool=False, # Whether the treatment value of each observation is to be returned
    return_treatment_effect:bool=True, # Whether the treatment effect of each observation is to be returned
    return_pretreatment_y:bool=False, # Whether the outcome variable of each observation before the inclusion of treatment effects is to be returned
    return_as_dict:bool=False # Whether the results are returned as a list (False) or as a dictionary (True)
): # `X`, `y` and any other variables set to return in a list or in a dictionary
    """Simulated dataset with causal effects from treatment

    The covariates `X` are standard normal draws of shape `(n_samples, n_features)`.
    The outcome `y` is the sum of the pretreatment outcome and the treatment effect.

    The user-supplied functions are always called with keyword arguments, and the
    optional ones (`rng` for `pretreatment_outcome` and `treatment_assignment`,
    `X` for `treatment` and `treatment_effect`) are only passed when the function
    declares a parameter with that name.

    Raises `ValueError` if `treatment` yields a non-zero value for an observation
    whose assignment is 0.

    The result always starts with `X` and `y`; the optional items follow in the
    order propensity, treatment_assignment, treatment_value, treatment_effect,
    pretreatment_y.
    """
    generator = check_random_state(random_state)

    X = generator.standard_normal(size=(n_samples, n_features))

    # `rng` is only handed over when the outcome function asks for it.
    outcome_kwargs = {'X': X, 'bias': bias}
    if _accepts_param(pretreatment_outcome, 'rng'):
        outcome_kwargs['rng'] = generator
    pretreatment_y = np.squeeze(pretreatment_outcome(**outcome_kwargs))
    if noise > 0.0:
        # Add out of place: the squeezed array may be a view into `X` (eg, when the
        # outcome function just returns a column of `X`) or have an integer dtype,
        # and an in-place `+=` would then corrupt `X` or fail to cast.
        pretreatment_y = pretreatment_y + generator.normal(scale=noise, size=pretreatment_y.shape)

    # Since propensity may be a scalar (ie, the same propensity for all),
    # it is necessary to first check that it is callable.
    if callable(treatment_propensity):
        propensity = treatment_propensity(X=X)
    else:
        propensity = np.broadcast_to(treatment_propensity, pretreatment_y.shape)

    if _accepts_param(treatment_assignment, 'rng'):
        assignment = treatment_assignment(propensity=propensity, rng=generator)
    else:
        assignment = treatment_assignment(propensity=propensity)

    # In case treatment is heterogenous amongst the treated observations,
    # the treatment function depends on `X`; otherwise only on `assignment`
    if _accepts_param(treatment, 'X'):
        treatment_value = treatment(assignment=assignment, X=X)
    else:
        treatment_value = treatment(assignment=assignment)

    # Unwrap results that come with a single outer element
    if len(treatment_value) == 1:
        treatment_value = treatment_value[0]

    # check that the treatment value is 0 for all observations that
    # are not assigned for treatment
    treatment_check = np.column_stack((assignment, treatment_value))
    untreated_values = treatment_check[treatment_check[:, 0] == 0, 1]
    if not np.all(untreated_values == 0):
        raise ValueError("Argument `treatment` must be a function that returns 0 for observations with `assignment` == 0.\nOne suggestion is to multiply the desired treatment value with `assignment`.")

    # the code below checks whether the treatment effect responds to each unit's covariates
    # if not, then it just passes the treatment variable to `treatment_effect`
    if _accepts_param(treatment_effect, 'X'):
        treat = treatment_effect(treatment_value=treatment_value, X=X)
    else:
        treat = treatment_effect(treatment_value=treatment_value)

    y = pretreatment_y + treat

    return_items = {'X': X, 'y': y}

    if return_propensity:
        return_items['propensity'] = propensity
    if return_assignment:
        return_items['treatment_assignment'] = assignment
    if return_treatment_value:
        # No trailing comma here: the value itself is stored, not a 1-tuple around it.
        return_items['treatment_value'] = treatment_value
    if return_treatment_effect:
        return_items['treatment_effect'] = treat
    if return_pretreatment_y:
        return_items['pretreatment_y'] = pretreatment_y

    if not return_as_dict:
        # dicts preserve insertion order, so the list follows the order above
        return list(return_items.values())

    return return_items
