library(tidyverse) library(truncnorm) library(fabricatr) library(scales) library(ids) # Generate random IDs set.seed(12345) happiness_data <- fabricate( years = add_level( # Create 10 years # Technically this is 11 years! But that's because I want a preceding year # when lagging outcomes and confounders, so we make 11, lag stuff and then # lop off the first year N = 11, year = 0:10, # Shocks across all countries in a year y_population_shock = rtruncnorm(N, 0.1, 0.1, a = 0), y_gdp_shock = rnorm(N, 0, 0.03), y_democracy_shock = rnorm(N, 0, 0.02), y_corruption_shock = rnorm(N, 0, 0.01) ), countries = add_level( # Create 152 countries N = 152, # Country-specific trends # Confounders c_population_base = rnorm(N, 16.5, 1), c_population_growth = rnorm(N, 0.04, 0.007), c_gdp_base = runif(N, 16, 20.5), c_gdp_growth = runif(N, 0, 0.1), c_gdp_growth_error = runif(N, 0, 0.05), c_democracy_base = runif(N, 0.2, 0.6), c_democracy_growth = rnorm(N, 0, 0.02), c_democracy_error = runif(N, 0, 0.05), c_corruption_base = runif(N, 0.1, 0.6), c_corruption_growth = rnorm(N, 0, 0.04), c_corruption_error = runif(N, 0, 0.01), # Treatment c_vacation_base = runif(N, 10, 16), c_vacation_growth = rnorm(N, 0.75, 0.75), c_policy_base = runif(N, 25, 75), c_policy_growth = rnorm(N, 1.5, 1), # Outcome c_happiness_base = runif(N, 20, 45), c_happiness_growth = rnorm(N, 0.4, 0.05), c_happiness_error = runif(N, 0, 0.7), nest = FALSE ), country_years = cross_levels( # Cross countries and years by = join(years, countries), # Build all confounders log_population = c_population_base + (year * c_population_growth), log_gdp = c_gdp_base + (0.4 * log_population) + y_gdp_shock + (year * c_gdp_growth) + rnorm(N, sd = c_gdp_growth_error), gdp = exp(log_gdp), population = exp(log_population), gdp_cap = gdp / population, log_gdp_cap = log(gdp_cap), democracy = c_democracy_base + y_democracy_shock + (year * c_democracy_growth) + rnorm(N, sd = c_democracy_error), democracy = rescale(democracy, to = c(0, 1)) * 100, corruption = c_corruption_base + y_corruption_shock + (year * c_corruption_growth) + rnorm(N, sd = c_corruption_error), corruption = rescale(corruption, to = c(0, 1)) * 100, # Treatment + outcome, vacation days vacation_days = c_vacation_base + (year * c_vacation_growth) + (0.00012 * gdp_cap) + (0.09 * democracy) + (-0.12 * corruption), vacation_days = round(vacation_days, 0), # THE CAUSAL EFFECT is 1.7 here. An additional vacation day increases happiness by 1.7 happiness_vacation = c_happiness_base + (year * c_happiness_growth) + rnorm(N, sd = c_happiness_error) + (0.00015 * gdp_cap) + (0.11 * democracy) + (-0.15 * corruption) + (1.7 * vacation_days), # Treatment + outcome, 6-hour workday # Generate a latent score for adopting the policy, then rescale it to 0.05 # to 0.6 and then use that probability in rbinom() to assign a country to # adopt the policy policy_score = c_policy_base + (year * c_policy_growth) + (0.00012 * gdp_cap) + (0.15 * democracy) + (-0.29 * corruption) + rnorm(N, 0, 10), policy_prob = rescale(policy_score, to = c(0.05, 0.60)), policy_assigned = rbinom(N, 1, policy_prob) ) ) %>% # Make it so no countries set the policy in the first or second year (since # the second year is technically the first year; year 0 gets removed and is # only here for lagging purposes) mutate(policy_assigned = ifelse(year %in% c(0, 1), 0, policy_assigned)) %>% # Once a country sets its policy, it's permanent, so find the cumulative sum # of the policy_assigned column in each country. If it's ever bigger than 0, # they have the policy; otherwise they don't group_by(countries) %>% mutate(policy = ifelse(cumsum(policy_assigned) > 0, 1, 0)) %>% ungroup() %>% # Create the outcome variable for the binary policy # THE CAUSAL EFFECT is 7 here. The policy increases happiness by 7 points mutate(happiness_policy = (rnorm(n(), 7, 3) * policy) + c_happiness_base + (year * c_happiness_growth) + rnorm(n(), sd = c_happiness_error) + (0.00015 * gdp_cap) + (0.11 * democracy) + (-0.15 * corruption) + (0.7 * vacation_days) + rnorm(n(), 8, 2)) %>% # Fix and remove a bunch of intermediate columns # Make the year more sensical mutate(year = 2009 + year) %>% # Round a bunch of stuff mutate(across(c(happiness_vacation, happiness_policy, democracy, corruption), ~round(., 1))) %>% # Generate sets of pronouncable 5-letter sequences as country names group_by(countries) %>% mutate(country = str_to_sentence(proquint(n_words = 1))) %>% ungroup() %>% # Remove and rearrange columns select(-years, -countries, -country_years, -starts_with("y_"), -starts_with("c_"), -policy_score, -policy_prob, -policy_assigned) %>% select(country, year, vacation_days, policy, happiness_vacation, happiness_policy, everything()) %>% # Lag things group_by(country) %>% mutate(lag_policy = lag(policy, default = 0), lag_happiness_policy = lag(happiness_policy)) %>% mutate(lag_vacation_days = lag(vacation_days), lag_happiness_vacation = lag(happiness_vacation)) %>% ungroup() %>% # Remove first year now that we've lagged stuff filter(year != 2009) write_csv(happiness_data, "happiness_data.csv")