/*****************************************************************************
This do file merges endline outcomes with baseline covariates to create a final
analysis dataset for the impact evaluation paper of SPOON Guatemala.

Inputs (the globals el_processed, bl_processed and processed are not defined
in this file and must be set before it runs):
    ${el_processed}/child_target_el.dta
    ${el_processed}/caregiver_target_el.dta
    ${el_processed}/demographics_el.dta
    ${bl_processed}/mom.dta

Outputs:
    ${el_processed}/caregivers_nomatch.dta                (intermediate)
    ${el_processed}/caregivers_nomatch_demographics.dta   (intermediate)
    ${processed}/child_target_analysis.dta
    ${processed}/caregiver_target_analysis.dta

Dependencies: the -distinct- command is not part of official Stata and must
be installed separately (e.g. from SSC).

Updated on 9/13/2022
*****************************************************************************/


******************** CHILD DATA ****************************
** ONLY TARGET

use "${el_processed}/child_target_el.dta", clear // 1290 children

* Rebuild the caregiver ID: mother first, father as fallback
drop id_caregiver
gen double id_caregiver = momID
replace id_caregiver = dadID if id_caregiver==. // if momID is missing, use dad ID
gen double targetID = id_persona

gen child_age = edad_dias/(365.25/12) // use precise age from anthro data
label var child_age "Child's age in months"

* create age intervals (ages outside [24,44) get a missing category)
egen child_age_cat = cut(child_age), at(24,30,32,33,34,35,36,37,38,39,40,42, 44) label

rename male child_sex
lab var child_sex "Child's sex"

* the child-level age is dropped so the caregiver's age can be merged in below
drop age

* merge ENDLINE caregiver age and education
merge m:1 folio id_caregiver using "${el_processed}/caregiver_target_el.dta", keepusing(age educ_prim_c)
gen not_matched = (_merge==1) // 20 didn't match, probably because not mom or dad who is primary caregiver
drop if _merge==2
drop _merge

rename age cg_age
lab var cg_age "Caregiver age"
rename educ_prim_c cg_educ
lab var cg_educ "Caregiver primary education"

* save list of IDs that don't match to try to get caregiver info (missing IDs)
* -- 20 total, all in separate houses
preserve
    keep if not_matched==1
    keep folio id_persona momID
    rename id_persona targetID
    save "${el_processed}/caregivers_nomatch.dta", replace
restore

* For the unmatched children, look up a caregiver by household (folio) and
* take that person's age and education from the demographics survey.
preserve
    use "${el_processed}/caregiver_target_el.dta", clear
    drop targetID

    * folio 61091 has 2 caregivers with information. keep mom instead of grandma
    drop if folio==61091 & id_persona==2

    * 17 match, 3 are missing knowledge data...
    merge m:1 folio using "${el_processed}/caregivers_nomatch.dta"
    keep if _merge==2 | _merge==3 // 17 matched
    drop _merge

    * rows that came only from the no-match list have no id_persona; use momID
    replace id_persona = momID if id_persona==.

    * keep the caregiver-file values under another name so the demographics
    * values can come in as age / educ_prim_c
    rename age age_original
    rename educ_prim_c educ_prim_c_original

    * get age and education from demographics survey for those 3
    merge 1:1 folio id_persona using "${el_processed}/demographics_el.dta", keepusing(age educ_prim_c)
    keep if _merge==3

    replace id_caregiver = id_persona if id_caregiver==.
    keep folio id_persona age educ_prim_c targetID id_caregiver
    rename age age_new
    rename educ_prim_c educ_prim_c_new
    rename id_caregiver id_caregiver_new
    save "${el_processed}/caregivers_nomatch_demographics.dta", replace
restore

* match on target instead of on momID
merge 1:1 folio targetID using "${el_processed}/caregivers_nomatch_demographics.dta"
replace id_caregiver = id_caregiver_new if _merge==3
replace cg_age = age_new if cg_age==.
replace cg_educ = educ_prim_c_new if cg_educ==.
drop _merge

* add caregiver/household baseline characteristics
rename folio idunico
gen double idpersona = id_caregiver
rename practice_index practice_index_el // restored to practice_index after the baseline merge

* 79 don't match... mostly new households?
merge m:1 idunico idpersona using "${bl_processed}/mom.dta", ///
    keepusing(idpersona idunico decisiones grit rosenberg inc_h wealth ///
              prop_males0_5 prop_males6_18 prop_males19_49 prop_males50 ///
              prop_females0_5 prop_females6_18 prop_females19_49 prop_females50 ///
              interview_date)
tab el_only if _merge==1 // 61 are new households ; other 18 have different caregiver ID identified at baseline
drop if _merge==2 // drop baseline only
drop _merge

* rename all baseline variables so it's clear, and impute missing with control mean
* (control = tratamiento_hogar==3, per the original comment).
* NOTE(review): means are computed before ineligible communities and households
* without treatment assignment are dropped further down -- confirm intended.
foreach var in decisiones grit rosenberg inc_h wealth ///
               prop_males0_5 prop_males6_18 prop_males19_49 prop_males50 ///
               prop_females0_5 prop_females6_18 prop_females19_49 prop_females50 ///
               interview_date {
    rename `var' `var'_bl
    // flag the originally-missing rows before they are overwritten
    gen `var'_bl_imputed = (`var'_bl==.)
    sum `var'_bl if tratamiento_hogar==3
    // r(mean) is used directly instead of going through a local macro: when no
    // control observation is available the macro version would expand to an
    // empty expression and abort; here the replace simply leaves the values missing
    replace `var'_bl = r(mean) if `var'_bl==.
}

* same people missing all household characteristics -- make just one variable
* for those missing and drop others
* NOTE(review): prop_females19_49_bl_imputed is not in the drop list below and
* therefore stays in the saved dataset; interview_date_bl_imputed is also kept
* without a label. Left as-is so the output variable list is unchanged -- confirm.
rename wealth_bl_imputed hh_imputed
drop prop_males0_5_bl_imputed prop_males6_18_bl_imputed prop_males19_49_bl_imputed prop_males50_bl_imputed ///
     prop_females0_5_bl_imputed prop_females6_18_bl_imputed prop_females50_bl_imputed

rename practice_index_el practice_index

* calculate age for those alive at baseline
* NOTE(review): interview_date_bl has been mean-imputed above for unmatched
* households, and a missing bday compares as larger than any date (alive_bl = 0
* rather than missing) -- confirm both are intended.
gen alive_bl = bday<=interview_date_bl // birthday was before the interview date
* months use 365.25/12 days, the same convention as child_age above
gen child_age_bl = (interview_date_bl - bday)/(365.25/12) if alive_bl==1

* Re-label baseline variables
lab var prop_males0_5_bl "Proportion of males 0-5 years"
lab var prop_males6_18_bl "Proportion of males 6-18 years"
lab var prop_males19_49_bl "Proportion of males 19-49 years"
lab var prop_males50_bl "Proportion of males 50+ years"
lab var prop_females0_5_bl "Proportion of females 0-5 years"
lab var prop_females6_18_bl "Proportion of females 6-18 years"
lab var prop_females19_49_bl "Proportion of females 19-49 years"
lab var prop_females50_bl "Proportion of females 50+ years"
lab var decisiones_bl "Decision-making power"
lab var decisiones_bl_imputed "Decision-making power: Missing"
lab var grit_bl_imputed "Grit: Missing"
lab var rosenberg_bl_imputed "Rosenberg: Missing"
lab var inc_h_bl_imputed "Household income: Missing"
lab var hh_imputed "Household characteristics: Missing"
lab var interview_date_bl "Baseline interview date"
lab var alive_bl "Alive at baseline"
lab var child_age_bl "Age at baseline, m"

* drop variables we don't need
drop momid mom_name child_name mom_personid agemonths fecha_nacimiento sexo fecha_medicion ///
     not_matched age_new id_caregiver_new educ_prim_c_new dad_house mom_house id_s16_ma id_s16_ni

distinct communityid // includes all 80; restrict to 76 eligible communities
drop if comunidad_elegible==0
distinct communityid

drop if tratamiento_hogar==. // drop 1 with unknown treatment assignment

save "${processed}/child_target_analysis.dta", replace


******************** CAREGIVER DATA ****************************

use "${el_processed}/caregiver_target_el.dta", clear

rename age cg_age
lab var cg_age "Caregiver age"
rename educ_prim_c cg_educ
lab var cg_educ "Caregiver primary education"
rename male cg_sex
lab var cg_sex "Caregiver sex"

* rename endline variables so baseline can merge
foreach var in decisiones grit rosenberg {
    rename `var' `var'_el
}

* add baseline characteristics
rename folio idunico
gen double idpersona = id_persona

* 110 don't match... mostly new households?
merge 1:1 idunico idpersona using "${bl_processed}/mom.dta", ///
    keepusing(idpersona idunico conocimiento decisiones grit rosenberg inc_h wealth ///
              prop_males0_5 prop_males6_18 prop_males19_49 prop_males50 ///
              prop_females0_5 prop_females6_18 prop_females19_49 prop_females50 ///
              preg)
tab el_only if _merge==1 // 62 are new households
drop if _merge==2 // drop baseline only
drop _merge

* rename all baseline variables so it's clear, and impute missing with the
* control mean (tratamiento_hogar==3), flagging imputed rows
foreach var in decisiones grit rosenberg conocimiento inc_h wealth ///
               prop_males0_5 prop_males6_18 prop_males19_49 prop_males50 ///
               prop_females0_5 prop_females6_18 prop_females19_49 prop_females50 {
    rename `var' `var'_bl
    // flag the originally-missing rows before they are overwritten
    gen `var'_bl_imputed = (`var'_bl==.)
    sum `var'_bl if tratamiento_hogar==3
    // r(mean) used directly; with no usable control observation the replace
    // leaves the values missing instead of aborting on an empty macro
    replace `var'_bl = r(mean) if `var'_bl==.
}
rename preg preg_bl // renamed only; not imputed

* same people missing all household characteristics -- make just one variable
* for those missing and drop others
* NOTE(review): prop_females19_49_bl_imputed is not in the drop list below and
* therefore stays in the saved dataset. Left as-is so the output variable list
* is unchanged -- confirm.
rename wealth_bl_imputed hh_imputed
drop prop_males0_5_bl_imputed prop_males6_18_bl_imputed prop_males19_49_bl_imputed prop_males50_bl_imputed ///
     prop_females0_5_bl_imputed prop_females6_18_bl_imputed prop_females50_bl_imputed

* change endline back
foreach var in decisiones grit rosenberg {
    rename `var'_el `var'
}

* Re-label baseline variables
lab var prop_males0_5_bl "Proportion of males 0-5 years"
lab var prop_males6_18_bl "Proportion of males 6-18 years"
lab var prop_males19_49_bl "Proportion of males 19-49 years"
lab var prop_males50_bl "Proportion of males 50+ years"
lab var prop_females0_5_bl "Proportion of females 0-5 years"
lab var prop_females6_18_bl "Proportion of females 6-18 years"
lab var prop_females19_49_bl "Proportion of females 19-49 years"
lab var prop_females50_bl "Proportion of females 50+ years"
lab var decisiones_bl "Decision-making power"
lab var conocimiento_bl "Baseline knowledge index"
lab var preg_bl "Pregnant at baseline"
lab var decisiones_bl_imputed "Decision-making power: Missing"
lab var grit_bl_imputed "Grit: Missing"
lab var rosenberg_bl_imputed "Rosenberg: Missing"
lab var conocimiento_bl_imputed "Baseline knowledge: Missing"
lab var inc_h_bl_imputed "Household income: Missing"
lab var hh_imputed "Household characteristics: Missing"

* drop variables we don't need
drop momID know_merge mom_test max_child momID_child max_momID min_momID mom_sub know_test ///
     dup_child know_complete dup_know targetID target_max target_min

distinct communityid
drop if comunidad_elegible==0
distinct communityid

drop if tratamiento_hogar==. // 1 without treatment assignment

save "${processed}/caregiver_target_analysis.dta", replace