/*****************************************************************************

This do-file cleans the administrative data collected for SPOON Guatemala
and merges it with the final treatment status.

Updated on 2/28/2023

*****************************************************************************/

** ADMINISTRATIVE DATA **

* Load sheet "BASE DATOS ANTRO" (cells A3:HM986) of the anthropometry workbook.
* The first row of that range supplies the variable names, lower-cased on import.
import excel "$admin_raw/Base de Antropometria, SPOON.xlsx", ///
    sheet("BASE DATOS ANTRO") cellrange(A3:HM986) firstrow case(lower) clear

* Give the imported columns short working names.

* -- household identifier and treatment indicator
rename (códigoid tratamiento1vitalito2) (folio tratamiento)

* -- withdrawal, mother name, date field and program-refusal columns
rename (totalretiradas nombredelamadre fallecidosagregarfechafalle rechazoelprograma) ///
       (retiradas      nombre_madre    fecha_aborto                rechazo)

* -- child sex, name, current age in months and carnet delivery date
rename (sexoniñoa nombredelniño edadactualenmeses fechadeentregadecarnet) ///
       (sexo_niño nombre_niño   edad_niño         fecha_carnet)

* -- number of supplement deliveries (labelled below) and n_antros
rename (deentegasdesuplemento nodeantros) (n_entregas n_antros)
label variable n_entregas "Numero de entregas de suplementos (vitalito o chispitas)"

* -- birth date; edadmeses receives a "1" suffix
rename (fechanacimiento edadmeses) (bday edadmeses1)

* -- two columns are misnamed in the workbook: "haz52" holds waz5 and
*    "columna3" holds peso3
rename (haz52 columna3) (waz5 peso3)

* These columns are noted as empty in the workbook, so they are removed
drop municipio comunidad nombre_madre nombre_niño

** MERGE WITH TREATMENT ASSIGNMENT FROM SURVEYS **

  * Step 1 -- new households. For these rows folio holds a random number
  * (a value below 1) instead of a household ID; 85 households are affected.
  * Copy that number into rand_num and use it to look up the household ID.
  gen double rand_num = folio if folio<1
  rename folio folio_temp
  merge m:1 rand_num using "${el_processed}/new_household_assignments.dta", ///
      keepusing(rand_num folio)
  * Result noted by the authors: 83 match, 2 do not (.78022 & .97596)
  keep if _merge!=2
  * For matched rows, overwrite the random number with the household ID
  replace folio_temp = folio if folio!=. & _merge==3
  drop _merge folio
  rename folio_temp folio

  * Step 2 -- merge by folio to bring in treatment status.
  * Result noted by the authors: 10 rows do not match -- the 2 random numbers
  * listed above plus 8 that are not in the all_households dataset.
  merge m:1 folio using "${el_processed}/all_households.dta"
  keep if _merge!=2
  drop _merge
  tabulate tratamiento_hogar
 
* Interview date endline
	* Builds el_interview_date (a %td daily date) from fecha_ini. The value is
	* split on "/" and the pieces are used as day, month, year in that order.
	* NOTE(review): assumes fecha_ini is a string stored as day/month/year -- confirm.
	rename fecha_ini fecha_ini_edit
	split fecha_ini_edit , parse(/)
	
	* Convert the split pieces to numeric so they can be passed to mdy()
	destring fecha_ini_edit*, replace

	rename fecha_ini_edit1 fecha_ini_day
	rename fecha_ini_edit2 fecha_ini_month
	rename fecha_ini_edit3 fecha_ini_year

	* Add the century only to two-digit years; a year that is already four
	* digits is left untouched so it does not become e.g. 4023.
	replace fecha_ini_year = fecha_ini_year + 2000 if fecha_ini_year < 100

	gen el_interview_date = mdy(fecha_ini_month, fecha_ini_day, fecha_ini_year)
	format el_interview_date %td 
		
	label var el_interview_date "Endline interview date"
	
	* Remove the source string and all helper pieces (every fecha_ini* variable)
	drop fecha_ini*

* Interview date baseline
 * Prefix the existing interview_date with "bl_" so that it is clearly
 * distinguished from the endline date, then label it.
 rename interview_date bl_interview_date
 label variable bl_interview_date "Baseline interview date"


** SAVE FILE **
* Write the cleaned and merged dataset, overwriting any earlier version
save "$admin_processed/admin_data.dta", replace