/*---------------------------------------------------
HOW TO USE THIS FILE
This is an HTML version of the Stata do file ex3.do.
It is intended to show you the code and to allow links, not to be used
interactively; use ex3.do in the data editor for that.
EVERYTHING BETWEEN (SLASH STAR) AND (STAR SLASH) IS TAKEN AS A COMMENT.
These are shown in black here.
---------------------------------------------------------*/
/* To see output from the commands go to the Stata results. Links in this page: */
/*---------------------------------------------------
Sections:
    Setting up a survey design and getting SEs
    Looking at the effect of alternative designs on precision
    Looking at regional rates
    Logistic regression
    Using replication methods

Analyse SHlthS data in Stata
Exemplar 3
ALL THE COMMANDS IN THIS SECTION CAN ALSO BE RUN FROM DIALOGUE BOXES
---------------------------------------------------------*/
/* back to top */

/*----------------------------------------------------
first set up the survey design and view its properties with svydes
note that we need strata within regions (regstrat)
---------------------------------------------------------*/
svyset [pwei=weighta],psu(psu) strata(regstrat)
set more off
svydes

/*------------------------------------------------------
you should find that you have strata each with 2 or (in a few cases) 3 PSUs
Now get proportions in cigarette smoking categories
and their standard errors
---------------------------------------------------------*/
svyprop cigst1

/*-----------------------------------------------------------
svyprop does not give design effects or confidence intervals
to get these for smokers you need to recode to a 0/1 variable
and get its mean value
-----------------------------------------------------------*/
recode cigst1 (-9 -8 -6 =.) (-1 1 2 3=0) (4=1),gen(smoker)
svyprop smoker
svymean smoker,deff deft ci
/*---------------- check OK---------------------*/
table cigst1 smoker
/* back to top */

/*-----------------------------------------------
To investigate the effect of other survey designs
one can redo the svyset command
BUT before rerunning we need to clear previous settings
--------------------------------------------------------------------*/
/*-----------first just weights---------------*/
svyset, clear(all)
svyset [pwei=weighta]
svymean smoker,deff deft
/*-----------then add strata---------------*/
svyset, clear(all)
svyset [pwei=weighta],strata(regstrat)
svymean smoker,deff deft
/*-----------now PSUs, no strata---------------*/
svyset, clear(all)
svyset [pwei=weighta],psu(psu)
svymean smoker,deff deft
/*-----------now the full design as before---------------*/
svyset, clear(all)
svyset [pwei=weighta],strata(regstrat) psu(psu)
svymean smoker,deff deft

/*----------------------------------------------------------
now looking at rates by sex
-------------------------------------------------------------*/
svymean smoker, by(sex)
/*--------- to get a test of differences by sex
use lincom for linear combinations-----------------*/
lincom [smoker]male-[smoker]female
/*----------------------------------------------------------
and by adults in the household
-------------------------------------------------------------*/
svymean smoker, by(nofad)
/*-----and compare nofad=1 with nofad=2-----------------*/
lincom [smoker]1-[smoker]2
/* back to top */

/*-------------------------------------------------
smoking rates by region or health board are also easily calculated
and lincom can give the comparisons between any pair
or other combination
-------------------------------------------------------*/
svymean smoker, by(region)
svymean smoker, by(hboard)
lincom [smoker]Fife-[smoker]Lothian
lincom [smoker]Lanarksh-[smoker]Ayreshir /*--------- sorry about spelling mistake - in original file-----*/
/* back to top */

/*-----------------------------------------------------
now logistic regressions to predict smoking
To use categorical variables you must first generate
a set of dummy variables
here for number of adults
--------------------------------------------------*/
tabulate nofad,generate(nofad)
/*----------------------------------------------
check the data set to see the new variables
as there are so few households of more than 5
it seems sensible to group them together
and then to carry out the regression

Stata treats a missing value as larger than any number, so a bare
"nofad>5" would also be true when nofad is missing; "& nofad<."
keeps records with unknown household size out of the 5+ group.
---------------------------------------------------*/
replace nofad5=1 if nofad>5 & nofad<.
/*---regressions include the comparisons with nofad1 only--------
NOTE(review): nofad5 and any higher dummies are not in the models below,
so those households fall into the reference group alongside nofad1
- confirm this is intended.
----------------------------------------------------------------*/
svylogit smoker nofad2 nofad3 nofad4
/*------------ we can compare with simple logistic regression---------
--------------use coef to get comparable results to the svy command----*/
logistic smoker nofad2 nofad3 nofad4,coef

/*--------------
and we can get more complicated models
looking at joint effect of age group sex and number of adults
Test commands can be used to check if variables are significant
in the larger models
--------------------------------------------------------------*/
tabulate hboard,generate(hboard)
tabulate ageg,generate(ageg)
tabulate sex,generate(sex)
svylogit smoker nofad2 nofad3 nofad4
svylogit smoker nofad2 nofad3 nofad4 sex2 ageg2-ageg12 hboard2-hboard15
test sex2
test ageg2 ageg3 ageg4 ageg5 ageg6 ageg7 ageg8 ageg9 ageg10 ageg11 ageg12

/*-----------------------------------------------------------------
get dummies for the age sex interaction
--------------------------------------------------------------------*/
generate ageg2s=ageg2*(sex==1)
generate ageg3s=ageg3*(sex==1)
generate ageg4s=ageg4*(sex==1)
generate ageg5s=ageg5*(sex==1)
generate ageg6s=ageg6*(sex==1)
generate ageg7s=ageg7*(sex==1)
generate ageg8s=ageg8*(sex==1)
generate ageg9s=ageg9*(sex==1)
generate ageg10s=ageg10*(sex==1)
generate ageg11s=ageg11*(sex==1)
generate ageg12s=ageg12*(sex==1)
svylogit smoker nofad2 nofad3 nofad4 sex2 ageg2-ageg12 hboard2-hboard15 ageg2s-ageg12s
test ageg2s ageg3s ageg4s ageg5s ageg6s ageg7s ageg8s ageg9s ageg10s ageg11s ageg12s
/* back to top */

/*---------------------------------------------------
now replication methods
YOU WILL NEED ONE OF THE LARGER VERSIONS OF Stata
(SE or INTERCOOLED) to run this exemplar
You need to increase the memory to run this analysis
------------------------------------------------------*/
clear
set maxvar 3000
set virtual on
set memory 800M

/*--------- now reopen your saved data file-----------------------
first changing directory to where your file is located
read in data and redefine design, just to be sure all OK
------------------------------------------------------------------*/
cd "C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\ex3datafiles\datax"
use ex3,clear

/*-----------------------------------------------------------------
the next bit of code adds the sum of the weights by region
and age sex groups so that they are added to the data file
ready to use for post-stratification
This sample has already been post-stratified, but to get the
right SEs we need to redo the post-stratification and carry it out
on each of the replicates
------------------------------------------------------------------*/
collapse (sum) rtot=weight, by (region) /* make data file with totals*/
sort region
save rtots,replace /* sort and save it*/
use ex3,clear
sort region /* get original file and sort by region*/
merge region using rtots /*----------merge with totals and save as new file--------------*/
save ex3new,replace
drop _merge /*---------- or next bit will fail------------------*/
tab ageg
gen agesex=ageg*100+sex /* make an age sex variable---*/
save ex3new,replace
tab agesex
collapse (sum) asext=weight, by (agesex) /* make data file with totals*/
sort agesex
save astots,replace /* sort and save it*/
use ex3new,clear
sort agesex /* get original file and sort by agesex*/
merge agesex using astots /*----------merge with totals and save--------------*/
save ex3new,replace
/*-----------------------------------------------
you now have a file with agesex and region totals
-----------------------------------------------------*/

/*-----------------------------------------------------------------
and make a set of jackknife weights for this survey
This next command will create 312 new variables (one for each replicate)
where one of the 312 PSUs is dropped from each replication.
Look at the data to check this
------------------------------------------------------------------*/
survwgt create jkn, psu(psu) weight(weight) strata(regstrat)
/*---------------- now use the survey replication commands--------------------------------*/
survwgt rake [all] , by(agesex region) totvars( asext rtot) replace
save ex3reps,replace

/*-----------------------------------------------------------
recalculate smoker variable, as above
-----------------------------------------------------------*/
recode cigst1 (-9 -8 -6 =.) (-1 1 2 3=0) (4=1),gen(smoker)

/*-----------------------------------------------
now use the command to get the mean and design effect for smokers
using a jackknife method
------------------------------------------------------*/
svrmean smoker