############################################### # HOW TO USE THIS FILE # This is an HTML version of the R script file ex2.R # It is intended to show you the code and to allow links. # NOT to use interactively to run the code # For that download the script file ex2.R # to view the ouptut from these commands in html go to R results page
red stuff (R commands)
# # ######### ANY TEXT AFTER THE # CHARACTER (here shown black) # ARE TREATED AS COMMENTS BY R # # ############################################## # # data are in a data frame shs # to get the names of the variables #
Links in this page
Setting up a survey design
Simple means and proportions Table 2.3
Effect of design aspects on precision of estimates table 2.4
Chi square tests tables 2.6 onwards
Internet use by council area
Survey logistic regressions
Code to plot Figure 1
names(shs)
# # # and to see a histogram of the weights and the range of weights # (pretty large) and the ratio of the most extereme weights #
hist(shs$IND_WT,col='red') range(shs$IND_WT) max(shs$IND_WT)/min(shs$IND_WT)
# to give R access to the survey functions do
library(survey)
# The first step is to create a SURVEY DESIGN OBJECT that # will contain both the data and the information about the design # # note that in the latest version of R survey 2.9.1 it is necessary # to use the grossing-up weight to get the correct design effects # Go to # html help #packages #survey #svymean to see where # this is explained # # version 3 will have an option to overcome this # ##########################################
shs.des <- svydesign(id=~PSU, weights=~GROSSWT,strata=~STRATUM,data=shs) print(summary(shs.des))
# # You will have seen a summary of the strata and for each stratum # the number of PSUs and observations per stratum # in the LAs where there was no clustering the no. of clusters is # the same as the number of observations # NOTE THAT SOME STRATA HAVE BEEN COMBINED TO PREVENT LONELY PSUS # ############# RESULTS FOR TABLE 2.3 IN EXEMPLAR 2 HOME PAGE############## # back to top # now use the command svymean to get the mean of # the 0/1 variable for internet use # for the whole sample and for subgroups #
svymean(~INTUSE,shs.des,deff=T)
# # by sex #
svyby(~INTUSE,~SEX,shs.des,svymean,keep.var=T,deff=T)
# # The error messages are because splitting the data by sex has # produced a few more lonely PSUS. See the html help for # survey.lonely.psu by following links from html help menu to #packages #survey # There are some options to adjust estimates for this # this code will run them #
options("survey.adjust.domain.lonely"=T) options("survey.lonely.psu"='adjust') svyby(~INTUSE,~SEX,shs.des,svymean,keep.var=T,deff=T)
# # this increases the design effect by only a tiny amount # so not worth fussing with here # # Now proportions different using for different hours per week # # Need to usesubset to get rid of the cases where this # is missing for non-internet users #
svymean(~RC5,subset(shs.des,INTUSE==1),deff=T)
# #################### THIS CODE FOR TABLE 2.4 xxendlink ################ # back to top # # testing efect of different designs #
shs.des <- svydesign(id=~UNIQID, weights=~GROSSWT,data=shs) svymean(~INTUSE,shs.des,deff=T) shs.des <- svydesign(id=~PSU, weights=~GROSSWT,data=shs) svymean(~INTUSE,shs.des,deff=T) shs.des <- svydesign(id=~UNIQID, weights=~GROSSWT,strata=~STRATUM,data=shs) svymean(~INTUSE,shs.des,deff=T) shs.des <- svydesign(id=~PSU, weights=~GROSSWT,strata=~STRATUM,data=shs) svymean(~INTUSE,shs.des,deff=T)
# # ################### now tables xxendlink and chi squared tests for tables 2.5 2.6 2.7################ # back to top # # now some chisquare tests # the R help (internet help menu #packages #surveys #svychisq) # gives details of other options
svychisq(~INTUSE+SEX,shs.des) chisq.test(table(shs$INTUSE,shs$SEX),correct=F)
# table gives sum of weights by default #
svytable(~INTUSE+SEX,shs.des) svytable(~SEX+GROC,shs.des)
# # table gives sum of weights # more work would be needed to produce a nice table from this #
svychisq(~GROC+SEX,shs.des) chisq.test(table(shs$GROC,shs$SEX),correct=F) svychisq(~RC5+EMP_STA,subset(shs.des,INTUSE==1)) chisq.test(table(shs$RC5,shs$EMP_STA),correct=T)
# ######################## linkxx by council area xxendlink############ # back to top #
svyby(~INTUSE,~COUNCIL,shs.des,svymean,keep.var=T,deff=T)
# # now get mean internet use and CI and DEFF for Just Orkney #
svymean(~INTUSE,subset(shs.des,COUNCIL=='Orkney Islands'),deff=T)
# # # now logistic regression analyses for survey data ren=moving missing categories # ################# linkxx table 2.7 xxendlink ###################### # back to top
summary(svyglm(INTUSE~GROUPINC,subset(shs.des,!is.na(GROUPINC)),family='binomial'))
# add rurality
summary(svyglm(INTUSE~GROUPINC+SHS_6CLA,subset(shs.des,!is.na(GROUPINC)),family='binomial'))
################### linkxx figure 1.1 xxlink ########################################## # back to top #
fit<-svyglm(INTUSE~GROUPINC,design=shs.des)
# now some more complicated code to fit the curves for age and sex # # shown on the first page of this exemplar # using the splines library in R
library(splines)
# replace one missing value in age # and calculate terms to fit spline model # using cubic splines with 3 knots #
shs$AGE[is.na(shs$AGE)]<-40 splinexs<-bs(shs$AGE,knots=c(25,45,65),degree=3) SP1<-splinexs[,1] SP2<-splinexs[,2] SP3<-splinexs[,3] SP4<-splinexs[,4] SP5<-splinexs[,5] SP6<-splinexs[,6]
# add extra vars to survey design
shs.des<-update(shs.des,SP1=SP1,SP2=SP2,SP3=SP3,SP4=SP4,SP5=SP5,SP6=SP6,AGE=AGE)
# fit spline model #
fit<-svyglm(INTUSE~SP1+SP2+SP3+SP5+SP6 +(as.integer(SEX)-1)+ (as.integer(SEX)-1)*SP1+ (as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3+ (as.integer(SEX)-1)*SP4+(as.integer(SEX)-1)*SP5+ (as.integer(SEX)-1)*SP6 , design=shs.des, family='binomial')
# # now the age and sex plot shown on Exemplar 1
plot(c(15,83),c(0,65),type='n',xlab='age',ylab='% internet users',)
# fit for men and women calculated from predicted vals
ilogit<-function(x){1/(1+exp(-x))} Mfit<- ilogit(predict(fit))[shs$SEX=='male'][match(c(16:80),shs$AGE[shs$SEX=='male'])]*100 Wfit<- ilogit(predict(fit))[shs$SEX=='female'][match(c(16:80),shs$AGE[shs$SEX=='female'])]*100
# # plot rates and add the fitted lines to the plot #
agemeans<-svyby(~INTUSE,~SEX+AGE,shs.des,svymean) usebyageM<-agemeans[1:65,3]*100 usebyageW<-agemeans[1:65+65,3]*100 plot(c(15,83),c(0,65),type='n',xlab='age',ylab='% internet users',) points(c(16:79,82),usebyageM) points(c(16:79,82),usebyageW,col=2,pch=16) lines(c(16:79,82),Mfit,lwd=2) lines(c(16:79,82),Wfit,lwd=2,col=2)
# ########################################################## # now look at additional effect of other variables #
summary(update(fit,~.+SHS_6CLA)) summary(update(fit,~.+EMP_STA))
# # now let us look at the effect of local authority on # internet use when adjusted for age and sex # # unadjusted effect #
fit1<-svyglm(INTUSE~COUNCIL,design=shs.des)
fit2<-svyglm(INTUSE~COUNCIL+SP1+SP2+SP3+(as.integer(SEX)-1)+(as.integer(SEX)-1)*SP1+(as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3, design=shs.des, family='binomial')
# # adjusted age sex income #
fit3<-svyglm(INTUSE~COUNCIL+SP1+SP2+SP3+(as.integer(SEX)-1)+(as.integer(SEX)-1)*SP1+(as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3+GROUPINC, design=shs.des, family='binomial')
# # this next bit compares ranking of adjusted and unadjusted # coefficients. It seems that this large variation between local # authorities is not explained by age sex or household income #
comp<-cbind(rank(-c(0,fit1$coefficients[2:32])), rank(-fit2$coefficients[1:32]+fit2$coefficients[1]), rank(-fit3$coefficients[1:32]+fit3$coefficients[1])) dimnames(comp)[[1]][1]<-'Aberdeen City' dimnames(comp)[[2]]<-c('Unadjusted','Age sex','Age sex wealth') print('Ranking of internet use') print(comp)