# exemplar 5 R code

##############################################

# HOW TO USE THIS FILE

# A text version of this file (ex5.R) is also available which can just be sourced

# into the R program

# This file is intended for you to use interactively by pasting into R

# Comments are in black - red bits are commands that can be pasted into R

######### ANY TEXT AFTER THE # CHARACTER IS TREATED AS A COMMENT

# data stored in c:/GR/aprojects/peas/web/exemp3/data/ex3.Rdata

# this file is c:/GR/aprojects/peas/web/exemp3/program_code/ex3_R.html

##############################################

# The survey data are held in a data frame called ex5.

# List the names of its variables:

colnames(ex5)

# General health is stored in the factor ex5$GENHELF.

# Tabulate it to see the count in each category:

table(ex5[["GENHELF"]])

# Now it is time to try the survey package.

# Load it so that its functions are available:

library(survey)

# The first step is to create a SURVEY DESIGN OBJECT, which holds both
# the data and the information about the design.

# This design has WEIGHTS (WEIGHT), but no clustering or stratification.
# Since it is not clustered, the PSU is just the individual respondent;
# this is expressed by giving the formula ~1 as the ids argument.

# `ids` is the full argument name of svydesign (`id` is accepted only
# through R's partial argument matching).

ex5.des <- svydesign(ids = ~1, weights = ~WEIGHT, data = ex5)

# See the help page for svydesign (?svydesign) for more explanation.
# The ~ sign marks a name as one of the columns of the data.

# The R object now contains all the information about the design,
# including the data.

# To get details of the design:

summary(ex5.des)

# Use svymean to get the weighted proportions for categorical variables.
# deff = TRUE asks for design effects as well (TRUE is written out in
# full because the shorthand T is an ordinary variable that can be
# reassigned).

svymean(~GENHELF, ex5.des, deff = TRUE)

svymean(~Q85A, ex5.des, deff = TRUE)

# The call above fails for missing values, so take the subset of the
# design in which Q85A is not missing:

svymean(~Q85A, subset(ex5.des, !is.na(Q85A)), deff = TRUE)

# The same estimates expressed as percentages: the value returned by
# print() is multiplied by 100 and rounded to one decimal place.

round(print(svymean(~Q85A, subset(ex5.des, !is.na(Q85A)))) * 100, 1)

# Now the means of some continuous variables.

# as.numeric() treats GENHELF as a score, not a category.
svymean(~as.numeric(GENHELF), ex5.des, deff = TRUE)

svymean(~SINC, ex5.des, deff = TRUE)

svymean(~SACC, ex5.des, deff = TRUE)

# Recode the cannabis and amphetamine items into scores and add them
# to the design.

# Scoring applied to the numeric level codes of an item:
#   codes <= 2 -> 0, codes 3, 4, 5 -> 1, code 6 -> 0.5
# Any other code is left as it is, and missing values stay missing.
recode_drug_score <- function(item) {
  code <- as.numeric(item)
  score <- code
  score[code <= 2] <- 0
  score[code %in% c(3, 4, 5)] <- 1
  score[code == 6] <- 0.5
  score
}

# The amphetamine score must be built from the amphetamine item. Q85A is
# the item used for the cannabis score, so reusing it would make AMPSCORE
# an exact copy of CANSCORE and the two-predictor regression collinear.
# NOTE(review): Q85B is assumed to be the amphetamine item - confirm
# against names(ex5). The check stops with a clear error if it is absent.
stopifnot("Q85B" %in% names(ex5))

ex5.des <- update(ex5.des, CANSCORE = recode_drug_score(ex5$Q85A))

ex5.des <- update(ex5.des, AMPSCORE = recode_drug_score(ex5$Q85B))

rm(recode_drug_score) # the scores live in the design; keep the main workspace clean

# Design-based regressions of general health, treated as a numeric
# score, on the drug scores stored in the design.

# Cannabis score only:
summary(svyglm(as.numeric(GENHELF) ~ CANSCORE, ex5.des))

# Amphetamine score only:
summary(svyglm(as.numeric(GENHELF) ~ AMPSCORE, ex5.des))

# Both scores together:
summary(svyglm(as.numeric(GENHELF) ~ CANSCORE + AMPSCORE, ex5.des))

# Now make a table to help understand the results:
# general health (rows) by Q85A (columns), from the survey design.

can.table <- svytable(~GENHELF + Q85A, ex5.des)

# Column percentages: each column is divided by its own total,
# multiplied by 100 and rounded to whole numbers.
can.tprint <- round(prop.table(can.table, margin = 2) * 100)

# Add a row below holding the base numbers (counts of ex5$Q85A).
# Naming the row inside rbind() labels it 'BASE' whatever the number of
# GENHELF categories, instead of relying on it being row 6.
can.tprint <- rbind(can.tprint, BASE = table(ex5$Q85A))

# At any stage, type the name of an object to see what it is.

can.tprint