Exemplar 1 R code

# HOW TO USE THIS FILE
# This is an HTML version of the R script file ex1.R
# It is intended to show you the code and to allow links.
# NOT to use interactively to run the code
# Pasting the  into R might work, but it might not
# to view the ouptut from these commands in html go to R results page
red stuff (R commands)

#########  ANY TEXT AFTER THE # CHARACTER (here shown black)
#             ARE TREATED AS COMMENTS BY R

Links in this page
Mean income with different design assumptions
Raking to match Scottish totals 
Jacknife estimation for mean
Subgroup lone parents
 Percentiles

#
# data are in a data frame frs
# to get the names of the variables
#
names(frs)
#
# first find the simple mean of the income variable - no weights
#
mean(frs$HHINC)
#
# and then the weighted mean - go to the html help file if you want to check how this works
#
weighted.mean(frs$HHINC,frs$GROSS2)
###########################################################################
# The next line makes the plot shown on the web page and also
# examines the distribution of the weights
########################################################################
par(omi=c(.1,.1,0,.1))# sets outer margins
boxplot(split(frs$GROSS2,frs$ADULTH),col='red',cex.axis=0.7,cex.lab=0.7,las=2,ylab='Grossing-up weight')
#
# now it is time to try using the survey package
# to give R access to the survey functions do
#

 back to top 
library(survey)
############################################################
#
# The first step is to create a SURVEY DESIGN OBJECT that
# will contain both the data and the information about the design
##########################################

frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs)

############################################
# see the html help file for the function svydesign for more explanation
# the ~ sign refers to it being one of the columns of the data
#
#  First get the standard error and design effect for the mean income
#   allowing for weighting  and clustering
###################################################################

svymean(~HHINC,design=frs.des,deff=T)

#######################################################################
# now  reset the design so that only weighting is allowed
# for. To do this the PSU must be set to the individual case identifier (SERNUM)
###########################################################################

frs.des <- svydesign(id=~SERNUM, weights=~GROSS2,data=frs)
svymean(~HHINC,design=frs.des,deff=T)
 back to top 


##########################################################
# next we  get ready to post-stratify this survey to match the population totals on
# council tax band (CTBAND) and tenure (TENTYPE)
# read in totals
# by using the R function xtabs
# to create tables of totals by CTBAND and TENTYPE
# starting with percentages and multiplying by sum of weights
###########################################################

tab.ctband <- xtabs(GROSS2~CTBAND,data=frs)
tab.ctband[1:9]<-c( 24.83 24.62 15.45 11.91 11.96 5.95 3.94 0.45 0.89)*sum(frs$GROSS2)/100
unclass(tab.ctband)

tab.tenure <- xtabs(GROSS2~TENURE,data=frs)
tab.tenure[1:4]<-c(  62.63 , 21.59, 5.58 , 10.20)*sum(frs$GROSS2)/100
unclass(tab.ctband)
#
# now we rake the original survey to match these totals
#
frs.des <- svydesign(id=~PSU, weights=~WEIGHTA,data=frs)
frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure))
#
#
# 
svymean(~HHINC,design=frs.des,deff=T)
###########################################################
# R uses a method called calibration weighting 
# to get the standard errors for a raked survey
# see the help on raking
# It is not available for any other program
#  featured on this site, but SUDAAN does it
# An alternative is to use fepication methods
###################################################################
###################################################################
# There are TWO kinds of survey design
#
# 1. Using simple methods , as used above
#       usually for surveys WITHOUT POST-STRATIFICATION
# 2. Using replicate weights, usually for surveys with POST-STARTIFICATION
#
# the next stage is to convert the simple design into one with replicate weights
# we use the default options which produce jacknife samples
# see html help for as.svrepdesign
# Go back to the original design and convert it to a
# replicate one and then post-stratify it
#######################################################
frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs)
frs.des <- as.svrepdesign(frs.des)
frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure))
 back to top 
#####################################################################
# Now poststratifying
# notice that we overwrite the previous R object with the new one
# this is to save space as each one contains the data and the work space
# can get too big for comfort
#
# svymean recognises that the design is post-stratified and
# uses a jackknife method
#################################################################

frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs)
frs.des <- as.svrepdesign(frs.des)

frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure))
svymean(~HHINC,design=frs.des,deff=T)

 back to top 

###########################################################################################
# NOW GETTING THE PERCENTILES OF THE INCOME DISTRIBUTION
#
my.svrepquantile(~HHINC,design=frs.des,quantile=c(0.05,0.1,.25,.5,.75,.9,.95))
#
# NEED TO USE THIS JUST NOW (source the file myRfunctions.R
# go to Exemplar 1 main page for link to it
# because the survey package one is not working right
# for replicate designs
#
#svyquantile(~HHINC,design=frs.des,quantile=c(0.05,0.1,.25,.5,.75,.9,.95),ci=T)
#
 back to top 

####################################################################################
#
# INCOME FOR LONE PARENTS
#
# the next command will give a table of the data by household type
#
xtabs(~ADULTH+DEPCHLDH,data=frs)
frs$LONEP<-0
frs$LONEP[frs$ADULTH==1 & frs$DEPCHLDH>0]<-1
xtabs(~LONEP+DEPCHLDH,data=frs)
sum(frs$LONEP)
#
# you will see that there are just 451 single parent households
# we want the mean household income and its standard error
# To do this we need to subset the survey
# In R this means creating a new subset design object
# Need to reset the design so that the new variable LONEP
# is there
#
frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs)
frs.des.lonep<-subset.survey.design(frs.des,LONEP==1)
#
# and then apply all the survey functions to this new design
#
svymean(~HHINC,frs.des.lonep,deff=T)
#
# as expected single parent incomes are lower than the rest of the population
#
# Now with poststratification
frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs)
frs.des <- as.svrepdesign(frs.des)
frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure))
frs.des.lonep<-subset.survey.design(frs.des,LONEP==1)
svymean(~HHINC,frs.des.lonep,deff=T)