############################################### # HOW TO USE THIS FILE # This is an HTML file to show you what R output looks like # Comments relate to interpretation of output # for comments on how torun code see the html R source code
red stuff is (R commands)
blue is output
# Black is comment
# # Links in this page Equal opportunities policies weighted and unweighted - Table 4.3 E O policy by workplace size Table 4.4 Other factors by equal opportunities policy Table 4.5 Logistic regression Table 4.6 Logistic regression Table 4.7 Finite population corrections Table 4.8 Back to top ## ################ table 4.3 ############################### # wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers) svymean(~EO,wers.des) mean SE
EO NA NA # sadly this just gives a missing value since EO has 7 missing values # but the answer is to remove the missing data from the survey design # see the help for is.na and for subset.surveydesign svymean(~EO,subset(wers.des,!is.na(EO)),deff=T) mean SE DEff
EO 0.672785 0.018102 3.2766 # # we can compare this with the unweighted mean had the design been different # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) Warning message:
No weights or probabilities supplied, assuming equal probability in: svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
svymean(~EO,subset(wers.des,!is.na(EO)),deff=T)
mean SE DEff
EO 0.8113553 0.0076675 Inf # # Design effects don't come out because no sampling weights # were given and they need to add to the population total Back to top ###################Table 4.4################################## # wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=GROSSWT,data=wers) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) NEMPSIZE statistic.EO SE
10-24 10-24 0.6911197 0.02755723
25-49 25-49 0.7171717 0.02182522
50-99 50-99 0.7743590 0.02034539
100-199 100-199 0.8501292 0.01784374
200-499 200-499 0.8903509 0.01442407
500+ 500+ 0.9189189 0.01548492
There were 50 or more warnings (use warnings() to see the first 50) # # the warning messages refer to lonely PSUs (see theory section on checking data) # This means that the standard errors masy be slightly underestimated # Back to top #######################Table 4.5######################################## # # Now we get means of percent female by EO # svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean) EO statistic.FEMALE SE
0 0 40.96654 1.2826394
1 1 51.76528 0.5253835
NA NA 60.65429 10.1791482
There were 23 warnings (use warnings() to see them) # # again the easisest way to get the standard error of # the difference is with a with svyglm (see help) # summary(svyglm(FEMALE~EO,wers.des) )
Call:
svyglm.survey.design(formula = FEMALE ~ EO, design = wers.des)
Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 40.967 1.283 31.939 < 2e-16 ***
EO 10.799 1.464 7.376 2.31e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 803.8749)
Number of Fisher Scoring iterations: 2
> #
# obviously workplaces with an EO policy have a higher % women
# we can now do similar analyses for % ethnic minority employees
#
svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
EO statistic.ETHNIC SE
0 0 3.810382 0.462366
1 1 5.616825 0.266537
NA NA 2.861205 1.424828
There were 19 warnings (use warnings() to see them)
summary(svyglm(ETHNIC~EO,wers.des))
Call:
svyglm.survey.design(formula = ETHNIC ~ EO, design = wers.des)
Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.8104 0.4624 8.241 3e-16 ***
EO 1.8064 0.5348 3.378 0.000745 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 107.4674)
Number of Fisher Scoring iterations: 2
# # obviously workplaces with an EO policy have a higher % women # we can now do similar analyses for % ethnic minority employees # svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean) EO statistic.ETHNIC SE
0 0 3.810382 0.462366
1 1 5.616825 0.266537
NA NA 2.861205 1.424828
There were 19 warnings (use warnings() to see them) summary(svyglm(ETHNIC~EO,wers.des))
Call:
svyglm.survey.design(formula = ETHNIC ~ EO, design = wers.des)
Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.8104 0.4624 8.241 3e-16 ***
EO 1.8064 0.5348 3.378 0.000745 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 107.4674)
# # like females a bigger effect in the weighted analysis # but clear evidence of more ethnic minority employees when policy in place # # now find weighted percent in each categories of disabled worker %s # since DISABGRP is a factor R gives proportions # svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP))) mean SE
DISABGRPnone 0.56271 0.0099
DISABGRPunder 3% 0.35704 0.0094
DISABGRP3%+ 0.08025 0.0059 # # now means of EO by disabled groups # svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean) # # svyglm compares percent in each disabled group category # between eo and non-eo workplaces # summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO)))) Call:
svyglm.survey.design(formula = (DISABGRP == "none") ~ EO, design = subset(wers.des,
!is.na(wers$DISABGRP) & !is.na(EO)))
Survey design:
subset(wers.des, !is.na(wers$DISABGRP) & !is.na(EO))
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.69289 0.02274 30.467 < 2e-16 ***
EO -0.16135 0.02602 -6.201 6.74e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.2422422)
summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),])) Call:
svyglm.survey.design(formula = (DISABGRP == "under 3%") ~ EO,
design = wers.des[!is.na(wers$DISABGRP), ])
Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.22589 0.02052 11.009 < 2e-16 ***
EO 0.16221 0.02390 6.788 1.48e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.224932)
Number of Fisher Scoring iterations: 2
summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),])) Call:
svyglm.survey.design(formula = (DISABGRP == "3%+") ~ EO, design = wers.des[!is.na(wers$DISABGRP),
])
Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0812183 0.0136809 5.937 3.4e-09 ***
EO -0.0008611 0.0152388 -0.057 0.955
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.07382358)
Number of Fisher Scoring iterations: 2
############################################################## # now the same things for unweighted data wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) # # detailed results not shown # svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean) summary(svyglm(FEMALE~EO,wers.des) ) svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean) summary(svyglm(ETHNIC~EO,wers.des)) svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP))) svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean) summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO)))) summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),])) summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),])) Back to top ########################################################## # Table 4.6 # now the svyglm command and the glm commands to # fit logistic regressions # first restore design wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial')) # # and unweighted # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial')) Back to top ################################################################## # table 4.7 # now explore models that include workplace size wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial')) # # and without weighting # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial')) Back to top ############################################################### # # table 4.8 with finite population corrections # wers.des <- svydesign(id=~SERNO,fpc=~SAMPFRAC, strata=~STRATA,weight=~GROSSWT,data=wers) svymean(~EO,subset(wers.des,!is.na(EO))) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) # # and without wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=~GROSSWT,data=wers) svymean(~EO,subset(wers.des,!is.na(EO))) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) # ############################################################# # now some more modelling stuff to make plots # #################################################################