############################################### # HOW TO USE THIS FILE # This is an HTML file to show you what R output looks like # Comments relate to interpretation of output # for comments on how torun code see the html R source code
red stuff is (R commands)
blue is output
# Black is comment # # Links in this page Equal opportunities policies weighted and unweighted - Table 4.3 E O policy by workplace size Table 4.4 Other factors by equal opportunities policy Table 4.5 Logistic regression Table 4.6 Logistic regression Table 4.7 Finite population corrections Table 4.8 Back to top ## ################ table 4.3 ############################### # wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers) svymean(~EO,wers.des) mean SEEO NA NA # sadly this just gives a missing value since EO has 7 missing values # but the answer is to remove the missing data from the survey design # see the help for is.na and for subset.surveydesign svymean(~EO,subset(wers.des,!is.na(EO)),deff=T) mean SE DEffE O 0.672785 0.018102 3.2766 # # we can compare this with the unweighted mean had the design been different # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) Warning message:No weights or probabilities supplied, assuming equal probability in: svydesign(id = ~SERNO, strata = ~STRATA, data = wers) svymean(~EO,subset(wers.des,!is.na(EO)),deff=T) mean SE DEffE O 0.8113553 0.0076675 Inf # # Design effects don't come out because no sampling weights # were given and they need to add to the population total Back to top ###################Table 4.4################################## # wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=GROSSWT,data=wers) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) NEMPSIZE statistic.EO SE 10-24 10-24 0.6911197 0.02755723 25-49 25-49 0.7171717 0.02182522 50-99 50-99 0.7743590 0.02034539 100-199 100-199 0.8501292 0.01784374 200-499 200-499 0.8903509 0.01442407 500+ 500+ 0.9189189 0.01548492 There were 50 or more warnings (use warnings() to see the first 50) # # the warning messages refer to lonely PSUs (see theory section on checking data) # This means that the standard errors masy be slightly underestimated # Back to top #######################Table 4.5######################################## # # Now we get means of percent female by EO # svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean) EO statistic.FEMALE SE 0 0 40.96654 1.2826394 1 1 51.76528 0.5253835 NA NA 60.65429 10.1791482 There were 23 warnings (use warnings() to see them) # # again the easisest way to get the standard error of # the difference is with a with svyglm (see help) # summary(svyglm(FEMALE~EO,wers.des) ) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 40.967 1.283 31.939 < 2e-16 *** EO 10.799 1.464 7.376 2.31e-13 *** # obviously workplaces with an EO policy have a higher % women # we can now do similar analyses for % ethnic minority employees # svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean) EO statistic.ETHNIC SE 0 0 3.810382 0.462366 1 1 5.616825 0.266537 NA NA 2.861205 1.424828 There were 19 warnings (use warnings() to see them) summary(svyglm(ETHNIC~EO,wers.des)) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 40.967 1.283 31.939 < 2e-16 *** EO 10.799 1.464 7.376 2.31e-13 *** # obviously workplaces with an EO policy have a higher % women # we can now do similar analyses for % ethnic minority employees # svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean) EO statistic.ETHNIC SE 0 0 3.810382 0.462366 1 1 5.616825 0.266537 NA NA 2.861205 1.424828 There were 19 warnings (use warnings() to see them) summary(svyglm(ETHNIC~EO,wers.des)) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 3.8104 0.4624 8.241 3e-16 *** EO 1.8064 0.5348 3.378 0.000745 *** # # like females a bigger effect in the weighted analysis # but clear evidence of more ethnic minority employees when policy in place # # now find weighted percent in each categories of disabled worker %s # since DISABGRP is a factor R gives proportions # svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP))) mean SE DISABGRPnone 0.56271 0.0099 DISABGRPunder 3% 0.35704 0.0094 DISABGRP3%+ 0.08025 0.0059 # # now means of EO by disabled groups # svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean) DISABGRP statistic.EO SE none none 0.7658662 0.01172310 under 3% under 3% 0.8798920 0.01171431 3%+ 3%+ 0.8083832 0.03005366 There were 23 warnings (use warnings() to see them) # # svyglm compares percent in each disabled group category # between eo and non-eo workplaces # summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO)))) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.69289 0.02274 30.467 < 2e-16 *** EO -0.16135 0.02602 -6.201 6.74e-10 *** summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),])) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.22589 0.02052 11.009 < 2e-16 *** EO 0.16221 0.02390 6.788 1.48e-11 *** summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),])) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.0812183 0.0136809 5.937 3.4e-09 *** EO -0.0008611 0.0152388 -0.057 0.955 ############################################################## # now the same things for unweighted data wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) # # detailed results not shown # svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean) summary(svyglm(FEMALE~EO,wers.des) ) svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean) summary(svyglm(ETHNIC~EO,wers.des)) svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP))) svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean) summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO)))) summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),])) summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),])) Back to top ########################################################## # Table 4.6 # now the svyglm command and the glm commands to # fit logistic regressions # first restore design wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial')) Call: svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP, design = wers.des, family = "binomial") Survey design: svydesign(id = ~SERNO, strata = ~STRATA, weights = ~GROSSWT, data = wers) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.412330 0.191134 -2.157 0.03110 * ETHNIC 0.028791 0.011844 2.431 0.01515 * FEMALE 0.018575 0.003247 5.721 1.22e-08 *** DISABGRPunder 3% 0.680734 0.211643 3.216 0.00132 ** DISABGRP3%+ -0.189232 0.375388 -0.504 0.61425 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for binomial family taken to be 0.970625) Number of Fisher Scoring iterations: 5 Warning message: non-integer #successes in a binomial glm! in: eval(expr, envir, enclos) # # and unweighted # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial')) Call: svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP, design = wers.des, family = "binomial") Survey design: svydesign(id = ~SERNO, strata = ~STRATA, data = wers) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.444213 0.108854 4.081 4.66e-05 *** ETHNIC 0.017137 0.008531 2.009 0.0447 * FEMALE 0.013992 0.001876 7.458 1.30e-13 *** DISABGRPunder 3% 0.847344 0.135626 6.248 5.07e-10 *** DISABGRP3%+ 0.108964 0.208531 0.523 0.6014 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for binomial family taken to be 0.9219255) Number of Fisher Scoring iterations: 5 Back to top ################################################################## # table 4.7 # now explore models that include workplace size wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial')) Call: svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP + NEMPSIZE, design = wers.des, family = "binomial") Survey design: svydesign(id = ~SERNO, strata = ~STRATA, weights = ~GROSSWT, data = wers) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.616667 0.236489 -2.608 0.00919 ** ETHNIC 0.029325 0.012202 2.403 0.01634 * FEMALE 0.019776 0.003333 5.934 3.47e-09 *** DISABGRPunder 3% 0.004699 0.224611 0.021 0.98331 DISABGRP3%+ -0.222865 0.376990 -0.591 0.55447 NEMPSIZE25-49 0.071526 0.220591 0.324 0.74578 NEMPSIZE50-99 0.553989 0.218764 2.532 0.01141 * NEMPSIZE100-199 1.152938 0.250897 4.595 4.59e-06 *** NEMPSIZE200-499 1.653990 0.291210 5.680 1.55e-08 *** NEMPSIZE500+ 1.991005 0.349684 5.694 1.43e-08 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for binomial family taken to be 0.976199) Number of Fisher Scoring iterations: 5 # # and without weighting # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial')) Call: svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP + NEMPSIZE, design = wers.des, family = "binomial") Survey design: svydesign(id = ~SERNO, strata = ~STRATA, data = wers) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.207243 0.175511 -1.181 0.23782 ETHNIC 0.015954 0.009046 1.764 0.07795 . FEMALE 0.017123 0.001953 8.768 < 2e-16 *** DISABGRPunder 3% 0.177042 0.161640 1.095 0.27352 DISABGRP3%+ 0.004065 0.213042 0.019 0.98478 NEMPSIZE25-49 0.122007 0.179442 0.680 0.49663 NEMPSIZE50-99 0.545961 0.189638 2.879 0.00403 ** NEMPSIZE100-199 1.096627 0.217922 5.032 5.28e-07 *** NEMPSIZE200-499 1.575925 0.233285 6.755 1.86e-11 *** NEMPSIZE500+ 1.734561 0.305587 5.676 1.58e-08 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for binomial family taken to be 0.9131494) Number of Fisher Scoring iterations: 5 Back to top ############################################################### # # table 4.8 with finite population corrections # wers.des <- svydesign(id=~SERNO,fpc=~SAMPFRAC, strata=~STRATA,weight=~GROSSWT,data=wers) svymean(~EO,subset(wers.des,!is.na(EO))) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) x NEMPSIZE statistic.EO SE 10-24 10-24 0.6343108 0.03275105 25-49 25-49 0.6480435 0.03483782 50-99 50-99 0.7187470 0.02627534 100-199 100-199 0.8124946 0.02334485 200-499 200-499 0.8679404 0.02012385 500+ 500+ 0.9129004 0.01756700 # # and without wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=~GROSSWT,data=wers) svymean(~EO,subset(wers.des,!is.na(EO))) mean SE EO 0.67279 0.0181 svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) NEMPSIZE statistic.EO SE 10-24 10-24 0.6343108 0.03280815 25-49 25-49 0.6480435 0.03489757 50-99 50-99 0.7187470 0.02641132 100-199 100-199 0.8124946 0.02358097 200-499 200-499 0.8679404 0.02045991 500+ 500+ 0.9129004 0.01834597 There were 50 or more warnings (use warnings() to see the first 50) # ############################################################# # now some more modelling stuff to make plots # #################################################################