##code for Chapter 8 "Propensity Score Analysis With Structural Equation Models" of book:
#Leite, W. L. (2016). Practical propensity score analysis using R.
#Thousand Oaks, CA: Sage.
#
#this is the code that was used to generate the example results in the book
#As the R software and the R packages used in this example are updated frequently
#some incompatibilities between the current code and new R versions or package versions
#may appear
#Any updates to the code will be posted at:
# http://www.practicalpropensityscore.com
#this example estimates the effect of new teacher participation
#in a network of teachers on their perception of workload manageability
#using data from the 1999--2000 School and Staffing Survey (SASS)
#and 2000--2001 Teacher Follow-up Survey (TFS)
#load the dataset of new teachers (merged 1999--2000 SASS and 2000--2001 TFS)
load(file="Chapter8_SASS_TFS_data_for_example.Rdata")
#rename dataset for convenience and remove the original object
data <- newTeachData
rm(newTeachData)
#re-order the data so that untreated teachers come before treated teachers
#(lavaan's multiple-group ordering below relies on this sort order)
data <- data[order(data$treat,data$CNTLNUM),]
#load the required library for multiple group SEM analyses
#library() (rather than require()) stops immediately with an error if lavaan is missing
library(lavaan)
#Define the measurement model for the five latent covariates (lavaan model syntax;
#the # comments inside the string are ignored by lavaan's parser)
model <- 'PSCHMAN =~ T0299 + T0300 + T0306 + T0307 + T0310 + T0312 #Perception of school management
PFAMBACK =~ T0335 + T0336 + T0337 + T0338 #perception of family background
PSTUDEL =~ T0325 + T0326 + T0327 + T0331 + T0332 #perception of student deliquency
PSTUPAR =~ T0321 + T0322 + T0324 #perception of student participation
PTEACSUP =~ T0308 + T0309 + T0311' #perception of teacher support
#names of the ordinal indicators, grouped by the latent factor they measure
indicator.names <- c(
  #PSCHMAN - perception of school management
  "T0299", "T0300", "T0306", "T0307", "T0310", "T0312",
  #PFAMBACK - perception of family background
  "T0335", "T0336", "T0337", "T0338",
  #PSTUDEL - perception of student delinquency
  "T0325", "T0326", "T0327", "T0331", "T0332",
  #PSTUPAR - perception of student participation
  "T0321", "T0322", "T0324",
  #PTEACSUP - perception of teacher support
  "T0308", "T0309", "T0311")
#multiple-group CFA assuming strong factorial invariance
#(factor loadings and thresholds constrained equal across treated and untreated groups)
mgCFA <- cfa(model, #define the model for the confirmatory factor analysis
            ordered = indicator.names, #declare the indicators as ordered categorical
            data=data, #define the data source
            group="treat", #define the group indicator
            estimator="WLSMV", #robust weighted least squares estimator for ordinal indicators
            group.equal=c("loadings","thresholds")) #enforce the strong factorial invariance specifications
#obtain the summary statistics
summary(mgCFA, fit.measures=TRUE, standardized=TRUE)
#store model fit measures
mgCFA.fit <- fitMeasures(mgCFA)
write.csv(data.frame(mgCFA.fit), file="mgCFA_fit_measures.csv")
#store parameter estimates
mgCFA.estimates <- parameterEstimates(mgCFA, standardized=TRUE)
write.csv(data.frame(mgCFA.estimates),file="mgCFA_estimates_latent_confouders.csv")
#save the statistics needed below for the Satorra-Bentler scaled chi-square difference test:
#the scaling correction factor of the strong factorial invariance model
scf.mgCFA <- as.numeric(fitMeasures(mgCFA,"chisq.scaling.factor"))
#the degrees of freedom of the chi square test statistic of the strong factorial invariance model
df.mgCFA <- as.numeric(fitMeasures(mgCFA,"df"))
#the scaled chi square test statistic of the strong factorial invariance model
chi.square.mgCFA <- as.numeric(fitMeasures(mgCFA,"chisq.scaled"))
#==============================================================
#Run a multiple-group CFA with configural invariance only
#(factor loadings and thresholds are free to differ between groups);
#this serves as the comparison model for the invariance test below
mgCFA.configural <- cfa(model, #define the model for the confirmatory factor analysis
ordered = indicator.names, #declare the indicators as ordered categorical
data=data,#define the data source
group="treat",#define the group indicator
estimator="WLSMV")#define the estimator to be used
#Obtain the summary statistics
summary(mgCFA.configural, fit.measures=TRUE, standardized=TRUE)
#save the statistics required for the Satorra-Bentler scaled chi-square difference test
#the scaling correction factor of the configural invariance model
scf.mgCFA.configural <- as.numeric(fitMeasures(mgCFA.configural,"chisq.scaling.factor"))
#the degrees of freedom of the chi square test statistic of the configural invariance model
df.mgCFA.configural <- as.numeric(fitMeasures(mgCFA.configural,"df"))
#the scaled chi square test statistic of the configural invariance model
chi.square.mgCFA.configural <- as.numeric(fitMeasures(mgCFA.configural,"chisq.scaled"))
#==============================================================
#Compare the configural and strong factorial invariance models with the
#Satorra-Bentler scaled chi-square difference test
#difference in degrees of freedom between the nested models
df.difference <- df.mgCFA - df.mgCFA.configural
#step 1: overall scaling correction factor for the difference test
scaling.correction.factor <-
  (scf.mgCFA * df.mgCFA - scf.mgCFA.configural * df.mgCFA.configural) / df.difference
#step 2: scaled chi-square difference statistic
Satorra.Bentler.chi.square.statistic <-
  (scf.mgCFA * chi.square.mgCFA -
     scf.mgCFA.configural * chi.square.mgCFA.configural) / scaling.correction.factor
#p value: probability of a chi square statistic at least this extreme under the
#null hypothesis that the two models fit equally well
pchisq(Satorra.Bentler.chi.square.statistic, df.difference, lower.tail = FALSE)
#since the p value of .46 is greater than nominal alpha level that is .05, we failed to reject the null hypothesis.
#=============================================================
#Estimation of propensity scores
#obtain the factor scores from the strong factorial invariance model
#predict() on a multiple-group lavaan fit returns a list with one factor score
#matrix per group
factor.scores <- predict(mgCFA)
#stack the two groups' factor scores and append them as columns of the data
#NOTE(review): this assumes lavaan's group 1 corresponds to treat==0 and group 2
#to treat==1, which holds because the data were sorted by treat above -- confirm
#if the sorting step is ever changed
data <- cbind(data,rbind(factor.scores[[1]],factor.scores[[2]]))
######
#PS estimation
######
#Latent factors:
#"PSCHMAN" = Perception of school management
#"PFAMBACK" = Perception of family background
#"PSTUDEL" = perception of student delinquency
#"PSTUPAR" = perception of student participation
#"PTEACSUP" = perception of teacher support
#define covariates for the propensity score model
covariateNames <- c(
"LEP_T",#Percent of students with limited English proficiency taught in most recent full week, for teachers with self-contained or departmental classes.
"PLAN", #Percentage of scheduled school time teacher had for planning during most recent full week of teaching.
"T0059",#What was your MAIN activity LAST school year?
"T0106",#How did you earn your regular or standard state certificate or advanced professional certificate in your MAIN teaching assignment field?
"T0120",#What was your main teaching assignment field LAST school year?
"T0122",#In what year did you begin your first teaching position, either full-time or part-time, at the elementary or secondary level? (recoded to percentage)
"T0124",#Did your preparation for teaching include- (1) Coursework in how to select and adapt instructional materials?
"T0125",#Did your preparation for teaching include- (2) Coursework in learning theory or psychology appropriate to the age of students you teach?
"T0126",#Did your preparation for teaching include- (3) Your observation of other classroom teaching?
"T0127",#Did your preparation for teaching include- (4) Feedback on your teaching?
"T0147",#In your first year of teaching, did you work closely with a master or mentor teacher?
##In the past 12 months, have you participated in the following activities RELATED TO TEACHING?
"T0150",# a. University course(s) taken for recertification or advanced certification
"T0153",#d. Individual or collaborative research on a topic of interest to you professionally
"T0154",#Regularly-scheduled collaboration with other teachers on issues of instruction
"T0158",#Workshops, conferences or training in which you were the presenter
"T0248",#In the last 3 years, have you had 8 hours or more of training or professional development on how to teach special education students?
"T0250",#In the last 3 years, have you had 8 hours or more of training or professional development on how to teach limited-English proficient students?
"T0208",#At THIS school, what is the total number of students enrolled in the class you taught during your most recent FULL WEEK of teaching?
#factor scores obtained from the multiple-group CFA above
"PSCHMAN", #perception of school management
"PFAMBACK", #perception of family background
"PSTUDEL", #perception of student delinquency
"PSTUPAR", #perception of student participation
"PTEACSUP", #perception of teacher support
#indicators of data imputed by NCES
"PUPILS", #total number of pupils in self-contained or departmentalized classes
"teachImputed" #imputed teacher
)
#create the propensity score model formula from the covariate list
#this includes both individual level and school level covariates
#(paste0 replaces the paste(..., sep="") idiom)
psFormula <- formula(paste0("treat~", paste(covariateNames, collapse="+")))
print(psFormula)
#estimate the propensity scores with a logistic regression of treatment on the covariates
psModel <- glm(psFormula, family=binomial, data=data)
#the fitted values of the logistic regression are the propensity scores
data$pScores <- fitted(psModel)
#summarize the propensity scores separately for untreated and treated teachers
by(data$pScores, data$treat, summary)
#evaluate common support with overlaid histograms of the propensity scores
tiff("chapter8_Histogram_of_common_support.tif", res=600, compression = "lzw", height=6, width=15, units="in")
#shared bin boundaries so the two histograms are directly comparable
ps.breaks <- seq(0, 1, by=0.05)
#untreated group: shaded with density lines at 45 degrees
hist(data$pScores[data$treat==0], density = 10, angle = 45, main="Propensity Scores",
     breaks=ps.breaks,
     xlim=c(0,1), ylim=c(0,200), xlab="Shaded = Untreated | Gray = Treated")
#treated group: semi-transparent gray, drawn on top of the first histogram
hist(data$pScores[data$treat==1], col=gray(0.4,0.25), breaks=ps.breaks,
     xlim=c(0,1), ylim=c(0,200), add=TRUE)
dev.off()
#evaluate common support with a box and whiskers plot
#library() (rather than require()) stops immediately with an error if lattice is missing
library(lattice)
tiff("Chapter8_boxplot.tif", res=600, compression = "lzw", height=6, width=15, units="in")
bwplot( pScores~treat,
        data = data,
        ylab = "Propensity Scores", xlab = "Treatment",auto.key = TRUE)
dev.off()
#evaluate common support with density plots
#create a labeled factor version of the treatment indicator for the plot legend
data$Treat <- factor(data$treat, levels=0:1,labels=c("Teachers not in networks","Teachers in networks"))
#library() (rather than require()) stops immediately with an error if lattice is missing
library(lattice)
lattice.options(default.theme = standard.theme(color = FALSE))
tiff("Chapter8_figure8-2.tif", res=600, compression = "lzw", height=6, width=15, units="in")
densityplot( ~pScores, groups=Treat, plot.points=FALSE,
data = data, ylab = "Propensity Scores", xlab = "Treatment",auto.key = TRUE)
dev.off()
#convert propensity scores to logits
#qlogis(p) is the logistic quantile function, identical to log(p/(1-p))
data$logitPScores <- qlogis(data$pScores)
#=================================================
# #estimate propensity score with generalized boosted modeling
# data$treat = as.numeric(as.character(data$treat))
# require(twang)
# myGBM <- ps(psFormula, data = data, n.trees=50000, interaction.depth=4,shrinkage=0.0005,
# stop.method=c("es.max"), estimand = "ATT",
# verbose=TRUE)
# #obtain information about balance achieved and iteration numbers
# summary(myGBM)
#
#
# #obtain graph of convergence
# tiff("Chapter8_convergence_GBM.tif", res=600, compression = "lzw", height=6, width=15, units="in")
# plot(myGBM,type="b", color=F, plots=1)
# dev.off()
#
# #obtain graph of common support
# tiff("Chapter8_common_support_GBM.tif", res=600, compression = "lzw", height=6, width=15, units="in")
# plot(myGBM, color=F, plots=2)
# dev.off()
#
# #extract propensity scores
# data = cbind(data,myGBM$ps)
# names(data)[82] = "pScoresGBM"
#
# #convert propensity scores to logits
# data$logitGBM = with(data,log(pScoresGBM/(1-pScoresGBM)))
#
# #====================================================
# #estimate propensity scores with random forests
# set.seed(2016)
# require(party)
# mycontrols <- cforest_unbiased(ntree=1000, mtry=5)
# mycforest <- cforest(psFormula, data=data,
# controls=mycontrols)
#
#
# #obtain a list of predicted probabilities
# predictedProbabilities <- predict(mycforest, type="prob")
#
# #organize the list into a matrix with two columns for the
# #probability of being in treated and control groups.
# #keep only the second column, which are the propensity scores.
# pScoresRf <- matrix(unlist(predictedProbabilities),,2,byrow=T)[,2]
# data$pScoresRf <- pScoresRf
# with(data,by(pScoresRf,treat,summary))
#
# #convert propensity scores to logits
# data$logitRf = with(data,log(pScoresRf/(1-pScoresRf)))
#==================================================
#Perform genetic one-to-one matching without replacement
#load both packages explicitly: library(MatchIt, Matching) would pass "Matching"
#to the help argument of library() and never actually load the Matching package
library(MatchIt) #propensity score matching
library(Matching) #genetic matching backend used by method = "genetic"
#matchit expects a logical/0-1 treatment indicator; convert to logical
data$treat <- data$treat==1
#Perform genetic matching with the logit of the propensity score obtained with logistic regression
geneticMatching <- matchit(psFormula,distance=data$logitPScores,
                           data = data, method = "genetic", pop.size=1000,
                           replace=FALSE, ties=FALSE, ratio=1)
#the code above gives a warning that does not apply
#diagnose covariate balance (standardized mean differences)
(balance.geneticMatching <- summary(geneticMatching, standardize=TRUE))
write.csv(balance.geneticMatching$sum.matched, file="Chapter8_balance_table.csv")
#obtain the summary of balance before matching
summary(abs(balance.geneticMatching$sum.all$"Std. Mean Diff."))
#obtain the summary of balance after matching
quantile(abs(balance.geneticMatching$sum.matched$"Std. Mean Diff."),na.rm=TRUE)
#count covariates above/below the 0.1 standardized difference rule of thumb
table(abs(balance.geneticMatching$sum.matched$"Std. Mean Diff.") > 0.1)
#obtain matched data
matchedData <- match.data(geneticMatching)
#center the covariate and the propensity score logit on the means of the treated
#to ensure interpretation of the effect as the ATT
matchedData$LEP_T <- with(matchedData, LEP_T - mean(LEP_T[treat==1]))
matchedData$logitPScores <- with(matchedData, logitPScores - mean(logitPScores[treat==1]))
#write matched dataset as a comma-delimited file
#use file= (the original file <- "..." assigned a stray global variable named "file")
write.csv(matchedData, file="Chapter8_matched_data.csv", row.names=FALSE)
#============================================
#obtain inverse probability of treatment weights and evaluate balance to estimate the ATT
#convert the logical treatment indicator back to numeric 0/1
data$treat <- as.numeric(data$treat)
#ATT weights: 1 for the treated, p/(1-p) for the untreated
data$psWeight <- with(data,ifelse(treat==1,1,pScores/(1-pScores)))
#library() (rather than require()) stops immediately with an error if twang is missing
library(twang)
balance.psWeight <- bal.stat(data=data,#dataset is defined
               vars=covariateNames, #covariates used in the treatment assignment model
               treat.var ="treat" , #treatment assignment indicator
               w.all = data$psWeight, #inverse probability of treatment weights
               get.ks=FALSE, #avoid estimating the KS statistic
               sampw = 1, #no sampling weights (constant weight of 1)
               estimand="ATT", #define the proper estimand
               multinom=FALSE) #binary (not multinomial) treatment
#Table with results of balance evaluation.
balance.psWeight <- balance.psWeight$results
#summarize the covariate balance quality with the absolute standardized effect sizes
(summary.balance.psWeight <- summary(abs(balance.psWeight$std.eff.sz)))
quantile(abs(balance.psWeight$std.eff.sz),na.rm=TRUE)
#write.csv(data, file="Chapter8_data.csv")
#======================================
#Estimate ATT with multiple-group SEM
#define the multiple-group confirmatory factor analysis for the outcome
modelOutcome <- 'WORKMAN =~ F0108 + F0105 + F0113 + F0116' #Workload manageability
#library() (rather than require()) stops immediately with an error if lavaan is missing
library(lavaan)
#Fit the multiple-group CFA of the outcome enforcing strong factorial invariance
mgCFAoutcome <- cfa(modelOutcome, #define the model for the confirmatory factor analysis
               ordered=c("F0108" , "F0105" , "F0113" , "F0116"),#declare the indicators as ordered categorical
               data=matchedData,#define the data source
               group="treat",#define the group indicator
               estimator="WLSMV",#robust weighted least squares estimator for ordinal indicators
               group.equal=c("loadings","thresholds"))#enforce the strong factorial invariance specifications
#print the results
summary(mgCFAoutcome, fit.measures=TRUE, standardized=TRUE)
#obtain fit measures
mgCFAoutcome.fit <- fitMeasures(mgCFAoutcome)
#obtain parameter estimates
mgCFAestimates <- parameterEstimates(mgCFAoutcome,standardized=TRUE)
write.csv(mgCFAestimates, file="table_estimates_mgCFA.csv")
#=============================================
#save the statistics needed for the Satorra-Bentler scaled chi-square difference test:
#the scaling correction factor of the strong factorial invariance model
scf.mgCFAoutcome <- as.numeric(fitMeasures(mgCFAoutcome,"chisq.scaling.factor"))
#the degrees of freedom of the chi square test statistic of the strong factorial invariance model
df.mgCFAoutcome <- as.numeric(fitMeasures(mgCFAoutcome,"df"))
#the scaled chi square test statistic of the strong factorial invariance model
chi.square.mgCFAoutcome <- as.numeric(fitMeasures(mgCFAoutcome,"chisq.scaled"))
#==============================
#Fit the outcome CFA with configural invariance only
#(loadings and thresholds free across groups), as the comparison model
mgCFAoutcome.configural <- cfa(modelOutcome, #define the model for the confirmatory factor analysis
               ordered=c("F0108" , "F0105" , "F0113" , "F0116"),#declare the indicators as ordered categorical
               data=matchedData,#define the data source
               group="treat",#define the group indicator
               estimator="WLSMV")#define the estimator to be used
summary(mgCFAoutcome.configural, fit.measures=TRUE, standardized=TRUE)
#save the statistics required for the Satorra-Bentler scaled chi-square difference test
#the scaling correction factor of the configural invariance model
scf.mgCFAoutcome.configural <- as.numeric(fitMeasures(mgCFAoutcome.configural,"chisq.scaling.factor"))
#the degrees of freedom of the chi square test statistic of the configural invariance model
df.mgCFAoutcome.configural <- as.numeric(fitMeasures(mgCFAoutcome.configural,"df"))
#the scaled chi square test statistic of the configural invariance model
chi.square.mgCFAoutcome.configural <- as.numeric(fitMeasures(mgCFAoutcome.configural,"chisq.scaled"))
#Compare the configural and strong factorial invariance models with the
#Satorra-Bentler scaled chi-square difference test
#at first step, we have to calculate the overall scaling correction factor.
scaling.correction.factor <- (scf.mgCFAoutcome*df.mgCFAoutcome-scf.mgCFAoutcome.configural*df.mgCFAoutcome.configural)/
(df.mgCFAoutcome-df.mgCFAoutcome.configural)
Satorra.Bentler.chi.square.statistic <- (scf.mgCFAoutcome*chi.square.mgCFAoutcome-scf.mgCFAoutcome.configural*chi.square.mgCFAoutcome.configural)/
scaling.correction.factor
#p value: probability of a chi square statistic at least this extreme under the
#null hypothesis that the two models fit equally well
pchisq(Satorra.Bentler.chi.square.statistic,(df.mgCFAoutcome-df.mgCFAoutcome.configural),lower.tail = FALSE)
#since the p value is greater than nominal alpha level that is .05, we failed to reject the null hypothesis.
#==========================
#fit multiple-group SEM to estimate the ATT controlling for the covariates that
#did not achieve balance of 0.1 (absolute standardized mean difference)
#enforcing strong factorial invariance
#the logit of the propensity score is also added as a covariate
#fit the model with covariates
mgSEMmodel <- 'WORKMAN =~ F0108 + F0105 + F0113 + F0116
WORKMAN ~ LEP_T + logitPScores'
mgSEMoutcome <- sem(mgSEMmodel, ordered=c("F0108" , "F0105" , "F0113" ,"F0116"),
                    data=matchedData, group="treat", estimator="WLSMV",
                    group.equal=c("loadings","thresholds"))
summary(mgSEMoutcome, fit.measures=TRUE, standardized=TRUE)
#obtain fit measures
mgSEMoutcome.fit <- fitMeasures(mgSEMoutcome)
#obtain parameter estimates
mgSEMestimates <- parameterEstimates(mgSEMoutcome, standardized=TRUE)
write.csv(mgSEMestimates, file="table_estimates_mgSEM.csv")
#-----------------------------
#Fit a MIMIC model to estimate the ATT with propensity score weighting
#specify the MIMIC model: the latent outcome is regressed on the treatment indicator
MIMICmodel <- 'WORKMAN =~ F0108 + F0105 + F0113 + F0116
WORKMAN ~ treat'
#convert factor indicators to numeric format, because lavaan.survey only takes numeric indicators
data2 <- data
for (var in names(data2)) { #iterate over the copy that is actually modified
  if (inherits(data2[,var], "factor")) { #inherits() is safer than class(x)=="factor"
    #recode the factor levels to 1,2,... and then coerce to numeric
    levels(data2[,var]) <- 1:nlevels(data2[,var])
    data2[,var] <- as.numeric(as.character(data2[,var]))
  }
}
#fit the unweighted MIMIC model with a robust maximum likelihood estimator
MIMIC <- sem(MIMICmodel, #define the model
        data=data2,#define the data source
        estimator="MLR")#define the estimator to be used
#re-estimate the model using propensity score weights
#define the survey design object with the inverse probability of treatment weights
library(survey)
surveyDesign <- svydesign(ids=~1, weights=~psWeight, data = data2)
library(lavaan.survey)
#fit MIMIC model with propensity score weights
weighted.MIMIC <- lavaan.survey(MIMIC, surveyDesign, estimator="MLMV")
#print the results
summary(weighted.MIMIC, fit.measures=TRUE, standardized=TRUE)
#=====================================
#calculation of Cronbach's alpha for Table 8.1
#obtain Cronbach's alpha for the constructs
#library() (rather than require()) stops immediately with an error if psychometric is missing
library(psychometric)
(alpha.PSCHMAN <- alpha(data2[indicator.names[1:6]]))#Perception of school management
(alpha.PFAMBACK <- alpha(data2[indicator.names[7:10]]))#perception of family background
(alpha.PSTUDEL <- alpha(data2[indicator.names[11:15]]))#perception of student delinquency
(alpha.PSTUPAR <- alpha(data2[indicator.names[16:18]]))#perception of student participation
(alpha.PTEACSUP <- alpha(data2[indicator.names[19:21]]))#perception of teacher support
(alpha.WORKMAN <- alpha(data2[c("F0108","F0105","F0113","F0116")]))#perception of workload manageability
#save all results
#save(list=ls(), file="Chapter8_all_results.Rdata", compress=T)