### TWO-COMPOSITES EXERCISE # data extracted from Grace et al 2016 Nature paper ### Load libraries library(lavaan) library(AICcmodavg) source("E:/Rwork/JimsR-Library/lavaan.modavg.R") ### Read Data and Rename Variables # Set working directory setwd("E:/ppt_files/_education/SEM.10-Modeling with Composite Variables/SEM.10.3") # read data dat <- read.csv("SEM_10_3_Ex1_data.csv") names(dat) # rename and adjust scale of variables for convenience dat2 <- with(dat, data.frame(site)) dat2$Rich <- dat$ln.rich dat2$Prod <- dat$ln.prod dat2$Sand <- dat$sand.prop dat2$Silt <- dat$silt.prop/100 dat2$PH <- dat$ph/100 dat2$P <- dat$ln.p/10 dat2$C <- dat$ln.c dat2$N <- dat$ln.n dat2$K <- dat$ln.k names(dat2) ##### Forming Composites within lavaan ### Step 1: Run model without composites using all soil indicators mod1 <- 'Rich ~ Sand +Silt +PH +P +C +N +K Prod ~ Sand +Silt +PH +P +C +N +K' mod1.fit <- sem(mod1, data=dat2, meanstructure = TRUE) summary(mod1.fit) ############# STOP AT THIS POINT AND DEVELOP YOUR OWN CODE IF YOU LIKE ############# # # ############# BELOW IS THE CODE I USED TO TACKLE THIS EXERCISE ############# ### Step 2: Prune contributing indicators and select final uncomposited model summary(mod1.fit) # look at initial model output # revised model mod1a <- 'Rich ~ Sand +Silt +PH +P +N +K Prod ~ Sand +Silt +PH +P +C +N' mod1a.fit <- sem(mod1a, data=dat2, meanstructure = TRUE) summary(mod1a.fit) mod1b <- 'Rich ~ Sand +Silt +PH +P +N Prod ~ Silt +PH +P +C +N' mod1b.fit <- sem(mod1b, data=dat2, meanstructure = TRUE) summary(mod1b.fit) mod1c <- 'Rich ~ Sand +Silt +P +N Prod ~ Silt +PH +P +C +N' mod1c.fit <- sem(mod1c, data=dat2, meanstructure = TRUE) summary(mod1c.fit) mod1d <- 'Rich ~ Sand +Silt +P Prod ~ Silt +PH +P +C +N' mod1d.fit <- sem(mod1d, data=dat2, meanstructure = TRUE) summary(mod1d.fit) mod1e <- 'Rich ~ Sand +Silt Prod ~ Silt +PH +P +C +N' mod1e.fit <- sem(mod1e, data=dat2, meanstructure = TRUE) summary(mod1e.fit) ### Compare all mods using AICc criterion aictab.lavaan(list(mod1.fit, mod1a.fit, mod1b.fit, mod1c.fit, mod1d.fit, mod1e.fit), c("Model1", "Model1a", "Model1b", "Model1c", "Model1d", "Model1e")) ### Step 3: Create composite variables within lavaan using Model1e run model and evaluate # get output from selected model to extract parameters for specification purposes summary(mod1e.fit) # create model with composites (set loading using uncomposited parameter estimates) mod2 <- 'SoilSuitability <~ -0.656*Sand +Silt SoilFertility <~ 32.581*Silt +PH +P +C +N Rich ~ SoilSuitability Prod ~ SoilFertility' mod2.fit <- sem(mod2, data=dat2, meanstructure = TRUE) summary(mod2.fit) # try model with only 1 composite mod2a <- 'SoilSuitability <~ -0.656*Sand +Silt Rich ~ SoilSuitability Prod ~ Silt +PH +P +C +N' mod2a.fit <- sem(mod2a, data=dat2, meanstructure = TRUE) summary(mod2a.fit) # don't ask for meanstructure mod2a.fit <- sem(mod2a, data=dat2) summary(mod2a.fit) ##### BOTTOMLINE - LAVAAN CANNOT ESTIMATE MORE THAN ONE COMPOSITE ##### Alternative Approach: Forming Composites by hand ### Redo Step 3, computing composites by hand # get output from selected model to extract parameters for specification purposes summary(mod1e.fit) # formulae to calculate composites for model "by hand" and bring into data set dat2$SoilSuitability <- with(dat2, 1.039 -0.656*Sand -95.098*Silt) dat2$SoilFertility <- with(dat2, 0.019 +32.581*Silt -5.764*PH +0.877*P +0.294*C -1.027*N) ### Step 4: Run model and evaluate output mod3 <- 'Rich ~ SoilSuitability Prod ~ SoilFertility' mod3.fit <- sem(mod3, data=dat2, meanstructure = TRUE) summary(mod3.fit, rsq=TRUE, standard=TRUE) ### Compare results with uncomposited model summary(mod1e.fit, rsq=TRUE, standard=TRUE) ### Additional Information # what is the correlation between composites cor(dat2$SoilSuitability, dat2$SoilFertility) cor.test(dat2$SoilSuitability, dat2$SoilFertility)