Is there an effect of the netting treatment on changes in aphid numbers across the sampling times?
What sort of model is appropriate here?
Because we now have repeated measures in time, we have longitudinal data and should consider the methods of Section 7.1.1.
library(ecostats)
data(aphids)
cols=c(rgb(1,0,0,alpha=0.5),rgb(0,0,1,alpha=0.5)) #transparent colours
par(mfrow=c(2,1),mar=c(3,3,1.5,1),mgp=c(2,0.5,0),oma=c(0,0,0.5,0))
with(aphids$oat, interaction.plot(Time,Plot,logcount,legend=FALSE,
col=cols[Treatment], lty=1, ylab="Counts [log(y+1) scale]",
xlab="Time (days since treatment)") )
legend("bottomleft",c("Excluded","Present"),col=cols,lty=1)
mtext("(a)",3,adj=0,line=0.5,cex=1.4)
with(aphids$oat, interaction.plot(Time,Treatment,logcount, col=cols,
lty=1, legend=FALSE, ylab="Counts [log(y+1) scale]",
xlab="Time (days since treatment)"))
legend("topright",c("Excluded","Present"),col=cols,lty=1)
mtext("(b)",3,adj=0,line=0.5,cex=1.4)
library(lme4)
aphid_int = lmer(logcount~Treatment*Time+Treatment*I(Time^2)+(1|Plot),
data=aphids$oat,REML=FALSE) # random intercepts model
aphid_slope = lmer(logcount~Treatment*Time+Treatment*I(Time^2)+(Time|Plot),
data=aphids$oat, REML=FALSE) # random slopes model
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with
#> max|grad| = 0.00715391 (tol = 0.002, component 1)
library(nlme) # refit random intercepts model in nlme to get a ACF:
aphid_int2 = lme(logcount~Treatment*Time+Treatment*I(Time^2),
random=~1|Plot, data=aphids$oat, method="ML")
plot(ACF(aphid_int2),alpha=0.05) # only works for nlme-fitted mixed models
print(aphid_int)
#> Linear mixed model fit by maximum likelihood ['lmerMod']
#> Formula: logcount ~ Treatment * Time + Treatment * I(Time^2) + (1 | Plot)
#> Data: aphids$oat
#> AIC BIC logLik deviance df.resid
#> 128.3401 144.5429 -56.1701 112.3401 48
#> Random effects:
#> Groups Name Std.Dev.
#> Plot (Intercept) 0.202
#> Residual 0.635
#> Number of obs: 56, groups: Plot, 8
#> Fixed Effects:
#> (Intercept) Treatmentpresent Time I(Time^2)
#> 6.5703718 -0.2379806 -0.1943256 0.0028708
#> Treatmentpresent:Time Treatmentpresent:I(Time^2)
#> 0.0495183 -0.0004498
anova(aphid_int)
#> Analysis of Variance Table
#> npar Sum Sq Mean Sq F value
#> Treatment 1 2.0410 2.0410 5.0619
#> Time 1 24.7533 24.7533 61.3917
#> I(Time^2) 1 5.7141 5.7141 14.1717
#> Treatment:Time 1 1.6408 1.6408 4.0693
#> Treatment:I(Time^2) 1 0.0413 0.0413 0.1024
aphid_noTreat = lmer(logcount~Time+I(Time^2)+(1|Plot),
data=aphids$oat, REML=FALSE)
anova(aphid_noTreat,aphid_int)
#> Data: aphids$oat
#> Models:
#> aphid_noTreat: logcount ~ Time + I(Time^2) + (1 | Plot)
#> aphid_int: logcount ~ Treatment * Time + Treatment * I(Time^2) + (1 | Plot)
#> npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
#> aphid_noTreat 5 130.26 140.39 -60.131 120.26
#> aphid_int 8 128.34 144.54 -56.170 112.34 7.9224 3 0.04764 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(aphid_slope)
#> Linear mixed model fit by maximum likelihood ['lmerMod']
#> Formula: logcount ~ Treatment * Time + Treatment * I(Time^2) + (Time | Plot)
#> Data: aphids$oat
#> AIC BIC logLik deviance df.resid
#> 127.1708 147.4244 -53.5854 107.1708 46
#> Random effects:
#> Groups Name Std.Dev. Corr
#> Plot (Intercept) 0.11684
#> Time 0.01907 -1.00
#> Residual 0.57877
#> Number of obs: 56, groups: Plot, 8
#> Fixed Effects:
#> (Intercept) Treatmentpresent Time I(Time^2)
#> 6.5703718 -0.2379806 -0.1943256 0.0028708
#> Treatmentpresent:Time Treatmentpresent:I(Time^2)
#> 0.0495183 -0.0004498
#> optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings
anova(aphid_slope)
#> Analysis of Variance Table
#> npar Sum Sq Mean Sq F value
#> Treatment 1 0.7467 0.7467 2.2291
#> Time 1 12.8040 12.8040 38.2239
#> I(Time^2) 1 5.7141 5.7141 17.0582
#> Treatment:Time 1 0.8487 0.8487 2.5337
#> Treatment:I(Time^2) 1 0.0413 0.0413 0.1232
aphid_noTreatS = lmer(logcount~Time+I(Time^2)+(Time|Plot),
data=aphids$oat, REML=FALSE)
#> boundary (singular) fit: see help('isSingular')
anova(aphid_noTreatS,aphid_slope)
#> Data: aphids$oat
#> Models:
#> aphid_noTreatS: logcount ~ Time + I(Time^2) + (Time | Plot)
#> aphid_slope: logcount ~ Treatment * Time + Treatment * I(Time^2) + (Time | Plot)
#> npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
#> aphid_noTreatS 7 125.04 139.22 -55.519 111.04
#> aphid_slope 10 127.17 147.42 -53.585 107.17 3.8666 3 0.2762
Repeat the above longitudinal analyses for data from the wheat field. Which longitudinal model better handles repeated measures in this case?
data(aphids)
cols=c(rgb(1,0,0,alpha=0.5),rgb(0,0,1,alpha=0.5)) #transparent colours
par(mar=c(3,3,1.5,1),mgp=c(2,0.5,0),oma=c(0,0,0.5,0))
with(aphids$wheat, interaction.plot(Time,Plot,logcount,legend=FALSE,
col=cols[Treatment], lty=1, ylab="Counts [log(y+1) scale]",
xlab="Time (days since treatment)") )
legend("bottomleft",c("Excluded","Present"),col=cols,lty=1)
As before we don’t see a lot of lines crossing over so expect some correlation. We also see a similar pattern with aphid counts reducing over time, but possibly being higher without bird exclusion 2-5 weeks into the trial.
library(lme4)
aphidw_int = lmer(logcount~Treatment*Time+Treatment*I(Time^2)+(1|Plot),
data=aphids$wheat,REML=FALSE) # random intercepts model
#> Warning: Some predictor variables are on very different scales: consider rescaling
#> boundary (singular) fit: see help('isSingular')
aphidw_slope = lmer(logcount~Treatment*Time+Treatment*I(Time^2)+(Time|Plot),
data=aphids$wheat, REML=FALSE) # random slopes model
#> Warning: Some predictor variables are on very different scales: consider rescaling
#> boundary (singular) fit: see help('isSingular')
library(nlme) # refit random intercepts model in nlme to get a ACF:
aphidw_int2 = lme(logcount~Treatment*Time+Treatment*I(Time^2),
random=~1|Plot, data=aphids$wheat, method="ML")
plot(ACF(aphidw_int2),alpha=0.05) # only works for nlme-fitted mixed models
# now try a model with a temporally structured random effect:
# for some reason this one returns non-convergence unless I make Tiem a (numerical) factor:
aphidsTimenFac=glmmTMB::numFactor(aphids$wheat$Time)
aphidw_CAR1 = update(aphidw_int2,correlation=corCAR1(,form=~aphidsTimenFac|Plot))
BIC(aphidw_int,aphidw_int2,aphidw_slope,aphidw_CAR1)
#> df BIC
#> aphidw_int 8 165.5916
#> aphidw_int2 8 165.5916
#> aphidw_slope 10 170.1048
#> aphidw_CAR1 9 169.6170
As before the random intercept model seems to be favoured.
Is there evidence that bird exclusion improves biological control of aphids?
aphidw_noTr = lmer(logcount~Time+I(Time^2)+(1|Plot),
data=aphids$wheat,REML=FALSE) # random intercepts model
#> Warning: Some predictor variables are on very different scales: consider rescaling
#> boundary (singular) fit: see help('isSingular')
anova(aphidw_noTr,aphidw_int)
#> Data: aphids$wheat
#> Models:
#> aphidw_noTr: logcount ~ Time + I(Time^2) + (1 | Plot)
#> aphidw_int: logcount ~ Treatment * Time + Treatment * I(Time^2) + (1 | Plot)
#> npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
#> aphidw_noTr 5 151.02 161.15 -70.511 141.02
#> aphidw_int 8 149.39 165.59 -66.694 133.39 7.6327 3 0.05424 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There is marginal evidence of a treatment effect.
Construct a single, larger model to test for an effect of biological exclusion, and to check if this effect differs across fields.
aphids$oat$field = "oat"
aphids$wheat$field = "wheat"
aphids$wheat$Plot=paste0("W",aphids$wheat$Plot) #making sure we have different names for different Plots across fields
aphids$all = rbind(aphids$oat,aphids$wheat)
aphids$all$field = factor(aphids$all$field)
str(aphids$all)
#> 'data.frame': 112 obs. of 6 variables:
#> $ Plot : Factor w/ 16 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
#> $ Treatment: Factor w/ 2 levels "excluded","present": 2 2 2 2 1 1 1 1 2 2 ...
#> $ Time : num 3 3 3 3 3 3 3 3 10 10 ...
#> $ counts : int 449 547 597 304 520 587 545 192 185 93 ...
#> $ logcount : num 6.11 6.31 6.39 5.72 6.26 ...
#> $ field : Factor w/ 2 levels "oat","wheat": 1 1 1 1 1 1 1 1 1 1 ...
We will need a mixed model that allows effects to be different across fields (and times)
aphida_int = lmer(logcount~field*Time*Treatment+field*I(Time^2)*Treatment+(1|Plot),
data=aphids$all,REML=FALSE) # random intercepts model
#> Warning: Some predictor variables are on very different scales: consider rescaling
#> boundary (singular) fit: see help('isSingular')
aphida_noTr = lmer(logcount~field*Time+field*I(Time^2)+(1|Plot),
data=aphids$all,REML=FALSE) # random intercepts model
#> Warning: Some predictor variables are on very different scales: consider rescaling
anova(aphida_noTr,aphida_int)
#> Data: aphids$all
#> Models:
#> aphida_noTr: logcount ~ field * Time + field * I(Time^2) + (1 | Plot)
#> aphida_int: logcount ~ field * Time * Treatment + field * I(Time^2) * Treatment + (1 | Plot)
#> npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
#> aphida_noTr 8 280.88 302.63 -132.44 264.88
#> aphida_int 14 276.61 314.67 -124.30 248.61 16.273 6 0.01236 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In the combined model there is some evidence of a treatment effect.
Ian wanted to know: how does species richness vary from one area to the next, and what are the main environmental correlates of richness?… Plotting richness against spatial location, he found spatial clusters of high or low species richness (Fig. 7.3)… What sort of analysis method should Ian consider using?
He should be looking at fitting a spatial model, along the lines of Code Box 7.5.
data(Myrtaceae)
Myrtaceae$logrich=log(Myrtaceae$richness+1)
ft_rich = lm(logrich~soil+poly(TMP_MAX,TMP_MIN,RAIN_ANN,degree=2),
data=Myrtaceae)
ft_richAdd = lm(logrich~soil+poly(TMP_MAX,degree=2)+
poly(TMP_MIN,degree=2)+poly(RAIN_ANN,degree=2), data=Myrtaceae)
BIC(ft_rich,ft_richAdd)
#> df BIC
#> ft_rich 19 1014.686
#> ft_richAdd 16 1002.806
(The below code chunk takes several minutes to evaluate.)
library(nlme)
richForm = logrich~soil+poly(TMP_MAX,degree=2)+poly(TMP_MIN,degree=2)+
poly(RAIN_ANN,degree=2)
ft_richExp = gls(richForm,data=Myrtaceae,correlation=corExp(form=~X+Y))
ft_richNugg = gls(richForm,data=Myrtaceae,
correlation=corExp(form=~X+Y,nugget=TRUE))
BIC(ft_richExp,ft_richNugg)
#> df BIC
#> ft_richExp 17 1036.2154
#> ft_richNugg 18 979.5212
library(pgirmess)
corRich = with(Myrtaceae,correlog(cbind(X,Y),logrich))
plot(corRich,xlim=c(0,150),ylim=c(-0.05,0.2))
abline(h=0,col="grey90")
Myrtaceae$resid = residuals(ft_richAdd)
corRichResid = with(Myrtaceae,correlog(cbind(X,Y),resid))
plot(corRichResid,xlim=c(0,150),ylim=c(-0.05,0.2))
abline(h=0,col="grey90")
Terje wondered whether egg size was specifically limited by male body size. So he collected data on 71 species of shorebird where the male incubates the egg, measuring egg size, and size of adult males and females… What sort of model might be appropriate here?
We could try a linear model for egg size as a function of female and male bird size.
Can see you see any potential problems satisfying independence assumptions?
A potential issue is that there is a phylogenetic signal in many traits: if shorebird species are more closely related, we can expect their egg sizes to be more similar.
library(caper)
data(shorebird)
shore4d=phylobase::phylo4d(shorebird.tree,shorebird.data)
library(phylosignal)
barplot.phylo4d(shore4d,c("Egg.Mass","F.Mass","M.Mass"))
#> Warning in asMethod(object): trees with unknown order may be unsafe in ape
library(caper)
shorebird = comparative.data(shorebird.tree, shorebird.data,
Species, vcv=TRUE)
pgls_egg = pgls(log(Egg.Mass) ~ log(F.Mass)+log(M.Mass),
data=shorebird)
summary(pgls_egg)
#>
#> Call:
#> pgls(formula = log(Egg.Mass) ~ log(F.Mass) + log(M.Mass), data = shorebird)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.104398 -0.016758 0.005613 0.021228 0.077149
#>
#> Branch length transformations:
#>
#> kappa [Fix] : 1.000
#> lambda [Fix] : 1.000
#> delta [Fix] : 1.000
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.37902 0.23172 -1.6357 0.106520
#> log(F.Mass) -0.22255 0.22081 -1.0079 0.317077
#> log(M.Mass) 0.89708 0.22246 4.0325 0.000142 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.03343 on 68 degrees of freedom
#> Multiple R-squared: 0.8476, Adjusted R-squared: 0.8431
#> F-statistic: 189.1 on 2 and 68 DF, p-value: < 2.2e-16
The below code chunk takes several minutes to evaluate so it has not been run
Refit the model allowing \(\lambda\) to be estimated from the data
(using lambda="ML"
), or allowing \(\delta\) to be estimated from the
data.
pgls_eggL = pgls(log(Egg.Mass) ~ log(F.Mass)+log(M.Mass), lambda="ML",
data=shorebird)
summary(pgls_eggL)
#>
#> Call:
#> pgls(formula = log(Egg.Mass) ~ log(F.Mass) + log(M.Mass), data = shorebird,
#> lambda = "ML")
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.076434 -0.014243 0.003655 0.020831 0.064695
#>
#> Branch length transformations:
#>
#> kappa [Fix] : 1.000
#> lambda [ ML] : 0.947
#> lower bound : 0.000, p = 1.1224e-13
#> upper bound : 1.000, p = 0.033109
#> 95.0% CI : (0.839, 0.997)
#> delta [Fix] : 1.000
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.43469 0.20689 -2.1011 0.03934 *
#> log(F.Mass) -0.28756 0.22202 -1.2952 0.19963
#> log(M.Mass) 0.97556 0.22304 4.3740 4.293e-05 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.02841 on 68 degrees of freedom
#> Multiple R-squared: 0.8709, Adjusted R-squared: 0.8671
#> F-statistic: 229.3 on 2 and 68 DF, p-value: < 2.2e-16
Does this change results and their interpretation?
Nope – results are pretty much the same as previously.
Now fit a linear model ignoring phylogeny, via lm
.
What happens here?
lm_egg = lm(log(Egg.Mass) ~ log(F.Mass)+log(M.Mass), data=shorebird.data)
summary(lm_egg)
#>
#> Call:
#> lm(formula = log(Egg.Mass) ~ log(F.Mass) + log(M.Mass), data = shorebird.data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.5843 -0.1104 0.0257 0.1338 0.4347
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.2105 0.1296 -1.624 0.109
#> log(F.Mass) -1.0605 0.2261 -4.691 1.36e-05 ***
#> log(M.Mass) 1.7433 0.2283 7.635 1.01e-10 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.2113 on 68 degrees of freedom
#> Multiple R-squared: 0.9089, Adjusted R-squared: 0.9063
#> F-statistic: 339.3 on 2 and 68 DF, p-value: < 2.2e-16
The slopes move further from zero and everything becomes more significant. In particular, while there was no effect of female mass previously, now it is strongly significant.
Is this what you would expect?
Yes this is expected because phylogenetic relatedness introduces positive dependence, leading to under-estimation of uncertainty and a higher chance of false positives.
Look at the log-likelihood (using the logLik
function) to help decide which of these models is a better fit to the
data.
Something seems to be wrong with the df
calculation in
the pgls
model, but the main thing to see here is that the
log-likelihood is substantially higher for the pgls
model,
suggesting it is a much better fit to the data.