• Home
  • Labs
    • Lab 1
    • Lab 2
    • Lab 3 and 4
    • Lab 5
    • Lab 6
    • Lab 7
    • Lab 8
    • Lab 9
    • Lab 10
    • Lab 11
    • Lab 12
    • Lab 13
    • Lab 14
    • Lab 15

Lab 10. Mixed effect models

Assignment link

library(tidyverse); library(lingtypology); library(lme4)

1. Consonants and vowels

lapsyd <- read.csv("https://goo.gl/eD4S5n")
map.feature(lapsyd$name, features = lapsyd$area, 
              label = lapsyd$name,
              label.hide = TRUE)
## Warning: There is no coordinates for languages Gelao, Naxi, Nepali
Africa
Australia
Eurasia
North America
Papua
South America
5000 km
3000 mi
Leaflet | © OpenStreetMap
fit1 <- lm(count_vowel~count_consonant, data = lapsyd)
summary(fit1)
## 
## Call:
## lm(formula = count_vowel ~ count_consonant, data = lapsyd)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.252 -2.992 -1.195  2.201 19.520 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.38236    0.54784  13.475   <2e-16 ***
## count_consonant  0.04065    0.02274   1.787   0.0746 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.093 on 400 degrees of freedom
## Multiple R-squared:  0.007923,   Adjusted R-squared:  0.005443 
## F-statistic: 3.195 on 1 and 400 DF,  p-value: 0.07464
lapsyd$model1 <- predict(fit1)

lapsyd %>% 
  ggplot(aes(count_consonant, count_vowel))+
  geom_point()+
  geom_line(aes(count_consonant, model1), color = "blue")+
  labs(x = "number of consonants",
       y = "number of vowels",
       caption = "data from LAPSyD")

fit2 <- lmer(count_vowel ~ count_consonant + (1|area), data = lapsyd)
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: count_vowel ~ count_consonant + (1 | area)
##    Data: lapsyd
## 
## REML criterion at convergence: 2242
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5298 -0.6147 -0.2599  0.5146  4.7043 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  area     (Intercept)  3.983   1.996   
##  Residual             14.753   3.841   
## Number of obs: 402, groups:  area, 6
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)       7.8928     0.9852   8.011
## count_consonant  -0.0111     0.0240  -0.463
## 
## Correlation of Fixed Effects:
##             (Intr)
## cont_cnsnnt -0.518
lapsyd$model2 <- predict(fit2)

lapsyd %>% 
  ggplot(aes(count_consonant, count_vowel))+
  geom_point()+
  geom_line(aes(count_consonant, model2, color = area))+
  labs(x = "number of consonants",
       y = "number of vowels",
       caption = "data from LAPSyD")

fit3 <- lmer(count_vowel ~ count_consonant + (1+count_consonant|area), data = lapsyd)
summary(fit3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: count_vowel ~ count_consonant + (1 + count_consonant | area)
##    Data: lapsyd
## 
## REML criterion at convergence: 2236.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7991 -0.6217 -0.2334  0.5299  4.7743 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr 
##  area     (Intercept)     11.94558 3.4562        
##           count_consonant  0.01355 0.1164   -0.84
##  Residual                 14.32212 3.7845        
## Number of obs: 402, groups:  area, 6
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)      7.866281   1.555096   5.058
## count_consonant -0.005108   0.056987  -0.090
## 
## Correlation of Fixed Effects:
##             (Intr)
## cont_cnsnnt -0.854
lapsyd$model3 <- predict(fit3)

lapsyd %>% 
  ggplot(aes(count_consonant, count_vowel))+
  geom_point()+
  geom_line(aes(count_consonant, model3, color = area))+
  labs(x = "number of consonants",
       y = "number of vowels",
       caption = "data from LAPSyD")

fit4 <- lmer(count_vowel ~ count_consonant + (0+count_consonant|area), data = lapsyd)
summary(fit4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: count_vowel ~ count_consonant + (0 + count_consonant | area)
##    Data: lapsyd
## 
## REML criterion at convergence: 2250.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5967 -0.6774 -0.2688  0.4497  4.6750 
## 
## Random effects:
##  Groups   Name            Variance  Std.Dev.
##  area     count_consonant  0.008151 0.09028 
##  Residual                 15.093094 3.88498 
## Number of obs: 402, groups:  area, 6
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)      8.68579    0.59439  14.613
## count_consonant -0.05519    0.04649  -1.187
## 
## Correlation of Fixed Effects:
##             (Intr)
## cont_cnsnnt -0.563
lapsyd$model4 <- predict(fit4)

lapsyd %>% 
  ggplot(aes(count_consonant, count_vowel))+
  geom_point()+
  geom_line(aes(count_consonant, model4, color = area))+
  labs(x = "number of consonants",
       y = "number of vowels",
       caption = "data from LAPSyD")

fit5 <- lmer(count_vowel ~ count_consonant + (1|area) + (0+count_consonant|area), data = lapsyd)
summary(fit5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## count_vowel ~ count_consonant + (1 | area) + (0 + count_consonant |  
##     area)
##    Data: lapsyd
## 
## REML criterion at convergence: 2239.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6499 -0.6023 -0.2494  0.5018  4.7428 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev.
##  area     (Intercept)      5.67146 2.38148 
##  area.1   count_consonant  0.00606 0.07785 
##  Residual                 14.42645 3.79822 
## Number of obs: 402, groups:  area, 6
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)      8.19243    1.15107   7.117
## count_consonant -0.02700    0.04326  -0.624
## 
## Correlation of Fixed Effects:
##             (Intr)
## cont_cnsnnt -0.337
lapsyd$model5 <- predict(fit5)

lapsyd %>% 
  ggplot(aes(count_consonant, count_vowel))+
  geom_point()+
  geom_line(aes(count_consonant, model5, color = area))+
  labs(x = "number of consonants",
       y = "number of vowels",
       caption = "data from LAPSyD")

anova(fit5, fit4, fit3, fit2, fit1)
## refitting model(s) with ML (instead of REML)
## Data: lapsyd
## Models:
## fit1: count_vowel ~ count_consonant
## fit4: count_vowel ~ count_consonant + (0 + count_consonant | area)
## fit2: count_vowel ~ count_consonant + (1 | area)
## fit5: count_vowel ~ count_consonant + (1 | area) + (0 + count_consonant | 
## fit5:     area)
## fit3: count_vowel ~ count_consonant + (1 + count_consonant | area)
##      Df    AIC    BIC  logLik deviance   Chisq Chi Df Pr(>Chisq)    
## fit1  3 2277.8 2289.8 -1135.9   2271.8                              
## fit4  4 2254.8 2270.7 -1123.4   2246.8 25.0256      1  5.657e-07 ***
## fit2  4 2245.8 2261.8 -1118.9   2237.8  8.9793      0  < 2.2e-16 ***
## fit5  5 2246.9 2266.9 -1118.4   2236.9  0.9009      1    0.34254    
## fit3  6 2245.7 2269.6 -1116.8   2233.7  3.2150      1    0.07297 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1