# Contingency tables (Таблицы сопряженности) ----
# Toy data: one categorical vector, plus a two-column data frame that reuses
# the same values as its `sex` column and adds a `hand` column.
v <- c("m", "f", "m", "m", "f", "f", "f", "f")
df <- data.frame(
  sex = v,
  hand = c("lf", "rh", "rh", "rh", "rh", "lf", "am", "rh")
)

# table() on a single vector counts how often each distinct value occurs.
sex_counts <- table(v)
sex_counts
## v
## f m
## 5 3

# table() on a two-column data frame cross-tabulates the columns.
sex_hand_counts <- table(df)
sex_hand_counts
##    hand
## sex am lf rh
##   f  1  1  3
##   m  0  1  2

# prop.table() divides every cell by the grand total, giving proportions.
prop.table(sex_counts)
## v
##     f     m
## 0.625 0.375

# It accepts a plain numeric vector as well.
prop.table(c(1, 2))
## [1] 0.3333333 0.6666667

sex_hand_share <- prop.table(sex_hand_counts)
sex_hand_share
##    hand
## sex    am    lf    rh
##   f 0.125 0.125 0.375
##   m 0.000 0.125 0.250

# Scale the proportions to percentages ...
sex_hand_share * 100
##    hand
## sex   am   lf   rh
##   f 12.5 12.5 37.5
##   m  0.0 12.5 25.0

# ... and round them to whole numbers.
round(sex_hand_share * 100)
##    hand
## sex am lf rh
##   f 12 12 38
##   m  0 12 25
library(tidyverse)
# Build `words`: one row per translation, holding its first token, the token
# length in characters, and the language it came from.
#
# NOTE(review): the data is fetched through a goo.gl short link; confirm the
# link still resolves, or point read_csv() at the underlying CSV directly.
translations <- read_csv("https://goo.gl/a4xNwj")

# Stack the two translation columns: every English entry first, then every
# Russian one. The language labels added further down rely on this order.
all_entries <- c(translations$trans_english, translations$trans_russian)

# Keep only the first token of each entry, splitting on a comma or a space.
# vapply() always returns a character vector, and the result is unnamed, so
# the raw entries cannot end up as row names of the data frame.
first_tokens <- vapply(
  strsplit(all_entries, ",| "),
  function(parts) parts[1],
  character(1),
  USE.NAMES = FALSE
)
words <- data.frame(words = first_tokens)
str(words)
# Output below was recorded by the original author; it shows a Factor column,
# so it apparently predates R 4.0, where the column is plain character.
## 'data.frame': 2620 obs. of 1 variable:
## $ words: Factor w/ 2466 levels "account","accuse",..: 731 17 843 171 382 337 589 79 856 489 ...

# as.character() keeps the length count correct when `words$words` is a factor.
words$n_char <- nchar(as.character(words$words))
str(words)
## 'data.frame': 2620 obs. of 2 variables:
## $ words : Factor w/ 2466 levels "account","accuse",..: 731 17 843 171 382 337 589 79 856 489 ...
## $ n_char: int 5 6 5 5 6 3 4 5 6 6 ...
nrow(words)
## [1] 2620

# Label the first half "eng" and the second half "rus". The size of each half
# is taken from the source table rather than hard-coded (it was 1310), so the
# labels stay aligned if the CSV grows or shrinks.
words$language <- rep(c("eng", "rus"), each = nrow(translations))
str(words)
## 'data.frame': 2620 obs. of 3 variables:
## $ words : Factor w/ 2466 levels "account","accuse",..: 731 17 843 171 382 337 589 79 856 489 ...
## $ n_char : int 5 6 5 5 6 3 4 5 6 6 ...
## $ language: chr "eng" "eng" "eng" "eng" ...
# Bar chart of the mean word length (in characters) per language, with error
# bars spanning one standard deviation either side of the mean.
words %>%
  group_by(language) %>%
  summarise(
    mean_chars = mean(n_char),
    # Error-bar limits are mean -/+ sd, not the observed minimum and maximum.
    lower_sd = mean(n_char) - sd(n_char),
    upper_sd = mean(n_char) + sd(n_char)
  ) %>%
  ggplot(aes(x = language, y = mean_chars, fill = language)) +
  # geom_col() draws bars at the supplied y values, i.e. it is the direct
  # spelling of geom_bar(stat = "identity").
  geom_col() +
  theme_bw() +
  geom_errorbar(
    aes(ymin = lower_sd, ymax = upper_sd),
    width = 0.2,
    color = "green"
  ) +
  xlab("языки") +
  ylab("количество символов в слове") +
  labs(title = "Это невероятно интересный график") +
  # The x axis already names each bar, so the fill legend is dropped.
  # "none" replaces the deprecated `fill = FALSE` form.
  guides(fill = "none")
