tidyverse — это набор пакетов:
Полезно также знать:
library(tidyverse)head(iris)
as_tibble(iris)
data_frame(id = 1:12,
letters = month.name)library(readr)
df <- read_csv("https://goo.gl/v7nvho")
head(df)df <- read_tsv("https://goo.gl/33r2Ut")
head(df)df <- read_delim("https://goo.gl/33r2Ut", delim = "\t")
head(df)library(readxl)
xlsx_example <- readxl_example("datasets.xlsx")
df <- read_excel(xlsx_example)
head(df)excel_sheets(xlsx_example)## [1] "iris" "mtcars" "chickwts" "quakes"
df <- read_excel(xlsx_example, sheet = "mtcars")
head(df)rm(df)dplyrhomo <- read_csv("http://goo.gl/Zjr9aF")
homo
The majority of examples in this presentation are based on Hau 2007. The experiment consisted of a perception and judgment test aimed at measuring the correlation between acoustic cues and perceived sexual orientation. Naïve Cantonese speakers were asked to listen to the Cantonese speech samples collected in Experiment and judge whether the speakers were gay or heterosexual. There are 14 speakers and the following parameters:
dplyr::filter()
How many speakers are older than 28?
homo %>%
filter(age > 28, s.duration.ms < 60)
%>% — конвейер (pipe) отправляет результат работы одной функции в другую.
sort(sqrt(abs(sin(1:22))), decreasing = TRUE)## [1] 0.9999951 0.9952926 0.9946649 0.9805088 0.9792468 0.9554817 0.9535709
## [8] 0.9173173 0.9146888 0.8699440 0.8665952 0.8105471 0.8064043 0.7375779
## [15] 0.7325114 0.6482029 0.6419646 0.5365662 0.5285977 0.3871398 0.3756594
## [22] 0.0940814
1:22 %>%
sin() %>%
abs() %>%
sqrt() %>%
sort(., decreasing = TRUE) # зачем здесь точка?## [1] 0.9999951 0.9952926 0.9946649 0.9805088 0.9792468 0.9554817 0.9535709
## [8] 0.9173173 0.9146888 0.8699440 0.8665952 0.8105471 0.8064043 0.7375779
## [15] 0.7325114 0.6482029 0.6419646 0.5365662 0.5285977 0.3871398 0.3756594
## [22] 0.0940814
Конвейеры в tidyverse пришли из пакета magrittr. Иногда они работают некорректно с функциями не из tidyverse.
dplyr::slice()homo %>%
slice(3:7)dplyr::select()homo %>%
select(8:10)homo %>%
select(speaker:average.f0.Hz)homo %>%
select(-speaker)homo %>%
select(-c(speaker, perceived.as.hetero, perceived.as.homo, perceived.as.homo.percent))homo %>%
select(speaker, age, s.duration.ms)dplyr::arrange()homo %>%
arrange(orientation, desc(age))dplyr::distinct()homo %>%
distinct(orientation)homo %>%
distinct(orientation, age > 20)dplyr::mutate()homo %>%
mutate(f0.mn = average.f0.Hz - f0.range.Hz/2,
f0.mx = (average.f0.Hz + f0.range.Hz/2))dplyr::group_by(...) %>% summarise(...)homo %>%
summarise(min(age), mean(s.duration.ms))homo %>%
group_by(orientation) %>%
summarise(my_mean = mean(s.duration.ms))homo %>%
group_by(orientation) %>%
summarise(mean(s.duration.ms))homo %>%
group_by(orientation) %>%
summarise(mean_by_orientation = mean(s.duration.ms))
Если нужно посчитать количество вхождений, то можно использовать функцию n() в summarise() или же функцию count():
homo %>%
group_by(orientation, age > 20) %>%
summarise(my_mean = mean(s.duration.ms), n_observations = n())homo %>%
count(orientation, age > 20)dplyr::.._join()languages <- data_frame(
languages = c("Selkup", "French", "Chukchi", "Kashubian"),
countries = c("Russia", "France", "Russia", "Poland"),
iso = c("sel", "fra", "ckt", "pol")
)
languagescountry_population <- data_frame(
countries = c("Russia", "Poland", "Finland"),
population_mln = c(143, 38, 5))
country_population
inner_join(languages, country_population)
left_join(languages, country_population)
right_join(languages, country_population)
anti_join(languages, country_population)
anti_join(country_population, languages)
full_join(country_population, languages)
Существует достаточно забавный трюк, который позволяет использовать .._join() вместе с group_by() и summarise():
homo %>%
group_by(orientation, age > 20) %>%
summarise(my_mean = mean(s.duration.ms), n_observations = n())homo %>%
group_by(orientation, age > 20) %>%
summarise(my_mean = mean(s.duration.ms), n_observations = n()) %>%
left_join(homo)df.short <- data.frame(
consonant = c("stops", "fricatives", "affricates", "nasals"),
initial = c(123, 87, 73, 7),
intervocal = c(57, 77, 82, 78),
final = c(30, 69, 12, 104))
df.shorttidyr::gather()df.short <- data.frame(
consonant = c("stops", "fricatives", "affricates", "nasals"),
initial = c(123, 87, 73, 7),
intervocal = c(57, 77, 82, 78),
final = c(30, 69, 12, 104))
df.shortdf.short %>%
gather(position, number, initial:final) ->
df.long
df.longtidyr::spread()df.long %>%
spread(position, number) ->
df.short
df.short