Арифметика
2+2
## [1] 4
57-43
## [1] 14
31*2
## [1] 62
27/3
## [1] 9
55**2
## [1] 3025
55^2
## [1] 3025
sum(93, 39, 291, 29)
## [1] 452
prod(2, 3, 4)
## [1] 24
28 %% 3
## [1] 1
choose(6,2) # С из n по k
## [1] 15
Переменные
# Creating, updating and removing variables.
# Lines that start with "## " are the recorded console output.
var <- 57 # presumably refers to the RStudio shortcut Alt + - that types `<-`
var = 57 # `=` also performs assignment at top level
var_2 <- 34
my.var <- 99
MyVar <- 33
var + 2 # var is NOT changed here: the sum is only printed
## [1] 59
var <- var + 2 # var IS changed here: the sum is assigned back to var
rm(var_2) # removes the variable
ls() # lists the names of the variables that currently exist
## [1] "a" "db" "df" "fit" "m" "my.var" "MyVar" "var"
Типы данных
# Literals of the basic data types.
# Lines that start with "## " are the recorded console output.
-54.9 # a number
## [1] -54.9
"фывфыафыа" # a character string
## [1] "фывфыафыа"
32 + 5i # a complex number (real part 32, imaginary part 5)
## [1] 32+5i
TRUE # logical value
## [1] TRUE
FALSE # logical value
## [1] FALSE
NA # missing value
## [1] NA
Векторы
5:20
## [1] 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
c(46, 34, 28)
## [1] 46 34 28
my_vector <- c(46, 34, 28)
my_vector + 2
## [1] 48 36 30
my_vector ** 2
## [1] 2116 1156 784
my_vector_2 <- c("4628", "наша")
my_vector_2 <- c(4628, "наша")
my_vector_3 <- month.name
my_vector_3[5]
## [1] "May"
my_vector_3[5:7]
## [1] "May" "June" "July"
my_vector_3[c(5, 7, 11)]
## [1] "May" "July" "November"
my_index <- c(5, 7, 11)
my_vector_3[my_index]
## [1] "May" "July" "November"
month.name[-5] # Без 5 элемента
## [1] "January" "February" "March" "April" "June"
## [6] "July" "August" "September" "October" "November"
## [11] "December"
month.name[-c(5, 9)] # Без 5 и 9 элементов
## [1] "January" "February" "March" "April" "June" "July"
## [7] "August" "October" "November" "December"
Сравнения
2 > 8
## [1] FALSE
2 < 8
## [1] TRUE
c(2, 3) > 5
## [1] FALSE FALSE
c(2, 9) > c(5, 1)
## [1] FALSE TRUE
c(2, 3) >= 5
## [1] FALSE FALSE
c(2, 3) <= 5
## [1] TRUE TRUE
c(2, 3) == 5
## [1] FALSE FALSE
c(2, 3) != 5
## [1] TRUE TRUE
Сравнение как индекс
b <- 1:14
b[b > 6]
## [1] 7 8 9 10 11 12 13 14
b[b %% 3 == 0]
## [1] 3 6 9 12
Датафреймы
# Build a three-row data frame with a letter column and a counting-down
# integer column.
df <- data.frame(
  abc = c("a", "b", "c"),
  numbers = 3:1
)
# Build the same data frame again, this time explicitly asking
# data.frame() not to convert the character column into a factor.
df <- data.frame(
  abc = c("a", "b", "c"),
  numbers = 3:1,
  stringsAsFactors = FALSE
)
df$numbers
## [1] 3 2 1
df$abc
## [1] "a" "b" "c"
df[3, 2] # до запятой строки, после - столбец
## [1] 1
df[, 2] # второй столбец
## [1] 3 2 1
df[2, ] # вторая строка
## abc numbers
## 2 b 2
View(mtcars)
mtcars[,5]
## [1] 3.90 3.90 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 3.92 3.07 3.07 3.07
## [15] 2.93 3.00 3.23 4.08 4.93 4.22 3.70 2.76 3.15 3.73 3.08 4.08 4.43 3.77
## [29] 4.22 3.62 3.54 4.11
Считывание и запись файлов. Рабочая директория
# Three ways of pointing read.csv() at a file.
# 1. Read a CSV file directly from a URL.
df <- read.csv("https://raw.githubusercontent.com/agricolamz/2018_data_analysis_for_linguists/master/data/students/Lera-Z/hw1_agreement/hw1_1_zilo_class.csv")
# 2. Read a CSV file through an absolute path on the local disk.
# NOTE(review): this path looks specific to the author's machine -- replace
# it with a path that exists locally before running.
df <- read.csv("/home/agricolamz/work/materials/2018_I_HSE_Data_Analysis_for_linguists/data/students/akv17/hw1_agreement/hw1_1_zilo_class.csv")
# 3. Read a CSV file through a path relative to the working directory.
getwd() # prints the current working directory
# Change the working directory (same machine-specific caveat as above).
setwd("/home/agricolamz/work/materials/2018_I_HSE_Data_Analysis_for_linguists/data/students/akv17/hw1_agreement")
# The bare file name is now looked up inside the directory set by setwd().
df <- read.csv("hw1_2_verbs.csv")
Факторы
a <- c("папа", "мама")
a
## [1] "папа" "мама"
a_factor <- factor(a)
nchar(a)
## [1] 4 4
# nchar(a_factor) выдаёт ошибку: nchar() требует строковый вектор, а не фактор
length(month.name)
## [1] 12
nchar(month.name)
## [1] 7 8 5 5 3 4 4 6 9 7 8 8
length(month.name[1])
## [1] 1
nchar(month.name[1])
## [1] 7
Пакеты
# Install a package (downloads it; requires network access).
install.packages("lingtypology")
# Attach the installed package so that its functions can be called by name.
library("lingtypology")
# NOTE(review): map.feature() comes from lingtypology; presumably it draws a
# map with the listed languages -- confirm against the package documentation.
map.feature(c("Adyghe", "Russian"))
# The same two steps for tidyverse; library() accepts the package name
# with or without quotes.
install.packages("tidyverse")
library(tidyverse)
Пользовательские функции
# Square the argument. Despite its name this function does not take a
# square root: it returns i raised to the power 2.
# `i` may be a single number or a numeric vector; `^` works element-wise,
# so the result has the same length as `i`.
my_sqrt <- function(i) {
  squared <- i^2
  squared
}
my_sqrt(43)
## [1] 1849
my_sqrt(c(4, 23)) # векторизовано
## [1] 16 529
# Raise `x` to the power `y`. Both arguments are required; `^` is applied
# element-wise when vectors are passed.
my_sqrt2 <- function(x, y) {
  powered <- x^y
  powered
}
my_sqrt2(4, 12)
## [1] 16777216
# Same computation as a brace-less function: when the body is a single
# expression the curly braces can be omitted.
my_sqrt3 <- function(x, y)
  x^y
my_sqrt3(4, 12)
## [1] 16777216
# Raise `x` to the power `y`, where `y` is optional and defaults to 12.
# Works element-wise, so a vector `x` gives a vector of powers.
my_sqrt4 <- function(x, y = 12) {
  powered <- x^y
  powered
}
my_sqrt4(4,12)
## [1] 16777216
my_sqrt4(4)
## [1] 16777216
my_sqrt4(4:7)
## [1] 16777216 244140625 2176782336 13841287201
Как порождать последовательности чисел
seq(from = 1, to = 4, by = 0.1)
## [1] 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6
## [18] 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0
rep(5, 10)
## [1] 5 5 5 5 5 5 5 5 5 5
rep(c(5, 9), 10)
## [1] 5 9 5 9 5 9 5 9 5 9 5 9 5 9 5 9 5 9 5 9
rep(c(5, 9), each = 10)
## [1] 5 5 5 5 5 5 5 5 5 5 9 9 9 9 9 9 9 9 9 9
# произвольные буквы с повторением
sample(letters, 3, replace = TRUE)
## [1] "u" "c" "q"
# как зафиксировать генератор случайных чисел (чтобы результат воспроизводился)
set.seed(42)
sample(1:190, 3, replace = TRUE)
## [1] 174 179 55
Распределения
rnorm(100, mean = 100, sd = 20)
## [1] 119.11871 100.95769 77.90801 110.78048 111.60413 86.84994 131.09791
## [8] 76.24717 103.03626 78.27735 132.26746 100.71262 126.29918 119.56335
## [15] 117.63582 109.64409 119.31506 83.70858 105.67916 96.76603 138.71144
## [22] 134.46462 107.16804 106.04862 92.11771 115.76281 113.41408 81.01638
## [29] 100.72271 142.31511 103.34882 82.40327 118.93826 85.90897 78.43661
## [36] 98.96674 111.61858 93.65305 40.92840 79.93791 107.47108 103.20444
## [43] 73.18250 89.81100 30.14187 129.97552 112.50487 100.75534 106.06547
## [50] 84.36343 94.35861 135.63574 112.45319 43.24874 119.62795 97.62474
## [57] 101.87654 92.59864 118.99986 95.48177 104.53435 94.67239 135.62649
## [64] 111.92510 105.24699 130.66386 104.02871 75.86026 106.29970 71.93410
## [71] 115.40122 118.09722 131.91368 79.19157 90.87393 94.64475 115.16338
## [78] 62.11195 109.36316 102.53820 82.95189 81.65881 90.48091 82.15296
## [85] 95.54536 99.01250 78.07552 104.66953 114.71328 121.84422 87.08747
## [92] 113.32417 116.35088 88.80059 115.72294 77.38587 67.69522 75.71313
## [99] 112.33593 100.85808
set.seed(42)
rnorm(100, mean = 100, sd = 20) # нормальное распределение
## [1] 127.41917 88.70604 107.26257 112.65725 108.08537 97.87751 130.23044
## [8] 98.10682 140.36847 98.74572 126.09739 145.73291 72.22279 94.42422
## [15] 97.33357 112.71901 94.31494 46.87089 51.19066 126.40227 93.86723
## [22] 64.37383 96.56165 124.29349 137.90387 91.39062 94.85461 64.73674
## [29] 109.20195 87.20010 109.10900 114.09675 120.70207 87.82147 110.09910
## [36] 65.65983 84.31082 82.98185 51.71585 100.72245 104.11997 92.77885
## [43] 115.16326 85.46590 72.63438 108.65636 83.77214 128.88203 91.37108
## [50] 113.11296 106.43851 84.32322 131.51455 112.85799 101.79521 105.53101
## [57] 113.58578 101.79666 40.13820 105.69766 92.65531 103.70461 111.63647
## [64] 127.99474 85.45416 126.05085 106.71696 120.77012 118.41457 114.41756
## [71] 79.13762 98.19627 112.47036 80.92953 89.14342 111.61993 115.36357
## [78] 109.27535 82.28447 78.00438 130.25414 105.15843 101.76880 97.58207
## [85] 76.11342 112.23994 95.65720 96.34487 118.66693 116.43546 127.84233
## [92] 90.47652 113.00697 127.82221 77.78422 82.78415 77.36523 70.81572
## [99] 101.59965 113.06409
rt(100, 2) # t распределение
## [1] 0.976805727 -0.614956963 -0.877490818 -0.653715997 -0.283422990
## [6] -0.595649463 3.531999994 -0.848157251 0.675056153 -0.252887137
## [11] -1.171047357 -0.003697156 0.068526158 0.116399461 -0.767546249
## [16] -3.732469744 -1.691909696 -0.720028566 -0.327071791 -0.790727424
## [21] -0.410377461 -1.538197029 -0.308727902 -2.196681983 -0.025286323
## [26] 0.208947337 -0.648746223 -0.361973365 6.796443136 -0.104949980
## [31] -0.204171679 -1.124510315 1.368928702 -1.441840623 -0.008118733
## [36] -0.655190490 0.519235653 -5.870148065 -0.443763546 0.567035589
## [41] -1.849471065 -1.064581860 0.224838449 0.205596417 -0.940063195
## [46] -2.267531513 0.162054435 0.046442354 -1.761910114 -1.159386849
## [51] -0.180340869 -0.620251776 0.903934366 1.035202150 1.303562258
## [56] 74.389123997 -0.066004392 0.287157452 0.663396305 -2.806536943
## [61] -1.011024271 -1.974133088 10.971719266 -2.722441935 -0.828281845
## [66] -1.755874761 -0.500150685 -0.884195972 -1.597893127 5.484252925
## [71] -0.547134173 -1.689012819 0.476368704 3.843411107 -2.306505461
## [76] -0.755530652 -0.215984361 -0.268679256 -1.710290169 -3.995852361
## [81] -0.274602624 -0.995021479 -1.005423032 -0.604481518 0.072146124
## [86] 0.913885828 0.756570792 -0.063641017 -1.325332425 -0.452672266
## [91] -1.633589970 -0.247228228 1.447299162 -1.499881051 -0.959638879
## [96] 0.361140012 -0.154984334 0.586969096 -0.664988927 1.131930242
rbinom(100, 75, 0.23) # биномиальное распределение
## [1] 19 16 15 12 19 14 18 13 16 15 20 16 17 17 18 16 24 24 22 12 17 16 19
## [24] 18 21 23 18 17 23 22 16 11 20 23 16 15 12 16 14 13 15 18 13 24 23 26
## [47] 19 13 9 19 19 22 16 16 18 16 13 16 14 19 9 22 22 20 23 18 19 16 12
## [70] 19 13 15 18 16 12 21 16 19 10 18 15 16 13 21 13 22 22 14 17 17 14 17
## [93] 14 15 17 14 22 17 18 20
Условия и циклы
# Conditional execution and a loop.
# Lines that start with "## " are the recorded console output.
x <- 7

# Two-way branch: the value of the branch that runs is the value of `if`.
if (x > 5) {
  TRUE
} else {
  FALSE
}
## [1] TRUE

# Three-way branch: conditions are tested top to bottom and the first
# one that holds decides the result.
if (x == 5) {
  "equal"
} else if (x > 5) {
  "greater"
} else {
  "less"
}
## [1] "greater"

# while loop: the body repeats for as long as the condition is TRUE.
i <- 2
while (i < 5) {
  print("wow")
  i <- i + 1
}
## [1] "wow"
## [1] "wow"
## [1] "wow"

# ifelse(test, yes, no) is the function form of a conditional.
ifelse(x > 24, "greater", "less")
## [1] "less"