Арифметика

# Basic arithmetic. Every "## [1] ..." line is the printed result of the
# expression directly above it.
2+2 # addition
## [1] 4
57-43 # subtraction
## [1] 14
31*2 # multiplication
## [1] 62
27/3 # division
## [1] 9
55**2 # exponentiation; prints the same value as 55^2 below
## [1] 3025
55^2 # exponentiation
## [1] 3025
sum(93, 39, 291, 29) # sum of all arguments
## [1] 452
prod(2, 3, 4) # product of all arguments
## [1] 24
28 %% 3 # remainder of dividing 28 by 3
## [1] 1
choose(6,2) # binomial coefficient C(n, k), i.e. "n choose k"
## [1] 15

Переменные

# Assignment: several spellings and naming styles, each creating a variable.
var <- 57 # Alt and - (keyboard shortcut for typing <-; presumably RStudio)
var = 57 # = assigns as well
var_2 <- 34
my.var <- 99
MyVar <- 33

var + 2 # var is not changed: the result is only printed
## [1] 59
var <- var + 2 # now var IS changed: the result is assigned back to it
rm(var_2) # removes the variable
ls() # returns the list of variables
# NOTE(review): the listing below contains objects (a, db, df, fit, m) that
# are not created above — presumably left over from the author's session.
## [1] "a"      "db"     "df"     "fit"    "m"      "my.var" "MyVar"  "var"

Типы данных

# Literal values of the basic types and how they print.
-54.9 # a number
## [1] -54.9
"фывфыафыа" # a string
## [1] "фывфыафыа"
32 + 5i # a complex number (real part 32, imaginary part 5)
## [1] 32+5i
TRUE # logical value
## [1] TRUE
FALSE # logical value
## [1] FALSE
NA # missing value
## [1] NA

Векторы

# Creating vectors, element-wise arithmetic, and indexing.
5:20 # integer sequence from 5 to 20
##  [1]  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
c(46, 34, 28) # c() combines values into a vector
## [1] 46 34 28
my_vector <- c(46, 34, 28)
my_vector + 2 # the operation is applied to every element
## [1] 48 36 30
my_vector ** 2 # every element squared
## [1] 2116 1156  784
my_vector_2 <- c("4628", "наша")
my_vector_2 <- c(4628, "наша") # number mixed with a string: c() coerces both to character
my_vector_3 <- month.name # built-in vector of English month names
my_vector_3[5] # fifth element (indexing starts at 1)
## [1] "May"
my_vector_3[5:7] # elements 5 through 7
## [1] "May"  "June" "July"
my_vector_3[c(5, 7, 11)] # elements at the listed positions
## [1] "May"      "July"     "November"
my_index <- c(5, 7, 11)
my_vector_3[my_index] # the index can itself be a stored vector
## [1] "May"      "July"     "November"
month.name[-5] # without the 5th element
##  [1] "January"   "February"  "March"     "April"     "June"     
##  [6] "July"      "August"    "September" "October"   "November" 
## [11] "December"
month.name[-c(5, 9)] # without the 5th and 9th elements
##  [1] "January"  "February" "March"    "April"    "June"     "July"    
##  [7] "August"   "October"  "November" "December"

Сравнения

# Comparison operators; on vectors they return one logical value per element.
2 > 8
## [1] FALSE
2 < 8
## [1] TRUE
c(2, 3) > 5 # each element is compared with 5
## [1] FALSE FALSE
c(2, 9) > c(5, 1) # pairwise: 2 > 5, then 9 > 1
## [1] FALSE  TRUE
c(2, 3) >= 5 # greater than or equal
## [1] FALSE FALSE
c(2, 3) <= 5 # less than or equal
## [1] TRUE TRUE
c(2, 3) == 5 # equal
## [1] FALSE FALSE
c(2, 3) != 5 # not equal
## [1] TRUE TRUE

Сравнение как индекс

# A comparison inside [ ] keeps only the elements for which it is TRUE.
b <- 1:14
b[b > 6] # elements greater than 6
## [1]  7  8  9 10 11 12 13 14
b[b %% 3 == 0] # elements divisible by 3 (remainder 0)
## [1]  3  6  9 12

Датафреймы

# A three-row data frame: a text column and an integer column counting down.
df <- data.frame(abc = c("a", "b", "c"), numbers = 3:1)

# The same data frame rebuilt with stringsAsFactors = FALSE, so that the
# text column is not turned into a factor.
df <- data.frame(
  abc = c("a", "b", "c"),
  numbers = 3:1,
  stringsAsFactors = FALSE
)

# Extracting columns, rows and single cells from a data frame.
df$numbers # the numbers column as a vector
## [1] 3 2 1
df$abc # the abc column as a vector
## [1] "a" "b" "c"
df[3, 2] # rows go before the comma, columns after it
## [1] 1
df[, 2] # second column
## [1] 3 2 1
df[2, ] # second row
##   abc numbers
## 2   b       2
View(mtcars) # NOTE(review): presumably opens the built-in mtcars data in a viewer; needs an interactive session — confirm
mtcars[,5] # fifth column of mtcars
##  [1] 3.90 3.90 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 3.92 3.07 3.07 3.07
## [15] 2.93 3.00 3.23 4.08 4.93 4.22 3.70 2.76 3.15 3.73 3.08 4.08 4.43 3.77
## [29] 4.22 3.62 3.54 4.11

Считывание и запись файлов. Рабочая директория

# Read a CSV file directly from a URL.
df <- read.csv("https://raw.githubusercontent.com/agricolamz/2018_data_analysis_for_linguists/master/data/students/Lera-Z/hw1_agreement/hw1_1_zilo_class.csv")

# Read a CSV file through an absolute path on the local disk.
# NOTE(review): the /home/agricolamz/... paths below look specific to the
# author's machine — adjust them before running.
df <- read.csv("/home/agricolamz/work/materials/2018_I_HSE_Data_Analysis_for_linguists/data/students/akv17/hw1_agreement/hw1_1_zilo_class.csv")

getwd() # returns the current working directory

# Change the working directory; relative paths are resolved against it afterwards.
setwd("/home/agricolamz/work/materials/2018_I_HSE_Data_Analysis_for_linguists/data/students/akv17/hw1_agreement")

df <- read.csv("hw1_2_verbs.csv") # relative path: looked up in the working directory set above

Факторы

# Character vectors vs. factors, and length() vs. nchar().
a <- c("папа", "мама")
a
## [1] "папа" "мама"
a_factor <-  factor(a) # the same values stored as a factor
nchar(a) # number of characters in each string
## [1] 4 4
# nchar(a_factor) does not work :(
length(month.name) # number of elements in the vector
## [1] 12
nchar(month.name) # number of characters in each element
##  [1] 7 8 5 5 3 4 4 6 9 7 8 8
length(month.name[1]) # a single element is still a vector of length 1
## [1] 1
nchar(month.name[1]) # characters in the first month name
## [1] 7

Пакеты

# installing a package
install.packages("lingtypology")
# loading the package
library("lingtypology")
map.feature(c("Adyghe", "Russian")) # NOTE(review): lingtypology function; presumably draws a map of the listed languages — confirm

install.packages("tidyverse")
library(tidyverse) # the package name also works without quotes

Пользовательские функции

#' Square the input.
#'
#' Note: despite the name, this does NOT compute a square root; it raises
#' its argument to the power 2.
#'
#' @param i Numeric vector.
#' @return `i^2`, computed element-wise.
my_sqrt <- function(i) {
  i^2
}
my_sqrt(43)
## [1] 1849
my_sqrt(c(4, 23)) # vectorised: every element is squared
## [1]  16 529
#' Raise `x` to the power `y`.
#'
#' @param x Numeric vector (base).
#' @param y Numeric vector (exponent).
#' @return `x^y`.
my_sqrt2 <- function(x, y) {
  power <- x^y
  power
}
my_sqrt2(4, 12)
## [1] 16777216
# Same computation as a one-line definition: a single-expression body
# needs no braces. Returns x raised to the power y.
my_sqrt3 <- function(x, y) x^y
my_sqrt3(4, 12)
## [1] 16777216
#' Raise `x` to the power `y`, with a default exponent.
#'
#' @param x Numeric vector (base).
#' @param y Exponent; defaults to 12 when omitted.
#' @return `x^y`.
my_sqrt4 <- function(x, y = 12) {
  x^y
}

my_sqrt4(4, 12) # exponent given explicitly
## [1] 16777216
my_sqrt4(4) # exponent omitted: the default y = 12 is used
## [1] 16777216
my_sqrt4(4:7) # vectorised over x
## [1]    16777216   244140625  2176782336 13841287201

Как порождать последовательности чисел

# Generating regular sequences, repetitions and random samples.
seq(from = 1, to = 4, by = 0.1) # from 1 to 4 in steps of 0.1
##  [1] 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6
## [18] 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0
rep(5, 10) # the value 5 repeated 10 times
##  [1] 5 5 5 5 5 5 5 5 5 5
rep(c(5, 9), 10) # the whole vector repeated 10 times
##  [1] 5 9 5 9 5 9 5 9 5 9 5 9 5 9 5 9 5 9 5 9
rep(c(5, 9), each = 10) # each element repeated 10 times in turn
##  [1] 5 5 5 5 5 5 5 5 5 5 9 9 9 9 9 9 9 9 9 9
# random letters, drawn with replacement
sample(letters, 3, replace = TRUE)
## [1] "u" "c" "q"
# how to make a random draw reproducible: fix the seed first
set.seed(42)
sample(1:190, 3, replace = TRUE)
## [1] 174 179  55

Распределения

rnorm(100, mean = 100, sd = 20) # 100 draws from a normal distribution with mean 100 and sd 20
##   [1] 119.11871 100.95769  77.90801 110.78048 111.60413  86.84994 131.09791
##   [8]  76.24717 103.03626  78.27735 132.26746 100.71262 126.29918 119.56335
##  [15] 117.63582 109.64409 119.31506  83.70858 105.67916  96.76603 138.71144
##  [22] 134.46462 107.16804 106.04862  92.11771 115.76281 113.41408  81.01638
##  [29] 100.72271 142.31511 103.34882  82.40327 118.93826  85.90897  78.43661
##  [36]  98.96674 111.61858  93.65305  40.92840  79.93791 107.47108 103.20444
##  [43]  73.18250  89.81100  30.14187 129.97552 112.50487 100.75534 106.06547
##  [50]  84.36343  94.35861 135.63574 112.45319  43.24874 119.62795  97.62474
##  [57] 101.87654  92.59864 118.99986  95.48177 104.53435  94.67239 135.62649
##  [64] 111.92510 105.24699 130.66386 104.02871  75.86026 106.29970  71.93410
##  [71] 115.40122 118.09722 131.91368  79.19157  90.87393  94.64475 115.16338
##  [78]  62.11195 109.36316 102.53820  82.95189  81.65881  90.48091  82.15296
##  [85]  95.54536  99.01250  78.07552 104.66953 114.71328 121.84422  87.08747
##  [92] 113.32417 116.35088  88.80059 115.72294  77.38587  67.69522  75.71313
##  [99] 112.33593 100.85808
set.seed(42) # fix the seed immediately before drawing
rnorm(100, mean = 100, sd = 20) # normal distribution
##   [1] 127.41917  88.70604 107.26257 112.65725 108.08537  97.87751 130.23044
##   [8]  98.10682 140.36847  98.74572 126.09739 145.73291  72.22279  94.42422
##  [15]  97.33357 112.71901  94.31494  46.87089  51.19066 126.40227  93.86723
##  [22]  64.37383  96.56165 124.29349 137.90387  91.39062  94.85461  64.73674
##  [29] 109.20195  87.20010 109.10900 114.09675 120.70207  87.82147 110.09910
##  [36]  65.65983  84.31082  82.98185  51.71585 100.72245 104.11997  92.77885
##  [43] 115.16326  85.46590  72.63438 108.65636  83.77214 128.88203  91.37108
##  [50] 113.11296 106.43851  84.32322 131.51455 112.85799 101.79521 105.53101
##  [57] 113.58578 101.79666  40.13820 105.69766  92.65531 103.70461 111.63647
##  [64] 127.99474  85.45416 126.05085 106.71696 120.77012 118.41457 114.41756
##  [71]  79.13762  98.19627 112.47036  80.92953  89.14342 111.61993 115.36357
##  [78] 109.27535  82.28447  78.00438 130.25414 105.15843 101.76880  97.58207
##  [85]  76.11342 112.23994  95.65720  96.34487 118.66693 116.43546 127.84233
##  [92]  90.47652 113.00697 127.82221  77.78422  82.78415  77.36523  70.81572
##  [99] 101.59965 113.06409
rt(100, 2) # t distribution (100 draws, 2 degrees of freedom)
##   [1]  0.976805727 -0.614956963 -0.877490818 -0.653715997 -0.283422990
##   [6] -0.595649463  3.531999994 -0.848157251  0.675056153 -0.252887137
##  [11] -1.171047357 -0.003697156  0.068526158  0.116399461 -0.767546249
##  [16] -3.732469744 -1.691909696 -0.720028566 -0.327071791 -0.790727424
##  [21] -0.410377461 -1.538197029 -0.308727902 -2.196681983 -0.025286323
##  [26]  0.208947337 -0.648746223 -0.361973365  6.796443136 -0.104949980
##  [31] -0.204171679 -1.124510315  1.368928702 -1.441840623 -0.008118733
##  [36] -0.655190490  0.519235653 -5.870148065 -0.443763546  0.567035589
##  [41] -1.849471065 -1.064581860  0.224838449  0.205596417 -0.940063195
##  [46] -2.267531513  0.162054435  0.046442354 -1.761910114 -1.159386849
##  [51] -0.180340869 -0.620251776  0.903934366  1.035202150  1.303562258
##  [56] 74.389123997 -0.066004392  0.287157452  0.663396305 -2.806536943
##  [61] -1.011024271 -1.974133088 10.971719266 -2.722441935 -0.828281845
##  [66] -1.755874761 -0.500150685 -0.884195972 -1.597893127  5.484252925
##  [71] -0.547134173 -1.689012819  0.476368704  3.843411107 -2.306505461
##  [76] -0.755530652 -0.215984361 -0.268679256 -1.710290169 -3.995852361
##  [81] -0.274602624 -0.995021479 -1.005423032 -0.604481518  0.072146124
##  [86]  0.913885828  0.756570792 -0.063641017 -1.325332425 -0.452672266
##  [91] -1.633589970 -0.247228228  1.447299162 -1.499881051 -0.959638879
##  [96]  0.361140012 -0.154984334  0.586969096 -0.664988927  1.131930242
rbinom(100, 75, 0.23) # binomial distribution (100 draws, 75 trials each, success probability 0.23)
##   [1] 19 16 15 12 19 14 18 13 16 15 20 16 17 17 18 16 24 24 22 12 17 16 19
##  [24] 18 21 23 18 17 23 22 16 11 20 23 16 15 12 16 14 13 15 18 13 24 23 26
##  [47] 19 13  9 19 19 22 16 16 18 16 13 16 14 19  9 22 22 20 23 18 19 16 12
##  [70] 19 13 15 18 16 12 21 16 19 10 18 15 16 13 21 13 22 22 14 17 17 14 17
##  [93] 14 15 17 14 22 17 18 20

Условия и циклы

# Branching on a single value with if / else.
x <- 7
if (x > 5) {
  TRUE
} else {
  FALSE
}
## [1] TRUE

# Chained conditions: the first branch whose test is TRUE is evaluated.
if (x == 5) {
  "equal"
} else if (x > 5) {
  "greater"
} else {
  "less"
}
## [1] "greater"

# while repeats its body for as long as the condition holds (i = 2, 3, 4).
i <- 2
while (i < 5) {
  print("wow")
  i <- i + 1
}
## [1] "wow"
## [1] "wow"
## [1] "wow"

# ifelse(test, yes, no) is the function form of the same choice.
ifelse(x > 24, "greater", "less")
## [1] "less"