###
### Basic R commands
###
# Walk-through of core R objects (vectors, lists, strings, matrices, data
# frames), file input/output, and a small regular-expression exercise.
# Statements that fail on purpose are wrapped in try() so the whole script
# can still be run with source().

# --- assignment, objects
x <- 3 # or x = 3
ls()   # names of the objects currently defined
x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
(x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) # outer parentheses: assign, then print
(x <- 1:10)
x[3] <- 4
x[3] <- "four" # an atomic vector has one type, so all of x becomes character
x
(y <- as.list(1:10))
y[3]   # `[` keeps the container: a list of length one
y[[3]] # `[[` extracts the element itself
y[[3]] <- "four" # a list may mix types; the other elements stay integer
y

# --- strings/characters
s <- "abcdefghijklmnopqrstuvwxyz"
library(stringr) # install.packages("stringr")
?stringr
s[1:4] # what you expected? (s is a vector of length one, not 26 characters)
str_sub(s, 1, 4) # characters 1 through 4 of the string

# --- matrix
(xx <- matrix(1:10, nrow = 5, ncol = 2)) # filled column by column by default
?matrix
(xx <- matrix(1:10, nrow = 5, ncol = 2, byrow = TRUE))
dim(xx)
rowSums(xx)
colSums(xx)
try(transpose(xx)) # fails: base R has no transpose(); the function is t()
t(xx)
2 * xx + 3
xx * xx # elementwise product, not a matrix product
xx^2
(xtx <- t(xx) %*% xx) # matrix product
(ev <- eigen(xtx))
names(ev)
ev$values
ev[[1]] # first component of the list, i.e. the same as ev$values
(udv <- svd(xx))
names(udv)
udv$d^2 # squared singular values of xx; compare with ev$values
udv$v   # compare with ev$vectors below
ev$vectors

# --- data frame
x <- 1:10
w <- rnorm(10)
y <- c(1, 4, 2, 5, 4, NA, 7, 3, 2, 10)
mean(x)
mean(y) # NA: missing values propagate
mean(y, na.rm = TRUE)
group <- c("A", "A", "A", "B", "A", "A", "B", "B", "B", "B")
(data <- data.frame(x, w, y, group))
str(data)
typeof(data) # a data frame is stored as a list of columns
colnames(data)
rownames(data)
try(colMeans(data)) # fails: the character column `group` is not numeric
colMeans(data[vapply(data, is.numeric, logical(1))], na.rm = TRUE)
lapply(data, mean) # NA for y (missing value) and for group (with a warning)

# Mean of `x` with missing values removed.
my_mean <- function(x) {
  mean(x, na.rm = TRUE)
}
lapply(data, my_mean)

# --- input/output
getwd()
setwd("~/courses/mich/text_analytics/data/") # see file.path
data <- read.csv("little_file.csv")
dim(data)
data
data$newVar <- rnorm(nrow(data)) # one draw per row, whatever the file length
write.csv(data, "temp.csv")
system("cat temp.csv") # `cat` is a Unix command
readLines("temp.csv") # embedded quotes

# --- regular expression challenge
# dealing with newlines
txt <- "\nNow this is the first line. \nAnd this is the second.\n Third and last\n"
txt <- str_replace(txt, "^\n", "")     # drop the leading newline
txt <- str_replace(txt, "\n$", ".")    # missing last .
txt <- str_replace_all(txt, "\n", " ") # remaining newlines become spaces