######################################################################
# Computer notes, Lecture 4
# Stat 371: Introductory applied statistics for the life sciences
######################################################################
# These notes contain R code that accompanies the course lectures.
# They illustrate the lecture topics further and are meant to help
# students learn R.
#
# Any line that starts with the symbol '#' is a comment in R; every
# other line contains code.
#
# To view this file from within R, type:
url.show("https://kbroman.org/teaching_old/stat371/comp04.R")
######################################################################

# Note the url.show() call above: you can use it to download and open
# the computer notes for a lecture directly within R.

# Homework 2, first problem (2.79): the data set is available at
#   https://kbroman.org/teaching_old/stat371/data_2-79.txt
# scan() can load it into R straight from that address:
x <- scan("https://kbroman.org/teaching_old/stat371/data_2-79.txt")

# Type the name of the object (x) to see the data.
x

# Alternatively, download the file to your computer and then load it
# with scan() as before, giving the location and name of the file.
#
# The easiest way is to change your working directory (using the menu
# bar) to the folder that holds the file. Then you only need to type:
#   x <- scan("data_2-79.txt")

# read.table() is another option. Use the argument header = FALSE,
# since the file does not have a label for the data:
x <- read.table(
  "https://kbroman.org/teaching_old/stat371/data_2-79.txt",
  header = FALSE
)

# The result is a bit harder to work with, because the data are now a
# "data frame": a rectangular 'matrix' whose columns are variables and
# whose rows are subjects. (Here there is just one column.)
#
# Refer to rows and columns using [ , ]: rows go before the comma and
# columns go after it.

# The first five rows
x[1:5, ]

# Keep just the first column, to make things easier
x <- x[, 1]

# Now you can calculate the mean, median, and SD, and make a histogram
# or a dotplot
summary(x)
median(x)
mean(x)
sd(x)
hist(x, breaks = 12)
stripchart(x, method = "jitter", pch = 1)

# A bit more on data frames: R comes with a variety of data sets,
# most of them in the form of data frames.
#
# data() lists the available data sets.
data()

# data(ChickWeight) loads the data set 'ChickWeight' into your workspace.
data(ChickWeight)

# ls() or objects() shows that the data are now in your workspace
ls()
objects()

# Summary of the four variables in the data
summary(ChickWeight)

# The names of the variables in the data
names(ChickWeight)

# Number of rows
nrow(ChickWeight)

# Number of columns
ncol(ChickWeight)

# The first five rows
ChickWeight[1:5, ]

# The first column/variable
ChickWeight[, 1]

# Columns can also be referred to by name, using a $
ChickWeight$weight

# That makes it easy to draw a histogram of the first variable
hist(ChickWeight$weight, breaks = 50)

##################
# End of comp04.R
##################