######################################################################
# Computer notes             Statistics for Laboratory Scientists (2)
# Lecture 6                                  Johns Hopkins University 
######################################################################
# These notes provide R code related to the course lectures, to
# further illustrate the topics in the lectures and to assist the
# students to learn R.
#
# Lines beginning with the symbol '#' are comments in R.  All other
# lines contain code.
#
# In R for Windows, you may wish to open this file from the menu bar
# (File:Display file); you can then easily copy commands into the
# command window.  (Use the mouse to highlight one or more lines; then
# right-click and select "Paste to console".)
######################################################################

######################################################################
# Example data
######################################################################
x <- c(672, 747, 749, 792, 875, 888, 930, 962, 994,1295)
y <- c(290, 359, 384, 466, 510, 516, 522, 532, 595, 706)

# plot the data
par(las=1) # make the Y-axis labels vertical rather than horizontal
stripchart(list(x,y), method="jitter",
           group.names=c("X","Y"), pch=1) # pch=1 makes the points circles
# add horizontal lines
abline(h=1:2,lty=2)
# add line segments at the sample means
mx <- mean(x); my <- mean(y)
segments(c(mx,my), 1:2-0.1, c(mx,my), 1:2+0.1, lwd=2, col="blue")

# perform test of whether the population variances are equal
var.test(x,y)

# get 95% confidence interval for the ratio of population SDs
sqrt( var.test(x,y)$conf.int )

######################################################################
# Brute force method
######################################################################
# the ratio of the sample variances
sampvar.ratio <- var(x)/var(y)

# the 2.5th and 97.5th percentiles of the F distribution
quant <- qf(c(0.025, 0.975), length(x)-1, length(y)-1)

# the 95% CI interval for the ratio of the population variances
sampvar.ratio * rev(1/quant) # reverse() reverses the order of a vector

# the 95% CI for the ratio of the population SDs
sqrt(sampvar.ratio * rev(1/quant))

# P-value for the test of whether the two population variances are equal
#     Note: this works only when sampvar.ratio > 1
2*(1 - pf(sampvar.ratio, length(x)-1, length(y)-1))

# If we had focussed on var(y) / var(x) rather than var(x) / var(y),
# we would do the following:
sampvar.ratio <- var(y) / var(x)
quant <- qf(c(0.025, 0.975), length(y)-1, length(x)-1)
sqrt(sampvar.ratio * rev(1/quant)) # 95% for ratio of pop'n SDs
2*pf(sampvar.ratio, length(y)-1, length(x)-1) # use this since sampvar.ratio < 1


######################################################################
# Example data
######################################################################
coag <- c(62, 60, 63, 59,
          63, 67, 71, 64, 65, 66,
          68, 66, 71, 67, 68, 68,
          56, 62, 60, 61, 63, 64, 63, 59)
ttt <-  c("A","A","A","A",
          "B","B","B","B","B","B",
          "C","C","C","C","C","C",
          "D","D","D","D","D","D","D","D")

# make the treatment "ttt" a factor
ttt <- factor(ttt, levels=c("A","B","C","D"))

# plot the data
par(las=1) # makes Y-axis labels horizontal
stripchart(coag ~ ttt, pch=1, method="jitter") # pch=1 makes the points circles

# the within-group means
tapply(coag, ttt, mean)
# note: tapply splits the first vector into groups according to the values
#       in the second vector, and then "applies" the function, mean(), to
#       each group.

# number of data points per group
tapply(coag, ttt, length)

# the ANOVA table in R
summary( aov(coag ~ ttt) ) 


##################
# End of comp06.R
##################