#############################################
# Effects of Warming on Tundra Plant Species
# Date: March 15, 2017
# Author: Annie Fang
# Desc: GEOB 307 LAB UBC
#############################################
#############################################
# STEP 1: Download and Load the Data into R
#############################################
# Personal File Location
# /Users/anniefang/Desktop/GEOB_307/Lab/Assignment_2/R/assignment2.R
# Data name
# data12
file.choose(data12)
# Data Location
# /Users/anniefang/Desktop/GEOB_307/Lab/Assignment_2/R/Data/data12.csv
# Load Data into R, comma separation
data12 gives you the first 6 rows of the dataframe
# tail() -> gives you the last 6 rows of the dataframe
# str()-> gives you the data type in each column of the dataframe
# You can also view the entire table using the functionView().
# Here you should see the x variable “Treatment”, where OTC (warming)
# is the experimental treatment, and then control. LvNewDOY, LvSenDOY,
# FlMatDOY and FlCapDOY are unadjusted phenological variables.
head(data12)
tail(data12)
str(data12)
functionView(data12)
# Check to make sure the x variable “Treatment” is of the type factor (you can see this using str()).
# If it is, you’re good to go. If it is not, recast it as a factor, using the function as.factor()
# note - it was already a factor, but i wanted to do this part anyways.
str(data12$Treatment)
# Factor w/ 2 levels "Control","OTC": 1 1 1 2 2 2 1 1 1 2 ...
# recast variable as a factor
as.factor(data12$Treatment)
# place factor variable back into the dataframe
data12$Treatment changes the line colour.
# You can specify this with words, e.g. “red”
# lty -> changes line type. Y ou can specify this with a number, e.g. 2
# lwd -> changes line width. Y ou can specify this with a number, e.g. 2
hist(LvNew,
breaks = 15,
main = "First Leaf: Dryas integrefolia",
xlab = "Days After Snowmelt")
abline(v = mean(data121$LvNew),
col = "red",
lty = 2,
lwd = 2)
# 4) Split the data into 2 groups to examine it separately for each treatment.
# Do this using the following commands
# control this should just be the labels for control and OTC
# Means this should be the means calculated for each group
# SE this should be the errors cacluclated for each group
# To calculate the mean, use a the built in function:
# mean()
# To calculate the standard error, create a user defined function (we did this last week too),
# using the knowledge that
# Standard Error = Standard Deviation / (Number of Observations)^0.5
# Hint: Standard Deviation can be calculated using the built in function sd()
# Once the vectors are defined, combine them into a dataframe, and then print the dataframe.
###########################
##### LvNew Variables #####
###########################
mean(control)
# LvNew control mean = 7.909091
sd(control)/34^0.5
# LvNew control standard error = 0.5055838
mean(OTC)
# LvNew OTC mean = 7.695652
sd(OTC)/34^0.5
# LvNew OTC standard error = 0.6235889
# Create LvNew Dataframe
Treatment changes the colour. You can specify this with words, e.g. “lightgrey”
# pch -> changes point type. Y ou can specify this with a number, e.g. 20
# Boxplot of LvNew
boxplot(LvNew~Treatment,
data=data121,
main="First New Leaf",
ylab="Days After Snowmelt",
col="lightgrey",
pch=20)
# Boxplot of LvSen
boxplot(LvSen~Treatment,
data=data122,
main="First New Senescing Leaf",
ylab="Days After Snowmelt",
col="lightgrey",
pch=20)
# Boxplot of FlMat
boxplot(FlMat~Treatment,
data=data123,
main="First Mature Flower",
ylab="Days After Snowmelt",
col="lightgrey",
pch=20)
#############################################
# STEP 5: Fit a Model
#############################################
# 1) First try fitting a model without variable transformation. Do this using the following command:
# mod1 plots the residuals
# qqline() -> plots a line for comparison
# b. A histogram of the residuals. As before, use the following function, applied on the
# residuals this time, not the data.
# hist()
# Isolate Residuals for LvNew - Dataframe: data121
resid(modnew)
# Isolate Residuals for LvSen - Dataframe: data122
resid(modsen)
# Isolate Residuals for FlMat - Dataframe: data123
resid(modmat)
# Plot the Quantile-Quantile Plot for LvNew - Dataframe: data121
qqnorm(resid(modnew))
# Plot the Quantile-Quantile Plot for LvSen - Dataframe: data122
qqnorm(resid(modsen))
# Plot the Quantile-Quantile Plot for FlMat - Dataframe: data123
qqnorm(resid(modmat))
# Histogram of the Residuals for LvNew - Dataframe: data121
hist(resid(modnew),
main = "Histogram of Residuals",
xlab = "Residuals")
# Histogram of the Residuals for LvSen - Dataframe: data122
hist(resid(modsen),
main = "Histogram of Residuals",
xlab = "Residuals")
# Histogram of the Residuals for FlMat - Dataframe: data123
hist(resid(modmat),
main = "Histogram of Residuals",
xlab = "Residuals")
# 3) IF THE YOU HAVE NON-NORMALITY, a way to deal with it is to apply transformations to your data,
# specifically to your response variable. Then re-fit the model, and check assumptions again
# (i.e. steps number 1 and 2 of this section). Some classic transformations include the following functions
# log() -> Natural logarithm transform
# log10()) -> log10 logarithm transform
# sqrt()-> Square root transform
# exp()-> Power transform (inverse of log transform)
# A simple was of doing this is the following example:
# mod2 F)
# Treatment 1 178.96 178.959 16.712 0.0001772 ***
# Residuals 45 481.89 10.709
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# (If you used the Wilcoxon test of differences, the anova function won’t work.
# Just print out the results using the name you assigned to the model, so in this case mod3.
# This will give you a p-value.)
# Overall variance is much smaller than treatment variance (F = 16.71, p < 0.001).
# This means that the difference within groups is small compared to the difference between groups.
# Therefore, we can reject H0 (that there are no differences) and accept Ha (that there are large
# differences between the groups).
# We also want to consider our means. In our control plots, how many days after snowmelt did the
# first leaf appear? How about in the OTC plots? Is this difference in means significant?
# In this example, it is (p < 0.05). # ANOVA Table for LvNew anova(modnewtrans) #Analysis of Variance Table # Response: log(LvNew) # Df Sum Sq Mean Sq F value Pr(>F)
# Treatment 1 0.0052 0.00522 0.0208 0.8863
# Residuals 32 8.0478 0.25149
# Analysis of Variance Table
# Response: log(LvSen)
# Df Sum Sq Mean Sq F value Pr(>F)
# Treatment 1 0.00701 0.0070066 0.4504 0.5073
# Residuals 30 0.46667 0.0155557
# Analysis of Variance Table
# Response: log(LvSen)
# Df Sum Sq Mean Sq F value Pr(>F)
# Treatment 1 0.02012 0.020116 1.1534 0.2935
# Residuals 24 0.41858 0.017441
# ANOVA Table for LvSen
anova(modsentrans)
# ANOVA Table for FlMat
anova(modmattrans)
#############################################
################ END SCRIPT #################
#############################################