Question

Transcribed Text

Generalized linear models allow for a number of specific error distributions other than a normal distribution. In this way, they represent a flexible generalization of general linear models. The purpose of this lab is to learn how to apply a generalized linear model that assumes a Poisson distribution of error terms, which is often the most appropriate model for count data.

For this lab, we will revisit James Meidell's Northern Alligator Lizard pitfall trap data from last week. Remember that James created 17 pitfall trap arrays to assess relative density of Northern Alligator Lizards on the east and west sides of a section of I-90. He collected data on 19 explanatory habitat variables at each array of pitfall traps. He is interested in seeing which habitat variables best predict relative lizard density. As a reminder, the variables are summarized in Table 1 below.

Table 1. Variables in the Northern Alligator Lizard dataset

  Variable Name   Description                                                        Type
  lizards         Number of lizards caught in each pitfall trap array                Discrete numerical response variable
  location        East or west side of I-90                                          Categorical explanatory variable
  11_30cm         Average percent of ground covered by rocks 11-30 cm in diameter    Continuous explanatory variable
  31_60cm         Average percent of ground covered by rocks 31-60 cm in diameter    Continuous explanatory variable
  totalrock       Average percent of ground covered by rocks                         Continuous explanatory variable
  woodebris       Average percent of ground covered by woody debris                  Continuous explanatory variable
  leafdebris      Average percent of ground covered by leafy debris                  Continuous explanatory variable
  seedling        Average percent of canopy cover from seedlings                     Continuous explanatory variable
  sapling         Average percent of canopy cover from saplings                      Continuous explanatory variable
  maturetrees     Average percent of canopy cover from mature trees                  Continuous explanatory variable

Your task today is to select an appropriate generalized linear model (GLM) to analyze the alligator lizard dataset and interpret the output. It will be interesting to compare your output to the results from Lab 6. In the instructions below, I point out the differences between the general linear model approach we took last week and the generalized linear model we will apply today.

1. Data exploration

Last week, all of you noticed the highly skewed distribution of many of your predictor variables and decided to use a log transformation. This is still a good idea. Copy your code from last week, or else use the code below, to 1) log10+1 transform skewed predictor variables, 2) create a new column containing the squared values of woodebris, 3) log10+1 transform the response variable for visualization, and 4) bind all of these together with the untransformed predictor variables that were not skewed into a matrix called Z. Name the columns of this matrix and convert it into a data frame. Note that I have placed the response variable as the last column.
This makes it easier to interpret any pairplots, since the response variable will appear on the y-axis, as we are used to seeing it:

Data <- read.table("Jamesdata.txt", header=TRUE)

# transform the predictor variables that need it
Data$L.X11_30cm   <- log10(Data$X11_30cm+1)
Data$L.X31_60cm   <- log10(Data$X31_60cm+1)
Data$L.totalrock  <- log10(Data$totalrock+1)
Data$L.leafdebris <- log10(Data$leafdebris+1)
Data$L.seedling   <- log10(Data$seedling+1)
Data$L.sapling    <- log10(Data$sapling+1)
Data$L.snag_seed  <- log10(Data$snag_seed+1)

# prepare to try a quadratic regression with woodebris
Data$woodebris2 <- (Data$woodebris)^2

# log transform the response variable for visualization only
Data$L.lizards <- log10(Data$lizards+1)

# bind the transformed and untransformed variables into a matrix called Z,
# then convert it into a data frame called DataT
Z <- cbind(Data$location, Data$L.X11_30cm, Data$L.X31_60cm, Data$L.totalrock,
           Data$woodebris, Data$woodebris2, Data$L.leafdebris, Data$L.seedling,
           Data$L.sapling, Data$maturetrees, Data$L.lizards, Data$lizards)
colnames(Z) <- c("Location", "L.X11_30cm", "L.X31_60cm", "L.totalrock",
                 "woodebris", "woodebris2", "L.leafdebris", "L.seedling",
                 "L.sapling", "maturetrees", "L.lizards", "lizards")
DataT <- as.data.frame(Z)

Now take a look at the data, just as you did last week, but this time keep the Poisson distribution in mind:

source("AEDgraphingfunctions.R")
pairs(Z, lower.panel=panel.smooth2, upper.panel=panel.cor, diag.panel=panel.hist)

First, as many of you noticed last week, there is a high degree of collinearity among all of the rock variables. Let's reduce that to some degree by eliminating totalrock from the analysis. This removes the two highest correlations without losing much information, since the two different rock size categories are retained.

Now scan the bottom row of plots. Note the classic Poisson distribution apparent in the plot of maturetrees vs. lizards, as well as the general tendency of the variance in lizards to scale with the mean. This indicates that a Poisson-distributed error structure is likely to be appropriate. Also scan the second-to-bottom row of plots and note how several of the stronger relationships are linearized to some extent by the log transformation of lizards. This indicates that the default log link function of Poisson regression is likely to be appropriate as well. Because the data fit what we expect from a Poisson distribution, a Poisson GLM accommodates the nonlinearity, heteroscedasticity, and non-normality that would trouble a general linear model, so we no longer need to worry about them.

Finally, note the apparently quadratic relationship between woodebris and L.lizards. This indicates that it may be appropriate to include the woodebris^2 term in your model.
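If you would like to redraw the pairplot with totalrock removed before moving on, one option is to drop that column by name. This is a minimal sketch, assuming the matrix Z built above; the object name Z2 is an arbitrary choice:

# keep every column of Z except L.totalrock, then redraw the pairplot
Z2 <- Z[, colnames(Z) != "L.totalrock"]
pairs(Z2, lower.panel=panel.smooth2, upper.panel=panel.cor, diag.panel=panel.hist)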
2. Model selection

The lm function we have been using for general linear models cannot handle generalized linear models. Instead, we will need to use the glm function, which is part of the stats package. Calls to glm look very similar to calls to lm, except that you have the option of specifying a specific non-normal distribution of error terms. Start by fitting a model that predicts the raw number of lizards as a function of all explanatory variables except totalrock. Do not include woodebris2 yet. Note that adding the family = poisson argument is all you need to do to switch from the default Gaussian distribution of error terms to a Poisson distribution:

fit0.P <- glm(lizards ~ Location + L.X11_30cm + L.X31_60cm + woodebris +
              L.leafdebris + L.seedling + L.sapling + maturetrees,
              family=poisson, data=DataT)

Last week taught us that model selection is tricky in this case, likely due to collinearity as well as the lack of strong univariate relationships between each explanatory variable and the response variable. Let's use a combination of forward and backward stepwise regression for the most thorough model selection possible. For this, you will need to install and load the MASS package. You can then use the following function:

step <- stepAIC(fit0.P, direction="both")

Note that, at each step, remaining variables are dropped one by one from the model, and previously excluded variables are added one by one. The change that yields the greatest improvement in AIC score is retained.

3. Model validation

Fit a new model that includes only the variables identified by your stepwise model selection:

fit1.P <- glm(lizards ~ L.X11_30cm + L.X31_60cm + maturetrees,
              family=poisson, data=DataT)

Now validate this model by examining the residuals:

par(mfrow=c(2,2))
plot(fit1.P)

These are not horrendous, but the bow shape in the left two plots is reminiscent of the nonlinear relationship we noted between woodebris and lizards. Let's add the quadratic term to our model, repeat the selection procedure, and see if it is retained and improves the residuals:

fit2.P <- glm(lizards ~ Location + L.X11_30cm + L.X31_60cm + woodebris +
              woodebris2 + L.leafdebris + L.seedling + L.sapling + maturetrees,
              family=poisson, data=DataT)
step <- stepAIC(fit2.P, direction="both")

This does, indeed, yield a different model that includes the quadratic term. Validate the new model by looking at the residuals:

fit3.P <- glm(lizards ~ woodebris + woodebris2 + maturetrees,
              family=poisson, data=DataT)
par(mfrow=c(2,2))
plot(fit3.P)

The bowed shape in the residuals is now gone, which is good. The raw residuals show some heteroscedasticity, with the variance increasing with the fitted value, but this is to be expected with a Poisson distribution and should not cause alarm. We also don't necessarily expect the residuals to be normally distributed, so we can largely ignore the Q-Q plot. The plot of residuals vs. leverage identifies one potentially problematic data point, in row 13. In the interpretation phase, we should try analyzing the data with and without point 13 in order to assess the robustness of our conclusions with respect to this one point.

Before moving on to interpretation, however, we should also plot the residuals against every other explanatory variable (both those included in and excluded from the model) to check for any remaining patterns, which might indicate the need to include additional variables or interaction terms in the model. You can use code such as this to accomplish that task:

par(mfrow=c(3,2))
E <- rstandard(fit3.P)
plot(y = E, x = DataT$L.X11_30cm, xlab = "L.X11_30cm", ylab = "Residuals")
abline(0,0)
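Rather than copying that plotting block once for each variable, you can loop over the explanatory variables. This is a minimal sketch, assuming fit3.P and DataT from above; the vars vector and the 3x3 panel layout are illustrative choices:

# standardized residuals from the selected model
E <- rstandard(fit3.P)

# check residuals against every explanatory variable, in or out of the model
vars <- c("Location", "L.X11_30cm", "L.X31_60cm", "woodebris",
          "L.leafdebris", "L.seedling", "L.sapling", "maturetrees")

par(mfrow=c(3,3))
for (v in vars) {
  plot(y = E, x = DataT[[v]], xlab = v, ylab = "Residuals")
  abline(0, 0)
}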
Finally, it is important to check a Poisson regression for evidence of overdispersion (a variance greater than the mean). To do this, fit your model using a quasipoisson distribution and use the summary function to look at the output:

fit4.P <- glm(lizards ~ woodebris + woodebris2 + maturetrees,
              family=quasipoisson, data=DataT)
summary(fit4.P)

Near the bottom of the output, you will see a line that says "Dispersion parameter for quasipoisson family taken to be ...". This is the estimated dispersion parameter, which describes the mean-variance relationship of your residuals. Specifically, the dispersion parameter ρ is defined such that, if E[Yi] = μi, the variance of Yi is modeled as ρμi. A perfect Poisson distribution has a dispersion parameter of 1 (meaning that the mean equals the variance), and a value of ρ greater than 1 indicates overdispersion. If you use a quasipoisson model, the standard errors of your estimates will be multiplied by the square root of ρ. Overdispersion thus decreases precision and power, and wrongly ignoring overdispersion will bias your p-values downward, leading to inflated type I error rates. Overdispersion can be caused by model misspecification, and this possibility should be investigated first (see page 87 of your text). If ρ > 1, it is also possible that your data are truly overdispersed. As a rule of thumb, a ρ larger than 1.5 means that some action (such as using a quasipoisson model) should be taken, while a ρ larger than 15 or 20 indicates that you should also consider other methods, such as the negative binomial distribution or zero-inflated models (Zuur et al. 2009). In this case, the value of 1.06 reported by the summary function indicates no strong evidence of overdispersion, so it is safe to go back to a Poisson distribution of error terms with no correction.

4. Model interpretation

Once you are happy that your model is valid, you may finally turn your attention to hypothesis testing and interpretation. Once again, there are several different ways to obtain p-values for either your overall model or your individual explanatory variables. The summary command gives parameter estimates as well as p-values from a z-test on each explanatory variable. (The z-test is used rather than a t-test because the variance does not have to be estimated separately from the mean.) Be careful in interpreting the slope estimates, however: because of the log link, the linear function of explanatory variables predicts the natural log of the mean response, not the mean response itself.

summary(fit3.P)

The summary command also gives the null deviance and residual deviance values (analogous to the total sum of squares and the residual sum of squares in a general linear model). Because the difference in deviance values is approximately Chi-square distributed, these values can be used to test the significance of the overall model, although the p-value is approximate and should be interpreted with caution. Perform a deviance test on the overall model using this line of code:

1 - pchisq(nulldeviance - residualdeviance, nulldf - residualdf)

For example, with these particular results, you can use the line of code:

1 - pchisq(52.936 - 17.055, 3)

A deviance test can also be used to provide p-values for each individual explanatory variable, and it is preferable to the z-test for small datasets (Zuur 2010). Perform this deviance test using the following code:

drop1(modeloutput, test = "Chi")

Note that, if you used a quasipoisson distribution, you would need to use test = "F" instead.
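If you prefer not to copy the deviance values by hand, the same quantities can be pulled directly from the fitted object. A minimal sketch, assuming fit3.P from above:

# overall deviance test, equivalent to typing the values from summary() by hand
with(fit3.P, 1 - pchisq(null.deviance - deviance, df.null - df.residual))

# per-variable deviance tests (use test = "F" for a quasipoisson fit)
drop1(fit3.P, test = "Chi")

# quick overdispersion check without refitting: the Pearson chi-square statistic
# divided by the residual degrees of freedom estimates the dispersion parameter
sum(residuals(fit3.P, type = "pearson")^2) / df.residual(fit3.P)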
Because point number 13 was identified as being particularly influential, you can repeat your model fit without that point and assess the effect:

fit5.P <- glm(lizards ~ woodebris + woodebris2 + maturetrees,
              family=poisson, data=DataT[-13,])
summary(fit5.P)

The results do not change substantially; all variables are still highly significant, and the regression coefficients fall within one standard error of the original estimates. It is okay to forge ahead with your original dataset.

The final step in model interpretation is visualization. One particularly handy command gives partial regressions of each of the variables in your model (review section 5.3 in Zuur for an explanation of partial regressions). Try this in order to visualize the relationship between each of your explanatory variables and your response variable:

termplot(modeloutput, partial.resid=TRUE, smooth=panel.smooth, col.res=1)

A final visualization is particularly difficult in this case because you have two different explanatory variables in your model. We can gain some insight, however, by visualizing just the quadratic relationship between woodebris and lizards. Spend a few minutes unpacking the following lines of code. The first simply creates a scatterplot of woodebris vs. lizards. The second creates a vector of evenly spaced values spanning the range of woodebris. The third implements the model fit using the coefficient estimates from the summary command: it plots the x-values (contained in vector x) against the predicted number of lizards. Note that the number of lizards is predicted by raising e to the power of the linear predictor, and that the linear predictor uses the intercept of the overall model plus an offset associated with the mean value of maturetrees.

plot(DataT$woodebris, DataT$lizards, xlab="Woody Debris", ylab="Number of Lizards")
x <- 3:35
lines(x, exp(-0.607286 + 0.330792*x - 0.008818*x^2 -
             0.618493*mean(DataT$maturetrees)))

Color code the points on the plot according to their respective values of maturetrees, as sketched below. This would make an excellent final visualization.
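One way to implement that color-coding is to bin maturetrees and map each bin to a shade. A minimal sketch, assuming fit3.P and DataT from above; the four grey-scale bins are an arbitrary choice, and the coefficients are the ones used in the lines() call above:

# bin maturetrees into four classes and assign each point a grey level
bins <- cut(DataT$maturetrees, breaks = 4)
pal  <- gray.colors(4, start = 0.8, end = 0.1)   # light = few mature trees
plot(DataT$woodebris, DataT$lizards, pch = 19, col = pal[as.integer(bins)],
     xlab = "Woody Debris", ylab = "Number of Lizards")

# fitted quadratic curve, evaluated at the mean value of maturetrees
x <- 3:35
lines(x, exp(-0.607286 + 0.330792*x - 0.008818*x^2 -
             0.618493*mean(DataT$maturetrees)))
legend("topleft", legend = levels(bins), pch = 19, col = pal,
       title = "maturetrees")

If you would rather not hard-code the coefficients, predict(fit3.P, newdata = data.frame(woodebris = x, woodebris2 = x^2, maturetrees = mean(DataT$maturetrees)), type = "response") traces the same curve from the fitted object.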
Lab 7 Assignment

Spend some time exploring the effect of potential interaction terms to see if you can further improve the fit of your model. In a Word document, include the summary output of your best model (with the model formula at the top), the drop1 output of this model, and your best attempt at a final visualization.
Solution Preview


#1. Data exploration
# Last week, all of you noticed the highly skewed distribution of many of your
# predictor variables and decided to use a log transformation. This is still a good idea.
# Copy your code from last week, or else use the code below, to
# 1) log10+1 transform skewed predictor variables,
# 2) create a new column containing the squared values of woodebris,
# 3) log10+1 transform the response variable for visualization, and
# 4) bind all of these together with the untransformed predictor variables
#    that were not skewed into a matrix called Z. Name the columns of this matrix
#    and convert it into a data frame. Note that I have placed the response variable
#    as the last column. This makes it easier to interpret any pairplots,
#    since the response variable will appear on the y-axis, as we are used to seeing it.

# 1)
# read in the data (the lab handout reads "Jamesdata.txt"; adjust the file
# name and separator to match your copy of the data)
data <- read.table("Jamesdata.csv", header=TRUE, sep=",")
data$L.X11_30cm <- log10(data$X11_30cm+1)
data$L.X31_60cm <- log10(data$X31_60cm+1)
data$L.totalrock <- log10(data$totalrock+1)
data$L.leafdebris <- log10(data$leafdebris+1)
data$L.seedling <- log10(data$seedling+1)
data$L.sapling <- log10(data$sapling+1)
data$L.snag_seed <- log10(data$snag_seed+1)


# 2)
# prepare to try a quadratic regression with woodebris
data$woodebris2 <- (data$woodebris)^2
# 3)
# log transform the response variable for visualization only
data$L.lizards <- log10(data$lizards+1)

# 4)
# create a new data.frame containing these transformations

Z <- cbind(data$location,
           data$L.X11_30cm,
           data$L.X31_60cm,
           data$L.totalrock,
           data$woodebris,
           data$woodebris2,
           data$L.leafdebris,
           data$L.seedling,
           data$L.sapling,
           data$maturetrees,
           data$L.lizards,
           data$lizards)

# quick check: Z should have 17 rows (one per trap array) and 12 columns
dim(Z)
# set names for these new variables (matching the lab handout)
colnames(Z) <- c("Location", "L.X11_30cm", "L.X31_60cm", "L.totalrock",
                 "woodebris", "woodebris2", "L.leafdebris", "L.seedling",
                 "L.sapling", "maturetrees", "L.lizards", "lizards")
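Following the lab handout above, the remaining data-exploration steps would be as follows. This sketch assumes the AEDgraphingfunctions.R helper file that the handout sources:

# convert Z to a data frame for use in model fitting
DataT <- as.data.frame(Z)

# pairplot with smoothers, correlations, and histograms (panel functions
# come from the AEDgraphingfunctions.R file sourced in the lab handout)
source("AEDgraphingfunctions.R")
pairs(Z, lower.panel=panel.smooth2, upper.panel=panel.cor, diag.panel=panel.hist)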
