Report Subset2 UVV

Loading the packages:

library(devtools)
# Install metabolomicsUM from Bitbucket only when it is not already available,
# so that re-running the report does not reinstall the package (and does not
# require network access) on every execution.
# NOTE(review): assumes the repository installs a package named
# "metabolomicsUM", as the library() call below suggests -- confirm.
if (!requireNamespace("metabolomicsUM", quietly = TRUE)) {
  install_bitbucket("chrisbcl/metabolomicsPackage")
}
library(metabolomicsUM)

Reading .csv files and creating the dataset:

# NOTE(review): setwd() ties the report to one machine's directory layout;
# the relative paths used below are resolved against this directory.
setwd("~/Dropbox/fernanda")
# Run the project script metadata_uvv.R (its contents are not shown here).
source("metadata_uvv.R")
# Read the sample files found in the "uvv_samples" folder.
samplelist <- read.csvs.folder("uvv_samples")

## [1] "Reading sample  uvv_samples/varfruglut.csv"
## [1] "Reading sample  uvv_samples/varfrutose.csv"
## [1] "Reading sample  uvv_samples/varglicose.csv"
## [1] "Reading sample  uvv_samples/vargliglut.csv"
## [1] "Reading sample  uvv_samples/varsacarose.csv"
## [1] "Reading sample  uvv_samples/varsacglut.csv"

# Generate the metadata for the "uvv_samples" folder (presumably written to
# metadata_uvv.csv because write.file is set -- confirm) and read it back.
get.metadata("uvv_samples", write.file = TRUE, file.name = "metadata_uvv.csv")
metadata <- read.metadata("metadata_uvv.csv")

# Build the dataset from the samples and label its axes.
ds <- dataset.from.peaks(samplelist, type = "uvv-spectra", metadata = metadata)
ds$labels$x <- "wavelength"
ds$labels$val <- "absorbance"

# Shorten and translate the sample names: drop the "var" prefix, then map
# "gli" -> "glu" and "sacarose"/"sac" -> "sucrose"/"suc". The full word
# "sacarose" must be replaced before the shorter "sac".
# NOTE(review): "frutose" is not rewritten to "fructose"; the k-means output
# further down still shows "frutose".
sample.names <- get.sample.names(ds)
sample.names <- gsub("var", "", sample.names)
sample.names <- gsub("gli", "glu", sample.names)
sample.names <- gsub("sacarose", "sucrose", sample.names)
sample.names <- gsub("sac", "suc", sample.names)
ds <- set.sample.names(ds, sample.names)

# Print the dataset summary.
sum.dataset(ds)

## Dataset summary:
## Valid dataset
## Description:   
## Type of data:  uvv-spectra 
## Number of samples:  6 
## Number of data points 601 
## Number of metadata variables:  2 
## Label of x-axis values:  wavelength 
## Label of data points:  absorbance 
## Number of missing values in data:  0 
## Mean of data values:  0.6119147 
## Median of data values:  0.3575127 
## Standard deviation:  0.668702 
## Range of values:  0.01892327 2.269357 
## Quantiles: 
##         0%        25%        50%        75%       100% 
## 0.01892327 0.13567692 0.35751266 0.82376963 2.26935750

# Keep only the part of the dataset whose x values (labelled "wavelength")
# lie in the 630-700 interval.
sub2.ds <- subset.x.values.by.interval(ds, 630, 700)

Plotting the spectra:

# Plot the spectra of the 630-700 subset. "treatment" is presumably the
# metadata variable used to group/colour the curves -- TODO confirm.
plot.spectra(sub2.ds, "treatment", cex = 0.7)

Baseline correction was applied, followed by Savitzky-Golay smoothing. The Savitzky-Golay first derivative of the baseline-corrected data was also calculated:

# Baseline-correct the subset with the "als" method (presumably asymmetric
# least squares -- confirm against the package documentation).
sub2.bl <- baseline.correction(sub2.ds, method = "als")
plot.spectra(sub2.bl, "treatment", cex = 0.7)

# Savitzky-Golay smoothing of the baseline-corrected data (deriv = 0).
sub2.bl.sg <- smoothing.interpolation(
  sub2.bl,
  method = "savitzky.golay", window = 15, p.order = 3, deriv = 0
)
plot.spectra(sub2.bl.sg, "treatment", cex = 0.7)

# Savitzky-Golay first derivative (deriv = 1), computed from the
# baseline-corrected data rather than from the smoothed result above.
sub2.bl.sg.fd <- smoothing.interpolation(
  sub2.bl,
  method = "savitzky.golay", window = 15, p.order = 3, deriv = 1
)
plot.spectra(sub2.bl.sg.fd, "treatment", cex = 0.7)

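Univariate analysis. t-tests were calculated on the "glutamine" metadata variable (fold change is mentioned for this analysis but is not computed in the code shown below):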

# t-tests on the baseline-corrected data, using the "glutamine" metadata
# variable; show the first 10 rows of the result.
ttest.bl <- tTests.dataset(sub2.bl, "glutamine")
ttest.bl[seq_len(10), ]

##        p.value    -log10       fdr
## 641 0.02622417 1.5812983 0.2215756
## 640 0.03312706 1.4798171 0.2215756
## 642 0.03948659 1.4035504 0.2215756
## 643 0.05959207 1.2248115 0.2215756
## 644 0.07201278 1.1425904 0.2215756
## 630 0.07619227 1.1180891 0.2215756
## 645 0.08999693 1.0457723 0.2215756
## 631 0.09754883 1.0107779 0.2215756
## 646 0.09936015 1.0027878 0.2215756
## 689 0.11223691 0.9498643 0.2215756

# Plot the t-test results for the baseline-corrected data, passing 0.05 as
# the t-test threshold.
plot.ttests(sub2.bl, ttest.bl, tt.threshold = 0.05)

#t-tests on bl savitzky-golay first derivative
ttest.bl.sg.fd <- tTests.dataset(sub2.bl.sg.fd, "glutamine")
# Show the first 10 rows of the first-derivative t-test results.
ttest.bl.sg.fd[seq_len(10), ]

##        p.value   -log10       fdr
## 630 0.02559029 1.591925 0.2538091
## 631 0.03207617 1.493818 0.2538091
## 632 0.03704975 1.431215 0.2538091
## 633 0.03978068 1.400328 0.2538091
## 634 0.04160279 1.380878 0.2538091
## 635 0.04295227 1.367014 0.2538091
## 636 0.04780146 1.320559 0.2538091
## 637 0.05283288 1.277096 0.2538091
## 638 0.06033417 1.219437 0.2538091
## 639 0.07692052 1.113958 0.2538091

# Plot the t-test results for the Savitzky-Golay first-derivative data,
# passing 0.05 as the t-test threshold.
plot.ttests(sub2.bl.sg.fd, ttest.bl.sg.fd, tt.threshold = 0.05)

PCA Analysis:

# PCA on the baseline-corrected data, followed by the component summary.
pca.bl <- pca.analysis.dataset(sub2.bl)
summary(pca.bl)

## Importance of components:
##                           PC1     PC2    PC3     PC4     PC5       PC6
## Standard deviation     8.0437 1.77456 1.7269 0.36838 0.17769 2.516e-15
## Proportion of Variance 0.9113 0.04435 0.0420 0.00191 0.00044 0.000e+00
## Cumulative Proportion  0.9113 0.95564 0.9976 0.99956 1.00000 1.000e+00

# 2D scores plot for the baseline-corrected data, using the "glutamine"
# metadata variable, with ellipses and labels enabled and no legend.
pca.scoresplot2D(
  sub2.bl, pca.bl, "glutamine",
  ellipses = TRUE, labels = TRUE, leg.pos = "none"
)

# PCA on the baseline-corrected Savitzky-Golay first-derivative data,
# followed by the component summary.
pca.bl.sg.fd <- pca.analysis.dataset(sub2.bl.sg.fd)
summary(pca.bl.sg.fd)

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5       PC6
## Standard deviation     7.7093 2.8521 1.53690 0.95090 0.40764 2.395e-15
## Proportion of Variance 0.8371 0.1146 0.03327 0.01274 0.00234 0.000e+00
## Cumulative Proportion  0.8371 0.9517 0.98492 0.99766 1.00000 1.000e+00

# 2D scores plot for the first-derivative data, using the "glutamine"
# metadata variable, with ellipses and labels enabled and no legend.
pca.scoresplot2D(
  sub2.bl.sg.fd, pca.bl.sg.fd, "glutamine",
  ellipses = TRUE, labels = TRUE, leg.pos = "none"
)

Clustering analysis with hierarchical clustering and k-means:

# Hierarchical clustering on the baseline-corrected data; dendrogram drawn
# using the "glutamine" metadata variable, without a legend.
hc.bl <- clustering(sub2.bl, method = "hc")
dendrogram.plot.col(sub2.bl, hc.bl, "glutamine", leg.pos = "none")

# Hierarchical clustering on the Savitzky-Golay first-derivative data.
hc.bl.sg.fd <- clustering(sub2.bl.sg.fd, method = "hc")
dendrogram.plot.col(sub2.bl.sg.fd, hc.bl.sg.fd, "glutamine", leg.pos = "none")

# k-means with two clusters on the baseline-corrected data.
kmeans.bl <- clustering(sub2.bl, method = "kmeans", num.clusters = 2)
kmeans.plot(sub2.bl, kmeans.bl)

# Tabulate which samples fall in each of the 2 clusters.
kmeans.result.df(kmeans.bl, 2)

##   cluster                         samples
## 1       1                 frutose glucose
## 2       2 fruglut gluglut sucrose sucglut

# k-means with two clusters on the baseline-corrected Savitzky-Golay
# first-derivative data.
kmeans.bl.sg.fd <- clustering(
  sub2.bl.sg.fd,
  method = "kmeans", num.clusters = 2
)
kmeans.plot(sub2.bl.sg.fd, kmeans.bl.sg.fd)

# Tabulate which samples fall in each of the 2 clusters.
kmeans.result.df(kmeans.bl.sg.fd, 2)

##   cluster                         samples
## 1       1 fruglut gluglut sucrose sucglut
## 2       2                 frutose glucose