Report UVV samples

Loading the packages:

library(devtools)
install_bitbucket("chrisbcl/metabolomicsPackage")
library(metabolomicsUM)

Reading .csv files and creating the dataset:

setwd("~/Dropbox/fernanda")
source("metadata_uvv.R")
samplelist = read.csvs.folder("uvv_samples")

## [1] "Reading sample  uvv_samples/varfruglut.csv"
## [1] "Reading sample  uvv_samples/varfrutose.csv"
## [1] "Reading sample  uvv_samples/varglicose.csv"
## [1] "Reading sample  uvv_samples/vargliglut.csv"
## [1] "Reading sample  uvv_samples/varsacarose.csv"
## [1] "Reading sample  uvv_samples/varsacglut.csv"

get.metadata("uvv_samples", write.file = T, file.name = "metadata_uvv.csv")
metadata = read.metadata("metadata_uvv.csv")
ds = dataset.from.peaks(samplelist, type = "uvv-spectra", metadata = metadata)
ds$labels$x = "wavelength"
ds$labels$val = "absorbance"
sample.names = get.sample.names(ds)
sample.names = gsub("var","", sample.names)
sample.names = gsub("gli","glu",sample.names)
sample.names = gsub("sacarose","sucrose", sample.names)
sample.names = gsub("sac","suc", sample.names)
ds = set.sample.names(ds, sample.names)
sum.dataset(ds)

## Dataset summary:
## Valid dataset
## Description:   
## Type of data:  uvv-spectra 
## Number of samples:  6 
## Number of data points 601 
## Number of metadata variables:  2 
## Label of x-axis values:  wavelength 
## Label of data points:  absorbance 
## Number of missing values in data:  0 
## Mean of data values:  0.6119147 
## Median of data values:  0.3575127 
## Standard deviation:  0.668702 
## Range of values:  0.01892327 2.269357 
## Quantiles: 
##         0%        25%        50%        75%       100% 
## 0.01892327 0.13567692 0.35751266 0.82376963 2.26935750

Plotting the spectras:

plot.spectra(ds, "treatment", cex = 0.7)

Baseline correction and savitzky-golay smoothing interpolation method used. Also, savitzky-golay with the first derivative was calculated:

ds.bl = baseline.correction(ds, method = "peakDetection")
plot.spectra(ds.bl, "treatment", cex = 0.7)

ds.sg = smoothing.interpolation(ds.bl, method = "savitzky.golay", window = 15, 
                                p.order = 3, deriv = 0)
plot.spectra(ds.sg, "treatment", cex = 0.7)

ds.sg.fd = smoothing.interpolation(ds.bl, method = "savitzky.golay", window = 15, 
                                p.order = 3, deriv = 1)
plot.spectra(ds.sg.fd, "treatment", cex = 0.7)

Univariate analysis. Fold change and t-tests were calculated:

#fold change
fc.sg = fold.change(ds.sg, "glutamine", "sem.glutamina")
fc.sg[1:10,]

##     FoldChange  log2(FC)
## 200  0.1551163 -2.688578
## 580  0.2270444 -2.138953
## 581  0.2333400 -2.099494
## 579  0.2770276 -1.851898
## 582  0.2988575 -1.742470
## 578  0.3652327 -1.453112
## 213  2.6936800  1.429578
## 212  2.5490463  1.349958
## 583  0.4065358 -1.298546
## 214  2.4559054  1.296255

plot.fold.change(ds.sg, fc.sg, fc.threshold = 2)

#t-tests on savtizky-golay first derivative
ttest.sg.fd = tTests.dataset(ds.sg.fd, "glutamine")
ttest.sg.fd[1:10,]

##          p.value   -log10       fdr
## 226 0.0007289023 3.137331 0.2503438
## 276 0.0008330908 3.079308 0.2503438
## 265 0.0043102251 2.365500 0.5793288
## 397 0.0079699547 2.098544 0.5793288
## 478 0.0110348125 1.957235 0.5793288
## 477 0.0119935165 1.921053 0.5793288
## 479 0.0123632469 1.907867 0.5793288
## 396 0.0138499452 1.858552 0.5793288
## 476 0.0144049759 1.841487 0.5793288
## 475 0.0148465922 1.828373 0.5793288

plot.ttests(ds.sg.fd, ttest.sg.fd, tt.threshold = 0.05)

PCA Analysis:

#pca on savitzky-golay
pca.sg = pca.analysis.dataset(ds.sg)
summary(pca.sg)

## Importance of components:
##                            PC1     PC2    PC3     PC4     PC5       PC6
## Standard deviation     18.4095 12.0955 8.4188 6.11162 2.74966 1.272e-14
## Proportion of Variance  0.5639  0.2434 0.1179 0.06215 0.01258 0.000e+00
## Cumulative Proportion   0.5639  0.8073 0.9253 0.98742 1.00000 1.000e+00

pca.scoresplot2D(ds.sg, pca.sg, "glutamine", ellipses = T, labels = T, 
                 leg.pos = "none")

#pca on savitzky-golay first derivative
pca.sg.fd = pca.analysis.dataset(ds.sg.fd)
summary(pca.sg.fd)

## Importance of components:
##                            PC1     PC2    PC3    PC4     PC5       PC6
## Standard deviation     16.2544 11.8297 9.8786 7.7734 6.23223 9.941e-15
## Proportion of Variance  0.4396  0.2329 0.1624 0.1005 0.06463 0.000e+00
## Cumulative Proportion   0.4396  0.6725 0.8348 0.9354 1.00000 1.000e+00

pca.scoresplot2D(ds.sg.fd, pca.sg.fd, "glutamine", ellipses = T, labels = T, 
                 leg.pos = "none")

Clustering analysis with hierarchical clustering and kmeans:

#hc on savtizky-golay
hc.ds = clustering(ds.sg, method = "hc")
dendrogram.plot.col(ds.sg, hc.ds, "glutamine", leg.pos = "none")

#hc on savitzky-golay first derivative
hc.sg.fd = clustering(ds.sg.fd, method = "hc")
dendrogram.plot.col(ds.sg.fd, hc.sg.fd, "glutamine", leg.pos = "none")

#kmeans on savitzky-golay
kmeans.ds = clustering(ds.sg, method = "kmeans", num.clusters = 2)
kmeans.plot(ds.sg, kmeans.ds)

kmeans.result.df(kmeans.ds, 2)

##   cluster                 samples
## 1       1 fruglut frutose glucose
## 2       2 gluglut sucrose sucglut

#kmeans on savitzky-golay first derivative
kmeans.sg.fd = clustering(ds.sg.fd, method = "kmeans", num.clusters = 2)
kmeans.plot(ds.sg.fd, kmeans.sg.fd)

kmeans.result.df(kmeans.sg.fd, 2)

##   cluster                         samples
## 1       1                 frutose glucose
## 2       2 fruglut gluglut sucrose sucglut