Loading the packages:
library(devtools)
install_bitbucket("chrisbcl/metabolomicsPackage")
library(metabolomicsUM)
Reading .csv files and creating the dataset:
setwd("~/Dropbox/fernanda")
source("metadata_uvv.R")
samplelist = read.csvs.folder("uvv_samples")
## [1] "Reading sample uvv_samples/varfruglut.csv"
## [1] "Reading sample uvv_samples/varfrutose.csv"
## [1] "Reading sample uvv_samples/varglicose.csv"
## [1] "Reading sample uvv_samples/vargliglut.csv"
## [1] "Reading sample uvv_samples/varsacarose.csv"
## [1] "Reading sample uvv_samples/varsacglut.csv"
get.metadata("uvv_samples", write.file = T, file.name = "metadata_uvv.csv")
metadata = read.metadata("metadata_uvv.csv")
ds = dataset.from.peaks(samplelist, type = "uvv-spectra", metadata = metadata)
ds$labels$x = "wavelength"
ds$labels$val = "absorbance"
sample.names = get.sample.names(ds)
sample.names = gsub("var","", sample.names)
sample.names = gsub("gli","glu",sample.names)
sample.names = gsub("sacarose","sucrose", sample.names)
sample.names = gsub("sac","suc", sample.names)
ds = set.sample.names(ds, sample.names)
sum.dataset(ds)
## Dataset summary:
## Valid dataset
## Description:
## Type of data: uvv-spectra
## Number of samples: 6
## Number of data points 601
## Number of metadata variables: 2
## Label of x-axis values: wavelength
## Label of data points: absorbance
## Number of missing values in data: 0
## Mean of data values: 0.6119147
## Median of data values: 0.3575127
## Standard deviation: 0.668702
## Range of values: 0.01892327 2.269357
## Quantiles:
## 0% 25% 50% 75% 100%
## 0.01892327 0.13567692 0.35751266 0.82376963 2.26935750
Plotting the spectras:
plot.spectra(ds, "treatment", cex = 0.7)
Baseline correction and savitzky-golay smoothing interpolation method used. Also, savitzky-golay with the first derivative was calculated:
ds.bl = baseline.correction(ds, method = "peakDetection")
plot.spectra(ds.bl, "treatment", cex = 0.7)
ds.sg = smoothing.interpolation(ds.bl, method = "savitzky.golay", window = 15,
p.order = 3, deriv = 0)
plot.spectra(ds.sg, "treatment", cex = 0.7)
ds.sg.fd = smoothing.interpolation(ds.bl, method = "savitzky.golay", window = 15,
p.order = 3, deriv = 1)
plot.spectra(ds.sg.fd, "treatment", cex = 0.7)
Univariate analysis. Fold change and t-tests were calculated:
#fold change
fc.sg = fold.change(ds.sg, "glutamine", "sem.glutamina")
fc.sg[1:10,]
## FoldChange log2(FC)
## 200 0.1551163 -2.688578
## 580 0.2270444 -2.138953
## 581 0.2333400 -2.099494
## 579 0.2770276 -1.851898
## 582 0.2988575 -1.742470
## 578 0.3652327 -1.453112
## 213 2.6936800 1.429578
## 212 2.5490463 1.349958
## 583 0.4065358 -1.298546
## 214 2.4559054 1.296255
plot.fold.change(ds.sg, fc.sg, fc.threshold = 2)
#t-tests on savtizky-golay first derivative
ttest.sg.fd = tTests.dataset(ds.sg.fd, "glutamine")
ttest.sg.fd[1:10,]
## p.value -log10 fdr
## 226 0.0007289023 3.137331 0.2503438
## 276 0.0008330908 3.079308 0.2503438
## 265 0.0043102251 2.365500 0.5793288
## 397 0.0079699547 2.098544 0.5793288
## 478 0.0110348125 1.957235 0.5793288
## 477 0.0119935165 1.921053 0.5793288
## 479 0.0123632469 1.907867 0.5793288
## 396 0.0138499452 1.858552 0.5793288
## 476 0.0144049759 1.841487 0.5793288
## 475 0.0148465922 1.828373 0.5793288
plot.ttests(ds.sg.fd, ttest.sg.fd, tt.threshold = 0.05)
PCA Analysis:
#pca on savitzky-golay
pca.sg = pca.analysis.dataset(ds.sg)
summary(pca.sg)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 18.4095 12.0955 8.4188 6.11162 2.74966 1.272e-14
## Proportion of Variance 0.5639 0.2434 0.1179 0.06215 0.01258 0.000e+00
## Cumulative Proportion 0.5639 0.8073 0.9253 0.98742 1.00000 1.000e+00
pca.scoresplot2D(ds.sg, pca.sg, "glutamine", ellipses = T, labels = T,
leg.pos = "none")
#pca on savitzky-golay first derivative
pca.sg.fd = pca.analysis.dataset(ds.sg.fd)
summary(pca.sg.fd)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 16.2544 11.8297 9.8786 7.7734 6.23223 9.941e-15
## Proportion of Variance 0.4396 0.2329 0.1624 0.1005 0.06463 0.000e+00
## Cumulative Proportion 0.4396 0.6725 0.8348 0.9354 1.00000 1.000e+00
pca.scoresplot2D(ds.sg.fd, pca.sg.fd, "glutamine", ellipses = T, labels = T,
leg.pos = "none")
Clustering analysis with hierarchical clustering and kmeans:
#hc on savtizky-golay
hc.ds = clustering(ds.sg, method = "hc")
dendrogram.plot.col(ds.sg, hc.ds, "glutamine", leg.pos = "none")
#hc on savitzky-golay first derivative
hc.sg.fd = clustering(ds.sg.fd, method = "hc")
dendrogram.plot.col(ds.sg.fd, hc.sg.fd, "glutamine", leg.pos = "none")
#kmeans on savitzky-golay
kmeans.ds = clustering(ds.sg, method = "kmeans", num.clusters = 2)
kmeans.plot(ds.sg, kmeans.ds)
kmeans.result.df(kmeans.ds, 2)
## cluster samples
## 1 1 fruglut frutose glucose
## 2 2 gluglut sucrose sucglut
#kmeans on savitzky-golay first derivative
kmeans.sg.fd = clustering(ds.sg.fd, method = "kmeans", num.clusters = 2)
kmeans.plot(ds.sg.fd, kmeans.sg.fd)
kmeans.result.df(kmeans.sg.fd, 2)
## cluster samples
## 1 1 frutose glucose
## 2 2 fruglut gluglut sucrose sucglut