This vignette contains code snippets to display results of the NestLink project (p1875). Briefly, it analyzes the amino acid frequencies of the in silico composed and the measured flycodes.
NestLink 1.6.0
library(NestLink)
The `ESP_Prediction` values were generated using the ESP predictor application available at https://genepattern.broadinstitute.org (Fusaro et al., 2009).
library(ggplot2)
# Stack the tables returned by getFC() and getNB() into one data frame; its
# `cond` column is what separates the populations in the plot below.
ESP <- rbind(getFC(), getNB())
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
# Overlaid (position = "identity", i.e. not stacked) semi-transparent
# histograms of ESP_Prediction, one fill colour per `cond` value, drawn on a
# canvas without grid or background and with black axis lines.
p <- ggplot(data = ESP, mapping = aes(x = ESP_Prediction, fill = cond))
p <- p + geom_histogram(position = "identity", bins = 50, alpha = 0.4)
p <- p + labs(x = "detectability in LC-MS (ESP prediction)")
p <- p + theme(
  axis.line = element_line(colour = "black"),
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)
print(p)
# Same comparison as before, except that the getNB() table is passed through
# NB.unambiguous() before being stacked under the getFC() table.
ESP <- rbind(getFC(), NB.unambiguous(getNB()))
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
# Overlaid semi-transparent histograms of ESP_Prediction, coloured by `cond`,
# on a canvas without grid or background and with black axis lines.
p <- ggplot(data = ESP, mapping = aes(x = ESP_Prediction, fill = cond))
p <- p + geom_histogram(position = "identity", bins = 50, alpha = 0.4)
p <- p + labs(x = "detectability in LC-MS (ESP prediction)")
p <- p + theme(
  axis.line = element_line(colour = "black"),
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)
print(p)
# Same comparison again, this time with the getNB() table passed through
# NB.unique() before being stacked under the getFC() table.
ESP <- rbind(getFC(), NB.unique(getNB()))
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
# Overlaid semi-transparent histograms of ESP_Prediction, coloured by `cond`,
# on a canvas without grid or background and with black axis lines.
p <- ggplot(data = ESP, mapping = aes(x = ESP_Prediction, fill = cond))
p <- p + geom_histogram(position = "identity", bins = 50, alpha = 0.4)
p <- p + labs(x = "detectability in LC-MS (ESP prediction)")
p <- p + theme(
  axis.line = element_line(colour = "black"),
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)
print(p)
# Compare the complete getNB() table with the results of NB.unambiguous() and
# NB.unique() applied to it. getNB() is evaluated once and reused: every call
# re-loads the resource (each one emits its own "loading from cache" message),
# so calling it three times inside rbind() only repeated the same work.
nb_all <- getNB()
## snapshotDate(): 2020-10-02
## see ?NestLink and browseVignettes('NestLink') for documentation
## loading from cache
ESP <- rbind(nb_all, NB.unambiguous(nb_all), NB.unique(nb_all))
# Overlaid (not stacked) semi-transparent histograms of ESP_Prediction, one
# fill colour per `cond` value, on a canvas without grid or background.
p <- ggplot(ESP, aes(x = ESP_Prediction, fill = cond)) +
  geom_histogram(bins = 50, alpha = 0.4, position = "identity") +
  labs(x = "detectability in LC-MS (ESP prediction)") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
print(p)
# Number of rows contributed by each of the three tables.
table(ESP$cond)
##
## NB NB.unambiguous NB.unique
## 27681 5832 7705
# Read the WU160118 data set. load() restores the object `WU160118` into the
# current environment; it is aliased to the shorter name `WU` afterwards.
# Manual retrieval through ExperimentHub, kept for reference (presumably
# equivalent to the helper call below -- verify before relying on it):
# library(ExperimentHub)
# eh <- ExperimentHub();
# load(query(eh, c("NestLink", "WU160118.RData"))[[1]])
load(file = getExperimentHubFilename("WU160118.RData"))
WU <- WU160118
filtering
# Keep only rows whose peptide sequence has the layout "GS" + 7 residues from
# the allowed alphabet + one of five C-terminal suffixes, and whose pep_score
# is greater than 25.
PATTERN <- "^GS[ASTNQDEFVLYWGP]{7}(WR|WLTVR|WQEGGR|WLR|WQSR)$"
idx <- grepl(PATTERN, WU$pep_seq)
# which() discards NA comparisons. Indexing rows with a logical NA (a matching
# sequence with a missing pep_score) would otherwise insert all-NA rows.
WU <- WU[which(idx & WU$pep_score > 25), ]
determine charge state frequency through counting
# Take the first character of query_charge as the integer charge state.
# substr() positions are 1-based; the previous start of 0 only produced the
# same result because out-of-range starts are treated as position 1.
WU$chargeInt <- as.integer(substr(WU$query_charge, 1, 1))
# Absolute number of rows per charge state.
table(WU$chargeInt)
##
## 2 3 4
## 33392 157 1
in percent
# Share of each charge state relative to all rows of WU, as a percentage
# rounded to one decimal place.
round(table(WU$chargeInt) / nrow(WU) * 100, digits = 1)
##
## 2 3 4
## 99.5 0.5 0.0
as histograms
library(scales)
# Histograms of moverz * chargeInt in bins of width 10, overlaid per
# query_charge value, with one panel per datfilename (two panels per row).
# NOTE(review): presumably moverz is the observed m/z, in which case
# moverz * chargeInt still contains the mass of the charge carriers and the
# "neutral mass" axis label is only approximate -- confirm this is intended.
ggplot(
  data = WU,
  mapping = aes(
    x = moverz * chargeInt,
    fill = query_charge,
    colour = query_charge
  )
) +
  geom_histogram(position = "identity", binwidth = 10, alpha = 0.3) +
  facet_wrap(~ datfilename, ncol = 2) +
  scale_x_continuous(breaks = pretty_breaks(n = 8)) +
  labs(
    title = "Precursor mass to charge frequency plot",
    subtitle = "Plotting frequency of precursor masses for each charge state",
    x = "Precursor mass [neutral mass]",
    y = "Frequency [counts]",
    fill = "Charge State",
    colour = "Charge State"
  ) +
  theme_light()
We confirmed this prediction by experimental data and found that 99.9 percent of flycode precursor ions correspond to doubly charged species (data not shown). The omission of positively charged residues is also critical in order to render trypsin a site-specific protease.
# The suffix is everything from position 10 of the peptide sequence onwards,
# i.e. what follows the 9-character "GS" + 7-residue prefix. The stop value of
# 100 simply acts as a generous upper bound.
WU$suffix <- substr(WU$pep_seq, start = 10, stop = 100)
# Overlaid histograms of moverz * chargeInt (bin width 10), one colour per
# suffix, all drawn in a single panel.
ggplot(
  data = WU,
  mapping = aes(x = moverz * chargeInt, fill = suffix, colour = suffix)
) +
  geom_histogram(position = "identity", binwidth = 10, alpha = 0.2) +
  # Per-suffix panels are intentionally disabled for this plot:
  # facet_wrap(~ substr(pep_seq, 10, 100)) +
  theme_light()
# Histograms of moverz * chargeInt (bin width 10), one panel per suffix.
# Faceting uses the existing WU$suffix column, which already holds
# substr(pep_seq, 10, 100) and drives the fill colour, instead of recomputing
# the same expression inside facet_wrap().
ggplot(WU, aes(x = moverz * chargeInt, fill = suffix)) +
  geom_histogram(binwidth = 10, alpha = 0.9, position = "identity") +
  facet_wrap(~ suffix) +
  theme_light()
# Histograms of pep_score (bin width 2), overlaid per query_charge value, with
# one panel per suffix. Faceting uses the existing WU$suffix column, which
# already holds substr(pep_seq, 10, 100), instead of recomputing it here.
ggplot(WU, aes(x = pep_score, fill = query_charge, colour = query_charge)) +
  geom_histogram(binwidth = 2, alpha = 0.5, position = "identity") +
  facet_wrap(~ suffix) +
  theme_light()