Help:WikiPathways Webservice/Gene set enrichment in R

From WikiPathways

(Difference between revisions)
Jump to: navigation, search
(New page: The R script below runs the Parametric Analysis of Gene Set Enrichment (PAGE) algorithm on [http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GSE7023 GEO dataset GSE7023], using h...)
Current revision (18:31, 14 November 2018) (view source)
(replaced outdated content with link to R Vignette)
 
Line 1: Line 1:
-
The R script below runs the Parametric Analysis of Gene Set Enrichment (PAGE) algorithm on [http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GSE7023 GEO dataset GSE7023], using human WikiPathways pathway as gene sets. See the manual of the [http://www.bioconductor.org/packages/2.4/bioc/vignettes/PGSEA/inst/doc/PGSEA2.pdf PGSEA libraray] for more details on the gene set enrichment calculation.
+
This tutorial has moved and is now available as an R Vignette: [https://bioconductor.org/packages/release/bioc/vignettes/rWikiPathways/inst/doc/Pathway-Analysis.html Pathway Analysis with rWikiPathways].
-
 
+
-
<pre>
+
-
 
+
-
# Load the required libraries
+
-
library(SSOAP) #To use WikiPathways web service
+
-
library(GEOquery) #To download GEO experiments
+
-
library(GSEABase) #Gene Set data structures
+
-
library(PGSEA) #Parametric Gene Set Enrichment
+
-
library(limma) #Used for mining the GSEA results
+
-
 
+
-
# Create a SOAPServer instance for the web service
+
-
srv = SOAPServer("http://www.wikipathways.org/wpi/webservice/webservice.php");
+
-
 
+
-
# Get a list of all pathways from WikiPathways
+
-
reply = .SOAP(
+
-
srv, "listPathways",
+
-
action=I("listPathways"), handlers=NULL
+
-
)
+
-
# Parse the xml document
+
-
doc = xmlParse(reply$content, asText=TRUE)
+
-
 
+
-
# Extract the pathway info
+
-
pathwayNodes = xmlElementsByTagName(xmlRoot(doc), "pathways", TRUE)
+
-
pathways = lapply(pathwayNodes, function(n) {
+
-
children = xmlChildren(n, addNames= TRUE)
+
-
if(xmlValue(children$species) == "Homo sapiens") {
+
-
p = list()
+
-
p[["id"]] = xmlValue(children$id)
+
-
p[["name"]] = xmlValue(children$name)
+
-
p[["species"]] = xmlValue(children$species)
+
-
p[["url"]] = xmlValue(children$url)
+
-
return(p)
+
-
} else {
+
-
return() # Skip non-human pathways
+
-
}
+
-
})
+
-
 
+
-
# Remove NULL entries (non-human pathways)
+
-
pathways = pathways[!sapply(pathways, is.null)]
+
-
 
+
-
# A function that downloads a GeneSet for a pathway
+
-
createGS = function(p) {
+
-
print(p[["id"]])
+
-
# Download the gene list (translated to Entrez ids)
+
-
reply = .SOAP(srv, "getXrefList", pwId = p[["id"]], code="L",
+
-
action=I("getXrefList"), handlers=NULL)
+
-
doc = xmlParse(reply$content, asText=TRUE)
+
-
# Find the xref nodes with an xpath query
+
-
resultNodes = xmlElementsByTagName(xmlRoot(doc), "xrefs", TRUE)
+
-
# Extract the gene ids
+
-
geneIds = sapply(resultNodes, xmlValue)
+
-
if(length(geneIds) > 0) { # Skip empty lists
+
-
# Create a GeneSet object
+
-
geneSet = GeneSet(geneIds, geneIdType=EntrezIdentifier(),
+
-
setName=paste(p[["id"]], " (", p[["name"]], ")", sep="")
+
-
)
+
-
return(geneSet)
+
-
}
+
-
}
+
-
geneSets = lapply(pathways, createGS) #Apply the createGS function on all pathways
+
-
geneSets = geneSets[!sapply(geneSets, is.null)] #Remove empty sets
+
-
geneSetCollection = GeneSetCollection(geneSets)
+
-
 
+
-
# Save the downloaded gene sets, so we can use them for later calculations
+
-
save(geneSetCollection, file="/home/thomas/code/webservice-examples/Hs_genesets.Rd")
+
-
 
+
-
# Get an expression set from GEO
+
-
gse = getGEO("GSE7023", GSEMatrix = TRUE)
+
-
eset = gse[[1]]
+
-
 
+
-
# Reformat the phenotype labels
+
-
subtype = gsub("subtype: ", "", phenoData(eset)$characteristics_ch1)
+
-
subtype = gsub("\\.", "_", subtype)
+
-
 
+
-
# Run the PGSEA calculation (reference sample is 'NO')
+
-
pg = PGSEA(eset, geneSetCollection, ref = which(subtype == "NO"), p.value=0.005)
+
-
 
+
-
# Plot the result matrix
+
-
smcPlot(
+
-
pg, factor(subtype), scale=c(-15,15), show.grid = TRUE, margins = c(1, 1, 4, 10), col = .rwb, r.cex = 0.35
+
-
)
+
-
 
+
-
# Find the pathways most different between P1 and P2B
+
-
pgNF <- PGSEA(eset, geneSetCollection, ref = which(subtype == "NO"), p.value = NA)
+
-
design <- model.matrix(~-1 + factor(subtype))
+
-
colnames(design) <- names(table(subtype))
+
-
fit <- lmFit(pgNF, design)
+
-
contrast.matrix <- makeContrasts(P2B - P1, levels = design)
+
-
fit <- contrasts.fit(fit, contrast.matrix)
+
-
fit <- eBayes(fit)
+
-
 
+
-
# Plot the top 25
+
-
smcPlot(pg[as.numeric(rownames(topTable(fit, n = 30, resort.by = "logFC"))),], factor(subtype, levels = c("P1", "P2B")), col = .rwb, scale = c(-15, 15), margins = c(1, 1, 4, 10), show.grid = TRUE, r.cex = 0.75)
+
-
 
+
-
</pre>
+

Current revision

This tutorial has moved and is now available as an R Vignette: Pathway Analysis with rWikiPathways.

Personal tools