Skip to content

Commit a7fd670

Browse files
authored
Merge pull request #22 from chrisdjscott/rcpp
Rcpp versions of some functions
2 parents 88c329c + 9a049aa commit a7fd670

File tree

2 files changed

+217
-11
lines changed

2 files changed

+217
-11
lines changed

GBS-Chip-Gmatrix.R

Lines changed: 102 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,38 @@ if (!exists("cex.pointsize")) cex.pointsize <- 1
1010
if (!exists("functions.only")) functions.only <- FALSE
1111
if (!exists("outlevel")) outlevel <- 9
1212

13+
# function to locate Rcpp file (assume it is in the same directory as this file and this file was 'sourced')
14+
pathToCppFile = function() {
15+
cpp.name <- "GBS-Rcpp-functions.cpp"
16+
this.file <- NULL
17+
for (i in -(1:sys.nframe())) {
18+
if (identical(sys.function(i), base::source)) this.file <- (normalizePath(sys.frame(i)$ofile))
19+
}
20+
if (!is.null(this.file)) {
21+
source.dir <- dirname(this.file)
22+
return(file.path(source.dir, cpp.name))
23+
}
24+
else {
25+
# assume it is in the current working directory
26+
return(cpp.name)
27+
}
28+
}
29+
30+
# load C++ functions
31+
# compiling can take ~10 seconds, so we cache the file (under ~/R/RcppCache)
32+
# it will only be recompiled when the C++ file changes or is moved
33+
have_rcpp <- FALSE
34+
if (require(Rcpp)) {
35+
# RcppArmadillo is also required, but we don't need to load it here
36+
if (is.element("RcppArmadillo", installed.packages()[,"Package"])) {
37+
have_rcpp <- TRUE
38+
cpp.path <- pathToCppFile()
39+
cat("Loading C++ functions:", cpp.path, "\n")
40+
sourceCpp(file = cpp.path, showOutput = TRUE,
41+
cacheDir = file.path(path.expand("~"), "R", "RcppCache"))
42+
}
43+
}
44+
1345
readGBS <- function(genofilefn = genofile) {
1446
if (gform == "chip") readChip(genofilefn)
1547
if (gform == "ANGSDcounts") readANGSD(genofilefn)
@@ -210,7 +242,12 @@ GBSsummary <- function() {
210242
havedepth <- exists("depth") # if depth present, assume it and genon are correct & shouldn't be recalculated (as alleles may be the wrong one)
211243
if(gform != "chip") {
212244
if (!havedepth) depth <<- alleles[, seq(1, 2 * nsnps - 1, 2)] + alleles[, seq(2, 2 * nsnps, 2)]
213-
sampdepth.max <<- apply(depth, 1, max)
245+
if (have_rcpp) {
246+
sampdepth.max <<- rcpp_rowMaximums(depth)
247+
}
248+
else {
249+
sampdepth.max <<- apply(depth, 1, max)
250+
}
214251
sampdepth <<- rowMeans(depth)
215252
u0 <- which(sampdepth.max == 0)
216253
u1 <- setdiff(which(sampdepth.max == 1 | sampdepth < sampdepth.thresh), u0)
@@ -295,7 +332,15 @@ cat("Analysing", nind, "individuals and", nsnps, "SNPs\n")
295332
l10LRT <<- -log10(exp(1)) * pchisq(LRT, 1, lower.tail = FALSE, log.p = TRUE)
296333

297334
sampdepth <<- rowMeans(depth) # recalc after removing SNPs and samples
298-
if(outlevel > 4) sampdepth.med <<- apply(depth, 1, median)
335+
#if(outlevel > 4) sampdepth.med <<- apply(depth, 1, median)
336+
if(outlevel > 4) {
337+
if (have_rcpp) {
338+
sampdepth.med <<- rcpp_rowMedians(depth)
339+
}
340+
else {
341+
sampdepth.med <<- apply(depth, 1, median)
342+
}
343+
}
299344
depth0 <- rowSums(depth == 0)
300345
snpdepth <<- colMeans(depth)
301346
missrate <- sum(depth == 0)/nrow(depth)/ncol(depth)
@@ -366,19 +411,47 @@ if(!functions.only) {
366411

367412

368413
################## functions
369-
depth2K <- function(depthvals) 1/2^depthvals # convert depth to K value assuming binomial
414+
r_depth2K <- function(depthvals) 1/2^depthvals # convert depth to K value assuming binomial
415+
# select R or Rcpp version of depth2K depending on whether Rcpp is installed
416+
if (have_rcpp) {
417+
depth2K <- function(depthvals) {
418+
# Rcpp version only works with matrix as input, so fallback to R version otherwise
419+
if (is.matrix(depthvals)) {
420+
result <- rcpp_depth2K(depthvals)
421+
} else {
422+
result <- r_depth2K(depthvals)
423+
}
424+
return(result)
425+
}
426+
} else {
427+
depth2K <- r_depth2K
428+
}
370429

371430
depth2Kbb <- function(depthvals, alph=Inf) {
372431
# convert depth to K value assuming beta-binomial with parameters alpha=beta=alph. Inf gives binomial
373432
if (alph==Inf) 1/2^depthvals else beta(alph,depthvals+alph)/beta(alph,alph)
374433
}
375434

376435
# convert depth to K value modp model. prob of seeing same allele as last time is modp (usually >= 0.5)
377-
depth2Kmodp <- function(depthvals, modp=0.5 ) {
436+
r_depth2Kmodp <- function(depthvals, modp=0.5 ) {
378437
Kvals <- 0.5*modp^(depthvals-1)
379438
Kvals[which(depthvals==0)] <- 1
380439
Kvals
381440
}
441+
# select R or Rcpp version depending on whether Rcpp is installed
442+
if (have_rcpp) {
443+
depth2Kmodp <- function(depthvals, modp=0.5) {
444+
# Rcpp version only works with matrix as input, so fallback to R version otherwise
445+
if (is.matrix(depthvals)) {
446+
result <- rcpp_depth2Kmodp(depthvals, modp)
447+
} else {
448+
result <- r_depth2Kmodp(depthvals, modp)
449+
}
450+
return(result)
451+
}
452+
} else {
453+
depth2Kmodp <- r_depth2Kmodp
454+
}
382455

383456
depth2Kchoose <- function(dmodel="bb",param) { # function to choose redefine depth2K
384457
if (!dmodel=="modp") dmodel <- "bb"
@@ -389,7 +462,6 @@ depth2Kchoose <- function(dmodel="bb",param) { # function to choose redefine de
389462
depth2K
390463
}
391464

392-
393465
upper.vec <- function(sqMatrix) as.vector(sqMatrix[upper.tri(sqMatrix)])
394466
#seq2samp <- function(seqIDvec=seqID) read.table(text=seqIDvec,sep="_",stringsAsFactors=FALSE,fill=TRUE)[,1] # undoc AgR function # might not get number of cols right
395467
seq2samp <- function(seqIDvec=seqID) sapply(strsplit(seqIDvec,split="_"),"[",1)
@@ -594,7 +666,12 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
594666
hist(upper.vec(cocall)/nsnpsub, breaks = 50, xlab = "Co-call rate (for sample pairs)", main="", col = "grey")
595667
dev.off()
596668
lowpairs <- which(cocall/nsnpsub <= cocall.thresh & upper.tri(cocall),arr.ind=TRUE)
597-
sampdepth.max <- apply(depthsub, 1, max)
669+
if (have_rcpp) {
670+
sampdepth.max <- rcpp_rowMaximums(depthsub)
671+
}
672+
else {
673+
sampdepth.max <- apply(depthsub, 1, max)
674+
}
598675
samp.removed <- NULL
599676
if(cocall.thresh >= 0) { # remove samples which wont get self-rel
600677
samp.removed <- which(sampdepth.max < 2)
@@ -655,11 +732,17 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
655732
rm(GGBS4top)
656733

657734
genon01 <- genon0
658-
genon01[depth[indsubset, snpsubset] < 2] <- 0
659735
P0 <- matrix(puse[snpsubset], nrow = nindsub, ncol = nsnpsub, byrow = TRUE)
660736
P1 <- 1 - P0
661-
P0[!usegeno | depth[indsubset, snpsubset] < 2] <- 0
662-
P1[!usegeno | depth[indsubset, snpsubset] < 2] <- 0
737+
if (have_rcpp) {
738+
rcpp_assignP0P1Genon01(P0, P1, genon01, usegeno, depth[indsubset, snpsubset])
739+
}
740+
else {
741+
genon01[depth[indsubset, snpsubset] < 2] <- 0
742+
P0[!usegeno | depth[indsubset, snpsubset] < 2] <- 0
743+
P1[!usegeno | depth[indsubset, snpsubset] < 2] <- 0
744+
}
745+
663746
# div0 <- 2 * diag(tcrossprod(P0, P1)) # rowSums version faster
664747
div0 <- 2 * rowSums(P0 * P1)
665748
Kdepth <- depth2K(depth[indsubset, snpsubset])
@@ -702,10 +785,18 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
702785
#htmlwidgets::saveWidget(temp_p, paste0("Heatmap-G5", sfx, ".html"))
703786
} else {
704787
png(paste0("Heatmap-G5", sfx, ".png"), width = 2000, height = 2000, pointsize = cex.pointsize * 18)
788+
if (require(parallelDist)) {
789+
cat("Using parallelDist function in heatmap\n")
790+
distfun <- parDist
791+
}
792+
else {
793+
cat("Using normal dist function in heatmap\n")
794+
distfun <- dist
795+
}
705796
if(length(table(pcacolo)) > 1) {
706-
hmout <- heatmap(temp, col = rev(heat.colors(50)), ColSideColors=pcacolo, RowSideColors=pcacolo, symm=T, revC=F)
797+
hmout <- heatmap(temp, col = rev(heat.colors(50)), ColSideColors=pcacolo, RowSideColors=pcacolo, symm=T, revC=F, distfun=distfun)
707798
} else {
708-
hmout <- heatmap(temp, col = rev(heat.colors(50)), symm=T, revC=F)
799+
hmout <- heatmap(temp, col = rev(heat.colors(50)), symm=T, revC=F, distfun=distfun)
709800
}
710801
hmdat <- data.frame(rowInd=hmout$rowInd,seqIDInd=indsubset[pcasamps[hmout$rowInd]],seqID=seqID[indsubset[pcasamps[hmout$rowInd]]])
711802
write.csv(hmdat,paste0("HeatmapOrder", sfx, ".csv"),row.names=FALSE,quote=FALSE)

GBS-Rcpp-functions.cpp

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// disable all run-time checks in armadillo (e.g. bounds checking, etc.)
2+
#define ARMA_NO_DEBUG
3+
4+
// we depend on the R package "RcppArmadillo"
5+
// [[Rcpp::depends(RcppArmadillo)]]
6+
#include <RcppArmadillo.h>
7+
8+
// we need OpenMP for parallelisation
9+
// [[Rcpp::plugins(openmp)]]
10+
11+
// function for finding row medians (alternative to apply(depth, 1, median))
12+
// requires integer type matrix as input, returns list of doubles
13+
// [[Rcpp::export]]
14+
std::vector<double> rcpp_rowMedians(const arma::imat &depth) {
15+
// number of rows
16+
const int nrows = depth.n_rows;
17+
18+
// vector for storing the result
19+
std::vector<double> medians(nrows);
20+
21+
// loop over the rows
22+
#pragma omp parallel for
23+
for (int i = 0; i < nrows; i++) {
24+
// convert the row to double type, to compute median correctly
25+
arma::rowvec row = arma::conv_to<arma::rowvec>::from(depth.row(i));
26+
27+
// compute the median for this row
28+
medians[i] = arma::median(row);
29+
}
30+
31+
return medians;
32+
}
33+
34+
// function for finding row maximums (alternative to apply(mat, 1, max))
35+
// requires integer type matrix as input, return list of integers
36+
// [[Rcpp::export]]
37+
std::vector<int> rcpp_rowMaximums(const arma::imat &mat) {
38+
const int nrows = mat.n_rows;
39+
40+
// create vector to store the result
41+
std::vector<int> maximums(nrows);
42+
43+
// loop over rows
44+
#pragma omp parallel for
45+
for (int i = 0; i < nrows; i++) {
46+
// find the maximum for this row
47+
maximums[i] = mat.row(i).max();
48+
}
49+
50+
return maximums;
51+
}
52+
53+
// C++ version of depth2K function
54+
// [[Rcpp::export]]
55+
Rcpp::NumericMatrix rcpp_depth2K(const Rcpp::NumericMatrix &A) {
56+
// create the output matrix (same size as input)
57+
Rcpp::NumericMatrix Aout(A.rows(), A.cols());
58+
59+
// number of elements
60+
const long Asize = A.rows() * A.cols();
61+
62+
// loop over elements in parallel and apply operation
63+
#pragma omp parallel for
64+
for (long i = 0; i < Asize; i++) {
65+
Aout[i] = 1.0 / pow(2.0, A[i]);
66+
}
67+
68+
// return matrix
69+
return Aout;
70+
}
71+
72+
// C++ version of depth2Kmodp function
73+
// [[Rcpp::export]]
74+
Rcpp::NumericMatrix rcpp_depth2Kmodp(const Rcpp::NumericMatrix &depthvals, double modp = 0.5) {
75+
// create matrix for storing the result
76+
Rcpp::NumericMatrix result(depthvals.rows(), depthvals.cols());
77+
78+
// size of the matrix
79+
const long size = depthvals.rows() * depthvals.cols();
80+
81+
// loop over the elements in parallel
82+
#pragma omp parallel for
83+
for (long i = 0; i < size; i++) {
84+
double value = 0.5 * pow(modp, depthvals[i] - 1.0);
85+
result[i] = (value == 0) ? 1.0 : value;
86+
}
87+
return result;
88+
}
89+
90+
// function for setting unused values of P0, P1 and genon01 to zero
91+
// modifies the matrices in-place (i.e. doesn't return anything)
92+
// [[Rcpp::export]]
93+
void rcpp_assignP0P1Genon01(Rcpp::NumericMatrix &P0, Rcpp::NumericMatrix &P1, Rcpp::NumericMatrix &genon01,
94+
const Rcpp::LogicalMatrix &usegeno, const Rcpp::NumericMatrix &dsub) {
95+
// number of elements (assumes all inputs are the same size!)
96+
const long size = P0.rows() * P0.cols();
97+
98+
// loop over elements in parallel
99+
#pragma omp parallel for
100+
for (long i = 0; i < size; i++) {
101+
// set to zero if they match the conditions
102+
if (dsub[i] < 2.0) {
103+
P0[i] = 0.0;
104+
P1[i] = 0.0;
105+
genon01[i] = 0.0;
106+
}
107+
else if (!usegeno[i]) {
108+
P0[i] = 0.0;
109+
P1[i] = 0.0;
110+
}
111+
}
112+
113+
// nothing to return, matrices are modified in-place
114+
return;
115+
}

0 commit comments

Comments
 (0)