@@ -10,6 +10,38 @@ if (!exists("cex.pointsize")) cex.pointsize <- 1
1010if (! exists(" functions.only" )) functions.only <- FALSE
1111if (! exists(" outlevel" )) outlevel <- 9
1212
13+ # function to locate Rcpp file (assume it is in the same directory as this file and this file was 'sourced')
14+ pathToCppFile = function () {
15+ cpp.name <- " GBS-Rcpp-functions.cpp"
16+ this.file <- NULL
17+ for (i in - (1 : sys.nframe())) {
18+ if (identical(sys.function(i ), base :: source )) this.file <- (normalizePath(sys.frame(i )$ ofile ))
19+ }
20+ if (! is.null(this.file )) {
21+ source.dir <- dirname(this.file )
22+ return (file.path(source.dir , cpp.name ))
23+ }
24+ else {
25+ # assume it is in the current working directory
26+ return (cpp.name )
27+ }
28+ }
29+
30+ # load C++ functions
31+ # compiling can take ~10 seconds, so we cache the file (under ~/R/RcppCache)
32+ # it will only be recompiled when the C++ file changes or is moved
33+ have_rcpp <- FALSE
34+ if (require(Rcpp )) {
35+ # RcppArmadillo is also required, but we don't need to load it here
36+ if (is.element(" RcppArmadillo" , installed.packages()[," Package" ])) {
37+ have_rcpp <- TRUE
38+ cpp.path <- pathToCppFile()
39+ cat(" Loading C++ functions:" , cpp.path , " \n " )
40+ sourceCpp(file = cpp.path , showOutput = TRUE ,
41+ cacheDir = file.path(path.expand(" ~" ), " R" , " RcppCache" ))
42+ }
43+ }
44+
1345readGBS <- function (genofilefn = genofile ) {
1446 if (gform == " chip" ) readChip(genofilefn )
1547 if (gform == " ANGSDcounts" ) readANGSD(genofilefn )
@@ -210,7 +242,12 @@ GBSsummary <- function() {
210242 havedepth <- exists(" depth" ) # if depth present, assume it and genon are correct & shouldn't be recalculated (as alleles may be the wrong one)
211243 if (gform != " chip" ) {
212244 if (! havedepth ) depth <<- alleles [, seq(1 , 2 * nsnps - 1 , 2 )] + alleles [, seq(2 , 2 * nsnps , 2 )]
213- sampdepth.max <<- apply(depth , 1 , max )
245+ if (have_rcpp ) {
246+ sampdepth.max <<- rcpp_rowMaximums(depth )
247+ }
248+ else {
249+ sampdepth.max <<- apply(depth , 1 , max )
250+ }
214251 sampdepth <<- rowMeans(depth )
215252 u0 <- which(sampdepth.max == 0 )
216253 u1 <- setdiff(which(sampdepth.max == 1 | sampdepth < sampdepth.thresh ), u0 )
@@ -295,7 +332,15 @@ cat("Analysing", nind, "individuals and", nsnps, "SNPs\n")
295332 l10LRT <<- - log10(exp(1 )) * pchisq(LRT , 1 , lower.tail = FALSE , log.p = TRUE )
296333
297334 sampdepth <<- rowMeans(depth ) # recalc after removing SNPs and samples
298- if (outlevel > 4 ) sampdepth.med <<- apply(depth , 1 , median )
335+ # if(outlevel > 4) sampdepth.med <<- apply(depth, 1, median)
336+ if (outlevel > 4 ) {
337+ if (have_rcpp ) {
338+ sampdepth.med <<- rcpp_rowMedians(depth )
339+ }
340+ else {
341+ sampdepth.med <<- apply(depth , 1 , median )
342+ }
343+ }
299344 depth0 <- rowSums(depth == 0 )
300345 snpdepth <<- colMeans(depth )
301346 missrate <- sum(depth == 0 )/ nrow(depth )/ ncol(depth )
@@ -366,19 +411,47 @@ if(!functions.only) {
366411
367412
368413# ################# functions
369- depth2K <- function (depthvals ) 1 / 2 ^ depthvals # convert depth to K value assuming binomial
414+ r_depth2K <- function (depthvals ) 1 / 2 ^ depthvals # convert depth to K value assuming binomial
415+ # select R or Rcpp version of depth2K depending on whether Rcpp is installed
416+ if (have_rcpp ) {
417+ depth2K <- function (depthvals ) {
418+ # Rcpp version only works with matrix as input, so fallback to R version otherwise
419+ if (is.matrix(depthvals )) {
420+ result <- rcpp_depth2K(depthvals )
421+ } else {
422+ result <- r_depth2K(depthvals )
423+ }
424+ return (result )
425+ }
426+ } else {
427+ depth2K <- r_depth2K
428+ }
370429
371430 depth2Kbb <- function (depthvals , alph = Inf ) {
372431 # convert depth to K value assuming beta-binomial with parameters alpha=beta=alph. Inf gives binomial
373432 if (alph == Inf ) 1 / 2 ^ depthvals else beta(alph ,depthvals + alph )/ beta(alph ,alph )
374433 }
375434
376435# convert depth to K value modp model. prob of seeing same allele as last time is modp (usually >= 0.5)
377- depth2Kmodp <- function (depthvals , modp = 0.5 ) {
436+ r_depth2Kmodp <- function (depthvals , modp = 0.5 ) {
378437 Kvals <- 0.5 * modp ^ (depthvals - 1 )
379438 Kvals [which(depthvals == 0 )] <- 1
380439 Kvals
381440 }
441+ # select R or Rcpp version depending on whether Rcpp is installed
442+ if (have_rcpp ) {
443+ depth2Kmodp <- function (depthvals , modp = 0.5 ) {
444+ # Rcpp version only works with matrix as input, so fallback to R version otherwise
445+ if (is.matrix(depthvals )) {
446+ result <- rcpp_depth2Kmodp(depthvals , modp )
447+ } else {
448+ result <- r_depth2Kmodp(depthvals , modp )
449+ }
450+ return (result )
451+ }
452+ } else {
453+ depth2Kmodp <- r_depth2Kmodp
454+ }
382455
383456depth2Kchoose <- function (dmodel = " bb" ,param ) { # function to choose redefine depth2K
384457 if (! dmodel == " modp" ) dmodel <- " bb"
@@ -389,7 +462,6 @@ depth2Kchoose <- function(dmodel="bb",param) { # function to choose redefine de
389462 depth2K
390463 }
391464
392-
393465upper.vec <- function (sqMatrix ) as.vector(sqMatrix [upper.tri(sqMatrix )])
394466# seq2samp <- function(seqIDvec=seqID) read.table(text=seqIDvec,sep="_",stringsAsFactors=FALSE,fill=TRUE)[,1] # undoc AgR function # might not get number of cols right
395467seq2samp <- function (seqIDvec = seqID ) sapply(strsplit(seqIDvec ,split = " _" )," [" ,1 )
@@ -594,7 +666,12 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
594666 hist(upper.vec(cocall )/ nsnpsub , breaks = 50 , xlab = " Co-call rate (for sample pairs)" , main = " " , col = " grey" )
595667 dev.off()
596668 lowpairs <- which(cocall / nsnpsub < = cocall.thresh & upper.tri(cocall ),arr.ind = TRUE )
597- sampdepth.max <- apply(depthsub , 1 , max )
669+ if (have_rcpp ) {
670+ sampdepth.max <- rcpp_rowMaximums(depthsub )
671+ }
672+ else {
673+ sampdepth.max <- apply(depthsub , 1 , max )
674+ }
598675 samp.removed <- NULL
599676 if (cocall.thresh > = 0 ) { # remove samples which wont get self-rel
600677 samp.removed <- which(sampdepth.max < 2 )
@@ -655,11 +732,17 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
655732 rm(GGBS4top )
656733
657734 genon01 <- genon0
658- genon01 [depth [indsubset , snpsubset ] < 2 ] <- 0
659735 P0 <- matrix (puse [snpsubset ], nrow = nindsub , ncol = nsnpsub , byrow = TRUE )
660736 P1 <- 1 - P0
661- P0 [! usegeno | depth [indsubset , snpsubset ] < 2 ] <- 0
662- P1 [! usegeno | depth [indsubset , snpsubset ] < 2 ] <- 0
737+ if (have_rcpp ) {
738+ rcpp_assignP0P1Genon01(P0 , P1 , genon01 , usegeno , depth [indsubset , snpsubset ])
739+ }
740+ else {
741+ genon01 [depth [indsubset , snpsubset ] < 2 ] <- 0
742+ P0 [! usegeno | depth [indsubset , snpsubset ] < 2 ] <- 0
743+ P1 [! usegeno | depth [indsubset , snpsubset ] < 2 ] <- 0
744+ }
745+
663746# div0 <- 2 * diag(tcrossprod(P0, P1)) # rowSums version faster
664747 div0 <- 2 * rowSums(P0 * P1 )
665748 Kdepth <- depth2K(depth [indsubset , snpsubset ])
@@ -702,10 +785,18 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
702785 # htmlwidgets::saveWidget(temp_p, paste0("Heatmap-G5", sfx, ".html"))
703786 } else {
704787 png(paste0(" Heatmap-G5" , sfx , " .png" ), width = 2000 , height = 2000 , pointsize = cex.pointsize * 18 )
788+ if (require(parallelDist )) {
789+ cat(" Using parallelDist function in heatmap\n " )
790+ distfun <- parDist
791+ }
792+ else {
793+ cat(" Using normal dist function in heatmap\n " )
794+ distfun <- dist
795+ }
705796 if (length(table(pcacolo )) > 1 ) {
706- hmout <- heatmap(temp , col = rev(heat.colors(50 )), ColSideColors = pcacolo , RowSideColors = pcacolo , symm = T , revC = F )
797+ hmout <- heatmap(temp , col = rev(heat.colors(50 )), ColSideColors = pcacolo , RowSideColors = pcacolo , symm = T , revC = F , distfun = distfun )
707798 } else {
708- hmout <- heatmap(temp , col = rev(heat.colors(50 )), symm = T , revC = F )
799+ hmout <- heatmap(temp , col = rev(heat.colors(50 )), symm = T , revC = F , distfun = distfun )
709800 }
710801 hmdat <- data.frame (rowInd = hmout $ rowInd ,seqIDInd = indsubset [pcasamps [hmout $ rowInd ]],seqID = seqID [indsubset [pcasamps [hmout $ rowInd ]]])
711802 write.csv(hmdat ,paste0(" HeatmapOrder" , sfx , " .csv" ),row.names = FALSE ,quote = FALSE )
0 commit comments