MoseleyBioinformaticsLab
diff --git a/‎DESCRIPTION‎
Lines changed: 4 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎NEWS.md‎
Lines changed: 4 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎R/correlations.R‎
Lines changed: 15 additions & 1 deletion b/‎R/correlations.R‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎docs/404.html‎
Lines changed: 6 additions & 6 deletions b/‎docs/404.html‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎docs/LICENSE-text.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/LICENSE-text.html‎
Lines changed: 4 additions & 4 deletions
@@ -1,5 +1,5 @@
 Package: visualizationQualityControl
-Version: 0.4.10
+Version: 0.4.11
 Title: Development of visualization methods for quality control
 Description: Provides utilities useful for quality control of
         high-throughput -omics datasets.
@@ -11,12 +11,13 @@ Date: 2021-12-28
 Depends: R (>= 3.1.1)
 biocViews:
 Imports: ComplexHeatmap (>= 1.2.1), stats, dendsort, colorspace, dplyr,
-        ggplot2, broom, knitrProgressBar, magrittr
+        ggplot2, broom, knitrProgressBar, magrittr, purrr
 License: MIT + file LICENSE
 VignetteBuilder: knitr
 Suggests: testthat, knitr, rmarkdown, circlize, viridis, ICIKendallTau,
         ggforce
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.3
+Encoding: UTF-8
 LinkingTo: Rcpp
 URL:
         https://moseleybioinformaticslab.github.io/visualizationQualityControl
 
@@ -1,3 +1,7 @@
+# visualizationQualityControl 0.4.11
+
+* Added a new argument `only_low` to `determine_outliers` to only look at the low end of the score distribution for outliers, as sometimes `boxplot.stats` will pick up outliers at the high end as well.
+
 # visualizationQualityControl 0.4.10
 
 * Updated the quality_control vignette to use ICIKendallTau instead of other correlation measures.
 
@@ -812,6 +812,7 @@ outlier_fraction <- function(data, sample_classes = NULL, n_trim = 3,
 #' @param outlier_fraction outlier fractions
 #' @param cor_weight how much weight for the correlation score?
 #' @param frac_weight how much weight for the outlier fraction?
+#' @param only_low should only things at the low end of score be removed?
 #' 
 #' @details For outlier sample detection, one should 
 #'   first generate median correlations using
@@ -826,7 +827,7 @@ outlier_fraction <- function(data, sample_classes = NULL, n_trim = 3,
 #' @export
 #' @return data.frame
 determine_outliers = function(median_correlations = NULL, outlier_fraction = NULL,
-                              cor_weight = 1, frac_weight = 1){
+                              cor_weight = 1, frac_weight = 1, only_low = TRUE){
 
   if (!is.null(median_correlations) && !is.null(outlier_fraction)) {
     full_data = dplyr::left_join(median_correlations, outlier_fraction, by = "sample_id", suffix = c(".cor", ".frac"))
@@ -866,6 +867,19 @@ determine_outliers = function(median_correlations = NULL, outlier_fraction = NUL
   full_data$outlier = FALSE
   full_data$outlier[full_data$sample_id %in% all_out] = TRUE
 
+  if (only_low) {
+    split_data = split(full_data, full_data$sample_class)
+    full_data = purrr::map(split_data, \(in_data){
+      mean_score = mean(in_data$score)
+      wrong_side = in_data |>
+        dplyr::filter(score < mean_score, outlier) |>
+        dplyr::pull(sample_id)
+      in_data$outlier[in_data$sample_id %in% wrong_side] = FALSE
+      in_data
+    }) |>
+      dplyr::bind_rows()
+  }
+  
   full_data
 
 }