|
1 | 1 | # if (!isGeneric("spc2mpspline")) |
2 | 2 | setGeneric("spc2mpspline", function(object, |
3 | 3 | var_name = NULL, |
| 4 | + method = c("est_1cm", "est_icm", "est_dcm"), |
4 | 5 | pattern = "R|Cr|Cd|qm", |
5 | | - hzdesgn = guessHzDesgnName(object), ...) |
| 6 | + hzdesgn = NULL, ...) |
6 | 7 | standardGeneric("spc2mpspline")) |
7 | 8 |
|
8 | | -#' @title Missing-data-safe, SPC-wide wrapper around mpspline2::mpspline "continuous" 1cm output |
| 9 | +#' @title SoilProfileCollection wrapper for `mpspline2::mpspline()` |
9 | 10 | #' |
10 | | -#' @description Facilitate safe use of just about any numeric SPC horizon attribute, from any SPC, with \code{mpspline2::mpspline}. Currently only works with a single attribute.This function will automatically filter profiles with \code{NA} in attribute of interest which may be more conservative filtering than you expect. The intention here is that a SPC of related profile instances could be splined, and then the spline results aggregated over the full interval where data was available. |
| 11 | +#' @description Generate mass-preserving splines for any numeric attribute in a SoilProfileCollectuion using `mpspline2::mpspline()`. mpspline2 implements the method described in Bishop et al. (1999). Currently this function only works with a single `var_name` at a time. |
| 12 | +#' |
| 13 | +#' @details This function now relies on the missing data checks provided by the mpspline2 package. See `attr(..., 'removed')` to see whole profiles that were removed from the set. Horizons containing `NA` in the property of interest are dropped with a message. |
11 | 14 | #' |
12 | | -#' Data completeness is assessed and the input SPC is filtered and truncated to create a container for the 1cm results from \code{mpspline2::mpspline}. |
| 15 | +#' Data completeness is assessed and the input SoilProfileCollection is filtered and truncated to create a container for the results from `mpspline2::mpspline()`. |
13 | 16 | #' |
14 | 17 | #' @param object A SoilProfileCollection |
15 | | -#' @param var_name Column name in \code{@horizons} slot of \code{object} containing numeric values to spline |
16 | | -#' @param pattern Regex pattern to match for bottom of profile (passed to estimateSoilDepth) default: "R|Cr|Cd|qm" |
17 | | -#' @param hzdesgn Column name in \code{@horizons} slot of \code{object} containing horizon designations default: \code{aqp::guessHzDesgnName(object, required = TRUE)} |
18 | | -#' @param ... Additional arguments to \code{mpspline2::mpspline} |
| 18 | +#' @param var_name Column name in `@horizons` slot of `object` containing numeric values to spline |
| 19 | +#' @param pattern Regex pattern to match for bottom of profile (passed to `minDepthOf()`) default: "R|Cr|Cd|qm"; only used if `hzdesgn` is specified |
| 20 | +#' @param hzdesgn Column name in `@horizons` slot of `object` containing horizon designations default: `NULL` |
| 21 | +#' @param method Options include "est_1cm" (default; 1cm estimates), "est_icm" (estimates over original layer boundaries), "est_dcm" (estimates over constant interval, specified with `d` argument to `mpspline3::mpspline()`). Default value for `d` is `c(0, 5, 15, 30, 60, 100, 200)`. |
| 22 | +#' @param ... Additional arguments to `mpspline2::mpspline()` |
19 | 23 | #' |
20 | 24 | #' @author Andrew G. Brown |
21 | 25 | #' |
22 | | -#' @return A SoilProfileCollection with 1cm slices. Spline variables are in columns prefixed with "spline_" and RMSE/RMSE_IQR are in columns prefixed with "rmse_". If any profiles were removed from the collection, their profile IDs are stored in attr(result, 'removed'). |
| 26 | +#' @return A SoilProfileCollection with 1cm slices. Spline variables are in columns prefixed with "spline_" and RMSE/RMSE_IQR are in columns prefixed with "rmse_". If any profiles were removed from the collection, their profile IDs are stored in `attr(result, 'removed')`. |
23 | 27 | #' |
24 | 28 | #' @export spc2mpspline,SoilProfileCollection-method |
25 | 29 | #' @aliases spc2mpspline |
26 | | -#' |
| 30 | +#' @references T.F.A. Bishop, A.B. McBratney, G.M. Laslett (1999) Modelling soil attribute depth functions with equal-area quadratic smoothing splines. Geoderma 91(1–2), pp. 27-45. \doi{https://doi.org/10.1016/S0016-7061(99)00003-8} |
| 31 | +#' |
| 32 | +#' O'Brien, Lauren (2022). mpspline2: Mass-Preserving Spline Functions for Soil Data. R package version 0.1.6. \url{https://cran.r-project.org/package=mpspline2} |
| 33 | +#' |
27 | 34 | #' @examples |
28 | 35 | #' |
29 | 36 | #' data(sp1) |
|
34 | 41 | #' plotSPC(res[1:5,], color = "prop_spline", divide.hz = FALSE) |
35 | 42 | #' |
36 | 43 | setMethod("spc2mpspline", signature(object = "SoilProfileCollection"), |
37 | | - function(object, var_name = NULL, |
38 | | - pattern = "R|Cr|Cd|qm", |
39 | | - hzdesgn = guessHzDesgnName(object), |
40 | | - ...) { |
41 | | - |
42 | | - if (!requireNamespace('mpspline2')) |
43 | | - stop("package `mpspline2` is required", call. = FALSE) |
44 | | - |
45 | | - if (is.null(var_name) | !(var_name %in% horizonNames(object))) |
46 | | - stop("argument `var_name` must specify a single horizon-level variable", call. = FALSE) |
47 | | - |
48 | | - if (!hzdesgn %in% horizonNames(object)) { |
49 | | - hzdesgn <- guessHzDesgnName(object, required = TRUE) |
50 | | - } |
| 44 | + function(object, |
| 45 | + var_name = NULL, |
| 46 | + method = c("est_1cm", "est_icm", "est_dcm"), |
| 47 | + pattern = "R|Cr|Cd|qm", |
| 48 | + hzdesgn = NULL, |
| 49 | + ...) { |
| 50 | + .NHZ <- NULL |
| 51 | + .LAST <- NULL |
| 52 | + .HZID <- NULL |
51 | 53 |
|
52 | | - hztop <- horizonDepths(object)[1] |
53 | | - hzbot <- horizonDepths(object)[2] |
54 | | - |
55 | | - # glom to "available interval" in each profile |
56 | | - # NOTE: we will handle warnings (profiles with no data at all) at end |
57 | | - spc.sub <- suppressWarnings(glomApply(object, function(p) { |
58 | | - i <- which(diff(c(0, cumsum(!is.na(p[[var_name]])))) == 1) |
59 | | - h <- horizons(p) |
60 | | - # basically this excludes NA values at top and bottom of profile |
61 | | - # (O horizons, bedrock) but wont check missing values inbetween |
62 | | - # need at least two horizons to make a spline |
63 | | - if (length(i) < 2) |
64 | | - return(c(0, 0)) |
65 | | - top_depth <- h[[hztop]][i[1]] |
66 | | - bot_depth <- h[[hzbot]][i[length(i)]] |
67 | | - return(c(top_depth, bot_depth)) |
68 | | - })) |
69 | | - |
70 | | - # debug : inspect horizon values for var_name |
71 | | - #plot(spc.sub[1:10,], color=var_name) |
72 | | - |
73 | | - # only take profiles that have 100% data coverage in above interval |
74 | | - # i.e. if a single horizon is missing data, remove whole profile |
75 | | - spc.sub$nona <- profileApply(spc.sub, function(p) any(is.na(p[[var_name]]))) |
76 | | - spc.sub <- spc.sub[which(!spc.sub$nona),] |
77 | | - |
78 | | - # calculate the deepest top depth and shallowest bottom depth |
79 | | - mindepth <- max(profileApply(spc.sub, function(p) p[,1][[hztop]])) |
80 | | - maxdepth <- min(profileApply(spc.sub, estimateSoilDepth, p = pattern, name = hzdesgn)) |
81 | | - |
82 | | - # we will only make interpolations that the "whole SPC supports" |
83 | | - # the thought is that these 1cm slices will be further aggregated downstream |
84 | | - spc.sub <- glomApply(spc.sub, function(p) c(mindepth, maxdepth), truncate = TRUE) |
85 | | - |
86 | | - # do the splines |
87 | | - res <- mpspline2::mpspline(horizons(spc.sub)[c(idname(spc.sub), |
88 | | - horizonDepths(spc.sub), |
89 | | - var_name)], |
90 | | - var_name = var_name, ...) |
91 | | - |
92 | | - # concatenate results for re-insertion |
93 | | - res2 <- do.call('c', lapply(profile_id(spc.sub), function(pid) { |
94 | | - drange <- mindepth:maxdepth |
95 | | - zero.idx <- drange == 0 |
96 | | - if (any(zero.idx)) |
97 | | - drange <- drange[-which(zero.idx)] |
98 | | - return(res[[pid]]$est_1cm[drange]) |
99 | | - # this returns the 1cm estimate which conforms with sliced spc |
100 | | - # |
101 | | - # debug: prove that mass is preserved in output by returning block estimates |
102 | | - # return(res[[pid]]$est_icm) |
103 | | - })) |
104 | | - |
105 | | - # get the RMSE |
106 | | - reserr <- do.call('c', lapply(profile_id(spc.sub), function(pid) { |
107 | | - return(res[[pid]]$est_err) |
108 | | - })) |
109 | | - |
110 | | - # make 1:1 with site |
111 | | - reserr_iqr <- reserr[names(reserr) == "RMSE_IQR"] |
112 | | - reserr <- reserr[names(reserr) == "RMSE"] |
113 | | - |
114 | | - # inspect |
115 | | - #reserr_iqr |
116 | | - #reserr |
117 | | - |
118 | | - # single horizon results cannot be splined, filter those out |
119 | | - spc.sub <- subApply(spc.sub, function(p) nrow(p) > 1) |
120 | | - |
121 | | - # adjustment for aqp::slice index logic versus glom interval logic |
122 | | - if (mindepth == 0) { |
123 | | - maxdepth <- maxdepth - 1 |
124 | | - } |
125 | | - |
126 | | - # create slices 1cm thick to insert spline result |
127 | | - spc.spl <- aqp::slice(spc.sub, formula(sprintf("%s:%s ~ %s", |
128 | | - mindepth, maxdepth, |
129 | | - var_name))) |
130 | | - |
131 | | - # create new "spline_"+var_name variable |
132 | | - spc.spl[[paste0(var_name,"_spline")]] <- res2 |
133 | | - |
134 | | - # create new "rmse_"+var_name as site level attributes |
135 | | - spc.spl[[paste0(var_name,"_rmse")]] <- reserr |
136 | | - spc.spl[[paste0(var_name,"_rmse_iqr")]] <- reserr_iqr |
137 | | - |
138 | | - # determine what profiles were removed |
139 | | - removed <- profile_id(object)[!profile_id(object) %in% profile_id(spc.spl)] |
140 | | - |
141 | | - # add an attribute with removed profile IDs. there are three steps |
142 | | - # that possibly remove data: |
143 | | - # - profiles removed by glomApply have no var_name data at all. |
144 | | - # - 100% coverage filtering step -- conservative filter to keep from making splines from bad data |
145 | | - # - mpspline itself will remove profiles with e.g. just one horizon |
146 | | - attr(spc.spl, "removed") <- unique(removed) |
| 54 | + if (!requireNamespace('mpspline2')) |
| 55 | + stop("package `mpspline2` is required", call. = FALSE) |
| 56 | + |
| 57 | + if (is.null(var_name) | !(var_name %in% horizonNames(object))) |
| 58 | + stop("argument `var_name` must specify a single horizon-level variable", call. = FALSE) |
147 | 59 |
|
148 | | - return(spc.spl) |
149 | | -}) |
| 60 | + method <- match.arg(method[1], c("est_1cm", "est_icm", "est_dcm")) |
| 61 | + |
| 62 | + hztop <- horizonDepths(object)[1] |
| 63 | + hzbot <- horizonDepths(object)[2] |
| 64 | + |
| 65 | + # glom to "available interval" in each profile |
| 66 | + spc.sub <- glom(object, object[[hztop]][object[, 1, .HZID]], |
| 67 | + object[[hzbot]][object[, , .LAST, .HZID]]) |
| 68 | + |
| 69 | + # remove any horizons that have 0 or NA thickness (no mass) |
| 70 | + .sameTopBottom <- NULL |
| 71 | + |
| 72 | + # handle any gaps at the surface (e.g. truncated data) |
| 73 | + spc.sub$.sameTopBottom <- spc.sub[[hztop]] == spc.sub[[hzbot]] |
| 74 | + spc.sub$.sameTopBottom <- spc.sub$.sameTopBottom | is.na(spc.sub$.sameTopBottom) |
| 75 | + spc.sub <- subsetHz(spc.sub, !.sameTopBottom) |
| 76 | + |
| 77 | + spc.sub$.mindepth_orig <- spc.sub[, 1][[hztop]] |
| 78 | + |
| 79 | + # handle any gaps at the surface (e.g. truncated data) |
| 80 | + spc.sub <- fillHzGaps(spc.sub, to_top = 0, to_bottom = NULL) |
| 81 | + |
| 82 | + # calculate the top depth and bottom depth for each profile |
| 83 | + spc.sub$.mindepth <- spc.sub[, 1][[hztop]] |
| 84 | + |
| 85 | + # optionally constrained by some pattern matching |
| 86 | + if (!missing(hzdesgn) && !is.null(hzdesgn)) { |
| 87 | + hzpatdep <- minDepthOf( |
| 88 | + spc.sub, |
| 89 | + pattern = pattern, |
| 90 | + hzdesgn = hzdesgn, |
| 91 | + no.contact.assigned = Inf |
| 92 | + )[[hztop]] |
| 93 | + } else { |
| 94 | + hzpatdep <- rep(Inf, length(spc.sub)) |
| 95 | + } |
| 96 | + |
| 97 | + # either the bottom depth of last horizon or the matched pattern top depth |
| 98 | + spc.sub$.maxdepth <- pmin(hzpatdep, spc.sub[, , .LAST][[hzbot]], na.rm = TRUE) |
| 99 | + |
| 100 | + # truncate using vectors of top and bottom |
| 101 | + spc.sub <- trunc(spc.sub, spc.sub$.mindepth, spc.sub$.maxdepth) |
| 102 | + |
| 103 | + # do the splines |
| 104 | + res <- mpspline2::mpspline(horizons(spc.sub)[c(idname(spc.sub), |
| 105 | + horizonDepths(spc.sub), |
| 106 | + var_name)], |
| 107 | + var_name = var_name, ...) |
| 108 | + |
| 109 | + # concatenate results for re-insertion |
| 110 | + pid <- profile_id(spc.sub) |
| 111 | + res2 <- do.call('c', lapply(seq_along(pid), function(i) { |
| 112 | + switch(method, |
| 113 | + "est_1cm" = { |
| 114 | + drange <- spc.sub$.mindepth[i]:spc.sub$.maxdepth[i] |
| 115 | + zero.idx <- drange == 0 |
| 116 | + if (any(zero.idx)) |
| 117 | + drange <- drange[-which(zero.idx)] |
| 118 | + return(res[[pid[i]]]$est_1cm[drange]) |
| 119 | + }, { |
| 120 | + return(res[[pid[i]]][[method]]) |
| 121 | + } |
| 122 | + ) |
| 123 | + # this returns the 1cm estimate which conforms with sliced spc |
| 124 | + # |
| 125 | + # debug: prove that mass is preserved in output by returning block estimates |
| 126 | + # return(res[[pid]]$est_icm) |
| 127 | + })) |
| 128 | + |
| 129 | + # get the RMSE |
| 130 | + reserr <- do.call('c', lapply(profile_id(spc.sub), function(pid) { |
| 131 | + return(res[[pid]]$est_err) |
| 132 | + })) |
| 133 | + |
| 134 | + # make 1:1 with site |
| 135 | + reserr_iqr <- reserr[names(reserr) == "RMSE_IQR"] |
| 136 | + reserr <- reserr[names(reserr) == "RMSE"] |
| 137 | + |
| 138 | + # profiles removed (NA in all horizons) |
| 139 | + spc.sub <- spc.sub[profile_id(spc.sub) %in% names(res),] |
| 140 | + |
| 141 | + # create slices 1cm thick to insert spline result |
| 142 | + switch(method, |
| 143 | + "est_1cm" = { |
| 144 | + spc.spl <- suppressMessages(aqp::dice(spc.sub)) |
| 145 | + }, |
| 146 | + "est_icm" = { |
| 147 | + spc.spl <- spc.sub |
| 148 | + }, |
| 149 | + "est_dcm" = { |
| 150 | + .new_d_horizons <- function(x, ...) { |
| 151 | + if (is.null(list(...)$d)) { |
| 152 | + d <- c(0, 5, 15, 30, 60, 100, 200) |
| 153 | + } else { |
| 154 | + d <- list(...)$d |
| 155 | + } |
| 156 | + newhzd <- data.frame( |
| 157 | + id = profile_id(x), |
| 158 | + top = do.call('c', lapply(d[1:(length(d) - 1)], rep, length(x))), |
| 159 | + bottom = do.call('c', lapply(d[2:length(d)], rep, length(x))) |
| 160 | + ) |
| 161 | + colnames(newhzd) <- c(idname(x), horizonDepths(x)) |
| 162 | + depths(newhzd) <- colnames(newhzd) |
| 163 | + newhzd |
| 164 | + } |
| 165 | + |
| 166 | + spc.spl <- .new_d_horizons(spc.sub, ...) |
| 167 | + }) |
| 168 | + |
| 169 | + # create new "spline_"+var_name variable |
| 170 | + spc.spl[[paste0(var_name, "_spline")]] <- res2 |
| 171 | + |
| 172 | + # create new "rmse_"+var_name as site level attributes |
| 173 | + spc.spl[[paste0(var_name, "_rmse")]] <- reserr |
| 174 | + spc.spl[[paste0(var_name, "_rmse_iqr")]] <- reserr_iqr |
| 175 | + |
| 176 | + # determine what profiles were removed |
| 177 | + removed <- profile_id(object)[!profile_id(object) %in% profile_id(spc.spl)] |
| 178 | + |
| 179 | + # add an attribute with removed profile IDs |
| 180 | + attr(spc.spl, "removed") <- unique(removed) |
| 181 | + |
| 182 | + if (method != "est_dcm") { |
| 183 | + spc.spl <- trunc(spc.spl, spc.sub$.mindepth_orig, spc.sub$.maxdepth) |
| 184 | + } |
| 185 | + |
| 186 | + return(spc.spl) |
| 187 | + }) |
0 commit comments