Skip to content

Commit 816c4c9

Browse files
authored
spc2mpspline(): upgrades (#250)
* `spc2mpspline()`: upgrades * Relax constraints * `spc2mpspline()`: specify `d` parameter in tests for splines beyond 200cm * Further relaxing requirements @dylanbeaudette @pierreroudier * Docs * cleanup * Further relaxing missing data rules * Add `method` argument for interpolations for alternate output depths: `est_icm` (original depths) and `est_dcm` (static depth) in addition to default `est_1cm` ("continous") * Fix/test `d` argument with `method='est_dcm'`
1 parent 4579ef9 commit 816c4c9

File tree

3 files changed

+223
-153
lines changed

3 files changed

+223
-153
lines changed

R/spc2mpspline.R

Lines changed: 159 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,36 @@
11
# if (!isGeneric("spc2mpspline"))
22
setGeneric("spc2mpspline", function(object,
33
var_name = NULL,
4+
method = c("est_1cm", "est_icm", "est_dcm"),
45
pattern = "R|Cr|Cd|qm",
5-
hzdesgn = guessHzDesgnName(object), ...)
6+
hzdesgn = NULL, ...)
67
standardGeneric("spc2mpspline"))
78

8-
#' @title Missing-data-safe, SPC-wide wrapper around mpspline2::mpspline "continuous" 1cm output
9+
#' @title SoilProfileCollection wrapper for `mpspline2::mpspline()`
910
#'
10-
#' @description Facilitate safe use of just about any numeric SPC horizon attribute, from any SPC, with \code{mpspline2::mpspline}. Currently only works with a single attribute.This function will automatically filter profiles with \code{NA} in attribute of interest which may be more conservative filtering than you expect. The intention here is that a SPC of related profile instances could be splined, and then the spline results aggregated over the full interval where data was available.
11+
#' @description Generate mass-preserving splines for any numeric attribute in a SoilProfileCollectuion using `mpspline2::mpspline()`. mpspline2 implements the method described in Bishop et al. (1999). Currently this function only works with a single `var_name` at a time.
12+
#'
13+
#' @details This function now relies on the missing data checks provided by the mpspline2 package. See `attr(..., 'removed')` to see whole profiles that were removed from the set. Horizons containing `NA` in the property of interest are dropped with a message.
1114
#'
12-
#' Data completeness is assessed and the input SPC is filtered and truncated to create a container for the 1cm results from \code{mpspline2::mpspline}.
15+
#' Data completeness is assessed and the input SoilProfileCollection is filtered and truncated to create a container for the results from `mpspline2::mpspline()`.
1316
#'
1417
#' @param object A SoilProfileCollection
15-
#' @param var_name Column name in \code{@horizons} slot of \code{object} containing numeric values to spline
16-
#' @param pattern Regex pattern to match for bottom of profile (passed to estimateSoilDepth) default: "R|Cr|Cd|qm"
17-
#' @param hzdesgn Column name in \code{@horizons} slot of \code{object} containing horizon designations default: \code{aqp::guessHzDesgnName(object, required = TRUE)}
18-
#' @param ... Additional arguments to \code{mpspline2::mpspline}
18+
#' @param var_name Column name in `@horizons` slot of `object` containing numeric values to spline
19+
#' @param pattern Regex pattern to match for bottom of profile (passed to `minDepthOf()`) default: "R|Cr|Cd|qm"; only used if `hzdesgn` is specified
20+
#' @param hzdesgn Column name in `@horizons` slot of `object` containing horizon designations default: `NULL`
21+
#' @param method Options include "est_1cm" (default; 1cm estimates), "est_icm" (estimates over original layer boundaries), "est_dcm" (estimates over constant interval, specified with `d` argument to `mpspline3::mpspline()`). Default value for `d` is `c(0, 5, 15, 30, 60, 100, 200)`.
22+
#' @param ... Additional arguments to `mpspline2::mpspline()`
1923
#'
2024
#' @author Andrew G. Brown
2125
#'
22-
#' @return A SoilProfileCollection with 1cm slices. Spline variables are in columns prefixed with "spline_" and RMSE/RMSE_IQR are in columns prefixed with "rmse_". If any profiles were removed from the collection, their profile IDs are stored in attr(result, 'removed').
26+
#' @return A SoilProfileCollection with 1cm slices. Spline variables are in columns prefixed with "spline_" and RMSE/RMSE_IQR are in columns prefixed with "rmse_". If any profiles were removed from the collection, their profile IDs are stored in `attr(result, 'removed')`.
2327
#'
2428
#' @export spc2mpspline,SoilProfileCollection-method
2529
#' @aliases spc2mpspline
26-
#'
30+
#' @references T.F.A. Bishop, A.B. McBratney, G.M. Laslett (1999) Modelling soil attribute depth functions with equal-area quadratic smoothing splines. Geoderma 91(1–2), pp. 27-45. \doi{https://doi.org/10.1016/S0016-7061(99)00003-8}
31+
#'
32+
#' O'Brien, Lauren (2022). mpspline2: Mass-Preserving Spline Functions for Soil Data. R package version 0.1.6. \url{https://cran.r-project.org/package=mpspline2}
33+
#'
2734
#' @examples
2835
#'
2936
#' data(sp1)
@@ -34,116 +41,147 @@
3441
#' plotSPC(res[1:5,], color = "prop_spline", divide.hz = FALSE)
3542
#'
3643
setMethod("spc2mpspline", signature(object = "SoilProfileCollection"),
37-
function(object, var_name = NULL,
38-
pattern = "R|Cr|Cd|qm",
39-
hzdesgn = guessHzDesgnName(object),
40-
...) {
41-
42-
if (!requireNamespace('mpspline2'))
43-
stop("package `mpspline2` is required", call. = FALSE)
44-
45-
if (is.null(var_name) | !(var_name %in% horizonNames(object)))
46-
stop("argument `var_name` must specify a single horizon-level variable", call. = FALSE)
47-
48-
if (!hzdesgn %in% horizonNames(object)) {
49-
hzdesgn <- guessHzDesgnName(object, required = TRUE)
50-
}
44+
function(object,
45+
var_name = NULL,
46+
method = c("est_1cm", "est_icm", "est_dcm"),
47+
pattern = "R|Cr|Cd|qm",
48+
hzdesgn = NULL,
49+
...) {
50+
.NHZ <- NULL
51+
.LAST <- NULL
52+
.HZID <- NULL
5153

52-
hztop <- horizonDepths(object)[1]
53-
hzbot <- horizonDepths(object)[2]
54-
55-
# glom to "available interval" in each profile
56-
# NOTE: we will handle warnings (profiles with no data at all) at end
57-
spc.sub <- suppressWarnings(glomApply(object, function(p) {
58-
i <- which(diff(c(0, cumsum(!is.na(p[[var_name]])))) == 1)
59-
h <- horizons(p)
60-
# basically this excludes NA values at top and bottom of profile
61-
# (O horizons, bedrock) but wont check missing values inbetween
62-
# need at least two horizons to make a spline
63-
if (length(i) < 2)
64-
return(c(0, 0))
65-
top_depth <- h[[hztop]][i[1]]
66-
bot_depth <- h[[hzbot]][i[length(i)]]
67-
return(c(top_depth, bot_depth))
68-
}))
69-
70-
# debug : inspect horizon values for var_name
71-
#plot(spc.sub[1:10,], color=var_name)
72-
73-
# only take profiles that have 100% data coverage in above interval
74-
# i.e. if a single horizon is missing data, remove whole profile
75-
spc.sub$nona <- profileApply(spc.sub, function(p) any(is.na(p[[var_name]])))
76-
spc.sub <- spc.sub[which(!spc.sub$nona),]
77-
78-
# calculate the deepest top depth and shallowest bottom depth
79-
mindepth <- max(profileApply(spc.sub, function(p) p[,1][[hztop]]))
80-
maxdepth <- min(profileApply(spc.sub, estimateSoilDepth, p = pattern, name = hzdesgn))
81-
82-
# we will only make interpolations that the "whole SPC supports"
83-
# the thought is that these 1cm slices will be further aggregated downstream
84-
spc.sub <- glomApply(spc.sub, function(p) c(mindepth, maxdepth), truncate = TRUE)
85-
86-
# do the splines
87-
res <- mpspline2::mpspline(horizons(spc.sub)[c(idname(spc.sub),
88-
horizonDepths(spc.sub),
89-
var_name)],
90-
var_name = var_name, ...)
91-
92-
# concatenate results for re-insertion
93-
res2 <- do.call('c', lapply(profile_id(spc.sub), function(pid) {
94-
drange <- mindepth:maxdepth
95-
zero.idx <- drange == 0
96-
if (any(zero.idx))
97-
drange <- drange[-which(zero.idx)]
98-
return(res[[pid]]$est_1cm[drange])
99-
# this returns the 1cm estimate which conforms with sliced spc
100-
#
101-
# debug: prove that mass is preserved in output by returning block estimates
102-
# return(res[[pid]]$est_icm)
103-
}))
104-
105-
# get the RMSE
106-
reserr <- do.call('c', lapply(profile_id(spc.sub), function(pid) {
107-
return(res[[pid]]$est_err)
108-
}))
109-
110-
# make 1:1 with site
111-
reserr_iqr <- reserr[names(reserr) == "RMSE_IQR"]
112-
reserr <- reserr[names(reserr) == "RMSE"]
113-
114-
# inspect
115-
#reserr_iqr
116-
#reserr
117-
118-
# single horizon results cannot be splined, filter those out
119-
spc.sub <- subApply(spc.sub, function(p) nrow(p) > 1)
120-
121-
# adjustment for aqp::slice index logic versus glom interval logic
122-
if (mindepth == 0) {
123-
maxdepth <- maxdepth - 1
124-
}
125-
126-
# create slices 1cm thick to insert spline result
127-
spc.spl <- aqp::slice(spc.sub, formula(sprintf("%s:%s ~ %s",
128-
mindepth, maxdepth,
129-
var_name)))
130-
131-
# create new "spline_"+var_name variable
132-
spc.spl[[paste0(var_name,"_spline")]] <- res2
133-
134-
# create new "rmse_"+var_name as site level attributes
135-
spc.spl[[paste0(var_name,"_rmse")]] <- reserr
136-
spc.spl[[paste0(var_name,"_rmse_iqr")]] <- reserr_iqr
137-
138-
# determine what profiles were removed
139-
removed <- profile_id(object)[!profile_id(object) %in% profile_id(spc.spl)]
140-
141-
# add an attribute with removed profile IDs. there are three steps
142-
# that possibly remove data:
143-
# - profiles removed by glomApply have no var_name data at all.
144-
# - 100% coverage filtering step -- conservative filter to keep from making splines from bad data
145-
# - mpspline itself will remove profiles with e.g. just one horizon
146-
attr(spc.spl, "removed") <- unique(removed)
54+
if (!requireNamespace('mpspline2'))
55+
stop("package `mpspline2` is required", call. = FALSE)
56+
57+
if (is.null(var_name) | !(var_name %in% horizonNames(object)))
58+
stop("argument `var_name` must specify a single horizon-level variable", call. = FALSE)
14759

148-
return(spc.spl)
149-
})
60+
method <- match.arg(method[1], c("est_1cm", "est_icm", "est_dcm"))
61+
62+
hztop <- horizonDepths(object)[1]
63+
hzbot <- horizonDepths(object)[2]
64+
65+
# glom to "available interval" in each profile
66+
spc.sub <- glom(object, object[[hztop]][object[, 1, .HZID]],
67+
object[[hzbot]][object[, , .LAST, .HZID]])
68+
69+
# remove any horizons that have 0 or NA thickness (no mass)
70+
.sameTopBottom <- NULL
71+
72+
# handle any gaps at the surface (e.g. truncated data)
73+
spc.sub$.sameTopBottom <- spc.sub[[hztop]] == spc.sub[[hzbot]]
74+
spc.sub$.sameTopBottom <- spc.sub$.sameTopBottom | is.na(spc.sub$.sameTopBottom)
75+
spc.sub <- subsetHz(spc.sub, !.sameTopBottom)
76+
77+
spc.sub$.mindepth_orig <- spc.sub[, 1][[hztop]]
78+
79+
# handle any gaps at the surface (e.g. truncated data)
80+
spc.sub <- fillHzGaps(spc.sub, to_top = 0, to_bottom = NULL)
81+
82+
# calculate the top depth and bottom depth for each profile
83+
spc.sub$.mindepth <- spc.sub[, 1][[hztop]]
84+
85+
# optionally constrained by some pattern matching
86+
if (!missing(hzdesgn) && !is.null(hzdesgn)) {
87+
hzpatdep <- minDepthOf(
88+
spc.sub,
89+
pattern = pattern,
90+
hzdesgn = hzdesgn,
91+
no.contact.assigned = Inf
92+
)[[hztop]]
93+
} else {
94+
hzpatdep <- rep(Inf, length(spc.sub))
95+
}
96+
97+
# either the bottom depth of last horizon or the matched pattern top depth
98+
spc.sub$.maxdepth <- pmin(hzpatdep, spc.sub[, , .LAST][[hzbot]], na.rm = TRUE)
99+
100+
# truncate using vectors of top and bottom
101+
spc.sub <- trunc(spc.sub, spc.sub$.mindepth, spc.sub$.maxdepth)
102+
103+
# do the splines
104+
res <- mpspline2::mpspline(horizons(spc.sub)[c(idname(spc.sub),
105+
horizonDepths(spc.sub),
106+
var_name)],
107+
var_name = var_name, ...)
108+
109+
# concatenate results for re-insertion
110+
pid <- profile_id(spc.sub)
111+
res2 <- do.call('c', lapply(seq_along(pid), function(i) {
112+
switch(method,
113+
"est_1cm" = {
114+
drange <- spc.sub$.mindepth[i]:spc.sub$.maxdepth[i]
115+
zero.idx <- drange == 0
116+
if (any(zero.idx))
117+
drange <- drange[-which(zero.idx)]
118+
return(res[[pid[i]]]$est_1cm[drange])
119+
}, {
120+
return(res[[pid[i]]][[method]])
121+
}
122+
)
123+
# this returns the 1cm estimate which conforms with sliced spc
124+
#
125+
# debug: prove that mass is preserved in output by returning block estimates
126+
# return(res[[pid]]$est_icm)
127+
}))
128+
129+
# get the RMSE
130+
reserr <- do.call('c', lapply(profile_id(spc.sub), function(pid) {
131+
return(res[[pid]]$est_err)
132+
}))
133+
134+
# make 1:1 with site
135+
reserr_iqr <- reserr[names(reserr) == "RMSE_IQR"]
136+
reserr <- reserr[names(reserr) == "RMSE"]
137+
138+
# profiles removed (NA in all horizons)
139+
spc.sub <- spc.sub[profile_id(spc.sub) %in% names(res),]
140+
141+
# create slices 1cm thick to insert spline result
142+
switch(method,
143+
"est_1cm" = {
144+
spc.spl <- suppressMessages(aqp::dice(spc.sub))
145+
},
146+
"est_icm" = {
147+
spc.spl <- spc.sub
148+
},
149+
"est_dcm" = {
150+
.new_d_horizons <- function(x, ...) {
151+
if (is.null(list(...)$d)) {
152+
d <- c(0, 5, 15, 30, 60, 100, 200)
153+
} else {
154+
d <- list(...)$d
155+
}
156+
newhzd <- data.frame(
157+
id = profile_id(x),
158+
top = do.call('c', lapply(d[1:(length(d) - 1)], rep, length(x))),
159+
bottom = do.call('c', lapply(d[2:length(d)], rep, length(x)))
160+
)
161+
colnames(newhzd) <- c(idname(x), horizonDepths(x))
162+
depths(newhzd) <- colnames(newhzd)
163+
newhzd
164+
}
165+
166+
spc.spl <- .new_d_horizons(spc.sub, ...)
167+
})
168+
169+
# create new "spline_"+var_name variable
170+
spc.spl[[paste0(var_name, "_spline")]] <- res2
171+
172+
# create new "rmse_"+var_name as site level attributes
173+
spc.spl[[paste0(var_name, "_rmse")]] <- reserr
174+
spc.spl[[paste0(var_name, "_rmse_iqr")]] <- reserr_iqr
175+
176+
# determine what profiles were removed
177+
removed <- profile_id(object)[!profile_id(object) %in% profile_id(spc.spl)]
178+
179+
# add an attribute with removed profile IDs
180+
attr(spc.spl, "removed") <- unique(removed)
181+
182+
if (method != "est_dcm") {
183+
spc.spl <- trunc(spc.spl, spc.sub$.mindepth_orig, spc.sub$.maxdepth)
184+
}
185+
186+
return(spc.spl)
187+
})

man/spc2mpspline-SoilProfileCollection-method.Rd

Lines changed: 20 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)