Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,18 @@ export(accept)
export(accept1)
export(accept2)
export(accept3)
export(accept3_uk)
export(plotExacerbations)
export(plotHeatMap)
export(predictCountProb)
export(set_openai_api_key)
export(show_openai_api_key)
import(dplyr)
import(tidyselect)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,starts_with)
importFrom(dplyr,tibble)
importFrom(hardhat,scream)
importFrom(reldist,wtd.quantile)
importFrom(splines,bs)
Expand Down
248 changes: 243 additions & 5 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -700,8 +700,8 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int
if (length(invalid_rows) > 0) {
invalid_ids <- if ("ID" %in% colnames(newdata)) newdata$ID[invalid_rows] else invalid_rows
stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s) with ID(s): ",
paste(invalid_ids, collapse = ", "),
". Severe exacerbation count cannot be greater than total exacerbation count."))
paste(invalid_ids, collapse = ", "),
". Severe exacerbation count cannot be greater than total exacerbation count."))
}
}

Expand Down Expand Up @@ -814,7 +814,7 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int
# Add predictor columns if requested
if (return_predictors) {
result_df <- cbind(newdata, result_df[, c("predicted_exac_probability", "predicted_exac_rate",
"predicted_severe_exac_probability", "predicted_severe_exac_rate")])
"predicted_severe_exac_probability", "predicted_severe_exac_rate")])
}

return(as_tibble(result_df))
Expand Down Expand Up @@ -1050,8 +1050,8 @@ predictCountProb <- function (patientResults, n = 10, shortened = TRUE){
accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LABA, LAMA, LastYrExacCount, LastYrSevExacCount, FEV1, SGRQ = NA, oxygen, obs_modsev_risk) {
# Create base tibble
df <- tibble(country = country, ID = ID, age = age, male = male, BMI = BMI, smoker = smoker, statin = CVD,
ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount,
FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk)
ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount,
FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk)

# Add SGRQ or mMRC depending on what's available
if (!is.na(SGRQ)) {
Expand Down Expand Up @@ -1082,3 +1082,241 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
}




#' Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK
#'
#' @description
#' ACCEPT 3.0-UK is a UK-specific recalibration of ACCEPT 2.0 derived from the
#' CPRD (Clinical Practice Research Datalink) primary-care dataset. It applies
#' two Cox-model recalibration parameters: an optimism-corrected baseline
#' hazard (H0) and slope (beta), separately for moderate-to-severe and severe
#' exacerbations.
#'
#' When optional predictors (LABA, oxygen, ICS, LAMA, statin/CVD, BMI, smoker)
#' are missing, a UK-specific sequential triangular regression imputation model
#' fills them in before prediction.
#'
#' @param patientData A tibble of patients in the same format as
#' \code{samplePatients}. Mandatory columns: \code{ID}, \code{age},
#' \code{male}, \code{FEV1}, \code{LastYrExacCount},
#' \code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}.
#' Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA},
#' \code{statin}, \code{BMI}, \code{smoker}.
#' @param return_predictors Logical. If \code{TRUE}, the input predictors are
#' returned alongside predictions. Default \code{FALSE}.
#'
#'@param quiet Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.
#'
#' @return A tibble with columns:
#' \itemize{
#' \item \code{predicted_exac_probability} - recalibrated moderate-to-severe risk
#' \item \code{predicted_exac_rate} - corresponding rate (-log(1-p))
#' \item \code{predicted_severe_exac_probability} - recalibrated severe risk
#' \item \code{predicted_severe_exac_rate} - corresponding rate
#' }
#' If \code{return_predictors = TRUE}, input columns are prepended.
#'
#' @details
#' ## Recalibration formula
#' For each outcome the recalibrated 1-year risk is:
#' \deqn{\hat{p}_{UK} = 1 - \exp\!\Bigl(-H_0 \cdot \exp(\beta \cdot \log(-\log(1-\hat{p}_{2})))\Bigr)}
#' where \eqn{\hat{p}_{2}} is the ACCEPT 2.0 predicted probability, and
#' \eqn{H_0} and \eqn{\beta} are the optimism-corrected parameters from the
#' CPRD Cox model (Table in manuscript):
#' \itemize{
#' \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986}
#' \item Severe: \eqn{H_0 = 0.482}, \eqn{\beta = 1.124}
#' }
Comment on lines +1126 to +1130
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The severe-outcome recalibration parameters are inconsistent within this function’s documentation/comments vs the actual constants used. The roxygen block lists severe H0 = 1.124 and beta = 0.482, while the implementation/comment block uses H0_sev <- 0.482 and beta_sev <- 1.124. Please verify the correct CPRD values and make the roxygen formula section, in-code comments, and constants agree.

Copilot uses AI. Check for mistakes.
#'
#' ## Optional-predictor imputation
#' Missing optional predictors are imputed sequentially (triangular matrix):
#' LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker.
#' Each model uses mandatory predictors (age, male, mMRC, FEV1,
#' LastYrSevExacCount, LastYrExacCount) plus any previously imputed
#' optional predictors. Binary variables are imputed with logistic regression
#' (probability rounded to 0/1); BMI with linear regression.
#'
#' @examples
#' results <- accept3_uk(samplePatients)
#'
#' @seealso \code{\link{accept}}, \code{\link{accept2}}, \code{\link{accept3}}
#'
#' @importFrom dplyr tibble mutate select starts_with
#' @export
accept3_uk <- function(patientData,
Comment on lines +1145 to +1147
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The roxygen @export tag adds a new public function, but this PR doesn’t update the generated NAMESPACE (and corresponding .Rd in man/). As-is, accept3_uk() won’t be exported/accessible when installing from source. Regenerate and commit NAMESPACE/documentation via roxygen2 (or add them manually if roxygen isn’t part of the workflow).

Copilot uses AI. Check for mistakes.
return_predictors = FALSE,
quiet = FALSE) {

if (!tibble::is_tibble(patientData)) {
stop("patientData must be a tibble. Use as_tibble() to convert.")
}
Comment on lines +1147 to +1153
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds substantial new behavior (UK-specific sequential imputation + recalibration) but there are no tests covering accept3_uk() in tests/testthat/test-predict.R. Add tests for: (1) baseline output columns/shape, (2) expected predictions on fixed inputs, and (3) at least one missing-optional-predictor scenario to assert imputation + determinism.

Copilot uses AI. Check for mistakes.
if (any(patientData$LastYrSevExacCount > patientData$LastYrExacCount)) {
invalid_ids <- patientData$ID[patientData$LastYrSevExacCount > patientData$LastYrExacCount]
stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s): ",
paste(invalid_ids, collapse = ", "),
". Severe count cannot exceed total count."))
}

# 1. Convert mMRC -> SGRQ if needed
if (!"SGRQ" %in% colnames(patientData)) {
if ("CAT" %in% colnames(patientData)) {
warning("SGRQ not found. Using CAT score instead.")
patientData$SGRQ <- 18.87 + 1.53 * patientData$CAT
} else if ("mMRC" %in% colnames(patientData)) {
message("SGRQ not found. Using mMRC score instead.")
patientData$SGRQ <- 20.43 + 14.77 * patientData$mMRC
} else {
stop("Either mMRC, SGRQ, or CAT must be provided. None were found in patientData.")
}
}

# 2. UK-specific optional-predictor imputation
# Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript.
# Column order in each coef vector:
# (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount,
# LastYrExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]]

uk_imp <- list(

LABA = list(
binary = TRUE,
coef = c(0.418, -0.013, 0.009, 0.438, -0.004, -0.063, 0.333)
),

oxygen = list(
binary = TRUE,
coef = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.188, 0.040,
0.556) # +LABA
),

ICS = list(
binary = TRUE,
coef = c(-1.430, -0.004, -0.157, 0.085, -0.002, -0.030, 0.168,
3.372, 0.409) # +LABA, oxygen
),

LAMA = list(
binary = TRUE,
coef = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.000, 0.153,
1.545, 0.203, -0.632) # +LABA, oxygen, ICS
),

statin = list(
binary = TRUE,
coef = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.032, 0.009,
0.180, -0.196, -0.259, 0.207) # +LABA, oxygen, ICS, LAMA
),

BMI = list(
binary = FALSE,
clamp_low = 10,
clamp_hi = 60,
coef = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.527, 0.010,
0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin
Comment on lines +1211 to +1216
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BMI imputation clamps to a maximum of 70, but the package documentation for samplePatients (and accept3 parameter docs) specifies BMI is expected in the 10–60 range. Clamping above the validated range may yield out-of-domain predictions; consider clamping to 60 (or explicitly documenting/justifying the higher cap if the UK recalibration supports it).

Copilot uses AI. Check for mistakes.
),

smoker = list(
binary = TRUE,
coef = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.039, -0.017,
-0.056, -0.648, -0.246, 0.069, 0.031, -0.058) # all optional
)
)

# Sequential imputation order matches the triangular matrix
optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker")

# Use mMRC if available, otherwise back-transform from SGRQ
if ("SGRQ" %in% colnames(patientData)) {
if (!"mMRC" %in% colnames(patientData)) {
patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
} else {
na_idx <- is.na(patientData$mMRC)
if (any(na_idx)) {
patientData$mMRC[na_idx] <- (patientData$SGRQ[na_idx] - 20.43) / 14.77
}
}
}
Comment on lines +1229 to +1239
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mMRC is treated as a mandatory predictor for the imputation models, but the code only back-calculates mMRC when the column is entirely absent. If mMRC exists but has NA values (while SGRQ is present), those rows will keep NA in X, causing lp/imputations to become NA. Consider filling mMRC where it is missing using SGRQ when available (and/or explicitly error if mandatory predictors contain missing values).

Copilot uses AI. Check for mistakes.
mandatory_preds <- c("age", "male", "mMRC", "FEV1",
"LastYrSevExacCount", "LastYrExacCount")
imputed_vars <- character(0)

for (vname in optional_order) {
if (!vname %in% colnames(patientData) ||
any(is.na(patientData[[vname]]))) {

spec <- uk_imp[[vname]]
pred_cols <- c(mandatory_preds, imputed_vars)

X <- as.matrix(cbind(1, patientData[, pred_cols]))
X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X))
lp <- as.vector(X %*% spec$coef)

if (spec$binary) {
pred_vals <- round(exp(lp) / (1 + exp(lp)))
} else {
pred_vals <- lp
if (!is.null(spec$clamp_low)) pred_vals <- pmax(pred_vals, spec$clamp_low)
if (!is.null(spec$clamp_hi)) pred_vals <- pmin(pred_vals, spec$clamp_hi)
}

if (!vname %in% colnames(patientData)) {
patientData[[vname]] <- pred_vals
if (quiet) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model."))
} else {
na_idx <- is.na(patientData[[vname]])
patientData[[vname]][na_idx] <- pred_vals[na_idx]
if (any(na_idx) && quiet)
message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '",
vname, "' imputed using UK model."))
}
Comment on lines +1263 to +1272
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sequential imputation emits multiple message() calls (per missing column / per NA count), which can be noisy in batch prediction and hard to suppress selectively. Consider switching to a single aggregated warning()/message() at the end (or add a quiet/verbose flag consistent with the rest of the API) so callers can control console output more easily.

Copilot uses AI. Check for mistakes.
}

# Always track all optional variables for subsequent models
# regardless of whether they needed imputation
if (vname %in% colnames(patientData)) {
imputed_vars <- c(imputed_vars, vname)
}
}
# 3. ACCEPT 2.0 predictions
accept2_preds <- accept2(patientData = patientData)

p2_modsev <- accept2_preds$predicted_exac_probability
p2_sev <- accept2_preds$predicted_severe_exac_probability
Comment on lines +1281 to +1285
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unlike accept(), this function doesn’t validate LastYrSevExacCount <= LastYrExacCount. Since those fields directly influence both imputation and downstream accept2() predictions, add the same guard here (or call through the existing accept() validation path) to fail fast with a clear error for inconsistent history inputs.

Copilot uses AI. Check for mistakes.

# 4. UK recalibration
# Optimism-corrected parameters from CPRD bootstrap:
# Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671-0.681)
# Slope beta = 0.986 (95% CI 0.977-0.995)
# Severe: Baseline Hazard H0 = 0.482 (95% CI 0.472-0.495)
# Slope beta = 1.124 (95% CI 1.107-1.142)

H0_modsev <- 0.676
beta_modsev <- 0.986

H0_sev <- 0.482
beta_sev <- 1.124

lp_modsev <- log(-log(1 - p2_modsev))
lp_sev <- log(-log(1 - p2_sev))

uk_modsev <- 1 - exp(-H0_modsev * exp(beta_modsev * lp_modsev))
uk_sev <- 1 - exp(-H0_sev * exp(beta_sev * lp_sev))

# 4. Assemble output
out <- dplyr::tibble(
predicted_exac_probability = round(uk_modsev, 4),
predicted_exac_rate = round(-log(1 - uk_modsev), 4),
predicted_severe_exac_probability = round(uk_sev, 4),
predicted_severe_exac_rate = round(-log(1 - uk_sev), 4)
)

if (return_predictors) {
out <- dplyr::bind_cols(patientData, out)
} else {
out$ID <- patientData$ID
out <- dplyr::select(out, "ID", dplyr::everything())
}

return(out)
}
69 changes: 69 additions & 0 deletions man/accept3_uk.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading