From eeaac6f1b0b4d4f7e2776773aa217aa62f13eea0 Mon Sep 17 00:00:00 2001 From: jeenatm <141190076+jeenatm@users.noreply.github.com> Date: Tue, 7 Apr 2026 21:01:36 -0700 Subject: [PATCH 1/5] Add accept3_uk() for UK-specific CPRD recalibration --- R/predict.R | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 233 insertions(+), 5 deletions(-) diff --git a/R/predict.R b/R/predict.R index b2888dd..e7e9954 100644 --- a/R/predict.R +++ b/R/predict.R @@ -700,8 +700,8 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int if (length(invalid_rows) > 0) { invalid_ids <- if ("ID" %in% colnames(newdata)) newdata$ID[invalid_rows] else invalid_rows stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s) with ID(s): ", - paste(invalid_ids, collapse = ", "), - ". Severe exacerbation count cannot be greater than total exacerbation count.")) + paste(invalid_ids, collapse = ", "), + ". Severe exacerbation count cannot be greater than total exacerbation count.")) } } @@ -814,7 +814,7 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int # Add predictor columns if requested if (return_predictors) { result_df <- cbind(newdata, result_df[, c("predicted_exac_probability", "predicted_exac_rate", - "predicted_severe_exac_probability", "predicted_severe_exac_rate")]) + "predicted_severe_exac_probability", "predicted_severe_exac_rate")]) } return(as_tibble(result_df)) @@ -1050,8 +1050,8 @@ predictCountProb <- function (patientResults, n = 10, shortened = TRUE){ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LABA, LAMA, LastYrExacCount, LastYrSevExacCount, FEV1, SGRQ = NA, oxygen, obs_modsev_risk) { # Create base tibble df <- tibble(country = country, ID = ID, age = age, male = male, BMI = BMI, smoker = smoker, statin = CVD, - ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount, - FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk) + ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount, + FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk) # Add SGRQ or mMRC depending on what's available if (!is.na(SGRQ)) { @@ -1082,3 +1082,231 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA } + + +#' Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK +#' +#' @description +#' ACCEPT 3.0-UK is a UK-specific recalibration of ACCEPT 2.0 derived from the +#' CPRD (Clinical Practice Research Datalink) primary-care dataset. It applies +#' two Cox-model recalibration parameters: an optimism-corrected baseline +#' hazard (H0) and slope (beta), separately for moderate-to-severe and severe +#' exacerbations. +#' +#' When optional predictors (LABA, oxygen, ICS, LAMA, statin/CVD, BMI, smoker) +#' are missing, a UK-specific sequential triangular regression imputation model +#' fills them in before prediction. +#' +#' @param patientData A tibble of patients in the same format as +#' \code{samplePatients}. Mandatory columns: \code{ID}, \code{age}, +#' \code{male}, \code{FEV1}, \code{LastYrExacCount}, +#' \code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}. +#' Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA}, +#' \code{statin}, \code{BMI}, \code{smoker}. +#' @param prediction_interval Logical. If \code{TRUE}, also returns lower/upper +#' 80\% prediction intervals. Default \code{FALSE}. +#' @param return_predictors Logical. If \code{TRUE}, the input predictors are +#' returned alongside predictions. Default \code{FALSE}. +#' +#' @return A tibble with columns: +#' \itemize{ +#' \item \code{predicted_exac_probability} — recalibrated moderate-to-severe risk +#' \item \code{predicted_exac_rate} — corresponding rate (-log(1-p)) +#' \item \code{predicted_severe_exac_probability} — recalibrated severe risk +#' \item \code{predicted_severe_exac_rate} — corresponding rate +#' } +#' If \code{return_predictors = TRUE}, input columns are prepended. +#' +#' @details +#' ## Recalibration formula +#' For each outcome the recalibrated 1-year risk is: +#' \deqn{\hat{p}_{UK} = 1 - \exp\!\Bigl(-H_0 \cdot \exp(\beta \cdot \log(-\log(1-\hat{p}_{2})))\Bigr)} +#' where \eqn{\hat{p}_{2}} is the ACCEPT 2.0 predicted probability, and +#' \eqn{H_0} and \eqn{\beta} are the optimism-corrected parameters from the +#' CPRD Cox model (Table in manuscript): +#' \itemize{ +#' \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986} +#' \item Severe: \eqn{H_0 = 1.124}, \eqn{\beta = 0.482} +#' } +#' +#' ## Optional-predictor imputation +#' Missing optional predictors are imputed sequentially (triangular matrix): +#' LABA → oxygen → ICS → LAMA → statin → BMI → smoker. +#' Each model uses mandatory predictors (age, male, mMRC, FEV1, +#' LastYrSevExacCount, LastYrModExacCount) plus any previously imputed +#' optional predictors. Binary variables are imputed with logistic regression +#' (probability rounded to 0/1); BMI with linear regression. +#' +#' @examples +#' results <- accept3_uk(samplePatients) +#' +#' @seealso \code{\link{accept}}, \code{\link{accept2}}, \code{\link{accept3}} +#' +#' @importFrom dplyr tibble mutate select starts_with +#' @export +accept3_uk <- function(patientData, + prediction_interval = FALSE, + return_predictors = FALSE) { + + if (!tibble::is_tibble(patientData)) { + stop("patientData must be a tibble. Use as_tibble() to convert.") + } + + # ── 0. Convert mMRC → SGRQ if needed────────────────── + if (!"SGRQ" %in% colnames(patientData)) { + if ("CAT" %in% colnames(patientData)) { + warning("SGRQ not found. Using CAT score instead.") + patientData$SGRQ <- 18.87 + 1.53 * patientData$CAT + } else if ("mMRC" %in% colnames(patientData)) { + message("SGRQ not found. Using mMRC score instead.") + patientData$SGRQ <- 20.43 + 14.77 * patientData$mMRC + } + } + + # ── 1. UK-specific optional-predictor imputation ─────────────── + # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript. + # Column order in each coef vector: + # (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount, + # LastYrModExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]] + + uk_imp <- list( + + LABA = list( + binary = TRUE, + coef = c(0.418, -0.013, 0.009, 0.438, -0.004, 0.270, 0.333) + ), + + oxygen = list( + binary = TRUE, + coef = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.228, 0.040, + 0.556) # +LABA + ), + + ICS = list( + binary = TRUE, + coef = c(-1.430, -0.004, -0.157, 0.085, -0.002, 0.138, 0.168, + 3.372, 0.409) # +LABA, oxygen + ), + + LAMA = list( + binary = TRUE, + coef = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.153, 0.153, + 1.545, 0.203, -0.632) # +LABA, oxygen, ICS + ), + + statin = list( + binary = TRUE, + coef = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.023, 0.009, + 0.180, -0.196, -0.259, 0.207) # +LABA, oxygen, ICS, LAMA + ), + + BMI = list( + binary = FALSE, + clamp_low = 10, + clamp_hi = 70, + coef = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.517, 0.010, + 0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin + ), + + smoker = list( + binary = TRUE, + coef = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.022, -0.017, + -0.056, -0.648, -0.246, 0.069, 0.031, -0.058) # all optional + ) + ) + + # Sequential imputation order matches the triangular matrix + optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker") + + # Rename LastYrModExacCount alias if only total count is present + if (!"LastYrModExacCount" %in% colnames(patientData) && + "LastYrExacCount" %in% colnames(patientData) && + "LastYrSevExacCount" %in% colnames(patientData)) { + patientData$LastYrModExacCount <- + patientData$LastYrExacCount - patientData$LastYrSevExacCount + } + + mandatory_preds <- c("age", "male", "mMRC", "FEV1", + "LastYrSevExacCount", "LastYrModExacCount") + + imputed_vars <- character(0) + + for (vname in optional_order) { + if (!vname %in% colnames(patientData) || + any(is.na(patientData[[vname]]))) { + + spec <- uk_imp[[vname]] + pred_cols <- c(mandatory_preds, imputed_vars) + + # mMRC proxy: use converted SGRQ back-transform if mMRC missing + if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) { + patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 + } + + X <- as.matrix(cbind(1, patientData[, pred_cols])) + lp <- as.vector(X %*% spec$coef) + + if (spec$binary) { + pred_vals <- round(exp(lp) / (1 + exp(lp))) + } else { + pred_vals <- lp + if (!is.null(spec$clamp_low)) pred_vals <- pmax(pred_vals, spec$clamp_low) + if (!is.null(spec$clamp_hi)) pred_vals <- pmin(pred_vals, spec$clamp_hi) + } + + if (!vname %in% colnames(patientData)) { + patientData[[vname]] <- pred_vals + message(paste0("accept3_uk: '", vname, "' not found — imputed using UK model.")) + } else { + na_idx <- is.na(patientData[[vname]]) + patientData[[vname]][na_idx] <- pred_vals[na_idx] + if (any(na_idx)) + message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '", + vname, "' imputed using UK model.")) + } + imputed_vars <- c(imputed_vars, vname) + } + } + + # ── 2. Get ACCEPT 2.0 predictions ───────────────────────────────────────── + accept2_preds <- accept2(patientData = patientData) + + p2_modsev <- accept2_preds$predicted_exac_probability + p2_sev <- accept2_preds$predicted_severe_exac_probability + + # ── 3. Apply UK recalibration (Table caption / Figure 2) ────────────────── + # Optimism-corrected parameters from CPRD bootstrap (200 resamples): + # Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671–0.681) + # Slope beta = 0.986 (95% CI 0.977–0.995) + # Severe: Baseline Hazard H0 = 1.124 (95% CI 1.107–1.142) + # Slope beta = 0.482 (95% CI 0.472–0.495) + + H0_modsev <- 0.676 + beta_modsev <- 0.986 + + H0_sev <- 0.482 + beta_sev <- 1.124 + + lp_modsev <- log(-log(1 - p2_modsev)) + lp_sev <- log(-log(1 - p2_sev)) + + uk_modsev <- 1 - exp(-H0_modsev * exp(beta_modsev * lp_modsev)) + uk_sev <- 1 - exp(-H0_sev * exp(beta_sev * lp_sev)) + + # ── 4. Assemble output ──────────────────────────────────────────────────── + out <- dplyr::tibble( + predicted_exac_probability = round(uk_modsev, 4), + predicted_exac_rate = round(-log(1 - uk_modsev), 4), + predicted_severe_exac_probability = round(uk_sev, 4), + predicted_severe_exac_rate = round(-log(1 - uk_sev), 4) + ) + + if (return_predictors) { + out <- dplyr::bind_cols(patientData, out) + } else { + out$ID <- patientData$ID + out <- dplyr::select(out, ID, dplyr::everything()) + } + + return(out) +} From 7a0164e9f930324b89fe9d4163b4ef0877ac96b5 Mon Sep 17 00:00:00 2001 From: jeenatm <141190076+jeenatm@users.noreply.github.com> Date: Tue, 7 Apr 2026 21:27:12 -0700 Subject: [PATCH 2/5] Fix non-ASCII characters and ID variable binding --- R/predict.R | 55 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/R/predict.R b/R/predict.R index e7e9954..9891af9 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1110,10 +1110,10 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' #' @return A tibble with columns: #' \itemize{ -#' \item \code{predicted_exac_probability} — recalibrated moderate-to-severe risk -#' \item \code{predicted_exac_rate} — corresponding rate (-log(1-p)) -#' \item \code{predicted_severe_exac_probability} — recalibrated severe risk -#' \item \code{predicted_severe_exac_rate} — corresponding rate +#' \item \code{predicted_exac_probability} - recalibrated moderate-to-severe risk +#' \item \code{predicted_exac_rate} - corresponding rate (-log(1-p)) +#' \item \code{predicted_severe_exac_probability} - recalibrated severe risk +#' \item \code{predicted_severe_exac_rate} - corresponding rate #' } #' If \code{return_predictors = TRUE}, input columns are prepended. #' @@ -1131,7 +1131,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' #' ## Optional-predictor imputation #' Missing optional predictors are imputed sequentially (triangular matrix): -#' LABA → oxygen → ICS → LAMA → statin → BMI → smoker. +#' LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker. #' Each model uses mandatory predictors (age, male, mMRC, FEV1, #' LastYrSevExacCount, LastYrModExacCount) plus any previously imputed #' optional predictors. Binary variables are imputed with logistic regression @@ -1152,7 +1152,7 @@ accept3_uk <- function(patientData, stop("patientData must be a tibble. Use as_tibble() to convert.") } - # ── 0. Convert mMRC → SGRQ if needed────────────────── + # -- 0. Convert mMRC -> SGRQ if needed------------------ if (!"SGRQ" %in% colnames(patientData)) { if ("CAT" %in% colnames(patientData)) { warning("SGRQ not found. Using CAT score instead.") @@ -1160,10 +1160,12 @@ accept3_uk <- function(patientData, } else if ("mMRC" %in% colnames(patientData)) { message("SGRQ not found. Using mMRC score instead.") patientData$SGRQ <- 20.43 + 14.77 * patientData$mMRC + } else { + stop("Either mMRC, SGRQ, or CAT must be provided. None were found in patientData.") } } - # ── 1. UK-specific optional-predictor imputation ─────────────── + # 1. UK-specific optional-predictor imputation # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript. # Column order in each coef vector: # (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount, @@ -1226,6 +1228,11 @@ accept3_uk <- function(patientData, patientData$LastYrExacCount - patientData$LastYrSevExacCount } + # Use mMRC if available, otherwise back-transform from SGRQ + if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) { + patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 + } + mandatory_preds <- c("age", "male", "mMRC", "FEV1", "LastYrSevExacCount", "LastYrModExacCount") @@ -1235,15 +1242,11 @@ accept3_uk <- function(patientData, if (!vname %in% colnames(patientData) || any(is.na(patientData[[vname]]))) { - spec <- uk_imp[[vname]] - pred_cols <- c(mandatory_preds, imputed_vars) - - # mMRC proxy: use converted SGRQ back-transform if mMRC missing - if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) { - patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 - } + spec <- uk_imp[[vname]] + pred_cols <- c(mandatory_preds, imputed_vars) X <- as.matrix(cbind(1, patientData[, pred_cols])) + X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X)) lp <- as.vector(X %*% spec$coef) if (spec$binary) { @@ -1256,7 +1259,7 @@ accept3_uk <- function(patientData, if (!vname %in% colnames(patientData)) { patientData[[vname]] <- pred_vals - message(paste0("accept3_uk: '", vname, "' not found — imputed using UK model.")) + message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model.")) } else { na_idx <- is.na(patientData[[vname]]) patientData[[vname]][na_idx] <- pred_vals[na_idx] @@ -1264,22 +1267,26 @@ accept3_uk <- function(patientData, message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '", vname, "' imputed using UK model.")) } + } + + # Always track available optional variables for subsequent models + if (vname %in% colnames(patientData)) { imputed_vars <- c(imputed_vars, vname) } } - # ── 2. Get ACCEPT 2.0 predictions ───────────────────────────────────────── + # 2. ACCEPT 2.0 predictions accept2_preds <- accept2(patientData = patientData) p2_modsev <- accept2_preds$predicted_exac_probability p2_sev <- accept2_preds$predicted_severe_exac_probability - # ── 3. Apply UK recalibration (Table caption / Figure 2) ────────────────── - # Optimism-corrected parameters from CPRD bootstrap (200 resamples): - # Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671–0.681) - # Slope beta = 0.986 (95% CI 0.977–0.995) - # Severe: Baseline Hazard H0 = 1.124 (95% CI 1.107–1.142) - # Slope beta = 0.482 (95% CI 0.472–0.495) + # 3. UK recalibration + # Optimism-corrected parameters from CPRD bootstrap: + # Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671-0.681) + # Slope beta = 0.986 (95% CI 0.977-0.995) + # Severe: Baseline Hazard H0 = 1.124 (95% CI 1.107-1.142) + # Slope beta = 0.482 (95% CI 0.472-0.495) H0_modsev <- 0.676 beta_modsev <- 0.986 @@ -1293,7 +1300,7 @@ accept3_uk <- function(patientData, uk_modsev <- 1 - exp(-H0_modsev * exp(beta_modsev * lp_modsev)) uk_sev <- 1 - exp(-H0_sev * exp(beta_sev * lp_sev)) - # ── 4. Assemble output ──────────────────────────────────────────────────── + # 4. Assemble output out <- dplyr::tibble( predicted_exac_probability = round(uk_modsev, 4), predicted_exac_rate = round(-log(1 - uk_modsev), 4), @@ -1305,7 +1312,7 @@ accept3_uk <- function(patientData, out <- dplyr::bind_cols(patientData, out) } else { out$ID <- patientData$ID - out <- dplyr::select(out, ID, dplyr::everything()) + out <- dplyr::select(out, "ID", dplyr::everything()) } return(out) From 6ee4bf0e6e5733c7f04313ad22afe38d96a65cf9 Mon Sep 17 00:00:00 2001 From: jeenatm <141190076+jeenatm@users.noreply.github.com> Date: Tue, 7 Apr 2026 23:55:52 -0700 Subject: [PATCH 3/5] Fix imputation: use LastYrExacCount, move mMRC back-transform before loop, fix imputed_vars tracking --- R/predict.R | 49 ++++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/R/predict.R b/R/predict.R index 9891af9..eb944db 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1133,7 +1133,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' Missing optional predictors are imputed sequentially (triangular matrix): #' LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker. #' Each model uses mandatory predictors (age, male, mMRC, FEV1, -#' LastYrSevExacCount, LastYrModExacCount) plus any previously imputed +#' LastYrSevExacCount, LastYrExacCount) plus any previously imputed #' optional predictors. Binary variables are imputed with logistic regression #' (probability rounded to 0/1); BMI with linear regression. #' @@ -1152,7 +1152,7 @@ accept3_uk <- function(patientData, stop("patientData must be a tibble. Use as_tibble() to convert.") } - # -- 0. Convert mMRC -> SGRQ if needed------------------ + # 1. Convert mMRC -> SGRQ if needed if (!"SGRQ" %in% colnames(patientData)) { if ("CAT" %in% colnames(patientData)) { warning("SGRQ not found. Using CAT score instead.") @@ -1165,40 +1165,40 @@ accept3_uk <- function(patientData, } } - # 1. UK-specific optional-predictor imputation + # 2. UK-specific optional-predictor imputation # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript. # Column order in each coef vector: # (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount, - # LastYrModExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]] + # LastYrExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]] uk_imp <- list( LABA = list( binary = TRUE, - coef = c(0.418, -0.013, 0.009, 0.438, -0.004, 0.270, 0.333) + coef = c(0.418, -0.013, 0.009, 0.438, -0.004, -0.063, 0.333) ), oxygen = list( binary = TRUE, - coef = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.228, 0.040, + coef = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.188, 0.040, 0.556) # +LABA ), ICS = list( binary = TRUE, - coef = c(-1.430, -0.004, -0.157, 0.085, -0.002, 0.138, 0.168, + coef = c(-1.430, -0.004, -0.157, 0.085, -0.002, -0.030, 0.168, 3.372, 0.409) # +LABA, oxygen ), LAMA = list( binary = TRUE, - coef = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.153, 0.153, + coef = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.000, 0.153, 1.545, 0.203, -0.632) # +LABA, oxygen, ICS ), statin = list( binary = TRUE, - coef = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.023, 0.009, + coef = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.032, 0.009, 0.180, -0.196, -0.259, 0.207) # +LABA, oxygen, ICS, LAMA ), @@ -1206,13 +1206,13 @@ accept3_uk <- function(patientData, binary = FALSE, clamp_low = 10, clamp_hi = 70, - coef = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.517, 0.010, + coef = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.527, 0.010, 0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin ), smoker = list( binary = TRUE, - coef = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.022, -0.017, + coef = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.039, -0.017, -0.056, -0.648, -0.246, 0.069, 0.031, -0.058) # all optional ) ) @@ -1220,22 +1220,13 @@ accept3_uk <- function(patientData, # Sequential imputation order matches the triangular matrix optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker") - # Rename LastYrModExacCount alias if only total count is present - if (!"LastYrModExacCount" %in% colnames(patientData) && - "LastYrExacCount" %in% colnames(patientData) && - "LastYrSevExacCount" %in% colnames(patientData)) { - patientData$LastYrModExacCount <- - patientData$LastYrExacCount - patientData$LastYrSevExacCount - } - # Use mMRC if available, otherwise back-transform from SGRQ if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) { patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 } mandatory_preds <- c("age", "male", "mMRC", "FEV1", - "LastYrSevExacCount", "LastYrModExacCount") - + "LastYrSevExacCount", "LastYrExacCount") imputed_vars <- character(0) for (vname in optional_order) { @@ -1245,8 +1236,8 @@ accept3_uk <- function(patientData, spec <- uk_imp[[vname]] pred_cols <- c(mandatory_preds, imputed_vars) - X <- as.matrix(cbind(1, patientData[, pred_cols])) - X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X)) + X <- as.matrix(cbind(1, patientData[, pred_cols])) + X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X)) lp <- as.vector(X %*% spec$coef) if (spec$binary) { @@ -1269,24 +1260,24 @@ accept3_uk <- function(patientData, } } - # Always track available optional variables for subsequent models + # Always track all optional variables for subsequent models + # regardless of whether they needed imputation if (vname %in% colnames(patientData)) { imputed_vars <- c(imputed_vars, vname) } } - - # 2. ACCEPT 2.0 predictions + # 3. ACCEPT 2.0 predictions accept2_preds <- accept2(patientData = patientData) p2_modsev <- accept2_preds$predicted_exac_probability p2_sev <- accept2_preds$predicted_severe_exac_probability - # 3. UK recalibration + # 4. UK recalibration # Optimism-corrected parameters from CPRD bootstrap: # Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671-0.681) # Slope beta = 0.986 (95% CI 0.977-0.995) - # Severe: Baseline Hazard H0 = 1.124 (95% CI 1.107-1.142) - # Slope beta = 0.482 (95% CI 0.472-0.495) + # Severe: Baseline Hazard H0 = 0.482 (95% CI 0.472-0.495) + # Slope beta = 1.124 (95% CI 1.107-1.142) H0_modsev <- 0.676 beta_modsev <- 0.986 From b3f35683d5e5116e70f0fd93c3bf563501e4139b Mon Sep 17 00:00:00 2001 From: jeenatm <141190076+jeenatm@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:28:47 -0700 Subject: [PATCH 4/5] Add tests, verbose flag, address all Copilot review comments --- NAMESPACE | 5 +++ R/predict.R | 34 +++++++++++------ man/accept3_uk.Rd | 69 +++++++++++++++++++++++++++++++++++ tests/testthat/test-predict.R | 56 ++++++++++++++++++++++++++-- 4 files changed, 149 insertions(+), 15 deletions(-) create mode 100644 man/accept3_uk.Rd diff --git a/NAMESPACE b/NAMESPACE index 2a45c64..e371f63 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(accept) export(accept1) export(accept2) export(accept3) +export(accept3_uk) export(plotExacerbations) export(plotHeatMap) export(predictCountProb) @@ -16,6 +17,10 @@ export(set_openai_api_key) export(show_openai_api_key) import(dplyr) import(tidyselect) +importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(dplyr,starts_with) +importFrom(dplyr,tibble) importFrom(hardhat,scream) importFrom(reldist,wtd.quantile) importFrom(splines,bs) diff --git a/R/predict.R b/R/predict.R index eb944db..11e5bd4 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1103,11 +1103,11 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' \code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}. #' Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA}, #' \code{statin}, \code{BMI}, \code{smoker}. -#' @param prediction_interval Logical. If \code{TRUE}, also returns lower/upper -#' 80\% prediction intervals. Default \code{FALSE}. #' @param return_predictors Logical. If \code{TRUE}, the input predictors are #' returned alongside predictions. Default \code{FALSE}. #' +#'@param verbose Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}. +#' #' @return A tibble with columns: #' \itemize{ #' \item \code{predicted_exac_probability} - recalibrated moderate-to-severe risk @@ -1126,7 +1126,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' CPRD Cox model (Table in manuscript): #' \itemize{ #' \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986} -#' \item Severe: \eqn{H_0 = 1.124}, \eqn{\beta = 0.482} +#' \item Severe: \eqn{H_0 = 0.482}, \eqn{\beta = 1.124} #' } #' #' ## Optional-predictor imputation @@ -1145,12 +1145,18 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' @importFrom dplyr tibble mutate select starts_with #' @export accept3_uk <- function(patientData, - prediction_interval = FALSE, - return_predictors = FALSE) { + return_predictors = FALSE, + verbose = TRUE) { if (!tibble::is_tibble(patientData)) { stop("patientData must be a tibble. Use as_tibble() to convert.") } + if (any(patientData$LastYrSevExacCount > patientData$LastYrExacCount)) { + invalid_ids <- patientData$ID[patientData$LastYrSevExacCount > patientData$LastYrExacCount] + stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s): ", + paste(invalid_ids, collapse = ", "), + ". Severe count cannot exceed total count.")) + } # 1. Convert mMRC -> SGRQ if needed if (!"SGRQ" %in% colnames(patientData)) { @@ -1205,7 +1211,7 @@ accept3_uk <- function(patientData, BMI = list( binary = FALSE, clamp_low = 10, - clamp_hi = 70, + clamp_hi = 60, coef = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.527, 0.010, 0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin ), @@ -1221,10 +1227,16 @@ accept3_uk <- function(patientData, optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker") # Use mMRC if available, otherwise back-transform from SGRQ - if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) { - patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 + if ("SGRQ" %in% colnames(patientData)) { + if (!"mMRC" %in% colnames(patientData)) { + patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 + } else { + na_idx <- is.na(patientData$mMRC) + if (any(na_idx)) { + patientData$mMRC[na_idx] <- (patientData$SGRQ[na_idx] - 20.43) / 14.77 + } + } } - mandatory_preds <- c("age", "male", "mMRC", "FEV1", "LastYrSevExacCount", "LastYrExacCount") imputed_vars <- character(0) @@ -1250,11 +1262,11 @@ accept3_uk <- function(patientData, if (!vname %in% colnames(patientData)) { patientData[[vname]] <- pred_vals - message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model.")) + if (verbose) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model.")) } else { na_idx <- is.na(patientData[[vname]]) patientData[[vname]][na_idx] <- pred_vals[na_idx] - if (any(na_idx)) + if (any(na_idx) && verbose) message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '", vname, "' imputed using UK model.")) } diff --git a/man/accept3_uk.Rd b/man/accept3_uk.Rd new file mode 100644 index 0000000..f0bbaca --- /dev/null +++ b/man/accept3_uk.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict.R +\name{accept3_uk} +\alias{accept3_uk} +\title{Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK} +\usage{ +accept3_uk(patientData, return_predictors = FALSE) +} +\arguments{ +\item{patientData}{A tibble of patients in the same format as +\code{samplePatients}. Mandatory columns: \code{ID}, \code{age}, +\code{male}, \code{FEV1}, \code{LastYrExacCount}, +\code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}. +Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA}, +\code{statin}, \code{BMI}, \code{smoker}.} + +\item{return_predictors}{Logical. If \code{TRUE}, the input predictors are +returned alongside predictions. Default \code{FALSE}.} + +\item{verbose}{Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.} +} +\value{ +A tibble with columns: + \itemize{ + \item \code{predicted_exac_probability} - recalibrated moderate-to-severe risk + \item \code{predicted_exac_rate} - corresponding rate (-log(1-p)) + \item \code{predicted_severe_exac_probability} - recalibrated severe risk + \item \code{predicted_severe_exac_rate} - corresponding rate + } + If \code{return_predictors = TRUE}, input columns are prepended. +} +\description{ +ACCEPT 3.0-UK is a UK-specific recalibration of ACCEPT 2.0 derived from the +CPRD (Clinical Practice Research Datalink) primary-care dataset. It applies +two Cox-model recalibration parameters: an optimism-corrected baseline +hazard (H0) and slope (beta), separately for moderate-to-severe and severe +exacerbations. + +When optional predictors (LABA, oxygen, ICS, LAMA, statin/CVD, BMI, smoker) +are missing, a UK-specific sequential triangular regression imputation model +fills them in before prediction. +} +\details{ +## Recalibration formula +For each outcome the recalibrated 1-year risk is: +\deqn{\hat{p}_{UK} = 1 - \exp\!\Bigl(-H_0 \cdot \exp(\beta \cdot \log(-\log(1-\hat{p}_{2})))\Bigr)} +where \eqn{\hat{p}_{2}} is the ACCEPT 2.0 predicted probability, and +\eqn{H_0} and \eqn{\beta} are the optimism-corrected parameters from the +CPRD Cox model (Table in manuscript): +\itemize{ + \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986} + \item Severe: \eqn{H_0 = 0.482}, \eqn{\beta = 1.124} +} + +## Optional-predictor imputation +Missing optional predictors are imputed sequentially (triangular matrix): +LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker. +Each model uses mandatory predictors (age, male, mMRC, FEV1, +LastYrSevExacCount, LastYrExacCount) plus any previously imputed +optional predictors. Binary variables are imputed with logistic regression +(probability rounded to 0/1); BMI with linear regression. +} +\examples{ +results <- accept3_uk(samplePatients) + +} +\seealso{ +\code{\link{accept}}, \code{\link{accept2}}, \code{\link{accept3}} +} diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R index a487783..f369716 100644 --- a/tests/testthat/test-predict.R +++ b/tests/testthat/test-predict.R @@ -21,11 +21,11 @@ test_that("accept3 default prediction works", { expect_true("predicted_severe_exac_probability" %in% names(result)) expect_true("predicted_severe_exac_rate" %in% names(result)) expect_equal(nrow(result), 2) - + # Verify rate calculation: rate = -log(1-p) expected_rate <- -log(1 - result$predicted_exac_probability) expect_equal(result$predicted_exac_rate, expected_rate, tolerance = 1e-6) - + expected_sev_rate <- -log(1 - result$predicted_severe_exac_probability) expect_equal(result$predicted_severe_exac_rate, expected_sev_rate, tolerance = 1e-6) }) @@ -35,9 +35,9 @@ test_that("accept3 requires country parameter", { }) test_that("accept1 and accept2 warn when country parameter is provided", { - expect_warning(accept(samplePatients[1,], version="accept1", country="CAN"), + expect_warning(accept(samplePatients[1,], version="accept1", country="CAN"), "not used by accept1.*ignored") - expect_warning(accept(samplePatients[1,], version="accept2", country="USA"), + expect_warning(accept(samplePatients[1,], version="accept2", country="USA"), "not used by accept2.*ignored") }) @@ -372,3 +372,51 @@ test_that("Different SGRQ values produce different results (no conversion)", { # Different SGRQ values should produce different results expect_false(isTRUE(all.equal(result_45$predicted_exac_probability, result_50$predicted_exac_probability))) }) + + + +test_that("accept3_uk returns correct columns and shape", { + results <- accept3_uk(samplePatients) + expect_true(tibble::is_tibble(results)) + expect_equal(nrow(results), nrow(samplePatients)) + expect_true(all(c("ID", "predicted_exac_probability", + "predicted_exac_rate", + "predicted_severe_exac_probability", + "predicted_severe_exac_rate") %in% colnames(results))) +}) + +test_that("accept3_uk predictions are between 0 and 1", { + results <- accept3_uk(samplePatients) + expect_true(all(results$predicted_exac_probability > 0 & results$predicted_exac_probability < 1)) + expect_true(all(results$predicted_severe_exac_probability > 0 & results$predicted_severe_exac_probability < 1)) +}) + +test_that("accept3_uk gives expected predictions on fixed input", { + results <- accept3_uk(samplePatients) + expect_equal(results$predicted_exac_probability[1], 0.6986, tolerance = 0.001) + expect_equal(results$predicted_severe_exac_probability[1], 0.356, tolerance = 0.001) +}) + +test_that("accept3_uk imputes missing optional predictors", { + patients_no_optional <- samplePatients + patients_no_optional$LABA <- NULL + patients_no_optional$oxygen <- NULL + patients_no_optional$ICS <- NULL + patients_no_optional$LAMA <- NULL + patients_no_optional$statin <- NULL + patients_no_optional$BMI <- NULL + patients_no_optional$smoker <- NULL + results <- accept3_uk(patients_no_optional, verbose = FALSE) + expect_equal(nrow(results), nrow(samplePatients)) + expect_true(all(results$predicted_exac_probability > 0 & results$predicted_exac_probability < 1)) +}) + +test_that("accept3_uk errors on non-tibble input", { + expect_error(accept3_uk(as.data.frame(samplePatients))) +}) + +test_that("accept3_uk errors when severe count exceeds total", { + patients_invalid <- samplePatients + patients_invalid$LastYrSevExacCount[1] <- 99 + expect_error(accept3_uk(patients_invalid)) +}) From 79a9b0b20aa22a98292a4af585ca430389361e18 Mon Sep 17 00:00:00 2001 From: jeenatm <141190076+jeenatm@users.noreply.github.com> Date: Wed, 8 Apr 2026 18:26:28 -0700 Subject: [PATCH 5/5] Replace verbose with quiet flag, regenerate documentation --- R/predict.R | 8 ++++---- man/accept3_uk.Rd | 4 ++-- tests/testthat/test-predict.R | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/predict.R b/R/predict.R index 11e5bd4..258e349 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1106,7 +1106,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' @param return_predictors Logical. If \code{TRUE}, the input predictors are #' returned alongside predictions. Default \code{FALSE}. #' -#'@param verbose Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}. +#'@param quiet Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}. #' #' @return A tibble with columns: #' \itemize{ @@ -1146,7 +1146,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA #' @export accept3_uk <- function(patientData, return_predictors = FALSE, - verbose = TRUE) { + quiet = FALSE) { if (!tibble::is_tibble(patientData)) { stop("patientData must be a tibble. Use as_tibble() to convert.") @@ -1262,11 +1262,11 @@ accept3_uk <- function(patientData, if (!vname %in% colnames(patientData)) { patientData[[vname]] <- pred_vals - if (verbose) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model.")) + if (quiet) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model.")) } else { na_idx <- is.na(patientData[[vname]]) patientData[[vname]][na_idx] <- pred_vals[na_idx] - if (any(na_idx) && verbose) + if (any(na_idx) && quiet) message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '", vname, "' imputed using UK model.")) } diff --git a/man/accept3_uk.Rd b/man/accept3_uk.Rd index f0bbaca..25278c6 100644 --- a/man/accept3_uk.Rd +++ b/man/accept3_uk.Rd @@ -4,7 +4,7 @@ \alias{accept3_uk} \title{Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK} \usage{ -accept3_uk(patientData, return_predictors = FALSE) +accept3_uk(patientData, return_predictors = FALSE, quiet = FALSE) } \arguments{ \item{patientData}{A tibble of patients in the same format as @@ -17,7 +17,7 @@ Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA}, \item{return_predictors}{Logical. If \code{TRUE}, the input predictors are returned alongside predictions. Default \code{FALSE}.} -\item{verbose}{Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.} +\item{quiet}{Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.} } \value{ A tibble with columns: diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R index f369716..a7a9117 100644 --- a/tests/testthat/test-predict.R +++ b/tests/testthat/test-predict.R @@ -406,7 +406,7 @@ test_that("accept3_uk imputes missing optional predictors", { patients_no_optional$statin <- NULL patients_no_optional$BMI <- NULL patients_no_optional$smoker <- NULL - results <- accept3_uk(patients_no_optional, verbose = FALSE) + results <- accept3_uk(patients_no_optional, quiet = TRUE) expect_equal(nrow(results), nrow(samplePatients)) expect_true(all(results$predicted_exac_probability > 0 & results$predicted_exac_probability < 1)) })