-
Notifications
You must be signed in to change notification settings - Fork 3
Add accept3_uk() for UK-specific CPRD recalibration #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
eeaac6f
7a0164e
6ee4bf0
b3f3568
79a9b0b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -700,8 +700,8 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int | |
| if (length(invalid_rows) > 0) { | ||
| invalid_ids <- if ("ID" %in% colnames(newdata)) newdata$ID[invalid_rows] else invalid_rows | ||
| stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s) with ID(s): ", | ||
| paste(invalid_ids, collapse = ", "), | ||
| ". Severe exacerbation count cannot be greater than total exacerbation count.")) | ||
| paste(invalid_ids, collapse = ", "), | ||
| ". Severe exacerbation count cannot be greater than total exacerbation count.")) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -814,7 +814,7 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int | |
| # Add predictor columns if requested | ||
| if (return_predictors) { | ||
| result_df <- cbind(newdata, result_df[, c("predicted_exac_probability", "predicted_exac_rate", | ||
| "predicted_severe_exac_probability", "predicted_severe_exac_rate")]) | ||
| "predicted_severe_exac_probability", "predicted_severe_exac_rate")]) | ||
| } | ||
|
|
||
| return(as_tibble(result_df)) | ||
|
|
@@ -1050,8 +1050,8 @@ predictCountProb <- function (patientResults, n = 10, shortened = TRUE){ | |
| accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LABA, LAMA, LastYrExacCount, LastYrSevExacCount, FEV1, SGRQ = NA, oxygen, obs_modsev_risk) { | ||
| # Create base tibble | ||
| df <- tibble(country = country, ID = ID, age = age, male = male, BMI = BMI, smoker = smoker, statin = CVD, | ||
| ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount, | ||
| FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk) | ||
| ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount, | ||
| FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk) | ||
|
|
||
| # Add SGRQ or mMRC depending on what's available | ||
| if (!is.na(SGRQ)) { | ||
|
|
@@ -1082,3 +1082,241 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA | |
| } | ||
|
|
||
|
|
||
|
|
||
|
|
||
| #' Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK | ||
| #' | ||
| #' @description | ||
| #' ACCEPT 3.0-UK is a UK-specific recalibration of ACCEPT 2.0 derived from the | ||
| #' CPRD (Clinical Practice Research Datalink) primary-care dataset. It applies | ||
| #' two Cox-model recalibration parameters: an optimism-corrected baseline | ||
| #' hazard (H0) and slope (beta), separately for moderate-to-severe and severe | ||
| #' exacerbations. | ||
| #' | ||
| #' When optional predictors (LABA, oxygen, ICS, LAMA, statin/CVD, BMI, smoker) | ||
| #' are missing, a UK-specific sequential triangular regression imputation model | ||
| #' fills them in before prediction. | ||
| #' | ||
| #' @param patientData A tibble of patients in the same format as | ||
| #' \code{samplePatients}. Mandatory columns: \code{ID}, \code{age}, | ||
| #' \code{male}, \code{FEV1}, \code{LastYrExacCount}, | ||
| #' \code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}. | ||
| #' Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA}, | ||
| #' \code{statin}, \code{BMI}, \code{smoker}. | ||
| #' @param return_predictors Logical. If \code{TRUE}, the input predictors are | ||
| #' returned alongside predictions. Default \code{FALSE}. | ||
| #' | ||
| #'@param quiet Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}. | ||
| #' | ||
| #' @return A tibble with columns: | ||
| #' \itemize{ | ||
| #' \item \code{predicted_exac_probability} - recalibrated moderate-to-severe risk | ||
| #' \item \code{predicted_exac_rate} - corresponding rate (-log(1-p)) | ||
| #' \item \code{predicted_severe_exac_probability} - recalibrated severe risk | ||
| #' \item \code{predicted_severe_exac_rate} - corresponding rate | ||
| #' } | ||
| #' If \code{return_predictors = TRUE}, input columns are prepended. | ||
| #' | ||
| #' @details | ||
| #' ## Recalibration formula | ||
| #' For each outcome the recalibrated 1-year risk is: | ||
| #' \deqn{\hat{p}_{UK} = 1 - \exp\!\Bigl(-H_0 \cdot \exp(\beta \cdot \log(-\log(1-\hat{p}_{2})))\Bigr)} | ||
| #' where \eqn{\hat{p}_{2}} is the ACCEPT 2.0 predicted probability, and | ||
| #' \eqn{H_0} and \eqn{\beta} are the optimism-corrected parameters from the | ||
| #' CPRD Cox model (Table in manuscript): | ||
| #' \itemize{ | ||
| #' \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986} | ||
| #' \item Severe: \eqn{H_0 = 0.482}, \eqn{\beta = 1.124} | ||
| #' } | ||
| #' | ||
| #' ## Optional-predictor imputation | ||
| #' Missing optional predictors are imputed sequentially (triangular matrix): | ||
| #' LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker. | ||
| #' Each model uses mandatory predictors (age, male, mMRC, FEV1, | ||
| #' LastYrSevExacCount, LastYrExacCount) plus any previously imputed | ||
| #' optional predictors. Binary variables are imputed with logistic regression | ||
| #' (probability rounded to 0/1); BMI with linear regression. | ||
| #' | ||
| #' @examples | ||
| #' results <- accept3_uk(samplePatients) | ||
| #' | ||
| #' @seealso \code{\link{accept}}, \code{\link{accept2}}, \code{\link{accept3}} | ||
| #' | ||
| #' @importFrom dplyr tibble mutate select starts_with | ||
| #' @export | ||
| accept3_uk <- function(patientData, | ||
|
Comment on lines
+1145
to
+1147
|
||
| return_predictors = FALSE, | ||
| quiet = FALSE) { | ||
|
|
||
| if (!tibble::is_tibble(patientData)) { | ||
| stop("patientData must be a tibble. Use as_tibble() to convert.") | ||
| } | ||
|
Comment on lines
+1147
to
+1153
|
||
| if (any(patientData$LastYrSevExacCount > patientData$LastYrExacCount)) { | ||
| invalid_ids <- patientData$ID[patientData$LastYrSevExacCount > patientData$LastYrExacCount] | ||
| stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s): ", | ||
| paste(invalid_ids, collapse = ", "), | ||
| ". Severe count cannot exceed total count.")) | ||
| } | ||
|
|
||
| # 1. Convert mMRC -> SGRQ if needed | ||
| if (!"SGRQ" %in% colnames(patientData)) { | ||
| if ("CAT" %in% colnames(patientData)) { | ||
| warning("SGRQ not found. Using CAT score instead.") | ||
| patientData$SGRQ <- 18.87 + 1.53 * patientData$CAT | ||
| } else if ("mMRC" %in% colnames(patientData)) { | ||
| message("SGRQ not found. Using mMRC score instead.") | ||
| patientData$SGRQ <- 20.43 + 14.77 * patientData$mMRC | ||
| } else { | ||
| stop("Either mMRC, SGRQ, or CAT must be provided. None were found in patientData.") | ||
| } | ||
| } | ||
|
|
||
| # 2. UK-specific optional-predictor imputation | ||
| # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript. | ||
| # Column order in each coef vector: | ||
| # (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount, | ||
| # LastYrExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]] | ||
|
|
||
| uk_imp <- list( | ||
|
|
||
| LABA = list( | ||
| binary = TRUE, | ||
| coef = c(0.418, -0.013, 0.009, 0.438, -0.004, -0.063, 0.333) | ||
| ), | ||
|
|
||
| oxygen = list( | ||
| binary = TRUE, | ||
| coef = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.188, 0.040, | ||
| 0.556) # +LABA | ||
| ), | ||
|
|
||
| ICS = list( | ||
| binary = TRUE, | ||
| coef = c(-1.430, -0.004, -0.157, 0.085, -0.002, -0.030, 0.168, | ||
| 3.372, 0.409) # +LABA, oxygen | ||
| ), | ||
|
|
||
| LAMA = list( | ||
| binary = TRUE, | ||
| coef = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.000, 0.153, | ||
| 1.545, 0.203, -0.632) # +LABA, oxygen, ICS | ||
| ), | ||
|
|
||
| statin = list( | ||
| binary = TRUE, | ||
| coef = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.032, 0.009, | ||
| 0.180, -0.196, -0.259, 0.207) # +LABA, oxygen, ICS, LAMA | ||
| ), | ||
|
|
||
| BMI = list( | ||
| binary = FALSE, | ||
| clamp_low = 10, | ||
| clamp_hi = 60, | ||
| coef = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.527, 0.010, | ||
| 0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin | ||
|
Comment on lines
+1211
to
+1216
|
||
| ), | ||
|
|
||
| smoker = list( | ||
| binary = TRUE, | ||
| coef = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.039, -0.017, | ||
| -0.056, -0.648, -0.246, 0.069, 0.031, -0.058) # all optional | ||
| ) | ||
| ) | ||
|
|
||
| # Sequential imputation order matches the triangular matrix | ||
| optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker") | ||
|
|
||
| # Use mMRC if available, otherwise back-transform from SGRQ | ||
| if ("SGRQ" %in% colnames(patientData)) { | ||
| if (!"mMRC" %in% colnames(patientData)) { | ||
| patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77 | ||
| } else { | ||
| na_idx <- is.na(patientData$mMRC) | ||
| if (any(na_idx)) { | ||
| patientData$mMRC[na_idx] <- (patientData$SGRQ[na_idx] - 20.43) / 14.77 | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+1229
to
+1239
|
||
| mandatory_preds <- c("age", "male", "mMRC", "FEV1", | ||
| "LastYrSevExacCount", "LastYrExacCount") | ||
| imputed_vars <- character(0) | ||
|
|
||
| for (vname in optional_order) { | ||
| if (!vname %in% colnames(patientData) || | ||
| any(is.na(patientData[[vname]]))) { | ||
|
|
||
| spec <- uk_imp[[vname]] | ||
| pred_cols <- c(mandatory_preds, imputed_vars) | ||
|
|
||
| X <- as.matrix(cbind(1, patientData[, pred_cols])) | ||
| X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X)) | ||
| lp <- as.vector(X %*% spec$coef) | ||
|
|
||
| if (spec$binary) { | ||
| pred_vals <- round(exp(lp) / (1 + exp(lp))) | ||
| } else { | ||
| pred_vals <- lp | ||
| if (!is.null(spec$clamp_low)) pred_vals <- pmax(pred_vals, spec$clamp_low) | ||
| if (!is.null(spec$clamp_hi)) pred_vals <- pmin(pred_vals, spec$clamp_hi) | ||
| } | ||
|
|
||
| if (!vname %in% colnames(patientData)) { | ||
| patientData[[vname]] <- pred_vals | ||
| if (quiet) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model.")) | ||
| } else { | ||
| na_idx <- is.na(patientData[[vname]]) | ||
| patientData[[vname]][na_idx] <- pred_vals[na_idx] | ||
| if (any(na_idx) && quiet) | ||
| message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '", | ||
| vname, "' imputed using UK model.")) | ||
| } | ||
|
Comment on lines
+1263
to
+1272
|
||
| } | ||
|
|
||
| # Always track all optional variables for subsequent models | ||
| # regardless of whether they needed imputation | ||
| if (vname %in% colnames(patientData)) { | ||
| imputed_vars <- c(imputed_vars, vname) | ||
| } | ||
| } | ||
| # 3. ACCEPT 2.0 predictions | ||
| accept2_preds <- accept2(patientData = patientData) | ||
|
|
||
| p2_modsev <- accept2_preds$predicted_exac_probability | ||
| p2_sev <- accept2_preds$predicted_severe_exac_probability | ||
|
Comment on lines
+1281
to
+1285
|
||
|
|
||
| # 4. UK recalibration | ||
| # Optimism-corrected parameters from CPRD bootstrap: | ||
| # Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671-0.681) | ||
| # Slope beta = 0.986 (95% CI 0.977-0.995) | ||
| # Severe: Baseline Hazard H0 = 0.482 (95% CI 0.472-0.495) | ||
| # Slope beta = 1.124 (95% CI 1.107-1.142) | ||
|
|
||
| H0_modsev <- 0.676 | ||
| beta_modsev <- 0.986 | ||
|
|
||
| H0_sev <- 0.482 | ||
| beta_sev <- 1.124 | ||
|
|
||
| lp_modsev <- log(-log(1 - p2_modsev)) | ||
| lp_sev <- log(-log(1 - p2_sev)) | ||
|
|
||
| uk_modsev <- 1 - exp(-H0_modsev * exp(beta_modsev * lp_modsev)) | ||
| uk_sev <- 1 - exp(-H0_sev * exp(beta_sev * lp_sev)) | ||
|
|
||
| # 4. Assemble output | ||
| out <- dplyr::tibble( | ||
| predicted_exac_probability = round(uk_modsev, 4), | ||
| predicted_exac_rate = round(-log(1 - uk_modsev), 4), | ||
| predicted_severe_exac_probability = round(uk_sev, 4), | ||
| predicted_severe_exac_rate = round(-log(1 - uk_sev), 4) | ||
| ) | ||
|
|
||
| if (return_predictors) { | ||
| out <- dplyr::bind_cols(patientData, out) | ||
| } else { | ||
| out$ID <- patientData$ID | ||
| out <- dplyr::select(out, "ID", dplyr::everything()) | ||
| } | ||
|
|
||
| return(out) | ||
| } | ||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The severe-outcome recalibration parameters are inconsistent within this function’s documentation/comments vs the actual constants used. The roxygen block lists severe
H0 = 1.124andbeta = 0.482, while the implementation/comment block usesH0_sev <- 0.482andbeta_sev <- 1.124. Please verify the correct CPRD values and make the roxygen formula section, in-code comments, and constants agree.