From eeaac6f1b0b4d4f7e2776773aa217aa62f13eea0 Mon Sep 17 00:00:00 2001
From: jeenatm <141190076+jeenatm@users.noreply.github.com>
Date: Tue, 7 Apr 2026 21:01:36 -0700
Subject: [PATCH 1/5] Add accept3_uk() for UK-specific CPRD recalibration

---
 R/predict.R | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 233 insertions(+), 5 deletions(-)

diff --git a/R/predict.R b/R/predict.R
index b2888dd..e7e9954 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -700,8 +700,8 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int
     if (length(invalid_rows) > 0) {
       invalid_ids <- if ("ID" %in% colnames(newdata)) newdata$ID[invalid_rows] else invalid_rows
       stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s) with ID(s): ",
-                   paste(invalid_ids, collapse = ", "),
-                   ". Severe exacerbation count cannot be greater than total exacerbation count."))
+                  paste(invalid_ids, collapse = ", "),
+                  ". Severe exacerbation count cannot be greater than total exacerbation count."))
     }
   }
 
@@ -814,7 +814,7 @@ accept <- function(newdata, format="tibble", version = "accept3", prediction_int
     # Add predictor columns if requested
     if (return_predictors) {
       result_df <- cbind(newdata, result_df[, c("predicted_exac_probability", "predicted_exac_rate",
-                                                  "predicted_severe_exac_probability", "predicted_severe_exac_rate")])
+                                                "predicted_severe_exac_probability", "predicted_severe_exac_rate")])
     }
 
     return(as_tibble(result_df))
@@ -1050,8 +1050,8 @@ predictCountProb <- function (patientResults, n = 10, shortened = TRUE){
 accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LABA, LAMA, LastYrExacCount,  LastYrSevExacCount, FEV1, SGRQ = NA, oxygen, obs_modsev_risk) {
   # Create base tibble
   df <- tibble(country = country, ID = ID, age = age, male = male, BMI = BMI, smoker = smoker, statin = CVD,
-                   ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount,
-                   FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk)
+               ICS = ICS, LABA = LABA, LAMA = LAMA, LastYrExacCount = LastYrExacCount, LastYrSevExacCount = LastYrSevExacCount,
+               FEV1 = FEV1, oxygen = oxygen, obs_modsev_risk = obs_modsev_risk)
 
   # Add SGRQ or mMRC depending on what's available
   if (!is.na(SGRQ)) {
@@ -1082,3 +1082,231 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 }
 
 
+
+
+#' Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK
+#'
+#' @description
+#' ACCEPT 3.0-UK is a UK-specific recalibration of ACCEPT 2.0 derived from the
+#' CPRD (Clinical Practice Research Datalink) primary-care dataset. It applies
+#' two Cox-model recalibration parameters: an optimism-corrected baseline
+#' hazard (H0) and slope (beta), separately for moderate-to-severe and severe
+#' exacerbations.
+#'
+#' When optional predictors (LABA, oxygen, ICS, LAMA, statin/CVD, BMI, smoker)
+#' are missing, a UK-specific sequential triangular regression imputation model
+#' fills them in before prediction.
+#'
+#' @param patientData A tibble of patients in the same format as
+#'   \code{samplePatients}. Mandatory columns: \code{ID}, \code{age},
+#'   \code{male}, \code{FEV1}, \code{LastYrExacCount},
+#'   \code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}.
+#'   Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA},
+#'   \code{statin}, \code{BMI}, \code{smoker}.
+#' @param prediction_interval Logical. If \code{TRUE}, also returns lower/upper
+#'   80\% prediction intervals. Default \code{FALSE}.
+#' @param return_predictors Logical. If \code{TRUE}, the input predictors are
+#'   returned alongside predictions. Default \code{FALSE}.
+#'
+#' @return A tibble with columns:
+#'   \itemize{
+#'     \item \code{predicted_exac_probability}        — recalibrated moderate-to-severe risk
+#'     \item \code{predicted_exac_rate}               — corresponding rate (-log(1-p))
+#'     \item \code{predicted_severe_exac_probability} — recalibrated severe risk
+#'     \item \code{predicted_severe_exac_rate}        — corresponding rate
+#'   }
+#'   If \code{return_predictors = TRUE}, input columns are prepended.
+#'
+#' @details
+#' ## Recalibration formula
+#' For each outcome the recalibrated 1-year risk is:
+#' \deqn{\hat{p}_{UK} = 1 - \exp\!\Bigl(-H_0 \cdot \exp(\beta \cdot \log(-\log(1-\hat{p}_{2})))\Bigr)}
+#' where \eqn{\hat{p}_{2}} is the ACCEPT 2.0 predicted probability, and
+#' \eqn{H_0} and \eqn{\beta} are the optimism-corrected parameters from the
+#' CPRD Cox model (Table in manuscript):
+#' \itemize{
+#'   \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986}
+#'   \item Severe:             \eqn{H_0 = 1.124}, \eqn{\beta = 0.482}
+#' }
+#'
+#' ## Optional-predictor imputation
+#' Missing optional predictors are imputed sequentially (triangular matrix):
+#' LABA → oxygen → ICS → LAMA → statin → BMI → smoker.
+#' Each model uses mandatory predictors (age, male, mMRC, FEV1,
+#' LastYrSevExacCount, LastYrModExacCount) plus any previously imputed
+#' optional predictors. Binary variables are imputed with logistic regression
+#' (probability rounded to 0/1); BMI with linear regression.
+#'
+#' @examples
+#' results <- accept3_uk(samplePatients)
+#'
+#' @seealso \code{\link{accept}}, \code{\link{accept2}}, \code{\link{accept3}}
+#'
+#' @importFrom dplyr tibble mutate select starts_with
+#' @export
+accept3_uk <- function(patientData,
+                       prediction_interval = FALSE,
+                       return_predictors   = FALSE) {
+
+  if (!tibble::is_tibble(patientData)) {
+    stop("patientData must be a tibble. Use as_tibble() to convert.")
+  }
+
+  # ── 0. Convert mMRC → SGRQ if needed──────────────────
+  if (!"SGRQ" %in% colnames(patientData)) {
+    if ("CAT" %in% colnames(patientData)) {
+      warning("SGRQ not found. Using CAT score instead.")
+      patientData$SGRQ <- 18.87 + 1.53 * patientData$CAT
+    } else if ("mMRC" %in% colnames(patientData)) {
+      message("SGRQ not found. Using mMRC score instead.")
+      patientData$SGRQ <- 20.43 + 14.77 * patientData$mMRC
+    }
+  }
+
+  # ── 1. UK-specific optional-predictor imputation ───────────────
+  # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript.
+  # Column order in each coef vector:
+  #   (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount,
+  #   LastYrModExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]]
+
+  uk_imp <- list(
+
+    LABA = list(
+      binary = TRUE,
+      coef   = c(0.418, -0.013, 0.009, 0.438, -0.004, 0.270, 0.333)
+    ),
+
+    oxygen = list(
+      binary = TRUE,
+      coef   = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.228, 0.040,
+                 0.556)                                     # +LABA
+    ),
+
+    ICS = list(
+      binary = TRUE,
+      coef   = c(-1.430, -0.004, -0.157, 0.085, -0.002, 0.138, 0.168,
+                 3.372, 0.409)                              # +LABA, oxygen
+    ),
+
+    LAMA = list(
+      binary = TRUE,
+      coef   = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.153, 0.153,
+                 1.545, 0.203, -0.632)                     # +LABA, oxygen, ICS
+    ),
+
+    statin = list(
+      binary = TRUE,
+      coef   = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.023, 0.009,
+                 0.180, -0.196, -0.259, 0.207)             # +LABA, oxygen, ICS, LAMA
+    ),
+
+    BMI = list(
+      binary    = FALSE,
+      clamp_low = 10,
+      clamp_hi  = 70,
+      coef      = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.517, 0.010,
+                    0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin
+    ),
+
+    smoker = list(
+      binary = TRUE,
+      coef   = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.022, -0.017,
+                 -0.056, -0.648, -0.246, 0.069, 0.031, -0.058) # all optional
+    )
+  )
+
+  # Sequential imputation order matches the triangular matrix
+  optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker")
+
+  # Rename LastYrModExacCount alias if only total count is present
+  if (!"LastYrModExacCount" %in% colnames(patientData) &&
+      "LastYrExacCount"    %in% colnames(patientData) &&
+      "LastYrSevExacCount" %in% colnames(patientData)) {
+    patientData$LastYrModExacCount <-
+      patientData$LastYrExacCount - patientData$LastYrSevExacCount
+  }
+
+  mandatory_preds <- c("age", "male", "mMRC", "FEV1",
+                       "LastYrSevExacCount", "LastYrModExacCount")
+
+  imputed_vars <- character(0)
+
+  for (vname in optional_order) {
+    if (!vname %in% colnames(patientData) ||
+        any(is.na(patientData[[vname]]))) {
+
+      spec        <- uk_imp[[vname]]
+      pred_cols   <- c(mandatory_preds, imputed_vars)
+
+      # mMRC proxy: use converted SGRQ back-transform if mMRC missing
+      if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) {
+        patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
+      }
+
+      X <- as.matrix(cbind(1, patientData[, pred_cols]))
+      lp <- as.vector(X %*% spec$coef)
+
+      if (spec$binary) {
+        pred_vals <- round(exp(lp) / (1 + exp(lp)))
+      } else {
+        pred_vals <- lp
+        if (!is.null(spec$clamp_low)) pred_vals <- pmax(pred_vals, spec$clamp_low)
+        if (!is.null(spec$clamp_hi))  pred_vals <- pmin(pred_vals, spec$clamp_hi)
+      }
+
+      if (!vname %in% colnames(patientData)) {
+        patientData[[vname]] <- pred_vals
+        message(paste0("accept3_uk: '", vname, "' not found — imputed using UK model."))
+      } else {
+        na_idx <- is.na(patientData[[vname]])
+        patientData[[vname]][na_idx] <- pred_vals[na_idx]
+        if (any(na_idx))
+          message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '",
+                         vname, "' imputed using UK model."))
+      }
+      imputed_vars <- c(imputed_vars, vname)
+    }
+  }
+
+  # ── 2. Get ACCEPT 2.0 predictions ─────────────────────────────────────────
+  accept2_preds <- accept2(patientData = patientData)
+
+  p2_modsev <- accept2_preds$predicted_exac_probability
+  p2_sev    <- accept2_preds$predicted_severe_exac_probability
+
+  # ── 3. Apply UK recalibration (Table caption / Figure 2) ──────────────────
+  # Optimism-corrected parameters from CPRD bootstrap (200 resamples):
+  #   Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671–0.681)
+  #                       Slope beta         = 0.986 (95% CI 0.977–0.995)
+  #   Severe:             Baseline Hazard H0 = 1.124 (95% CI 1.107–1.142)
+  #                       Slope beta         = 0.482 (95% CI 0.472–0.495)
+
+  H0_modsev   <- 0.676
+  beta_modsev <- 0.986
+
+  H0_sev      <- 0.482
+  beta_sev    <- 1.124
+
+  lp_modsev <- log(-log(1 - p2_modsev))
+  lp_sev    <- log(-log(1 - p2_sev))
+
+  uk_modsev <- 1 - exp(-H0_modsev * exp(beta_modsev * lp_modsev))
+  uk_sev    <- 1 - exp(-H0_sev    * exp(beta_sev    * lp_sev))
+
+  # ── 4. Assemble output ────────────────────────────────────────────────────
+  out <- dplyr::tibble(
+    predicted_exac_probability        = round(uk_modsev, 4),
+    predicted_exac_rate               = round(-log(1 - uk_modsev), 4),
+    predicted_severe_exac_probability = round(uk_sev, 4),
+    predicted_severe_exac_rate        = round(-log(1 - uk_sev), 4)
+  )
+
+  if (return_predictors) {
+    out <- dplyr::bind_cols(patientData, out)
+  } else {
+    out$ID <- patientData$ID
+    out    <- dplyr::select(out, ID, dplyr::everything())
+  }
+
+  return(out)
+}

From 7a0164e9f930324b89fe9d4163b4ef0877ac96b5 Mon Sep 17 00:00:00 2001
From: jeenatm <141190076+jeenatm@users.noreply.github.com>
Date: Tue, 7 Apr 2026 21:27:12 -0700
Subject: [PATCH 2/5] Fix non-ASCII characters and ID variable binding

---
 R/predict.R | 55 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/R/predict.R b/R/predict.R
index e7e9954..9891af9 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -1110,10 +1110,10 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #'
 #' @return A tibble with columns:
 #'   \itemize{
-#'     \item \code{predicted_exac_probability}        — recalibrated moderate-to-severe risk
-#'     \item \code{predicted_exac_rate}               — corresponding rate (-log(1-p))
-#'     \item \code{predicted_severe_exac_probability} — recalibrated severe risk
-#'     \item \code{predicted_severe_exac_rate}        — corresponding rate
+#'     \item \code{predicted_exac_probability}        - recalibrated moderate-to-severe risk
+#'     \item \code{predicted_exac_rate}               - corresponding rate (-log(1-p))
+#'     \item \code{predicted_severe_exac_probability} - recalibrated severe risk
+#'     \item \code{predicted_severe_exac_rate}        - corresponding rate
 #'   }
 #'   If \code{return_predictors = TRUE}, input columns are prepended.
 #'
@@ -1131,7 +1131,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #'
 #' ## Optional-predictor imputation
 #' Missing optional predictors are imputed sequentially (triangular matrix):
-#' LABA → oxygen → ICS → LAMA → statin → BMI → smoker.
+#' LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker.
 #' Each model uses mandatory predictors (age, male, mMRC, FEV1,
 #' LastYrSevExacCount, LastYrModExacCount) plus any previously imputed
 #' optional predictors. Binary variables are imputed with logistic regression
@@ -1152,7 +1152,7 @@ accept3_uk <- function(patientData,
     stop("patientData must be a tibble. Use as_tibble() to convert.")
   }
 
-  # ── 0. Convert mMRC → SGRQ if needed──────────────────
+  # -- 0. Convert mMRC -> SGRQ if needed------------------
   if (!"SGRQ" %in% colnames(patientData)) {
     if ("CAT" %in% colnames(patientData)) {
       warning("SGRQ not found. Using CAT score instead.")
@@ -1160,10 +1160,12 @@ accept3_uk <- function(patientData,
     } else if ("mMRC" %in% colnames(patientData)) {
       message("SGRQ not found. Using mMRC score instead.")
       patientData$SGRQ <- 20.43 + 14.77 * patientData$mMRC
+    } else {
+      stop("Either mMRC, SGRQ, or CAT must be provided. None were found in patientData.")
     }
   }
 
-  # ── 1. UK-specific optional-predictor imputation ───────────────
+  # 1. UK-specific optional-predictor imputation
   # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript.
   # Column order in each coef vector:
   #   (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount,
@@ -1226,6 +1228,11 @@ accept3_uk <- function(patientData,
       patientData$LastYrExacCount - patientData$LastYrSevExacCount
   }
 
+  # Use mMRC if available, otherwise back-transform from SGRQ
+  if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) {
+    patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
+  }
+
   mandatory_preds <- c("age", "male", "mMRC", "FEV1",
                        "LastYrSevExacCount", "LastYrModExacCount")
 
@@ -1235,15 +1242,11 @@ accept3_uk <- function(patientData,
     if (!vname %in% colnames(patientData) ||
         any(is.na(patientData[[vname]]))) {
 
-      spec        <- uk_imp[[vname]]
-      pred_cols   <- c(mandatory_preds, imputed_vars)
-
-      # mMRC proxy: use converted SGRQ back-transform if mMRC missing
-      if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) {
-        patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
-      }
+      spec      <- uk_imp[[vname]]
+      pred_cols <- c(mandatory_preds, imputed_vars)
 
       X <- as.matrix(cbind(1, patientData[, pred_cols]))
+      X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X))
       lp <- as.vector(X %*% spec$coef)
 
       if (spec$binary) {
@@ -1256,7 +1259,7 @@ accept3_uk <- function(patientData,
 
       if (!vname %in% colnames(patientData)) {
         patientData[[vname]] <- pred_vals
-        message(paste0("accept3_uk: '", vname, "' not found — imputed using UK model."))
+        message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model."))
       } else {
         na_idx <- is.na(patientData[[vname]])
         patientData[[vname]][na_idx] <- pred_vals[na_idx]
@@ -1264,22 +1267,26 @@ accept3_uk <- function(patientData,
           message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '",
                          vname, "' imputed using UK model."))
       }
+    }
+
+    # Always track available optional variables for subsequent models
+    if (vname %in% colnames(patientData)) {
       imputed_vars <- c(imputed_vars, vname)
     }
   }
 
-  # ── 2. Get ACCEPT 2.0 predictions ─────────────────────────────────────────
+  # 2. ACCEPT 2.0 predictions
   accept2_preds <- accept2(patientData = patientData)
 
   p2_modsev <- accept2_preds$predicted_exac_probability
   p2_sev    <- accept2_preds$predicted_severe_exac_probability
 
-  # ── 3. Apply UK recalibration (Table caption / Figure 2) ──────────────────
-  # Optimism-corrected parameters from CPRD bootstrap (200 resamples):
-  #   Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671–0.681)
-  #                       Slope beta         = 0.986 (95% CI 0.977–0.995)
-  #   Severe:             Baseline Hazard H0 = 1.124 (95% CI 1.107–1.142)
-  #                       Slope beta         = 0.482 (95% CI 0.472–0.495)
+  # 3. UK recalibration
+  # Optimism-corrected parameters from CPRD bootstrap:
+  #   Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671-0.681)
+  #                       Slope beta         = 0.986 (95% CI 0.977-0.995)
+  #   Severe:             Baseline Hazard H0 = 1.124 (95% CI 1.107-1.142)
+  #                       Slope beta         = 0.482 (95% CI 0.472-0.495)
 
   H0_modsev   <- 0.676
   beta_modsev <- 0.986
@@ -1293,7 +1300,7 @@ accept3_uk <- function(patientData,
   uk_modsev <- 1 - exp(-H0_modsev * exp(beta_modsev * lp_modsev))
   uk_sev    <- 1 - exp(-H0_sev    * exp(beta_sev    * lp_sev))
 
-  # ── 4. Assemble output ────────────────────────────────────────────────────
+  # 4. Assemble output
   out <- dplyr::tibble(
     predicted_exac_probability        = round(uk_modsev, 4),
     predicted_exac_rate               = round(-log(1 - uk_modsev), 4),
@@ -1305,7 +1312,7 @@ accept3_uk <- function(patientData,
     out <- dplyr::bind_cols(patientData, out)
   } else {
     out$ID <- patientData$ID
-    out    <- dplyr::select(out, ID, dplyr::everything())
+    out    <- dplyr::select(out, "ID", dplyr::everything())
   }
 
   return(out)

From 6ee4bf0e6e5733c7f04313ad22afe38d96a65cf9 Mon Sep 17 00:00:00 2001
From: jeenatm <141190076+jeenatm@users.noreply.github.com>
Date: Tue, 7 Apr 2026 23:55:52 -0700
Subject: [PATCH 3/5] Fix imputation: use LastYrExacCount, move mMRC
 back-transform before loop, fix imputed_vars tracking

---
 R/predict.R | 49 ++++++++++++++++++++-----------------------------
 1 file changed, 20 insertions(+), 29 deletions(-)

diff --git a/R/predict.R b/R/predict.R
index 9891af9..eb944db 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -1133,7 +1133,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #' Missing optional predictors are imputed sequentially (triangular matrix):
 #' LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker.
 #' Each model uses mandatory predictors (age, male, mMRC, FEV1,
-#' LastYrSevExacCount, LastYrModExacCount) plus any previously imputed
+#' LastYrSevExacCount, LastYrExacCount) plus any previously imputed
 #' optional predictors. Binary variables are imputed with logistic regression
 #' (probability rounded to 0/1); BMI with linear regression.
 #'
@@ -1152,7 +1152,7 @@ accept3_uk <- function(patientData,
     stop("patientData must be a tibble. Use as_tibble() to convert.")
   }
 
-  # -- 0. Convert mMRC -> SGRQ if needed------------------
+  # 1. Convert mMRC -> SGRQ if needed
   if (!"SGRQ" %in% colnames(patientData)) {
     if ("CAT" %in% colnames(patientData)) {
       warning("SGRQ not found. Using CAT score instead.")
@@ -1165,40 +1165,40 @@ accept3_uk <- function(patientData,
     }
   }
 
-  # 1. UK-specific optional-predictor imputation
+  # 2. UK-specific optional-predictor imputation
   # Intercepts and coefficients from Table A3 of the ACCEPT 3.0-UK manuscript.
   # Column order in each coef vector:
   #   (Intercept), age, male, mMRC, FEV1, LastYrSevExacCount,
-  #   LastYrModExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]]
+  #   LastYrExacCount [, LABA [, oxygen [, ICS [, LAMA [, statin [, BMI]]]]]]
 
   uk_imp <- list(
 
     LABA = list(
       binary = TRUE,
-      coef   = c(0.418, -0.013, 0.009, 0.438, -0.004, 0.270, 0.333)
+      coef   = c(0.418, -0.013, 0.009, 0.438, -0.004, -0.063, 0.333)
     ),
 
     oxygen = list(
       binary = TRUE,
-      coef   = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.228, 0.040,
+      coef   = c(-7.600, 0.006, -0.197, 0.942, -0.005, 0.188, 0.040,
                  0.556)                                     # +LABA
     ),
 
     ICS = list(
       binary = TRUE,
-      coef   = c(-1.430, -0.004, -0.157, 0.085, -0.002, 0.138, 0.168,
+      coef   = c(-1.430, -0.004, -0.157, 0.085, -0.002, -0.030, 0.168,
                  3.372, 0.409)                              # +LABA, oxygen
     ),
 
     LAMA = list(
       binary = TRUE,
-      coef   = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.153, 0.153,
+      coef   = c(-0.586, -0.007, 0.089, 0.337, -0.003, 0.000, 0.153,
                  1.545, 0.203, -0.632)                     # +LABA, oxygen, ICS
     ),
 
     statin = list(
       binary = TRUE,
-      coef   = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.023, 0.009,
+      coef   = c(-2.711, 0.015, 0.200, -0.004, 0.001, -0.032, 0.009,
                  0.180, -0.196, -0.259, 0.207)             # +LABA, oxygen, ICS, LAMA
     ),
 
@@ -1206,13 +1206,13 @@ accept3_uk <- function(patientData,
       binary    = FALSE,
       clamp_low = 10,
       clamp_hi  = 70,
-      coef      = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.517, 0.010,
+      coef      = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.527, 0.010,
                     0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin
     ),
 
     smoker = list(
       binary = TRUE,
-      coef   = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.022, -0.017,
+      coef   = c(5.449, -0.061, -0.099, 0.123, 0.000, 0.039, -0.017,
                  -0.056, -0.648, -0.246, 0.069, 0.031, -0.058) # all optional
     )
   )
@@ -1220,22 +1220,13 @@ accept3_uk <- function(patientData,
   # Sequential imputation order matches the triangular matrix
   optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker")
 
-  # Rename LastYrModExacCount alias if only total count is present
-  if (!"LastYrModExacCount" %in% colnames(patientData) &&
-      "LastYrExacCount"    %in% colnames(patientData) &&
-      "LastYrSevExacCount" %in% colnames(patientData)) {
-    patientData$LastYrModExacCount <-
-      patientData$LastYrExacCount - patientData$LastYrSevExacCount
-  }
-
   # Use mMRC if available, otherwise back-transform from SGRQ
   if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) {
     patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
   }
 
   mandatory_preds <- c("age", "male", "mMRC", "FEV1",
-                       "LastYrSevExacCount", "LastYrModExacCount")
-
+                       "LastYrSevExacCount", "LastYrExacCount")
   imputed_vars <- character(0)
 
   for (vname in optional_order) {
@@ -1245,8 +1236,8 @@ accept3_uk <- function(patientData,
       spec      <- uk_imp[[vname]]
       pred_cols <- c(mandatory_preds, imputed_vars)
 
-      X <- as.matrix(cbind(1, patientData[, pred_cols]))
-      X <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X))
+      X  <- as.matrix(cbind(1, patientData[, pred_cols]))
+      X  <- matrix(as.numeric(X), nrow = nrow(X), ncol = ncol(X))
       lp <- as.vector(X %*% spec$coef)
 
       if (spec$binary) {
@@ -1269,24 +1260,24 @@ accept3_uk <- function(patientData,
       }
     }
 
-    # Always track available optional variables for subsequent models
+    # Always track all optional variables for subsequent models
+    # regardless of whether they needed imputation
     if (vname %in% colnames(patientData)) {
       imputed_vars <- c(imputed_vars, vname)
     }
   }
-
-  # 2. ACCEPT 2.0 predictions
+  # 3. ACCEPT 2.0 predictions
   accept2_preds <- accept2(patientData = patientData)
 
   p2_modsev <- accept2_preds$predicted_exac_probability
   p2_sev    <- accept2_preds$predicted_severe_exac_probability
 
-  # 3. UK recalibration
+  # 4. UK recalibration
   # Optimism-corrected parameters from CPRD bootstrap:
   #   Moderate-to-severe: Baseline Hazard H0 = 0.676 (95% CI 0.671-0.681)
   #                       Slope beta         = 0.986 (95% CI 0.977-0.995)
-  #   Severe:             Baseline Hazard H0 = 1.124 (95% CI 1.107-1.142)
-  #                       Slope beta         = 0.482 (95% CI 0.472-0.495)
+  #   Severe:             Baseline Hazard H0 = 0.482 (95% CI 0.472-0.495)
+  #                       Slope beta         = 1.124 (95% CI 1.107-1.142)
 
   H0_modsev   <- 0.676
   beta_modsev <- 0.986

From b3f35683d5e5116e70f0fd93c3bf563501e4139b Mon Sep 17 00:00:00 2001
From: jeenatm <141190076+jeenatm@users.noreply.github.com>
Date: Wed, 8 Apr 2026 11:28:47 -0700
Subject: [PATCH 4/5] Add tests, verbose flag, address all Copilot review
 comments

---
 NAMESPACE                     |  5 +++
 R/predict.R                   | 34 +++++++++++------
 man/accept3_uk.Rd             | 69 +++++++++++++++++++++++++++++++++++
 tests/testthat/test-predict.R | 56 ++++++++++++++++++++++++++--
 4 files changed, 149 insertions(+), 15 deletions(-)
 create mode 100644 man/accept3_uk.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 2a45c64..e371f63 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ export(accept)
 export(accept1)
 export(accept2)
 export(accept3)
+export(accept3_uk)
 export(plotExacerbations)
 export(plotHeatMap)
 export(predictCountProb)
@@ -16,6 +17,10 @@ export(set_openai_api_key)
 export(show_openai_api_key)
 import(dplyr)
 import(tidyselect)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(dplyr,starts_with)
+importFrom(dplyr,tibble)
 importFrom(hardhat,scream)
 importFrom(reldist,wtd.quantile)
 importFrom(splines,bs)
diff --git a/R/predict.R b/R/predict.R
index eb944db..11e5bd4 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -1103,11 +1103,11 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #'   \code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}.
 #'   Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA},
 #'   \code{statin}, \code{BMI}, \code{smoker}.
-#' @param prediction_interval Logical. If \code{TRUE}, also returns lower/upper
-#'   80\% prediction intervals. Default \code{FALSE}.
 #' @param return_predictors Logical. If \code{TRUE}, the input predictors are
 #'   returned alongside predictions. Default \code{FALSE}.
 #'
+#'@param verbose Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.
+#'
 #' @return A tibble with columns:
 #'   \itemize{
 #'     \item \code{predicted_exac_probability}        - recalibrated moderate-to-severe risk
@@ -1126,7 +1126,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #' CPRD Cox model (Table in manuscript):
 #' \itemize{
 #'   \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986}
-#'   \item Severe:             \eqn{H_0 = 1.124}, \eqn{\beta = 0.482}
+#'   \item Severe:             \eqn{H_0 = 0.482}, \eqn{\beta = 1.124}
 #' }
 #'
 #' ## Optional-predictor imputation
@@ -1145,12 +1145,18 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #' @importFrom dplyr tibble mutate select starts_with
 #' @export
 accept3_uk <- function(patientData,
-                       prediction_interval = FALSE,
-                       return_predictors   = FALSE) {
+                       return_predictors = FALSE,
+                       verbose = TRUE) {
 
   if (!tibble::is_tibble(patientData)) {
     stop("patientData must be a tibble. Use as_tibble() to convert.")
   }
+  if (any(patientData$LastYrSevExacCount > patientData$LastYrExacCount)) {
+    invalid_ids <- patientData$ID[patientData$LastYrSevExacCount > patientData$LastYrExacCount]
+    stop(paste0("LastYrSevExacCount exceeds LastYrExacCount for patient(s): ",
+                paste(invalid_ids, collapse = ", "),
+                ". Severe count cannot exceed total count."))
+  }
 
   # 1. Convert mMRC -> SGRQ if needed
   if (!"SGRQ" %in% colnames(patientData)) {
@@ -1205,7 +1211,7 @@ accept3_uk <- function(patientData,
     BMI = list(
       binary    = FALSE,
       clamp_low = 10,
-      clamp_hi  = 70,
+      clamp_hi  = 60,
       coef      = c(28.944, -0.074, 0.195, 0.421, 0.027, -0.527, 0.010,
                     0.451, -0.093, -0.051, -0.105, 1.629) # +LABA, oxygen, ICS, LAMA, statin
     ),
@@ -1221,10 +1227,16 @@ accept3_uk <- function(patientData,
   optional_order <- c("LABA", "oxygen", "ICS", "LAMA", "statin", "BMI", "smoker")
 
   # Use mMRC if available, otherwise back-transform from SGRQ
-  if (!"mMRC" %in% colnames(patientData) && "SGRQ" %in% colnames(patientData)) {
-    patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
+  if ("SGRQ" %in% colnames(patientData)) {
+    if (!"mMRC" %in% colnames(patientData)) {
+      patientData$mMRC <- (patientData$SGRQ - 20.43) / 14.77
+    } else {
+      na_idx <- is.na(patientData$mMRC)
+      if (any(na_idx)) {
+        patientData$mMRC[na_idx] <- (patientData$SGRQ[na_idx] - 20.43) / 14.77
+      }
+    }
   }
-
   mandatory_preds <- c("age", "male", "mMRC", "FEV1",
                        "LastYrSevExacCount", "LastYrExacCount")
   imputed_vars <- character(0)
@@ -1250,11 +1262,11 @@ accept3_uk <- function(patientData,
 
       if (!vname %in% colnames(patientData)) {
         patientData[[vname]] <- pred_vals
-        message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model."))
+        if (verbose) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model."))
       } else {
         na_idx <- is.na(patientData[[vname]])
         patientData[[vname]][na_idx] <- pred_vals[na_idx]
-        if (any(na_idx))
+        if (any(na_idx) && verbose)
           message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '",
                          vname, "' imputed using UK model."))
       }
diff --git a/man/accept3_uk.Rd b/man/accept3_uk.Rd
new file mode 100644
index 0000000..f0bbaca
--- /dev/null
+++ b/man/accept3_uk.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.R
+\name{accept3_uk}
+\alias{accept3_uk}
+\title{Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK}
+\usage{
+accept3_uk(patientData, return_predictors = FALSE)
+}
+\arguments{
+\item{patientData}{A tibble of patients in the same format as
+\code{samplePatients}. Mandatory columns: \code{ID}, \code{age},
+\code{male}, \code{FEV1}, \code{LastYrExacCount},
+\code{LastYrSevExacCount}, and either \code{mMRC} or \code{SGRQ}.
+Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA},
+\code{statin}, \code{BMI}, \code{smoker}.}
+
+\item{return_predictors}{Logical. If \code{TRUE}, the input predictors are
+returned alongside predictions. Default \code{FALSE}.}
+
+\item{verbose}{Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.}
+}
+\value{
+A tibble with columns:
+  \itemize{
+    \item \code{predicted_exac_probability}        - recalibrated moderate-to-severe risk
+    \item \code{predicted_exac_rate}               - corresponding rate (-log(1-p))
+    \item \code{predicted_severe_exac_probability} - recalibrated severe risk
+    \item \code{predicted_severe_exac_rate}        - corresponding rate
+  }
+  If \code{return_predictors = TRUE}, input columns are prepended.
+}
+\description{
+ACCEPT 3.0-UK is a UK-specific recalibration of ACCEPT 2.0 derived from the
+CPRD (Clinical Practice Research Datalink) primary-care dataset. It applies
+two Cox-model recalibration parameters: an optimism-corrected baseline
+hazard (H0) and slope (beta), separately for moderate-to-severe and severe
+exacerbations.
+
+When optional predictors (LABA, oxygen, ICS, LAMA, statin/CVD, BMI, smoker)
+are missing, a UK-specific sequential triangular regression imputation model
+fills them in before prediction.
+}
+\details{
+## Recalibration formula
+For each outcome the recalibrated 1-year risk is:
+\deqn{\hat{p}_{UK} = 1 - \exp\!\Bigl(-H_0 \cdot \exp(\beta \cdot \log(-\log(1-\hat{p}_{2})))\Bigr)}
+where \eqn{\hat{p}_{2}} is the ACCEPT 2.0 predicted probability, and
+\eqn{H_0} and \eqn{\beta} are the optimism-corrected parameters from the
+CPRD Cox model (Table in manuscript):
+\itemize{
+  \item Moderate-to-severe: \eqn{H_0 = 0.676}, \eqn{\beta = 0.986}
+  \item Severe:             \eqn{H_0 = 0.482}, \eqn{\beta = 1.124}
+}
+
+## Optional-predictor imputation
+Missing optional predictors are imputed sequentially (triangular matrix):
+LABA -> oxygen -> ICS -> LAMA -> statin -> BMI -> smoker.
+Each model uses mandatory predictors (age, male, mMRC, FEV1,
+LastYrSevExacCount, LastYrExacCount) plus any previously imputed
+optional predictors. Binary variables are imputed with logistic regression
+(probability rounded to 0/1); BMI with linear regression.
+}
+\examples{
+results <- accept3_uk(samplePatients)
+
+}
+\seealso{
+\code{\link{accept}}, \code{\link{accept2}}, \code{\link{accept3}}
+}
diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R
index a487783..f369716 100644
--- a/tests/testthat/test-predict.R
+++ b/tests/testthat/test-predict.R
@@ -21,11 +21,11 @@ test_that("accept3 default prediction works", {
   expect_true("predicted_severe_exac_probability" %in% names(result))
   expect_true("predicted_severe_exac_rate" %in% names(result))
   expect_equal(nrow(result), 2)
-  
+
   # Verify rate calculation: rate = -log(1-p)
   expected_rate <- -log(1 - result$predicted_exac_probability)
   expect_equal(result$predicted_exac_rate, expected_rate, tolerance = 1e-6)
-  
+
   expected_sev_rate <- -log(1 - result$predicted_severe_exac_probability)
   expect_equal(result$predicted_severe_exac_rate, expected_sev_rate, tolerance = 1e-6)
 })
@@ -35,9 +35,9 @@ test_that("accept3 requires country parameter", {
 })
 
 test_that("accept1 and accept2 warn when country parameter is provided", {
-  expect_warning(accept(samplePatients[1,], version="accept1", country="CAN"), 
+  expect_warning(accept(samplePatients[1,], version="accept1", country="CAN"),
                  "not used by accept1.*ignored")
-  expect_warning(accept(samplePatients[1,], version="accept2", country="USA"), 
+  expect_warning(accept(samplePatients[1,], version="accept2", country="USA"),
                  "not used by accept2.*ignored")
 })
 
@@ -372,3 +372,51 @@ test_that("Different SGRQ values produce different results (no conversion)", {
   # Different SGRQ values should produce different results
   expect_false(isTRUE(all.equal(result_45$predicted_exac_probability, result_50$predicted_exac_probability)))
 })
+
+
+
+test_that("accept3_uk returns correct columns and shape", {
+  results <- accept3_uk(samplePatients)
+  expect_true(tibble::is_tibble(results))
+  expect_equal(nrow(results), nrow(samplePatients))
+  expect_true(all(c("ID", "predicted_exac_probability",
+                    "predicted_exac_rate",
+                    "predicted_severe_exac_probability",
+                    "predicted_severe_exac_rate") %in% colnames(results)))
+})
+
+test_that("accept3_uk predictions are between 0 and 1", {
+  results <- accept3_uk(samplePatients)
+  expect_true(all(results$predicted_exac_probability > 0 & results$predicted_exac_probability < 1))
+  expect_true(all(results$predicted_severe_exac_probability > 0 & results$predicted_severe_exac_probability < 1))
+})
+
+test_that("accept3_uk gives expected predictions on fixed input", {
+  results <- accept3_uk(samplePatients)
+  expect_equal(results$predicted_exac_probability[1], 0.6986, tolerance = 0.001)
+  expect_equal(results$predicted_severe_exac_probability[1], 0.356, tolerance = 0.001)
+})
+
+test_that("accept3_uk imputes missing optional predictors", {
+  patients_no_optional <- samplePatients
+  patients_no_optional$LABA   <- NULL
+  patients_no_optional$oxygen <- NULL
+  patients_no_optional$ICS    <- NULL
+  patients_no_optional$LAMA   <- NULL
+  patients_no_optional$statin <- NULL
+  patients_no_optional$BMI    <- NULL
+  patients_no_optional$smoker <- NULL
+  results <- accept3_uk(patients_no_optional, verbose = FALSE)
+  expect_equal(nrow(results), nrow(samplePatients))
+  expect_true(all(results$predicted_exac_probability > 0 & results$predicted_exac_probability < 1))
+})
+
+test_that("accept3_uk errors on non-tibble input", {
+  expect_error(accept3_uk(as.data.frame(samplePatients)))
+})
+
+test_that("accept3_uk errors when severe count exceeds total", {
+  patients_invalid <- samplePatients
+  patients_invalid$LastYrSevExacCount[1] <- 99
+  expect_error(accept3_uk(patients_invalid))
+})

From 79a9b0b20aa22a98292a4af585ca430389361e18 Mon Sep 17 00:00:00 2001
From: jeenatm <141190076+jeenatm@users.noreply.github.com>
Date: Wed, 8 Apr 2026 18:26:28 -0700
Subject: [PATCH 5/5] Replace verbose with quiet flag, regenerate documentation

---
 R/predict.R                   | 8 ++++----
 man/accept3_uk.Rd             | 4 ++--
 tests/testthat/test-predict.R | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/predict.R b/R/predict.R
index 11e5bd4..258e349 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -1106,7 +1106,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #' @param return_predictors Logical. If \code{TRUE}, the input predictors are
 #'   returned alongside predictions. Default \code{FALSE}.
 #'
-#'@param verbose Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.
+#'@param quiet Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.
 #'
 #' @return A tibble with columns:
 #'   \itemize{
@@ -1146,7 +1146,7 @@ accept3 <- function(country, ID, age, male, BMI, smoker, mMRC = NA, CVD, ICS, LA
 #' @export
 accept3_uk <- function(patientData,
                        return_predictors = FALSE,
-                       verbose = TRUE) {
+                       quiet = FALSE) {
 
   if (!tibble::is_tibble(patientData)) {
     stop("patientData must be a tibble. Use as_tibble() to convert.")
@@ -1262,11 +1262,11 @@ accept3_uk <- function(patientData,
 
       if (!vname %in% colnames(patientData)) {
         patientData[[vname]] <- pred_vals
-        if (verbose) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model."))
+        if (quiet) message(paste0("accept3_uk: '", vname, "' not found - imputed using UK model."))
       } else {
         na_idx <- is.na(patientData[[vname]])
         patientData[[vname]][na_idx] <- pred_vals[na_idx]
-        if (any(na_idx) && verbose)
+        if (any(na_idx) && quiet)
           message(paste0("accept3_uk: ", sum(na_idx), " missing value(s) in '",
                          vname, "' imputed using UK model."))
       }
diff --git a/man/accept3_uk.Rd b/man/accept3_uk.Rd
index f0bbaca..25278c6 100644
--- a/man/accept3_uk.Rd
+++ b/man/accept3_uk.Rd
@@ -4,7 +4,7 @@
 \alias{accept3_uk}
 \title{Predicts COPD exacerbation risk for UK patients using ACCEPT 3.0-UK}
 \usage{
-accept3_uk(patientData, return_predictors = FALSE)
+accept3_uk(patientData, return_predictors = FALSE, quiet = FALSE)
 }
 \arguments{
 \item{patientData}{A tibble of patients in the same format as
@@ -17,7 +17,7 @@ Optional columns: \code{LABA}, \code{oxygen}, \code{ICS}, \code{LAMA},
 \item{return_predictors}{Logical. If \code{TRUE}, the input predictors are
 returned alongside predictions. Default \code{FALSE}.}
 
-\item{verbose}{Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.}
+\item{quiet}{Logical. If \code{FALSE}, suppresses imputation messages. Default \code{TRUE}.}
 }
 \value{
 A tibble with columns:
diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R
index f369716..a7a9117 100644
--- a/tests/testthat/test-predict.R
+++ b/tests/testthat/test-predict.R
@@ -406,7 +406,7 @@ test_that("accept3_uk imputes missing optional predictors", {
   patients_no_optional$statin <- NULL
   patients_no_optional$BMI    <- NULL
   patients_no_optional$smoker <- NULL
-  results <- accept3_uk(patients_no_optional, verbose = FALSE)
+  results <- accept3_uk(patients_no_optional, quiet = TRUE)
   expect_equal(nrow(results), nrow(samplePatients))
   expect_true(all(results$predicted_exac_probability > 0 & results$predicted_exac_probability < 1))
 })