We can generalize my Dunnett framework into a modular validator that supports multiple test types (Williams, Welch/Student t, Wilcoxon, Kruskal–Wallis + Dunn, Fisher, Shapiro/Levene), each with its own discovery, execution, and metric mapping. Below are concise, practical code suggestions and targeted R commands I need you to run so I can tailor the handlers precisely.
# Existing 'dunnett' entry stays the same
test_registry <- list(
dunnett = list(
keyword = "Dunnett",
discover_fgs = function(res, data) build_dunnett_fgs(res, data),
run_actual = function(endpoint_data, alternative) {
out <- drcHelper::dunnett_test(
data = endpoint_data,
response_var = "Response",
dose_var = "Dose_numeric",
include_random_effect = FALSE,
alternative = alternative
)
actual_df <- as.data.frame(out$results_table)
actual_df$Dose <- dose_from_comparison(as.character(actual_df$comparison))
names(actual_df)[names(actual_df) == "statistic"] <- "Actual_T"
names(actual_df)[names(actual_df) == "p.value"] <- "Actual_P"
names(actual_df)[names(actual_df) == "estimate"] <- "Actual_Diff"
rownames(actual_df) <- NULL
group_means <- endpoint_data %>%
dplyr::mutate(Dose = convert_dose(Dose)) %>%
dplyr::filter(!is.na(Dose)) %>%
dplyr::group_by(Dose) %>%
dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "T-value", "P-value"),
comparator = "many_to_one"
),
# Williams trend test (uses tukeytrend; drcHelper has broom_williams/getwilliamRes)
williams = list(
keyword = "Williams",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Williams"),
run_actual = function(endpoint_data, alternative) {
# Placeholder: will wire once we see the output shapes
# Likely via drcHelper::broom_williams or drcHelper::getwilliamRes
stop("Williams handler: please provide structure of broom_williams/getwilliamRes outputs (see commands below).")
},
metrics = c("Mean", "T-value", "P-value", "Tcrit", "df", "significance"),
comparator = "trend"
),
# Student t-test (var.equal = TRUE) many-to-one (dose vs control)
student_t = list(
keyword = "Student's t-test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Student's t-test"),
run_actual = function(endpoint_data, alternative) {
# Compute per-dose vs control t-tests (unadjusted)
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose, na.rm = TRUE)) %>% dplyr::pull(Response)
by_dose <- ed %>% dplyr::filter(Dose != min(Dose, na.rm = TRUE)) %>% split(.$Dose)
rows <- lapply(names(by_dose), function(d) {
trt <- by_dose[[d]]$Response
tt <- stats::t.test(trt, ctrl, alternative = switch(tolower(alternative),
"smaller" = "less", "greater" = "greater", "two-sided" = "two.sided"),
var.equal = TRUE)
data.frame(Dose = as.numeric(d), Actual_T = unname(tt$statistic), Actual_P = tt$p.value)
})
actual_df <- dplyr::bind_rows(rows)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "T-value", "P-value", "df"),
comparator = "many_to_one"
),
# Welch t-test (var.equal = FALSE)
welch = list(
keyword = "Welch",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Welch"),
run_actual = function(endpoint_data, alternative) {
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose, na.rm = TRUE)) %>% dplyr::pull(Response)
by_dose <- ed %>% dplyr::filter(Dose != min(Dose, na.rm = TRUE)) %>% split(.$Dose)
rows <- lapply(names(by_dose), function(d) {
trt <- by_dose[[d]]$Response
tt <- stats::t.test(trt, ctrl, alternative = switch(tolower(alternative),
"smaller" = "less", "greater" = "greater", "two-sided" = "two.sided"),
var.equal = FALSE)
data.frame(Dose = as.numeric(d), Actual_T = unname(tt$statistic), Actual_P = tt$p.value)
})
actual_df <- dplyr::bind_rows(rows)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "T-value", "P-value", "df"),
comparator = "many_to_one"
),
# Wilcoxon (Mann–Whitney) per dose vs control
wilcoxon = list(
keyword = "Wilcoxon test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Wilcoxon test"),
run_actual = function(endpoint_data, alternative) {
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose, na.rm = TRUE)) %>% dplyr::pull(Response)
by_dose <- ed %>% dplyr::filter(Dose != min(Dose, na.rm = TRUE)) %>% split(.$Dose)
rows <- lapply(names(by_dose), function(d) {
trt <- by_dose[[d]]$Response
wt <- stats::wilcox.test(trt, ctrl, alternative = switch(tolower(alternative),
"smaller" = "less", "greater" = "greater", "two-sided" = "two.sided"))
data.frame(Dose = as.numeric(d), Actual_W = unname(wt$statistic), Actual_P = wt$p.value)
})
actual_df <- dplyr::bind_rows(rows) # Note: metric names differ (W-value, P-value)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "W-value", "P-value"),
comparator = "many_to_one"
),
# Kruskal–Wallis + Dunn’s post-hoc
dunn = list(
keyword = "Dunn's test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Dunn's test"),
run_actual = function(endpoint_data, alternative) {
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ed <- ed %>% dplyr::filter(!is.na(Dose))
# KW omnibus
kw <- stats::kruskal.test(Response ~ as.factor(Dose), data = ed)
# Dunn post-hoc many-to-one vs control
if (!requireNamespace("rstatix", quietly = TRUE)) stop("Please install rstatix for Dunn's test")
dunn_res <- rstatix::dunn_test(ed, Response ~ as.factor(Dose), p.adjust.method = "bonferroni") # adjust if needed
# Extract many-to-one contrasts vs lowest Dose (control)
ctrl_label <- as.character(min(ed$Dose, na.rm = TRUE))
dunn_vs_ctrl <- dunn_res %>%
dplyr::filter(grepl(paste0("^", ctrl_label, " - "), comparison) | grepl(paste0(" - ", ctrl_label, "$"), comparison)) %>%
dplyr::mutate(Dose = dose_from_comparison(comparison)) %>%
dplyr::rename(Actual_z = statistic, Actual_P = p.adj)
actual_df <- dunn_vs_ctrl %>% dplyr::select(Dose, Actual_z, Actual_P)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means, kw = kw)
},
metrics = c("Mean", "z-value", "P-value"), # plus optionally H-statistic for KW
comparator = "many_to_one"
),
# Fisher’s exact (binary endpoints; drcHelper provides many_to_one_fisher_test)
fisher = list(
keyword = "Fishers' test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Fishers' test"),
run_actual = function(endpoint_data, alternative) {
# Structure depends on Alive/Dead counts; we’ll wire after seeing a concrete example
stop("Fisher handler: please provide example endpoint_data (Alive/Dead/Total) and expected rows.")
},
metrics = c("P-value", "significance"),
comparator = "many_to_one"
),
# Model tests (Shapiro/Levene) on residuals
shapiro = list(
keyword = "Shapiro-Wilk",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Shapiro-Wilk"),
run_actual = function(endpoint_data, alternative) {
# Are these computed on residuals from Response ~ Dose? Please confirm
stop("Shapiro handler: confirm target vector (residuals vs raw group).")
},
metrics = c("W-value", "P-value"),
comparator = "per_group"
),
levene = list(
keyword = "Levene",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Levene"),
run_actual = function(endpoint_data, alternative) {
stop("Levene handler: confirm target (raw groups vs residuals).")
},
metrics = c("F-value", "P-value"),
comparator = "per_group"
)
)
w_ep <- test_cases_data %>% dplyr::filter(`Study ID` == "MOCK0065") # replace with the actual Williams study
w_ep$Dose_numeric <- convert_dose(w_ep$Dose)
w_out <- try(drcHelper::williamsTest_JG(data = w_ep, response_var = "Response", dose_var = "Dose_numeric"), silent = TRUE)
str(w_out)
bw_out <- try(drcHelper::broom_williams(data = w_ep, response_var = "Response", dose_var = "Dose_numeric"), silent = TRUE)
str(bw_out)
test_cases_res %>%
dplyr::filter(grepl("Williams", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
test_cases_res %>%
dplyr::filter(grepl("Dunn's test", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
test_cases_res %>%
dplyr::filter(grepl("Wilcoxon test", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
test_cases_res %>%
dplyr::filter(grepl("Fishers' test", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
Once I see these structures, I’ll wire the run_actual handlers to return the same shape as your Dunnett handler (actual_df + group_means), extend the expected metric parser for labels like “Tcrit”, “df”, “z-value”, “W-value”, “%Inhibition/%Reduction”, and finalize tolerances by metric/test.
We can generalize my Dunnett framework into a modular validator that supports multiple test types (Williams, Welch/Student t, Wilcoxon, Kruskal–Wallis + Dunn, Fisher, Shapiro/Levene), each with its own discovery, execution, and metric mapping. Below are concise, practical code suggestions and targeted R commands I need you to run so I can tailor the handlers precisely.
What we’ll reuse
Registry design (extensible)
Register each test type with:
Code: extend the registry with stubs you can fill in
Generic runner
Dose embedded in Brief description
When preparing expected tables, set
Dose = coalesce(convert_dose(Dose), dose_from_description(Brief description)).Key formulas you’ll likely need
If your expected results use a different baseline or direction, tell me and I’ll adapt.
What I need you to run so I can finalize handlers and metric mapping
Please run these on one Williams FG/study and paste the outputs:
lm(Response ~ Dose_numeric)? If yes, we’ll fit the same lm, then use stats::shapiro.test on residuals and car::leveneTest on grouped raw data, or another agreed approach. Please confirm.lm(Response ~ Dose_factor)Once I see these structures, I’ll wire the run_actual handlers to return the same shape as your Dunnett handler (actual_df + group_means), extend the expected metric parser for labels like “Tcrit”, “df”, “z-value”, “W-value”, “%Inhibition/%Reduction”, and finalize tolerances by metric/test.