This R Markdown document contains the code for the case study about lot acceptance testing for a composite material.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(furrr)
## Loading required package: future
##
## Attaching package: 'future'
##
## The following object is masked from 'package:rmarkdown':
##
## run
library(cmstatr)
library(EquivSample)
library(latex2exp)
plan(multisession, workers = 12)
First, read the data from disk.
dat <- read_csv("lot-acceptance-data.csv",
col_types = list(col_character(),
col_double()))
Next, we’ll look at the qualification sample and compute some sample statistics.
qual_stats <- dat %>%
filter(grepl("Qual.*", Lot)) %>%
summarise(
mean = mean(Strength),
sd = sd(Strength),
n = n()
)
qual_stats
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 255. 18.7 22
We can calculate the B-Basis value from the qualification sample. In this case, the company that provided the qualification sample did not indicate which observations came from which lot of material (in a multi-lot qualification sample), so we can’t perform the within-batch and between-batch diagnostic tests that are normally done when computing Basis values. For more information, see CMH-17-1G or the documentation for the R package cmstatr.
dat %>%
filter(grepl("Qual.*", Lot)) %>%
basis_normal(
Strength,
p = 0.90,
conf = 0.95,
override = c("outliers_within_batch",
"between_batch_variability")
)
##
## Call:
## basis_normal(data = ., x = Strength, p = 0.9, conf = 0.95, override = c("outliers_within_batch",
## "between_batch_variability"))
##
## Distribution: Normal ( n = 22 )
## The following diagnostic tests were overridden:
## `outliers_within_batch`,
## `between_batch_variability`
## B-Basis: ( p = 0.9 , conf = 0.95 )
## 219.57
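As a quick sanity check (not part of the original analysis), the normal-distribution B-Basis value is simply the qualification mean minus a one-sided tolerance factor times the qualification standard deviation. The short sketch below assumes that cmstatr’s k_factor_normal() function is available to compute that factor.
# Sanity check: reproduce the B-Basis value from the one-sided tolerance factor.
# k_factor_normal() is assumed to return the normal tolerance factor for
# n observations, content p, and confidence conf.
k_b <- k_factor_normal(n = qual_stats$n, p = 0.90, conf = 0.95)
qual_stats$mean - k_b * qual_stats$sd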
In this case study, we consider two values of \(\alpha\): 0.01 and 0.05. For each of these values of \(\alpha\), we’ll calculate the acceptance limits using both Vangel’s method and the two-sample method presented in the accompanying paper.
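Both methods produce a pair of factors \(k_1\) and \(k_2\) that are applied to the qualification sample statistics to obtain an acceptance limit for the lot minimum and for the lot mean, respectively:

\[ A_1 = \bar{x}_{qual} - k_1 \, s_{qual}, \qquad A_2 = \bar{x}_{qual} - k_2 \, s_{qual} \]

A lot is rejected if its minimum falls below \(A_1\) or its mean falls below \(A_2\); this is how the columns A1, A2, and Reject are computed in the code below.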
factors <- (dat %>%
filter(!grepl("Qual.*", Lot)) %>%
group_by(Lot) %>%
summarise(m = n()))[["m"]] %>%
unique() %>%
future_map_dfr(
function(m) {
map_dfr(
c(0.01, 0.05),
function(alpha) {
two_sample <- k_equiv_two_sample(qual_stats$n[[1]], m, alpha)
vangel <- k_equiv(alpha, m)
data.frame(
m = m,
alpha = alpha,
Method = c("Vangel", "Two-Sample"),
k1 = c(vangel[1], two_sample[1]),
k2 = c(vangel[2], two_sample[2])
)
}
)
}
) %>%
mutate(A1 = qual_stats$mean[1] - k1 * qual_stats$sd[1],
A2 = qual_stats$mean[1] - k2 * qual_stats$sd[1])
factors
## m alpha Method k1 k2 A1 A2
## 1 3 0.01 Vangel 2.902734 1.466618 200.5307 227.4342
## 2 3 0.01 Two-Sample 3.312500 1.700856 192.8543 223.0461
## 3 3 0.05 Vangel 2.324068 1.086896 211.3711 234.5477
## 4 3 0.05 Two-Sample 2.544922 1.212721 207.2338 232.1906
Based on these acceptance limits, we can evaluate each of the lots to determine whether they would be accepted.
acceptance_res <- dat %>%
filter(!grepl("Qual.*", Lot)) %>%
group_by(Lot) %>%
summarise(
`Lot Min` = min(Strength),
`Lot Mean` = mean(Strength),
`Lot m` = n()
) %>%
inner_join(
factors,
by = c("Lot m" = "m")
) %>%
mutate(Reject = `Lot Min` < A1 | `Lot Mean` < A2) %>%
select(-c(k1, k2))
acceptance_res
## # A tibble: 556 × 9
## Lot `Lot Min` `Lot Mean` `Lot m` alpha Method A1 A2 Reject
## <chr> <dbl> <dbl> <int> <dbl> <chr> <dbl> <dbl> <lgl>
## 1 001 237. 246. 3 0.01 Vangel 201. 227. FALSE
## 2 001 237. 246. 3 0.01 Two-Sample 193. 223. FALSE
## 3 001 237. 246. 3 0.05 Vangel 211. 235. FALSE
## 4 001 237. 246. 3 0.05 Two-Sample 207. 232. FALSE
## 5 002 227. 244. 3 0.01 Vangel 201. 227. FALSE
## 6 002 227. 244. 3 0.01 Two-Sample 193. 223. FALSE
## 7 002 227. 244. 3 0.05 Vangel 211. 235. FALSE
## 8 002 227. 244. 3 0.05 Two-Sample 207. 232. FALSE
## 9 003 249. 260. 3 0.01 Vangel 201. 227. FALSE
## 10 003 249. 260. 3 0.01 Two-Sample 193. 223. FALSE
## # … with 546 more rows
## # ℹ Use `print(n = ...)` to see more rows
We can summarize the number of lots that would be rejected using each of the acceptance limits as follows:
acceptance_res %>%
group_by(Method, alpha) %>%
summarise(`Rejected Lots` = sum(Reject),
`Total Lots` = n(),
.groups = "drop") %>%
arrange(alpha) %>%
group_by(Method, alpha, `Total Lots`, `Rejected Lots`) %>%
nest() %>%
mutate(binom = map(data, ~binom.test(`Rejected Lots`, `Total Lots`)),
`Conf Int` = map(binom, ~paste0(
sprintf(.x$conf.int, fmt = "%.3f"), collapse = " - "
))) %>%
select(-c(data, binom)) %>%
unnest(`Conf Int`) %>%
mutate(`Rejection Rate Est` = `Rejected Lots` / `Total Lots`)
## # A tibble: 4 × 6
## # Groups: Method, alpha, Total Lots, Rejected Lots [4]
## Method alpha `Rejected Lots` `Total Lots` `Conf Int` Rejection Rate E…¹
## <chr> <dbl> <int> <int> <chr> <dbl>
## 1 Two-Sample 0.01 2 139 0.002 - 0.051 0.0144
## 2 Vangel 0.01 3 139 0.004 - 0.062 0.0216
## 3 Two-Sample 0.05 6 139 0.016 - 0.092 0.0432
## 4 Vangel 0.05 11 139 0.040 - 0.137 0.0791
## # … with abbreviated variable name ¹`Rejection Rate Est`
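The Conf Int column above is the exact (Clopper–Pearson) 95% confidence interval returned by binom.test(). As an illustrative check, the interval for the Vangel method at \(\alpha = 0.05\), which rejected 11 of the 139 lots, can be reproduced directly:
# Exact 95% confidence interval for 11 rejected lots out of 139;
# this reproduces the last row of the table above.
binom.test(11, 139)$conf.int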
acceptance_res %>%
mutate(Lot = as.integer(Lot)) %>%
ggplot(aes(x = Lot, y = `Lot Min`)) +
geom_point() +
geom_hline(aes(yintercept = A1, color = Method, linetype = alpha),
data = factors %>%
mutate(alpha = as_factor(alpha)))
acceptance_res %>%
mutate(Lot = as.integer(Lot)) %>%
ggplot(aes(x = Lot, y = `Lot Mean`)) +
geom_point() +
geom_hline(aes(yintercept = A2, color = Method, linetype = alpha),
data = factors %>%
mutate(alpha = as_factor(alpha)))
g <- acceptance_res %>%
filter(alpha == 0.05) %>%
select(-c(Reject)) %>%
pivot_longer(cols = c(`Lot Min`, `Lot Mean`),
names_to = "Variable",
values_to = "Strength") %>%
mutate(Result = if_else(
Variable == "Lot Min",
Strength < A1,
Strength < A2
)) %>%
select(-c(A1, A2)) %>%
pivot_wider(names_from = "Method", values_from = "Result") %>%
mutate(Result = case_when(
Vangel & `Two-Sample` ~ "Both", # 8
Vangel ~ "Vangel", # 3
`Two-Sample` ~ "Two-Sample", # 4
TRUE ~ "Accept" # 20
)) %>%
mutate(Result = lvls_expand(
as_factor(Result),
c("Both", "Vangel", "Two-Sample", "Accept")
)) %>%
mutate(Lot = as.integer(Lot)) %>%
ggplot(aes(x = Lot, y = Strength, shape = Result)) +
geom_point() +
geom_hline(
aes(
yintercept = Strength,
color = `Acceptance Limit`,
linetype = `Acceptance Limit`
),
data = factors %>%
filter(alpha == 0.05) %>%
rename(`Acceptance Limit` = Method) %>%
pivot_longer(cols = c("A1", "A2"),
names_to = "Variable",
values_to = "Strength") %>%
mutate(Variable = if_else(Variable == "A1", "Lot Min", "Lot Mean"))
) +
facet_grid(Variable ~ .) +
theme_bw() +
scale_shape_manual(
values = c(4, 3, 8, 20),
breaks = c("Vangel", "Two-Sample", "Both", "Accept"),
labels = c("Reject, Vangel Only",
"Reject, Two-Sample Only ",
"Reject, Both Methods",
"Accept"),
drop = FALSE
) +
theme(axis.title.y = element_text(margin = margin(
t = 0,
r = 10,
b = 0,
l = 0)),
legend.box.margin = margin(r = 10)
)
ggsave(filename = "figure12.eps",
plot = g,
device = "eps",
dpi = 1200,
width = 6,
height = 3,
units = "in"
)
g
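The remainder of the analysis uses simulation to estimate the rejection rate of each acceptance method when the mean of the population that the acceptance samples are drawn from falls below the qualification mean. The qualification population is modeled as normal with the qualification sample’s mean and standard deviation; acceptance samples of size \(m = 3\) are drawn with means ranging from 210 up to the qualification mean. For each condition and each value of \(\alpha\), 5000 simulated qualification samples are each compared against 5000 simulated acceptance samples using the Two-Sample, Vangel, and MSD criteria.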
mu_pop <- qual_stats$mean
sd_pop <- qual_stats$sd
mu_equiv <- seq(210, mu_pop, length.out = 9)
n_qual_2 <- 5000
n_equiv_2 <- 5000
power_simulation <- function(delta_list, equiv_generation) {
cur_n <- qual_stats$n
cur_m <- 3
future_map_dfr(
c(0.01, 0.05),
.options = furrr_options(seed = 23456),
function(sim_alpha) {
# Vangel's single-sample factors for the lot minimum (k1) and lot mean (k2)
vangel <- k_equiv(sim_alpha, cur_m)
k1 <- vangel[1]
k2 <- vangel[2]
# Two-sample factors taken from the `factors` table computed earlier
r1 <- factors$k1[factors$m == cur_m
& factors$alpha == sim_alpha
& factors$Method == "Two-Sample"]
r2 <- factors$k2[factors$m == cur_m
& factors$alpha == sim_alpha
& factors$Method == "Two-Sample"]
# Critical values for the MSD method: a t-based limit on the lot mean and
# an F-based limit on the lot standard deviation
t_crit <- qt(1 - sim_alpha / 2, cur_n - 1)
f_crit <- qf(1 - sim_alpha / 2, cur_m - 1, cur_n - 1)
future_map_dfr(
delta_list, # the acceptance-population means supplied by the caller
.options = furrr_options(seed = 23456),
function(mu_equiv_cur) {
equiv_samples <- future_map_dfr(
1:n_equiv_2,
.options = furrr_options(seed = 34567),
function(i_equiv) {
x <- equiv_generation(i_equiv, mu_equiv_cur, cur_m)
data.frame(
min = min(x),
avg = mean(x),
sd_eq = sd(x)
)
}
)
future_map_dfr(
1:n_qual_2,
.options = furrr_options(seed = 45678),
function(i_qual) {
x_qual <- rnorm(cur_n, mu_pop, sd_pop)
avg_qual <- mean(x_qual)
sd_qual <- sd(x_qual)
# Ref. Escobar, pp. 55
t_min <- avg_qual - t_crit * sqrt(1 / cur_n + 1 / cur_m) * sd_qual
# Ref. Escobar, pp. 58
f_max <- sd_qual * sqrt(f_crit)
accept_min <- equiv_samples$min > avg_qual - r1 * sd_qual
accept_avg <- equiv_samples$avg > avg_qual - r2 * sd_qual
accept_t <- equiv_samples$avg >= t_min
accept_f <- equiv_samples$sd_eq <= f_max
accept_vangel_min <- equiv_samples$min > avg_qual - k1 * sd_qual
accept_vangel_avg <- equiv_samples$avg > avg_qual - k2 * sd_qual
data.frame(
accept = c(accept_min & accept_avg,
accept_t & accept_f,
accept_vangel_min & accept_vangel_avg),
method = as.factor(c(rep_len("Two-Sample", n_equiv_2),
rep_len("MSD", n_equiv_2),
rep_len("Vangel", n_equiv_2)))
)
}
) %>%
group_by(method) %>%
summarise(`Rejection Rate` = sum(!accept) / n(),
.groups = "drop") %>%
mutate(n = cur_n, m = cur_m, mu = mu_equiv_cur, alpha = sim_alpha)
}
)
}
)
}
sim_power_mean <- power_simulation(
mu_equiv,
function(i_equiv, mu_equiv_cur, cur_m) {
rnorm(cur_m, mu_equiv_cur, sd_pop)
}
)
ls_method_breaks <- c("Two-Sample", "Vangel", "MSD")
ls_method_values <- c("solid", "dashed", "dotted")
# tikz(file = "case_study_simulation.tex", width = 6, height = 3, timestamp = FALSE)
g <- sim_power_mean %>%
mutate(`Nominal alpha` = as.character(alpha)) %>%
rename(Method = method) %>%
ggplot(aes(x = mu, y = `Rejection Rate`, color = `Nominal alpha`,
linetype = Method, shape = `Nominal alpha`)) +
geom_line() +
geom_point(size = 2) +
xlab(TeX("$\\mu$")) +
theme_bw() +
theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) +
scale_shape_manual(values = c("0.01" = 3, "0.05" = 16)) +
scale_linetype_manual(
breaks = ls_method_breaks,
values = ls_method_values
)
ggsave(filename = "figure13.eps",
plot = g,
device = "eps",
dpi = 1200,
width = 6,
height = 3,
units = "in"
)
g
For the case where \(\delta = 0\), i.e. where the acceptance-population mean \(\mu\) equals the qualification mean, the rejection rates are as follows:
sim_power_mean %>%
filter(mu == mu_pop)
## # A tibble: 6 × 6
## method `Rejection Rate` n m mu alpha
## <fct> <dbl> <int> <dbl> <dbl> <dbl>
## 1 MSD 0.00916 22 3 255. 0.01
## 2 Two-Sample 0.00968 22 3 255. 0.01
## 3 Vangel 0.0220 22 3 255. 0.01
## 4 MSD 0.0465 22 3 255. 0.05
## 5 Two-Sample 0.0476 22 3 255. 0.05
## 6 Vangel 0.0709 22 3 255. 0.05