This R Markdown document contains the code for the case study about lot acceptance testing for a composite material.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(furrr)
## Loading required package: future
## 
## Attaching package: 'future'
## 
## The following object is masked from 'package:rmarkdown':
## 
##     run
library(cmstatr)
library(EquivSample)
library(latex2exp)

plan(multisession, workers = 12)

First, read the data from disk.

dat <- read_csv("lot-acceptance-data.csv",
                col_types = list(col_character(),
                                 col_double()))

First, we’ll look at the qualification sample and compute some sample statistics.

qual_stats <- dat %>%
  filter(grepl("Qual.*", Lot)) %>% 
  summarise(
    mean = mean(Strength),
    sd = sd(Strength),
    n = n()
  )
qual_stats
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  255.  18.7    22

We can calculate the B-Basis value from the qualification sample. In this case, the company that provided the qualification sample did not indicate which observations were from which lot of material (in a multi-lot qualification sample), so we can’t do the within-batch and between-batch diagnostic tests that are normally done when computing Basis values. For more information on this, see CMH-17-1G or the documentation for the R Package cmstatr.

dat %>%
  filter(grepl("Qual.*", Lot)) %>%
  basis_normal(
    Strength,
    p = 0.90,
    conf = 0.95,
    override = c("outliers_within_batch",
                 "between_batch_variability")
  )
## 
## Call:
## basis_normal(data = ., x = Strength, p = 0.9, conf = 0.95, override = c("outliers_within_batch", 
##     "between_batch_variability"))
## 
## Distribution:  Normal    ( n = 22 )
## The following diagnostic tests were overridden:
##     `outliers_within_batch`,
##     `between_batch_variability`
## B-Basis:   ( p = 0.9 , conf = 0.95 )
## 219.57

In this case study, we consider two values of \(\alpha\). For these two values of \(\alpha\) we’ll calculate the acceptance limits using both Vangel’s method and the two-sample method presented in the accompanying paper.

factors <- (dat %>% 
  filter(!grepl("Qual.*", Lot)) %>% 
  group_by(Lot) %>% 
  summarise(m = n()))[["m"]] %>% 
  unique() %>% 
  future_map_dfr(
    function(m) {
      map_dfr(
        c(0.01, 0.05),
        function(alpha) {
          two_sample <- k_equiv_two_sample(qual_stats$n[[1]], m, alpha)
          vangel <- k_equiv(alpha, m)
          data.frame(
            m = m,
            alpha = alpha,
            Method = c("Vangel", "Two-Sample"),
            k1 = c(vangel[1], two_sample[1]),
            k2 = c(vangel[2], two_sample[2])
          )
        }
      )
    }
  ) %>% 
  mutate(A1 = qual_stats$mean[1] - k1 * qual_stats$sd[1],
         A2 = qual_stats$mean[1] - k2 * qual_stats$sd[1])
factors
##   m alpha     Method       k1       k2       A1       A2
## 1 3  0.01     Vangel 2.902734 1.466618 200.5307 227.4342
## 2 3  0.01 Two-Sample 3.312500 1.700856 192.8543 223.0461
## 3 3  0.05     Vangel 2.324068 1.086896 211.3711 234.5477
## 4 3  0.05 Two-Sample 2.544922 1.212721 207.2338 232.1906

Based on these acceptance limits, we can evaluate each of the lots to determine whether they would be accepted.

acceptance_res <- dat %>% 
  filter(!grepl("Qual.*", Lot)) %>% 
  group_by(Lot) %>% 
  summarise(
    `Lot Min` = min(Strength),
    `Lot Mean` = mean(Strength),
    `Lot m` = n()
  ) %>% 
  inner_join(
    factors,
    by = c("Lot m" = "m")
  ) %>% 
  mutate(Reject = `Lot Min` < A1 | `Lot Mean` < A2) %>% 
  select(-c(k1, k2))
acceptance_res
## # A tibble: 556 × 9
##    Lot   `Lot Min` `Lot Mean` `Lot m` alpha Method        A1    A2 Reject
##    <chr>     <dbl>      <dbl>   <int> <dbl> <chr>      <dbl> <dbl> <lgl> 
##  1 001        237.       246.       3  0.01 Vangel      201.  227. FALSE 
##  2 001        237.       246.       3  0.01 Two-Sample  193.  223. FALSE 
##  3 001        237.       246.       3  0.05 Vangel      211.  235. FALSE 
##  4 001        237.       246.       3  0.05 Two-Sample  207.  232. FALSE 
##  5 002        227.       244.       3  0.01 Vangel      201.  227. FALSE 
##  6 002        227.       244.       3  0.01 Two-Sample  193.  223. FALSE 
##  7 002        227.       244.       3  0.05 Vangel      211.  235. FALSE 
##  8 002        227.       244.       3  0.05 Two-Sample  207.  232. FALSE 
##  9 003        249.       260.       3  0.01 Vangel      201.  227. FALSE 
## 10 003        249.       260.       3  0.01 Two-Sample  193.  223. FALSE 
## # … with 546 more rows
## # ℹ Use `print(n = ...)` to see more rows

We can summarize the number of lots that would be rejected using each of the acceptance limits as follows:

acceptance_res %>% 
  group_by(Method, alpha) %>% 
  summarise(`Rejected Lots` = sum(Reject),
            `Total Lots` = n(),
            .groups = "drop") %>% 
  arrange(alpha) %>% 
  group_by(Method, alpha, `Total Lots`, `Rejected Lots`) %>% 
  nest() %>%
  mutate(binom = map(data, ~binom.test(`Rejected Lots`, `Total Lots`)),
          `Conf Int` = map(binom, ~paste0(
           sprintf(.x$conf.int, fmt = "%.3f"), collapse = " - "
           ))) %>% 
  select(-c(data, binom)) %>% 
  unnest(`Conf Int`) %>% 
  mutate(`Rejection Rate Est` = `Rejected Lots` / `Total Lots`)
## # A tibble: 4 × 6
## # Groups:   Method, alpha, Total Lots, Rejected Lots [4]
##   Method     alpha `Rejected Lots` `Total Lots` `Conf Int`    Rejection Rate E…¹
##   <chr>      <dbl>           <int>        <int> <chr>                      <dbl>
## 1 Two-Sample  0.01               2          139 0.002 - 0.051             0.0144
## 2 Vangel      0.01               3          139 0.004 - 0.062             0.0216
## 3 Two-Sample  0.05               6          139 0.016 - 0.092             0.0432
## 4 Vangel      0.05              11          139 0.040 - 0.137             0.0791
## # … with abbreviated variable name ¹​`Rejection Rate Est`
acceptance_res %>% 
  mutate(Lot = as.integer(Lot)) %>% 
  ggplot(aes(x = Lot, y = `Lot Min`)) +
  geom_point() +
  geom_hline(aes(yintercept = A1, color = Method, linetype = alpha),
             data = factors %>%
               mutate(alpha = as_factor(alpha)))

acceptance_res %>% 
  mutate(Lot = as.integer(Lot)) %>% 
  ggplot(aes(x = Lot, y = `Lot Mean`)) +
  geom_point() +
  geom_hline(aes(yintercept = A2, color = Method, linetype = alpha),
             data = factors %>%
               mutate(alpha = as_factor(alpha)))

g <- acceptance_res %>%
  filter(alpha == 0.05) %>%
    select(-c(Reject)) %>% 
  pivot_longer(cols = c(`Lot Min`, `Lot Mean`),
             names_to = "Variable",
             values_to = "Strength") %>%
  mutate(Result = if_else(
    Variable == "Lot Min",
    Strength < A1,
    Strength < A2
  )) %>%
  select(-c(A1, A2)) %>%
  pivot_wider(names_from = "Method", values_from = "Result") %>%
  mutate(Result = case_when(
    Vangel & `Two-Sample` ~ "Both", # 8
    Vangel ~ "Vangel", # 3
    `Two-Sample` ~ "Two-Sample", # 4
    TRUE ~ "Accept" # 20
  )) %>%
  mutate(Result = lvls_expand(
    as_factor(Result),
    c("Both", "Vangel", "Two-Sample", "Accept")
  )) %>%
  mutate(Lot = as.integer(Lot)) %>%
  ggplot(aes(x = Lot, y = Strength, shape = Result)) +
  geom_point() +
  geom_hline(
    aes(
      yintercept = Strength,
      color = `Acceptance Limit`,
      linetype = `Acceptance Limit`
    ),
    data = factors %>%
      filter(alpha == 0.05) %>%
      rename(`Acceptance Limit` = Method) %>%
      pivot_longer(cols = c("A1", "A2"),
                   names_to = "Variable",
                   values_to = "Strength") %>%
      mutate(Variable = if_else(Variable == "A1", "Lot Min", "Lot Mean"))
  ) +
  facet_grid(Variable ~ .) +
  theme_bw() +
  scale_shape_manual(
    values = c(4, 3, 8, 20),
    breaks = c("Vangel", "Two-Sample", "Both", "Accept"),
    labels = c("Reject, Vangel Only",
               "Reject, Two-Sample Only  ",
               "Reject, Both Methods",
               "Accept"),
    drop = FALSE
  ) +
  theme(axis.title.y = element_text(margin = margin(
    t = 0,
    r = 10,
    b = 0,
    l = 0)),
    legend.box.margin = margin(r = 10)
    )
ggsave(filename = "figure12.eps", 
       plot = g, 
       device = "eps", 
       dpi = 1200, 
       width = 6,
       height = 3, 
       units = "in"
)
g

Power Comparison

mu_pop <- qual_stats$mean
sd_pop <- qual_stats$sd
mu_equiv <- seq(210, mu_pop, length.out = 9)
n_qual_2 <- 5000
n_equiv_2 <- 5000
power_simulation <- function(delta_list, equiv_generation) {
  cur_n <- qual_stats$n
  cur_m <- 3
  
  future_map_dfr(
    c(0.01, 0.05),
    .options = furrr_options(seed = 23456),
    function(sim_alpha) {
      vangel <- k_equiv(sim_alpha, cur_m)
      k1 <- vangel[1]
      k2 <- vangel[2]
      
      r1 <- factors$k1[factors$m == cur_m
                       & factors$alpha == sim_alpha
                       & factors$Method == "Two-Sample"]
      r2 <- factors$k2[factors$m == cur_m
                       & factors$alpha == sim_alpha
                       & factors$Method == "Two-Sample"]
      
      t_crit <- qt(1 - sim_alpha / 2, cur_n - 1)
      f_crit <- qf(1 - sim_alpha / 2, cur_m - 1, cur_n - 1)
      
      future_map_dfr(
        mu_equiv,
        .options = furrr_options(seed = 23456),
        function(mu_equiv_cur) {
          equiv_samples <- future_map_dfr(
            1:n_equiv_2,
            .options = furrr_options(seed = 34567),
            function(i_equiv) {
              x <- equiv_generation(i_equiv, mu_equiv_cur, cur_m)
              data.frame(
                min = min(x),
                avg = mean(x),
                sd_eq = sd(x)
              )
            }
          )
          
          future_map_dfr(
            1:n_qual_2,
            .options = furrr_options(seed = 45678),
            function(i_qual) {
              x_qual <- rnorm(cur_n, mu_pop, sd_pop)
              avg_qual <- mean(x_qual)
              sd_qual <- sd(x_qual)
              
              # Ref. Escobar, pp. 55
              t_min <- avg_qual - t_crit * sqrt(1 / cur_n + 1 / cur_m) * sd_qual
              
              # Ref. Escobar, pp. 58
              f_max <- sd_qual * sqrt(f_crit)
              
              accept_min <- equiv_samples$min > avg_qual - r1 * sd_qual
              accept_avg <- equiv_samples$avg > avg_qual - r2 * sd_qual
              accept_t <- equiv_samples$avg >= t_min
              accept_f <- equiv_samples$sd_eq <= f_max
              accept_vangel_min <- equiv_samples$min > avg_qual - k1 * sd_qual
              accept_vangel_avg <- equiv_samples$avg > avg_qual - k2 * sd_qual
              
              data.frame(
                accept = c(accept_min & accept_avg,
                           accept_t & accept_f,
                           accept_vangel_min & accept_vangel_avg),
                method = as.factor(c(rep_len("Two-Sample", n_equiv_2),
                                     rep_len("MSD", n_equiv_2),
                                     rep_len("Vangel", n_equiv_2)))
              )
            }
          ) %>% 
            group_by(method) %>% 
            summarise(`Rejection Rate` = sum(!accept) / n(),
                      .groups = "drop") %>%
            mutate(n = cur_n, m = cur_m, mu = mu_equiv_cur, alpha = sim_alpha)
        }
      )
    }
  )
}

sim_power_mean <- power_simulation(
  mu_equiv,
  function(i_equiv, mu_equiv_cur, cur_m) {
    rnorm(cur_m, mu_equiv_cur, sd_pop)
  }
)
ls_method_breaks <- c("Two-Sample", "Vangel", "MSD")
ls_method_values <- c("solid", "dashed", "dotted")

# tikz(file = "case_study_simulation.tex", width = 6, height = 3, timestamp = FALSE)

g <- sim_power_mean %>% 
  mutate(`Nominal alpha` = as.character(alpha)) %>% 
  rename(Method = method) %>% 
  ggplot(aes(x = mu, y = `Rejection Rate`, color = `Nominal alpha`,
             linetype = Method, shape = `Nominal alpha`)) +
  geom_line() +
  geom_point(size = 2) +
  xlab(TeX("$\\mu$")) +
  theme_bw() +
  theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) +
  scale_shape_manual(values = c("0.01" = 3, "0.05" = 16)) +
  scale_linetype_manual(
    breaks = ls_method_breaks,
    values = ls_method_values
  )
ggsave(filename = "figure13.eps", 
       plot = g, 
       device = "eps", 
       dpi = 1200, 
       width = 6,
       height = 3, 
       units = "in"
)
g

For the case where \(\delta = 0\), the rejection rates are as follows:

sim_power_mean %>% 
  filter(mu == mu_pop)
## # A tibble: 6 × 6
##   method     `Rejection Rate`     n     m    mu alpha
##   <fct>                 <dbl> <int> <dbl> <dbl> <dbl>
## 1 MSD                 0.00916    22     3  255.  0.01
## 2 Two-Sample          0.00968    22     3  255.  0.01
## 3 Vangel              0.0220     22     3  255.  0.01
## 4 MSD                 0.0465     22     3  255.  0.05
## 5 Two-Sample          0.0476     22     3  255.  0.05
## 6 Vangel              0.0709     22     3  255.  0.05