1 Set up
We first create the necessary directories, if they don't exist already.
# Save simulation output here
dir.create("./models/sims", FALSE, TRUE)
# Save figure output here
dir.create("./figs/sample-size", FALSE)
2 Sample size
2.1 Estimate teams' standard deviation
We obtain an estimate of the teams' standard deviation based on data from Silberzahn et al.
# Load in Silberzahn et al. data: https://osf.io/fa743/
silber <- read_csv("https://osf.io/fa743/download") %>%
mutate(
z_score = log(OR) / ((log(OR) - log(OR_lo)) / 1.96)
) %>%
# Filter out extremely high z_scores
filter(z_score < 6)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Team = col_double(),
## Analytic.Approach = col_character(),
## Effect.size.units = col_character(),
## Estimate = col_double(),
## Low = col_double(),
## Hi = col_double(),
## OR = col_double(),
## OR_lo = col_double(),
## OR_hi = col_double()
## )
# Mean z-score
round(mean(silber$z_score), 2)
## [1] 2.29
# Set weakly informative priors for intercept and scale parameters.
priors <- c(
prior(normal(0, 3), class = Intercept),
prior(cauchy(0, 1), class = sd)
)
# Run brm to estimate teams' SD (= `(1 | Team)` in the model)
silber_sd <- brm(
z_score ~ (1 | Team),
data = silber,
prior = priors,
chain = 4,
iter = 4000,
cores = 4,
seed = my_seed,
# Use cmdstanr to enable multi-core multi-threading processing
backend = "cmdstanr", threads = threading(4),
control = list(adapt_delta = 0.9999, max_treedepth = 15),
file = "./models/sims/silber_sd"
)
# Extract SD estimate and 95 CrI
SD <- summary(silber_sd)$random$Team[1]
## Warning: There were 3 divergent transitions after warmup. Increasing adapt_delta
## above may help. See http://mc-stan.org/misc/warnings.html#divergent-transitions-
## after-warmup
lower <- summary(silber_sd)$random$Team[3]
## Warning: There were 3 divergent transitions after warmup. Increasing adapt_delta
## above may help. See http://mc-stan.org/misc/warnings.html#divergent-transitions-
## after-warmup
upper <- summary(silber_sd)$random$Team[4]
## Warning: There were 3 divergent transitions after warmup. Increasing adapt_delta
## above may help. See http://mc-stan.org/misc/warnings.html#divergent-transitions-
## after-warmup
# Create a vector of quantiles of the estimated teams' SD
# min, 1st quartile, mean, 3rd quartile, max = 0.00 0.25 0.50 0.75 1.00
quantiles <- quantile(seq(lower, upper, by = 0.1), probs = seq(0, 1, 0.25), na.rm = FALSE)
quantiles <- c(quantiles[[1]], quantiles[[2]], quantiles[[3]], quantiles[[4]], quantiles[[5]])
n_teams <- seq(5, 40, by = 5)
# Get all unique combos of number of teams and quantiles of SD.
n_ts_qs <- expand_grid(n_teams, quantiles)
2.2 Prepare functions for simulation
The following functions define routines to simulate data, fir the models to the data, and set up the simulation loop.
# Simulate data ####
# For each iteration in the simulation loop, simulate data table of z-scores
# n = number of teams
# sd = teams' SD used to simulate z-scores
simulate_data <- function(n, sd) {
tibble::tibble(
z_score = rnorm(n, 0, sd),
team = letters[1:n]
)
}
# Fit brm to simulated data ####
run_brm <- function(data) {
# weakly informative priors
priors <- c(
brms::prior(normal(0, 3), class = Intercept),
brms::prior(cauchy(0, 1), class = sd)
)
# fit brm
meta_bm <- brms::brm(
z_score ~
(1 | team),
data = data,
prior = priors,
chain = 4,
iter = 6000,
cores = 4,
backend = "cmdstanr", threads = brms::threading(4),
control = list(adapt_delta = 0.9999, max_treedepth = 15),
)
return(meta_bm)
}
# Simulation iteration loop ####
# This function defines the simulation loop.
# The mean and SD of the simulated teams' SD across 10 iterations (per
# n-teams/sd combos) are returned.
run_simulations <- function(n, sd) {
data <- simulate_data(n, sd)
sd_team_i <- numeric()
log_info("Model: {n}, {sd}")
for (iter in 1:10) {
log_info("--- Iteration {iter}")
brm_model <- run_brm(data)
brm_summ <- summary(brm_model)
sd_team_i <- c(sd_team_i, brm_summ$random$team[4] - brm_summ$random$team[3])
}
sd_team_mean <- mean(sd_team_i, na.rm = TRUE)
sd_team_sd <- sd(sd_team_i, na.rm = TRUE)
return(list(sd_team_mean, sd_team_sd))
}
2.3 Run the simulations
sample_file <- "./models/sims/sample_sims.rds"
# If results file already exists, read it. Otherwise run the simulations.
if (file.exists(sample_file)) {
# read results file
log_info("Reading file...\n")
sample_sims <- readRDS(sample_file)
log_info("Done!")
} else {
# run simulations
sample_sims <- tibble(
n_teams = n_ts_qs$n_teams,
quants = n_ts_qs$quantiles,
sd_team = map2(
n_teams, quants,
~run_simulations(.x, .y)
)
)
# save results to file
saveRDS(sample_sims, sample_file)
}
2.4 Plot results
sample_cri <- sample_sims %>%
mutate(
cri = as.numeric(map(sd_team, ~.x[[1]])),
cri_sd = as.numeric(map(sd_team, ~.x[[2]]))
)
labels <- tibble(
x = c(0.5, 0.25, 0.9, 0.3, 1.3),
y = c(1.25, 0.7, 1.5, 0.1, 0.9),
labs = c("Mean", "1st quart.", "3rd quart.", "Min", "Max")
)
arrows <- tibble(
x1 = c(0.5, 0.25, 0.9, 0.25, 1.3),
y1 = c(1.15, 0.6, 1.4, 0.1, 1),
x2 = c(0.63, 0.33, 0.97, 0.05, 1.33),
y2 = c(0.8, 0.45, 1.2, 0.1, 1.2)
)
sample_cri %>%
ggplot(aes(quants, cri)) +
geom_line(aes(colour = as.factor(n_teams))) +
geom_point(aes(colour = as.factor(n_teams)), size = 4) +
geom_hline(yintercept = 1) +
geom_hline(yintercept = 2, linetype = "dashed") +
geom_text(
data = labels, aes(x, y, label = labs),
family = "Lato"
) +
geom_curve(
data = arrows, aes(x = x1, y = y1, xend = x2, yend = y2),
arrow = arrow(length = unit(0.07, "inch")), size = 0.4,
color = "gray20", curvature = 0.3
) +
scale_x_continuous(breaks = round(quantiles, 2)) +
scale_color_manual(values = c("#e0ecf4", "#bfd3e6", "#9ebcda", "#8c96c6", "#8c6bb1", "#88419d", "#810f7c", "#4d004b")) +
labs(
title = "95% CrI width depending on teams' true SD and number of teams",
x = "Teams' true standard deviation",
y = "95% CrI width",
colour = "N of teams",
caption = "The values on the x-axis are the quartiles (0, 0.25, 0.5, 0.75, 1)\nof the estimated teams' SD from Silberzahn et al."
)
ggsave("./figs/sample-size/cri-width-nteams-true-sd.pdf", width = 7, height = 5, device = cairo_pdf)
3 Simulate data
We first simulate a dataset (eta_tbl
) of standardised effect sizes. Each team (\(n = 12\)) contributes one standardized effect size \(\eta_i\), with respective standard error \(\text{se}_i\).
3.1 Helper functions
Let's create helper functions:
at_least()
ensures that there is at least a1
when sampling for predictors (= there must be at least one predictor in the model).sdi_mean()
calculates the mean SDI for the pop-level and group-level predictions.
at_least <- function(x) {
if (1 %in% x) {x} else {sample(c(1, 0, 0, 0, 0))}
}
sdi_mean <- function(preds) {
bpair <- tibble::tibble(preds) %>% tidyr::unnest_wider(preds, names_sep = ".") %>%
as.matrix() %>%
betapart::beta.pair(.)
sdim <- as.matrix(bpair$beta.sor)
diag(sdim) <- NA
rowSums(sdim, na.rm = T) / length(preds)
}
3.2 Tibble
The \(\eta_i\) is randomly sampled from a \(Normal(1, 0.5)\) distribution, while \(\text{se}_i\) from a \(HalfCauchy(0, 0.15)\). See comments in code for the other variables.
set.seed(my_seed)
# Number of teams, each team contributes one effect size
n <- 12
# standardized effect size
eta <- rnorm(n, 1, 0.5)
# standard error of the standardised effect size
se <- rhcauchy(n, 0.15)
# label each team
team <- letters[1:n]
# pop-level predictors
pop_pred <- lapply(replicate(n, list(rbinom(5, 1, 0.5))), at_least)
# group-level predictors
group_pred <- lapply(replicate(n, list(rbinom(5, 1, 0.5))), at_least)
# number of post-hoc changes to the measures
phoc_n <- rpois(n, 1)
# number of models run
mod_n <- sample(1:5, n, replace = T)
# major dimension
mdim <- sample(c("duration", "amplitude", "f0", "spectral", "other"), n, TRUE)
# temporal window
twin <- sample(c("segment", "syllable", "word", "phrase"), n, T)
# reviewers' ratings
rating <- replicate(n, sample(0:100, sample(3:4, 1), replace = T))
# research experience as years from PhD
exp <- replicate(n, sample(-3:20, sample(1:10, 1), replace = T))
# prior belief in the research hypothesis
belf <- lapply(lengths(exp), function(.x) rbinom(.x, 1, 0.5))
We can now put the tibble together.
# put together in a tibble
eta_tbl <- tibble(
eta,
se,
team,
pop_pred,
pop_n = map(pop_pred, sum),
group_pred,
group_n = map(group_pred, sum),
pop_sdi = sdi_mean(pop_pred),
phoc_n,
mod_n,
mdim,
twin,
rating,
exp,
belf,
rating_mean = unlist(map(rating, mean)),
rating_sd = unlist(map(rating, sd)),
exp_mean = unlist(map(exp, mean)),
exp_sd = unlist(map(exp, sd)),
belf_mean = unlist(map(belf, mean)),
belf_sd = unlist(map(belf, sd))
) %>%
mutate(
# need the following in cases where score = 0
rating_mean = ifelse(rating_mean == 0, 0.01, rating_mean),
exp_mean = ifelse(exp_mean == 0, 0.01, exp_mean),
belf_mean = ifelse(belf_mean == 0, 0.01, belf_mean),
# need the following in cases where there is only one person in the team
rating_sd = ifelse(is.na(rating_sd) | rating_sd == 0, 0.01, rating_sd),
exp_sd = ifelse(is.na(exp_sd) | exp_sd == 0, 0.01, exp_sd),
belf_sd = ifelse(is.na(belf_sd) | belf_sd == 0, 0.01, belf_sd)
)
4 Meta-analytical model
See paper for details.
To summarize the variability in reported effect sized, we will follow Bayesian random-effects meta-analytical techniques. Since the variables used by the analysis teams might substantially differ in their measurement scales (e.g, Hz for frequency vs ms for duration), we will standardize all reported effects by refitting each reported model with centered and scaled continuous variables (z-scores, i.e. the observed values subtracted from the mean divided by the standard deviation) and sum-coded factor variables.
4.1 Model formula
The estimated coefficients of the critical predictors (i.e. critical according to the analysis teams' self-reported inferential criteria), as obtained from the standardized models, will be used as the standardized effect size (\(\eta_i\)) of each reported model. If multiple predictors within a single analysis have been reported as critical, each will be included in the meta-analytical model (described in details in the next paragraph). Moreover, to account for the differing degree of uncertainty around each standardized effect size, we will use the standard deviation of each effect size returned by the standardized models as the standard error (\(\text{se}_i\)) of the effect size.
After having obtained the standardized effect sizes \(\eta_i\) with related standard errors \(\text{se}_i\), for each critical predictor of the individual reported analyses, the initiating authors will fit a Bayesian random-effects meta-analysis using a multilevel (intercept-only) regression model, as described in the following. The outcome variable will be the set of standardized effect sizes \(\eta_i\). The likelihood of \(\eta_i\) is assumed to correspond to a normal distribution (Knight 2000). The analysis teams will be entered as a group-level effect (i.e., random effect, (1 | team)
). The standard errors \(\text{se}_i\) will be included as the standard deviation \(\sigma_i\) of \(\eta_i\) to fit a measurement-error model, as discussed above. We will use regularizing weakly-informative priors for the intercept (\(Normal(0, 1)\)) and for the group-level standard deviation \(\sigma_{\alpha_{\text{t}}}\) (\(HalfCauchy(0, 1)\)).
\[ \begin{aligned} \eta_i & \sim \text{Normal}(\mu_i, \sigma_i) \\ \mu_i & = \alpha + \alpha_{\text{t}[i]} \\ \alpha & \sim \text{Normal}(0, 1) \\ \sigma_{\alpha_{\text{t}}} & \sim \text{HalfCauchy}(0, 1) \\ \sigma_i & = \text{se}_i \end{aligned} \]
4.2 Model priors
For the meta-analytical model, we will use the following priors:
- A \(Normal(0, 1)\) distribution for the prior of the intercept \(\alpha\).
- A \(HalfCauchy(0, 1)\) distribution for the prior of the teams' intercept standard deviation \(\sigma_{\alpha_{\text{t}}}\).
The following graph illustrates the density function of \(HalfCauchy(0, 1)\). The long tail ensures that values of \(\sigma_{\alpha_{\text{t}}}\) greater than 5 be possible but less probable. This is desirable since the effect sizes are standardized (i.e. they should in principle be within a [-3, +3] range).
tibble(
x = seq(0, 10, by = 0.05),
density = dhcauchy(x, 1)
) %>%
ggplot(aes(x, density)) +
geom_area(fill = "#325756", alpha = 0.9) +
labs(
title = "HalfCauchy(0, 1)",
x = "Teams' intercept standard deviation"
)
priors <- c(
prior(normal(0, 1), class = Intercept),
prior(cauchy(0, 1), class = sd)
)
4.2.1 Prior predictive checks
We run prior predictive checks to assess the goodness of the prior specification before model fitting.
# run checks
meta_bm_ppred <- brm(
eta | se(se) ~
(1 | team),
data = eta_tbl,
prior = priors,
sample_prior = "only",
chain = 4,
seed = my_seed,
file = "./models/sims/meta_bm_ppred"
)
# get samples
ppred_samples <- posterior_samples(meta_bm_ppred) %>%
rename(intercept = b_Intercept, `group-level SD` = sd_team__Intercept)
We can now inspect the prior probability distributions of the intercept and the standard deviation.
ppred_long <- ppred_samples %>%
pivot_longer(intercept:`group-level SD`, names_to = "term") %>%
mutate(term = factor(term, levels = c("intercept", "group-level SD")))
int <- ppred_long %>%
filter(term == "intercept") %>%
ggplot(aes(value)) +
geom_density(colour = NA, fill = "#325756", alpha = 0.9, bw = 0.4) +
labs(title = "Intercept")
sd <- ppred_long %>%
filter(term == "group-level SD") %>%
ggplot(aes(value)) +
geom_density(colour = NA, fill = "#325756", alpha = 0.9, bw = 1) +
xlim(0, 25) +
labs(title = "Group-level SD")
int + sd
And the joint prior probability distribution.
ppred_samples %>%
select(intercept, `r_team[a,Intercept]`:`r_team[l,Intercept]`) %>%
pivot_longer(-intercept, names_to = "team") %>%
mutate(eta = intercept + value) %>%
ggplot(aes(eta)) +
geom_density(colour = NA, fill = "#325756", alpha = 0.9, bw = 0.4) +
xlim(-7, 7)
4.3 Run
The following code fits the meta-analytical model to the simulated data.
meta_bm <- brm(
eta | se(se) ~
(1 | team),
data = eta_tbl,
prior = priors,
chain = 4,
seed = my_seed,
cores = 4,
file = "./models/sims/meta_bm"
)
meta_bm
## Warning: There were 35 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: eta | se(se) ~ (1 | team)
## Data: eta_tbl (Number of observations: 12)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~team (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.24 0.08 0.13 0.45 1.01 984 1208
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.98 0.10 0.79 1.18 1.00 883 1234
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.00 0.00 0.00 0.00 1.00 4000 4000
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(meta_bm)
The degree of variability among the team's reported effects is within the range [0.41, 1.14] in SD units at 95% probability.
References
Knight, K. 2000. Mathematical Statistics. Chapman; Hall, New York.