Causal Estimands

Lucy D’Agostino McGowan

  • Estimands
  • Estimators
  • Estimates

Estimand

@tigga_mac TikTok

Estimator



https://www.bluey.tv/make/bluey-celebration-cake/

Estimate


Coles Mudcake Hacks Facebook

Estimand

\[E[Y(1) - Y(0)]\]

Estimator

\[\sum_{i=1}^N\frac{Y_i\times X_i}{N_A}-\frac{Y_i \times(1 - X_i)}{N_B}\]

Estimate

library(tidyverse)
set.seed(928)
ab <- tibble(
  x = rbinom(100, 1, 0.5),
  y = x + rnorm(100)
)

ab |>
  summarise(
    n_a = sum(x),
    n_b = sum(1 - x),
    estimate = sum(
      (y * x) / n_a -
        y * (1 - x) / n_b
    )
  )
# A tibble: 1 × 3
    n_a   n_b estimate
  <int> <dbl>    <dbl>
1    54    46     1.15

Weighted estimator

\[\frac{\sum_{i=1}^NY_i\times X_i\times w_i}{\sum_{i=1}^NX_i\times w_i}-\frac{\sum_{i=1}^NY_i\times(1-X_i)\times w_i}{\sum_{i=1}^N(1-X_i)\times w_i}\]

Fit the propensity model

library(broom)
library(touringplans)

seven_dwarfs <- seven_dwarfs_train_2018 |>
  filter(wait_hour == 9) |>
  mutate(park_extra_magic_morning = factor(
    park_extra_magic_morning,
    labels = c("No Magic Hours", "Extra Magic Hours")
  ))

seven_dwarfs_with_ps <- glm(
  park_extra_magic_morning ~ park_ticket_season + park_close + park_temperature_high,
  data = seven_dwarfs,
  family = binomial()
) |>
  augment(type.predict = "response", data = seven_dwarfs)

Examine the unadjusted covariates

library(gtsummary)
library(labelled)
seven_dwarfs_with_ps <- seven_dwarfs_with_ps |>
  set_variable_labels(
    park_ticket_season = "Ticket Season",
    park_close = "Close Time",
    park_temperature_high = "Historic High Temperature"
  )

tbl_summary(
  seven_dwarfs_with_ps,
  by = park_extra_magic_morning,
  include = c(park_ticket_season, park_close, park_temperature_high)
) |>
  # add an overall column to the table
  add_overall(last = TRUE)

Examine the unadjusted covariates

Characteristic No Magic Hours, N = 2941 Extra Magic Hours, N = 601 Overall, N = 3541
Ticket Season
    peak 60 (20%) 18 (30%) 78 (22%)
    regular 158 (54%) 35 (58%) 193 (55%)
    value 76 (26%) 7 (12%) 83 (23%)
Close Time
    16:30:00 1 (0.3%) 0 (0%) 1 (0.3%)
    18:00:00 37 (13%) 18 (30%) 55 (16%)
    20:00:00 18 (6.1%) 2 (3.3%) 20 (5.6%)
    21:00:00 28 (9.5%) 0 (0%) 28 (7.9%)
    22:00:00 91 (31%) 11 (18%) 102 (29%)
    23:00:00 78 (27%) 11 (18%) 89 (25%)
    24:00:00 40 (14%) 17 (28%) 57 (16%)
    25:00:00 1 (0.3%) 1 (1.7%) 2 (0.6%)
Historic High Temperature 84 (78, 89) 83 (76, 87) 84 (78, 89)
1 n (%); Median (IQR)

Average treatment effect

Question

Should we make every day an extra magic morning (or not)?

Estimand

\(E[Y(1)-Y(0)]\)

Weight

\[w_{ATE} = \frac{X}{p} + \frac{(1 - X)}{1 - p}\]

ATE weights

library(propensity)
seven_dwarfs_wts <- seven_dwarfs_with_ps |>
  mutate(w_ate = wt_ate(.fitted, park_extra_magic_morning))

ggplot(seven_dwarfs_wts, aes(x = w_ate)) +
  geom_histogram(bins = 50)

ATE weights

ATE weighted table

library(survey)
seven_dwarfs_svy <- svydesign(
  ids = ~1,
  data = seven_dwarfs_wts,
  weights = ~w_ate
)
tbl_svysummary(
  seven_dwarfs_svy,
  by = park_extra_magic_morning,
  include = c(park_ticket_season, park_close, park_temperature_high)
) |>
  add_overall(last = TRUE)

ATE weighted table

Characteristic No Magic Hours, N = 3541 Extra Magic Hours, N = 3571 Overall, N = 7111
Ticket Season
    peak 78 (22%) 81 (23%) 160 (22%)
    regular 193 (54%) 187 (52%) 380 (53%)
    value 83 (23%) 89 (25%) 172 (24%)
Close Time
    16:30:00 2 (0.4%) 0 (0%) 2 (0.2%)
    18:00:00 50 (14%) 72 (20%) 122 (17%)
    20:00:00 20 (5.6%) 19 (5.3%) 39 (5.5%)
    21:00:00 31 (8.7%) 0 (0%) 31 (4.3%)
    22:00:00 108 (30%) 86 (24%) 193 (27%)
    23:00:00 95 (27%) 81 (23%) 176 (25%)
    24:00:00 48 (14%) 94 (26%) 142 (20%)
    25:00:00 1 (0.3%) 6 (1.6%) 7 (1.0%)
Historic High Temperature 84 (78, 89) 83 (78, 87) 84 (78, 88)
1 n (%); Median (IQR)

ATE weighted histogram

library(halfmoon)
ggplot(seven_dwarfs_wts, aes(.fitted, group = park_extra_magic_morning)) +
  geom_mirror_histogram(bins = 50) +
  geom_mirror_histogram(
    aes(fill = park_extra_magic_morning, weight = w_ate),
    bins = 50,
    alpha = 0.5
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "propensity score",
    fill = "Extra Magic Morning"
  )

ATE weighted histogram

Average treatment effect among the treated

Question

Should we stop extra magic hours on days that have them?

Estimand

\(E[Y(1)-Y(0) | X = 1]\)

Weight

\[w_{ATT} = X + \frac{(1 - X)p}{1 - p}\]

ATT weights

seven_dwarfs_wts <- seven_dwarfs_wts |>
  mutate(w_att = wt_att(.fitted, park_extra_magic_morning))

ggplot(seven_dwarfs_wts, aes(w_att)) +
  geom_histogram(bins = 50)

ATT weights

ATT weighted table

seven_dwarfs_svy <- svydesign(
  ids = ~1,
  data = seven_dwarfs_wts,
  weights = ~w_att
)
tbl_svysummary(
  seven_dwarfs_svy,
  by = park_extra_magic_morning,
  include = c(park_ticket_season, park_close, park_temperature_high)
) |>
  add_overall(last = TRUE)

ATT weighted table

Characteristic No Magic Hours, N = 601 Extra Magic Hours, N = 601 Overall, N = 1201
Ticket Season
    peak 18 (30%) 18 (30%) 36 (30%)
    regular 35 (58%) 35 (58%) 70 (58%)
    value 7 (12%) 7 (12%) 14 (12%)
Close Time
    16:30:00 1 (0.9%) 0 (0%) 1 (0.4%)
    18:00:00 13 (21%) 18 (30%) 31 (26%)
    20:00:00 2 (3.3%) 2 (3.3%) 4 (3.3%)
    21:00:00 3 (4.6%) 0 (0%) 3 (2.3%)
    22:00:00 17 (28%) 11 (18%) 28 (23%)
    23:00:00 17 (28%) 11 (18%) 28 (23%)
    24:00:00 8 (14%) 17 (28%) 25 (21%)
    25:00:00 0 (0.3%) 1 (1.7%) 1 (1.0%)
Historic High Temperature 83 (74, 88) 83 (75, 87) 83 (75, 88)
1 n (%); Median (IQR)

ATT weighted histogram

ggplot(seven_dwarfs_wts, aes(.fitted, group = park_extra_magic_morning)) +
  geom_mirror_histogram(bins = 50) +
  geom_mirror_histogram(
    aes(fill = park_extra_magic_morning, weight = w_att),
    bins = 50,
    alpha = 0.5
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "propensity score",
    fill = "Extra Magic Morning"
  )

ATT weighted histogram

Application Exercise

  1. What causal question would the ATC weights answer?
  2. What causal question would the ATM/ATO weights answer?

Application Exercise

Open appex-03

  1. Calculate ATO weights
  2. Create a weighted table using the ATO weights
  3. Examine a mirrored histogram using the ATO weights
  4. What causal question is this answering?