Analyse nach Intersectional Multilevel Analysis of Individual Heterogeneity and Discriminatory Accuracy (MAIHDA)

Autor:in

Lukas Batschelet

Veröffentlichungsdatum

16. August 2025

Analyse nach Evans et al. (2024).

Um die Darstellung etwas übersichtlicher zu halten ist berechnender Code standarmässig ausgeklappt, und Code zur Darstellung und Übersetzung nicht.

1 Setup

Code
# Pakete
library(tidyverse)
library(janitor)
library(readr)
library(lme4)
library(performance)
library(broom.mixed)
library(gt)
library(forcats)
library(here)
library(scales)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
theme_set(
  theme_minimal(base_size = 11) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      plot.title       = element_text(face = "bold"),
      plot.subtitle    = element_text(face = "plain")
    )
)
# Reproduzierbarkeit
set.seed(42)
options(dplyr.summarise.inform = FALSE)

# Arbeitsverzeichnis
here()
[1] "/Users/luba/Documents/GitHub/Designing-InterMind"
Code
# Einlesen & Spaltennamen bereinigen
survey <- read_csv("survey_answers.csv", show_col_types = FALSE) |> clean_names()
people <- read_csv("people_data.csv",   show_col_types = FALSE) |> clean_names()

# Schlüssel prüfen
stopifnot("device_id" %in% names(survey), "device_id" %in% names(people))
if (nrow(people) != n_distinct(people$device_id)) {
  warning("Hinweis: people_data enthält doppelte device_id-Einträge. Bitte bereinigen.")
}

2 Deskriptive Statistik (Stichprobe)

2.1 Grundzahlen, Verteilung „Surveys pro Person“, Missingness

Code
# Grundzahlen & Inputs für Tabellen/Plots

# Snapshots & Geräte
N_snapshots <- nrow(survey)
N_devices   <- dplyr::n_distinct(survey$device_id)

# Verteilung: Anzahl Surveys pro Person
surveys_per_person <- survey |>
  dplyr::count(device_id, name = "n_surveys")

# Kennzahlen (wie pandas .describe)
surveys_per_person_summ <- surveys_per_person |>
  dplyr::summarise(
    n    = dplyr::n(),
    min  = min(n_surveys, na.rm = TRUE),
    q25  = as.numeric(quantile(n_surveys, 0.25, na.rm = TRUE)),
    med  = as.numeric(quantile(n_surveys, 0.50, na.rm = TRUE)),
    q75  = as.numeric(quantile(n_surveys, 0.75, na.rm = TRUE)),
    max  = max(n_surveys, na.rm = TRUE),
    mean = mean(n_surveys, na.rm = TRUE),
    sd   = sd(n_surveys,   na.rm = TRUE)
  )

# Missing-Anteil je Spalte (in %)
na_share_tbl <- survey |>
  dplyr::summarise(dplyr::across(dplyr::everything(), ~ mean(is.na(.)) * 100)) |>
  tidyr::pivot_longer(dplyr::everything(), names_to = "Spalte", values_to = "Prozent_NA") |>
  dplyr::mutate(Prozent_NA = round(Prozent_NA, 1)) |>
  dplyr::arrange(dplyr::desc(Prozent_NA))
Code
# Ausgabe (gt + Plot) & Export


# Kopfzeile
tibble::tibble(
  Kennzahl = c("Momentaufnahmen", "Geräte"),
  Wert     = c(N_snapshots, N_devices)
) |>
  gt() |>
  tab_header(title = "Grundzahlen der Stichprobe") |>
  fmt_number(columns = Wert, decimals = 0)
Grundzahlen der Stichprobe
Kennzahl Wert
Momentaufnahmen 106
Geräte 25
Code
# Kennzahlen zur Verteilung der Surveys je Person
surveys_per_person_summ |>
  tidyr::pivot_longer(dplyr::everything(), names_to = "Stat", values_to = "Wert") |>
  dplyr::mutate(Wert = round(Wert, 2)) |>
  gt() |>
  tab_header(title = "Surveys pro Person — Kennzahlen")
Surveys pro Person — Kennzahlen
Stat Wert
n 25.00
min 1.00
q25 2.00
med 3.00
q75 6.00
max 12.00
mean 4.24
sd 2.91
Code
# Missingness (Top 30 Spalten mit den meisten NAs)
na_share_tbl |>
  dplyr::slice_max(order_by = Prozent_NA, n = 30, with_ties = FALSE) |>
  gt() |>
  tab_header(title = "Fehlende Werte je Spalte (Top 30)") |>
  cols_label(Spalte = "Spalte", Prozent_NA = "% fehlend")
Fehlende Werte je Spalte (Top 30)
Spalte % fehlend
other_factors_positive 93.4
other_factors_negative 87.7
axis_of_opression 77.4
factors_sense_of_belonging 6.6
majority_comparison 5.7
device_id 0.0
survey_num 0.0
survey_created_at 0.0
activity 0.0
awake 0.0
content 0.0
environmen_pleasure 0.0
environment_lively 0.0
environment_nature 0.0
environment_noise 0.0
general_wellbeing 0.0
indoors_outdoors 0.0
location_category 0.0
people_with_you 0.0
sense_of_belonging 0.0
tense_relaxed 0.0
Code
# Balkendiagramm: wie viele Personen haben 1, 2, 3, … Surveys?
dir.create("plots", showWarnings = FALSE)
freq_df <- surveys_per_person |>
  dplyr::count(n_surveys, name = "n_personen")

p_counts <- ggplot(freq_df, aes(x = n_surveys, y = n_personen)) +
  geom_col(fill = "#c4c4c4", width = 0.8) +
  scale_x_continuous(breaks = seq(min(freq_df$n_surveys), max(freq_df$n_surveys), by = 1)) +
  labs(
    title = "Verteilung der Teilnahme",
    x = "Anzahl abgeschlossene Umfragen pro Person",
    y = "Anzahl Personen"
  ) +
  theme()

print(p_counts)

Code
ggsave("plots/survey_counts.pdf", p_counts, width = 8/2.54, height = 5/2.54, device = cairo_pdf)

2.2 Kategoriale Verteilungen (einzeln, mit deutschen Labels)

Code
# Häufigkeiten (ggf. Mehrfachnennungen via ';') pro Kategorie – FIXED


cat_cols <- c("activity", "location_category", "indoors_outdoors", "people_with_you")

# Deutsche Titel
title_mapping <- c(
  activity = "Aktivität",
  location_category = "Ortskategorie",
  indoors_outdoors = "Drinnen / Draussen",
  people_with_you  = "Personen um mich"
)

# Deutsche Label-Übersetzung
label_mapping <- c(
  "working / studying"               = "Arbeiten / Lernen",
  "leisure / relaxation"             = "Freizeit / Entspannung",
  "travelling / commuting"           = "Unterwegs / Pendeln",
  "using media"                      = "Mediennutzung",
  "cooking / eating"                 = "Kochen / Essen",
  "social activities"                = "Soziale Aktivitäten",
  "shopping / errands"               = "Einkaufen / Erledigungen",
  "resting / sleeping"               = "Ruhen / Schlafen",
  "housework / tidying"              = "Hausarbeit / Carearbeit",
  "other"                            = "Sonstiges",
  "school / university"              = "Schule / Universität",
  "at home"                          = "Zuhause",
  "on the move (foot / bike / car)"  = "Unterwegs (zu Fuss / Rad / Auto)",
  "public transport"                 = "Öffentliche Verkehrsmittel",
  "at someone else’s home"           = "Bei anderen zuhause",
  "park / green space"               = "Park / Grünanlage",
  "workplace"                        = "Arbeitsplatz",
  "shopping / services"              = "Einkaufen / Dienstleistungen",
  "other place"                      = "Anderer Ort",
  "leisure / sports facility"        = "Freizeit- / Sporteinrichtung",
  "indoors"                          = "Drinnen",
  "outdoors"                         = "Draussen",
  "friends"                          = "Freunde",
  "alone"                            = "Alleine",
  "strangers"                        = "Fremde",
  "colleagues"                       = "Kollegen",
  "family"                           = "Familie",
  "acquaintances"                    = "Bekannte",
  "partner"                          = "Partner",
  "missing"                          = "Fehlend"
)

# String-sichere Version (kein {{col}}, sondern .data[[col]])
cat_pct <- function(df, col) {
  s <- ifelse(is.na(df[[col]]), "missing", as.character(df[[col]]))
  tibble(tmp = s) |>
    separate_rows(tmp, sep = ";") |>
    mutate(tmp = trimws(tmp)) |>
    filter(tmp != "") |>
    count(tmp, name = "n") |>
    mutate(
      pct   = 100 * n / sum(n),
      label = dplyr::recode(tmp, !!!label_mapping, .default = tmp)
    ) |>
    arrange(pct)
}

cat_pct_list <- setNames(lapply(cat_cols, function(x) cat_pct(survey, x)), cat_cols)
Code
# Ein Plot pro Kategorie + PDF-Export
dir.create("plots", showWarnings = FALSE)

for (col in names(cat_pct_list)) {
  dfp <- cat_pct_list[[col]]
  p <- ggplot(dfp, aes(x = pct, y = reorder(label, pct))) +
    geom_col(fill = "#c4c4c4", width = 0.8) +
    geom_text(aes(label = sprintf("%.1f %%", pct)), hjust = -0.05, size = 3) +
    scale_x_continuous(limits = c(0, min(100, ceiling(max(dfp$pct) + 10)))) +
    labs(x = "Anteil (%)", y = NULL, title = title_mapping[[col]]) +
    theme()
  print(p)
  ggsave(sprintf("plots/cat_dist_%s.pdf", col), p,
         width  = if (col == "indoors_outdoors") 5/2.54 else 10/2.54,
         height = if (col == "indoors_outdoors") 3/2.54 else 8/2.54,
         device = cairo_pdf)
}

2.3 „Axis of Oppression“: Anzahl Kategorien pro Person & Kategorienhäufigkeiten

Code
# Berechnung auf Basis people_data.csv  → Objekt: people

library(stringr)
NO_TOKENS <- c("no", "nein", "", "nan")

# 1) Anzahl Kategorien pro Person
axis_counts <- people |>
  dplyr::mutate(axis_vec = strsplit(tolower(dplyr::coalesce(axis_codes, "")), ";")) |>
  dplyr::mutate(n_kategorien = purrr::map_int(axis_vec, function(v) {
    v2 <- trimws(v); v2 <- v2[v2 != ""]
    if (length(v2) == 0) return(0L)
    if (any(v2 %in% NO_TOKENS)) return(0L)
    length(v2)
  }))

axis_dist_table <- axis_counts |>
  dplyr::count(n_kategorien, name = "count") |>
  dplyr::arrange(n_kategorien) |>
  dplyr::mutate(percent = round(100 * count / sum(count), 1))

# 2) Häufigkeiten einzelner Kategorien
axis_summary <- people |>
  dplyr::mutate(x = strsplit(tolower(dplyr::coalesce(axis_codes, "")), ";")) |>
  tidyr::unnest(x) |>
  dplyr::mutate(x = trimws(x)) |>
  dplyr::filter(!x %in% NO_TOKENS, x != "") |>
  dplyr::count(category = x, name = "count") |>
  dplyr::arrange(dplyr::desc(count)) |>
  dplyr::mutate(percent = round(100 * count / dplyr::n_distinct(people$device_id), 1))
Code
# Ausgabe Tabellen
axis_summary |>
  gt() |>
  tab_header(title = "axis_codes – Häufigkeiten (aufgesplittet)") |>
  cols_label(category = "Kategorie", count = "Anzahl", percent = "Prozent (%)")
axis_codes – Häufigkeiten (aufgesplittet)
Kategorie Anzahl Prozent (%)
background 4 16.7
gender 4 16.7
language / accent 4 16.7
sexual orientation 3 12.5
clothing / style 1 4.2
skin / appearance 1 4.2
social / financial 1 4.2
Code
axis_dist_table |>
  gt() |>
  tab_header(title = "Anzahl unterschiedlicher Diskriminierungsarten pro Person") |>
  cols_label(n_kategorien = "n Kategorien", count = "Anzahl", percent = "Prozent (%)")
Anzahl unterschiedlicher Diskriminierungsarten pro Person
n Kategorien Anzahl Prozent (%)
0 12 50.0
1 7 29.2
2 4 16.7
3 1 4.2

2.4 Kreuztabelle: Gender × Altersgruppe (Counts & Spalten-%)

Code
# (optional) Übersetzen wie oben genutzt
gender_de <- c("woman" = "weiblich", "man" = "männlich")
people_de <- people |>
  dplyr::mutate(
    gender = dplyr::recode(as.character(gender), !!!gender_de, .default = as.character(gender))
  )

cross <- with(people_de, table(gender, age_group))
cross_pct <- prop.table(cross, margin = 2) * 100

# Counts
as.data.frame.matrix(cross) |>
  tibble::rownames_to_column("Gender") |>
  gt() |>
  tab_header(title = "Gender × Altersgruppe — Häufigkeiten")
Gender × Altersgruppe — Häufigkeiten
Gender 16 – 25 26 – 35
männlich 12 2
weiblich 8 1
Code
# Spalten-Prozente
as.data.frame.matrix(round(cross_pct, 1)) |>
  tibble::rownames_to_column("Gender") |>
  gt() |>
  tab_header(title = "Gender × Altersgruppe — Spalten-Prozente (%)")
Gender × Altersgruppe — Spalten-Prozente (%)
Gender 16 – 25 26 – 35
männlich 60 66.7
weiblich 40 33.3

2.5 Slider-Histogramme (auf –1 … +1 skaliert, 9 Facets)

Code
slider_cols <- c(
  "general_wellbeing","content","tense_relaxed","awake",
  "environment_noise","environment_nature","environment_lively",
  "environmen_pleasure","sense_of_belonging"
)

GERMAN_LABELS <- c(
  environment_noise   = "Wie nimmst du die Geräuschkulisse wahr?",
  environment_nature  = "Wie viel Natur ist sichtbar?",
  environment_lively  = "Wie lebhaft oder ruhig wirkt der Ort?",
  environmen_pleasure = "Wie angenehm empfindest du den Ort?",
  content             = "Wie fühlst du dich insgesamt?",
  general_wellbeing   = "Ganz allgemein – wie zufrieden bist du?",
  tense_relaxed       = "Wie angespannt oder entspannt fühlst du dich?",
  awake               = "Wie wach fühlst du dich?",
  sense_of_belonging  = "Wie zugehörig oder fremd fühlst du dich?"
)

sliders_long <- survey |>
  dplyr::select(dplyr::all_of(c("device_id", slider_cols))) |>
  tidyr::pivot_longer(cols = dplyr::all_of(slider_cols),
                      names_to = "variable", values_to = "value") |>
  dplyr::mutate(
    value = as.numeric(value),
    scaled = (value - 0.5) * 2,
    var_de = dplyr::recode(variable, !!!GERMAN_LABELS)
  ) |>
  dplyr::filter(!is.na(scaled))

# Reihenfolge (wie im Python)
var_order <- c("environment_noise","environment_nature","environment_lively",
               "environmen_pleasure","content","general_wellbeing",
               "tense_relaxed","awake","sense_of_belonging")
sliders_long$var_de <- factor(sliders_long$var_de,
                              levels = GERMAN_LABELS[var_order])
Code
dir.create("plots", showWarnings = FALSE)

p_sliders <- ggplot(sliders_long, aes(x = scaled)) +
  geom_histogram(binwidth = 0.1, boundary = -1, closed = "left",
                 fill = "#c4c4c4", color = "white") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", linewidth = 0.5) +
  scale_x_continuous(limits = c(-1, 1), breaks = c(-1, 0, 1)) +
  labs(x = NULL, y = "Häufigkeit") +
  facet_wrap(~ var_de, ncol = 2, scales = "fixed") +
  theme()

print(p_sliders)

Code
ggsave("plots/slider_hists.pdf", p_sliders, width = 15/2.54, height = 22/2.54, device = cairo_pdf)

2.6 Merge der Datensätze

Code
# Merge
dat <- survey |> left_join(people, by = "device_id")

# Strata (Level 3)
dat <- dat |>
  mutate(
    gender             = as.factor(gender),
    age_group          = as.factor(age_group),
    sexual_orientation = as.factor(sexual_orientation),
    eq_income_group    = as.factor(eq_income_group),
    stratum_id = interaction(gender, age_group, sexual_orientation, eq_income_group,
                             sep = "_", drop = TRUE) |> as.factor()
  )

2.7 Freitext-Antworten

Code
# Quelle: survey_answers.csv → Objekt: survey

free_cols <- c("other_factors_negative", "other_factors_positive")

collect_text <- function(x) {
  v <- as.character(x)
  v <- v[!is.na(v)]
  v <- gsub("\\s+", " ", v)
  v <- trimws(v)
  v <- v[v != ""]
  unique(v)
}

vals_neg <- collect_text(survey[["other_factors_negative"]])
vals_pos <- collect_text(survey[["other_factors_positive"]])

tbl_neg <- tibble::tibble(
  Nr = seq_along(vals_neg),
  Antwort = vals_neg
) |>
  gt() |>
  tab_header(title = paste0("other_factors_negative — ", length(vals_neg), " einzigartige Antworten"))

tbl_pos <- tibble::tibble(
  Nr = seq_along(vals_pos),
  Antwort = vals_pos
) |>
  gt() |>
  tab_header(title = paste0("other_factors_positive — ", length(vals_pos), " einzigartige Antworten"))

tbl_neg
other_factors_negative — 11 einzigartige Antworten
Nr Antwort
1 heat
2 Everyone is doing the same, so it kind of feels like being at the right place
3 The contact with strangers
4 Bed
5 health issues
6 no natural sunlight room without windows no fresh air
7 a lot of people - personal space
8 No
9 /
10 no
11 Not really
Code
tbl_pos
other_factors_positive — 7 einzigartige Antworten
Nr Antwort
1 place i know and is mine i have control over it
2 know this place and can do what i want
3 my room and cozy for the night
4 pets
5 spending time with family pets
6 I am not by myself
7 Less noise from construction works

2.8 Outcome (Wohlbefinden)

Code
# Items auf [0,1], geometrisches Mittel = Index
outcome_vars <- c("general_wellbeing","tense_relaxed","sense_of_belonging","content","awake")

dat <- dat |>
  mutate(across(all_of(outcome_vars), ~ pmin(pmax(as.numeric(.), 0), 1))) |>
  mutate(
    wb_prod = reduce(across(all_of(outcome_vars)), `*`),
    wb_geom = wb_prod^(1/5),                                   # geometrisches Mittel
    wb_mean = rowMeans(across(all_of(outcome_vars)), na.rm = TRUE)  # Vergleich
  )
Code
# Ausgabe: Deskriptivstatistik (reine Darstellung)
dat |>
  summarise(across(
    c(wb_geom, wb_mean),
    list(
      n      = ~ sum(!is.na(.x)),
      mittel = ~ mean(.x, na.rm = TRUE),
      sd     = ~ sd(.x,   na.rm = TRUE),
      min    = ~ min(.x,  na.rm = TRUE),
      max    = ~ max(.x,  na.rm = TRUE)
    )
  )) |>
  pivot_longer(everything(),
               names_to = c("variable","stat"),
               names_pattern = "^(.*)_(n|mittel|sd|min|max)$") |>
  pivot_wider(names_from = stat, values_from = value) |>
  mutate(across(c(mittel, sd, min, max), ~ round(.x, 3))) |>
  gt() |>
  tab_header(title = "Deskriptive Statistik: Wohlbefinden-Indizes") |>
  cols_label(variable = "Variable", n = "N", mittel = "Mittelwert",
             sd = "SD", min = "Minimum", max = "Maximum")
Deskriptive Statistik: Wohlbefinden-Indizes
Variable N Mittelwert SD Minimum Maximum
wb_geom 106 0.580 0.180 0.000 0.944
wb_mean 106 0.622 0.163 0.164 0.945

2.9 Strata-Überblick

Code
# Zählungen (Level 1 & Personen) je Stratum
strata_counts_responses <- dat |> count(stratum_id, name = "n_befragungen")

people_strata <- people |>
  mutate(
    gender             = as.factor(gender),
    age_group          = as.factor(age_group),
    sexual_orientation = as.factor(sexual_orientation),
    eq_income_group    = as.factor(eq_income_group),
    stratum_id = interaction(gender, age_group, sexual_orientation, eq_income_group,
                             sep = "_", drop = TRUE) |> as.factor()
  )

strata_counts_persons <- people_strata |>
  count(stratum_id, name = "n_personen") |>
  semi_join(strata_counts_responses, by = "stratum_id")

strata_overview <- strata_counts_persons |>
  full_join(strata_counts_responses, by = "stratum_id") |>
  replace_na(list(n_personen = 0L, n_befragungen = 0L)) |>
  mutate(befragungen_pro_person = ifelse(n_personen > 0, n_befragungen / n_personen, NA_real_)) |>
  arrange(desc(n_personen), desc(n_befragungen))
Code
# Ausgabe: Tabelle (reine Darstellung, inkl. Übersetzung)

# Übersetzungen wie unten
gender_de <- c("woman" = "weiblich", "man" = "männlich")
so_de     <- c("straight / hetero" = "heterosexuell",
               "gay / lesbian"     = "homosexuell",
               "bisexual"           = "bisexuell",
               "queer"              = "queer")
eq_de     <- c("Very Low"  = "sehr niedrig",
               "Low"       = "niedrig",
               "High"      = "hoch",
               "Very High" = "sehr hoch",
               "Unknown"   = "—")

strata_overview |>
  tidyr::separate(
    col = stratum_id,
    into = c("Gender","Altersgruppe","Sexuelle Orientierung","Äquivalenzeinkommensgruppe"),
    sep = "_", remove = TRUE, fill = "right"
  ) |>
  # Übersetzen (fehlende Werte → Gedankenstrich)
  mutate(
    Gender                       = recode(Gender, !!!gender_de, .default = Gender, .missing = "—"),
    `Sexuelle Orientierung`      = recode(`Sexuelle Orientierung`, !!!so_de, .default = `Sexuelle Orientierung`, .missing = "—"),
    `Äquivalenzeinkommensgruppe` = recode(`Äquivalenzeinkommensgruppe`, !!!eq_de, .default = `Äquivalenzeinkommensgruppe`, .missing = "—"),
    `Befragungen pro Person`     = round(befragungen_pro_person, 2),
    `Anzahl Personen`            = n_personen,
    `Anzahl Befragungen`         = n_befragungen
  ) |>
  select(
    Gender, Altersgruppe, `Sexuelle Orientierung`, `Äquivalenzeinkommensgruppe`,
    `Anzahl Personen`, `Anzahl Befragungen`, `Befragungen pro Person`
  ) |>
  mutate(across(everything(), ~ ifelse(is.na(.), "—", as.character(.)))) |>
  gt() |>
  tab_header(title = "Übersicht über Strata (aufgetrennt nach Achsen)") |>
  tab_source_note("— = keine Daten verfügbar") |>
  cols_align(
    align = "center",
    columns = c("Anzahl Personen","Anzahl Befragungen","Befragungen pro Person")
  )
Übersicht über Strata (aufgetrennt nach Achsen)
Gender Altersgruppe Sexuelle Orientierung Äquivalenzeinkommensgruppe Anzahl Personen Anzahl Befragungen Befragungen pro Person
weiblich 16 – 25 heterosexuell hoch 3 13 4.33
männlich 16 – 25 heterosexuell sehr niedrig 3 9 3
männlich 16 – 25 heterosexuell 2 9 4.5
weiblich 16 – 25 heterosexuell 2 8 4
weiblich 16 – 25 bisexuell 1 12 12
männlich 16 – 25 heterosexuell sehr hoch 1 9 9
männlich 16 – 25 heterosexuell niedrig 1 8 8
männlich 16 – 25 homosexuell niedrig 1 7 7
weiblich 16 – 25 heterosexuell sehr niedrig 1 6 6
weiblich 26 – 35 heterosexuell sehr niedrig 1 5 5
männlich 26 – 35 heterosexuell sehr hoch 1 4 4
männlich 16 – 25 homosexuell hoch 1 3 3
männlich 16 – 25 heterosexuell hoch 1 3 3
männlich 16 – 25 homosexuell 1 3 3
männlich 16 – 25 bisexuell sehr niedrig 1 2 2
weiblich 16 – 25 queer sehr niedrig 1 2 2
1 2 2
männlich 26 – 35 bisexuell sehr niedrig 1 1 1
— = keine Daten verfügbar

2.10 Kontextvariablen

Code
# Metrische Variablen: Person-Mean-Centering (Within/Between)
env_cont <- c("environmen_pleasure","environment_lively","environment_nature","environment_noise")  # 'environmen_pleasure' = Datensatzbezeichnung
env_cat  <- c("activity","location_category","indoors_outdoors","people_with_you","majority_comparison")

dat <- dat |> mutate(across(all_of(env_cont), as.numeric))

dat <- dat |>
  group_by(device_id) |>
  mutate(across(all_of(env_cont),
                list(pm = ~ mean(.x, na.rm = TRUE)), .names = "{.col}_pm")) |>
  ungroup() |>
  mutate(across(all_of(env_cont),
                ~ .x - get(paste0(cur_column(), "_pm")),
                .names = "{.col}_within")) |>
  mutate(across(all_of(env_cat), as.factor))
Code
# Ausgabe: Variable-Listing (reine Darstellung)
tibble(
  Typ = c("Kategorial","Metrisch (Original)","Metrisch (Person-Mean)","Metrisch (Within)"),
  Variablen = list(env_cat, env_cont, paste0(env_cont,"_pm"), paste0(env_cont,"_within"))
) |>
  unnest(Variablen) |>
  gt() |>
  tab_header(title = "Verfügbare Kontextvariablen nach Typ")
Verfügbare Kontextvariablen nach Typ
Typ Variablen
Kategorial activity
Kategorial location_category
Kategorial indoors_outdoors
Kategorial people_with_you
Kategorial majority_comparison
Metrisch (Original) environmen_pleasure
Metrisch (Original) environment_lively
Metrisch (Original) environment_nature
Metrisch (Original) environment_noise
Metrisch (Person-Mean) environmen_pleasure_pm
Metrisch (Person-Mean) environment_lively_pm
Metrisch (Person-Mean) environment_nature_pm
Metrisch (Person-Mean) environment_noise_pm
Metrisch (Within) environmen_pleasure_within
Metrisch (Within) environment_lively_within
Metrisch (Within) environment_nature_within
Metrisch (Within) environment_noise_within

2.11 Minimalfilter

Code
# Strata mit >= 2 Befragungen; Personen mit >= 2 Messungen
dat_before <- dat
valid_strata  <- strata_overview |> filter(n_befragungen >= 2) |> pull(stratum_id)
dat_after_s   <- dat_before |> filter(stratum_id %in% valid_strata)

valid_persons <- dat_after_s |> count(device_id, name = "n_obs") |> filter(n_obs >= 2) |> pull(device_id)
dat           <- dat_after_s |> filter(device_id %in% valid_persons)

# Stufen säubern
dat <- dat |>
  mutate(across(c(stratum_id, device_id, gender, age_group, sexual_orientation, eq_income_group, all_of(env_cat)),
                ~ droplevels(as.factor(.))))
Code
# Ausgabe: Reduktionen (reine Darstellung)
tibble(
  Schritt          = c("Vor Filter","Nach Strata-Filter","Nach Personen-Filter"),
  N_Beobachtungen  = c(nrow(dat_before), nrow(dat_after_s), nrow(dat)),
  N_Personen       = c(n_distinct(dat_before$device_id), n_distinct(dat_after_s$device_id), n_distinct(dat$device_id)),
  N_Strata         = c(n_distinct(dat_before$stratum_id), n_distinct(dat_after_s$stratum_id), n_distinct(dat$stratum_id))
) |>
  gt() |>
  tab_header(title = "Datenfilter: Übersicht über Reduktionen")
Datenfilter: Übersicht über Reduktionen
Schritt N_Beobachtungen N_Personen N_Strata
Vor Filter 106 25 18
Nach Strata-Filter 105 24 17
Nach Personen-Filter 102 21 16

3 Mehrebenenmodelle

3.1 Dreistufiges Nullmodell (ICC)

Code
m0 <- lmer(
  wb_geom ~ 1 + (1 | stratum_id) + (1 | device_id),
  data = dat |> filter(!is.na(wb_geom)),
  REML = TRUE
)
summary(m0)  # boundary (singular) fit bei sehr wenig Level-2-Varianz erwartbar
Linear mixed model fit by REML ['lmerMod']
Formula: wb_geom ~ 1 + (1 | stratum_id) + (1 | device_id)
   Data: filter(dat, !is.na(wb_geom))

REML criterion at convergence: -52.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.09258 -0.64336  0.00523  0.71822  1.96022 

Random effects:
 Groups     Name        Variance Std.Dev.
 device_id  (Intercept) 0.000000 0.0000  
 stratum_id (Intercept) 0.003025 0.0550  
 Residual               0.031073 0.1763  
Number of obs: 102, groups:  device_id, 21; stratum_id, 16

Fixed effects:
            Estimate Std. Error t value
(Intercept)  0.57997    0.02297   25.24
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
Code
# ICC manuell (robust bei Singularität)
vc_df    <- as.data.frame(VarCorr(m0))
v_strata <- vc_df |> filter(grp == "stratum_id") |> pull(vcov); v_strata <- ifelse(length(v_strata)==0, 0, v_strata)
v_person <- vc_df |> filter(grp == "device_id")  |> pull(vcov); v_person <- ifelse(length(v_person)==0,  0, v_person)
v_resid  <- vc_df |> filter(grp == "Residual")   |> pull(vcov); v_resid  <- ifelse(length(v_resid)==0,   0, v_resid)

icc_stratum <- v_strata / (v_strata + v_person + v_resid)
icc_person  <- v_person / (v_strata + v_person + v_resid)
Code
# Ausgabe: ICC-Tabelle (reine Darstellung)
tibble(
  Ebene       = c("Strata (Level 3)","Person (Level 2)","Momentaufnahmen (Level 1)"),
  Varianz     = c(v_strata, v_person, v_resid),
  ICC_Anteil  = c(icc_stratum, icc_person, 1 - icc_stratum - icc_person),
  ICC_Prozent = round(100 * c(icc_stratum, icc_person, 1 - icc_stratum - icc_person), 2)
) |>
  gt() |>
  tab_header(title = "Varianzzerlegung: Dreistufiges Nullmodell") |>
  cols_label(Ebene = "Ebene", Varianz = "Varianzkomponente",
             ICC_Anteil = "ICC (Anteil)", ICC_Prozent = "ICC (%)") |>
  tab_source_note("ICC = Intraklassenkorrelation; Anteil der Gesamtvarianz je Ebene.")
Varianzzerlegung: Dreistufiges Nullmodell
Ebene Varianzkomponente ICC (Anteil) ICC (%)
Strata (Level 3) 0.003024511 0.08870257 8.87
Person (Level 2) 0.000000000 0.00000000 0.00
Momentaufnahmen (Level 1) 0.031072709 0.91129743 91.13
ICC = Intraklassenkorrelation; Anteil der Gesamtvarianz je Ebene.

Befund: Personenebene ≈ 0 → dreistufiges Modell nicht informativ.

3.2 Zweistufiges Nullmodell

Code
dat_2L <- dat |>
  filter(!is.na(wb_geom)) |>
  mutate(stratum_id = droplevels(stratum_id))

tibble(
  n_beobachtungen = nrow(dat_2L),
  n_personen      = n_distinct(dat_2L$device_id),
  n_strata        = n_distinct(dat_2L$stratum_id)
)
Code
m0_2L <- lmer(
  wb_geom ~ 1 + (1 | stratum_id),
  data = dat_2L,
  REML = TRUE
)
summary(m0_2L)
Linear mixed model fit by REML ['lmerMod']
Formula: wb_geom ~ 1 + (1 | stratum_id)
   Data: dat_2L

REML criterion at convergence: -52.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.09258 -0.64335  0.00524  0.71822  1.96022 

Random effects:
 Groups     Name        Variance Std.Dev.
 stratum_id (Intercept) 0.003024 0.05499 
 Residual               0.031073 0.17627 
Number of obs: 102, groups:  stratum_id, 16

Fixed effects:
            Estimate Std. Error t value
(Intercept)  0.57997    0.02297   25.24
Code
vc2           <- as.data.frame(VarCorr(m0_2L))
v_stratum_2L  <- vc2 |> filter(grp == "stratum_id") |> pull(vcov)
v_resid_2L    <- vc2 |> filter(grp == "Residual")   |> pull(vcov)
icc_stratum_2L <- v_stratum_2L / (v_stratum_2L + v_resid_2L)
Code
# Ausgabe: 2-Level-Zerlegung (reine Darstellung)
tibble(
  Parameter = c("Intercept (Gesamtmittel)","Strata-Varianz","Residual-Varianz","VPC Strata"),
  Wert      = c(fixef(m0_2L)[["(Intercept)"]], v_stratum_2L, v_resid_2L, icc_stratum_2L),
  Einheit   = c("Wohlbefinden (0–1)","Varianz","Varianz","Anteil")
) |>
  mutate(Wert = round(Wert, 6)) |>
  gt() |>
  tab_header(title = "Zweistufiges Nullmodell: Varianzzerlegung & VPC(Strata)") |>
  cols_label(Parameter = "Parameter", Wert = "Geschätzter Wert", Einheit = "Einheit")
Zweistufiges Nullmodell: Varianzzerlegung & VPC(Strata)
Parameter Geschätzter Wert Einheit
Intercept (Gesamtmittel) 0.579968 Wohlbefinden (0–1)
Strata-Varianz 0.003024 Varianz
Residual-Varianz 0.031073 Varianz
VPC Strata 0.088699 Anteil

4 MAIHDA (2 Ebenen)

4.1 Additivmodell (M1_2L)

Code
m1_2L <- lmer(
  wb_geom ~ gender + age_group + sexual_orientation + eq_income_group +
    (1 | stratum_id),
  data = dat_2L,
  REML = TRUE
)

# PEV_strata relativ zu M0
var_strata_m0 <- as.data.frame(VarCorr(m0_2L)) |> filter(grp == "stratum_id") |> pull(vcov)
var_strata_m1 <- as.data.frame(VarCorr(m1_2L)) |> filter(grp == "stratum_id") |> pull(vcov)
PEV_strata_M0_M1 <- (var_strata_m0 - var_strata_m1) / var_strata_m0
Code
# Ausgabe: Vergleich M0 vs. M1 (reine Darstellung)
tibble(
  Modell     = c("M0_2L (Null)","M1_2L (additive Achsen)"),
  Var_Strata = c(var_strata_m0, var_strata_m1),
  PEV_vs_M0  = c(0, PEV_strata_M0_M1)
) |>
  mutate(Var_Strata = round(Var_Strata, 6),
         PEV_vs_M0  = round(PEV_vs_M0, 3)) |>
  gt() |>
  tab_header(title = "Strata-Varianz: Null vs. Additives MAIHDA-Modell") |>
  tab_source_note(paste0("PEV_strata (M0→M1) = ", round(PEV_strata_M0_M1, 3),
                         " → Anteil der Strata-Varianz durch additive Haupteffekte."))
Strata-Varianz: Null vs. Additives MAIHDA-Modell
Modell Var_Strata PEV_vs_M0
M0_2L (Null) 0.003024 0.000
M1_2L (additive Achsen) 0.001110 0.633
PEV_strata (M0→M1) = 0.633 → Anteil der Strata-Varianz durch additive Haupteffekte.

4.2 + Kontext (M2_2L)

Code
# Typen absichern
dat_2L <- dat_2L |>
  mutate(
    activity            = as.factor(activity),
    location_category   = as.factor(location_category),
    indoors_outdoors    = as.factor(indoors_outdoors),
    people_with_you     = as.factor(people_with_you),
    majority_comparison = as.factor(majority_comparison),
    environment_noise   = as.numeric(environment_noise),
    environment_nature  = as.numeric(environment_nature),
    environment_lively  = as.numeric(environment_lively),
    environmen_pleasure = as.numeric(environmen_pleasure)  # Bezeichnung wie im Datensatz
  ) |>
  droplevels()
Code
m2_2L_context <- lmer(
  wb_geom ~ gender + age_group + sexual_orientation + eq_income_group +
    activity + location_category + indoors_outdoors + people_with_you + majority_comparison +
    environment_noise + environment_nature + environment_lively + environmen_pleasure +
    (1 | stratum_id),
  data = dat_2L,
  REML = TRUE
)
Code
extract_var <- function(model) {
  vc <- as.data.frame(VarCorr(model))
  tibble(
    var_strata = vc |> filter(grp == "stratum_id") |> pull(vcov),
    var_resid  = vc |> filter(grp == "Residual")   |> pull(vcov)
  )
}

vars <- bind_rows(
  extract_var(m0_2L)         |> mutate(Modell = "M0_2L (Null)"),
  extract_var(m1_2L)         |> mutate(Modell = "M1_2L (+ Achsen)"),
  extract_var(m2_2L_context) |> mutate(Modell = "M2_2L (+ Kontext)")
) |>
  relocate(Modell)

var0_s <- vars$var_strata[vars$Modell == "M0_2L (Null)"]
var0_e <- vars$var_resid[ vars$Modell == "M0_2L (Null)"]
Code
# Ausgabe: Varianzvergleich (reine Darstellung)
vars |>
  mutate(
    PEV_strata_vs_M0 = (var0_s - var_strata) / var0_s,
    PEV_resid_vs_M0  = (var0_e - var_resid)  / var0_e
  ) |>
  mutate(across(c(var_strata, var_resid, PEV_strata_vs_M0, PEV_resid_vs_M0), ~ round(.x, 6))) |>
  gt() |>
  tab_header(title = "Varianzvergleich (2-Level): Null vs. +Achsen vs. +Kontext") |>
  tab_source_note("PEV = Proportional Explained Variance relativ zu M0. 
MAIHDA: Rest-Strata-Varianz in M1 = intersektionaler Überschuss.") |>
  cols_label(Modell = "Modell", var_strata = "Strata-Varianz",
             var_resid = "Residual-Varianz", PEV_strata_vs_M0 = "PEV Strata vs. M0",
             PEV_resid_vs_M0 = "PEV Residual vs. M0")
Varianzvergleich (2-Level): Null vs. +Achsen vs. +Kontext
Modell Strata-Varianz Residual-Varianz PEV Strata vs. M0 PEV Residual vs. M0
M0_2L (Null) 0.003024 0.031073 0.000000 0.000000
M1_2L (+ Achsen) 0.001110 0.030322 0.633048 0.024175
M2_2L (+ Kontext) 0.000004 0.000223 0.998703 0.992815
PEV = Proportional Explained Variance relativ zu M0. MAIHDA: Rest-Strata-Varianz in M1 = intersektionaler Überschuss.

5 Vertiefungen

5.1 Intersektionaler Überschuss (Random Intercepts von M1)

Code
# BLUPs + 95%-CIs
re1     <- ranef(m1_2L, condVar = TRUE)$stratum_id
postVar <- attr(ranef(m1_2L, condVar = TRUE)[[1]], "postVar")
ci_lo   <- re1[,1] - 1.96 * sqrt(postVar[1,1,])
ci_hi   <- re1[,1] + 1.96 * sqrt(postVar[1,1,])

strata_effekte <- tibble(
  stratum_id = rownames(re1),
  eff        = re1[,1],
  ci_lower   = ci_lo,
  ci_upper   = ci_hi
) |>
  left_join(dat_2L |> distinct(stratum_id, gender, age_group, sexual_orientation, eq_income_group),
            by = "stratum_id") |>
  arrange(desc(eff))
Code
# Ausgabe: Tabelle (reine Darstellung, übersetzt & strukturiert)

strata_effekte |>
  mutate(
    eff      = round(eff, 3),
    ci_lower = round(ci_lower, 3),
    ci_upper = round(ci_upper, 3),
    Geschl.        = recode(as.character(gender), !!!gender_de, .default = as.character(gender)),
    Alter          = as.character(age_group),
    `Sex. Orient.` = recode(as.character(sexual_orientation), !!!so_de, .default = as.character(sexual_orientation)),
    `Äquiv.-Eink.` = recode(as.character(eq_income_group), !!!eq_de, .default = as.character(eq_income_group))
  ) |>
  select(
    Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`,
    Effekt = eff, `CI-untere Grenze` = ci_lower, `CI-obere Grenze` = ci_upper
  ) |>
  mutate(across(everything(), ~ ifelse(is.na(.), "—", as.character(.)))) |>
  gt() |>
  tab_header(title = "Intersektionaler Überschuss (M1) nach Stratum") |>
  tab_source_note("Werte nahe 0: geringe Abweichung vom durch additive Achsen erklärten Erwartungswert.") |>
  cols_align(
    align = "center",
    columns = c(Effekt, `CI-untere Grenze`, `CI-obere Grenze`)
  )
Intersektionaler Überschuss (M1) nach Stratum
Geschl. Alter Sex. Orient. Äquiv.-Eink. Effekt CI-untere Grenze CI-obere Grenze
männlich 16 – 25 homosexuell niedrig 0.014 -0.044 0.072
männlich 16 – 25 heterosexuell 0.012 -0.045 0.07
männlich 16 – 25 heterosexuell sehr niedrig 0.01 -0.047 0.066
weiblich 16 – 25 bisexuell 0.007 -0.047 0.062
weiblich 16 – 25 heterosexuell hoch 0.007 -0.047 0.061
männlich 26 – 35 heterosexuell sehr hoch 0.005 -0.056 0.066
weiblich 16 – 25 heterosexuell sehr niedrig 0.002 -0.057 0.062
weiblich 16 – 25 queer sehr niedrig 0 -0.063 0.063
männlich 16 – 25 heterosexuell hoch -0.001 -0.063 0.061
weiblich 26 – 35 heterosexuell sehr niedrig -0.005 -0.065 0.055
männlich 16 – 25 heterosexuell sehr hoch -0.005 -0.061 0.052
männlich 16 – 25 homosexuell hoch -0.006 -0.068 0.056
männlich 16 – 25 bisexuell sehr niedrig -0.007 -0.07 0.056
männlich 16 – 25 homosexuell -0.008 -0.07 0.054
weiblich 16 – 25 heterosexuell -0.012 -0.069 0.045
männlich 16 – 25 heterosexuell niedrig -0.014 -0.071 0.043
Werte nahe 0: geringe Abweichung vom durch additive Achsen erklärten Erwartungswert.
Code
# Visualisierung (Berechnung + Plot sichtbar)
# Daten für Plot: übersetzen und Label bauen
se_plot <- strata_effekte |>
  dplyr::mutate(
    Geschl.        = dplyr::recode(as.character(gender), !!!gender_de, .default = as.character(gender)),
    Alter          = as.character(age_group),
    `Sex. Orient.` = dplyr::recode(as.character(sexual_orientation), !!!so_de, .default = as.character(sexual_orientation)),
    `Äquiv.-Eink.` = dplyr::recode(as.character(eq_income_group), !!!eq_de, .default = as.character(eq_income_group)),
    label          = paste(Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`, sep = " / ")
  )

# Plot
ggplot(se_plot, aes(
  x = reorder(label, eff),
  y = eff
)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "steelblue") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red", alpha = 0.7) +
  coord_flip() +
  labs(
    title = "Intersektionaler Überschuss pro Stratum (M1)",
    subtitle = "Abweichung vom durch additive Achsen erklärten Erwartungswert",
    x = "Stratum (Achsenkombination, übersetzt)",
    y = "Abweichung (Random Intercept)"
  ) +
  theme()

5.2 Situative Effekte (M2, Fixed Effects)

Code
fixef_m2 <- broom.mixed::tidy(m2_2L_context, effects = "fixed") |>
  filter(term != "(Intercept)")

if (!"statistic" %in% names(fixef_m2)) fixef_m2 <- fixef_m2 |> mutate(statistic = NA_real_)
if (!"p.value"  %in% names(fixef_m2)) fixef_m2  <- fixef_m2  |> mutate(p.value  = NA_real_)
Code
# Kompakte Ausgabe: kontinuierlich (pro +1 SD) & kategorial (ggü. Referenz)
library(dplyr)

library(stringr)

# Erwartet: 'fixef_m2' (aus vorherigem Chunk) und 'dat_2L' (Datenbasis von M2)

# 1) Definitionen
cont_vars <- c("environment_noise","environment_nature","environment_lively","environmen_pleasure")
cont_de   <- c(environment_noise = "Lautstärke",
               environment_nature = "Natur",
               environment_lively = "Lebhaftigkeit",
               environmen_pleasure = "Angenehmkeit")

cat_vars  <- c("activity","location_category","indoors_outdoors","people_with_you","majority_comparison")
cat_de    <- c(activity = "Aktivität",
               location_category = "Ortskategorie",
               indoors_outdoors = "drinnen/draußen",
               people_with_you = "Begleitung",
               majority_comparison = "Mehrheitsvergleich")

# 2) Kontinuierliche Effekte: standardisiert (Δ WB pro +1 SD der Prädiktorvariable)
sd_lookup <- sapply(cont_vars, function(v) sd(dat_2L[[v]], na.rm = TRUE))
cont_tbl <- fixef_m2 %>%
  filter(term %in% cont_vars) %>%
  mutate(
    var_de      = recode(term, !!!cont_de),
    sd_x        = sd_lookup[term],
    estimate_sd = estimate * sd_x,
    se_sd       = std.error * sd_x,
    ci_lo       = estimate_sd - 1.96 * se_sd,
    ci_hi       = estimate_sd + 1.96 * se_sd
  ) %>%
  transmute(
    Variable = var_de,
    `Δ Wohlbefinden (+1 SD)` = round(estimate_sd, 3),
    `CI-untere Grenze`       = round(ci_lo, 3),
    `CI-obere Grenze`        = round(ci_hi, 3)
  ) %>%
  arrange(desc(abs(`Δ Wohlbefinden (+1 SD)`)))

cont_gt <- cont_tbl %>%
  gt() %>%
  tab_header(title = "Situative Einflüsse (kontinuierlich, pro +1 SD)") %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_source_note("Effekt auf den Wohlbefindensindex (0–1) pro +1 SD des Prädiktors.")

# 3) Kategoriale Effekte: Kontraste vs. Referenz
#    Referenzen sind i. d. R. die ersten Factor-Levels (Treatment-Coding).
ref_levels <- lapply(cat_vars, function(v) levels(dat_2L[[v]])[1])
names(ref_levels) <- cat_vars

cat_rows <- fixef_m2 %>%
  filter(!term %in% c("(Intercept)", cont_vars)) %>%
  mutate(
    var = vapply(term, function(t) {
      hit <- cat_vars[sapply(cat_vars, function(v) startsWith(t, v))]
      if (length(hit) == 0) NA_character_ else hit[1]
    }, character(1)),
    level = ifelse(is.na(var), term, str_sub(term, nchar(var) + 1L))
  ) %>%
  filter(!is.na(var)) %>%
  mutate(
    var_de = recode(var, !!!cat_de),
    ref    = vapply(var, function(v) ref_levels[[v]], character(1)),
    ci_lo  = estimate - 1.96 * std.error,
    ci_hi  = estimate + 1.96 * std.error,
    sig    = !is.na(ci_lo) & !is.na(ci_hi) & (ci_lo * ci_hi > 0),
    Label  = paste0(var_de, ": ", level, " (vs. ", ref, ")")
  ) %>%
  transmute(
    Variable = Label,
    `Δ Wohlbefinden`   = round(estimate, 3),
    `CI-untere Grenze` = round(ci_lo, 3),
    `CI-obere Grenze`  = round(ci_hi, 3),
    sig = sig
  ) %>%
  arrange(desc(abs(`Δ Wohlbefinden`)))

# Kompakt: erst „signifikante“ Kontraste (95%-CI ohne 0), max. 12 Zeilen;
# Fallback: falls keine signifikanten, die 12 größten nach Betrag.
cat_tbl <- cat_rows %>% filter(sig) %>% slice_head(n = 12)
if (nrow(cat_tbl) == 0) cat_tbl <- cat_rows %>% slice_head(n = 12)

cat_gt <- cat_tbl %>%
  gt() %>%
  tab_header(title = "Situative Einflüsse (kategorial, vs. Referenz)") %>%
  cols_align(align = "center", columns = c(`Δ Wohlbefinden`,`CI-untere Grenze`,`CI-obere Grenze`)) %>%
  cols_hide(columns = "sig") %>%
  tab_source_note(
    paste(
      "Referenzen:",
      paste(paste(recode(names(ref_levels), !!!cat_de), shQuote(unlist(ref_levels)), sep = " = "),
            collapse = "; ")
    )
  )

# 4) Ausgeben (beide Tabellen)
cont_gt
Situative Einflüsse (kontinuierlich, pro +1 SD)
Variable Δ Wohlbefinden (+1 SD) CI-untere Grenze CI-obere Grenze
Lebhaftigkeit 0.235 0.177 0.294
Lautstärke -0.101 -0.143 -0.060
Natur 0.043 -0.041 0.127
Angenehmkeit -0.026 -0.194 0.142
Effekt auf den Wohlbefindensindex (0–1) pro +1 SD des Prädiktors.
Code
cat_gt
Situative Einflüsse (kategorial, vs. Referenz)
Variable Δ Wohlbefinden CI-untere Grenze CI-obere Grenze
Mehrheitsvergleich: gender;age;background;skin / appearance;language / accent;social / financial situation;health / disability (vs. age) -1.124 -1.811 -0.438
Aktivität: resting / sleeping (vs. cooking / eating) 1.036 0.444 1.629
Mehrheitsvergleich: background;skin / appearance;language / accent (vs. age) 0.970 0.110 1.829
Mehrheitsvergleich: gender;age;background;skin / appearance;language / accent;clothing / style;social / financial situation;sexual orientation;health / disability (vs. age) 0.714 0.438 0.990
Mehrheitsvergleich: age;background;skin / appearance;language / accent;social / financial situation;clothing / style;health / disability (vs. age) 0.659 0.175 1.143
Mehrheitsvergleich: gender;skin / appearance;social / financial situation;clothing / style;sexual orientation;health / disability;age (vs. age) -0.547 -0.742 -0.353
Aktivität: using media;leisure / relaxation (vs. cooking / eating) 0.450 0.229 0.670
Mehrheitsvergleich: gender;skin / appearance;language / accent;social / financial situation;clothing / style;sexual orientation;health / disability (vs. age) -0.447 -0.536 -0.358
Aktivität: cooking / eating;social activities (vs. cooking / eating) -0.420 -0.701 -0.140
Begleitung: strangers;friends (vs. acquaintances;friends) 0.328 0.028 0.627
Mehrheitsvergleich: gender;skin / appearance;language / accent;health / disability (vs. age) -0.327 -0.606 -0.048
Aktivität: leisure / relaxation;cooking / eating (vs. cooking / eating) -0.268 -0.403 -0.133
Referenzen: Aktivität = 'cooking / eating'; Ortskategorie = 'at home'; drinnen/draußen = 'indoors'; Begleitung = 'acquaintances;friends'; Mehrheitsvergleich = 'age'

5.3 Random Slopes (within-Variablen je Stratum)

Code
# Voraussetzungen
stopifnot(
  exists("dat_2L"),
  all(c("wb_geom","stratum_id","gender","age_group","sexual_orientation","eq_income_group") %in% names(dat_2L))
)

need_within <- c("environment_noise_within","environment_nature_within",
                 "environment_lively_within","environmen_pleasure_within")  # Datensatzbezeichnung
stopifnot(all(need_within %in% names(dat_2L)))

axes <- list(
  noise  = list(within = "environment_noise_within"),
  nature = list(within = "environment_nature_within"),
  lively = list(within = "environment_lively_within"),
  pleas  = list(within = "environmen_pleasure_within")
)

fit_axis <- function(dat, within_var, axis_id) {
  x_w <- within_var
  x_z <- paste0(x_w, "_z")
  dat <- dat |> mutate("{x_z}" := as.numeric(scale(.data[[x_w]])))

  form <- as.formula(
    glue::glue("wb_geom ~ gender + age_group + sexual_orientation + eq_income_group +
               `{x_z}` + (1 + `{x_z}` | stratum_id)")
  )
  m <- lmer(form, data = dat, REML = TRUE)

  cf   <- as.data.frame(coef(summary(m)))
  beta <- cf[x_z, "Estimate"]; se_b <- cf[x_z, "Std. Error"]

  re      <- ranef(m, condVar = TRUE)$stratum_id
  postVar <- attr(re, "postVar")
  col_id  <- which(colnames(re) == x_z)

  strata_meta <- dat |> distinct(stratum_id, gender, age_group, sexual_orientation, eq_income_group)

  tibble(
    stratum_id = rownames(re),
    slope_dev  = re[, x_z],
    se_dev     = sqrt(postVar[col_id, col_id, ])
  ) |>
    mutate(
      beta        = beta,
      se_b        = se_b,
      slope_total = beta + slope_dev,
      se_total    = sqrt(se_b^2 + se_dev^2),
      ci_lo       = slope_total - 1.96 * se_total,
      ci_hi       = slope_total + 1.96 * se_total,
      sig_flag    = dplyr::case_when(ci_lo > 0 ~ "klar > 0",
                                     ci_hi < 0 ~ "klar < 0",
                                     TRUE      ~ "unsicher"),
      axis_id     = axis_id
    ) |>
    left_join(strata_meta, by = "stratum_id")
}

slopes_raw <- bind_rows(
  fit_axis(dat_2L, axes$noise$within,  "noise"),
  fit_axis(dat_2L, axes$nature$within, "nature"),
  fit_axis(dat_2L, axes$lively$within, "lively"),
  fit_axis(dat_2L, axes$pleas$within,  "pleas")
)
Code
# Ausgabe: Heatmap-Tabelle (reine Darstellung)



axis_labels <- c(noise="Lautstärke", nature="Natur", lively="Lebhaftigkeit", pleas="Angenehmkeit")

slopes_disp <- slopes_raw |>
  mutate(
    Geschl.        = recode(gender, !!!gender_de, .default = "—"),
    Alter          = age_group,
    `Sex. Orient.` = recode(sexual_orientation, !!!so_de, .default = "—"),
    `Äquiv.-Eink.` = recode(eq_income_group, !!!eq_de, .default = "—"),
    Achse          = recode(axis_id, !!!axis_labels),
    Wert           = slope_total,
    Sig            = sig_flag
  )

# Falls vorhanden: empirische Befragungszählungen je Stratum für Sortierung
stratum_counts <- tibble::tribble(
  ~Geschl., ~Alter, ~`Sex. Orient.`, ~`Äquiv.-Eink.`, ~Befr.,
  "weiblich","16 – 25","heterosexuell","hoch",13,
  "männlich","16 – 25","heterosexuell","sehr niedrig",9,
  "männlich","16 – 25","heterosexuell","—",9,
  "weiblich","16 – 25","heterosexuell","—",8,
  "weiblich","16 – 25","bisexuell","—",12,
  "männlich","16 – 25","heterosexuell","sehr hoch",9,
  "männlich","16 – 25","heterosexuell","niedrig",8,
  "männlich","16 – 25","homosexuell","niedrig",7,
  "weiblich","16 – 25","heterosexuell","sehr niedrig",6,
  "weiblich","26 – 35","heterosexuell","sehr niedrig",5,
  "männlich","26 – 35","heterosexuell","sehr hoch",4,
  "männlich","16 – 25","homosexuell","hoch",3,
  "männlich","16 – 25","heterosexuell","hoch",3,
  "männlich","16 – 25","homosexuell","—",3,
  "männlich","16 – 25","bisexuell","sehr niedrig",2,
  "weiblich","16 – 25","queer","sehr niedrig",2
)

slopes_disp <- slopes_disp |>
  left_join(stratum_counts, by = c("Geschl.","Alter","Sex. Orient.","Äquiv.-Eink.")) |>
  mutate(Befr. = ifelse(is.na(Befr.), -Inf, Befr.)) |>
  arrange(desc(Befr.))

wide <- slopes_disp |>
  select(Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`, Befr., Achse, Wert, Sig) |>
  mutate(Achse = factor(Achse, levels = c("Lautstärke","Natur","Lebhaftigkeit","Angenehmkeit"))) |>
  pivot_wider(names_from = Achse, values_from = c(Wert, Sig))

names(wide) <- gsub("^Wert_", "", names(wide))
names(wide) <- gsub("^Sig_",  "sig_", names(wide))

wide <- wide |>
  select(Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`, Befr.,
         Lautstärke, Natur, Lebhaftigkeit, Angenehmkeit,
         sig_Lautstärke, sig_Natur, sig_Lebhaftigkeit, sig_Angenehmkeit)

heat_cols <- c("Lautstärke","Natur","Lebhaftigkeit","Angenehmkeit")
rng      <- range(unlist(wide[heat_cols]), na.rm = TRUE)
max_abs  <- max(abs(rng))
pal <- col_numeric(palette = c("#2b8cbe","#f7f7f7","#d7301f"),
                   domain  = c(-max_abs, max_abs), na.color = "#f0f0f0")

wide |>
  gt() |>
  tab_header(
    title    = "Effekte pro Stratum (Δ Wohlbefinden pro +1 SD)",
    subtitle = "Links Strata-Merkmale; rechts Heatmap je Achse. Fett = 95%-KI ≠ 0."
  ) |>
  fmt_number(columns = Befr., decimals = 0) |>
  fmt_number(columns = all_of(heat_cols), decimals = 2) |>
  data_color(columns = all_of(heat_cols), colors = pal) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Lautstärke",     rows = sig_Lautstärke != "unsicher")) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Natur",          rows = sig_Natur != "unsicher")) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Lebhaftigkeit",  rows = sig_Lebhaftigkeit != "unsicher")) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Angenehmkeit",   rows = sig_Angenehmkeit != "unsicher")) |>
  cols_hide(starts_with("sig_")) |>
  cols_label(`Sex. Orient.` = "Sex. Orient.", `Äquiv.-Eink.` = "Äquiv.-Eink.", Befr. = "Befr.") |>
  tab_options(table.font.size = px(14), data_row.padding = px(6))
Effekte pro Stratum (Δ Wohlbefinden pro +1 SD)
Links Strata-Merkmale; rechts Heatmap je Achse. Fett = 95%-KI ≠ 0.
Geschl. Alter Sex. Orient. Äquiv.-Eink. Befr. Lautstärke Natur Lebhaftigkeit Angenehmkeit
weiblich 16 – 25 heterosexuell hoch 13 0.04 0.04 0.04 0.04
weiblich 16 – 25 bisexuell 12 0.04 0.05 0.07 0.07
männlich 16 – 25 heterosexuell 9 0.05 0.07 0.06 0.05
männlich 16 – 25 heterosexuell sehr hoch 9 0.04 0.01 0.01 0.03
männlich 16 – 25 heterosexuell sehr niedrig 9 0.04 0.07 0.03 0.03
männlich 16 – 25 heterosexuell niedrig 8 0.04 0.03 0.00 0.02
weiblich 16 – 25 heterosexuell 8 0.04 0.02 0.04 0.03
männlich 16 – 25 homosexuell niedrig 7 0.05 0.07 0.05 0.05
weiblich 16 – 25 heterosexuell sehr niedrig 6 0.04 0.05 0.04 0.04
weiblich 26 – 35 heterosexuell sehr niedrig 5 0.04 0.04 0.03 0.04
männlich 26 – 35 heterosexuell sehr hoch 4 0.04 0.05 0.04 0.05
männlich 16 – 25 homosexuell hoch 3 0.04 0.02 0.02 0.02
männlich 16 – 25 heterosexuell hoch 3 0.04 0.04 0.03 0.03
männlich 16 – 25 homosexuell 3 0.04 0.07 0.03 0.02
männlich 16 – 25 bisexuell sehr niedrig 2 0.04 0.03 0.02 0.02
weiblich 16 – 25 queer sehr niedrig 2 0.04 0.04 0.03 0.04

Literatur

Evans, Clare R., George Leckie, S.V. Subramanian, Andrew Bell und Juan Merlo 2024: A Tutorial for Conducting Intersectional Multilevel Analysis of Individual Heterogeneity and Discriminatory Accuracy (MAIHDA). SSM - Population Health 26 : 101664. DOI: https://doi.org/10.1016/j.ssmph.2024.101664