Analyse nach Intersectional Multilevel Analysis of Individual Heterogeneity and Discriminatory Accuracy (MAIHDA)

Autor:in

Lukas Batschelet

Veröffentlichungsdatum

16. August 2025

Analyse nach Evans et al. (2024).

Um die Darstellung etwas übersichtlicher zu halten ist berechnender Code standarmässig ausgeklappt, und Code zur Darstellung und Übersetzung nicht.

1 Setup

Code

# Pakete
library(tidyverse)
library(janitor)
library(readr)
library(lme4)
library(performance)
library(broom.mixed)
library(gt)
library(forcats)
library(here)
library(scales)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
theme_set(
  theme_minimal(base_size = 11) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      plot.title       = element_text(face = "bold"),
      plot.subtitle    = element_text(face = "plain")
    )
)
# Reproduzierbarkeit
set.seed(42)
options(dplyr.summarise.inform = FALSE)

# Arbeitsverzeichnis
here()

[1] "/Users/luba/Documents/GitHub/Designing-InterMind"

Code

# Einlesen & Spaltennamen bereinigen
survey <- read_csv("survey_answers.csv", show_col_types = FALSE) |> clean_names()
people <- read_csv("people_data.csv",   show_col_types = FALSE) |> clean_names()

# Schlüssel prüfen
stopifnot("device_id" %in% names(survey), "device_id" %in% names(people))
if (nrow(people) != n_distinct(people$device_id)) {
  warning("Hinweis: people_data enthält doppelte device_id-Einträge. Bitte bereinigen.")
}

2 Deskriptive Statistik (Stichprobe)

2.1 Grundzahlen, Verteilung „Surveys pro Person“, Missingness

Code

# Grundzahlen & Inputs für Tabellen/Plots

# Snapshots & Geräte
N_snapshots <- nrow(survey)
N_devices   <- dplyr::n_distinct(survey$device_id)

# Verteilung: Anzahl Surveys pro Person
surveys_per_person <- survey |>
  dplyr::count(device_id, name = "n_surveys")

# Kennzahlen (wie pandas .describe)
surveys_per_person_summ <- surveys_per_person |>
  dplyr::summarise(
    n    = dplyr::n(),
    min  = min(n_surveys, na.rm = TRUE),
    q25  = as.numeric(quantile(n_surveys, 0.25, na.rm = TRUE)),
    med  = as.numeric(quantile(n_surveys, 0.50, na.rm = TRUE)),
    q75  = as.numeric(quantile(n_surveys, 0.75, na.rm = TRUE)),
    max  = max(n_surveys, na.rm = TRUE),
    mean = mean(n_surveys, na.rm = TRUE),
    sd   = sd(n_surveys,   na.rm = TRUE)
  )

# Missing-Anteil je Spalte (in %)
na_share_tbl <- survey |>
  dplyr::summarise(dplyr::across(dplyr::everything(), ~ mean(is.na(.)) * 100)) |>
  tidyr::pivot_longer(dplyr::everything(), names_to = "Spalte", values_to = "Prozent_NA") |>
  dplyr::mutate(Prozent_NA = round(Prozent_NA, 1)) |>
  dplyr::arrange(dplyr::desc(Prozent_NA))

Code

# Ausgabe (gt + Plot) & Export


# Kopfzeile
tibble::tibble(
  Kennzahl = c("Momentaufnahmen", "Geräte"),
  Wert     = c(N_snapshots, N_devices)
) |>
  gt() |>
  tab_header(title = "Grundzahlen der Stichprobe") |>
  fmt_number(columns = Wert, decimals = 0)

Grundzahlen der Stichprobe
Kennzahl	Wert
Momentaufnahmen	106
Geräte	25

Code

# Kennzahlen zur Verteilung der Surveys je Person
surveys_per_person_summ |>
  tidyr::pivot_longer(dplyr::everything(), names_to = "Stat", values_to = "Wert") |>
  dplyr::mutate(Wert = round(Wert, 2)) |>
  gt() |>
  tab_header(title = "Surveys pro Person — Kennzahlen")

Surveys pro Person — Kennzahlen
Stat	Wert
n	25.00
min	1.00
q25	2.00
med	3.00
q75	6.00
max	12.00
mean	4.24
sd	2.91

Code

# Missingness (Top 30 Spalten mit den meisten NAs)
na_share_tbl |>
  dplyr::slice_max(order_by = Prozent_NA, n = 30, with_ties = FALSE) |>
  gt() |>
  tab_header(title = "Fehlende Werte je Spalte (Top 30)") |>
  cols_label(Spalte = "Spalte", Prozent_NA = "% fehlend")

Fehlende Werte je Spalte (Top 30)
Spalte	% fehlend
other_factors_positive	93.4
other_factors_negative	87.7
axis_of_opression	77.4
factors_sense_of_belonging	6.6
majority_comparison	5.7
device_id	0.0
survey_num	0.0
survey_created_at	0.0
activity	0.0
awake	0.0
content	0.0
environmen_pleasure	0.0
environment_lively	0.0
environment_nature	0.0
environment_noise	0.0
general_wellbeing	0.0
indoors_outdoors	0.0
location_category	0.0
people_with_you	0.0
sense_of_belonging	0.0
tense_relaxed	0.0

Code

# Balkendiagramm: wie viele Personen haben 1, 2, 3, … Surveys?
dir.create("plots", showWarnings = FALSE)
freq_df <- surveys_per_person |>
  dplyr::count(n_surveys, name = "n_personen")

p_counts <- ggplot(freq_df, aes(x = n_surveys, y = n_personen)) +
  geom_col(fill = "#c4c4c4", width = 0.8) +
  scale_x_continuous(breaks = seq(min(freq_df$n_surveys), max(freq_df$n_surveys), by = 1)) +
  labs(
    title = "Verteilung der Teilnahme",
    x = "Anzahl abgeschlossene Umfragen pro Person",
    y = "Anzahl Personen"
  ) +
  theme()

print(p_counts)

Code

ggsave("plots/survey_counts.pdf", p_counts, width = 8/2.54, height = 5/2.54, device = cairo_pdf)

2.2 Kategoriale Verteilungen (einzeln, mit deutschen Labels)

Code

# Häufigkeiten (ggf. Mehrfachnennungen via ';') pro Kategorie – FIXED


cat_cols <- c("activity", "location_category", "indoors_outdoors", "people_with_you")

# Deutsche Titel
title_mapping <- c(
  activity = "Aktivität",
  location_category = "Ortskategorie",
  indoors_outdoors = "Drinnen / Draussen",
  people_with_you  = "Personen um mich"
)

# Deutsche Label-Übersetzung
label_mapping <- c(
  "working / studying"               = "Arbeiten / Lernen",
  "leisure / relaxation"             = "Freizeit / Entspannung",
  "travelling / commuting"           = "Unterwegs / Pendeln",
  "using media"                      = "Mediennutzung",
  "cooking / eating"                 = "Kochen / Essen",
  "social activities"                = "Soziale Aktivitäten",
  "shopping / errands"               = "Einkaufen / Erledigungen",
  "resting / sleeping"               = "Ruhen / Schlafen",
  "housework / tidying"              = "Hausarbeit / Carearbeit",
  "other"                            = "Sonstiges",
  "school / university"              = "Schule / Universität",
  "at home"                          = "Zuhause",
  "on the move (foot / bike / car)"  = "Unterwegs (zu Fuss / Rad / Auto)",
  "public transport"                 = "Öffentliche Verkehrsmittel",
  "at someone else’s home"           = "Bei anderen zuhause",
  "park / green space"               = "Park / Grünanlage",
  "workplace"                        = "Arbeitsplatz",
  "shopping / services"              = "Einkaufen / Dienstleistungen",
  "other place"                      = "Anderer Ort",
  "leisure / sports facility"        = "Freizeit- / Sporteinrichtung",
  "indoors"                          = "Drinnen",
  "outdoors"                         = "Draussen",
  "friends"                          = "Freunde",
  "alone"                            = "Alleine",
  "strangers"                        = "Fremde",
  "colleagues"                       = "Kollegen",
  "family"                           = "Familie",
  "acquaintances"                    = "Bekannte",
  "partner"                          = "Partner",
  "missing"                          = "Fehlend"
)

# String-sichere Version (kein {{col}}, sondern .data[[col]])
cat_pct <- function(df, col) {
  s <- ifelse(is.na(df[[col]]), "missing", as.character(df[[col]]))
  tibble(tmp = s) |>
    separate_rows(tmp, sep = ";") |>
    mutate(tmp = trimws(tmp)) |>
    filter(tmp != "") |>
    count(tmp, name = "n") |>
    mutate(
      pct   = 100 * n / sum(n),
      label = dplyr::recode(tmp, !!!label_mapping, .default = tmp)
    ) |>
    arrange(pct)
}

cat_pct_list <- setNames(lapply(cat_cols, function(x) cat_pct(survey, x)), cat_cols)

Code

# Ein Plot pro Kategorie + PDF-Export
dir.create("plots", showWarnings = FALSE)

for (col in names(cat_pct_list)) {
  dfp <- cat_pct_list[[col]]
  p <- ggplot(dfp, aes(x = pct, y = reorder(label, pct))) +
    geom_col(fill = "#c4c4c4", width = 0.8) +
    geom_text(aes(label = sprintf("%.1f %%", pct)), hjust = -0.05, size = 3) +
    scale_x_continuous(limits = c(0, min(100, ceiling(max(dfp$pct) + 10)))) +
    labs(x = "Anteil (%)", y = NULL, title = title_mapping[[col]]) +
    theme()
  print(p)
  ggsave(sprintf("plots/cat_dist_%s.pdf", col), p,
         width  = if (col == "indoors_outdoors") 5/2.54 else 10/2.54,
         height = if (col == "indoors_outdoors") 3/2.54 else 8/2.54,
         device = cairo_pdf)
}

2.3 „Axis of Oppression“: Anzahl Kategorien pro Person & Kategorienhäufigkeiten

Code

# Berechnung auf Basis people_data.csv  → Objekt: people

library(stringr)
NO_TOKENS <- c("no", "nein", "", "nan")

# 1) Anzahl Kategorien pro Person
axis_counts <- people |>
  dplyr::mutate(axis_vec = strsplit(tolower(dplyr::coalesce(axis_codes, "")), ";")) |>
  dplyr::mutate(n_kategorien = purrr::map_int(axis_vec, function(v) {
    v2 <- trimws(v); v2 <- v2[v2 != ""]
    if (length(v2) == 0) return(0L)
    if (any(v2 %in% NO_TOKENS)) return(0L)
    length(v2)
  }))

axis_dist_table <- axis_counts |>
  dplyr::count(n_kategorien, name = "count") |>
  dplyr::arrange(n_kategorien) |>
  dplyr::mutate(percent = round(100 * count / sum(count), 1))

# 2) Häufigkeiten einzelner Kategorien
axis_summary <- people |>
  dplyr::mutate(x = strsplit(tolower(dplyr::coalesce(axis_codes, "")), ";")) |>
  tidyr::unnest(x) |>
  dplyr::mutate(x = trimws(x)) |>
  dplyr::filter(!x %in% NO_TOKENS, x != "") |>
  dplyr::count(category = x, name = "count") |>
  dplyr::arrange(dplyr::desc(count)) |>
  dplyr::mutate(percent = round(100 * count / dplyr::n_distinct(people$device_id), 1))

Code

# Ausgabe Tabellen
axis_summary |>
  gt() |>
  tab_header(title = "axis_codes – Häufigkeiten (aufgesplittet)") |>
  cols_label(category = "Kategorie", count = "Anzahl", percent = "Prozent (%)")

axis_codes – Häufigkeiten (aufgesplittet)
Kategorie	Anzahl	Prozent (%)
background	4	16.7
gender	4	16.7
language / accent	4	16.7
sexual orientation	3	12.5
clothing / style	1	4.2
skin / appearance	1	4.2
social / financial	1	4.2

Code

axis_dist_table |>
  gt() |>
  tab_header(title = "Anzahl unterschiedlicher Diskriminierungsarten pro Person") |>
  cols_label(n_kategorien = "n Kategorien", count = "Anzahl", percent = "Prozent (%)")

Anzahl unterschiedlicher Diskriminierungsarten pro Person
n Kategorien	Anzahl	Prozent (%)
0	12	50.0
1	7	29.2
2	4	16.7
3	1	4.2

2.4 Kreuztabelle: Gender × Altersgruppe (Counts & Spalten-%)

Code

# (optional) Übersetzen wie oben genutzt
gender_de <- c("woman" = "weiblich", "man" = "männlich")
people_de <- people |>
  dplyr::mutate(
    gender = dplyr::recode(as.character(gender), !!!gender_de, .default = as.character(gender))
  )

cross <- with(people_de, table(gender, age_group))
cross_pct <- prop.table(cross, margin = 2) * 100

# Counts
as.data.frame.matrix(cross) |>
  tibble::rownames_to_column("Gender") |>
  gt() |>
  tab_header(title = "Gender × Altersgruppe — Häufigkeiten")

Gender × Altersgruppe — Häufigkeiten
Gender	16 – 25	26 – 35
männlich	12	2
weiblich	8	1

Code

# Spalten-Prozente
as.data.frame.matrix(round(cross_pct, 1)) |>
  tibble::rownames_to_column("Gender") |>
  gt() |>
  tab_header(title = "Gender × Altersgruppe — Spalten-Prozente (%)")

Gender × Altersgruppe — Spalten-Prozente (%)
Gender	16 – 25	26 – 35
männlich	60	66.7
weiblich	40	33.3

2.5 Slider-Histogramme (auf –1 … +1 skaliert, 9 Facets)

Code

slider_cols <- c(
  "general_wellbeing","content","tense_relaxed","awake",
  "environment_noise","environment_nature","environment_lively",
  "environmen_pleasure","sense_of_belonging"
)

GERMAN_LABELS <- c(
  environment_noise   = "Wie nimmst du die Geräuschkulisse wahr?",
  environment_nature  = "Wie viel Natur ist sichtbar?",
  environment_lively  = "Wie lebhaft oder ruhig wirkt der Ort?",
  environmen_pleasure = "Wie angenehm empfindest du den Ort?",
  content             = "Wie fühlst du dich insgesamt?",
  general_wellbeing   = "Ganz allgemein – wie zufrieden bist du?",
  tense_relaxed       = "Wie angespannt oder entspannt fühlst du dich?",
  awake               = "Wie wach fühlst du dich?",
  sense_of_belonging  = "Wie zugehörig oder fremd fühlst du dich?"
)

sliders_long <- survey |>
  dplyr::select(dplyr::all_of(c("device_id", slider_cols))) |>
  tidyr::pivot_longer(cols = dplyr::all_of(slider_cols),
                      names_to = "variable", values_to = "value") |>
  dplyr::mutate(
    value = as.numeric(value),
    scaled = (value - 0.5) * 2,
    var_de = dplyr::recode(variable, !!!GERMAN_LABELS)
  ) |>
  dplyr::filter(!is.na(scaled))

# Reihenfolge (wie im Python)
var_order <- c("environment_noise","environment_nature","environment_lively",
               "environmen_pleasure","content","general_wellbeing",
               "tense_relaxed","awake","sense_of_belonging")
sliders_long$var_de <- factor(sliders_long$var_de,
                              levels = GERMAN_LABELS[var_order])

Code

dir.create("plots", showWarnings = FALSE)

p_sliders <- ggplot(sliders_long, aes(x = scaled)) +
  geom_histogram(binwidth = 0.1, boundary = -1, closed = "left",
                 fill = "#c4c4c4", color = "white") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", linewidth = 0.5) +
  scale_x_continuous(limits = c(-1, 1), breaks = c(-1, 0, 1)) +
  labs(x = NULL, y = "Häufigkeit") +
  facet_wrap(~ var_de, ncol = 2, scales = "fixed") +
  theme()

print(p_sliders)

Code

ggsave("plots/slider_hists.pdf", p_sliders, width = 15/2.54, height = 22/2.54, device = cairo_pdf)

2.6 Merge der Datensätze

Code

# Merge
dat <- survey |> left_join(people, by = "device_id")

# Strata (Level 3)
dat <- dat |>
  mutate(
    gender             = as.factor(gender),
    age_group          = as.factor(age_group),
    sexual_orientation = as.factor(sexual_orientation),
    eq_income_group    = as.factor(eq_income_group),
    stratum_id = interaction(gender, age_group, sexual_orientation, eq_income_group,
                             sep = "_", drop = TRUE) |> as.factor()
  )

2.7 Freitext-Antworten

Code

# Quelle: survey_answers.csv → Objekt: survey

free_cols <- c("other_factors_negative", "other_factors_positive")

collect_text <- function(x) {
  v <- as.character(x)
  v <- v[!is.na(v)]
  v <- gsub("\\s+", " ", v)
  v <- trimws(v)
  v <- v[v != ""]
  unique(v)
}

vals_neg <- collect_text(survey[["other_factors_negative"]])
vals_pos <- collect_text(survey[["other_factors_positive"]])

tbl_neg <- tibble::tibble(
  Nr = seq_along(vals_neg),
  Antwort = vals_neg
) |>
  gt() |>
  tab_header(title = paste0("other_factors_negative — ", length(vals_neg), " einzigartige Antworten"))

tbl_pos <- tibble::tibble(
  Nr = seq_along(vals_pos),
  Antwort = vals_pos
) |>
  gt() |>
  tab_header(title = paste0("other_factors_positive — ", length(vals_pos), " einzigartige Antworten"))

tbl_neg

other_factors_negative — 11 einzigartige Antworten
Nr	Antwort
1	heat
2	Everyone is doing the same, so it kind of feels like being at the right place
3	The contact with strangers
4	Bed
5	health issues
6	no natural sunlight room without windows no fresh air
7	a lot of people - personal space
8	No
9	/
10	no
11	Not really

Code

tbl_pos

other_factors_positive — 7 einzigartige Antworten
Nr	Antwort
1	place i know and is mine i have control over it
2	know this place and can do what i want
3	my room and cozy for the night
4	pets
5	spending time with family pets
6	I am not by myself
7	Less noise from construction works

2.8 Outcome (Wohlbefinden)

Code

# Items auf [0,1], geometrisches Mittel = Index
outcome_vars <- c("general_wellbeing","tense_relaxed","sense_of_belonging","content","awake")

dat <- dat |>
  mutate(across(all_of(outcome_vars), ~ pmin(pmax(as.numeric(.), 0), 1))) |>
  mutate(
    wb_prod = reduce(across(all_of(outcome_vars)), `*`),
    wb_geom = wb_prod^(1/5),                                   # geometrisches Mittel
    wb_mean = rowMeans(across(all_of(outcome_vars)), na.rm = TRUE)  # Vergleich
  )

Code

# Ausgabe: Deskriptivstatistik (reine Darstellung)
dat |>
  summarise(across(
    c(wb_geom, wb_mean),
    list(
      n      = ~ sum(!is.na(.x)),
      mittel = ~ mean(.x, na.rm = TRUE),
      sd     = ~ sd(.x,   na.rm = TRUE),
      min    = ~ min(.x,  na.rm = TRUE),
      max    = ~ max(.x,  na.rm = TRUE)
    )
  )) |>
  pivot_longer(everything(),
               names_to = c("variable","stat"),
               names_pattern = "^(.*)_(n|mittel|sd|min|max)$") |>
  pivot_wider(names_from = stat, values_from = value) |>
  mutate(across(c(mittel, sd, min, max), ~ round(.x, 3))) |>
  gt() |>
  tab_header(title = "Deskriptive Statistik: Wohlbefinden-Indizes") |>
  cols_label(variable = "Variable", n = "N", mittel = "Mittelwert",
             sd = "SD", min = "Minimum", max = "Maximum")

Deskriptive Statistik: Wohlbefinden-Indizes
Variable	N	Mittelwert	SD	Minimum	Maximum
wb_geom	106	0.580	0.180	0.000	0.944
wb_mean	106	0.622	0.163	0.164	0.945

2.9 Strata-Überblick

Code

# Zählungen (Level 1 & Personen) je Stratum
strata_counts_responses <- dat |> count(stratum_id, name = "n_befragungen")

people_strata <- people |>
  mutate(
    gender             = as.factor(gender),
    age_group          = as.factor(age_group),
    sexual_orientation = as.factor(sexual_orientation),
    eq_income_group    = as.factor(eq_income_group),
    stratum_id = interaction(gender, age_group, sexual_orientation, eq_income_group,
                             sep = "_", drop = TRUE) |> as.factor()
  )

strata_counts_persons <- people_strata |>
  count(stratum_id, name = "n_personen") |>
  semi_join(strata_counts_responses, by = "stratum_id")

strata_overview <- strata_counts_persons |>
  full_join(strata_counts_responses, by = "stratum_id") |>
  replace_na(list(n_personen = 0L, n_befragungen = 0L)) |>
  mutate(befragungen_pro_person = ifelse(n_personen > 0, n_befragungen / n_personen, NA_real_)) |>
  arrange(desc(n_personen), desc(n_befragungen))

Code

# Ausgabe: Tabelle (reine Darstellung, inkl. Übersetzung)

# Übersetzungen wie unten
gender_de <- c("woman" = "weiblich", "man" = "männlich")
so_de     <- c("straight / hetero" = "heterosexuell",
               "gay / lesbian"     = "homosexuell",
               "bisexual"           = "bisexuell",
               "queer"              = "queer")
eq_de     <- c("Very Low"  = "sehr niedrig",
               "Low"       = "niedrig",
               "High"      = "hoch",
               "Very High" = "sehr hoch",
               "Unknown"   = "—")

strata_overview |>
  tidyr::separate(
    col = stratum_id,
    into = c("Gender","Altersgruppe","Sexuelle Orientierung","Äquivalenzeinkommensgruppe"),
    sep = "_", remove = TRUE, fill = "right"
  ) |>
  # Übersetzen (fehlende Werte → Gedankenstrich)
  mutate(
    Gender                       = recode(Gender, !!!gender_de, .default = Gender, .missing = "—"),
    `Sexuelle Orientierung`      = recode(`Sexuelle Orientierung`, !!!so_de, .default = `Sexuelle Orientierung`, .missing = "—"),
    `Äquivalenzeinkommensgruppe` = recode(`Äquivalenzeinkommensgruppe`, !!!eq_de, .default = `Äquivalenzeinkommensgruppe`, .missing = "—"),
    `Befragungen pro Person`     = round(befragungen_pro_person, 2),
    `Anzahl Personen`            = n_personen,
    `Anzahl Befragungen`         = n_befragungen
  ) |>
  select(
    Gender, Altersgruppe, `Sexuelle Orientierung`, `Äquivalenzeinkommensgruppe`,
    `Anzahl Personen`, `Anzahl Befragungen`, `Befragungen pro Person`
  ) |>
  mutate(across(everything(), ~ ifelse(is.na(.), "—", as.character(.)))) |>
  gt() |>
  tab_header(title = "Übersicht über Strata (aufgetrennt nach Achsen)") |>
  tab_source_note("— = keine Daten verfügbar") |>
  cols_align(
    align = "center",
    columns = c("Anzahl Personen","Anzahl Befragungen","Befragungen pro Person")
  )

Übersicht über Strata (aufgetrennt nach Achsen)
Gender	Altersgruppe	Sexuelle Orientierung	Äquivalenzeinkommensgruppe	Anzahl Personen	Anzahl Befragungen	Befragungen pro Person
weiblich	16 – 25	heterosexuell	hoch	3	13	4.33
männlich	16 – 25	heterosexuell	sehr niedrig	3	9	3
männlich	16 – 25	heterosexuell	—	2	9	4.5
weiblich	16 – 25	heterosexuell	—	2	8	4
weiblich	16 – 25	bisexuell	—	1	12	12
männlich	16 – 25	heterosexuell	sehr hoch	1	9	9
männlich	16 – 25	heterosexuell	niedrig	1	8	8
männlich	16 – 25	homosexuell	niedrig	1	7	7
weiblich	16 – 25	heterosexuell	sehr niedrig	1	6	6
weiblich	26 – 35	heterosexuell	sehr niedrig	1	5	5
männlich	26 – 35	heterosexuell	sehr hoch	1	4	4
männlich	16 – 25	homosexuell	hoch	1	3	3
männlich	16 – 25	heterosexuell	hoch	1	3	3
männlich	16 – 25	homosexuell	—	1	3	3
männlich	16 – 25	bisexuell	sehr niedrig	1	2	2
weiblich	16 – 25	queer	sehr niedrig	1	2	2
—	—	—	—	1	2	2
männlich	26 – 35	bisexuell	sehr niedrig	1	1	1
— = keine Daten verfügbar

2.10 Kontextvariablen

Code

# Metrische Variablen: Person-Mean-Centering (Within/Between)
env_cont <- c("environmen_pleasure","environment_lively","environment_nature","environment_noise")  # 'environmen_pleasure' = Datensatzbezeichnung
env_cat  <- c("activity","location_category","indoors_outdoors","people_with_you","majority_comparison")

dat <- dat |> mutate(across(all_of(env_cont), as.numeric))

dat <- dat |>
  group_by(device_id) |>
  mutate(across(all_of(env_cont),
                list(pm = ~ mean(.x, na.rm = TRUE)), .names = "{.col}_pm")) |>
  ungroup() |>
  mutate(across(all_of(env_cont),
                ~ .x - get(paste0(cur_column(), "_pm")),
                .names = "{.col}_within")) |>
  mutate(across(all_of(env_cat), as.factor))

Code

# Ausgabe: Variable-Listing (reine Darstellung)
tibble(
  Typ = c("Kategorial","Metrisch (Original)","Metrisch (Person-Mean)","Metrisch (Within)"),
  Variablen = list(env_cat, env_cont, paste0(env_cont,"_pm"), paste0(env_cont,"_within"))
) |>
  unnest(Variablen) |>
  gt() |>
  tab_header(title = "Verfügbare Kontextvariablen nach Typ")

Verfügbare Kontextvariablen nach Typ
Typ	Variablen
Kategorial	activity
Kategorial	location_category
Kategorial	indoors_outdoors
Kategorial	people_with_you
Kategorial	majority_comparison
Metrisch (Original)	environmen_pleasure
Metrisch (Original)	environment_lively
Metrisch (Original)	environment_nature
Metrisch (Original)	environment_noise
Metrisch (Person-Mean)	environmen_pleasure_pm
Metrisch (Person-Mean)	environment_lively_pm
Metrisch (Person-Mean)	environment_nature_pm
Metrisch (Person-Mean)	environment_noise_pm
Metrisch (Within)	environmen_pleasure_within
Metrisch (Within)	environment_lively_within
Metrisch (Within)	environment_nature_within
Metrisch (Within)	environment_noise_within

2.11 Minimalfilter

Code

# Strata mit >= 2 Befragungen; Personen mit >= 2 Messungen
dat_before <- dat
valid_strata  <- strata_overview |> filter(n_befragungen >= 2) |> pull(stratum_id)
dat_after_s   <- dat_before |> filter(stratum_id %in% valid_strata)

valid_persons <- dat_after_s |> count(device_id, name = "n_obs") |> filter(n_obs >= 2) |> pull(device_id)
dat           <- dat_after_s |> filter(device_id %in% valid_persons)

# Stufen säubern
dat <- dat |>
  mutate(across(c(stratum_id, device_id, gender, age_group, sexual_orientation, eq_income_group, all_of(env_cat)),
                ~ droplevels(as.factor(.))))

Code

# Ausgabe: Reduktionen (reine Darstellung)
tibble(
  Schritt          = c("Vor Filter","Nach Strata-Filter","Nach Personen-Filter"),
  N_Beobachtungen  = c(nrow(dat_before), nrow(dat_after_s), nrow(dat)),
  N_Personen       = c(n_distinct(dat_before$device_id), n_distinct(dat_after_s$device_id), n_distinct(dat$device_id)),
  N_Strata         = c(n_distinct(dat_before$stratum_id), n_distinct(dat_after_s$stratum_id), n_distinct(dat$stratum_id))
) |>
  gt() |>
  tab_header(title = "Datenfilter: Übersicht über Reduktionen")

Datenfilter: Übersicht über Reduktionen
Schritt	N_Beobachtungen	N_Personen	N_Strata
Vor Filter	106	25	18
Nach Strata-Filter	105	24	17
Nach Personen-Filter	102	21	16

3 Mehrebenenmodelle

3.1 Dreistufiges Nullmodell (ICC)

Code

m0 <- lmer(
  wb_geom ~ 1 + (1 | stratum_id) + (1 | device_id),
  data = dat |> filter(!is.na(wb_geom)),
  REML = TRUE
)
summary(m0)  # boundary (singular) fit bei sehr wenig Level-2-Varianz erwartbar

Linear mixed model fit by REML ['lmerMod']
Formula: wb_geom ~ 1 + (1 | stratum_id) + (1 | device_id)
   Data: filter(dat, !is.na(wb_geom))

REML criterion at convergence: -52.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.09258 -0.64336  0.00523  0.71822  1.96022 

Random effects:
 Groups     Name        Variance Std.Dev.
 device_id  (Intercept) 0.000000 0.0000  
 stratum_id (Intercept) 0.003025 0.0550  
 Residual               0.031073 0.1763  
Number of obs: 102, groups:  device_id, 21; stratum_id, 16

Fixed effects:
            Estimate Std. Error t value
(Intercept)  0.57997    0.02297   25.24
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

Code

# ICC manuell (robust bei Singularität)
vc_df    <- as.data.frame(VarCorr(m0))
v_strata <- vc_df |> filter(grp == "stratum_id") |> pull(vcov); v_strata <- ifelse(length(v_strata)==0, 0, v_strata)
v_person <- vc_df |> filter(grp == "device_id")  |> pull(vcov); v_person <- ifelse(length(v_person)==0,  0, v_person)
v_resid  <- vc_df |> filter(grp == "Residual")   |> pull(vcov); v_resid  <- ifelse(length(v_resid)==0,   0, v_resid)

icc_stratum <- v_strata / (v_strata + v_person + v_resid)
icc_person  <- v_person / (v_strata + v_person + v_resid)

Code

# Ausgabe: ICC-Tabelle (reine Darstellung)
tibble(
  Ebene       = c("Strata (Level 3)","Person (Level 2)","Momentaufnahmen (Level 1)"),
  Varianz     = c(v_strata, v_person, v_resid),
  ICC_Anteil  = c(icc_stratum, icc_person, 1 - icc_stratum - icc_person),
  ICC_Prozent = round(100 * c(icc_stratum, icc_person, 1 - icc_stratum - icc_person), 2)
) |>
  gt() |>
  tab_header(title = "Varianzzerlegung: Dreistufiges Nullmodell") |>
  cols_label(Ebene = "Ebene", Varianz = "Varianzkomponente",
             ICC_Anteil = "ICC (Anteil)", ICC_Prozent = "ICC (%)") |>
  tab_source_note("ICC = Intraklassenkorrelation; Anteil der Gesamtvarianz je Ebene.")

Varianzzerlegung: Dreistufiges Nullmodell
Ebene	Varianzkomponente	ICC (Anteil)	ICC (%)
Strata (Level 3)	0.003024511	0.08870257	8.87
Person (Level 2)	0.000000000	0.00000000	0.00
Momentaufnahmen (Level 1)	0.031072709	0.91129743	91.13
ICC = Intraklassenkorrelation; Anteil der Gesamtvarianz je Ebene.

Befund: Personenebene ≈ 0 → dreistufiges Modell nicht informativ.

3.2 Zweistufiges Nullmodell

Code

dat_2L <- dat |>
  filter(!is.na(wb_geom)) |>
  mutate(stratum_id = droplevels(stratum_id))

tibble(
  n_beobachtungen = nrow(dat_2L),
  n_personen      = n_distinct(dat_2L$device_id),
  n_strata        = n_distinct(dat_2L$stratum_id)
)

Code

m0_2L <- lmer(
  wb_geom ~ 1 + (1 | stratum_id),
  data = dat_2L,
  REML = TRUE
)
summary(m0_2L)

Linear mixed model fit by REML ['lmerMod']
Formula: wb_geom ~ 1 + (1 | stratum_id)
   Data: dat_2L

REML criterion at convergence: -52.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.09258 -0.64335  0.00524  0.71822  1.96022 

Random effects:
 Groups     Name        Variance Std.Dev.
 stratum_id (Intercept) 0.003024 0.05499 
 Residual               0.031073 0.17627 
Number of obs: 102, groups:  stratum_id, 16

Fixed effects:
            Estimate Std. Error t value
(Intercept)  0.57997    0.02297   25.24

Code

vc2           <- as.data.frame(VarCorr(m0_2L))
v_stratum_2L  <- vc2 |> filter(grp == "stratum_id") |> pull(vcov)
v_resid_2L    <- vc2 |> filter(grp == "Residual")   |> pull(vcov)
icc_stratum_2L <- v_stratum_2L / (v_stratum_2L + v_resid_2L)

Code

# Ausgabe: 2-Level-Zerlegung (reine Darstellung)
tibble(
  Parameter = c("Intercept (Gesamtmittel)","Strata-Varianz","Residual-Varianz","VPC Strata"),
  Wert      = c(fixef(m0_2L)[["(Intercept)"]], v_stratum_2L, v_resid_2L, icc_stratum_2L),
  Einheit   = c("Wohlbefinden (0–1)","Varianz","Varianz","Anteil")
) |>
  mutate(Wert = round(Wert, 6)) |>
  gt() |>
  tab_header(title = "Zweistufiges Nullmodell: Varianzzerlegung & VPC(Strata)") |>
  cols_label(Parameter = "Parameter", Wert = "Geschätzter Wert", Einheit = "Einheit")

Zweistufiges Nullmodell: Varianzzerlegung & VPC(Strata)
Parameter	Geschätzter Wert	Einheit
Intercept (Gesamtmittel)	0.579968	Wohlbefinden (0–1)
Strata-Varianz	0.003024	Varianz
Residual-Varianz	0.031073	Varianz
VPC Strata	0.088699	Anteil

4 MAIHDA (2 Ebenen)

4.1 Additivmodell (M1_2L)

Code

m1_2L <- lmer(
  wb_geom ~ gender + age_group + sexual_orientation + eq_income_group +
    (1 | stratum_id),
  data = dat_2L,
  REML = TRUE
)

# PEV_strata relativ zu M0
var_strata_m0 <- as.data.frame(VarCorr(m0_2L)) |> filter(grp == "stratum_id") |> pull(vcov)
var_strata_m1 <- as.data.frame(VarCorr(m1_2L)) |> filter(grp == "stratum_id") |> pull(vcov)
PEV_strata_M0_M1 <- (var_strata_m0 - var_strata_m1) / var_strata_m0

Code

# Ausgabe: Vergleich M0 vs. M1 (reine Darstellung)
tibble(
  Modell     = c("M0_2L (Null)","M1_2L (additive Achsen)"),
  Var_Strata = c(var_strata_m0, var_strata_m1),
  PEV_vs_M0  = c(0, PEV_strata_M0_M1)
) |>
  mutate(Var_Strata = round(Var_Strata, 6),
         PEV_vs_M0  = round(PEV_vs_M0, 3)) |>
  gt() |>
  tab_header(title = "Strata-Varianz: Null vs. Additives MAIHDA-Modell") |>
  tab_source_note(paste0("PEV_strata (M0→M1) = ", round(PEV_strata_M0_M1, 3),
                         " → Anteil der Strata-Varianz durch additive Haupteffekte."))

Strata-Varianz: Null vs. Additives MAIHDA-Modell
Modell	Var_Strata	PEV_vs_M0
M0_2L (Null)	0.003024	0.000
M1_2L (additive Achsen)	0.001110	0.633
PEV_strata (M0→M1) = 0.633 → Anteil der Strata-Varianz durch additive Haupteffekte.

4.2 + Kontext (M2_2L)

Code

# Typen absichern
dat_2L <- dat_2L |>
  mutate(
    activity            = as.factor(activity),
    location_category   = as.factor(location_category),
    indoors_outdoors    = as.factor(indoors_outdoors),
    people_with_you     = as.factor(people_with_you),
    majority_comparison = as.factor(majority_comparison),
    environment_noise   = as.numeric(environment_noise),
    environment_nature  = as.numeric(environment_nature),
    environment_lively  = as.numeric(environment_lively),
    environmen_pleasure = as.numeric(environmen_pleasure)  # Bezeichnung wie im Datensatz
  ) |>
  droplevels()

Code

m2_2L_context <- lmer(
  wb_geom ~ gender + age_group + sexual_orientation + eq_income_group +
    activity + location_category + indoors_outdoors + people_with_you + majority_comparison +
    environment_noise + environment_nature + environment_lively + environmen_pleasure +
    (1 | stratum_id),
  data = dat_2L,
  REML = TRUE
)

Code

extract_var <- function(model) {
  vc <- as.data.frame(VarCorr(model))
  tibble(
    var_strata = vc |> filter(grp == "stratum_id") |> pull(vcov),
    var_resid  = vc |> filter(grp == "Residual")   |> pull(vcov)
  )
}

vars <- bind_rows(
  extract_var(m0_2L)         |> mutate(Modell = "M0_2L (Null)"),
  extract_var(m1_2L)         |> mutate(Modell = "M1_2L (+ Achsen)"),
  extract_var(m2_2L_context) |> mutate(Modell = "M2_2L (+ Kontext)")
) |>
  relocate(Modell)

var0_s <- vars$var_strata[vars$Modell == "M0_2L (Null)"]
var0_e <- vars$var_resid[ vars$Modell == "M0_2L (Null)"]

Code

# Ausgabe: Varianzvergleich (reine Darstellung)
vars |>
  mutate(
    PEV_strata_vs_M0 = (var0_s - var_strata) / var0_s,
    PEV_resid_vs_M0  = (var0_e - var_resid)  / var0_e
  ) |>
  mutate(across(c(var_strata, var_resid, PEV_strata_vs_M0, PEV_resid_vs_M0), ~ round(.x, 6))) |>
  gt() |>
  tab_header(title = "Varianzvergleich (2-Level): Null vs. +Achsen vs. +Kontext") |>
  tab_source_note("PEV = Proportional Explained Variance relativ zu M0. 
MAIHDA: Rest-Strata-Varianz in M1 = intersektionaler Überschuss.") |>
  cols_label(Modell = "Modell", var_strata = "Strata-Varianz",
             var_resid = "Residual-Varianz", PEV_strata_vs_M0 = "PEV Strata vs. M0",
             PEV_resid_vs_M0 = "PEV Residual vs. M0")

Varianzvergleich (2-Level): Null vs. +Achsen vs. +Kontext
Modell	Strata-Varianz	Residual-Varianz	PEV Strata vs. M0	PEV Residual vs. M0
M0_2L (Null)	0.003024	0.031073	0.000000	0.000000
M1_2L (+ Achsen)	0.001110	0.030322	0.633048	0.024175
M2_2L (+ Kontext)	0.000004	0.000223	0.998703	0.992815
PEV = Proportional Explained Variance relativ zu M0. MAIHDA: Rest-Strata-Varianz in M1 = intersektionaler Überschuss.

5 Vertiefungen

5.1 Intersektionaler Überschuss (Random Intercepts von M1)

Code

# BLUPs + 95%-CIs
re1     <- ranef(m1_2L, condVar = TRUE)$stratum_id
postVar <- attr(ranef(m1_2L, condVar = TRUE)[[1]], "postVar")
ci_lo   <- re1[,1] - 1.96 * sqrt(postVar[1,1,])
ci_hi   <- re1[,1] + 1.96 * sqrt(postVar[1,1,])

strata_effekte <- tibble(
  stratum_id = rownames(re1),
  eff        = re1[,1],
  ci_lower   = ci_lo,
  ci_upper   = ci_hi
) |>
  left_join(dat_2L |> distinct(stratum_id, gender, age_group, sexual_orientation, eq_income_group),
            by = "stratum_id") |>
  arrange(desc(eff))

Code

# Ausgabe: Tabelle (reine Darstellung, übersetzt & strukturiert)

strata_effekte |>
  mutate(
    eff      = round(eff, 3),
    ci_lower = round(ci_lower, 3),
    ci_upper = round(ci_upper, 3),
    Geschl.        = recode(as.character(gender), !!!gender_de, .default = as.character(gender)),
    Alter          = as.character(age_group),
    `Sex. Orient.` = recode(as.character(sexual_orientation), !!!so_de, .default = as.character(sexual_orientation)),
    `Äquiv.-Eink.` = recode(as.character(eq_income_group), !!!eq_de, .default = as.character(eq_income_group))
  ) |>
  select(
    Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`,
    Effekt = eff, `CI-untere Grenze` = ci_lower, `CI-obere Grenze` = ci_upper
  ) |>
  mutate(across(everything(), ~ ifelse(is.na(.), "—", as.character(.)))) |>
  gt() |>
  tab_header(title = "Intersektionaler Überschuss (M1) nach Stratum") |>
  tab_source_note("Werte nahe 0: geringe Abweichung vom durch additive Achsen erklärten Erwartungswert.") |>
  cols_align(
    align = "center",
    columns = c(Effekt, `CI-untere Grenze`, `CI-obere Grenze`)
  )

Intersektionaler Überschuss (M1) nach Stratum
Geschl.	Alter	Sex. Orient.	Äquiv.-Eink.	Effekt	CI-untere Grenze	CI-obere Grenze
männlich	16 – 25	homosexuell	niedrig	0.014	-0.044	0.072
männlich	16 – 25	heterosexuell	—	0.012	-0.045	0.07
männlich	16 – 25	heterosexuell	sehr niedrig	0.01	-0.047	0.066
weiblich	16 – 25	bisexuell	—	0.007	-0.047	0.062
weiblich	16 – 25	heterosexuell	hoch	0.007	-0.047	0.061
männlich	26 – 35	heterosexuell	sehr hoch	0.005	-0.056	0.066
weiblich	16 – 25	heterosexuell	sehr niedrig	0.002	-0.057	0.062
weiblich	16 – 25	queer	sehr niedrig	0	-0.063	0.063
männlich	16 – 25	heterosexuell	hoch	-0.001	-0.063	0.061
weiblich	26 – 35	heterosexuell	sehr niedrig	-0.005	-0.065	0.055
männlich	16 – 25	heterosexuell	sehr hoch	-0.005	-0.061	0.052
männlich	16 – 25	homosexuell	hoch	-0.006	-0.068	0.056
männlich	16 – 25	bisexuell	sehr niedrig	-0.007	-0.07	0.056
männlich	16 – 25	homosexuell	—	-0.008	-0.07	0.054
weiblich	16 – 25	heterosexuell	—	-0.012	-0.069	0.045
männlich	16 – 25	heterosexuell	niedrig	-0.014	-0.071	0.043
Werte nahe 0: geringe Abweichung vom durch additive Achsen erklärten Erwartungswert.

Code

# Visualisierung (Berechnung + Plot sichtbar)
# Daten für Plot: übersetzen und Label bauen
se_plot <- strata_effekte |>
  dplyr::mutate(
    Geschl.        = dplyr::recode(as.character(gender), !!!gender_de, .default = as.character(gender)),
    Alter          = as.character(age_group),
    `Sex. Orient.` = dplyr::recode(as.character(sexual_orientation), !!!so_de, .default = as.character(sexual_orientation)),
    `Äquiv.-Eink.` = dplyr::recode(as.character(eq_income_group), !!!eq_de, .default = as.character(eq_income_group)),
    label          = paste(Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`, sep = " / ")
  )

# Plot
ggplot(se_plot, aes(
  x = reorder(label, eff),
  y = eff
)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "steelblue") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red", alpha = 0.7) +
  coord_flip() +
  labs(
    title = "Intersektionaler Überschuss pro Stratum (M1)",
    subtitle = "Abweichung vom durch additive Achsen erklärten Erwartungswert",
    x = "Stratum (Achsenkombination, übersetzt)",
    y = "Abweichung (Random Intercept)"
  ) +
  theme()

5.2 Situative Effekte (M2, Fixed Effects)

Code

fixef_m2 <- broom.mixed::tidy(m2_2L_context, effects = "fixed") |>
  filter(term != "(Intercept)")

if (!"statistic" %in% names(fixef_m2)) fixef_m2 <- fixef_m2 |> mutate(statistic = NA_real_)
if (!"p.value"  %in% names(fixef_m2)) fixef_m2  <- fixef_m2  |> mutate(p.value  = NA_real_)

Code

# Kompakte Ausgabe: kontinuierlich (pro +1 SD) & kategorial (ggü. Referenz)
library(dplyr)

library(stringr)

# Erwartet: 'fixef_m2' (aus vorherigem Chunk) und 'dat_2L' (Datenbasis von M2)

# 1) Definitionen
cont_vars <- c("environment_noise","environment_nature","environment_lively","environmen_pleasure")
cont_de   <- c(environment_noise = "Lautstärke",
               environment_nature = "Natur",
               environment_lively = "Lebhaftigkeit",
               environmen_pleasure = "Angenehmkeit")

cat_vars  <- c("activity","location_category","indoors_outdoors","people_with_you","majority_comparison")
cat_de    <- c(activity = "Aktivität",
               location_category = "Ortskategorie",
               indoors_outdoors = "drinnen/draußen",
               people_with_you = "Begleitung",
               majority_comparison = "Mehrheitsvergleich")

# 2) Kontinuierliche Effekte: standardisiert (Δ WB pro +1 SD der Prädiktorvariable)
sd_lookup <- sapply(cont_vars, function(v) sd(dat_2L[[v]], na.rm = TRUE))
cont_tbl <- fixef_m2 %>%
  filter(term %in% cont_vars) %>%
  mutate(
    var_de      = recode(term, !!!cont_de),
    sd_x        = sd_lookup[term],
    estimate_sd = estimate * sd_x,
    se_sd       = std.error * sd_x,
    ci_lo       = estimate_sd - 1.96 * se_sd,
    ci_hi       = estimate_sd + 1.96 * se_sd
  ) %>%
  transmute(
    Variable = var_de,
    `Δ Wohlbefinden (+1 SD)` = round(estimate_sd, 3),
    `CI-untere Grenze`       = round(ci_lo, 3),
    `CI-obere Grenze`        = round(ci_hi, 3)
  ) %>%
  arrange(desc(abs(`Δ Wohlbefinden (+1 SD)`)))

cont_gt <- cont_tbl %>%
  gt() %>%
  tab_header(title = "Situative Einflüsse (kontinuierlich, pro +1 SD)") %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_source_note("Effekt auf den Wohlbefindensindex (0–1) pro +1 SD des Prädiktors.")

# 3) Kategoriale Effekte: Kontraste vs. Referenz
#    Referenzen sind i. d. R. die ersten Factor-Levels (Treatment-Coding).
ref_levels <- lapply(cat_vars, function(v) levels(dat_2L[[v]])[1])
names(ref_levels) <- cat_vars

cat_rows <- fixef_m2 %>%
  filter(!term %in% c("(Intercept)", cont_vars)) %>%
  mutate(
    var = vapply(term, function(t) {
      hit <- cat_vars[sapply(cat_vars, function(v) startsWith(t, v))]
      if (length(hit) == 0) NA_character_ else hit[1]
    }, character(1)),
    level = ifelse(is.na(var), term, str_sub(term, nchar(var) + 1L))
  ) %>%
  filter(!is.na(var)) %>%
  mutate(
    var_de = recode(var, !!!cat_de),
    ref    = vapply(var, function(v) ref_levels[[v]], character(1)),
    ci_lo  = estimate - 1.96 * std.error,
    ci_hi  = estimate + 1.96 * std.error,
    sig    = !is.na(ci_lo) & !is.na(ci_hi) & (ci_lo * ci_hi > 0),
    Label  = paste0(var_de, ": ", level, " (vs. ", ref, ")")
  ) %>%
  transmute(
    Variable = Label,
    `Δ Wohlbefinden`   = round(estimate, 3),
    `CI-untere Grenze` = round(ci_lo, 3),
    `CI-obere Grenze`  = round(ci_hi, 3),
    sig = sig
  ) %>%
  arrange(desc(abs(`Δ Wohlbefinden`)))

# Kompakt: erst „signifikante“ Kontraste (95%-CI ohne 0), max. 12 Zeilen;
# Fallback: falls keine signifikanten, die 12 größten nach Betrag.
cat_tbl <- cat_rows %>% filter(sig) %>% slice_head(n = 12)
if (nrow(cat_tbl) == 0) cat_tbl <- cat_rows %>% slice_head(n = 12)

cat_gt <- cat_tbl %>%
  gt() %>%
  tab_header(title = "Situative Einflüsse (kategorial, vs. Referenz)") %>%
  cols_align(align = "center", columns = c(`Δ Wohlbefinden`,`CI-untere Grenze`,`CI-obere Grenze`)) %>%
  cols_hide(columns = "sig") %>%
  tab_source_note(
    paste(
      "Referenzen:",
      paste(paste(recode(names(ref_levels), !!!cat_de), shQuote(unlist(ref_levels)), sep = " = "),
            collapse = "; ")
    )
  )

# 4) Ausgeben (beide Tabellen)
cont_gt

Situative Einflüsse (kontinuierlich, pro +1 SD)
Variable	Δ Wohlbefinden (+1 SD)	CI-untere Grenze	CI-obere Grenze
Lebhaftigkeit	0.235	0.177	0.294
Lautstärke	-0.101	-0.143	-0.060
Natur	0.043	-0.041	0.127
Angenehmkeit	-0.026	-0.194	0.142
Effekt auf den Wohlbefindensindex (0–1) pro +1 SD des Prädiktors.

Code

cat_gt

Situative Einflüsse (kategorial, vs. Referenz)
Variable	Δ Wohlbefinden	CI-untere Grenze	CI-obere Grenze
Mehrheitsvergleich: gender;age;background;skin / appearance;language / accent;social / financial situation;health / disability (vs. age)	-1.124	-1.811	-0.438
Aktivität: resting / sleeping (vs. cooking / eating)	1.036	0.444	1.629
Mehrheitsvergleich: background;skin / appearance;language / accent (vs. age)	0.970	0.110	1.829
Mehrheitsvergleich: gender;age;background;skin / appearance;language / accent;clothing / style;social / financial situation;sexual orientation;health / disability (vs. age)	0.714	0.438	0.990
Mehrheitsvergleich: age;background;skin / appearance;language / accent;social / financial situation;clothing / style;health / disability (vs. age)	0.659	0.175	1.143
Mehrheitsvergleich: gender;skin / appearance;social / financial situation;clothing / style;sexual orientation;health / disability;age (vs. age)	-0.547	-0.742	-0.353
Aktivität: using media;leisure / relaxation (vs. cooking / eating)	0.450	0.229	0.670
Mehrheitsvergleich: gender;skin / appearance;language / accent;social / financial situation;clothing / style;sexual orientation;health / disability (vs. age)	-0.447	-0.536	-0.358
Aktivität: cooking / eating;social activities (vs. cooking / eating)	-0.420	-0.701	-0.140
Begleitung: strangers;friends (vs. acquaintances;friends)	0.328	0.028	0.627
Mehrheitsvergleich: gender;skin / appearance;language / accent;health / disability (vs. age)	-0.327	-0.606	-0.048
Aktivität: leisure / relaxation;cooking / eating (vs. cooking / eating)	-0.268	-0.403	-0.133
Referenzen: Aktivität = 'cooking / eating'; Ortskategorie = 'at home'; drinnen/draußen = 'indoors'; Begleitung = 'acquaintances;friends'; Mehrheitsvergleich = 'age'

5.3 Random Slopes (within-Variablen je Stratum)

Code

# Voraussetzungen
stopifnot(
  exists("dat_2L"),
  all(c("wb_geom","stratum_id","gender","age_group","sexual_orientation","eq_income_group") %in% names(dat_2L))
)

need_within <- c("environment_noise_within","environment_nature_within",
                 "environment_lively_within","environmen_pleasure_within")  # Datensatzbezeichnung
stopifnot(all(need_within %in% names(dat_2L)))

axes <- list(
  noise  = list(within = "environment_noise_within"),
  nature = list(within = "environment_nature_within"),
  lively = list(within = "environment_lively_within"),
  pleas  = list(within = "environmen_pleasure_within")
)

fit_axis <- function(dat, within_var, axis_id) {
  x_w <- within_var
  x_z <- paste0(x_w, "_z")
  dat <- dat |> mutate("{x_z}" := as.numeric(scale(.data[[x_w]])))

  form <- as.formula(
    glue::glue("wb_geom ~ gender + age_group + sexual_orientation + eq_income_group +
               `{x_z}` + (1 + `{x_z}` | stratum_id)")
  )
  m <- lmer(form, data = dat, REML = TRUE)

  cf   <- as.data.frame(coef(summary(m)))
  beta <- cf[x_z, "Estimate"]; se_b <- cf[x_z, "Std. Error"]

  re      <- ranef(m, condVar = TRUE)$stratum_id
  postVar <- attr(re, "postVar")
  col_id  <- which(colnames(re) == x_z)

  strata_meta <- dat |> distinct(stratum_id, gender, age_group, sexual_orientation, eq_income_group)

  tibble(
    stratum_id = rownames(re),
    slope_dev  = re[, x_z],
    se_dev     = sqrt(postVar[col_id, col_id, ])
  ) |>
    mutate(
      beta        = beta,
      se_b        = se_b,
      slope_total = beta + slope_dev,
      se_total    = sqrt(se_b^2 + se_dev^2),
      ci_lo       = slope_total - 1.96 * se_total,
      ci_hi       = slope_total + 1.96 * se_total,
      sig_flag    = dplyr::case_when(ci_lo > 0 ~ "klar > 0",
                                     ci_hi < 0 ~ "klar < 0",
                                     TRUE      ~ "unsicher"),
      axis_id     = axis_id
    ) |>
    left_join(strata_meta, by = "stratum_id")
}

slopes_raw <- bind_rows(
  fit_axis(dat_2L, axes$noise$within,  "noise"),
  fit_axis(dat_2L, axes$nature$within, "nature"),
  fit_axis(dat_2L, axes$lively$within, "lively"),
  fit_axis(dat_2L, axes$pleas$within,  "pleas")
)

Code

# Ausgabe: Heatmap-Tabelle (reine Darstellung)



axis_labels <- c(noise="Lautstärke", nature="Natur", lively="Lebhaftigkeit", pleas="Angenehmkeit")

slopes_disp <- slopes_raw |>
  mutate(
    Geschl.        = recode(gender, !!!gender_de, .default = "—"),
    Alter          = age_group,
    `Sex. Orient.` = recode(sexual_orientation, !!!so_de, .default = "—"),
    `Äquiv.-Eink.` = recode(eq_income_group, !!!eq_de, .default = "—"),
    Achse          = recode(axis_id, !!!axis_labels),
    Wert           = slope_total,
    Sig            = sig_flag
  )

# Falls vorhanden: empirische Befragungszählungen je Stratum für Sortierung
stratum_counts <- tibble::tribble(
  ~Geschl., ~Alter, ~`Sex. Orient.`, ~`Äquiv.-Eink.`, ~Befr.,
  "weiblich","16 – 25","heterosexuell","hoch",13,
  "männlich","16 – 25","heterosexuell","sehr niedrig",9,
  "männlich","16 – 25","heterosexuell","—",9,
  "weiblich","16 – 25","heterosexuell","—",8,
  "weiblich","16 – 25","bisexuell","—",12,
  "männlich","16 – 25","heterosexuell","sehr hoch",9,
  "männlich","16 – 25","heterosexuell","niedrig",8,
  "männlich","16 – 25","homosexuell","niedrig",7,
  "weiblich","16 – 25","heterosexuell","sehr niedrig",6,
  "weiblich","26 – 35","heterosexuell","sehr niedrig",5,
  "männlich","26 – 35","heterosexuell","sehr hoch",4,
  "männlich","16 – 25","homosexuell","hoch",3,
  "männlich","16 – 25","heterosexuell","hoch",3,
  "männlich","16 – 25","homosexuell","—",3,
  "männlich","16 – 25","bisexuell","sehr niedrig",2,
  "weiblich","16 – 25","queer","sehr niedrig",2
)

slopes_disp <- slopes_disp |>
  left_join(stratum_counts, by = c("Geschl.","Alter","Sex. Orient.","Äquiv.-Eink.")) |>
  mutate(Befr. = ifelse(is.na(Befr.), -Inf, Befr.)) |>
  arrange(desc(Befr.))

wide <- slopes_disp |>
  select(Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`, Befr., Achse, Wert, Sig) |>
  mutate(Achse = factor(Achse, levels = c("Lautstärke","Natur","Lebhaftigkeit","Angenehmkeit"))) |>
  pivot_wider(names_from = Achse, values_from = c(Wert, Sig))

names(wide) <- gsub("^Wert_", "", names(wide))
names(wide) <- gsub("^Sig_",  "sig_", names(wide))

wide <- wide |>
  select(Geschl., Alter, `Sex. Orient.`, `Äquiv.-Eink.`, Befr.,
         Lautstärke, Natur, Lebhaftigkeit, Angenehmkeit,
         sig_Lautstärke, sig_Natur, sig_Lebhaftigkeit, sig_Angenehmkeit)

heat_cols <- c("Lautstärke","Natur","Lebhaftigkeit","Angenehmkeit")
rng      <- range(unlist(wide[heat_cols]), na.rm = TRUE)
max_abs  <- max(abs(rng))
pal <- col_numeric(palette = c("#2b8cbe","#f7f7f7","#d7301f"),
                   domain  = c(-max_abs, max_abs), na.color = "#f0f0f0")

wide |>
  gt() |>
  tab_header(
    title    = "Effekte pro Stratum (Δ Wohlbefinden pro +1 SD)",
    subtitle = "Links Strata-Merkmale; rechts Heatmap je Achse. Fett = 95%-KI ≠ 0."
  ) |>
  fmt_number(columns = Befr., decimals = 0) |>
  fmt_number(columns = all_of(heat_cols), decimals = 2) |>
  data_color(columns = all_of(heat_cols), colors = pal) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Lautstärke",     rows = sig_Lautstärke != "unsicher")) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Natur",          rows = sig_Natur != "unsicher")) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Lebhaftigkeit",  rows = sig_Lebhaftigkeit != "unsicher")) |>
  tab_style(style = cell_text(weight = "bold"), locations = cells_body(columns = "Angenehmkeit",   rows = sig_Angenehmkeit != "unsicher")) |>
  cols_hide(starts_with("sig_")) |>
  cols_label(`Sex. Orient.` = "Sex. Orient.", `Äquiv.-Eink.` = "Äquiv.-Eink.", Befr. = "Befr.") |>
  tab_options(table.font.size = px(14), data_row.padding = px(6))

Effekte pro Stratum (Δ Wohlbefinden pro +1 SD)
Links Strata-Merkmale; rechts Heatmap je Achse. Fett = 95%-KI ≠ 0.
Geschl.	Alter	Sex. Orient.	Äquiv.-Eink.	Befr.	Lautstärke	Natur	Lebhaftigkeit	Angenehmkeit
weiblich	16 – 25	heterosexuell	hoch	13	0.04	0.04	0.04	0.04
weiblich	16 – 25	bisexuell	—	12	0.04	0.05	0.07	0.07
männlich	16 – 25	heterosexuell	—	9	0.05	0.07	0.06	0.05
männlich	16 – 25	heterosexuell	sehr hoch	9	0.04	0.01	0.01	0.03
männlich	16 – 25	heterosexuell	sehr niedrig	9	0.04	0.07	0.03	0.03
männlich	16 – 25	heterosexuell	niedrig	8	0.04	0.03	0.00	0.02
weiblich	16 – 25	heterosexuell	—	8	0.04	0.02	0.04	0.03
männlich	16 – 25	homosexuell	niedrig	7	0.05	0.07	0.05	0.05
weiblich	16 – 25	heterosexuell	sehr niedrig	6	0.04	0.05	0.04	0.04
weiblich	26 – 35	heterosexuell	sehr niedrig	5	0.04	0.04	0.03	0.04
männlich	26 – 35	heterosexuell	sehr hoch	4	0.04	0.05	0.04	0.05
männlich	16 – 25	homosexuell	hoch	3	0.04	0.02	0.02	0.02
männlich	16 – 25	heterosexuell	hoch	3	0.04	0.04	0.03	0.03
männlich	16 – 25	homosexuell	—	3	0.04	0.07	0.03	0.02
männlich	16 – 25	bisexuell	sehr niedrig	2	0.04	0.03	0.02	0.02
weiblich	16 – 25	queer	sehr niedrig	2	0.04	0.04	0.03	0.04

Literatur

Evans, Clare R., George Leckie, S.V. Subramanian, Andrew Bell und Juan Merlo 2024: A Tutorial for Conducting Intersectional Multilevel Analysis of Individual Heterogeneity and Discriminatory Accuracy (MAIHDA). SSM - Population Health 26 : 101664. DOI: https://doi.org/10.1016/j.ssmph.2024.101664