Data is available only upon formal request and subject to approval.
Approved users receive a secure institute account and work with the data exclusively in our Trusted Research Environment (TRE) via remote desktop.
This study aims to analyze the relationship between medical urgency status at the time of transplant and post-transplant outcomes, accounting for total waiting time and donor-recipient compatibility characteristics. By examining waiting-list parameters and transplant data from the Eurotransplant cohort, we aim to identify key predictors of transplant success and of equitable allocation.
Specifically, we are interested in whether higher urgency statuses (e.g., HU, NT) are associated with different short- and long-term outcomes compared with elective transplants, and in how these outcomes are influenced by donor-recipient ABO compatibility, weight mismatch, and cold ischemia times.
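As a rough, purely illustrative sketch of this kind of analysis, the R snippet below fits a logistic regression of a binary one-year graft outcome on urgency status and the listed donor-recipient factors. The data are simulated and every object and column name (transplant_df, graft_survival_1yr, urgency_status, etc.) is an invented placeholder; none of them refer to actual Eurotransplant variables.

# Illustrative sketch only: simulated data and invented column names,
# not actual Eurotransplant variables.
set.seed(1)
transplant_df <- data.frame(
  urgency_status      = factor(sample(c("elective", "HU"), 200, replace = TRUE)),
  waiting_time_days   = rpois(200, lambda = 180),
  abo_identical       = factor(sample(c("yes", "no"), 200, replace = TRUE)),
  weight_mismatch_kg  = rnorm(200, mean = 0, sd = 10),
  cold_ischemia_hours = runif(200, min = 2, max = 14),
  graft_survival_1yr  = rbinom(200, size = 1, prob = 0.8)
)

fit <- glm(
  graft_survival_1yr ~ urgency_status + waiting_time_days +
    abo_identical + weight_mismatch_kg + cold_ischemia_hours,
  data = transplant_df,
  family = binomial
)
summary(fit)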
id_gdm_ther
ther1_early_gdm_ther
ther2_early_gdm_ther
anzahl_ogtt_early_gdm_ther
bem1_early_gdm_ther
ther_n_ogtt_late_gdm_ther
bem2_gdm_late_gdm_ther
therapie_late_gdm_fin
bz_late_gdm_ther
aerztl_gdm_late_gdm_fin
therapie_late_bem_gdm_fin
kein_late_gdm_fin
id_bd_ss
ssw1_bd_ss
bdsys1_bd_ss
bddia1_bd_ss
ssw2_bd_ss
bdsys2_bd_ss
bddia2_bd_ss
ssw3_bd_ss
bdsys3_bd_ss
bddia3_bd_ss
ssw4_bd_ss
bdsys4_bd_ss
bddia4_bd_ss
ssw5_bd_ss
bdsys5_bd_ss
bddia5_bd_ss
ssw6_bd_ss
bdsys6_bd_ss
bddia6_bd_ss
ssw7_bd_ss
bdsys7_bd_ss
bddia7_bd_ss
ssw8_bd_ss
bdsys8_bd_ss
bddia8_bd_ss
ssw9_bd_ss
bdsys9_bd_ss
bddia9_bd_ss
ssw10_bd_ss
bdsys10_bd_ss
bddia10_bd_ss
ssw11_bd_ss
bdsys11_bd_ss
bddia11_bd_ss
ssw12_bd_ss
bdsys12_bd_ss
bddia12_bd_ss
ssw13_bd_ss
bdsys13_bd_ss
bddia13_bd_ss
ssw14_bd_ss
bdsys14_bd_ss
bddia14_bd_ss
ssw15_bd_ss
bdsys15_bd_ss
bddia15_bd_ss
ssw16_bd_ss
bdsys16_bd_ss
bddia16_bd_ss
ssw17_bd_ss
bdsys17_bd_ss
bddia17_bd_ss
ssw18_bd_ss
bdsys18_bd_ss
bddia18_bd_ss
ssw19_bd_ss
bdsys19_bd_ss
bddia19_bd_ss
ssw20_bd_ss
bdsys20_bd_ss
bddia20_bd_ss
ssw21_bd_ss
bdsys21_bd_ss
bddia21_bd_ss
ssw22_bd_ss
bdsys22_bd_ss
bddia22_bd_ss
ssw23_bd_ss
bdsys23_bd_ss
bddia23_bd_ss
ssw24_bd_ss
bdsys24_bd_ss
bddia24_bd_ss
ssw25_bd_ss
bdsys25_bd_ss
bddia25_bd_ss
ssw26_bd_ss
bdsys26_bd_ss
bddia26_bd_ss
ssw27_bd_ss
bdsys27_bd_ss
bddia27_bd_ss
ssw28_bd_ss
bdsys28_bd_ss
bddia28_bd_ss
ssw29_bd_ss
bdsys29_bd_ss
bddia29_bd_ss
ssw30_bd_ss
bdsys30_bd_ss
bddia30_bd_ss
ssw31_bd_ss
bdsys31_bd_ss
bddia31_bd_ss
ssw32_bd_ss
bdsys32_bd_ss
bddia32_bd_ss
ssw33_bd_ss
bdsys33_bd_ss
bddia33_bd_ss
bem_bd_ss
eing_bd_ss
kontr_bd_ss
aend1_bd_ss
aend2_bd_ss
aend3_bd_ss
id_urin
ssw1_urin
uz1_urin
ssw2_urin
uz2_urin
ssw3_urin
uz3_urin
ssw4_urin
uz4_urin
ssw5_urin
uz5_urin
ssw6_urin
uz6_urin
ssw7_urin
uz7_urin
ssw8_urin
uz8_urin
ssw9_urin
uz9_urin
ssw10_urin
uz10_urin
ssw11_urin
uz11_urin
ssw12_urin
uz12_urin
ssw13_urin
uz13_urin
ssw14_urin
uz14_urin
ssw15_urin
uz15_urin
ssw16_urin
uz16_urin
ssw17_urin
uz17_urin
ssw18_urin
uz18_urin
ssw19_urin
uz19_urin
ssw20_urin
uz20_urin
ssw21_urin
uz21_urin
ssw22_urin
uz22_urin
ssw23_urin
uz23_urin
ssw24_urin
uz24_urin
ssw25_urin
uz25_urin
eing_urin
kontr_urin
aend1_urin
aend2_urin
aend3_urin
id_mp
geb_art_mp
gew_vor_ss_mp
gew1_mp
ssw1_mp
gew_letztes_mp
ssw_letzte_mp
gwg1_mp
dif1_ssw_mp
gwg2_mp
dif2_ssw_mp
geb_mu_mp
geb_ki_mp
alt_mu_geburt_mp
alt_mu_gest_mp
alt_mu_gest_ger_mp
gravida_mp
para_mp
aborte_mp
abbr_ss_mp
eileiter_ss_mp
tot_geb_mp
geb_einl_mp
geb_anaest_mp
geb_dauer_mp
pruef_mp
bem_mp
aend1_mp
aend2_mp
aend3_mp
korr_ssw1_mp
korr_ssw_letzte_mp
korr_alt_mu_gest_ger_mp
prae_bmi_mp
prae_bmi_berr_mp
id_tbl_gp
inf_gp
ikt_gp
malf_her_gp
hypogly_gp
statauf_ki_gp
statauf_tage_gp
diab1_fam_gp
diab2_fam_gp
diab3_fam_gp
anamn_fam_gp
geschw1_alt_jahr_gp
geschw1_alt_mo_gp
geschw1_geschl_gp
geschw1_geszu_gp
geschw1_geszu_bem_gp
geschw2_alt_jahr_gp
geschw2_alt_mo_gp
geschw2_geschl_gp
geschw2_geszu_gp
geschw2_geszu_bem_gp
still_gp
ang_still_gp
diab1o2_gp
schildr_kh_gp
pcosyndr_gp
med_gp
depr_gp
angst_gp
hypert_gp
hypert_beginn_gp
praekl_gest_gp
bulimie_gp
magersucht_gp
rauchen_gp
rauchen_wann_gp
rauchen_stopp_gp
bem_ausschl_gp
mu_adip_seit_gp
fols_gp
mg_gp
fe_gp
iod_gp
multivit_gp
bem_gp
uebel_ss_gp
uebel_ss_wann_gp
erbr_ss_gp
sodbr_ss_gp
dt_gp
dolm_gp
mu_deuts_gp
va_deuts_gp
motiv_gp
anruf_foto_gp
bem_beruf_gp
dat_gp
eing_gp
kontr_gp
aend1_gp
aend2_gp
aend3_gp
bem_ki_krank_gp
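The blood-pressure table (bd_ss) stores each pregnancy in wide format: up to 33 numbered measurement triples, where sswN_bd_ss appears to hold the gestational week and bdsysN_bd_ss / bddiaN_bd_ss the systolic and diastolic readings. The sketch below shows one possible way to reshape these columns into long format with the tidyverse; it assumes the table has been loaded as a data frame named bd_ss, which is an assumption about the export, not a documented interface.

library(tidyverse)

# Assumption: `bd_ss` is a data frame containing the columns listed above
# (id_bd_ss, ssw1_bd_ss ... bddia33_bd_ss). This is a sketch, not a documented
# export format.
bd_long <- bd_ss %>%
  pivot_longer(
    cols = matches("^(ssw|bdsys|bddia)\\d+_bd_ss$"),
    names_pattern = "^(ssw|bdsys|bddia)(\\d+)_bd_ss$",
    names_to = c(".value", "measurement"),
    values_drop_na = TRUE
  ) %>%
  rename(gest_week = ssw, systolic = bdsys, diastolic = bddia) %>%
  arrange(id_bd_ss, as.integer(measurement))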
# R Script for Logistic Regression
# --------------------------------------------------------------------------
# I. SETUP: Install and load necessary packages
# --------------------------------------------------------------------------
# This function checks if a package is installed. If not, it installs it.
# Then, it loads the package into the R session.
install_and_load <- function(package) {
  if (!require(package, character.only = TRUE)) {
    install.packages(package, dependencies = TRUE)
    library(package, character.only = TRUE)
  }
}
# List of packages to install and load
packages <- c("tidyverse", "caret", "caTools")
lapply(packages, install_and_load)
# --------------------------------------------------------------------------
# II. DATA IMPORT: Read the CSV file
# --------------------------------------------------------------------------
# IMPORTANT: Replace "path/to/your/file.csv" with the actual path to your CSV file.
# For example, "C:/Users/YourName/Documents/data.csv" on Windows or
# "/home/YourName/data/data.csv" on Linux/macOS.
# If the file is in your current working directory, you can just use "file.csv".
tryCatch({
  # Read the data from the CSV file.
  # `read_csv()` from the `readr` package (part of the tidyverse) is used here.
  df <- read_csv("path/to/your/file.csv")

  # Display the first few rows of the dataframe to inspect it
  print("Data successfully loaded. Here are the first 6 rows:")
  print(head(df))

  # Display the structure of the dataframe to see column types
  print("Structure of the dataframe:")
  str(df)
}, error = function(e) {
  # This message is displayed if the file is not found or another error occurs.
  message("Error reading the CSV file. Please check the following:")
  message("- The file path is correct.")
  message("- The file is a valid CSV.")
  message("- You have the necessary permissions to read the file.")
  message("Original error message: ", conditionMessage(e))
})
# --------------------------------------------------------------------------
# III. DATA PREPROCESSING
# --------------------------------------------------------------------------
# This section assumes your dataframe `df` is now loaded.
if (exists("df")) {
# 1. Handle Missing Values (NA)
# -----------------------------
# Check for missing values in each column
print("Count of missing values in each column:")
print(sapply(df, function(x) sum(is.na(x))))
# Option A: Remove rows with any missing values.
# This is the simplest method but can lead to data loss.
# df <- na.omit(df)
  # Option B: Impute missing values.
  # For numeric columns, we replace NA with the mean (or median).
  # For categorical (character/factor) columns, we replace NA with the mode.
  # `replace()` is used instead of `ifelse()` so factor columns keep their class.
  impute_mode <- function(x) replace(x, is.na(x), names(which.max(table(x))))
  df <- df %>%
    mutate(across(where(is.numeric), ~ replace(., is.na(.), mean(., na.rm = TRUE)))) %>%
    mutate(across(where(is.character) | where(is.factor), impute_mode))
  print("Missing values have been imputed.")
  print(sapply(df, function(x) sum(is.na(x))))
  # 2. Encode Categorical Variables
  # ---------------------------------
  # Logistic regression requires the dependent variable to be a factor.
  # Replace 'YourDependentVariable' with the name of your outcome column.
  # The dependent variable should be binary (only two levels, e.g., "Yes"/"No" or 0/1).
  # Example: if your dependent variable is named 'churn' with values "Yes" and "No":
  # df$churn <- as.factor(df$churn)
  # Make sure to replace 'YourDependentVariable' with your actual column name:
  # dependent_variable_name <- "YourDependentVariable"
  # df[[dependent_variable_name]] <- as.factor(df[[dependent_variable_name]])

  # It's also good practice to convert character predictors to factors
  df <- df %>%
    mutate(across(where(is.character), as.factor))
  print("Data types after encoding:")
  str(df)

  # 3. Data Splitting: Training and Testing Sets
  # ----------------------------------------------
  # We'll split the data to train the model and then test its performance on unseen data.
  # Replace 'YourDependentVariable' with the name of your outcome column.
  # set.seed(123)  # for reproducibility
  # sample_split <- sample.split(df$YourDependentVariable, SplitRatio = 0.80)
  # train_set <- subset(df, sample_split == TRUE)
  # test_set <- subset(df, sample_split == FALSE)
  # print(paste("Training set size:", nrow(train_set)))
  # print(paste("Test set size:", nrow(test_set)))
}
# --------------------------------------------------------------------------
# IV. LOGISTIC REGRESSION MODEL
# --------------------------------------------------------------------------
# This section builds the logistic regression model.
# Make sure you have defined `train_set` and `test_set` above.
# Replace 'YourDependentVariable' with your actual dependent variable name.
if (exists("train_set") && exists("test_set")) {
# 1. Build the Model
# --------------------
# The formula `YourDependentVariable ~ .` means we are predicting the dependent variable
# using all other columns as predictors.
# You can also specify predictors manually, e.g., `YourDependentVariable ~ predictor1 + predictor2`
# logistic_model <- glm(YourDependentVariable ~ .,
# data = train_set,
# family = binomial)
# print("Logistic Regression Model Summary:")
# print(summary(logistic_model))
# 2. Make Predictions
# -------------------
# We use the trained model to predict probabilities on the test set.
# `type = "response"` gives probabilities P(Y=1).
# predicted_probs <- predict(logistic_model, newdata = test_set, type = "response")
# Convert probabilities to class predictions ("0" or "1", "Yes" or "No")
# We use a threshold of 0.5. If probability > 0.5, predict 1 (or "Yes"), otherwise 0 (or "No").
# predicted_classes <- ifelse(predicted_probs > 0.5, 1, 0) # Use 1/0 or "Yes"/"No" depending on your factor levels
# 3. Evaluate Model Performance
# -----------------------------
# Create a confusion matrix to see how well the model performed.
# Make sure the levels of your predicted classes and actual values match.
# actual_values <- test_set$YourDependentVariable
# confusion_matrix <- table(Predicted = predicted_classes, Actual = actual_values)
# print("Confusion Matrix:")
# print(confusion_matrix)
# Calculate accuracy
# accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
# print(paste("Model Accuracy:", round(accuracy, 4)))
# Using caret for more detailed metrics
# confusionMatrix_caret <- confusionMatrix(data = as.factor(predicted_classes),
# reference = as.factor(actual_values))
# print(confusionMatrix_caret)
} else {
message("\nSkipping Model Building: 'train_set' or 'test_set' not found.")
message("Please uncomment and run the data splitting section (III.3) first.")
message("Remember to replace 'YourDependentVariable' with your actual target column name.")
}
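# --------------------------------------------------------------------------
# V. MINIMAL WORKED EXAMPLE (illustrative only)
# --------------------------------------------------------------------------
# A self-contained sketch of how sections III.3 and IV fit together, run on a
# small simulated dataset. The outcome column `outcome` and predictors `x1`,
# `x2` are placeholders, not variables from the study data.
example_run <- function() {
  set.seed(123)
  demo_df <- data.frame(
    x1 = rnorm(300),
    x2 = factor(sample(c("a", "b"), 300, replace = TRUE))
  )
  demo_df$outcome <- factor(rbinom(300, size = 1, prob = plogis(0.5 * demo_df$x1)))

  # Split into training and test sets (80/20)
  split <- caTools::sample.split(demo_df$outcome, SplitRatio = 0.80)
  train_set <- subset(demo_df, split == TRUE)
  test_set  <- subset(demo_df, split == FALSE)

  # Fit the logistic regression and evaluate on the held-out test set
  model <- glm(outcome ~ ., data = train_set, family = binomial)
  probs <- predict(model, newdata = test_set, type = "response")
  preds <- factor(ifelse(probs > 0.5, 1, 0), levels = levels(test_set$outcome))
  print(caret::confusionMatrix(data = preds, reference = test_set$outcome))
}
# example_run()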