############################################################
# DATA INTEGRATION WORKFLOW
# Author: Sowjanya Batchu
# Description: Automated CSV → REDCap integration workflow
############################################################

############################
# 1️⃣ Libraries
############################
library(dplyr)
library(janitor)
library(readr)
library(stringr)
library(lubridate)
library(httr)
library(jsonlite)

############################################################
# 2️⃣ Data Preprocessing
############################################################

# Directory that is scanned for input files. The value below is a
# placeholder and has to be replaced with the real location.
folder_path <- "PATH/TO/YOUR/CSV/FOLDER"

# Full paths of all entries directly inside `folder_path` whose name ends
# in ".csv" (the pattern is matched case-sensitively).
file_list <- list.files(folder_path, pattern = "\\.csv$", full.names = TRUE)

# Read a delimited text file, choosing the separator from its header line.
#
# The first line of the file is inspected: if it contains a semicolon the
# file is read as semicolon-separated, otherwise as comma-separated.
#
# Arguments:
#   file_path  Path to the CSV file.
#
# Returns:
#   A data.frame as produced by read.csv() (strings are kept as character).
#
# Errors:
#   Stops with an explicit message when the file has no lines at all, since
#   no separator can be detected in that case.
read_csv_auto <- function(file_path) {
  # warn = FALSE: a header-only file without a trailing newline is still fine.
  header <- readLines(file_path, n = 1L, warn = FALSE)

  if (length(header) == 0L) {
    stop("Cannot detect separator, file is empty: ", file_path, call. = FALSE)
  }

  # Plain byte-wise search: ";" is ASCII, so this is safe regardless of the
  # encoding of the other characters (including a leading byte-order mark).
  has_semicolon <- grepl(";", header, fixed = TRUE, useBytes = TRUE)
  separator <- if (has_semicolon) ";" else ","

  read.csv(
    file_path,
    sep = separator,
    # "UTF-8-BOM" reads plain UTF-8 too, but additionally strips a leading
    # byte-order mark so it does not end up inside the first column name.
    fileEncoding = "UTF-8-BOM",
    stringsAsFactors = FALSE
  )
}

############################################################
# 3️⃣ Standardize Column Names
############################################################

# Bring the column names of one source data frame in line with the names
# used for the REDCap import.
#
# Steps, in order:
#   1. Clean all column names with janitor::make_clean_names().
#   2. Rename known source columns:
#        subject                       -> subjid_drv
#        repeat_number / repeatnumber  -> redcap_repeat_instance
#        visitid / visit_id            -> redcap_event_name
#        site                          -> redcap_data_access_group
#      A rename is skipped when the target name is already present, so a
#      file carrying both the source and the target column is left as is
#      instead of failing on duplicate names.
#   3. Turn the literal placeholder "f.A." into NA.
#
# Arguments:
#   df  A data frame (typically the result of read_csv_auto()).
#
# Returns:
#   The data frame with standardised names and placeholders set to NA.
standardize_columns <- function(df) {

  names(df) <- janitor::make_clean_names(names(df))

  # Rename the first element of `from` to `to`; no-op when `from` is empty
  # or a column called `to` already exists.
  rename_to <- function(data, from, to) {
    if (length(from) > 0 && !(to %in% names(data))) {
      names(data)[match(from[1], names(data))] <- to
    }
    data
  }

  df <- rename_to(df, intersect("subject", names(df)), "subjid_drv")

  # First matching column in data-frame order.
  repeat_cols <- names(df)[tolower(names(df)) %in%
                             c("repeat_number", "repeatnumber")]
  df <- rename_to(df, repeat_cols, "redcap_repeat_instance")

  # "visitid" takes precedence over "visit_id" when both are present.
  visit_cols <- intersect(c("visitid", "visit_id"), names(df))
  df <- rename_to(df, visit_cols, "redcap_event_name")

  df <- rename_to(df, intersect("site", names(df)), "redcap_data_access_group")

  # Only character columns can hold the placeholder. Restricting the
  # selection also avoids the type error that na_if() raises in
  # dplyr >= 1.1.0 when a numeric or logical column is compared with a string.
  df %>%
    mutate(across(where(is.character), ~ na_if(.x, "f.A.")))
}

############################################################
# 4️⃣ Apply Cleaning to All Files
############################################################

# Read every file found in `file_list` and standardise its columns.
# The resulting list has one data frame per file, in the order of `file_list`.
data_list <- lapply(
  file_list,
  function(csv_path) standardize_columns(read_csv_auto(csv_path))
)

############################################################
# 5️⃣ REDCap Dictionary Alignment
############################################################

# Dictionary CSV, looked up relative to the current working directory.
redcap_dict <- readr::read_csv(file = "Redcap_dictionary.csv")

# Coerce the columns of `df` to the types declared in a REDCap dictionary.
#
# For every dictionary row whose (lower-cased) field name is a column of
# `df`, is not one of the system variables below, and has a non-empty
# text validation type, the column is first converted to trimmed character
# and then:
#   "date_dmy" -> parsed with lubridate::parse_date_time() and stored as Date
#   "integer"  -> as.integer()
#   "number"   -> as.numeric()
#   anything else -> left as trimmed character
#
# Arguments:
#   df           Data frame to align.
#   redcap_dict  Dictionary with the columns "Variable / Field Name" and
#                "Text Validation Type OR Show Slider Number".
#
# Returns:
#   `df` with the affected columns converted.
#
# Errors:
#   Stops when one of the two required dictionary columns is missing.
align_by_text_validation <- function(df, redcap_dict) {

  name_col <- "Variable / Field Name"
  validation_col <- "Text Validation Type OR Show Slider Number"

  missing_cols <- setdiff(c(name_col, validation_col), names(redcap_dict))
  if (length(missing_cols) > 0) {
    stop(
      "REDCap dictionary is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  field_names <- tolower(redcap_dict[[name_col]])
  validations <- redcap_dict[[validation_col]]

  # Structural columns that must never be re-typed.
  system_vars <- c(
    "redcap_event_name",
    "redcap_data_access_group",
    "eform",
    "subjid_drv",
    "redcap_repeat_instance"
  )

  # seq_along() yields an empty sequence for an empty dictionary, whereas
  # 1:nrow() would iterate over c(1, 0) and fail.
  for (i in seq_along(field_names)) {

    var <- field_names[i]
    validation <- validations[i]

    if (is.na(var) || !(var %in% colnames(df)) || var %in% system_vars) next
    if (is.na(validation) || validation == "") next

    df[[var]] <- str_trim(as.character(df[[var]]))

    if (validation == "date_dmy") {
      parsed <- parse_date_time(
        df[[var]],
        orders = c("dmy", "dmY", "Ymd", "Y-m-d", "d-m-Y")
      )
      df[[var]] <- as.Date(parsed)

    } else if (validation == "integer") {
      df[[var]] <- as.integer(df[[var]])

    } else if (validation == "number") {
      df[[var]] <- as.numeric(df[[var]])
    }
  }

  df
}

############################################################
# 6️⃣ REDCap Import Function
############################################################

# Send a data frame to a REDCap project through the record import API.
#
# The data frame is serialised row-wise to JSON and posted as a flat
# record import with overwriteBehavior "normal"; the server is asked to
# return the number of imported records.
#
# Arguments:
#   data       Data frame whose rows are the records to import.
#   api_token  REDCap API token (single non-empty string).
#   api_url    URL of the REDCap API endpoint (single non-empty string).
#
# Returns:
#   On HTTP 200 the parsed response body; otherwise the response body as
#   text. A status line is printed in both cases.
#
# Errors:
#   Stops before any request is made when `api_token` or `api_url` is not
#   a single non-empty string (e.g. an unset environment variable read
#   through Sys.getenv(), which yields "").
import_to_redcap <- function(data, api_token, api_url) {

  is_nonempty_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  if (!is_nonempty_string(api_token)) {
    stop("`api_token` must be a single non-empty string.", call. = FALSE)
  }
  if (!is_nonempty_string(api_url)) {
    stop("`api_url` must be a single non-empty string.", call. = FALSE)
  }

  # digits = NA keeps full numeric precision; the toJSON() default of
  # 4 decimal digits would silently round values before they are uploaded.
  data_json <- toJSON(
    data,
    dataframe = "rows",
    auto_unbox = TRUE,
    digits = NA
  )

  body <- list(
    token = api_token,
    content = "record",
    format = "json",
    type = "flat",
    overwriteBehavior = "normal",
    data = data_json,
    returnContent = "count",
    returnFormat = "json"
  )

  response <- POST(api_url, body = body, encode = "form")
  status <- status_code(response)

  if (status == 200) {
    cat("Data imported successfully!\n")
    return(content(response, "parsed"))
  }

  cat("Failed to import data. Status code:", status, "\n")
  content(response, "text")
}

############################################################
# 7️⃣ Example Import
############################################################

# ⚠️ NEVER hardcode tokens in production
# api_token <- Sys.getenv("REDCAP_API_TOKEN")
# api_url <- "https://your-redcap-server/api/"

# Example:
# DM <- data_list[[11]]
# DM_import <- import_to_redcap(DM, api_token, api_url)

############################################################
# END OF SCRIPT
############################################################
