Author: Martin Donovan | GitHub gist | profile.json

Payroll tip-sheet Excel parser — R: reads two different weekly tip-sheet Excel layouts (one per restaurant), reconstructs dynamic column names from numeric date serials via openxlsx::convertToDate, pivots wide to long (employee x date x shift), and inserts to PostgreSQL with a duplicate-date guard via dbWithTransaction.

library(DBI)
library(RPostgres)
library(readxl)
library(openxlsx)
library(tidyverse)
library(janitor)

# Parses weekly payroll tip-sheet Excel files (two different layouts — one per
# restaurant) into a normalized long-form dataframe keyed by employee + date +
# shift, then inserts into PostgreSQL with a duplicate-date guard.
#
# Challenge: the Excel files use numeric date serials as column headers (not
# strings), and the two restaurants use different sheet layouts requiring
# separate parsers. Both pivot from wide (one column per day/shift) to long.
#
# Credentials loaded from .Renviron: DB_HOST, DB_PORT, DB_USER, DB_PASS

getDBConnection <- function() {
  dbConnect(
    RPostgres::Postgres(),
    dbname   = "lhrc_data",
    host     = Sys.getenv("DB_HOST"),
    port     = Sys.getenv("DB_PORT"),
    user     = Sys.getenv("DB_USER"),
    password = Sys.getenv("DB_PASS")
  )
}

getShifts <- function(con) {
  dbGetQuery(con, "select * from shifts where active = true")
}

getRestaurants <- function(con) {
  dbGetQuery(con, "select * from entities where active = true")
}

getDataImportSourceID <- function(con) {
  dbGetQuery(con,
    "select data_import_source_id from data_import_sources
     where lower(data_import_source_name) = 'tip sheets'"
  )[1, 1]
}

isInExistingDateRange <- function(dates, entity, con) {
  datesString <- paste(shQuote(dates, type = "sh"), collapse = ", ")
  res <- dbGetQuery(con,
    paste0("select count(*) from employee_tips ",
           "where entity_id = ", entity,
           " and tip_date in (", datesString, ")")
  )[1, 1]
  as.integer(res) > 0
}

insertTipSheet <- function(DF, entityNumber, con) {
  minDate   <- min(DF$tip_date, na.rm = TRUE)
  maxDate   <- max(DF$tip_date, na.rm = TRUE)
  dateRange <- DF %>%
    mutate(tip_date = as.character(tip_date)) %>%
    select(tip_date) %>%
    distinct() %>%
    unlist(use.names = FALSE)

  if (isInExistingDateRange(dateRange, entityNumber, con)) {
    message("Tip sheet already imported for this date range — skipping.")
    return(FALSE)
  }

  dataSourceID    <- getDataImportSourceID(con)
  maxImportIdQuery <- paste0(
    "select coalesce(max(data_import_id), 0) from data_imports ",
    "where entity_id = ", entityNumber, " and import_source_id = ", dataSourceID)
  insertQuery <- paste0(
    "insert into data_imports ",
    "(entity_id, import_source_id, is_date_range, import_start_date_range, import_end_date_range) values ",
    "(", entityNumber, ", ", dataSourceID, ", TRUE, '", minDate, "', '", maxDate, "')")

  dbWithTransaction(con, {
    currentImportId <- as.integer(dbGetQuery(con, maxImportIdQuery)[1, 1]) + 1
    dbExecute(con, insertQuery)
    DF <- DF %>%
      mutate(data_import_id = currentImportId, entity_id = entityNumber)
    dbWriteTable(con, "employee_tips", DF,
      row.names = FALSE, overwrite = FALSE, append = TRUE,
      field.types = NULL, temporary = FALSE, copy = TRUE)
  })

  TRUE
}

# --- Parser: Restaurant 1 tip sheet -----------------------------------------
# Layout: "Summary" sheet. Row 1 = date serials (numeric), row 2 = shift names.
# Column 3 is a duplicate subtotal — dropped. Dates converted via openxlsx.

getDFFromBRGTipSheet <- function(f, entityId, shifts) {
  getShiftId <- Vectorize(function(name) {
    as.integer(shifts %>% filter(tolower(shift_name) == tolower(name)) %>% pull(shift_id))
  })

  char_date <- function(x) as.character(openxlsx::convertToDate(as.numeric(x)))

  headers <- readxl::read_excel(f, sheet = "Summary", n_max = 2, col_names = FALSE) %>%
    select(-`...1`)
  headers[2, 12] <- "Lunch"   # correct a mislabeled header in this sheet layout

  # Build "YYYY-MM-DD Shift" column names from numeric date serials in row 1
  date_cols <- as.character(headers[1, seq(2, 14, by = 2)])
  cnames <- c("payroll_id", "employee_name",
               unlist(lapply(date_cols, function(d) c(paste(char_date(d), "Lunch"),
                                                       paste(char_date(d), "Dinner")))))

  tips <- readxl::read_excel(f, sheet = "Summary", skip = 2) %>%
    select(-3) %>%
    setNames(cnames) %>%
    filter(!is.na(payroll_id) & !is.na(employee_name)) %>%
    mutate_at(3:16, as.numeric) %>%
    mutate_at(3:16, ~ replace_na(., 0)) %>%
    pivot_longer(cols = 3:16, names_to = "shift", values_to = "tip_amount") %>%
    filter(tip_amount != 0) %>%
    separate(shift, into = c("date", "shift"), sep = " ") %>%
    mutate(
      tip_amount                  = round(tip_amount, 2),
      tip_date                    = as.Date(date, "%Y-%m-%d"),
      entity_id                   = entityId,
      shift_id                    = getShiftId(shift),
      employee_r365_payroll_id    = payroll_id
    ) %>%
    select(-c(date, shift, payroll_id)) %>%
    arrange(tip_date, desc(shift_id), employee_r365_payroll_id)

  tips
}

# --- Parser: Restaurant 2 tip sheet -----------------------------------------
# Layout: "Server Detail Page" sheet. Dates are NOT in the file — they are
# inferred from the week-end date passed in as endDate (supplied externally,
# e.g. parsed from the filename). Sparse columns removed via janitor.

getDFFromBonesTipSheet <- function(file, entityId, shifts, endDate) {
  getShiftId <- Vectorize(function(name) {
    as.integer(shifts %>% filter(tolower(shift_name) == tolower(name)) %>% pull(shift_id))
  })

  # Reconstruct the 7-day sequence ending on endDate
  week_seq <- rev(seq(endDate, length = 7, by = "-1 day"))

  cnames <- c("employee_r365_payroll_id", "employee_name",
               unlist(lapply(week_seq, function(d) c(paste(d, "Lunch"), paste(d, "Dinner")))))
  # Note: seq[[6]] has no Lunch column in this layout (Bones closes for lunch Sat/Sun)
  cnames[length(cnames) - 1] <- paste(week_seq[[6]], "Dinner")

  tips <- readxl::read_excel(file, sheet = "Server Detail Page",
                               skip = 3, .name_repair = "unique_quiet") %>%
    select(1:14) %>%
    setNames(cnames) %>%
    filter(!is.na(employee_r365_payroll_id)) %>%
    janitor::remove_empty("cols") %>%
    mutate_at(3:ncol(.), as.numeric) %>%
    mutate_at(3:ncol(.), ~ replace_na(., 0)) %>%
    pivot_longer(cols = 3:ncol(.), names_to = "shift", values_to = "tip_amount") %>%
    filter(tip_amount != 0) %>%
    separate(shift, into = c("tip_date", "shift"), sep = " ") %>%
    mutate(
      employee_r365_payroll_id = as.integer(employee_r365_payroll_id),
      tip_date                 = as.Date(tip_date, "%Y-%m-%d"),
      shift_id                 = as.numeric(getShiftId(shift)),
      entity_id                = entityId
    ) %>%
    select(-shift) %>%
    arrange(tip_date, desc(shift_id), employee_r365_payroll_id)

  tips
}