#!/bin/bash
# -*- coding: utf-8 -*-
# Timestamp: "2026-01-19 02:59:33 (ywatanabe)"
# File: ./scripts/shell/modules/process_tables.sh
# Directory the script was invoked from.
ORIG_DIR="$(pwd)"
# Absolute directory containing this script. Every expansion is quoted so
# that a checkout path containing spaces or glob characters still resolves.
THIS_DIR="$(cd "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# Hidden, per-script log file stored next to the script itself.
LOG_PATH="$THIS_DIR/.$(basename -- "$0").log"
# Reset the log at the start of every run (leaves a single newline).
echo >"$LOG_PATH"
# Top-level directory of the enclosing git work tree; empty when git fails
# (for example when the script is not inside a repository).
GIT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"
# ANSI colour escape sequences (expanded later by `echo -e`).
GRAY='\033[0;90m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Print "<TAG>: <message>" wrapped in a colour and followed by a reset.
#   $1 - colour escape, $2 - four-letter tag, $3 - message text
_print_tagged() {
    echo -e "${1}${2}: ${3}${NC}"
}

# Severity-specific wrappers; each takes the message as its first argument.
echo_info() { _print_tagged "$GRAY" "INFO" "$1"; }
echo_success() { _print_tagged "$GREEN" "SUCC" "$1"; }
echo_warning() { _print_tagged "$YELLOW" "WARN" "$1"; }
echo_error() { _print_tagged "$RED" "ERRO" "$1"; }

# Section header rendered as an INFO line surrounded by "===".
echo_header() { echo_info "=== $1 ==="; }
# ---------------------------------------
# Fast path for --no_tables: bail out BEFORE the expensive config loading.
# The first positional argument carries the flag; it defaults to "false".
NO_TABLES_ARG="${1:-false}"
if [[ "$NO_TABLES_ARG" == "true" ]]; then
    # echo_info (defined above) produces the same gray "INFO: ..." lines.
    echo_info "Running $0 ..."
    echo_info "Skipping all table processing (--no_tables specified)"
    exit 0
fi
log_info() {
    # Verbose-only trace line: prints the message in gray behind an arrow,
    # but only when SCITEX_LOG_LEVEL (default 1) is at least 2.
    #   $1 - message text
    [ "${SCITEX_LOG_LEVEL:-1}" -ge 2 ] || return 0
    echo -e " \033[0;90m→ $1\033[0m"
}
# Stage timing: epoch second at which the most recent stage started.
# Written by log_table_stage_start, read by log_table_stage_end.
TABLE_STAGE_START=0
log_table_stage_start() {
    # Record the start time of a stage and announce it with a wall-clock stamp.
    #   $1 - human-readable stage name
    local now_hms
    TABLE_STAGE_START=$(date +%s)
    now_hms=$(date '+%H:%M:%S')
    echo_info " [$now_hms] $1"
}
log_table_stage_end() {
    # Report that a stage finished, including the whole seconds elapsed since
    # the matching log_table_stage_start call (read from TABLE_STAGE_START).
    #   $1 - human-readable stage name
    local finished_at stamp
    finished_at=$(date +%s)
    stamp=$(date '+%H:%M:%S')
    echo_success " [$stamp] $1 ($((finished_at - TABLE_STAGE_START))s)"
}
# Configurations: source the project config, resolved relative to the current
# working directory, passing the document type from SCITEX_WRITER_DOC_TYPE.
# NOTE(review): the expansion is unquoted, so an empty or unset value passes no
# argument at all -- presumably intentional; confirm against load_config.sh.
source ./config/load_config.sh $SCITEX_WRITER_DOC_TYPE
# Logging: make sure the log file exists (any touch error is silenced), print a
# blank separator line, then announce the script. log_info as defined in this
# file only prints when SCITEX_LOG_LEVEL is 2 or higher.
touch "$LOG_PATH" >/dev/null 2>&1
echo
log_info "Running $0 ..."
function init_tables() {
    # Reset the table build area: delete previously compiled *.tex tables,
    # make sure the table directories exist, and reset the aggregate file
    # (SCITEX_WRITER_TABLE_COMPILED_FILE) to a single newline.
    # Globals (read): SCITEX_WRITER_TABLE_DIR,
    #   SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR,
    #   SCITEX_WRITER_TABLE_COMPILED_DIR, SCITEX_WRITER_TABLE_COMPILED_FILE
    # Returns: 1 without touching anything when the compiled dir is unset.

    # Safety guard: with an empty directory variable the glob below would
    # expand to "/*.tex" and delete files from the filesystem root.
    if [ -z "$SCITEX_WRITER_TABLE_COMPILED_DIR" ]; then
        echo_error " SCITEX_WRITER_TABLE_COMPILED_DIR is not set; cannot initialize tables"
        return 1
    fi

    # Cleanup and prepare directories
    rm -f -- "$SCITEX_WRITER_TABLE_COMPILED_DIR"/*.tex
    mkdir -p -- "$SCITEX_WRITER_TABLE_DIR" >/dev/null
    mkdir -p -- "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR" >/dev/null
    mkdir -p -- "$SCITEX_WRITER_TABLE_COMPILED_DIR" >/dev/null
    echo >"$SCITEX_WRITER_TABLE_COMPILED_FILE"
}
function xlsx2csv_convert() {
    # Convert numbered Excel workbooks ([0-9]*.xlsx / [0-9]*.xls) in the
    # caption/media directory to CSV files with the same base name.
    # Does nothing when the xlsx2csv tool is not available.
    # A workbook is converted only if its CSV is missing or older.
    # Globals (read): SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR
    command -v xlsx2csv >/dev/null 2>&1 || return 0

    local xlsx_file file_name base_name csv_file
    for xlsx_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.{xlsx,xls}; do
        # An unmatched glob stays literal; skip it.
        [ -e "$xlsx_file" ] || continue

        # Strip the extension with parameter expansion instead of sed: the
        # "\|" alternation is a GNU extension and silently fails on BSD sed,
        # which would leave names such as "01_x.xlsx.csv".
        file_name="${xlsx_file##*/}"
        case "$file_name" in
        *.xlsx) base_name="${file_name%.xlsx}" ;;
        *.xls) base_name="${file_name%.xls}" ;;
        *) base_name="$file_name" ;;
        esac
        csv_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.csv"

        # Convert only if CSV doesn't exist or is older than Excel file
        if [ ! -f "$csv_file" ] || [ "$xlsx_file" -nt "$csv_file" ]; then
            echo_info " Converting $xlsx_file to CSV..."
            if xlsx2csv "$xlsx_file" "$csv_file"; then
                echo_success " Created $csv_file from Excel"
            else
                echo_warning " Failed to convert $xlsx_file"
            fi
        fi
    done
}
function ensure_caption() {
    # Create a placeholder caption (<name>.tex) next to every numbered CSV
    # table that has neither a caption file nor a caption symlink yet.
    # Existing captions are never overwritten.
    # Globals (read): SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR
    local csv_file base_name caption_file rel_path escaped_path
    for csv_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.csv; do
        # An unmatched glob stays literal; skip it.
        [ -e "$csv_file" ] || continue
        base_name=$(basename -- "$csv_file" .csv)
        caption_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.tex"
        if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
            continue
        fi

        echo_info " Creating default caption for table $base_name"
        # Quoted so a directory containing spaces is not split into several
        # stray directories.
        mkdir -p -- "$(dirname -- "$caption_file")"
        # Path shown to the user: drop a leading "./" and escape underscores
        # so the path is valid inside \texttt{}.
        rel_path="${caption_file#./}"
        escaped_path="${rel_path//_/\\_}"
        cat >"$caption_file" <<EOF
%% Edit this file: $rel_path
\\caption{\\textbf{TABLE TITLE HERE}\\\\
\\smallskip
TABLE CAPTION HERE. Edit this caption at \\texttt{$escaped_path}.
}
EOF
    done
}
# NOTE: a legacy helper that used to be defined here was removed; it is no longer needed with the new naming convention.
function check_csv_for_special_chars() {
    # Warn when a CSV contains characters that are special to LaTeX.
    # Prints a warning, up to five offending lines (prefixed with their line
    # numbers) and a hint; prints nothing when the file is clean or unreadable.
    #   $1 - path to the CSV file
    # Globals (read): YELLOW, NC
    local csv_file="$1"
    # Single-quoted on purpose: inside double quotes "$#" would expand to the
    # argument count, so the pattern would match the digit 1 instead of the
    # literal characters '$' and '#'.
    local problem_chars='[&%$#_{}^~\|<>]'
    local problems
    problems=$(grep -n -- "$problem_chars" "$csv_file" 2>/dev/null)
    if [ -n "$problems" ]; then
        echo_warning " Potential LaTeX special characters found in $csv_file:"
        # Colour escapes are quoted so the '[' they contain cannot glob.
        echo -e "${YELLOW}"
        printf '%s\n' "$problems" | head -n 5
        echo "These may need proper LaTeX escaping."
        echo -e "${NC}"
    fi
}
function csv2tex() {
    # Compile every numbered CSV table ([0-9]*.csv) in the caption/media
    # directory into a LaTeX table file inside the compiled directory.
    # Converter preference: csv2latex, then python3 (with or without pandas)
    # via ./scripts/python/csv_to_latex.py, then the AWK fallback
    # (csv2tex_single_fallback). A failing converter also uses the fallback.
    # Globals (read): SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR,
    #   SCITEX_WRITER_TABLE_COMPILED_DIR

    # Determine best method for CSV processing
    local use_method="fallback"
    if command -v csv2latex >/dev/null 2>&1; then
        # csv2latex command (best option)
        use_method="csv2latex"
        echo_info " Using csv2latex for table processing"
    elif command -v python3 >/dev/null 2>&1 && python3 -c "import pandas" 2>/dev/null; then
        # Python with pandas (second best)
        use_method="python"
        echo_info " Using Python with pandas for table processing"
    elif command -v python3 >/dev/null 2>&1; then
        # Python without pandas
        use_method="python_basic"
        echo_info " Using Python (basic) for table processing"
    else
        echo_warning " Using fallback AWK processing for tables"
    fi

    local csv_file base_name caption_file compiled_file
    local tbl_num tbl_clean_name tbl_title tbl_bookmark
    local temp_caption_file
    local -a caption_args

    # Process each CSV file
    for csv_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.csv; do
        # An unmatched glob stays literal; skip it.
        [ -e "$csv_file" ] || continue
        base_name=$(basename -- "$csv_file" .csv)
        caption_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.tex"
        compiled_file="$SCITEX_WRITER_TABLE_COMPILED_DIR/${base_name}.tex"

        case "$use_method" in
        csv2latex)
            # csv2latex only emits the tabular; bookmark, caption and label
            # are wrapped around it here.
            tbl_num="${base_name%%_*}"
            tbl_clean_name="${base_name#*_}"
            tbl_title=""
            if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
                # Bookmark title: first \textbf{...} in the caption, else the
                # first sentence of \caption{...}.
                tbl_title=$(sed -n 's/.*\\textbf{\([^}]*\)}.*/\1/p' "$caption_file" | head -n 1)
                [ -z "$tbl_title" ] && tbl_title=$(sed -n 's/.*\\caption{\([^.]*\)\..*/\1/p' "$caption_file" | head -n 1)
            fi
            tbl_title="${tbl_title%.}"
            tbl_bookmark="Table ${tbl_num}"
            [ -n "$tbl_title" ] && tbl_bookmark="Table ${tbl_num} --- ${tbl_title}"
            {
                echo "\\pdfbookmark[2]{${tbl_bookmark}}{table_${base_name}}"
                echo "\\begin{table}[htbp]"
                echo "\\centering"
                echo "\\footnotesize"
                # Generate the tabular environment using csv2latex
                csv2latex --nohead --separator comma "$csv_file" 2>/dev/null
                # Add caption if it exists
                if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
                    cat "$caption_file"
                else
                    # Same default as the AWK fallback: table number without
                    # one leading zero, name with underscores turned into
                    # spaces (a raw "_" is invalid in LaTeX text mode).
                    echo "\\caption{Table ${tbl_num#0}: ${tbl_clean_name//_/ }}"
                fi
                # Add label only if caption doesn't already contain one
                if ! grep -q '\\label{tab:' "$caption_file" 2>/dev/null; then
                    echo "\\label{tab:${base_name}}"
                fi
                echo "\\end{table}"
            } >"$compiled_file"
            # csv2latex succeeded only if a tabular environment was emitted
            if [ -s "$compiled_file" ] && grep -qF '\begin{tabular}' "$compiled_file" 2>/dev/null; then
                echo_success " $compiled_file compiled (using csv2latex)"
            else
                echo_warning " csv2latex failed, trying fallback"
                csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
            fi
            ;;
        python | python_basic)
            # Use our Python script
            if [ -f "./scripts/python/csv_to_latex.py" ]; then
                caption_args=()
                temp_caption_file=""
                if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
                    # The script receives a private copy of the caption.
                    # mktemp avoids a predictable name in a shared temp
                    # directory; the copy is created only once we know the
                    # script exists, so it can no longer be left behind.
                    if temp_caption_file=$(mktemp "${TMPDIR:-/tmp}/caption_${base_name}.XXXXXX"); then
                        cat "$caption_file" >"$temp_caption_file"
                        caption_args=(--caption-file "$temp_caption_file")
                    else
                        temp_caption_file=""
                        echo_warning " Could not create a temporary caption file for $base_name"
                    fi
                fi
                # Arguments are passed as an array so paths are never split.
                if python3 ./scripts/python/csv_to_latex.py "$csv_file" "$compiled_file" "${caption_args[@]}"; then
                    echo_success " $compiled_file compiled (using Python)"
                else
                    echo_warning " Python processing failed, trying fallback"
                    csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
                fi
                if [ -n "$temp_caption_file" ]; then
                    rm -f -- "$temp_caption_file"
                fi
            else
                csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
            fi
            ;;
        *)
            # Use fallback AWK processing
            csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
            ;;
        esac
    done
}
function csv2tex_single_fallback() {
    # Basic AWK-based conversion of one CSV file into a LaTeX table.
    #   $1 - input CSV file
    #   $2 - output .tex file (overwritten)
    #   $3 - caption file (may be missing; a default caption is generated)
    # Fields are split on every comma (quoted fields are not understood) and
    # only underscores are escaped. Tables with more than 30 data rows are
    # truncated to the first 28 and last 2 rows plus an "omitted" marker.
    # Returns: 1 (and removes any stale output) when the CSV has no header.
    local csv_file="$1"
    local compiled_file="$2"
    local caption_file="$3"
    local base_name table_number table_clean_name
    local num_columns num_rows
    local max_rows=30
    local rows_omitted=0
    local truncated=false
    local tbl_title="" tbl_bookmark
    # Use standard font size for tables
    # Standard academic paper convention: \footnotesize (8pt) for tables
    local fontsize="\\footnotesize"
    # Shared row formatter: escapes "_" in every cell, joins cells with " & "
    # and ends the row with "\\". With bold=1 cells are wrapped in \textbf{}.
    local row_awk='{
    for (i = 1; i <= NF; i++) {
        val = $i
        gsub(/[_]/, "\\\\_", val)
        if (bold) {
            printf("\\textbf{%s}", val)
        } else {
            printf("%s", val)
        }
        if (i < NF) printf(" & ")
    }
    print "\\\\"
}'

    base_name=$(basename -- "$csv_file" .csv)
    # Extract table number from filename like 01_seizure_count
    if [[ "$base_name" =~ ^([0-9]+)_ ]]; then
        table_number="${BASH_REMATCH[1]}"
        table_clean_name="${base_name#*_}"
    else
        table_number="$base_name"
        table_clean_name="$base_name"
    fi

    # Pre-check CSV for problematic characters
    check_csv_for_special_chars "$csv_file"

    num_columns=$(head -n 1 "$csv_file" 2>/dev/null | awk -F, '{print NF}')
    # A CSV without a header row would produce an invalid tabular spec that
    # breaks the LaTeX build, so skip it instead.
    if [ -z "$num_columns" ] || [ "$num_columns" -eq 0 ]; then
        echo_warning " Skipping $csv_file: no header row found"
        rm -f -- "$compiled_file"
        return 1
    fi
    # NR also counts a final line that lacks a trailing newline.
    num_rows=$(awk 'END { print NR }' "$csv_file")

    # Check if truncation needed (+1 for header)
    if ((num_rows > max_rows + 1)); then
        truncated=true
        rows_omitted=$((num_rows - max_rows - 1))
    fi

    # Bookmark title: first \textbf{...} in the caption, else the first
    # sentence of \caption{...}.
    if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
        tbl_title=$(sed -n 's/.*\\textbf{\([^}]*\)}.*/\1/p' "$caption_file" | head -n 1)
        [ -z "$tbl_title" ] && tbl_title=$(sed -n 's/.*\\caption{\([^.]*\)\..*/\1/p' "$caption_file" | head -n 1)
    fi
    tbl_title="${tbl_title%.}"
    tbl_bookmark="Table ${table_number}"
    [ -n "$tbl_title" ] && tbl_bookmark="Table ${table_number} --- ${tbl_title}"

    {
        echo "\\pdfbookmark[2]{${tbl_bookmark}}{table_${base_name}}"
        echo "\\begin{table}[htbp]"
        echo "\\centering"
        echo "$fontsize"
        # Adjust tabcolsep based on number of columns to fit width
        if ((num_columns > 8)); then
            echo "\\setlength{\\tabcolsep}{2pt}" # Very tight for many columns
        elif ((num_columns > 6)); then
            echo "\\setlength{\\tabcolsep}{3pt}" # Tight spacing
        elif ((num_columns > 4)); then
            echo "\\setlength{\\tabcolsep}{4pt}" # Medium spacing
        else
            echo "\\setlength{\\tabcolsep}{6pt}" # Normal spacing
        fi
        # Use resizebox to ensure table fits within text width
        echo "\\resizebox{\\textwidth}{!}{%"
        echo "\\begin{tabular}{*{$num_columns}{l}}"
        echo "\\toprule"
        # Header row in bold
        head -n 1 "$csv_file" | awk -F, -v bold=1 "$row_awk"
        echo "\\midrule"
        if [ "$truncated" = true ]; then
            # Show first max_rows-2 rows
            tail -n +2 "$csv_file" | head -n $((max_rows - 2)) | awk -F, -v bold=0 "$row_awk"
            # Add truncation indicator
            echo "\\midrule"
            echo "\\multicolumn{$num_columns}{c}{\\textit{... $rows_omitted rows omitted ...}} \\\\"
            echo "\\midrule"
            # Show last 2 rows
            tail -n 2 "$csv_file" | awk -F, -v bold=0 "$row_awk"
        else
            tail -n +2 "$csv_file" | awk -F, -v bold=0 "$row_awk"
        fi
        echo "\\bottomrule"
        echo "\\end{tabular}"
        echo "}" # Close resizebox

        if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
            if [ "$truncated" = true ]; then
                # Re-open the caption so the truncation note can be appended:
                # drop the closing brace from the LAST line that ends in "}".
                # Earlier lines ending in "}" (e.g. "\textbf{Title}") must
                # stay intact. Assumes the caption's closing brace is the
                # final "}" in the file.
                awk '
                    { lines[NR] = $0 }
                    /[}][ \t\r]*$/ { last = NR }
                    END {
                        for (i = 1; i <= NR; i++) {
                            if (i == last) sub(/[}][ \t\r]*$/, "", lines[i])
                            print lines[i]
                        }
                    }' "$caption_file"
                echo "\\textit{Note: Table truncated to $max_rows rows from $num_rows total rows for display purposes.}}"
            else
                cat "$caption_file"
            fi
        else
            echo "\\caption{Table ${table_number#0}: ${table_clean_name//_/ }"
            if [ "$truncated" = true ]; then
                echo "\\textit{Note: Table truncated to $max_rows rows from $num_rows total rows for display purposes.}"
            fi
            echo "}"
        fi
        # Add label only if caption doesn't already contain one
        if ! grep -q '\\label{tab:' "$caption_file" 2>/dev/null; then
            echo "\\label{tab:${base_name}}"
        fi
        echo "\\end{table}"
    } >"$compiled_file"
    echo_info " $compiled_file compiled (using fallback)"
}
function csv2tex_fallback() {
    # Run the AWK fallback converter (csv2tex_single_fallback) on every
    # numbered CSV table in the caption/media directory.
    # Globals (read): SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR,
    #   SCITEX_WRITER_TABLE_COMPILED_DIR
    # Loop variables are local so the caller's variables of the same name
    # (base_name, caption_file, ...) are not overwritten.
    local csv_file base_name caption_file compiled_file
    for csv_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.csv; do
        # An unmatched glob stays literal; skip it.
        [ -e "$csv_file" ] || continue
        base_name=$(basename -- "$csv_file" .csv)
        caption_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.tex"
        compiled_file="$SCITEX_WRITER_TABLE_COMPILED_DIR/${base_name}.tex"
        csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
    done
}
function create_table_header() {
    # Write the placeholder table (00_Tables_Header.tex) into the compiled
    # directory. It is used when the manuscript has no real tables and
    # doubles as inline instructions for adding one.
    # Globals (read): SCITEX_WRITER_TABLE_COMPILED_DIR
    local template_path="$SCITEX_WRITER_TABLE_COMPILED_DIR/00_Tables_Header.tex"
    # Quoted delimiter: the LaTeX below is written verbatim, no expansion.
    cat <<'EOF' >"$template_path"
% Template table when no actual tables are present
\begin{table}[htbp]
\centering
\caption{\textbf{Placeholder table demonstrating the table format for this manuscript template}\\
\smallskip
To add tables to your manuscript, place CSV files in \texttt{caption\_and\_media/} with format \texttt{XX\_description.csv}, create matching caption files \texttt{XX\_description.tex}, and reference in text using \texttt{Table\textasciitilde\textbackslash ref\{tab:XX\_description\}}. Example can be seen at \texttt{01\_seizure\_count.csv} with \texttt{01\_seizure\_count.tex}
}
\label{tab:0_Tables_Header}
\begin{tabular}{p{0.3\textwidth}p{0.6\textwidth}}
\toprule
\textbf{Step} & \textbf{Instructions} \\
\midrule
1. Add CSV & Place file like \texttt{01\_data.csv} in \texttt{caption\_and\_media/} \\
2. Add Caption & Create \texttt{01\_data.tex} with table caption \\
3. Compile & Run \texttt{./compile -m} to process tables \\
4. Reference & Use \texttt{\textbackslash ref\{tab:01\_data\}} in manuscript \\
\bottomrule
\end{tabular}
\end{table}
EOF
    echo_info " Created table header template with instructions"
}
function gather_table_tex_files() {
    # Rebuild the aggregate file (SCITEX_WRITER_TABLE_COMPILED_FILE) with one
    # \input line per compiled table found directly inside the compiled
    # directory, in sorted name order. When no real table exists the
    # placeholder from create_table_header is generated and used instead.
    # Globals (read): SCITEX_WRITER_TABLE_COMPILED_FILE,
    #   SCITEX_WRITER_TABLE_COMPILED_DIR
    # Files are listed with a glob rather than word-splitting `find` output,
    # so paths containing spaces work. Both passes look only at the top
    # level of the directory, so they always agree on what a table is.
    local output_file="${SCITEX_WRITER_TABLE_COMPILED_FILE}"
    local compiled_dir="${SCITEX_WRITER_TABLE_COMPILED_DIR%/}"
    local header_name="00_Tables_Header.tex"
    local table_tex tex_name
    local has_real_tables=false
    local table_count=0

    rm -f -- "$output_file" >/dev/null 2>&1
    {
        echo "% Auto-generated file containing all table inputs"
        echo "% Generated by gather_table_tex_files()"
        echo ""
    } >"$output_file"

    # First check if there are any real table files (anything but the header)
    for table_tex in "$compiled_dir"/[0-9]*.tex; do
        # An unmatched glob stays literal; skip it.
        [ -e "$table_tex" ] || [ -L "$table_tex" ] || continue
        if [ "${table_tex##*/}" != "$header_name" ]; then
            has_real_tables=true
            break
        fi
    done

    # If no real tables, create the header/template
    if [ "$has_real_tables" = false ]; then
        create_table_header
    fi

    # Emit one \input per table
    for table_tex in "$compiled_dir"/[0-9]*.tex; do
        [ -f "$table_tex" ] || [ -L "$table_tex" ] || continue
        tex_name="${table_tex##*/}"
        if [ "$tex_name" = "$header_name" ]; then
            # The placeholder is only wanted when there are no real tables
            [ "$has_real_tables" = true ] && continue
        else
            echo "% Table from: $tex_name" >>"$output_file"
        fi
        echo "\\input{$table_tex}" >>"$output_file"
        echo "" >>"$output_file"
        table_count=$((table_count + 1))
    done

    if [ "$table_count" -eq 0 ]; then
        echo_warning " No tables were found to compile."
    else
        echo_success " $table_count tables compiled"
    fi
}
# Main execution: run each pipeline stage bracketed by start/end log lines.
_run_table_stage() {
    # $1 - human-readable stage label, $2 - name of the function to run
    log_table_stage_start "$1"
    "$2"
    log_table_stage_end "$1"
}
_run_table_stage "Initializing tables" init_tables
_run_table_stage "Converting XLSX to CSV" xlsx2csv_convert # Convert Excel files to CSV first
_run_table_stage "Ensuring captions exist" ensure_caption
_run_table_stage "Converting CSV to LaTeX" csv2tex
_run_table_stage "Gathering table files" gather_table_tex_files
# EOF