test-5

ywatanabe / test-5 / / / / /
No commits yet
Blame
process_tables.sh • 18.5 KB
Raw
#!/bin/bash
# -*- coding: utf-8 -*-
# Timestamp: "2026-01-19 02:59:33 (ywatanabe)"
# File: ./scripts/shell/modules/process_tables.sh

ORIG_DIR="$(pwd)"
THIS_DIR="$(cd $(dirname ${BASH_SOURCE[0]}) && pwd)"
LOG_PATH="$THIS_DIR/.$(basename $0).log"
echo >"$LOG_PATH"

GIT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"

GRAY='\033[0;90m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo_info() { echo -e "${GRAY}INFO: $1${NC}"; }
echo_success() { echo -e "${GREEN}SUCC: $1${NC}"; }
echo_warning() { echo -e "${YELLOW}WARN: $1${NC}"; }
echo_error() { echo -e "${RED}ERRO: $1${NC}"; }
echo_header() { echo_info "=== $1 ==="; }
# ---------------------------------------

# Quick check for --no_tables BEFORE expensive config loading
NO_TABLES_ARG="${1:-false}"
if [ "$NO_TABLES_ARG" = true ]; then
    echo -e "\033[0;90mINFO: Running $0 ...\033[0m"
    echo -e "\033[0;90mINFO:     Skipping all table processing (--no_tables specified)\033[0m"
    exit 0
fi

log_info() {
    if [ "${SCITEX_LOG_LEVEL:-1}" -ge 2 ]; then
        echo -e "  \033[0;90m→ $1\033[0m"
    fi
}

# Timestamp tracking for table processing
TABLE_STAGE_START=0
log_table_stage_start() {
    TABLE_STAGE_START=$(date +%s)
    local timestamp=$(date '+%H:%M:%S')
    echo_info "  [$timestamp] $1"
}

log_table_stage_end() {
    local end=$(date +%s)
    local elapsed=$((end - TABLE_STAGE_START))
    local timestamp=$(date '+%H:%M:%S')
    echo_success "  [$timestamp] $1 (${elapsed}s)"
}

# Configurations
source ./config/load_config.sh $SCITEX_WRITER_DOC_TYPE

# Logging
touch "$LOG_PATH" >/dev/null 2>&1
echo
log_info "Running $0 ..."

function init_tables() {
    # Cleanup and prepare directories
    rm -f "$SCITEX_WRITER_TABLE_COMPILED_DIR"/*.tex
    mkdir -p "$SCITEX_WRITER_TABLE_DIR" >/dev/null
    mkdir -p "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR" >/dev/null
    mkdir -p "$SCITEX_WRITER_TABLE_COMPILED_DIR" >/dev/null
    echo >"$SCITEX_WRITER_TABLE_COMPILED_FILE"
}

function xlsx2csv_convert() {
    # Convert Excel files to CSV if xlsx2csv is available
    if command -v xlsx2csv >/dev/null 2>&1; then
        for xlsx_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.{xlsx,xls}; do
            [ -e "$xlsx_file" ] || continue

            base_name=$(basename "$xlsx_file" | sed 's/\.\(xlsx\|xls\)$//')
            csv_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.csv"

            # Convert only if CSV doesn't exist or is older than Excel file
            if [ ! -f "$csv_file" ] || [ "$xlsx_file" -nt "$csv_file" ]; then
                echo_info "    Converting $xlsx_file to CSV..."
                xlsx2csv "$xlsx_file" "$csv_file"
                if [ $? -eq 0 ]; then
                    echo_success "    Created $csv_file from Excel"
                else
                    echo_warning "    Failed to convert $xlsx_file"
                fi
            fi
        done
    fi
}

function ensure_caption() {
    # Create default captions for any table without one
    for csv_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.csv; do
        [ -e "$csv_file" ] || continue
        local base_name=$(basename "$csv_file" .csv)
        # Extract table number from filename like 01_seizure_count
        local table_number=""
        if [[ "$base_name" =~ ^([0-9]+)_ ]]; then
            table_number="${BASH_REMATCH[1]}"
        else
            table_number="$base_name"
        fi
        local caption_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.tex"

        if [ ! -f "$caption_file" ] && [ ! -L "$caption_file" ]; then
            echo_info "    Creating default caption for table $base_name"
            mkdir -p $(dirname "$caption_file")
            local rel_path="${caption_file#./}"
            local escaped_path="${rel_path//_/\\_}"
            cat >"$caption_file" <<EOF
%% Edit this file: $rel_path
\\caption{\\textbf{TABLE TITLE HERE}\\\\
\\smallskip
TABLE CAPTION HERE. Edit this caption at \\texttt{$escaped_path}.
}
EOF
        fi
    done
}

# Function removed - no longer needed for new naming convention

function check_csv_for_special_chars() {
    # Check CSV file for potential problematic characters
    local csv_file="$1"
    local problem_chars="[&%$#_{}^~\\|<>]"
    local problems=$(grep -n "$problem_chars" "$csv_file" 2>/dev/null || echo "")
    if [ -n "$problems" ]; then
        echo_warn "    Potential LaTeX special characters found in $csv_file:"
        echo -e ${YELLOW}
        echo "$problems" | head -5
        echo "These may need proper LaTeX escaping."
        echo -e ${NC}
    fi
}

function csv2tex() {
    # Determine best method for CSV processing
    local use_method="fallback"

    # Check for csv2latex command (best option)
    if command -v csv2latex >/dev/null 2>&1; then
        use_method="csv2latex"
        echo_info "    Using csv2latex for table processing"
    # Check if Python and pandas are available (second best)
    elif command -v python3 >/dev/null 2>&1 && python3 -c "import pandas" 2>/dev/null; then
        use_method="python"
        echo_info "    Using Python with pandas for table processing"
    # Check if Python is available without pandas
    elif command -v python3 >/dev/null 2>&1; then
        use_method="python_basic"
        echo_info "    Using Python (basic) for table processing"
    else
        echo_warning "    Using fallback AWK processing for tables"
    fi

    # Process each CSV file
    for csv_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.csv; do
        [ -e "$csv_file" ] || continue

        base_name=$(basename "$csv_file" .csv)
        caption_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.tex"
        compiled_file="$SCITEX_WRITER_TABLE_COMPILED_DIR/${base_name}.tex"

        case "$use_method" in
        csv2latex)
            # Use csv2latex command - generate basic table structure
            # Note: csv2latex doesn't support captions or labels directly
            {
                local _tbl_title=""
                local _tbl_num="${base_name%%_*}"
                if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
                    _tbl_title=$(sed -n 's/.*\\textbf{\([^}]*\)}.*/\1/p' "$caption_file" | head -1)
                    [ -z "$_tbl_title" ] && _tbl_title=$(sed -n 's/.*\\caption{\([^.]*\)\..*/\1/p' "$caption_file" | head -1)
                fi
                _tbl_title="${_tbl_title%.}"
                local _tbl_bookmark="Table ${_tbl_num}"
                [ -n "$_tbl_title" ] && _tbl_bookmark="Table ${_tbl_num} --- ${_tbl_title}"
                echo "\\pdfbookmark[2]{${_tbl_bookmark}}{table_${base_name}}"
                echo "\\begin{table}[htbp]"
                echo "\\centering"
                echo "\\footnotesize"

                # Generate the tabular environment using csv2latex
                csv2latex --nohead --separator comma "$csv_file" 2>/dev/null

                # Add caption if it exists
                if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
                    cat "$caption_file"
                else
                    echo "\\caption{Table ${base_name#0}: ${base_name#*_}}"
                fi

                # Add label only if caption doesn't already contain one
                if ! grep -q '\\label{tab:' "$caption_file" 2>/dev/null; then
                    echo "\\label{tab:${base_name}}"
                fi
                echo "\\end{table}"
            } >"$compiled_file"

            # Check if csv2latex succeeded (look for \begin{tabular} with single backslash)
            if [ -s "$compiled_file" ] && grep -q "\\\\begin{tabular}" "$compiled_file" 2>/dev/null; then
                echo_success "    $compiled_file compiled (using csv2latex)"
            else
                echo_warning "    csv2latex failed, trying fallback"
                csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
            fi
            ;;

        python | python_basic)
            # Use our Python script
            local caption_arg=""
            if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
                temp_caption_file="/tmp/caption_${base_name}.txt"
                cat "$caption_file" >"$temp_caption_file"
                caption_arg="--caption-file $temp_caption_file"
            fi

            if [ -f "./scripts/python/csv_to_latex.py" ]; then
                python3 ./scripts/python/csv_to_latex.py "$csv_file" "$compiled_file" $caption_arg
                if [ $? -eq 0 ]; then
                    echo_success "    $compiled_file compiled (using Python)"
                else
                    echo_warning "    Python processing failed, trying fallback"
                    csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
                fi
                [ -f "$temp_caption_file" ] && rm -f "$temp_caption_file"
            else
                csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
            fi
            ;;

        *)
            # Use fallback AWK processing
            csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
            ;;
        esac
    done
}

function csv2tex_single_fallback() {
    # Basic CSV processing for a single file (fallback method)
    local csv_file="$1"
    local compiled_file="$2"
    local caption_file="$3"

    base_name=$(basename "$csv_file" .csv)
    # Extract table number from filename like 01_seizure_count
    if [[ "$base_name" =~ ^([0-9]+)_ ]]; then
        table_number="${BASH_REMATCH[1]}"
        table_clean_name="${base_name#*_}"
    else
        table_number="$base_name"
        table_clean_name="$base_name"
    fi

    # Pre-check CSV for problematic characters
    check_csv_for_special_chars "$csv_file"

    # Basic AWK processing (existing code)
    num_columns=$(head -n 1 "$csv_file" | awk -F, '{print NF}')
    num_rows=$(wc -l <"$csv_file")
    max_rows=30

    # Use standard font size for tables
    # Standard academic paper convention: \footnotesize (8pt) for tables
    fontsize="\\footnotesize"

    # Check if truncation needed
    truncated=false
    if [ $num_rows -gt $((max_rows + 1)) ]; then # +1 for header
        truncated=true
        rows_omitted=$((num_rows - max_rows - 1))
    fi

    {
        local _tbl_title=""
        if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
            _tbl_title=$(sed -n 's/.*\\textbf{\([^}]*\)}.*/\1/p' "$caption_file" | head -1)
            [ -z "$_tbl_title" ] && _tbl_title=$(sed -n 's/.*\\caption{\([^.]*\)\..*/\1/p' "$caption_file" | head -1)
        fi
        _tbl_title="${_tbl_title%.}"
        local _tbl_bookmark="Table ${table_number}"
        [ -n "$_tbl_title" ] && _tbl_bookmark="Table ${table_number} --- ${_tbl_title}"
        echo "\\pdfbookmark[2]{${_tbl_bookmark}}{table_${base_name}}"
        echo "\\begin{table}[htbp]"
        echo "\\centering"
        echo "$fontsize"

        # Adjust tabcolsep based on number of columns to fit width
        if [ $num_columns -gt 8 ]; then
            echo "\\setlength{\\tabcolsep}{2pt}" # Very tight for many columns
        elif [ $num_columns -gt 6 ]; then
            echo "\\setlength{\\tabcolsep}{3pt}" # Tight spacing
        elif [ $num_columns -gt 4 ]; then
            echo "\\setlength{\\tabcolsep}{4pt}" # Medium spacing
        else
            echo "\\setlength{\\tabcolsep}{6pt}" # Normal spacing
        fi

        # Use resizebox to ensure table fits within text width
        echo "\\resizebox{\\textwidth}{!}{%"
        echo "\\begin{tabular}{*{$num_columns}{l}}"
        echo "\\toprule"

        # Simple header processing
        head -n 1 "$csv_file" | awk -F, '{
            for (ii=1; ii<=NF; ii++) {
                val = $ii
                gsub(/[_]/, "\\\\_", val)
                printf("\\textbf{%s}", val)
                if (ii < NF) printf(" & ")
            }
            print "\\\\"
        }'

        echo "\\midrule"

        # Process data with potential truncation
        if [ "$truncated" = true ]; then
            # Show first max_rows-2 rows
            tail -n +2 "$csv_file" | head -n $((max_rows - 2)) | awk -F, '{
                for (i=1; i<=NF; i++) {
                    val = $i
                    gsub(/[_]/, "\\\\_", val)
                    printf("%s", val)
                    if (i < NF) printf(" & ")
                }
                print "\\\\"
            }'

            # Add truncation indicator
            echo "\\midrule"
            echo "\\multicolumn{$num_columns}{c}{\\textit{... $rows_omitted rows omitted ...}} \\\\"
            echo "\\midrule"

            # Show last 2 rows
            tail -n 2 "$csv_file" | awk -F, '{
                for (i=1; i<=NF; i++) {
                    val = $i
                    gsub(/[_]/, "\\\\_", val)
                    printf("%s", val)
                    if (i < NF) printf(" & ")
                }
                print "\\\\"
            }'
        else
            # Simple data processing without truncation
            tail -n +2 "$csv_file" | awk -F, '{
                for (i=1; i<=NF; i++) {
                    val = $i
                    gsub(/[_]/, "\\\\_", val)
                    printf("%s", val)
                    if (i < NF) printf(" & ")
                }
                print "\\\\"
            }'
        fi

        echo "\\bottomrule"
        echo "\\end{tabular}"
        echo "}" # Close resizebox

        if [ -f "$caption_file" ] || [ -L "$caption_file" ]; then
            if [ "$truncated" = true ]; then
                # Add truncation note to caption
                cat "$caption_file" | sed 's/}$//'
                echo "\\textit{Note: Table truncated to $max_rows rows from $num_rows total rows for display purposes.}}"
            else
                cat "$caption_file"
            fi
        else
            echo "\\caption{Table ${table_number#0}: ${table_clean_name//_/ }"
            if [ "$truncated" = true ]; then
                echo "\\textit{Note: Table truncated to $max_rows rows from $num_rows total rows for display purposes.}"
            fi
            echo "}"
        fi

        # Add label only if caption doesn't already contain one
        if ! grep -q '\\label{tab:' "$caption_file" 2>/dev/null; then
            echo "\\label{tab:${base_name}}"
        fi
        echo "\\end{table}"
    } >"$compiled_file"

    echo_info "    $compiled_file compiled (using fallback)"
}

function csv2tex_fallback() {
    # Process all CSV files with fallback method
    for csv_file in "$SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR"/[0-9]*.csv; do
        [ -e "$csv_file" ] || continue
        base_name=$(basename "$csv_file" .csv)
        caption_file="${SCITEX_WRITER_TABLE_CAPTION_MEDIA_DIR}/${base_name}.tex"
        compiled_file="$SCITEX_WRITER_TABLE_COMPILED_DIR/${base_name}.tex"
        csv2tex_single_fallback "$csv_file" "$compiled_file" "$caption_file"
    done
}

function create_table_header() {
    # Create a header/template table when no real tables exist
    local header_file="$SCITEX_WRITER_TABLE_COMPILED_DIR/00_Tables_Header.tex"

    cat >"$header_file" <<'EOF'
% Template table when no actual tables are present
\begin{table}[htbp]
    \centering
      \caption{\textbf{Placeholder table demonstrating the table format for this manuscript template}\\
    \smallskip
    To add tables to your manuscript, place CSV files in \texttt{caption\_and\_media/} with format \texttt{XX\_description.csv}, create matching caption files \texttt{XX\_description.tex}, and reference in text using \texttt{Table\textasciitilde\textbackslash ref\{tab:XX\_description\}}. Example can be seen at \texttt{01\_seizure\_count.csv} with \texttt{01\_seizure\_count.tex}
    }
    \label{tab:0_Tables_Header}
    \begin{tabular}{p{0.3\textwidth}p{0.6\textwidth}}
        \toprule
        \textbf{Step} & \textbf{Instructions} \\
        \midrule
        1. Add CSV & Place file like \texttt{01\_data.csv} in \texttt{caption\_and\_media/} \\
        2. Add Caption & Create \texttt{01\_data.tex} with table caption \\
        3. Compile & Run \texttt{./compile -m} to process tables \\
        4. Reference & Use \texttt{\textbackslash ref\{tab:01\_data\}} in manuscript \\
        \bottomrule
    \end{tabular}
\end{table}
EOF
    echo_info "    Created table header template with instructions"
}

function gather_table_tex_files() {
    # Gather all table tex files into the final compiled file
    output_file="${SCITEX_WRITER_TABLE_COMPILED_FILE}"
    rm -f "$output_file" >/dev/null 2>&1
    echo "% Auto-generated file containing all table inputs" >"$output_file"
    echo "% Generated by gather_table_tex_files()" >>"$output_file"
    echo "" >>"$output_file"

    # First check if there are any real table files
    local table_files=($(find "$SCITEX_WRITER_TABLE_COMPILED_DIR" -maxdepth 1 -name "[0-9]*.tex" 2>/dev/null | grep -v "00_Tables_Header.tex" | sort))
    local has_real_tables=false
    if [ ${#table_files[@]} -gt 0 ]; then
        has_real_tables=true
    fi

    # If no real tables, create the header/template
    if [ "$has_real_tables" = false ]; then
        create_table_header
    fi

    # Count available tables
    table_count=0
    for table_tex in $(find "$SCITEX_WRITER_TABLE_COMPILED_DIR" -name "[0-9]*.tex" 2>/dev/null | sort); do
        if [ -f "$table_tex" ] || [ -L "$table_tex" ]; then
            # Skip header if we have real tables
            local basename=$(basename "$table_tex")
            if [[ "$basename" == "00_Tables_Header.tex" ]] && [ "$has_real_tables" = true ]; then
                continue
            fi

            # For header template when no real tables exist
            if [[ "$basename" == "00_Tables_Header.tex" ]] && [ "$has_real_tables" = false ]; then
                echo "\\input{$table_tex}" >>"$output_file"
            else
                # For real tables, input them directly
                echo "% Table from: $basename" >>"$output_file"
                echo "\\input{$table_tex}" >>"$output_file"
            fi
            echo "" >>"$output_file"
            table_count=$((table_count + 1))
        fi
    done

    if [ $table_count -eq 0 ]; then
        echo_warning "    No tables were found to compile."
    else
        echo_success "    $table_count tables compiled"
    fi
}

# Main execution
log_table_stage_start "Initializing tables"
init_tables
log_table_stage_end "Initializing tables"

log_table_stage_start "Converting XLSX to CSV"
xlsx2csv_convert # Convert Excel files to CSV first
log_table_stage_end "Converting XLSX to CSV"

log_table_stage_start "Ensuring captions exist"
ensure_caption
log_table_stage_end "Ensuring captions exist"

log_table_stage_start "Converting CSV to LaTeX"
csv2tex
log_table_stage_end "Converting CSV to LaTeX"

log_table_stage_start "Gathering table files"
gather_table_tex_files
log_table_stage_end "Gathering table files"

# EOF