#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: src/scitex_writer/_utils/_csv_latex.py
"""
CSV <-> LaTeX table conversion utilities.
"""
from __future__ import annotations
import logging
import re
from pathlib import Path
from typing import Optional, Union
logger = logging.getLogger(__name__)
def csv2latex(
csv_path: Union[str, Path],
output_path: Optional[Union[str, Path]] = None,
caption: Optional[str] = None,
label: Optional[str] = None,
escape: bool = True,
longtable: bool = False,
index: bool = False,
column_format: Optional[str] = None,
**kwargs,
) -> str:
"""
Convert CSV file to LaTeX table.
Parameters
----------
csv_path : str or Path
Path to CSV file
output_path : str or Path, optional
If provided, save LaTeX to this file
caption : str, optional
Table caption
label : str, optional
Table label for referencing
escape : bool, default True
Escape special LaTeX characters
longtable : bool, default False
Use longtable environment for multi-page tables
index : bool, default False
Include DataFrame index in output
column_format : str, optional
LaTeX column format (e.g., 'lcr', 'l|cc|r')
**kwargs
Additional arguments passed to pandas.DataFrame.to_latex()
Returns
-------
str
LaTeX table string
Examples
--------
>>> latex = csv2latex("data.csv", caption="Results", label="tab:results")
>>> csv2latex("data.csv", "table.tex") # Save to file
"""
import pandas as pd
csv_path = Path(csv_path)
if not csv_path.exists():
raise FileNotFoundError(f"CSV file not found: {csv_path}")
# Load CSV
df = pd.read_csv(csv_path)
# Build to_latex arguments
latex_kwargs = {
"index": index,
"escape": escape,
"caption": caption,
"label": label,
}
if longtable:
latex_kwargs["longtable"] = True
if column_format:
latex_kwargs["column_format"] = column_format
# Merge with user kwargs
latex_kwargs.update(kwargs)
# Convert to LaTeX
latex_content = df.to_latex(**latex_kwargs)
# Save if output path provided
if output_path:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w") as f:
f.write(latex_content)
logger.info(f"Saved LaTeX table to {output_path}")
return latex_content
def latex2csv(
latex_path: Union[str, Path],
output_path: Optional[Union[str, Path]] = None,
table_index: int = 0,
):
"""
Convert LaTeX table to CSV/DataFrame.
Parameters
----------
latex_path : str or Path
Path to LaTeX file containing table
output_path : str or Path, optional
If provided, save CSV to this file
table_index : int, default 0
Which table to extract if multiple tables exist
Returns
-------
pd.DataFrame
Extracted table as DataFrame
Examples
--------
>>> df = latex2csv("table.tex")
>>> df = latex2csv("table.tex", "output.csv")
"""
import pandas as pd
latex_path = Path(latex_path)
if not latex_path.exists():
raise FileNotFoundError(f"LaTeX file not found: {latex_path}")
with open(latex_path) as f:
content = f.read()
# Extract table content (between \begin{tabular} and \end{tabular})
# Also handle longtable
patterns = [
r"\\begin\{tabular\}.*?\n(.*?)\\end\{tabular\}",
r"\\begin\{longtable\}.*?\n(.*?)\\end\{longtable\}",
]
tables = []
for pattern in patterns:
matches = re.findall(pattern, content, re.DOTALL)
tables.extend(matches)
if not tables:
raise ValueError("No table found in LaTeX file")
if table_index >= len(tables):
raise IndexError(
f"Table index {table_index} out of range. Found {len(tables)} tables."
)
table_content = tables[table_index]
# Parse table rows
rows = []
for line in table_content.split("\n"):
line = line.strip()
if not line or line.startswith("\\"):
continue
if "&" in line:
# Remove trailing \\ and split by &
line = re.sub(r"\\\\.*$", "", line)
cells = [cell.strip() for cell in line.split("&")]
rows.append(cells)
if not rows:
raise ValueError("Could not parse table rows")
# Create DataFrame (first row as header if it looks like headers)
if len(rows) > 1:
df = pd.DataFrame(rows[1:], columns=rows[0])
else:
df = pd.DataFrame(rows)
# Save if output path provided
if output_path:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
logger.info(f"Saved CSV to {output_path}")
return df
__all__ = ["csv2latex", "latex2csv"]
# EOF