#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: src/scitex_writer/_utils/_figures.py
"""
Figure listing and conversion utilities.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def list_figures(
    project_dir: Union[str, Path],
    extensions: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """
    List all figures in a writer project.

    Looks directly inside the ``figures`` and ``figs`` sub-directories of
    ``00_shared``, ``01_manuscript`` and ``02_supplementary`` (no recursion)
    and reports every regular file whose name ends with one of
    ``extensions``. Matching is case-insensitive, so ``FIG1.PNG`` is found
    by ``".png"``. Directories and broken symlinks are skipped, and each
    file is reported once even when several extensions match it.

    Parameters
    ----------
    project_dir : str or Path
        Path to writer project directory
    extensions : list of str, optional
        Figure extensions to include (e.g. ``".png"``). Each entry is used
        as a glob-style filename suffix. Default: common image formats.

    Returns
    -------
    list of dict
        One dict per figure, sorted by file name, with the keys ``path``,
        ``name``, ``stem``, ``extension``, ``size_bytes``, ``size_kb``,
        ``directory`` and ``relative_path`` (relative to ``project_dir``).

    Raises
    ------
    FileNotFoundError
        If ``project_dir`` does not exist.

    Examples
    --------
    >>> figures = list_figures("my_paper")
    >>> for fig in figures:
    ...     print(fig['name'], fig['size_kb'])
    """
    from fnmatch import fnmatchcase

    project_dir = Path(project_dir)
    if not project_dir.exists():
        raise FileNotFoundError(f"Project directory not found: {project_dir}")

    if extensions is None:
        extensions = [
            ".png",
            ".jpg",
            ".jpeg",
            ".pdf",
            ".eps",
            ".svg",
            ".tif",
            ".tiff",
            ".ppt",
            ".pptx",
        ]

    # Patterns and file names are both lower-cased so that matching does not
    # depend on the platform's glob case rules.
    patterns = [f"*{ext.lower()}" for ext in extensions]

    # Search in common figure locations
    figure_dirs = [
        project_dir / "00_shared" / "figures",
        project_dir / "00_shared" / "figs",
        project_dir / "01_manuscript" / "figures",
        project_dir / "01_manuscript" / "figs",
        project_dir / "02_supplementary" / "figures",
        project_dir / "02_supplementary" / "figs",
    ]

    figures = []
    for fig_dir in figure_dirs:
        if not fig_dir.is_dir():
            continue
        # A single pass per directory means a file can never be added twice,
        # even if it matches more than one pattern.
        for filepath in sorted(fig_dir.iterdir()):
            # is_file() is False for directories and for broken symlinks,
            # whose stat() would otherwise abort the whole listing.
            if not filepath.is_file():
                continue
            lowered_name = filepath.name.lower()
            if not any(fnmatchcase(lowered_name, pat) for pat in patterns):
                continue
            stat = filepath.stat()
            figures.append(
                {
                    "path": str(filepath),
                    "name": filepath.name,
                    "stem": filepath.stem,
                    "extension": filepath.suffix,
                    "size_bytes": stat.st_size,
                    "size_kb": round(stat.st_size / 1024, 2),
                    "directory": str(fig_dir),
                    "relative_path": str(filepath.relative_to(project_dir)),
                }
            )

    # Sort by name; the sort is stable, so equal names keep figure_dirs order.
    figures.sort(key=lambda x: x["name"])
    logger.info("Found %d figures in %s", len(figures), project_dir)
    return figures
def convert_figure(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    dpi: int = 300,
    quality: int = 95,
) -> Dict[str, Any]:
    """
    Convert a figure file to the format implied by the output extension.

    Inputs whose suffix is ``.pdf`` are handed to the PDF converter;
    every other input goes through the generic image converter. The
    parent directory of ``output_path`` is created when missing.

    Parameters
    ----------
    input_path : str or Path
        Existing figure to read.
    output_path : str or Path
        Destination file; its extension selects the output format.
    dpi : int, default 300
        Rasterization resolution. Only forwarded to the PDF converter.
    quality : int, default 95
        JPEG quality, forwarded to whichever converter runs.

    Returns
    -------
    dict
        ``input_path``, ``output_path``, ``input_size_kb``,
        ``output_size_kb``, ``dpi`` and ``quality``. ``quality`` is
        ``None`` unless the output extension is ``.jpg`` or ``.jpeg``.

    Raises
    ------
    FileNotFoundError
        If ``input_path`` does not exist.

    Examples
    --------
    >>> convert_figure("fig1.pdf", "fig1.png", dpi=300)
    >>> convert_figure("fig1.png", "fig1.jpg", quality=90)
    """
    source = Path(input_path)
    target = Path(output_path)

    # Fail before touching the filesystem when there is nothing to convert.
    if not source.exists():
        raise FileNotFoundError(f"Input file not found: {source}")

    target.parent.mkdir(parents=True, exist_ok=True)

    # Pick the converter from the input suffix.
    if source.suffix.lower() == ".pdf":
        _convert_pdf_to_image(source, target, dpi, quality)
    else:
        _convert_image_to_image(source, target, quality)

    target_bytes = target.stat().st_size
    source_bytes = source.stat().st_size

    # Quality is only meaningful for JPEG output; report None otherwise.
    reported_quality = None
    if target.suffix.lower() in (".jpg", ".jpeg"):
        reported_quality = quality

    summary: Dict[str, Any] = {}
    summary["input_path"] = str(source)
    summary["output_path"] = str(target)
    summary["input_size_kb"] = round(source_bytes / 1024, 2)
    summary["output_size_kb"] = round(target_bytes / 1024, 2)
    summary["dpi"] = dpi
    summary["quality"] = reported_quality
    return summary
def _convert_pdf_to_image(
input_path: Path, output_path: Path, dpi: int, quality: int
) -> None:
"""Convert PDF to image format."""
try:
import fitz
from PIL import Image
except ImportError:
raise ImportError("PyMuPDF and Pillow required for PDF conversion")
doc = fitz.open(input_path)
page = doc[0]
zoom = dpi / 72.0
matrix = fitz.Matrix(zoom, zoom)
pix = page.get_pixmap(matrix=matrix)
output_ext = output_path.suffix.lower()
if output_ext in [".jpg", ".jpeg"]:
# Save as PNG first, then convert
import io
img_data = pix.tobytes("png")
img = Image.open(io.BytesIO(img_data))
if img.mode != "RGB":
img = img.convert("RGB")
img.save(str(output_path), "JPEG", quality=quality)
else:
pix.save(str(output_path))
doc.close()
def _convert_image_to_image(input_path: Path, output_path: Path, quality: int) -> None:
    """
    Convert an image file to another image format using PIL.

    For ``.jpg``/``.jpeg`` output the image is reduced to RGB first.
    Images in ``RGBA``, ``LA`` or ``P`` mode are composited onto a white
    background using their alpha channel, so transparent areas become
    white. For any other output extension the image is saved as-is and
    Pillow picks the format from the extension.

    Parameters
    ----------
    input_path : Path
        Image file to read.
    output_path : Path
        Destination file; its extension selects the output format.
    quality : int
        JPEG quality; used only for ``.jpg``/``.jpeg`` output.
    """
    from PIL import Image

    output_ext = output_path.suffix.lower()

    # The context manager releases the input file handle on every path.
    with Image.open(input_path) as img:
        if output_ext not in (".jpg", ".jpeg"):
            img.save(str(output_path))
            return

        if img.mode in ("RGBA", "LA", "P"):
            # Normalise to RGBA so every alpha-capable mode, including LA,
            # is pasted with its alpha channel as the mask.
            rgba = img.convert("RGBA")
            flattened = Image.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.split()[-1])
        elif img.mode != "RGB":
            flattened = img.convert("RGB")
        else:
            flattened = img

        flattened.save(str(output_path), "JPEG", quality=quality)
# Public API of this module; the underscore-prefixed helpers are not listed.
__all__ = ["list_figures", "convert_figure"]
# EOF