Loading...
No commits yet
Not committed History
Blame
_figures.py • 6.0 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: src/scitex_writer/_utils/_figures.py

"""
Figure listing and conversion utilities.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


def list_figures(
    project_dir: Union[str, Path],
    extensions: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """
    List all figures in a writer project.

    Only the direct children of the known figure directories are scanned:
    ``figures`` and ``figs`` under ``00_shared``, ``01_manuscript`` and
    ``02_supplementary``. Sub-directories are not searched recursively.

    Parameters
    ----------
    project_dir : str or Path
        Path to writer project directory
    extensions : list of str, optional
        Figure extensions to include (e.g. ``".png"``). Matching is done on
        the end of the file name and is case-insensitive, so ``".png"`` also
        matches ``FIG.PNG``. Default: common image formats.

    Returns
    -------
    list of dict
        One dict per figure with the keys ``path``, ``name``, ``stem``,
        ``extension``, ``size_bytes``, ``size_kb`` (rounded to two decimals),
        ``directory`` and ``relative_path`` (relative to ``project_dir``).
        Sorted by ``name``, then by ``relative_path``.

    Raises
    ------
    FileNotFoundError
        If ``project_dir`` does not exist.

    Examples
    --------
    >>> figures = list_figures("my_paper")
    >>> for fig in figures:
    ...     print(fig['name'], fig['size_kb'])
    """
    project_dir = Path(project_dir)
    if not project_dir.exists():
        raise FileNotFoundError(f"Project directory not found: {project_dir}")

    if extensions is None:
        extensions = [
            ".png",
            ".jpg",
            ".jpeg",
            ".pdf",
            ".eps",
            ".svg",
            ".tif",
            ".tiff",
            ".ppt",
            ".pptx",
        ]
    elif isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = [extensions]

    # Lower-cased tuple so a single ``str.endswith`` call tests every
    # extension at once, independent of the case used on disk.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Search in common figure locations
    figure_dirs = [
        project_dir / section / folder
        for section in ("00_shared", "01_manuscript", "02_supplementary")
        for folder in ("figures", "figs")
    ]

    figures = []
    for fig_dir in figure_dirs:
        if not fig_dir.is_dir():
            continue
        # One pass per directory: a file is reported once even when several
        # requested extensions would match it.
        for filepath in fig_dir.iterdir():
            if not filepath.name.lower().endswith(suffixes):
                continue
            # Skip directories and broken symlinks that merely look like
            # figures by name.
            if not filepath.is_file():
                continue
            stat = filepath.stat()
            figures.append(
                {
                    "path": str(filepath),
                    "name": filepath.name,
                    "stem": filepath.stem,
                    "extension": filepath.suffix,
                    "size_bytes": stat.st_size,
                    "size_kb": round(stat.st_size / 1024, 2),
                    "directory": str(fig_dir),
                    "relative_path": str(filepath.relative_to(project_dir)),
                }
            )

    # Sort by name; the relative path breaks ties so the order does not
    # depend on filesystem enumeration order.
    figures.sort(key=lambda fig: (fig["name"], fig["relative_path"]))

    logger.info("Found %d figures in %s", len(figures), project_dir)
    return figures


def convert_figure(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    dpi: int = 300,
    quality: int = 95,
) -> Dict[str, Any]:
    """
    Convert a figure file from one format to another.

    The target format is taken from the extension of ``output_path``. Input
    files ending in ``.pdf`` are handed to ``_convert_pdf_to_image``; every
    other input goes through ``_convert_image_to_image``. Missing parent
    directories of ``output_path`` are created.

    Parameters
    ----------
    input_path : str or Path
        Existing figure to convert.
    output_path : str or Path
        Destination file; its extension selects the output format.
    dpi : int, default 300
        Resolution forwarded to the PDF converter. It is echoed back in the
        result for every input type.
    quality : int, default 95
        JPEG quality forwarded to the converters.

    Returns
    -------
    dict
        ``input_path``, ``output_path``, ``input_size_kb``,
        ``output_size_kb``, ``dpi`` and ``quality``. ``quality`` is ``None``
        unless the output extension is ``.jpg`` or ``.jpeg``.

    Raises
    ------
    FileNotFoundError
        If ``input_path`` does not exist.

    Examples
    --------
    >>> convert_figure("fig1.pdf", "fig1.png", dpi=300)
    >>> convert_figure("fig1.png", "fig1.jpg", quality=90)
    """
    src = Path(input_path)
    dst = Path(output_path)

    if not src.exists():
        raise FileNotFoundError(f"Input file not found: {src}")

    dst.parent.mkdir(parents=True, exist_ok=True)

    # PDFs need rasterisation; everything else is a plain image re-encode.
    if src.suffix.lower() == ".pdf":
        _convert_pdf_to_image(src, dst, dpi, quality)
    else:
        _convert_image_to_image(src, dst, quality)

    dst_bytes = dst.stat().st_size
    src_bytes = src.stat().st_size
    writes_jpeg = dst.suffix.lower() in (".jpg", ".jpeg")

    result: Dict[str, Any] = {
        "input_path": str(src),
        "output_path": str(dst),
        "input_size_kb": round(src_bytes / 1024, 2),
        "output_size_kb": round(dst_bytes / 1024, 2),
        "dpi": dpi,
        "quality": quality if writes_jpeg else None,
    }
    return result


def _convert_pdf_to_image(
    input_path: Path, output_path: Path, dpi: int, quality: int
) -> None:
    """
    Render the first page of a PDF to an image file.

    Parameters
    ----------
    input_path : Path
        PDF file to read. Only page index 0 is rendered.
    output_path : Path
        Image file to write. ``.jpg``/``.jpeg`` outputs are encoded with
        Pillow; any other extension is written by the PyMuPDF pixmap itself.
    dpi : int
        Target resolution; converted to a zoom factor relative to 72.
    quality : int
        JPEG quality, used only for ``.jpg``/``.jpeg`` outputs.

    Raises
    ------
    ImportError
        If PyMuPDF (``fitz``) or Pillow cannot be imported. The original
        import failure is attached as the cause.
    """
    try:
        import fitz
        from PIL import Image
    except ImportError as err:
        raise ImportError(
            "PyMuPDF and Pillow required for PDF conversion"
        ) from err

    # Pass a plain string so the call does not depend on Path support.
    doc = fitz.open(str(input_path))
    try:
        page = doc[0]
        # The zoom factor scales the render from the 72-per-inch baseline
        # used here up to the requested dpi.
        zoom = dpi / 72.0
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

        if output_path.suffix.lower() in (".jpg", ".jpeg"):
            # Go through an in-memory PNG so Pillow can apply ``quality``.
            import io

            with Image.open(io.BytesIO(pix.tobytes("png"))) as img:
                rgb = img if img.mode == "RGB" else img.convert("RGB")
                rgb.save(str(output_path), "JPEG", quality=quality)
        else:
            pix.save(str(output_path))
    finally:
        # Release the document even when rendering or saving fails.
        doc.close()


def _convert_image_to_image(input_path: Path, output_path: Path, quality: int) -> None:
    """
    Convert an image file to another image format using Pillow.

    Parameters
    ----------
    input_path : Path
        Image file to read.
    output_path : Path
        Image file to write. For ``.jpg``/``.jpeg`` the image is flattened
        onto a white background (JPEG has no alpha channel) and saved with
        ``quality``; for any other extension Pillow picks the format from
        the file name and the image is saved unchanged.
    quality : int
        JPEG quality, used only for ``.jpg``/``.jpeg`` outputs.
    """
    from PIL import Image

    # The context manager closes the underlying file handle, which
    # ``Image.open`` otherwise keeps open.
    with Image.open(input_path) as source:
        img = source

        if output_path.suffix.lower() not in (".jpg", ".jpeg"):
            img.save(str(output_path))
            return

        # Bring palette and luminance+alpha images to RGBA first so their
        # transparency is used as the paste mask below.
        if img.mode in ("P", "LA", "PA"):
            img = img.convert("RGBA")

        if img.mode == "RGBA":
            background = Image.new("RGB", img.size, (255, 255, 255))
            # The last band of an RGBA image is its alpha channel.
            background.paste(img, mask=img.split()[-1])
            img = background
        elif img.mode != "RGB":
            img = img.convert("RGB")

        img.save(str(output_path), "JPEG", quality=quality)


# Public API of this module; the ``_convert_*`` helpers are not exported.
__all__ = ["list_figures", "convert_figure"]

# EOF