Loading...
No commits yet
Not committed History
Blame
test_migration.py • 16.0 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: tests/python/test_migration.py

"""Tests for the migration module (Overleaf import/export)."""

import zipfile
from pathlib import Path

import pytest

# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def simple_overleaf_zip(tmp_path):
    """Create a minimal Overleaf-style ZIP for testing."""
    project_dir = tmp_path / "overleaf_project"
    project_dir.mkdir()

    # main.tex with \documentclass and inline metadata
    (project_dir / "main.tex").write_text(
        r"""\documentclass{article}
\title{Test Paper Title}
\author{John Doe}
\begin{document}
\maketitle
\input{sections/introduction}
\input{sections/methods}
\input{sections/results}
\input{sections/discussion}
\bibliography{refs}
\end{document}
""",
        encoding="utf-8",
    )

    # Sections
    sections = project_dir / "sections"
    sections.mkdir()
    (sections / "introduction.tex").write_text(
        r"\section{Introduction}" "\nThis is the introduction.",
        encoding="utf-8",
    )
    (sections / "methods.tex").write_text(
        r"\section{Methods}" "\nThese are the methods.",
        encoding="utf-8",
    )
    (sections / "results.tex").write_text(
        r"\section{Results}" "\nThese are the results.",
        encoding="utf-8",
    )
    (sections / "discussion.tex").write_text(
        r"\section{Discussion}" "\nThis is the discussion.",
        encoding="utf-8",
    )

    # Bibliography
    (project_dir / "refs.bib").write_text(
        "@article{doe2025, author={Doe}, title={Test}, year={2025}}\n",
        encoding="utf-8",
    )

    # Image
    images = project_dir / "images"
    images.mkdir()
    (images / "fig1.png").write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100)

    # Create ZIP
    zip_path = tmp_path / "test_project.zip"
    with zipfile.ZipFile(zip_path, "w") as zf:
        for f in project_dir.rglob("*"):
            if f.is_file():
                zf.write(f, f.relative_to(project_dir))

    return zip_path


@pytest.fixture
def inline_overleaf_zip(tmp_path):
    """Overleaf ZIP with all content inline in main.tex (no \\input)."""
    project_dir = tmp_path / "inline_project"
    project_dir.mkdir()

    (project_dir / "main.tex").write_text(
        r"""\documentclass{article}
\title{Inline Paper}
\author{Jane Doe}
\begin{document}
\maketitle
\begin{abstract}
This is the abstract.
\end{abstract}
\section{Introduction}
Intro text here.
\section{Methods}
Methods text here.
\section{Results}
Results text here.
\section{Discussion}
Discussion text here.
\end{document}
""",
        encoding="utf-8",
    )

    zip_path = tmp_path / "inline_project.zip"
    with zipfile.ZipFile(zip_path, "w") as zf:
        for f in project_dir.rglob("*"):
            if f.is_file():
                zf.write(f, f.relative_to(project_dir))

    return zip_path


# ---------------------------------------------------------------------------
# Parsing tests
# ---------------------------------------------------------------------------


class TestParsing:
    def test_detect_main_tex(self, tmp_path):
        from scitex_writer.migration._parsing import detect_main_tex

        (tmp_path / "main.tex").write_text(r"\documentclass{article}", encoding="utf-8")
        (tmp_path / "helper.tex").write_text(r"\section{Foo}", encoding="utf-8")

        result = detect_main_tex(tmp_path)
        assert result is not None
        assert result.name == "main.tex"

    def test_detect_main_tex_no_documentclass(self, tmp_path):
        from scitex_writer.migration._parsing import detect_main_tex

        (tmp_path / "notes.tex").write_text("Just some text", encoding="utf-8")

        result = detect_main_tex(tmp_path)
        assert result is None

    def test_detect_main_tex_prefers_main(self, tmp_path):
        from scitex_writer.migration._parsing import detect_main_tex

        (tmp_path / "main.tex").write_text(r"\documentclass{article}", encoding="utf-8")
        (tmp_path / "paper.tex").write_text(
            r"\documentclass{article}", encoding="utf-8"
        )

        result = detect_main_tex(tmp_path)
        assert result.name == "main.tex"

    def test_classify_section_by_filename(self):
        from scitex_writer.migration._parsing import classify_section

        assert classify_section(Path("intro.tex"), "") == "introduction"
        assert classify_section(Path("methods.tex"), "") == "methods"
        assert classify_section(Path("results.tex"), "") == "results"
        assert classify_section(Path("discussion.tex"), "") == "discussion"
        assert classify_section(Path("abstract.tex"), "") == "abstract"
        assert classify_section(Path("random.tex"), "") is None

    def test_classify_section_by_heading(self):
        from scitex_writer.migration._parsing import classify_section

        content = r"\section{Introduction and Background}"
        assert classify_section(Path("ch1.tex"), content) == "introduction"

        content = r"\section{Experimental Procedure}"
        assert classify_section(Path("ch2.tex"), content) == "methods"

    def test_extract_metadata(self):
        from scitex_writer.migration._parsing import extract_metadata

        content = r"""
\title{My Great Paper}
\author{Alice Bob}
\keywords{science, testing}
"""
        meta = extract_metadata(content)
        assert meta["title"] == "My Great Paper"
        assert meta["authors_block"] == "Alice Bob"
        assert meta["keywords"] == "science, testing"

    def test_extract_metadata_empty(self):
        from scitex_writer.migration._parsing import extract_metadata

        meta = extract_metadata("No metadata here")
        assert meta["title"] is None
        assert meta["authors_block"] is None
        assert meta["keywords"] is None

    def test_split_inline_sections(self):
        from scitex_writer.migration._parsing import split_inline_sections

        content = r"""
\section{Introduction}
Intro text.
\section{Methods}
Methods text.
\section{Conclusion}
Conclusion text.
"""
        sections = split_inline_sections(content)
        assert "introduction" in sections
        assert "methods" in sections
        assert "discussion" in sections  # conclusion maps to discussion

    def test_unique_dest(self, tmp_path):
        from scitex_writer.migration._parsing import unique_dest

        f = tmp_path / "test.txt"
        assert unique_dest(f) == f

        f.write_text("existing")
        result = unique_dest(f)
        assert result.name == "test_1.txt"


# ---------------------------------------------------------------------------
# Import tests
# ---------------------------------------------------------------------------


class TestImport:
    def test_import_dry_run(self, simple_overleaf_zip, tmp_path):
        from scitex_writer.migration import from_overleaf

        result = from_overleaf(
            str(simple_overleaf_zip),
            output_dir=str(tmp_path / "output"),
            dry_run=True,
        )

        assert result["success"] is True
        assert result["dry_run"] is True
        assert "mapping_report" in result
        report = result["mapping_report"]
        assert report["main_tex"] == "main.tex"
        assert "introduction" in report["sections"]
        assert len(report["bib_files"]) == 1
        assert len(report["images"]) >= 1

    def test_import_nonexistent_zip(self):
        from scitex_writer.migration import from_overleaf

        result = from_overleaf("/nonexistent/file.zip")
        assert result["success"] is False
        assert "not found" in result["error"].lower()

    def test_import_invalid_zip(self, tmp_path):
        from scitex_writer.migration import from_overleaf

        fake = tmp_path / "fake.zip"
        fake.write_text("not a zip")
        result = from_overleaf(str(fake))
        assert result["success"] is False
        assert "not a valid zip" in result["error"].lower()

    def test_import_output_exists_no_force(self, simple_overleaf_zip, tmp_path):
        from scitex_writer.migration import from_overleaf

        out = tmp_path / "existing"
        out.mkdir()
        result = from_overleaf(str(simple_overleaf_zip), output_dir=str(out))
        assert result["success"] is False
        assert "already exists" in result["error"].lower()

    def test_import_inline_dry_run(self, inline_overleaf_zip, tmp_path):
        from scitex_writer.migration import from_overleaf

        result = from_overleaf(
            str(inline_overleaf_zip),
            output_dir=str(tmp_path / "inline_out"),
            dry_run=True,
        )

        assert result["success"] is True
        report = result["mapping_report"]
        assert report["metadata"]["title"] == "Inline Paper"

    def test_import_success_creates_project(self, simple_overleaf_zip, tmp_path):
        from unittest.mock import patch

        from scitex_writer.migration import from_overleaf

        out = tmp_path / "imported_project"

        def fake_clone(project_dir, git_strategy="child", **kwargs):
            """Create minimal scitex-writer directory structure."""
            p = Path(project_dir)
            p.mkdir(parents=True, exist_ok=True)
            (p / "00_shared" / "bib_files").mkdir(parents=True)
            (p / "00_shared" / "latex_styles").mkdir(parents=True)
            (p / "01_manuscript" / "contents" / "figures" / "caption_and_media").mkdir(
                parents=True
            )
            (p / "01_manuscript" / "contents" / "tables" / "caption_and_media").mkdir(
                parents=True
            )
            for sec in ["abstract", "introduction", "methods", "results", "discussion"]:
                (p / "01_manuscript" / "contents" / f"{sec}.tex").write_text("")
            return True

        with patch(
            "scitex_writer._project._create.clone_writer_project",
            side_effect=fake_clone,
        ):
            result = from_overleaf(str(simple_overleaf_zip), output_dir=str(out))

        assert result["success"] is True
        assert result["dry_run"] is False
        assert out.exists()

        # Sections were written
        contents = out / "01_manuscript" / "contents"
        assert (contents / "introduction.tex").exists()
        intro_text = (contents / "introduction.tex").read_text()
        assert "introduction" in intro_text.lower()

        # Bib was copied
        bib_files = list((out / "00_shared" / "bib_files").glob("*.bib"))
        assert len(bib_files) >= 1

        # Image was copied
        images = list((contents / "figures" / "caption_and_media").glob("*"))
        assert len(images) >= 1

        # Metadata was written
        assert (out / "00_shared" / "title.tex").exists()
        title = (out / "00_shared" / "title.tex").read_text()
        assert "Test Paper Title" in title

    def test_import_force_overwrites(self, simple_overleaf_zip, tmp_path):
        from unittest.mock import patch

        from scitex_writer.migration import from_overleaf

        out = tmp_path / "force_project"
        out.mkdir()
        (out / "marker.txt").write_text("old")

        def fake_clone(project_dir, git_strategy="child", **kwargs):
            p = Path(project_dir)
            p.mkdir(parents=True, exist_ok=True)
            (p / "00_shared" / "bib_files").mkdir(parents=True)
            (p / "01_manuscript" / "contents" / "figures" / "caption_and_media").mkdir(
                parents=True
            )
            (p / "01_manuscript" / "contents" / "tables" / "caption_and_media").mkdir(
                parents=True
            )
            for sec in ["abstract", "introduction", "methods", "results", "discussion"]:
                (p / "01_manuscript" / "contents" / f"{sec}.tex").write_text("")
            return True

        with patch(
            "scitex_writer._project._create.clone_writer_project",
            side_effect=fake_clone,
        ):
            result = from_overleaf(
                str(simple_overleaf_zip), output_dir=str(out), force=True
            )

        assert result["success"] is True
        assert not (out / "marker.txt").exists()


# ---------------------------------------------------------------------------
# Export tests
# ---------------------------------------------------------------------------


@pytest.fixture
def mock_scitex_project(tmp_path):
    """Create a minimal scitex-writer project structure for export testing."""
    proj = tmp_path / "mock_project"
    proj.mkdir()

    shared = proj / "00_shared"
    shared.mkdir()
    (shared / "title.tex").write_text("My Test Title\n")
    (shared / "authors.tex").write_text("Test Author\n")

    bib_dir = shared / "bib_files"
    bib_dir.mkdir()
    (bib_dir / "refs.bib").write_text(
        "@article{test2025, author={A}, title={B}, year={2025}}\n"
    )

    contents = proj / "01_manuscript" / "contents"
    contents.mkdir(parents=True)
    (contents / "introduction.tex").write_text(
        r"\section{Introduction}" "\nIntro content.\n"
    )
    (contents / "methods.tex").write_text(r"\section{Methods}" "\nMethods content.\n")
    (contents / "results.tex").write_text(r"\section{Results}" "\nResults content.\n")
    (contents / "discussion.tex").write_text(
        r"\section{Discussion}" "\nDiscussion content.\n"
    )

    fig_dir = contents / "figures" / "caption_and_media"
    fig_dir.mkdir(parents=True)
    (fig_dir / "fig1.png").write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    return proj


class TestExport:
    def test_export_nonexistent_project(self):
        from scitex_writer.migration import to_overleaf

        result = to_overleaf("/nonexistent/project")
        assert result["success"] is False

    def test_export_not_a_project(self, tmp_path):
        from scitex_writer.migration import to_overleaf

        result = to_overleaf(str(tmp_path))
        assert result["success"] is False
        assert "does not look like" in result["error"].lower()

    def test_export_dry_run(self, mock_scitex_project):
        from scitex_writer.migration import to_overleaf

        result = to_overleaf(str(mock_scitex_project), dry_run=True)

        assert result["success"] is True
        assert result["dry_run"] is True
        assert result["file_count"] > 0
        assert "main.tex" in result["files_included"]
        assert "references.bib" in result["files_included"]

    def test_export_creates_zip(self, mock_scitex_project, tmp_path):
        from scitex_writer.migration import to_overleaf

        zip_out = tmp_path / "export.zip"
        result = to_overleaf(str(mock_scitex_project), output_path=str(zip_out))

        assert result["success"] is True
        assert result["dry_run"] is False
        assert zip_out.exists()

        # Verify ZIP contents
        with zipfile.ZipFile(zip_out) as zf:
            names = zf.namelist()
            assert "main.tex" in names
            assert "references.bib" in names
            assert any("sections/" in n for n in names)
            assert any("images/" in n for n in names)

            # main.tex has documentclass and title
            main_content = zf.read("main.tex").decode()
            assert r"\documentclass" in main_content
            assert r"\title{My Test Title}" in main_content


# ---------------------------------------------------------------------------
# Module API tests
# ---------------------------------------------------------------------------


class TestModuleAPI:
    def test_migration_in_all(self):
        import scitex_writer as sw

        assert "migration" in sw.__all__

    def test_from_overleaf_callable(self):
        from scitex_writer import migration

        assert callable(migration.from_overleaf)

    def test_to_overleaf_callable(self):
        from scitex_writer import migration

        assert callable(migration.to_overleaf)

    def test_cli_help(self):
        import subprocess

        result = subprocess.run(
            ["scitex-writer", "migration", "--help"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert result.returncode == 0
        assert "import" in result.stdout
        assert "export" in result.stdout


# EOF