# Loading...
# No commits yet
# Not committed History
# Blame
# test_check_references.py • 13.0 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Test file for: check_references.py

import os
import sys
from pathlib import Path

# Add scripts/python to path for imports
ROOT_DIR = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(ROOT_DIR / "scripts" / "python"))

from check_references import (  # noqa: E402
    collect_tex_files,
    extract_bib_keys,
    extract_citations,
    extract_labels,
    extract_refs,
    infer_auto_labels,
)

# ============================================================================
# Test extract_refs
# ============================================================================


def test_extract_refs_basic(tmp_path):
    """Check that plain \\ref commands in a .tex file are picked up.

    The file body starts with a blank line, so the two references sit on
    lines 2 and 3. Both keys must be reported, nothing else, and the first
    recorded occurrence of the figure key must carry line number 2.
    """
    source = r"""
See Figure~\ref{fig:01_example} for details.
Results are shown in \ref{tab:01_data}.
"""
    path = tmp_path / "test.tex"
    path.write_text(source)

    found = extract_refs([path])

    for expected_key in ("fig:01_example", "tab:01_data"):
        assert expected_key in found
    assert len(found) == 2
    first_hit = found["fig:01_example"][0]
    assert first_hit[1] == 2  # Line number


def test_extract_refs_skips_comments(tmp_path):
    """Check that \\ref commands located after a ``%`` are not reported.

    Covers both a full-line comment and a trailing inline comment, next to
    one genuine reference that must still be found.
    """
    body = r"""
\ref{fig:real_ref}
% This is a comment with \ref{fig:commented_ref}
Some text % inline \ref{fig:inline_comment}
"""
    path = tmp_path / "test.tex"
    path.write_text(body)

    found = extract_refs([path])

    assert "fig:real_ref" in found
    for hidden_key in ("fig:commented_ref", "fig:inline_comment"):
        assert hidden_key not in found


def test_extract_refs_skips_macro_args(tmp_path):
    """Check that the ``#1`` placeholder inside a macro body is not a ref key.

    A ``\\newcommand`` definition wrapping ``\\ref{#1}`` is followed by one
    real reference; only the real one may appear in the result.
    """
    lines = [
        r"\newcommand{\figref}[1]{\ref{#1}}",
        r"\ref{fig:actual_ref}",
    ]
    path = tmp_path / "test.tex"
    path.write_text("\n".join(lines))

    found = extract_refs([path])

    assert "fig:actual_ref" in found
    assert "#1" not in found


# ============================================================================
# Test extract_labels
# ============================================================================


def test_extract_labels_basic(tmp_path):
    """Check that \\label commands in a .tex file are picked up.

    One label lives inside a figure environment and one inside a table
    environment; exactly those two keys must be reported.
    """
    source = r"""
\begin{figure}
  \label{fig:01_example}
\end{figure}
\begin{table}
  \label{tab:01_data}
\end{table}
"""
    path = tmp_path / "test.tex"
    path.write_text(source)

    found = extract_labels([path])

    for expected_key in ("fig:01_example", "tab:01_data"):
        assert expected_key in found
    assert len(found) == 2


def test_extract_labels_skips_comments(tmp_path):
    """Check that a \\label on a commented-out line is not reported."""
    body = r"""
\label{sec:real_label}
% \label{sec:commented_label}
"""
    path = tmp_path / "test.tex"
    path.write_text(body)

    found = extract_labels([path])

    assert "sec:real_label" in found
    assert "sec:commented_label" not in found


def test_extract_labels_multiple_definitions(tmp_path):
    """Check that a label defined in two files yields two recorded entries."""
    sources = [tmp_path / "file1.tex", tmp_path / "file2.tex"]
    for source in sources:
        # Same label in every file so the key collides on purpose.
        source.write_text(r"\label{fig:duplicate}")

    found = extract_labels(sources)

    occurrences = found["fig:duplicate"]
    assert len(occurrences) == 2


# ============================================================================
# Test extract_citations
# ============================================================================


def test_extract_citations_single(tmp_path):
    """Check that a lone \\cite command yields exactly one citation key."""
    path = tmp_path / "test.tex"
    path.write_text(r"Previous work \cite{author2020} showed that...")

    found = extract_citations([path])

    assert len(found) == 1
    assert "author2020" in found


def test_extract_citations_multi_key(tmp_path):
    """Check that a comma-separated key list in one command is split up.

    A single ``\\citep`` with three keys must produce three separate entries.
    """
    path = tmp_path / "test.tex"
    path.write_text(r"\citep{Smith2018, Jones2019, Brown2020}")

    found = extract_citations([path])

    for expected_key in ("Smith2018", "Jones2019", "Brown2020"):
        assert expected_key in found
    assert len(found) == 3


def test_extract_citations_variants(tmp_path):
    """Check that six different citation commands are all recognized.

    The file uses \\cite, \\citep, \\citet, \\citealt, \\citeauthor and
    \\citeyear with keys ``ref1`` .. ``ref6``; every key must be reported
    and no others.
    """
    source = r"""
\cite{ref1}
\citep{ref2}
\citet{ref3}
\citealt{ref4}
\citeauthor{ref5}
\citeyear{ref6}
"""
    path = tmp_path / "test.tex"
    path.write_text(source)

    found = extract_citations([path])

    for index in range(1, 7):
        assert f"ref{index}" in found
    assert len(found) == 6


def test_extract_citations_skips_comments(tmp_path):
    """Check that a \\cite on a commented-out line is not reported."""
    lines = (r"\cite{real_ref}", r"% \cite{commented_ref}")
    path = tmp_path / "test.tex"
    path.write_text("\n".join(lines))

    found = extract_citations([path])

    assert "real_ref" in found
    assert "commented_ref" not in found


def test_extract_citations_whitespace_handling(tmp_path):
    """Check that extra spaces after the commas do not end up in the keys.

    The bare key names must be present in the result even though the source
    separates them with two and three spaces.
    """
    path = tmp_path / "test.tex"
    path.write_text(r"\cite{Key1,  Key2,   Key3}")

    found = extract_citations([path])

    for bare_key in ("Key1", "Key2", "Key3"):
        assert bare_key in found


# ============================================================================
# Test extract_bib_keys
# ============================================================================


def test_extract_bib_keys(tmp_path):
    """Check that entry keys are read from a .bib file in a directory.

    Three entries of different types are written to one file; each key must
    be reported, and the value stored for a key must equal the path of the
    file that defines it.
    """
    bibliography = """
@article{Smith2020, author={John Smith}}
@book{Jones2019, title={A Book}}
@inproceedings{Brown2018, booktitle={Proceedings}}
"""
    bib_path = tmp_path / "refs.bib"
    bib_path.write_text(bibliography)

    found = extract_bib_keys(tmp_path)

    for expected_key in ("Smith2020", "Jones2019", "Brown2018"):
        assert expected_key in found
    assert found["Smith2020"] == bib_path


def test_extract_bib_keys_multiple_files(tmp_path):
    """Check that keys from every .bib file in the directory are collected."""
    entries_by_file = {
        "refs1.bib": "@article{Key1, title={Test}}",
        "refs2.bib": "@book{Key2, title={Test}}",
    }
    for file_name, entry in entries_by_file.items():
        (tmp_path / file_name).write_text(entry)

    found = extract_bib_keys(tmp_path)

    assert "Key1" in found
    assert "Key2" in found


def test_extract_bib_keys_no_directory(tmp_path):
    """Check that a directory that does not exist yields an empty dict."""
    missing_dir = tmp_path / "nonexistent"
    found = extract_bib_keys(missing_dir)
    assert found == {}


def test_extract_bib_keys_various_formats(tmp_path):
    """Check key extraction across differently formatted BibTeX entries.

    The entries cover: no space after the key's comma, a space after the
    comma, and an entry whose body is only a trailing comma.
    """
    entries = [
        "@article{NoSpaces,author={Test}}",
        "@book{WithComma, title={Test}}",
        "@misc{TrailingComma,}",
    ]
    bib_path = tmp_path / "refs.bib"
    bib_path.write_text("\n".join(entries))

    found = extract_bib_keys(tmp_path)

    for expected_key in ("NoSpaces", "WithComma", "TrailingComma"):
        assert expected_key in found


# ============================================================================
# Test infer_auto_labels
# ============================================================================


def test_infer_auto_labels(tmp_path):
    """Check that labels are derived from caption file names.

    Two figure caption files and one table caption file are created under
    ``contents/{figures,tables}/caption_and_media``; the result must contain
    exactly the three ``fig:``/``tab:`` keys built from the file stems.
    """
    manuscript = tmp_path / "01_manuscript"
    figure_captions = manuscript / "contents" / "figures" / "caption_and_media"
    table_captions = manuscript / "contents" / "tables" / "caption_and_media"
    for directory in (figure_captions, table_captions):
        directory.mkdir(parents=True)

    caption_files = (
        (figure_captions, "01_example.tex", "Figure caption"),
        (figure_captions, "02_results.tex", "Results figure"),
        (table_captions, "01_data.tex", "Data table"),
    )
    for directory, file_name, text in caption_files:
        (directory / file_name).write_text(text)

    inferred = infer_auto_labels(manuscript)

    for expected_key in ("fig:01_example", "fig:02_results", "tab:01_data"):
        assert expected_key in inferred
    assert len(inferred) == 3


def test_infer_auto_labels_skips_panels(tmp_path):
    """Check that panel caption files (e.g. ``01a_name``) produce no label.

    Files whose numeric prefix is followed by a letter sit next to regular
    caption files; only the regular ones may show up in the result.
    """
    manuscript = tmp_path / "01_manuscript"
    captions = manuscript / "contents" / "figures" / "caption_and_media"
    captions.mkdir(parents=True)

    contents_by_name = {
        "01_main.tex": "Main",
        "01a_panel.tex": "Panel A",
        "01b_panel.tex": "Panel B",
        "02_other.tex": "Other",
    }
    for file_name, text in contents_by_name.items():
        (captions / file_name).write_text(text)

    inferred = infer_auto_labels(manuscript)

    assert "fig:01_main" in inferred
    assert "fig:02_other" in inferred
    assert "fig:01a_panel" not in inferred
    assert "fig:01b_panel" not in inferred


def test_infer_auto_labels_no_contents_dir(tmp_path):
    """Check that a document dir without ``contents`` yields an empty dict."""
    manuscript = tmp_path / "01_manuscript"
    manuscript.mkdir()

    inferred = infer_auto_labels(manuscript)

    assert inferred == {}


def test_infer_auto_labels_line_zero(tmp_path):
    """Check that an inferred label is recorded with line number 0.

    Line 0 serves as the marker for auto-generated labels.
    """
    manuscript = tmp_path / "01_manuscript"
    captions = manuscript / "contents" / "figures" / "caption_and_media"
    captions.mkdir(parents=True)
    (captions / "01_test.tex").write_text("Test")

    inferred = infer_auto_labels(manuscript)

    first_entry = inferred["fig:01_test"][0]
    assert first_entry[1] == 0  # Line number should be 0


# ============================================================================
# Test collect_tex_files
# ============================================================================


def test_collect_tex_files_basic(tmp_path):
    """Check that source .tex files are collected from a document directory.

    Two files are placed in ``contents`` and one directly in the document
    root; all three file names must appear in the collected list.
    """
    manuscript = tmp_path / "01_manuscript"
    contents = manuscript / "contents"
    contents.mkdir(parents=True)

    sources = (
        (contents / "intro.tex", "Intro"),
        (contents / "methods.tex", "Methods"),
        (manuscript / "base.tex", "Base"),
    )
    for path, text in sources:
        path.write_text(text)

    collected = collect_tex_files(manuscript)

    collected_names = {path.name for path in collected}
    for expected_name in ("intro.tex", "methods.tex", "base.tex"):
        assert expected_name in collected_names


def test_collect_tex_files_skips_generated(tmp_path):
    """Check that ``manuscript.tex`` and ``manuscript_diff.tex`` are left out.

    Only ``base.tex`` may remain in the collected list.
    """
    manuscript = tmp_path / "01_manuscript"
    manuscript.mkdir()

    contents_by_name = {
        "manuscript.tex": "Generated",
        "manuscript_diff.tex": "Generated diff",
        "base.tex": "Source",
    }
    for file_name, text in contents_by_name.items():
        (manuscript / file_name).write_text(text)

    collected = collect_tex_files(manuscript)

    assert len(collected) == 1
    assert collected[0].name == "base.tex"


def test_collect_tex_files_skips_versioned(tmp_path):
    """Check that ``*_vNN.tex`` snapshots are left out of the collection.

    ``intro.tex`` sits next to ``intro_v01.tex`` and ``intro_v02.tex``; only
    the unversioned file may be collected.
    """
    manuscript = tmp_path / "01_manuscript"
    contents = manuscript / "contents"
    contents.mkdir(parents=True)

    contents_by_name = {
        "intro.tex": "Current",
        "intro_v01.tex": "Version 1",
        "intro_v02.tex": "Version 2",
    }
    for file_name, text in contents_by_name.items():
        (contents / file_name).write_text(text)

    collected = collect_tex_files(manuscript)

    assert len(collected) == 1
    assert collected[0].name == "intro.tex"


def test_collect_tex_files_includes_captions(tmp_path):
    """Check that figure and table caption files are part of the collection."""
    manuscript = tmp_path / "01_manuscript"
    figure_captions = manuscript / "contents" / "figures" / "caption_and_media"
    table_captions = manuscript / "contents" / "tables" / "caption_and_media"
    for directory in (figure_captions, table_captions):
        directory.mkdir(parents=True)

    (figure_captions / "01_fig.tex").write_text("Fig")
    (table_captions / "01_tab.tex").write_text("Tab")

    collected = collect_tex_files(manuscript)

    collected_names = {path.name for path in collected}
    assert "01_fig.tex" in collected_names
    assert "01_tab.tex" in collected_names


def test_collect_tex_files_nonexistent_dir(tmp_path):
    """Check that a document dir that does not exist yields an empty list."""
    missing_dir = tmp_path / "nonexistent"
    collected = collect_tex_files(missing_dir)
    assert collected == []


# ============================================================================
# Test integration scenarios
# ============================================================================


def test_refs_and_labels_match(tmp_path):
    """Integration test: every extracted ref must have a matching label.

    The file defines ``sec:intro`` on its first line and references it on
    the second. The test first asserts that the reference was actually
    extracted, so the matching loop below cannot pass vacuously on an
    empty result.
    """
    tex_file = tmp_path / "test.tex"
    tex_file.write_text(
        r"\section{Intro}\label{sec:intro}" "\n" r"See \ref{sec:intro}."
    )
    refs = extract_refs([tex_file])
    labels = extract_labels([tex_file])

    # Guard against a vacuous pass: an empty ``refs`` would skip the loop.
    assert "sec:intro" in refs, "expected \\ref{sec:intro} to be extracted"
    for ref_key in refs:
        assert ref_key in labels, f"no label defined for ref {ref_key!r}"


def test_citations_and_bib_match(tmp_path):
    """Integration test: every extracted citation must have a bib entry.

    A .tex file cites ``Smith2020`` and a .bib file in a separate directory
    defines it. The test first asserts that the citation was actually
    extracted, so the matching loop below cannot pass vacuously on an
    empty result.
    """
    tex_file = tmp_path / "test.tex"
    bib_dir = tmp_path / "bib"
    bib_dir.mkdir()

    tex_file.write_text(r"\cite{Smith2020}")
    (bib_dir / "refs.bib").write_text("@article{Smith2020, title={Test}}")

    cites = extract_citations([tex_file])
    bib_keys = extract_bib_keys(bib_dir)

    # Guard against a vacuous pass: an empty ``cites`` would skip the loop.
    assert "Smith2020" in cites, "expected \\cite{Smith2020} to be extracted"
    for cite_key in cites:
        assert cite_key in bib_keys, f"no bib entry for citation {cite_key!r}"


def test_auto_labels_vs_explicit(tmp_path):
    """Check that inferred and explicit labels can resolve refs together.

    One reference targets a label that exists only as a caption file, the
    other targets a ``\\label`` written in the text. After merging explicit
    and inferred labels, both keys must be present on the ref side and on
    the label side.
    """
    manuscript = tmp_path / "01_manuscript"
    contents = manuscript / "contents"
    captions = contents / "figures" / "caption_and_media"
    captions.mkdir(parents=True)
    (captions / "01_auto.tex").write_text("Auto")

    body_lines = [
        r"\ref{fig:01_auto}",
        r"\ref{fig:explicit}",
        r"\label{fig:explicit}",
    ]
    body_file = contents / "text.tex"
    body_file.write_text("\n".join(body_lines))

    found_refs = extract_refs([body_file])
    combined_labels = dict(extract_labels([body_file]))
    # Inferred labels are merged last, so they win on a key collision.
    combined_labels.update(infer_auto_labels(manuscript))

    expected_keys = ("fig:01_auto", "fig:explicit")
    for key in expected_keys:
        assert key in found_refs
    for key in expected_keys:
        assert key in combined_labels


if __name__ == "__main__":
    # Allow running this test module directly: ``python test_check_references.py``.
    import pytest

    # pytest.main() returns the exit code; propagate it so a failing run
    # produces a non-zero process status instead of always exiting with 0.
    raise SystemExit(pytest.main([os.path.abspath(__file__), "-v"]))