#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Test file for: check_references.py
import os
import sys
from pathlib import Path
# Make the project's scripts/python directory importable so that the
# check_references module below can be loaded without installing it.
ROOT_DIR = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(ROOT_DIR.joinpath("scripts", "python")))
from check_references import ( # noqa: E402
collect_tex_files,
extract_bib_keys,
extract_citations,
extract_labels,
extract_refs,
infer_auto_labels,
)
# ============================================================================
# Test extract_refs
# ============================================================================
def test_extract_refs_basic(tmp_path):
    """Check that plain \\ref commands are collected together with a line number."""
    source = tmp_path / "test.tex"
    source.write_text(
        r"""
See Figure~\ref{fig:01_example} for details.
Results are shown in \ref{tab:01_data}.
"""
    )

    found = extract_refs([source])

    assert len(found) == 2
    for expected_key in ("fig:01_example", "tab:01_data"):
        assert expected_key in found
    # The document starts with a newline, so the first \ref sits on line 2.
    first_occurrence = found["fig:01_example"][0]
    assert first_occurrence[1] == 2
def test_extract_refs_skips_comments(tmp_path):
    """Check that \\ref commands located after a % marker are not collected."""
    source = tmp_path / "test.tex"
    source.write_text(
        r"""
\ref{fig:real_ref}
% This is a comment with \ref{fig:commented_ref}
Some text % inline \ref{fig:inline_comment}
"""
    )

    found = extract_refs([source])

    assert "fig:real_ref" in found
    # Covers both a whole-line comment and a trailing inline comment.
    for hidden_key in ("fig:commented_ref", "fig:inline_comment"):
        assert hidden_key not in found
def test_extract_refs_skips_macro_args(tmp_path):
    """Check that the #1 placeholder inside a macro body is not reported as a key."""
    macro_definition = r"\newcommand{\figref}[1]{\ref{#1}}"
    genuine_reference = r"\ref{fig:actual_ref}"
    source = tmp_path / "test.tex"
    source.write_text(macro_definition + "\n" + genuine_reference)

    found = extract_refs([source])

    assert "#1" not in found
    assert "fig:actual_ref" in found
# ============================================================================
# Test extract_labels
# ============================================================================
def test_extract_labels_basic(tmp_path):
    """Check that \\label commands inside figure and table environments are collected."""
    source = tmp_path / "test.tex"
    source.write_text(
        r"""
\begin{figure}
\label{fig:01_example}
\end{figure}
\begin{table}
\label{tab:01_data}
\end{table}
"""
    )

    found = extract_labels([source])

    assert len(found) == 2
    for expected_key in ("fig:01_example", "tab:01_data"):
        assert expected_key in found
def test_extract_labels_skips_comments(tmp_path):
    """Check that a commented-out \\label is not reported as a definition."""
    source = tmp_path / "test.tex"
    source.write_text(
        r"""
\label{sec:real_label}
% \label{sec:commented_label}
"""
    )

    found = extract_labels([source])

    assert "sec:commented_label" not in found
    assert "sec:real_label" in found
def test_extract_labels_multiple_definitions(tmp_path):
    """Check that a label defined in two files yields two recorded definitions."""
    sources = [tmp_path / "file1.tex", tmp_path / "file2.tex"]
    for source in sources:
        source.write_text(r"\label{fig:duplicate}")

    found = extract_labels(sources)

    definitions = found["fig:duplicate"]
    assert len(definitions) == 2
# ============================================================================
# Test extract_citations
# ============================================================================
def test_extract_citations_single(tmp_path):
    """Check that a lone \\cite key is the only citation reported."""
    source = tmp_path / "test.tex"
    source.write_text(r"Previous work \cite{author2020} showed that...")

    found = extract_citations([source])

    assert len(found) == 1
    assert "author2020" in found
def test_extract_citations_multi_key(tmp_path):
    """Check that a comma-separated key list in one command yields every key."""
    source = tmp_path / "test.tex"
    source.write_text(r"\citep{Smith2018, Jones2019, Brown2020}")

    found = extract_citations([source])

    for expected_key in ("Smith2018", "Jones2019", "Brown2020"):
        assert expected_key in found
    assert len(found) == 3
def test_extract_citations_variants(tmp_path):
    """Check that each of six \\cite-family commands contributes its key."""
    source = tmp_path / "test.tex"
    source.write_text(
        r"""
\cite{ref1}
\citep{ref2}
\citet{ref3}
\citealt{ref4}
\citeauthor{ref5}
\citeyear{ref6}
"""
    )

    found = extract_citations([source])

    # One key per command variant: ref1 .. ref6.
    for index in range(1, 7):
        assert f"ref{index}" in found
    assert len(found) == 6
def test_extract_citations_skips_comments(tmp_path):
    """Check that a \\cite on a commented-out line is not reported."""
    live_line = r"\cite{real_ref}"
    commented_line = r"% \cite{commented_ref}"
    source = tmp_path / "test.tex"
    source.write_text(live_line + "\n" + commented_line)

    found = extract_citations([source])

    assert "commented_ref" not in found
    assert "real_ref" in found
def test_extract_citations_whitespace_handling(tmp_path):
    """Check that keys separated by ", " are reported without the padding."""
    source = tmp_path / "test.tex"
    source.write_text(r"\cite{Key1, Key2, Key3}")

    found = extract_citations([source])

    for expected_key in ("Key1", "Key2", "Key3"):
        assert expected_key in found
# ============================================================================
# Test extract_bib_keys
# ============================================================================
def test_extract_bib_keys(tmp_path):
    """Check that entry keys are read from a .bib file and mapped to that file."""
    bib_path = tmp_path / "refs.bib"
    bib_path.write_text(
        """
@article{Smith2020, author={John Smith}}
@book{Jones2019, title={A Book}}
@inproceedings{Brown2018, booktitle={Proceedings}}
"""
    )

    found = extract_bib_keys(tmp_path)

    for expected_key in ("Smith2020", "Jones2019", "Brown2018"):
        assert expected_key in found
    # Each key maps to the path of the file that defines it.
    assert found["Smith2020"] == bib_path
def test_extract_bib_keys_multiple_files(tmp_path):
    """Check that keys from every .bib file in the directory are gathered."""
    entries_by_file = {
        "refs1.bib": "@article{Key1, title={Test}}",
        "refs2.bib": "@book{Key2, title={Test}}",
    }
    for file_name, entry in entries_by_file.items():
        (tmp_path / file_name).write_text(entry)

    found = extract_bib_keys(tmp_path)

    assert "Key1" in found
    assert "Key2" in found
def test_extract_bib_keys_no_directory(tmp_path):
    """Check that a directory that does not exist produces an empty dict."""
    missing_dir = tmp_path / "nonexistent"
    found = extract_bib_keys(missing_dir)
    assert found == {}
def test_extract_bib_keys_various_formats(tmp_path):
    """Check key extraction for compact, spaced, and field-less BibTeX entries."""
    bib_path = tmp_path / "refs.bib"
    bib_path.write_text(
        "@article{NoSpaces,author={Test}}\n"
        "@book{WithComma, title={Test}}\n"
        "@misc{TrailingComma,}"
    )

    found = extract_bib_keys(tmp_path)

    for expected_key in ("NoSpaces", "WithComma", "TrailingComma"):
        assert expected_key in found
# ============================================================================
# Test infer_auto_labels
# ============================================================================
def test_infer_auto_labels(tmp_path):
    """Check that caption files produce fig:/tab: labels named after the file stem."""
    doc_dir = tmp_path / "01_manuscript"
    captions_by_kind = {
        "figures": {
            "01_example.tex": "Figure caption",
            "02_results.tex": "Results figure",
        },
        "tables": {
            "01_data.tex": "Data table",
        },
    }
    for kind, captions in captions_by_kind.items():
        caption_dir = doc_dir / "contents" / kind / "caption_and_media"
        caption_dir.mkdir(parents=True)
        for file_name, text in captions.items():
            (caption_dir / file_name).write_text(text)

    inferred = infer_auto_labels(doc_dir)

    for expected_key in ("fig:01_example", "fig:02_results", "tab:01_data"):
        assert expected_key in inferred
    assert len(inferred) == 3
def test_infer_auto_labels_skips_panels(tmp_path):
    """Check that panel captions such as 01a_panel.tex do not produce labels."""
    doc_dir = tmp_path / "01_manuscript"
    caption_dir = doc_dir / "contents" / "figures" / "caption_and_media"
    caption_dir.mkdir(parents=True)
    captions = {
        "01_main.tex": "Main",
        "01a_panel.tex": "Panel A",
        "01b_panel.tex": "Panel B",
        "02_other.tex": "Other",
    }
    for file_name, text in captions.items():
        (caption_dir / file_name).write_text(text)

    inferred = infer_auto_labels(doc_dir)

    for kept_key in ("fig:01_main", "fig:02_other"):
        assert kept_key in inferred
    for panel_key in ("fig:01a_panel", "fig:01b_panel"):
        assert panel_key not in inferred
def test_infer_auto_labels_no_contents_dir(tmp_path):
    """Check that a document directory without a contents folder yields no labels."""
    doc_dir = tmp_path / "01_manuscript"
    doc_dir.mkdir()

    inferred = infer_auto_labels(doc_dir)

    assert inferred == {}
def test_infer_auto_labels_line_zero(tmp_path):
    """Check that an inferred label is recorded with line number 0."""
    doc_dir = tmp_path / "01_manuscript"
    caption_dir = doc_dir / "contents" / "figures" / "caption_and_media"
    caption_dir.mkdir(parents=True)
    (caption_dir / "01_test.tex").write_text("Test")

    inferred = infer_auto_labels(doc_dir)

    # Index 1 of the first location entry holds the line number.
    first_location = inferred["fig:01_test"][0]
    assert first_location[1] == 0
# ============================================================================
# Test collect_tex_files
# ============================================================================
def test_collect_tex_files_basic(tmp_path):
    """Check that .tex files in the document root and in contents/ are collected."""
    doc_dir = tmp_path / "01_manuscript"
    contents = doc_dir / "contents"
    contents.mkdir(parents=True)
    for target, text in (
        (contents / "intro.tex", "Intro"),
        (contents / "methods.tex", "Methods"),
        (doc_dir / "base.tex", "Base"),
    ):
        target.write_text(text)

    collected = collect_tex_files(doc_dir)

    collected_names = {path.name for path in collected}
    for expected_name in ("intro.tex", "methods.tex", "base.tex"):
        assert expected_name in collected_names
def test_collect_tex_files_skips_generated(tmp_path):
    """Check that manuscript.tex and manuscript_diff.tex are left out of the result."""
    doc_dir = tmp_path / "01_manuscript"
    doc_dir.mkdir()
    texts_by_name = {
        "manuscript.tex": "Generated",
        "manuscript_diff.tex": "Generated diff",
        "base.tex": "Source",
    }
    for file_name, text in texts_by_name.items():
        (doc_dir / file_name).write_text(text)

    collected = collect_tex_files(doc_dir)

    assert len(collected) == 1
    assert collected[0].name == "base.tex"
def test_collect_tex_files_skips_versioned(tmp_path):
    """Check that files carrying a _vNN suffix are left out of the result."""
    doc_dir = tmp_path / "01_manuscript"
    contents = doc_dir / "contents"
    contents.mkdir(parents=True)
    texts_by_name = {
        "intro.tex": "Current",
        "intro_v01.tex": "Version 1",
        "intro_v02.tex": "Version 2",
    }
    for file_name, text in texts_by_name.items():
        (contents / file_name).write_text(text)

    collected = collect_tex_files(doc_dir)

    assert len(collected) == 1
    assert collected[0].name == "intro.tex"
def test_collect_tex_files_includes_captions(tmp_path):
    """Check that figure and table caption files are part of the collected set."""
    doc_dir = tmp_path / "01_manuscript"
    caption_files = {
        "figures": ("01_fig.tex", "Fig"),
        "tables": ("01_tab.tex", "Tab"),
    }
    for kind, (file_name, text) in caption_files.items():
        caption_dir = doc_dir / "contents" / kind / "caption_and_media"
        caption_dir.mkdir(parents=True)
        (caption_dir / file_name).write_text(text)

    collected = collect_tex_files(doc_dir)

    collected_names = {path.name for path in collected}
    assert "01_fig.tex" in collected_names
    assert "01_tab.tex" in collected_names
def test_collect_tex_files_nonexistent_dir(tmp_path):
    """Check that a document directory that does not exist yields an empty list."""
    missing_dir = tmp_path / "nonexistent"
    collected = collect_tex_files(missing_dir)
    assert collected == []
# ============================================================================
# Test integration scenarios
# ============================================================================
def test_refs_and_labels_match(tmp_path):
    """Integration test: every extracted reference has a matching label.

    The file defines ``sec:intro`` and references it once. The test first
    asserts that the reference was actually extracted, so it cannot pass
    vacuously when ``extract_refs`` returns nothing, and then checks that no
    extracted reference is missing from the extracted labels.
    """
    tex_file = tmp_path / "test.tex"
    tex_file.write_text(
        r"\section{Intro}\label{sec:intro}" "\n" r"See \ref{sec:intro}."
    )
    refs = extract_refs([tex_file])
    labels = extract_labels([tex_file])

    # Without this, an empty result would make the check below trivially true.
    assert "sec:intro" in refs

    unresolved = [ref_key for ref_key in refs if ref_key not in labels]
    assert not unresolved, f"references without a label: {unresolved}"
def test_citations_and_bib_match(tmp_path):
    """Integration test: every extracted citation has a matching .bib entry.

    The .tex file cites ``Smith2020`` and the bib directory defines it. The
    test first asserts that the citation was actually extracted, so it cannot
    pass vacuously when ``extract_citations`` returns nothing, and then checks
    that no extracted citation is missing from the bib keys.
    """
    tex_file = tmp_path / "test.tex"
    bib_dir = tmp_path / "bib"
    bib_dir.mkdir()
    tex_file.write_text(r"\cite{Smith2020}")
    (bib_dir / "refs.bib").write_text("@article{Smith2020, title={Test}}")
    cites = extract_citations([tex_file])
    bib_keys = extract_bib_keys(bib_dir)

    # Without this, an empty result would make the check below trivially true.
    assert "Smith2020" in cites

    missing = [cite_key for cite_key in cites if cite_key not in bib_keys]
    assert not missing, f"citations without a bib entry: {missing}"
def test_auto_labels_vs_explicit(tmp_path):
    """Check that inferred and explicit labels together cover every reference."""
    doc_dir = tmp_path / "01_manuscript"
    contents = doc_dir / "contents"
    caption_dir = contents / "figures" / "caption_and_media"
    caption_dir.mkdir(parents=True)
    (caption_dir / "01_auto.tex").write_text("Auto")

    source = contents / "text.tex"
    source_lines = [
        r"\ref{fig:01_auto}",
        r"\ref{fig:explicit}",
        r"\label{fig:explicit}",
    ]
    source.write_text("\n".join(source_lines))

    found_refs = extract_refs([source])
    explicit_labels = extract_labels([source])
    inferred_labels = infer_auto_labels(doc_dir)

    # Inferred entries take precedence over explicit ones on a key clash.
    combined = dict(explicit_labels)
    combined.update(inferred_labels)

    for expected_key in ("fig:01_auto", "fig:explicit"):
        assert expected_key in found_refs
    for expected_key in ("fig:01_auto", "fig:explicit"):
        assert expected_key in combined
if __name__ == "__main__":
    import pytest

    # pytest.main returns the run's exit code; hand it to sys.exit so that
    # running this file directly reports test failures to the calling shell
    # instead of always exiting with status 0.
    sys.exit(pytest.main([os.path.abspath(__file__), "-v"]))