#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Test file for: check_cited_states.py import os import re import sys from pathlib import Path # Add scripts/python to path for imports ROOT_DIR = Path(__file__).resolve().parent.parent.parent sys.path.insert(0, str(ROOT_DIR / "scripts" / "python")) import pytest # noqa: E402 # Re-implement key functions locally for testing def extract_bib_keys(bib_path): """Local copy for testing.""" if not bib_path.exists(): return set() content = bib_path.read_text(encoding="utf-8") pattern = r"@\w+\s*\{\s*([^,\s]+)" return set(re.findall(pattern, content)) def extract_citations_from_tex(tex_path): """Local copy for testing.""" if not tex_path.exists() or not tex_path.is_file(): return set() content = tex_path.read_text(encoding="utf-8") lines = content.split("\n") lines = [line.split("%")[0] for line in lines] content = "\n".join(lines) pattern = r"\\cite\w*\s*(?:\[[^\]]*\])?\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}" matches = re.findall(pattern, content) citations = set() for match in matches: keys = [k.strip() for k in match.split(",")] citations.update(keys) return citations def generate_citation_data(all_bib_keys, all_citations, bib_files, tex_files): """Local copy for testing.""" cited = sorted(all_bib_keys & all_citations) uncited = sorted(all_bib_keys - all_citations) missing = sorted(all_citations - all_bib_keys) return { "summary": { "total_references": len(all_bib_keys), "total_citations": len(all_citations), "successfully_cited": len(cited), "uncited": len(uncited), "missing": len(missing), }, "details": { "successfully_cited": cited, "uncited_references": uncited, "missing_references": missing, }, "files": { "bib_files": [str(f) for f in sorted(bib_files)], "tex_files": [str(f) for f in sorted(tex_files)], }, } # Tests for extract_bib_keys def test_extract_bib_keys_article(tmp_path): """Test extracting single article entry.""" bib_file = tmp_path / "test.bib" bib_file.write_text("@article{smith2020,\n title={Test}\n}") keys = extract_bib_keys(bib_file) assert keys == {"smith2020"} def test_extract_bib_keys_multiple(tmp_path): """Test extracting multiple entries.""" bib_file = tmp_path / "test.bib" bib_file.write_text(""" @article{smith2020, title={Test} } @book{jones2019, title={Book} } @inproceedings{doe2021, title={Proceedings} } """) keys = extract_bib_keys(bib_file) assert keys == {"smith2020", "jones2019", "doe2021"} def test_extract_bib_keys_empty_file(tmp_path): """Test empty bib file returns empty set.""" bib_file = tmp_path / "empty.bib" bib_file.write_text("") keys = extract_bib_keys(bib_file) assert keys == set() def test_extract_bib_keys_missing_file(tmp_path): """Test non-existent file returns empty set.""" bib_file = tmp_path / "nonexistent.bib" keys = extract_bib_keys(bib_file) assert keys == set() def test_extract_bib_keys_various_formats(tmp_path): """Test various BibTeX entry formats.""" bib_file = tmp_path / "test.bib" bib_file.write_text(""" @article { key_with_underscore_2020 , title={Test} } @book{ KeyWithNoSpace2019, title={Book} } @misc{doi-123.456, title={DOI format} } """) keys = extract_bib_keys(bib_file) assert "key_with_underscore_2020" in keys assert "KeyWithNoSpace2019" in keys assert "doi-123.456" in keys # Tests for extract_citations_from_tex def test_extract_citations_cite(tmp_path): """Test extracting simple cite command.""" tex_file = tmp_path / "test.tex" tex_file.write_text("This is text \\cite{smith2020} and more.") citations = extract_citations_from_tex(tex_file) assert citations == {"smith2020"} def test_extract_citations_citep(tmp_path): """Test extracting citep command with multiple keys.""" tex_file = tmp_path / "test.tex" tex_file.write_text("Previous work \\citep{smith2020, jones2019} showed.") citations = extract_citations_from_tex(tex_file) assert citations == {"smith2020", "jones2019"} def test_extract_citations_commented(tmp_path): """Test that commented citations are ignored.""" tex_file = tmp_path / "test.tex" tex_file.write_text(""" This is cited \\cite{smith2020} % This is commented \\cite{hidden2019} Also cited \\cite{jones2021} """) citations = extract_citations_from_tex(tex_file) assert citations == {"smith2020", "jones2021"} assert "hidden2019" not in citations def test_extract_citations_multiple_commands(tmp_path): """Test multiple citation commands in one file.""" tex_file = tmp_path / "test.tex" tex_file.write_text(""" First \\cite{a} Second \\citep{b} Third \\citet{c} Fourth \\citeauthor{d} Fifth \\citeyear{e} """) citations = extract_citations_from_tex(tex_file) assert citations == {"a", "b", "c", "d", "e"} def test_extract_citations_optional_args(tmp_path): """Test citation commands with optional arguments.""" tex_file = tmp_path / "test.tex" tex_file.write_text(""" See \\cite[p.~5]{key1} Also \\cite[Chapter 2][p.~10-15]{key2} And \\citep[see][]{key3} """) citations = extract_citations_from_tex(tex_file) assert citations == {"key1", "key2", "key3"} def test_extract_citations_empty_file(tmp_path): """Test empty tex file returns empty set.""" tex_file = tmp_path / "empty.tex" tex_file.write_text("") citations = extract_citations_from_tex(tex_file) assert citations == set() def test_extract_citations_missing_file(tmp_path): """Test non-existent file returns empty set.""" tex_file = tmp_path / "nonexistent.tex" citations = extract_citations_from_tex(tex_file) assert citations == set() def test_extract_citations_inline_comments(tmp_path): """Test inline comments are removed.""" tex_file = tmp_path / "test.tex" tex_file.write_text(""" Valid \\cite{valid} % inline comment \\cite{invalid} Another \\cite{valid2} """) citations = extract_citations_from_tex(tex_file) assert citations == {"valid", "valid2"} assert "invalid" not in citations # Tests for generate_citation_data def test_generate_citation_data_all_cited(tmp_path): """Test when all bib keys are cited.""" bib_keys = {"smith2020", "jones2019"} citations = {"smith2020", "jones2019"} data = generate_citation_data(bib_keys, citations, [], []) assert data["summary"]["total_references"] == 2 assert data["summary"]["total_citations"] == 2 assert data["summary"]["successfully_cited"] == 2 assert data["summary"]["uncited"] == 0 assert data["summary"]["missing"] == 0 assert set(data["details"]["successfully_cited"]) == {"smith2020", "jones2019"} assert data["details"]["uncited_references"] == [] assert data["details"]["missing_references"] == [] def test_generate_citation_data_uncited(tmp_path): """Test when some bib keys are not cited.""" bib_keys = {"smith2020", "jones2019", "doe2021"} citations = {"smith2020"} data = generate_citation_data(bib_keys, citations, [], []) assert data["summary"]["total_references"] == 3 assert data["summary"]["total_citations"] == 1 assert data["summary"]["successfully_cited"] == 1 assert data["summary"]["uncited"] == 2 assert data["summary"]["missing"] == 0 assert data["details"]["successfully_cited"] == ["smith2020"] assert set(data["details"]["uncited_references"]) == {"jones2019", "doe2021"} def test_generate_citation_data_missing(tmp_path): """Test when some citations are not in bib.""" bib_keys = {"smith2020", "jones2019"} citations = {"smith2020", "unknown2021", "missing2022"} data = generate_citation_data(bib_keys, citations, [], []) assert data["summary"]["total_references"] == 2 assert data["summary"]["total_citations"] == 3 assert data["summary"]["successfully_cited"] == 1 assert data["summary"]["uncited"] == 1 assert data["summary"]["missing"] == 2 assert set(data["details"]["missing_references"]) == {"unknown2021", "missing2022"} def test_generate_citation_data_empty(tmp_path): """Test with no bib keys or citations.""" data = generate_citation_data(set(), set(), [], []) assert data["summary"]["total_references"] == 0 assert data["summary"]["total_citations"] == 0 assert data["summary"]["successfully_cited"] == 0 assert data["summary"]["uncited"] == 0 assert data["summary"]["missing"] == 0 def test_generate_citation_data_sorted(tmp_path): """Test that output lists are sorted.""" bib_keys = {"zebra", "alpha", "beta"} citations = {"beta", "gamma"} data = generate_citation_data(bib_keys, citations, [], []) # Check that lists are sorted alphabetically assert data["details"]["successfully_cited"] == ["beta"] assert data["details"]["uncited_references"] == ["alpha", "zebra"] assert data["details"]["missing_references"] == ["gamma"] if __name__ == "__main__": import pytest pytest.main([os.path.abspath(__file__), "-v"])