#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Test file for: explore_bibtex.py
"""Tests for helper logic belonging to ``explore_bibtex.py``.

The helpers exercised here (``get_cited_papers``,
``extract_coauthors_from_tex`` and ``calculate_score``) are re-implemented
locally in this module rather than imported, so the tests run against these
local copies.
"""

import os
import re
import sys
from pathlib import Path

# Add scripts/python to path for imports
ROOT_DIR = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(ROOT_DIR / "scripts" / "python"))

import pytest  # noqa: E402


# Re-implement key functions locally for testing
def get_cited_papers(manuscript_dir: Path) -> set:
    """Local copy for testing.

    Collect the citation keys used in the manuscript section files.

    Only the five fixed section files listed below are inspected; files that
    do not exist are skipped. Only the plain ``\\cite{...}`` form is matched
    (the pattern requires ``{`` directly after ``\\cite``).

    Args:
        manuscript_dir: Directory containing the section ``.tex`` files.

    Returns:
        The set of citation keys, with surrounding whitespace stripped.
    """
    cited = set()
    tex_files = [
        "abstract.tex",
        "introduction.tex",
        "methods.tex",
        "results.tex",
        "discussion.tex",
    ]
    for fname in tex_files:
        fpath = manuscript_dir / fname
        if fpath.exists():
            # Read explicitly as UTF-8 so the result does not depend on the
            # platform's locale encoding.
            content = fpath.read_text(encoding="utf-8")
            matches = re.findall(r"\\cite\{([^}]+)\}", content)
            for match in matches:
                # A single \cite{} may hold several comma-separated keys.
                cited.update(key.strip() for key in match.split(","))
    return cited


def extract_coauthors_from_tex(authors_tex_path: Path) -> list:
    """Local copy for testing.

    Extract one surname per ``\\author[...]{...}`` entry of a LaTeX file.

    The surname is taken to be the last whitespace-separated word of the
    author argument after LaTeX commands have been removed. The author
    argument is captured only up to the first ``}``, so commands that carry
    their own braced argument inside the name are not handled.

    Args:
        authors_tex_path: Path to the ``.tex`` file with the author entries.

    Returns:
        Surnames in order of appearance; an empty list if the file does not
        exist.
    """
    if not authors_tex_path.exists():
        return []

    # Read explicitly as UTF-8 so non-ASCII names decode the same everywhere.
    content = authors_tex_path.read_text(encoding="utf-8")
    author_pattern = r"\\author\[[^\]]+\]\{([^}]+)\}"
    matches = re.findall(author_pattern, content)

    authors = []
    for match in matches:
        # Drop LaTeX commands (optionally with one braced argument).
        clean_name = re.sub(r"\\[a-zA-Z]+(?:\{[^}]*\})?", "", match).strip()
        parts = clean_name.split()
        if parts:
            authors.append(parts[-1])
    return authors


class Paper:
    """Simple Paper class for testing.

    Holds only the two attributes read by ``calculate_score``; either may be
    ``None``.
    """

    def __init__(self, citation_count=None, journal_impact_factor=None):
        self.citation_count = citation_count
        self.journal_impact_factor = journal_impact_factor


def calculate_score(paper, weights=None):
    """Local copy for testing.

    Compute the weighted sum of a paper's citation count and impact factor.

    Args:
        paper: Object exposing ``citation_count`` and
            ``journal_impact_factor``; falsy values (``None`` or ``0``)
            count as 0.
        weights: Mapping with the keys ``"citations"`` and
            ``"impact_factor"``. Defaults to 1.0 and 10.0 respectively.

    Returns:
        ``citations * weights["citations"]
        + impact * weights["impact_factor"]``.
    """
    if weights is None:
        weights = {"citations": 1.0, "impact_factor": 10.0}

    citations = paper.citation_count or 0
    impact = paper.journal_impact_factor or 0
    return (citations * weights["citations"]) + (impact * weights["impact_factor"])


# Tests for get_cited_papers
def test_get_cited_papers_from_introduction(tmp_path):
    """Test extracting citations from introduction."""
    manuscript_dir = tmp_path / "manuscript"
    manuscript_dir.mkdir()

    intro_file = manuscript_dir / "introduction.tex"
    intro_file.write_text(
        "Previous work \\cite{smith2020, jones2019} showed that...",
        encoding="utf-8",
    )

    cited = get_cited_papers(manuscript_dir)
    assert cited == {"smith2020", "jones2019"}


def test_get_cited_papers_multiple_files(tmp_path):
    """Test citations across multiple .tex files."""
    manuscript_dir = tmp_path / "manuscript"
    manuscript_dir.mkdir()

    (manuscript_dir / "abstract.tex").write_text(
        "In abstract \\cite{a}", encoding="utf-8"
    )
    (manuscript_dir / "introduction.tex").write_text(
        "In intro \\cite{b, c}", encoding="utf-8"
    )
    (manuscript_dir / "methods.tex").write_text(
        "In methods \\cite{d}", encoding="utf-8"
    )

    cited = get_cited_papers(manuscript_dir)
    assert cited == {"a", "b", "c", "d"}


def test_get_cited_papers_empty_dir(tmp_path):
    """Test empty directory returns empty set."""
    manuscript_dir = tmp_path / "empty_manuscript"
    manuscript_dir.mkdir()

    cited = get_cited_papers(manuscript_dir)
    assert cited == set()


def test_get_cited_papers_missing_files(tmp_path):
    """Test when some expected files don't exist."""
    manuscript_dir = tmp_path / "manuscript"
    manuscript_dir.mkdir()

    # Only create one file
    (manuscript_dir / "results.tex").write_text(
        "Results \\cite{x}", encoding="utf-8"
    )

    cited = get_cited_papers(manuscript_dir)
    assert cited == {"x"}


def test_get_cited_papers_duplicate_citations(tmp_path):
    """Test duplicate citations are deduplicated."""
    manuscript_dir = tmp_path / "manuscript"
    manuscript_dir.mkdir()

    (manuscript_dir / "introduction.tex").write_text(
        "Intro \\cite{smith2020}", encoding="utf-8"
    )
    (manuscript_dir / "methods.tex").write_text(
        "Methods \\cite{smith2020}", encoding="utf-8"
    )
    (manuscript_dir / "results.tex").write_text(
        "Results \\cite{smith2020}", encoding="utf-8"
    )

    cited = get_cited_papers(manuscript_dir)
    assert cited == {"smith2020"}


# Tests for extract_coauthors_from_tex
def test_extract_coauthors_single(tmp_path):
    """Test extracting single author."""
    authors_file = tmp_path / "authors.tex"
    authors_file.write_text("\\author[1]{John Smith}", encoding="utf-8")

    authors = extract_coauthors_from_tex(authors_file)
    assert authors == ["Smith"]


def test_extract_coauthors_multiple(tmp_path):
    """Test extracting multiple authors."""
    authors_file = tmp_path / "authors.tex"
    authors_file.write_text(
        """
\\author[1]{John Smith}
\\author[2]{Jane Doe}
\\author[3]{Robert Johnson}
""",
        encoding="utf-8",
    )

    authors = extract_coauthors_from_tex(authors_file)
    assert authors == ["Smith", "Doe", "Johnson"]


def test_extract_coauthors_with_latex(tmp_path):
    """Test extracting authors with LaTeX commands - names extracted from cleaned content."""
    authors_file = tmp_path / "authors.tex"
    # Brace-less commands before and after the name must be stripped.
    authors_file.write_text(
        """
\\author[1]{\\bfseries John Smith}
\\author[2]{Jane Doe\\footnotemark}
""",
        encoding="utf-8",
    )

    authors = extract_coauthors_from_tex(authors_file)
    assert authors == ["Smith", "Doe"]


def test_extract_coauthors_missing_file(tmp_path):
    """Test non-existent file returns empty list."""
    authors_file = tmp_path / "nonexistent.tex"

    authors = extract_coauthors_from_tex(authors_file)
    assert authors == []


def test_extract_coauthors_middle_names(tmp_path):
    """Test authors with middle names - last name is extracted."""
    authors_file = tmp_path / "authors.tex"
    authors_file.write_text(
        """
\\author[1]{John Michael Smith}
\\author[2]{Jane Elizabeth Doe Johnson}
""",
        encoding="utf-8",
    )

    authors = extract_coauthors_from_tex(authors_file)
    assert authors == ["Smith", "Johnson"]


def test_extract_coauthors_nested_commands(tmp_path):
    """Test three-word names yield only the final word as the surname.

    Despite the test's name, the input contains no nested LaTeX commands:
    the author pattern captures the name only up to the first ``}``, so a
    command with its own braced argument inside the name is not covered.
    """
    authors_file = tmp_path / "authors.tex"
    authors_file.write_text(
        """
\\author[1]{John Michael Smith}
\\author[2]{Jane Marie Doe}
""",
        encoding="utf-8",
    )

    authors = extract_coauthors_from_tex(authors_file)
    assert authors == ["Smith", "Doe"]


# Tests for calculate_score
def test_calculate_score_default_weights(tmp_path):
    """Test score calculation with default weights."""
    paper = Paper(citation_count=100, journal_impact_factor=5.0)

    score = calculate_score(paper)
    # (100 * 1.0) + (5.0 * 10.0) = 100 + 50 = 150
    assert score == 150.0


def test_calculate_score_custom_weights(tmp_path):
    """Test score calculation with custom weights."""
    paper = Paper(citation_count=100, journal_impact_factor=5.0)
    weights = {"citations": 2.0, "impact_factor": 5.0}

    score = calculate_score(paper, weights)
    # (100 * 2.0) + (5.0 * 5.0) = 200 + 25 = 225
    assert score == 225.0


def test_calculate_score_none_citations(tmp_path):
    """Test None citations treated as 0."""
    paper = Paper(citation_count=None, journal_impact_factor=5.0)

    score = calculate_score(paper)
    # (0 * 1.0) + (5.0 * 10.0) = 0 + 50 = 50
    assert score == 50.0


def test_calculate_score_none_impact(tmp_path):
    """Test None impact factor treated as 0."""
    paper = Paper(citation_count=100, journal_impact_factor=None)

    score = calculate_score(paper)
    # (100 * 1.0) + (0 * 10.0) = 100 + 0 = 100
    assert score == 100.0


def test_calculate_score_both_none(tmp_path):
    """Test both None values treated as 0."""
    paper = Paper(citation_count=None, journal_impact_factor=None)

    score = calculate_score(paper)
    assert score == 0.0


def test_calculate_score_zero_values(tmp_path):
    """Test explicit zero values."""
    paper = Paper(citation_count=0, journal_impact_factor=0.0)

    score = calculate_score(paper)
    assert score == 0.0


def test_calculate_score_high_citations_low_impact(tmp_path):
    """Test paper with high citations but low impact."""
    paper = Paper(citation_count=1000, journal_impact_factor=1.0)

    score = calculate_score(paper)
    # (1000 * 1.0) + (1.0 * 10.0) = 1000 + 10 = 1010
    assert score == 1010.0


def test_calculate_score_low_citations_high_impact(tmp_path):
    """Test paper with low citations but high impact."""
    paper = Paper(citation_count=10, journal_impact_factor=20.0)

    score = calculate_score(paper)
    # (10 * 1.0) + (20.0 * 10.0) = 10 + 200 = 210
    assert score == 210.0


if __name__ == "__main__":
    # pytest is already imported at module level; no local re-import needed.
    pytest.main([os.path.abspath(__file__), "-v"])