#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: 2026-01-27
# File: src/scitex_writer/bib.py
"""Bibliography management functions.
Usage::
import scitex_writer as sw
# List bib files
result = sw.bib.list_files("./my-paper")
# List entries
result = sw.bib.list_entries("./my-paper")
# Add an entry
sw.bib.add("./my-paper", "@article{Smith2024, ...}")
# Merge all bib files
sw.bib.merge("./my-paper")
"""
import re as _re
from typing import Optional as _Optional
from ._mcp.utils import resolve_project_path as _resolve_project_path
def list_files(project_dir: str) -> dict:
"""List all bibliography files in the project.
Args:
project_dir: Path to scitex-writer project.
Returns:
Dict with bibfiles list and count.
"""
try:
project_path = _resolve_project_path(project_dir)
bib_dir = project_path / "00_shared" / "bib_files"
if not bib_dir.exists():
return {"success": True, "bibfiles": [], "count": 0}
bibfiles = []
for bib_file in sorted(bib_dir.glob("*.bib")):
content = bib_file.read_text(encoding="utf-8")
entry_count = content.count("@")
bibfiles.append(
{
"name": bib_file.name,
"path": str(bib_file),
"entry_count": entry_count,
"is_merged": bib_file.name == "bibliography.bib",
}
)
return {"success": True, "bibfiles": bibfiles, "count": len(bibfiles)}
except Exception as e:
return {"success": False, "error": str(e)}
def list_entries(project_dir: str, bibfile: _Optional[str] = None) -> dict:
"""List all BibTeX entries in the project or specific file.
Args:
project_dir: Path to scitex-writer project.
bibfile: Specific bib file name (optional).
Returns:
Dict with entries list and count.
"""
try:
project_path = _resolve_project_path(project_dir)
bib_dir = project_path / "00_shared" / "bib_files"
if not bib_dir.exists():
return {"success": True, "entries": [], "count": 0}
entries = []
files_to_scan = [bib_dir / bibfile] if bibfile else list(bib_dir.glob("*.bib"))
for bib_file in files_to_scan:
if not bib_file.exists():
continue
content = bib_file.read_text(encoding="utf-8")
pattern = r"@(\w+)\{([^,\s]+)"
for match in _re.finditer(pattern, content):
entry_type, citation_key = match.groups()
entries.append(
{
"citation_key": citation_key,
"entry_type": entry_type,
"bibfile": bib_file.name,
}
)
return {"success": True, "entries": entries, "count": len(entries)}
except Exception as e:
return {"success": False, "error": str(e)}
def get(project_dir: str, citation_key: str) -> dict:
"""Get a specific BibTeX entry by citation key.
Args:
project_dir: Path to scitex-writer project.
citation_key: The citation key to find.
Returns:
Dict with entry content and bibfile path.
"""
try:
project_path = _resolve_project_path(project_dir)
bib_dir = project_path / "00_shared" / "bib_files"
if not bib_dir.exists():
return {"success": False, "error": "No bib_files directory found"}
for bib_file in bib_dir.glob("*.bib"):
content = bib_file.read_text(encoding="utf-8")
pattern = rf"(@\w+\{{{citation_key}\s*,.*?(?=\n@|\Z))"
match = _re.search(pattern, content, _re.DOTALL)
if match:
return {
"success": True,
"citation_key": citation_key,
"bibfile": str(bib_file),
"entry": match.group(1).strip(),
}
return {"success": False, "error": f"Citation key not found: {citation_key}"}
except Exception as e:
return {"success": False, "error": str(e)}
def add(
project_dir: str,
bibtex_entry: str,
bibfile: str = "custom.bib",
deduplicate: bool = True,
) -> dict:
"""Add a BibTeX entry to a bibliography file.
Args:
project_dir: Path to scitex-writer project.
bibtex_entry: The BibTeX entry to add.
bibfile: Target bib file name (default: custom.bib).
deduplicate: Check for existing entry with same key.
Returns:
Dict with bibfile path and citation_key.
"""
try:
project_path = _resolve_project_path(project_dir)
bib_dir = project_path / "00_shared" / "bib_files"
bib_dir.mkdir(parents=True, exist_ok=True)
key_match = _re.search(r"@\w+\{([^,\s]+)", bibtex_entry)
if not key_match:
return {"success": False, "error": "Could not parse citation key"}
citation_key = key_match.group(1)
if deduplicate:
existing = get(project_dir, citation_key)
if existing.get("success"):
return {
"success": False,
"error": f"Duplicate citation key: {citation_key}",
"existing_file": existing.get("bibfile"),
}
bib_path = bib_dir / bibfile
if bib_path.exists():
current_content = bib_path.read_text(encoding="utf-8")
if not current_content.endswith("\n"):
current_content += "\n"
new_content = current_content + "\n" + bibtex_entry.strip() + "\n"
else:
new_content = bibtex_entry.strip() + "\n"
bib_path.write_text(new_content, encoding="utf-8")
return {
"success": True,
"bibfile": str(bib_path),
"citation_key": citation_key,
}
except Exception as e:
return {"success": False, "error": str(e)}
def remove(project_dir: str, citation_key: str) -> dict:
"""Remove a BibTeX entry by citation key.
Args:
project_dir: Path to scitex-writer project.
citation_key: The citation key to remove.
Returns:
Dict with removed_from path.
"""
try:
project_path = _resolve_project_path(project_dir)
bib_dir = project_path / "00_shared" / "bib_files"
if not bib_dir.exists():
return {"success": False, "error": "No bib_files directory found"}
for bib_file in bib_dir.glob("*.bib"):
content = bib_file.read_text(encoding="utf-8")
pattern = rf"@\w+\{{{citation_key}\s*,.*?(?=\n@|\Z)"
match = _re.search(pattern, content, _re.DOTALL)
if match:
new_content = content[: match.start()] + content[match.end() :]
new_content = _re.sub(r"\n{3,}", "\n\n", new_content).strip() + "\n"
bib_file.write_text(new_content, encoding="utf-8")
return {
"success": True,
"citation_key": citation_key,
"removed_from": str(bib_file),
}
return {"success": False, "error": f"Citation key not found: {citation_key}"}
except Exception as e:
return {"success": False, "error": str(e)}
def merge(
project_dir: str,
output_file: str = "bibliography.bib",
deduplicate: bool = True,
) -> dict:
"""Merge all .bib files into one, with optional deduplication.
Args:
project_dir: Path to scitex-writer project.
output_file: Output filename (default: bibliography.bib).
deduplicate: Skip duplicate citation keys.
Returns:
Dict with entry_count and duplicates_skipped.
"""
try:
project_path = _resolve_project_path(project_dir)
bib_dir = project_path / "00_shared" / "bib_files"
if not bib_dir.exists():
return {"success": False, "error": "No bib_files directory found"}
output_path = bib_dir / output_file
seen_keys = set()
merged_entries = []
duplicates = []
for bib_file in sorted(bib_dir.glob("*.bib")):
if bib_file.name == output_file:
continue
content = bib_file.read_text(encoding="utf-8")
entries = _re.findall(r"(@\w+\{[^@]*)", content, _re.DOTALL)
for entry in entries:
entry = entry.strip()
if not entry:
continue
key_match = _re.search(r"@\w+\{([^,\s]+)", entry)
if not key_match:
continue
citation_key = key_match.group(1)
if deduplicate and citation_key in seen_keys:
duplicates.append({"key": citation_key, "file": bib_file.name})
continue
seen_keys.add(citation_key)
merged_entries.append(entry)
output_content = "\n\n".join(merged_entries) + "\n"
output_path.write_text(output_content, encoding="utf-8")
return {
"success": True,
"output_file": str(output_path),
"entry_count": len(merged_entries),
"duplicates_skipped": len(duplicates),
}
except Exception as e:
return {"success": False, "error": str(e)}
__all__ = ["list_files", "list_entries", "get", "add", "remove", "merge"]
# EOF