Loading...
No commits yet
Not committed History
Blame
_arxiv_compliance.py • 3.7 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: src/scitex_writer/export/_arxiv_compliance.py

"""Compliance checking for arXiv submission.

Pure functions to validate manuscript metadata and LaTeX structure
against arXiv submission requirements.  No Django or ORM dependencies.
"""

import re
from typing import Any, Dict

# arXiv limits
MAX_TITLE_LENGTH = 256
MAX_ABSTRACT_LENGTH = 1920
MIN_ABSTRACT_LENGTH = 100


def check_compliance(
    title: str,
    abstract: str,
    latex_content: str,
) -> Dict[str, Any]:
    """Check manuscript compliance with arXiv requirements.

    Args:
        title: Manuscript title.
        abstract: Manuscript abstract.
        latex_content: Full LaTeX source.

    Returns:
        Dict with keys: is_compliant (bool), errors (list), warnings (list),
        checks (dict of sub-check results).
    """
    result = {
        "is_compliant": True,
        "errors": [],
        "warnings": [],
        "checks": {},
    }

    title_check = _check_title(title)
    result["checks"]["title"] = title_check
    if not title_check["passed"]:
        result["errors"].extend(title_check["errors"])
        result["is_compliant"] = False

    abstract_check = _check_abstract(abstract)
    result["checks"]["abstract"] = abstract_check
    if not abstract_check["passed"]:
        result["errors"].extend(abstract_check["errors"])
        result["is_compliant"] = False

    latex_check = check_latex_structure(latex_content)
    result["checks"]["latex"] = latex_check
    if not latex_check["passed"]:
        result["errors"].extend(latex_check["errors"])
        result["is_compliant"] = False

    return result


def _check_title(title: str) -> Dict[str, Any]:
    """Check title compliance."""
    errors = []
    warnings = []

    if not title or not title.strip():
        errors.append("Title is required")
    elif len(title) > MAX_TITLE_LENGTH:
        errors.append(f"Title exceeds maximum length of {MAX_TITLE_LENGTH} characters")

    if title and len(title) < 10:
        warnings.append("Title is very short")

    return {"passed": len(errors) == 0, "errors": errors, "warnings": warnings}


def _check_abstract(abstract: str) -> Dict[str, Any]:
    """Check abstract compliance."""
    errors = []
    warnings = []

    if not abstract or not abstract.strip():
        errors.append("Abstract is required")
    else:
        if len(abstract) > MAX_ABSTRACT_LENGTH:
            errors.append(
                f"Abstract exceeds maximum length of {MAX_ABSTRACT_LENGTH} characters"
            )
        elif len(abstract) < MIN_ABSTRACT_LENGTH:
            warnings.append(
                f"Abstract is shorter than recommended minimum of {MIN_ABSTRACT_LENGTH} characters"
            )

    return {"passed": len(errors) == 0, "errors": errors, "warnings": warnings}


def check_latex_structure(latex_content: str) -> Dict[str, Any]:
    """Check LaTeX structure compliance.

    Args:
        latex_content: Full LaTeX source string.

    Returns:
        Dict with passed (bool), errors (list), warnings (list).
    """
    errors = []
    warnings = []

    if not re.search(r"\\documentclass", latex_content):
        errors.append("Missing \\documentclass declaration")

    if not re.search(r"\\begin\{document\}", latex_content):
        errors.append("Missing \\begin{document}")
    if not re.search(r"\\end\{document\}", latex_content):
        errors.append("Missing \\end{document}")

    if not re.search(r"\\title\{", latex_content):
        warnings.append("Missing \\title command")
    if not re.search(r"\\author\{", latex_content):
        warnings.append("Missing \\author command")

    return {"passed": len(errors) == 0, "errors": errors, "warnings": warnings}


# EOF