#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: scripts/python/check_float_order.py
# Purpose: Validate and auto-renumber figure/table reference ordering in LaTeX manuscripts
# Usage:
# python check_float_order.py [project_dir] [--fix] [--dry-run] [--doc-type manuscript|supplementary|all]
#
# Checks that figures and tables are referenced in numerical order in the text.
# With --fix, renumbers files and updates all \ref{} and \label{} to match appearance order.
import argparse
import os
import re
import shutil
import sys
from collections import OrderedDict
from pathlib import Path
# ANSI colors
# Escape sequences interpolated into the f-strings printed by this script.
GREEN = "\033[0;32m"  # [PASS] tags and success messages
YELLOW = "\033[1;33m"  # [WARN] tags and hints
RED = "\033[0;31m"  # [FAIL] tags and errors
DIM = "\033[0;90m"  # per-reference detail lines
BOLD = "\033[1m"  # section headers
NC = "\033[0m"  # "no color": resets the attributes set above
def find_references(content_dir, float_type):
    """Find all \\ref{fig:*} or \\ref{tab:*} in text files, in order of appearance.

    Files are read in a fixed section order first, followed by any other
    ``*.tex`` file in ``content_dir`` in sorted order. Text after an
    unescaped ``%`` on a line is a LaTeX comment and is ignored, so
    commented-out references do not count as appearances.

    Parameters
    ----------
    content_dir : Path
        Directory containing .tex content files.
    float_type : str
        'fig' or 'tab'.

    Returns
    -------
    list of tuple
        (label_key, file_name, line_number), one entry per key, in order of
        first appearance.
    """
    # Read order: follow typical IMRAD structure
    section_order = [
        "abstract",
        "introduction",
        "results",
        "methods",
        "star_methods",
        "discussion",
        "additional_info",
        "data_availability",
        "bigger_picture",
        "highlights",
        "graphical_abstract",
    ]
    tex_files = [
        path
        for path in (content_dir / f"{name}.tex" for name in section_order)
        if path.exists()
    ]
    # Add any remaining .tex files not in the order list
    already_listed = set(tex_files)
    tex_files.extend(
        path
        for path in sorted(content_dir.glob("*.tex"))
        if path not in already_listed
    )

    ref_pattern = re.compile(r"\\ref\{" + re.escape(float_type) + r":([^}]+)\}")
    # An unescaped '%' starts a comment; an even run of backslashes before it
    # (e.g. a '\\' line break) does not escape it and is kept.
    comment_pattern = re.compile(r"(?<!\\)((?:\\\\)*)%.*")

    seen = set()
    refs = []
    for tex_file in tex_files:
        text = tex_file.read_text(encoding="utf-8")
        for line_no, line in enumerate(text.splitlines(), 1):
            visible = comment_pattern.sub(r"\1", line)
            for match in ref_pattern.finditer(visible):
                key = match.group(1)
                if key not in seen:
                    seen.add(key)
                    refs.append((key, tex_file.name, line_no))
    return refs
def find_labels(content_dir, float_type):
    """Find all \\label{fig:*} or \\label{tab:*} definitions.

    Searches caption files (``[0-9]*.tex``) in the figures/tables
    ``caption_and_media`` subdirectory first, then ``*.tex`` files directly
    in ``content_dir``. A caption-file definition takes precedence over an
    inline one with the same key. Every label on a line is recorded, and
    text after an unescaped ``%`` (a LaTeX comment) is ignored. Files are
    visited in sorted order so the result is deterministic.

    Parameters
    ----------
    content_dir : Path
        Directory containing .tex content files.
    float_type : str
        'fig' or 'tab'.

    Returns
    -------
    dict
        label_key -> {'file': Path, 'line': int, 'source': 'caption_file'|'inline'}
    """
    label_pattern = re.compile(r"\\label\{" + re.escape(float_type) + r":([^}]+)\}")
    # An unescaped '%' starts a comment; an even run of backslashes before it
    # does not escape it and is kept.
    comment_pattern = re.compile(r"(?<!\\)((?:\\\\)*)%.*")

    def _scan(tex_file):
        # Yield (key, line_number) for every label outside comments.
        text = tex_file.read_text(encoding="utf-8")
        for line_no, line in enumerate(text.splitlines(), 1):
            visible = comment_pattern.sub(r"\1", line)
            for match in label_pattern.finditer(visible):
                yield match.group(1), line_no

    labels = {}

    # Search caption files
    subdir = "figures" if float_type == "fig" else "tables"
    media_dir = content_dir / subdir / "caption_and_media"
    if media_dir.exists():
        for tex_file in sorted(media_dir.glob("[0-9]*.tex")):
            for key, line_no in _scan(tex_file):
                labels[key] = {
                    "file": tex_file,
                    "line": line_no,
                    "source": "caption_file",
                }

    # Search inline in content .tex files; never override an earlier hit
    for tex_file in sorted(content_dir.glob("*.tex")):
        for key, line_no in _scan(tex_file):
            if key not in labels:
                labels[key] = {
                    "file": tex_file,
                    "line": line_no,
                    "source": "inline",
                }
    return labels
def extract_number_and_name(key):
    """Split a key such as '04_modules' into its number and descriptive name.

    Returns
    -------
    tuple
        ``(4, 'modules')`` for '04_modules', ``(12, '')`` for a bare number
        such as '12', and ``(None, key)`` when the key has no numeric prefix.
    """
    # Digits, optionally followed by '_' and a non-empty name
    parsed = re.match(r"^(\d+)(?:_(.+))?$", key)
    if parsed is None:
        return None, key
    digits, name = parsed.groups()
    return int(digits), name or ""
def check_order(content_dir, float_type, label):
    """Check whether numbered references first appear in increasing order.

    Prints a [PASS], [WARN] or [FAIL] line tagged with ``label``. On failure
    it also prints each key that needs a new number and every label that is
    defined but never referenced.

    Parameters
    ----------
    content_dir : Path
        Directory containing .tex content files.
    float_type : str
        'fig' or 'tab'.
    label : str
        Human-readable name used in the printed report.

    Returns
    -------
    tuple
        (is_ok, refs, desired_mapping)
        refs: list of (key, file, line)
        desired_mapping: dict of old_key -> new_key, empty unless reordering
        is needed
    """
    refs = find_references(content_dir, float_type)
    labels = find_labels(content_dir, float_type)

    if not refs:
        print(f" {GREEN}[PASS]{NC} {label} references - none found")
        return True, refs, {}

    # Only keys with a numeric prefix take part in the ordering check
    numbered_refs = []
    for key, fname, line in refs:
        num, name = extract_number_and_name(key)
        if num is None:
            continue
        numbered_refs.append((num, name, key, fname, line))

    if not numbered_refs:
        print(f" {GREEN}[PASS]{NC} {label} references - no numbered references")
        return True, refs, {}

    # Strictly increasing numbers count as ordered, even with gaps
    numbers = [entry[0] for entry in numbered_refs]
    strictly_increasing = all(
        earlier < later for earlier, later in zip(numbers, numbers[1:])
    )
    if strictly_increasing:
        # Gaps or a start other than 1 only earn a warning
        if numbers == list(range(1, len(numbered_refs) + 1)):
            print(
                f" {GREEN}[PASS]{NC} {label} reference order (1..{len(numbered_refs)})"
            )
        else:
            print(
                f" {YELLOW}[WARN]{NC} {label} references sequential but not contiguous: {numbers}"
            )
        return True, refs, {}

    # Out of order: renumber 1..N following the order of first appearance
    print(f" {RED}[FAIL]{NC} {label} reference order")
    desired_mapping = {}
    for new_num, (_, name, old_key, fname, line) in enumerate(numbered_refs, start=1):
        new_key = f"{new_num:02d}_{name}" if name else f"{new_num:02d}"
        if old_key == new_key:
            continue
        desired_mapping[old_key] = new_key
        print(
            f" {DIM}{fname}:{line}: "
            f"\\ref{{{float_type}:{old_key}}} -> should be {new_num:02d}{NC}"
        )

    # Report orphaned labels (defined but never referenced)
    referenced = {key for key, _, _ in refs}
    for label_key, info in labels.items():
        if label_key in referenced:
            continue
        print(
            f" {YELLOW}[WARN]{NC} \\label{{{float_type}:{label_key}}} "
            f"defined in {info['file'].name}:{info['line']} but never referenced"
        )
    return False, refs, desired_mapping
def _plan_media_renames(media_dir, mapping):
    """Return (old_path, temp_path, new_path) triples for media files named after a key."""
    if not media_dir.exists():
        return []
    # Snapshot both directories once, before anything is renamed
    candidates = [f for f in media_dir.iterdir() if not f.is_dir()]
    jpg_dir = media_dir / "jpg_for_compilation"
    if jpg_dir.exists():
        candidates.extend(jpg_dir.iterdir())

    renames = []
    for old_key, new_key in mapping.items():
        for f in candidates:
            fname = f.name
            # Matches '<key>' and '<key>.<anything>' (caption .tex, images, CSV, ...)
            if f.stem == old_key or fname.startswith(old_key + "."):
                temp_name = fname.replace(old_key, f"__TEMP_{old_key}__", 1)
                new_name = fname.replace(old_key, new_key, 1)
                renames.append((f, f.with_name(temp_name), f.with_name(new_name)))
    return renames


def apply_fix(content_dir, float_type, mapping, dry_run=False):
    """Rename media files and update all \\ref{} and \\label{} keys in place.

    Every whole ``<float_type>:<old_key>`` occurrence is rewritten to
    ``<float_type>:<new_key>`` in the ``*.tex`` files of ``content_dir``, of
    its ``figures``/``tables`` ``caption_and_media`` subdirectory, and of
    ``content_dir.parent``. A longer key that merely starts with ``old_key``
    (``fig:01_a_extra`` for ``01_a``) or a longer prefix (``subfig:``) is
    left alone. Media files named ``<old_key>`` or ``<old_key>.*`` in
    ``caption_and_media`` and its ``jpg_for_compilation`` subdirectory are
    then renamed to use the new key.

    If a rename target already exists and is not itself being renamed,
    nothing is changed at all, so an unrelated file is never overwritten and
    text and file names cannot get out of sync.

    Parameters
    ----------
    content_dir : Path
        Directory containing .tex content files.
    float_type : str
        'fig' or 'tab'
    mapping : dict
        old_key -> new_key
    dry_run : bool
        If True, only print what would be done.
    """
    if not mapping:
        return
    prefix = float_type
    subdir = "figures" if float_type == "fig" else "tables"
    media_dir = content_dir / subdir / "caption_and_media"

    # 1. Plan the file renames first and refuse to clobber unrelated files
    renames = _plan_media_renames(media_dir, mapping)
    sources = {old_path for old_path, _, _ in renames}
    blocked = [
        new_path
        for _, _, new_path in renames
        if new_path not in sources and os.path.lexists(new_path)
    ]
    if blocked:
        for new_path in blocked:
            print(
                f" Skipped: {new_path.parent.name}/{new_path.name} "
                f"already exists and would be overwritten"
            )
        print(f" No changes made for {prefix} keys")
        return

    # 2. Collect all .tex files that may contain references
    tex_files = set(content_dir.glob("*.tex"))
    if media_dir.exists():
        tex_files.update(media_dir.glob("*.tex"))
    # Also check the parent manuscript .tex files
    tex_files.update(content_dir.parent.glob("*.tex"))

    # 3. Rewrite keys in one regex pass. A single pass cannot re-substitute
    # its own output, so swaps (01 <-> 02) need no temporary keys. The
    # look-around assertions keep partial keys and prefixes from matching.
    alternatives = "|".join(
        re.escape(key) for key in sorted(mapping, key=len, reverse=True)
    )
    key_pattern = re.compile(
        r"(?<!\w)" + re.escape(prefix) + ":(" + alternatives + r")(?![\w-])"
    )

    def _renumber(match):
        return f"{prefix}:{mapping[match.group(1)]}"

    for tex_file in sorted(tex_files):
        original = tex_file.read_text(encoding="utf-8")
        text = key_pattern.sub(_renumber, original)
        if text != original:
            action = "Would update" if dry_run else "Updated"
            print(f" {action} references in {tex_file.name}")
            if not dry_run:
                tex_file.write_text(text, encoding="utf-8")

    # 4. Rename media files (caption .tex, images, CSV, etc.)
    if dry_run:
        announced = set()
        for old_path, _, new_path in renames:
            if old_path not in announced:
                announced.add(old_path)
                print(f" Would rename {new_path.parent.name}/{new_path.name}")
        return

    # Pass 1: old -> temp, so swapped names never collide. lexists() also
    # accepts symlinks whose target was just renamed.
    for old_path, temp_path, _ in renames:
        if os.path.lexists(old_path):
            old_path.rename(temp_path)
    # Pass 2: temp -> new
    for _, temp_path, new_path in renames:
        if os.path.lexists(temp_path):
            print(f" Renamed {temp_path.parent.name}/{new_path.name}")
            temp_path.rename(new_path)
def main():
    """Parse the command line, run the order checks, and optionally fix them.

    Returns
    -------
    int
        0 when all references are in order or after a --fix / --dry-run
        pass; 1 when issues were found and neither option was given.
        Exits with status 1 if no content directory exists.
    """
    cli = argparse.ArgumentParser(
        description="Check and fix figure/table reference ordering in LaTeX manuscripts"
    )
    cli.add_argument(
        "project_dir",
        nargs="?",
        default=".",
        help="Project root directory (default: current directory)",
    )
    cli.add_argument(
        "--fix",
        action="store_true",
        help="Auto-renumber files and update references to match appearance order",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what --fix would do without making changes",
    )
    cli.add_argument(
        "--doc-type",
        choices=["manuscript", "supplementary", "all"],
        default="all",
        help="Which document type to check (default: all)",
    )
    args = cli.parse_args()
    project_dir = Path(args.project_dir).resolve()

    # Keep only the requested document types whose contents directory exists
    known_docs = (
        ("manuscript", "01_manuscript"),
        ("supplementary", "02_supplementary"),
    )
    doc_types = []
    for doc_name, folder in known_docs:
        if args.doc_type not in (doc_name, "all"):
            continue
        contents = project_dir / folder / "contents"
        if contents.exists():
            doc_types.append((doc_name, contents))

    if not doc_types:
        print(f"{RED}No content directories found in {project_dir}{NC}")
        sys.exit(1)

    print(f"\n{BOLD}=== Float Reference Order Check ==={NC}\n")
    has_issues = False
    all_mappings = []
    for doc_label, content_dir in doc_types:
        for float_type, float_label in [("fig", "Figure"), ("tab", "Table")]:
            label = f"{float_label} ({doc_label})"
            ok, _, mapping = check_order(content_dir, float_type, label)
            if ok:
                continue
            has_issues = True
            if mapping:
                all_mappings.append((content_dir, float_type, mapping, label))
    print()

    if not has_issues:
        print(f"{GREEN}All float references are in order.{NC}")
        return 0

    if not (args.fix or args.dry_run):
        print(f"{YELLOW}Run with --fix to auto-renumber, or --dry-run to preview.{NC}")
        return 1

    # --dry-run takes precedence when both flags are given
    mode = "DRY RUN" if args.dry_run else "FIXING"
    print(f"{BOLD}--- {mode} ---{NC}\n")
    for content_dir, float_type, mapping, label in all_mappings:
        print(f" {label}: renumbering {len(mapping)} items")
        for old, new in mapping.items():
            print(f" {old} -> {new}")
        apply_fix(content_dir, float_type, mapping, dry_run=args.dry_run)
        print()
    if not args.dry_run:
        print(f"{GREEN}Renumbering complete. Re-run to verify.{NC}")
    return 0
# Run only when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())