#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-05-06 20:20:52 (ywatanabe)"
# File: /home/ywatanabe/proj/SciTex/manuscript/scripts/python/pptx2tif.py
# ----------------------------------------
import os
__FILE__ = "./manuscript/scripts/python/pptx2tif.py"
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------
"""
PowerPoint to TIF Conversion Utility

This script converts PowerPoint presentations (.pptx) to TIF images,
optimized for inclusion in scientific manuscripts.
"""

import argparse
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import List, Optional, Union

# Availability flags for optional dependencies.
# NOTE: LIBREOFFICE_AVAILABLE is never updated in this file; call
# check_libreoffice_installed() for the actual state.
LIBREOFFICE_AVAILABLE = False
PYTHON_PPT_AVAILABLE = False
PIL_AVAILABLE = False

try:
    # Check for python-pptx package
    from pptx import Presentation

    PYTHON_PPT_AVAILABLE = True
except ImportError:
    pass

try:
    # Check for PIL/Pillow (imported only to probe availability)
    from PIL import Image  # noqa: F401

    PIL_AVAILABLE = True
except ImportError:
    pass


def check_libreoffice_installed() -> bool:
    """Check if LibreOffice is installed.

    Returns:
        True when an executable named ``libreoffice`` is found on PATH.
    """
    # shutil.which avoids spawning the external `which` binary, which is
    # not available on every platform.
    return shutil.which("libreoffice") is not None


def convert_pptx_to_tif_libreoffice(
    input_path: str,
    output_dir: Optional[str] = None,
    resolution: int = 300,
    verbose: bool = False,
) -> List[str]:
    """
    Convert a PowerPoint file to TIF using LibreOffice.

    LibreOffice writes into a temporary directory; every ``.tif``/``.tiff``
    file found there is then copied into ``output_dir``.

    Args:
        input_path: Path to the PowerPoint file
        output_dir: Directory to save output files (defaults to same directory
            as input); created if it does not exist
        resolution: Image resolution in DPI. NOTE: accepted for interface
            compatibility but not passed to LibreOffice by this function.
        verbose: Whether to print detailed information

    Returns:
        List of generated TIF file paths (absolute)

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        RuntimeError: If anything fails while running LibreOffice or
            collecting/copying its output (the original error is chained).
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"PowerPoint file not found: {input_path}")

    # Set output directory
    if output_dir is None:
        output_dir = os.path.dirname(os.path.abspath(input_path))
    else:
        # exist_ok avoids the check-then-create race
        os.makedirs(output_dir, exist_ok=True)

    input_path = os.path.abspath(input_path)
    output_dir = os.path.abspath(output_dir)

    # Get the base name without extension
    base_name = os.path.splitext(os.path.basename(input_path))[0]

    # Create a temporary directory for conversion
    with tempfile.TemporaryDirectory() as temp_dir:
        # Convert to TIF using LibreOffice
        if verbose:
            print(f"Converting {input_path} to TIF using LibreOffice...")

        cmd = [
            "libreoffice",
            "--headless",
            "--convert-to",
            "tiff",
            "--outdir",
            temp_dir,
            input_path,
        ]

        try:
            result = subprocess.run(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
            )

            if result.returncode != 0:
                raise RuntimeError(f"LibreOffice conversion failed: {result.stderr}")

            if verbose:
                print(result.stdout)

            # Find generated files. os.listdir order is arbitrary, so sort to
            # keep the slide numbering below deterministic across runs.
            tif_files = sorted(
                os.path.join(temp_dir, f)
                for f in os.listdir(temp_dir)
                if f.lower().endswith((".tif", ".tiff"))
            )

            if not tif_files:
                raise FileNotFoundError("No TIF files were generated during conversion")

            # Copy files to output directory with proper naming
            single_output = len(tif_files) == 1
            output_files = []
            for i, tif_file in enumerate(tif_files):
                # For single slide presentations, use the base name
                # For multi-slide presentations, append slide number
                if single_output:
                    output_name = f"{base_name}.tif"
                else:
                    output_name = f"{base_name}_slide_{i + 1}.tif"

                output_path = os.path.join(output_dir, output_name)

                # Stream the copy instead of loading the whole image in memory
                shutil.copyfile(tif_file, output_path)

                output_files.append(output_path)

                if verbose:
                    print(f"Saved: {output_path}")

            return output_files

        except Exception as e:
            # Keep the single RuntimeError type callers see, but chain the
            # cause so the original traceback is not lost.
            raise RuntimeError(f"Error during LibreOffice conversion: {str(e)}") from e


def convert_pptx_to_tif_python(
    input_path: str,
    output_dir: Optional[str] = None,
    resolution: int = 300,
    verbose: bool = False,
) -> List[str]:
    """
    Convert a PowerPoint file to TIF using python-pptx and PIL.

    Note: This is a placeholder. python-pptx cannot render slides, so this
    function writes NO image files; it only prints a warning per slide and
    returns the paths the slides would have been saved to.

    Args:
        input_path: Path to the PowerPoint file
        output_dir: Directory to save output files (defaults to same directory
            as input); created if it does not exist
        resolution: Image resolution in DPI (currently unused)
        verbose: Whether to print detailed information

    Returns:
        List of intended TIF file paths, one per slide (files are not created)

    Raises:
        ImportError: If python-pptx or PIL/Pillow is not installed.
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if not PYTHON_PPT_AVAILABLE:
        raise ImportError("python-pptx package is not installed")

    if not PIL_AVAILABLE:
        raise ImportError("PIL/Pillow package is not installed")

    if not os.path.exists(input_path):
        raise FileNotFoundError(f"PowerPoint file not found: {input_path}")

    # Set output directory
    if output_dir is None:
        output_dir = os.path.dirname(os.path.abspath(input_path))
    else:
        # exist_ok avoids the check-then-create race
        os.makedirs(output_dir, exist_ok=True)

    # Get the base name without extension
    base_name = os.path.splitext(os.path.basename(input_path))[0]

    # Load the presentation
    if verbose:
        print(f"Opening PowerPoint file: {input_path}")

    prs = Presentation(input_path)
    slide_count = len(prs.slides)
    output_files = []

    # Walk each slide
    for i, _slide in enumerate(prs.slides):
        if verbose:
            print(f"Processing slide {i + 1}/{slide_count}")

        # For multi-slide presentations, append slide number
        # For single slide presentations, use the base name
        if slide_count == 1:
            output_name = f"{base_name}.tif"
        else:
            output_name = f"{base_name}_slide_{i + 1}.tif"

        output_path = os.path.join(output_dir, output_name)

        # This is a placeholder - python-pptx cannot directly render slides
        # We would need an additional library to render the slides
        # Instead, we'll just output a message
        print(
            f"Warning: Python-only conversion is limited. Slide {i + 1} would be saved to {output_path}"
        )
        print(
            "For better results, please install LibreOffice or use the LibreOffice conversion method"
        )

        # Add to output_files even though we're not creating the file
        output_files.append(output_path)

    return output_files


def convert_pptx_to_tif(
    input_path: Union[str, Path],
    output_dir: Optional[Union[str, Path]] = None,
    method: str = "auto",
    resolution: int = 300,
    crop_whitespace: bool = True,
    margin: int = 30,
    verbose: bool = False,
) -> List[str]:
    """
    Convert a PowerPoint file to TIF using the best available method.

    With ``method="auto"`` LibreOffice is preferred when installed; otherwise
    the python-pptx placeholder is used if python-pptx and Pillow are present.

    Args:
        input_path: Path to the PowerPoint file
        output_dir: Directory to save output files (defaults to same directory as input)
        method: Conversion method ('libreoffice', 'python', or 'auto')
        resolution: Image resolution in DPI
        crop_whitespace: Whether to crop excess whitespace
        margin: Margin in pixels to add around the content area when cropping
        verbose: Whether to print detailed information

    Returns:
        List of generated TIF file paths

    Raises:
        RuntimeError: If ``method="auto"`` and no conversion backend is
            available, or the LibreOffice conversion fails.
        ValueError: If ``method`` is not a known conversion method.
    """
    # Convert paths to strings if they're Path objects
    if isinstance(input_path, Path):
        input_path = str(input_path)

    if output_dir is not None and isinstance(output_dir, Path):
        output_dir = str(output_dir)

    # Determine the best method to use
    if method == "auto":
        if check_libreoffice_installed():
            method = "libreoffice"
            if verbose:
                print("Using LibreOffice for conversion")
        elif PYTHON_PPT_AVAILABLE and PIL_AVAILABLE:
            method = "python"
            if verbose:
                print("Using python-pptx for conversion (limited functionality)")
        else:
            raise RuntimeError(
                "No suitable conversion method available. Please install LibreOffice or "
                "the python-pptx and Pillow packages."
            )

    # Perform the conversion
    if method == "libreoffice":
        output_files = convert_pptx_to_tif_libreoffice(
            input_path, output_dir, resolution, verbose
        )
    elif method == "python":
        output_files = convert_pptx_to_tif_python(
            input_path, output_dir, resolution, verbose
        )
    else:
        raise ValueError(f"Unknown conversion method: {method}")

    # Crop whitespace if requested
    if crop_whitespace and output_files:
        if verbose:
            print("\nCropping whitespace from generated images...")

        # Import crop_tif dynamically to avoid circular imports
        try:
            from crop_tif import crop_tif
        except ImportError:
            print(
                "Warning: crop_tif module not available. Skipping whitespace cropping."
            )
        else:
            for tif_file in output_files:
                if verbose:
                    print(f"Cropping: {tif_file}")

                # Cropping is best-effort: a failure on one image is reported
                # and does not abort the remaining files.
                try:
                    crop_tif(tif_file, tif_file, margin, True, verbose)
                except Exception as e:
                    print(f"Warning: Failed to crop {tif_file}: {e}")

    return output_files


def batch_convert_pptx_to_tif(
    directory: Union[str, Path],
    output_dir: Optional[Union[str, Path]] = None,
    method: str = "auto",
    resolution: int = 300,
    crop_whitespace: bool = True,
    margin: int = 30,
    recursive: bool = False,
    verbose: bool = False,
) -> List[str]:
    """
    Convert all PowerPoint files in a directory to TIF.

    Files whose names end in ``.ppt`` or ``.pptx`` (case-insensitive) are
    processed in sorted order. A failure on one file is printed and does not
    stop the batch.

    Args:
        directory: Directory containing PowerPoint files
        output_dir: Directory to save output files (defaults to same as input).
            When given, the sub-directory layout of ``directory`` is mirrored
            beneath it.
        method: Conversion method ('libreoffice', 'python', or 'auto')
        resolution: Image resolution in DPI
        crop_whitespace: Whether to crop excess whitespace
        margin: Margin in pixels to add around the content area when cropping
        recursive: Whether to process subdirectories
        verbose: Whether to print detailed information

    Returns:
        List of generated TIF file paths (empty if no PowerPoint files found)

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    # Convert paths to strings if they're Path objects
    if isinstance(directory, Path):
        directory = str(directory)

    if output_dir is not None and isinstance(output_dir, Path):
        output_dir = str(output_dir)

    if not os.path.isdir(directory):
        raise ValueError(f"Directory not found: {directory}")

    # Get the list of PowerPoint files
    extensions = (".ppt", ".pptx")
    if recursive:
        pptx_files = [
            os.path.join(root, filename)
            for root, _, filenames in os.walk(directory)
            for filename in filenames
            if filename.lower().endswith(extensions)
        ]
    else:
        pptx_files = [
            os.path.join(directory, f)
            for f in os.listdir(directory)
            if f.lower().endswith(extensions)
        ]

    if not pptx_files:
        print(f"No PowerPoint files found in {directory}")
        return []

    # os.walk / os.listdir order is arbitrary; sort for reproducible runs
    pptx_files.sort()

    # Process each file
    all_output_files = []
    for pptx_file in pptx_files:
        if verbose:
            print(f"\nProcessing: {pptx_file}")

        # Determine output directory
        if output_dir is None:
            file_output_dir = os.path.dirname(pptx_file)
        else:
            rel_path = os.path.relpath(os.path.dirname(pptx_file), directory)
            # normpath drops the "." component produced for top-level files
            file_output_dir = os.path.normpath(os.path.join(output_dir, rel_path))

            # Create the directory if it doesn't exist
            os.makedirs(file_output_dir, exist_ok=True)

        # Convert the file
        try:
            output_files = convert_pptx_to_tif(
                pptx_file,
                file_output_dir,
                method,
                resolution,
                crop_whitespace,
                margin,
                verbose,
            )
            all_output_files.extend(output_files)
        except Exception as e:
            # Best-effort batch: report and continue with the next file
            print(f"Error processing {pptx_file}: {e}")

    return all_output_files


def main():
    """Parse command-line arguments and execute the appropriate action.

    Two sub-commands are provided: ``file`` (convert one presentation) and
    ``batch`` (convert every presentation in a directory). Without a
    sub-command the help text is printed. On a conversion error the message
    is written to stderr and the process exits with status 1.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(
        description="Convert PowerPoint files to TIF format.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Add subparsers for the different modes
    subparsers = parser.add_subparsers(dest="mode", help="Operation mode")

    # Single file mode
    file_parser = subparsers.add_parser("file", help="Process a single file")
    file_parser.add_argument(
        "-i", "--input", required=True, help="Input PowerPoint file path"
    )
    file_parser.add_argument(
        "-o", "--output-dir", help="Output directory for TIF files"
    )

    # Batch mode
    batch_parser = subparsers.add_parser("batch", help="Process multiple files")
    batch_parser.add_argument(
        "-d",
        "--directory",
        required=True,
        help="Directory containing PowerPoint files",
    )
    batch_parser.add_argument(
        "-o", "--output-dir", help="Output directory for TIF files"
    )
    batch_parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="Process subdirectories recursively",
    )

    # Common arguments
    for subparser in [file_parser, batch_parser]:
        subparser.add_argument(
            "--method",
            choices=["auto", "libreoffice", "python"],
            default="auto",
            help="Conversion method to use",
        )
        subparser.add_argument(
            "--resolution",
            type=int,
            default=300,
            help="Output image resolution (DPI)",
        )
        subparser.add_argument(
            "--no-crop",
            action="store_true",
            help="Disable automatic cropping of whitespace",
        )
        subparser.add_argument(
            "--margin",
            type=int,
            default=30,
            help="Margin size around the content area when cropping",
        )
        subparser.add_argument(
            "-v",
            "--verbose",
            action="store_true",
            help="Enable verbose output",
        )

    # Parse arguments
    args = parser.parse_args()

    # Execute the appropriate action
    if args.mode == "file":
        try:
            output_files = convert_pptx_to_tif(
                args.input,
                args.output_dir,
                args.method,
                args.resolution,
                not args.no_crop,
                args.margin,
                args.verbose,
            )
            print(f"\nConversion complete. Generated {len(output_files)} TIF file(s).")
        except Exception as e:
            # Fatal errors go to stderr so they are not mixed with results
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

    elif args.mode == "batch":
        try:
            output_files = batch_convert_pptx_to_tif(
                args.directory,
                args.output_dir,
                args.method,
                args.resolution,
                not args.no_crop,
                args.margin,
                args.recursive,
                args.verbose,
            )
            print(
                f"\nBatch conversion complete. Generated {len(output_files)} TIF file(s)."
            )
        except Exception as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

    else:
        parser.print_help()


if __name__ == "__main__":
    main()

# EOF