#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ TIF Image Cropping Utility This script provides functionality for cropping TIF images by automatically detecting content areas and removing excess whitespace. """ import argparse import os from typing import Optional, Tuple import cv2 import numpy as np def find_content_area(image_path: str) -> Tuple[int, int, int, int]: """ Find the bounding box of the content area in an image. Args: image_path: Path to the image file Returns: Tuple containing (x, y, width, height) of the content area Raises: FileNotFoundError: If the image cannot be read """ # Read the image img = cv2.imread(image_path) if img is None: raise FileNotFoundError(f"Unable to read image file: {image_path}") # Convert to grayscale gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Apply adaptive thresholding for better detection of content # This works better for images with varying brightness thresh = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2 ) # Alternative method: standard thresholding # _, thresh = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY_INV) # Find all non-zero points (colored or non-white pixels) points = cv2.findNonZero(thresh) # Get the bounding rectangle around the non-zero points if points is not None: x, y, w, h = cv2.boundingRect(points) return x, y, w, h else: # If no points found, return the whole image h, w = img.shape[:2] return 0, 0, w, h def resize_image( img: np.ndarray, target_dpi: int = 600, max_width: int = 2000, max_height: int = 2000, verbose: bool = False, ) -> np.ndarray: """ Resize an image to meet target DPI and maximum dimensions constraints. Args: img: Input image as numpy array target_dpi: Target DPI (dots per inch) for the image max_width: Maximum width in pixels max_height: Maximum height in pixels verbose: Whether to print detailed information Returns: Resized image as numpy array """ height, width = img.shape[:2] # Calculate if resizing is needed if width > max_width or height > max_height: # Determine the scaling factor to fit within max dimensions scale_factor = min(max_width / width, max_height / height) # Calculate new dimensions new_width = int(width * scale_factor) new_height = int(height * scale_factor) if verbose: print(f"Resizing image from {width}x{height} to {new_width}x{new_height}") print(f"Scale factor: {scale_factor:.2f}") # Resize the image resized_img = cv2.resize( img, (new_width, new_height), interpolation=cv2.INTER_AREA ) return resized_img else: if verbose: print( f"No resizing needed, image dimensions {width}x{height} are within limits" ) return img def crop_tif( input_path: str, output_path: Optional[str] = None, margin: int = 30, resize: bool = True, target_dpi: int = 600, max_width: int = 2000, max_height: int = 2000, overwrite: bool = False, verbose: bool = False, ) -> None: """ Crop a TIF image to its content area with a specified margin and optionally resize it. Args: input_path: Path to the input TIF image output_path: Path to save the cropped image (defaults to input_path if overwrite=True) margin: Margin in pixels to add around the content area resize: Whether to resize the image to meet target constraints target_dpi: Target DPI for the resized image max_width: Maximum width in pixels for the resized image max_height: Maximum height in pixels for the resized image overwrite: Whether to overwrite the input file verbose: Whether to print detailed information Raises: FileNotFoundError: If the input image cannot be read ValueError: If output path is not specified and overwrite is False """ # Determine output path if output_path is None: if overwrite: output_path = input_path else: raise ValueError("output_path must be specified if overwrite=False") # Read the image img = cv2.imread(input_path) if img is None: raise FileNotFoundError(f"Unable to read image file: {input_path}") original_height, original_width = img.shape[:2] if verbose: print(f"Original image dimensions: {original_width}x{original_height}") # Find the content area x, y, w, h = find_content_area(input_path) if verbose: print(f"Content area detected at: x={x}, y={y}, width={w}, height={h}") # Calculate the coordinates with margin, clamping to the image boundaries x_start = max(x - margin, 0) y_start = max(y - margin, 0) x_end = min(x + w + margin, img.shape[1]) y_end = min(y + h + margin, img.shape[0]) if verbose: print(f"Cropping to: x={x_start}:{x_end}, y={y_start}:{y_end}") print(f"New dimensions: {x_end - x_start}x{y_end - y_start}") # Crop the image using the bounding rectangle with margin cropped_img = img[y_start:y_end, x_start:x_end] # Resize the image if requested if resize: cropped_img = resize_image( cropped_img, target_dpi, max_width, max_height, verbose ) # Save the processed image cv2.imwrite(output_path, cropped_img) # Calculate space saved final_height, final_width = cropped_img.shape[:2] area_reduction = 1 - ( (final_width * final_height) / (original_width * original_height) ) area_reduction_pct = area_reduction * 100 print(f"Image processed: {input_path}") print( f"Size changed from {original_width}x{original_height} to {final_width}x{final_height}" ) print(f"Saved {area_reduction_pct:.1f}% of the original area") if output_path != input_path: print(f"Saved to: {output_path}") def batch_crop_tifs( directory: str, output_directory: Optional[str] = None, margin: int = 30, resize: bool = True, target_dpi: int = 600, max_width: int = 2000, max_height: int = 2000, recursive: bool = False, verbose: bool = False, ) -> None: """ Process all TIF files in a directory. Args: directory: Directory containing TIF files to process output_directory: Directory to save processed files (defaults to same as input) margin: Margin to add around the content area resize: Whether to resize images to meet target constraints target_dpi: Target DPI for resized images max_width: Maximum width in pixels for resized images max_height: Maximum height in pixels for resized images recursive: Whether to process subdirectories verbose: Whether to print detailed information """ if not os.path.isdir(directory): raise ValueError(f"Directory not found: {directory}") # Create output directory if it doesn't exist if output_directory and not os.path.exists(output_directory): os.makedirs(output_directory) # Get the list of TIF files files = [] if recursive: for root, _, filenames in os.walk(directory): for filename in filenames: if filename.lower().endswith((".tif", ".tiff")): files.append(os.path.join(root, filename)) else: files = [ os.path.join(directory, f) for f in os.listdir(directory) if f.lower().endswith((".tif", ".tiff")) ] print(f"Found {len(files)} TIF files to process") # Process each file for index, file_path in enumerate(files): if verbose: print(f"\nProcessing [{index + 1}/{len(files)}]: {file_path}") else: print( f"Processing [{index + 1}/{len(files)}]: {os.path.basename(file_path)}" ) # Determine output path if output_directory: rel_path = os.path.relpath(file_path, directory) output_path = os.path.join(output_directory, rel_path) # Create subdirectories if needed output_subdir = os.path.dirname(output_path) if not os.path.exists(output_subdir): os.makedirs(output_subdir) else: output_path = file_path # Process the image try: crop_tif( file_path, output_path, margin, resize, target_dpi, max_width, max_height, output_path == file_path, verbose, ) except Exception as e: print(f"Error processing {file_path}: {e}") def main(): """Main function to parse arguments and execute the appropriate action.""" # Set up argument parser parser = argparse.ArgumentParser( description="Process TIF images: crop to content area, resize, and optimize for publications.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) # Add subparsers for the different modes subparsers = parser.add_subparsers(dest="mode", help="Operation mode") # Single file mode file_parser = subparsers.add_parser("file", help="Process a single file") file_parser.add_argument("-i", "--input", required=True, help="Input TIF file path") file_parser.add_argument( "-o", "--output", help="Output TIF file path (defaults to input if --overwrite)" ) file_parser.add_argument( "--overwrite", action="store_true", help="Overwrite the input file" ) # Batch mode batch_parser = subparsers.add_parser("batch", help="Process multiple files") batch_parser.add_argument( "-d", "--directory", required=True, help="Directory containing TIF files" ) batch_parser.add_argument( "-o", "--output-directory", help="Output directory for processed files" ) batch_parser.add_argument( "-r", "--recursive", action="store_true", help="Process subdirectories recursively", ) # Common arguments for subparser in [file_parser, batch_parser]: subparser.add_argument( "--margin", type=int, default=30, help="Margin size around the content area" ) subparser.add_argument( "--no-resize", action="store_true", help="Disable automatic resizing" ) subparser.add_argument( "--dpi", type=int, default=600, help="Target DPI for resized images" ) subparser.add_argument( "--max-width", type=int, default=2000, help="Maximum width in pixels" ) subparser.add_argument( "--max-height", type=int, default=2000, help="Maximum height in pixels" ) subparser.add_argument( "-v", "--verbose", action="store_true", help="Enable verbose output" ) # Parse arguments args = parser.parse_args() # Execute the appropriate action if args.mode == "file": crop_tif( args.input, args.output, args.margin, not args.no_resize, # Invert --no-resize flag to get resize boolean args.dpi, args.max_width, args.max_height, args.overwrite, args.verbose, ) elif args.mode == "batch": batch_crop_tifs( args.directory, args.output_directory, args.margin, not args.no_resize, # Invert --no-resize flag to get resize boolean args.dpi, args.max_width, args.max_height, args.recursive, args.verbose, ) else: parser.print_help() if __name__ == "__main__": main()