Error Handling Project
Checking access...
Apply exception handling, file I/O, and logging to build a robust file processing tool.
Project: Robust File Processor
Create file_processor.py:
"""A robust file processing tool with comprehensive error handling."""
import csv
import json
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
# --- Logging Setup ---
# Configure the root logger once: records at INFO and above are written both
# to the file "processor.log" and to a stream handler, in a shared
# "timestamp [LEVEL] message" layout.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    handlers=[logging.FileHandler("processor.log"), logging.StreamHandler()],
)

# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
# --- Custom Exceptions ---
class FileProcessorError(Exception):
    """Root of the file processor's exception hierarchy.

    Adds no behaviour of its own; it exists so that callers can catch every
    processor-specific failure with a single ``except`` clause.
    """
class UnsupportedFormatError(FileProcessorError):
    """Signals that a file's format is not one the processor can handle."""

    def __init__(self, format):
        """Build the error message from the offending format.

        Args:
            format: The rejected format, interpolated into the message as-is.
                (The name shadows the builtin but is kept because callers may
                pass it by keyword.)
        """
        message = f"Unsupported file format: {format}"
        super().__init__(message)
class ValidationError(FileProcessorError):
    """Raised when a data row fails validation.

    Attributes:
        row: Number of the offending row, or ``None`` when it is not known.
    """

    def __init__(self, message, row=None):
        """Create the error.

        Args:
            message: Description of what is wrong with the row.
            row: Optional row number; when given it is appended to the
                message as ``" (row N)"``.
        """
        self.row = row
        # Test against None instead of truthiness so that a legitimate row
        # number of 0 is still included in the message.
        location = f" (row {row})" if row is not None else ""
        super().__init__(f"Validation error: {message}{location}")
# --- Data Models ---
@dataclass
class ProcessedFile:
    """Summary of the outcome of processing one file.

    Attributes:
        filename: Name of the file that was processed.
        format: Short label for the file's format.
        row_count: Number of rows/records counted in the file.
        columns: Column names associated with the records.
        preview: A small sample of records; starts empty.
        processing_time: Time spent processing; defaults to ``0.0``.
        errors: Messages for problems found while processing; starts empty.
    """

    # Required fields (no defaults).
    filename: str
    format: str
    row_count: int
    columns: List[str]

    # Optional fields; mutable defaults go through ``default_factory`` so
    # that every instance gets its own list.
    preview: List[Dict[str, Any]] = field(default_factory=list)
    processing_time: float = 0.0
    errors: List[str] = field(default_factory=list)
# --- File Processing ---
class FileProcessor:
    """Process CSV, JSON and plain-text files with error handling.

    Files are looked up in ``input_dir``; reports are written to
    ``output_dir``, which is created on construction if it does not exist.
    """

    # File suffixes (lower-case, with the dot) that can be processed.
    SUPPORTED_FORMATS = {".csv", ".json", ".txt"}

    # Maximum number of leading records copied into ``ProcessedFile.preview``.
    PREVIEW_ROWS = 5

    def __init__(
        self,
        input_dir: Union[str, Path] = ".",
        output_dir: Union[str, Path] = "output",
    ):
        """Remember the directories and make sure the output one exists.

        Args:
            input_dir: Directory that input files are read from.
            output_dir: Directory that reports are written to; created
                (including parents) if missing.
        """
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def process_file(self, filename: str) -> Optional[ProcessedFile]:
        """Process a single file from the input directory.

        Args:
            filename: Name of the file, relative to ``input_dir``.

        Returns:
            The processing result, or ``None`` when the file does not exist
            or could not be processed (the reason is logged).

        Raises:
            UnsupportedFormatError: If the file's suffix is not in
                ``SUPPORTED_FORMATS``.
        """
        filepath = self.input_dir / filename

        if not filepath.exists():
            logger.error("File not found: %s", filepath)
            return None

        suffix = filepath.suffix.lower()
        if suffix not in self.SUPPORTED_FORMATS:
            logger.error("Unsupported format: %s", suffix)
            raise UnsupportedFormatError(suffix)

        logger.info("Processing: %s", filename)

        handlers = {
            ".csv": self._process_csv,
            ".json": self._process_json,
            ".txt": self._process_txt,
        }

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start = time.perf_counter()
        try:
            result = handlers[suffix](filepath)
        except PermissionError:
            logger.error("Permission denied: %s", filepath)
            return None
        except UnicodeDecodeError:
            logger.error("Encoding error: %s", filepath)
            return None
        except FileProcessorError as e:
            # Malformed content is an anticipated failure: log it without a
            # traceback.
            logger.error("Failed to process %s: %s", filename, e)
            return None
        except Exception as e:
            # Last-resort boundary so one bad file cannot abort a batch; the
            # traceback is logged for diagnosis.
            logger.exception("Unexpected error processing %s: %s", filename, e)
            return None

        result.processing_time = time.perf_counter() - start
        logger.info("Processed %s in %.2fs", filename, result.processing_time)
        return result

    def _process_csv(self, filepath: Path) -> ProcessedFile:
        """Process a CSV file whose first row is the header.

        Rows that fail validation are recorded in ``errors`` (and logged as
        warnings) instead of aborting the file; only valid rows are counted
        and previewed.

        Raises:
            FileProcessorError: If the csv module cannot parse the file.
        """
        result = ProcessedFile(
            filename=filepath.name,
            format="csv",
            row_count=0,
            columns=[],
        )

        try:
            # The csv module documentation asks for newline="" so that
            # newlines embedded in quoted fields are interpreted correctly.
            with open(filepath, "r", encoding="utf-8", newline="") as f:
                reader = csv.DictReader(f)
                result.columns = reader.fieldnames or []

                # row_num counts data records from 1 (the header is not
                # counted) and is attached to any validation error.
                for row_num, row in enumerate(reader, 1):
                    try:
                        self._validate_row(row, result.columns, row_num)
                    except ValidationError as e:
                        result.errors.append(str(e))
                        logger.warning(str(e))
                        continue

                    result.row_count += 1
                    if len(result.preview) < self.PREVIEW_ROWS:
                        result.preview.append(row)
        except csv.Error as e:
            raise FileProcessorError(f"CSV parse error: {e}") from e

        return result

    def _process_json(self, filepath: Path) -> ProcessedFile:
        """Process a JSON file holding one object or an array of objects.

        A single top-level object is treated as a one-record file. Column
        names are taken from the first record.

        Raises:
            FileProcessorError: If the file is not valid JSON, the top-level
                value is neither an object nor an array, or the first record
                is not an object.
        """
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            raise FileProcessorError(f"JSON parse error: {e}") from e

        if isinstance(data, list):
            records = data
        elif isinstance(data, dict):
            records = [data]
        else:
            raise FileProcessorError("JSON must be object or array")

        # Columns come from the first record's keys, so it has to be an
        # object; report that as a domain error rather than letting an
        # AttributeError escape.
        if records and not isinstance(records[0], dict):
            raise FileProcessorError("JSON records must be objects")

        result = ProcessedFile(
            filename=filepath.name,
            format="json",
            row_count=len(records),
            columns=list(records[0].keys()) if records else [],
        )
        result.preview.extend(records[: self.PREVIEW_ROWS])
        return result

    def _process_txt(self, filepath: Path) -> ProcessedFile:
        """Process a text file, treating every line as one row.

        Each preview entry holds the 1-based line number under ``"line"``
        and the text without its trailing newline under ``"content"``.
        """
        preview: List[Dict[str, Any]] = []
        line_count = 0

        # Stream the file instead of loading every line into memory; only
        # the first PREVIEW_ROWS lines are kept.
        with open(filepath, "r", encoding="utf-8") as f:
            for line_count, line in enumerate(f, 1):
                if line_count <= self.PREVIEW_ROWS:
                    preview.append(
                        {"line": line_count, "content": line.rstrip("\n")}
                    )

        return ProcessedFile(
            filename=filepath.name,
            format="txt",
            row_count=line_count,
            # Both keys present in the preview records are listed.
            columns=["line", "content"],
            preview=preview,
        )

    def _validate_row(
        self, row: Dict, columns: List[str], row_num: Optional[int] = None
    ):
        """Check that ``row`` has a value for every expected column.

        Args:
            row: The record to check.
            columns: Names of the columns the record must provide.
            row_num: Row number to attach to the error, if known.

        Raises:
            ValidationError: If a column is absent or its value is ``None``.
        """
        for col in columns:
            # csv.DictReader pads short rows with None rather than omitting
            # the key, so a None value is treated as a missing column too.
            if row.get(col) is None:
                raise ValidationError(f"Missing column: {col}", row_num)

    def process_directory(self) -> List[ProcessedFile]:
        """Process every supported file directly inside the input directory.

        Entries are visited in sorted order. Files that fail to process are
        skipped, so the returned list holds successful results only.
        """
        results = []
        for filepath in sorted(self.input_dir.iterdir()):
            # Skip sub-directories even if their name ends in a supported
            # suffix; only regular files can be opened and parsed.
            if not filepath.is_file():
                continue
            if filepath.suffix.lower() not in self.SUPPORTED_FORMATS:
                continue

            try:
                result = self.process_file(filepath.name)
            except UnsupportedFormatError:
                continue

            if result is not None:
                results.append(result)
        return results

    def generate_report(
        self, results: List[ProcessedFile], output_file: str = "report.json"
    ) -> Dict[str, Any]:
        """Write a JSON summary of ``results`` and return it.

        Args:
            results: Processing results to summarise.
            output_file: Name of the report file inside ``output_dir``.

        Returns:
            The report dictionary that was written to disk.
        """
        report = {
            "generated_at": datetime.now().isoformat(),
            "total_files": len(results),
            "total_rows": sum(r.row_count for r in results),
            "total_errors": sum(len(r.errors) for r in results),
            "files": [
                {
                    "filename": r.filename,
                    "format": r.format,
                    "rows": r.row_count,
                    "errors": len(r.errors),
                    "time": round(r.processing_time, 3),
                }
                for r in results
            ],
        }

        report_path = self.output_dir / output_file
        # Explicit encoding keeps the report independent of the platform's
        # default locale encoding.
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)

        logger.info("Report saved: %s", report_path)
        return report
def main():
    """Run the file processor demo.

    Creates sample CSV, JSON and text files in a temporary directory,
    processes them, prints a one-line summary per file and writes a report.
    The processor is created with its default output directory, so the
    report is not placed inside the temporary directory.
    """
    import tempfile

    # Create sample files
    with tempfile.TemporaryDirectory() as tmpdir:
        input_dir = Path(tmpdir) / "input"
        input_dir.mkdir()

        # Sample CSV (written as UTF-8, the encoding the processor reads)
        csv_path = input_dir / "users.csv"
        csv_path.write_text(
            "name,email,age\n"
            "Alice,alice@example.com,30\n"
            "Bob,bob@example.com,25\n"
            "Charlie,charlie@example.com,invalid_age\n"
            "Diana,diana@example.com,28\n",
            encoding="utf-8",
        )

        # Sample JSON
        json_path = input_dir / "products.json"
        json_path.write_text(
            json.dumps(
                [
                    {"id": 1, "name": "Widget", "price": 9.99},
                    {"id": 2, "name": "Gadget", "price": 24.99},
                ]
            ),
            encoding="utf-8",
        )

        # Sample TXT
        txt_path = input_dir / "notes.txt"
        txt_path.write_text("Line one\nLine two\nLine three\n", encoding="utf-8")

        # Process files
        processor = FileProcessor(input_dir=input_dir)
        results = processor.process_directory()

        print("\n=== Processing Results ===")
        for r in results:
            status = "✓" if not r.errors else "⚠"
            print(
                f"{status} {r.filename} ({r.format}): "
                f"{r.row_count} rows, {len(r.errors)} errors"
            )

        report = processor.generate_report(results)
        print(
            f"\nTotal: {report['total_files']} files, "
            f"{report['total_rows']} rows, {report['total_errors']} errors"
        )
if __name__ == "__main__":
    main()

What You Practiced
| Concept | Usage |
|---|---|
| Custom exceptions | FileProcessorError, UnsupportedFormatError, ValidationError |
| try/except/else/finally | Per-format processing with error containment |
| File I/O | Read CSV, JSON, TXT; write JSON reports |
| Logging | logging module with file + console handlers |
| Context managers | with open() for all file operations |
| pathlib | Path for directory traversal, file inspection |
| Error recovery | Per-row error handling in CSV processing |
| Exception chaining | Wrapping low-level errors in domain exceptions |
Extensions
- YAML support — Add YAML file processing with `pyyaml`
- Schema validation — Define expected columns/types and validate against schema
- Encoding detection — Use `chardet` to auto-detect file encoding
- Progress bar — Add `tqdm` progress bar for batch processing
- Parallel processing — Use `concurrent.futures` to process files in parallel