from extractous import Extractor, TesseractOcrConfig
def extract_with_ocr(file_path: str = "path/to/document.pdf", language: str = "deu"):
    """Extract the content of a document with Tesseract OCR enabled.

    Args:
        file_path: Path of the document to extract. Defaults to the
            placeholder path used by the original example.
        language: Language code passed to
            ``TesseractOcrConfig.set_language``. Defaults to ``"deu"``.

    Returns:
        Whatever ``Extractor.extract_file_to_string`` returns for the file.
        NOTE(review): recent extractous releases appear to return a
        ``(text, metadata)`` tuple rather than a bare string -- confirm
        against the installed version before relying on the shape.

    Raises:
        Any exception raised by extractous while configuring the extractor
        or reading the file is propagated unchanged to the caller.
    """
    ocr_config = TesseractOcrConfig().set_language(language)

    # The extractous setters are builder-style: they return a configured
    # Extractor rather than mutating the receiver. Discarding the return
    # value (as the previous code did) leaves the OCR settings unapplied,
    # so the configured instance must be the one used for extraction.
    extractor = Extractor().set_ocr_config(ocr_config)

    return extractor.extract_file_to_string(file_path)
# Example invocation: run the OCR extraction and print its result. Any
# exception raised while extracting or printing is reported on stdout
# instead of propagating as a traceback.
try:
    content = extract_with_ocr()
    print(content)
except Exception as exc:
    print(f"Error extracting content: {exc}")