# Upload a document to the hosted extraction endpoint as multipart/form-data.
# The '@' prefix on the file field makes curl read and attach the file's
# contents; without it curl would send the path itself as a plain text field.
curl --request POST \
  --url https://api.extractous.com/v1/extract \
  --header 'Content-Type: multipart/form-data' \
  --header 'X-Api-Key: YOUR_API_KEY' \
  --form 'file=@PATH_TO_YOUR_FILE' \
  --form 'config[strategy]=FAST_WITH_OCR' \
  --form 'config[pdf_config][ocr_strategy]=AUTO' \
  --form 'config[pdf_config][extract_annotation_text]=true' \
  --form 'config[pdf_config][extract_inline_images]=false' \
  --form 'config[pdf_config][extract_marked_content]=false' \
  --form 'config[pdf_config][extract_unique_inline_images_only]=false' \
  --form 'config[ocr_config][apply_rotation]=false' \
  --form 'config[ocr_config][density]=300' \
  --form 'config[ocr_config][depth]=8' \
  --form 'config[ocr_config][enable_image_preprocessing]=false' \
  --form 'config[ocr_config][language]=eng' \
  --form 'config[ocr_config][timeout_seconds]=120' \
  --form 'config[office_config][concatenate_phonetic_runs]=true' \
  --form 'config[office_config][extract_all_alternatives_from_msg]=false' \
  --form 'config[office_config][extract_macros]=false' \
  --form 'config[office_config][include_deleted_content]=false' \
  --form 'config[office_config][include_headers_and_footers]=true' \
  --form 'config[office_config][include_missing_rows]=false' \
  --form 'config[office_config][include_move_from_content]=false' \
  --form 'config[office_config][include_shape_based_content]=true' \
  --form 'config[office_config][include_slide_master_content]=true' \
  --form 'config[office_config][include_slide_notes]=true'

use reqwest::multipart::{Form, Part};
use serde_json::json;
use tokio::fs::File;
use tokio_util::codec::{BytesCodec, FramedRead};

/// Uploads the file at `file_path` to the extraction endpoint and returns the
/// response body as text.
///
/// The file is streamed from disk rather than read fully into memory. The
/// extraction options are serialized to JSON and sent as the `config` text
/// field of the multipart form; `api_key` is sent in the `X-Api-Key` header.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if the request or reading
/// the response body fails, or if the server answers with a non-success HTTP
/// status (the error message then includes the status and the response body).
async fn extract_document(file_path: &str, api_key: &str) -> Result<String, Box<dyn std::error::Error>> {
    let url = "https://api.extractous.com/v1/extract";

    // Extraction options, sent as a single JSON-encoded form field.
    let config = json!({
        "strategy": "FAST_WITH_OCR",
        "pdf_config": {
            "ocr_strategy": "AUTO",
            "extract_annotation_text": true,
            "extract_inline_images": false,
            "extract_marked_content": false,
            "extract_unique_inline_images_only": false
        },
        "office_config": {
            "concatenate_phonetic_runs": true,
            "extract_all_alternatives_from_msg": false,
            "extract_macros": false,
            "include_deleted_content": false,
            "include_headers_and_footers": true,
            "include_missing_rows": false,
            "include_move_from_content": false,
            "include_shape_based_content": true,
            "include_slide_master_content": true,
            "include_slide_notes": true
        },
        "ocr_config": {
            "apply_rotation": false,
            "density": 300,
            "depth": 8,
            "enable_image_preprocessing": false,
            "language": "eng",
            "timeout_seconds": 120
        }
    });

    // A streamed part carries no filename by default; without one the part is
    // emitted without `filename=` in its Content-Disposition, so set it
    // explicitly from the last component of the path.
    let file_name = std::path::Path::new(file_path)
        .file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .unwrap_or_else(|| "file".to_owned());

    // Open file stream
    let file = File::open(file_path).await?;
    let stream = FramedRead::new(file, BytesCodec::new());
    let file_part = Part::stream(reqwest::Body::wrap_stream(stream)).file_name(file_name);

    // Create multipart form
    let form = Form::new()
        .part("file", file_part)
        .text("config", config.to_string());

    // Make request
    let client = reqwest::Client::new();
    let response = client
        .post(url)
        .header("X-Api-Key", api_key)
        .multipart(form)
        .send()
        .await?;

    // Read the body before deciding, so a failure can report what the server
    // said instead of handing an error payload back as if it were a result.
    let status = response.status();
    let body = response.text().await?;
    if !status.is_success() {
        return Err(format!("extraction request failed with status {status}: {body}").into());
    }

    Ok(body)
}

/// Usage example: runs a single extraction with placeholder arguments and
/// prints the returned text to stdout, propagating any error to the caller.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    extract_document("path/to/file", "YOUR_API_KEY")
        .await
        .map(|extracted| println!("{}", extracted))
}

Quickstart

Start using the hosted API.

File Formats

Supported file formats.

On This Page

Star on GitHub · Create an issue