Validate Document

curl --request POST \
  --url https://api.example.com/api/validate_stream/ \
  --header 'Content-Type: application/json' \
  --data '
{
  "document_bytes": "<string>",
  "document_category": "<string>",
  "document_metadata": {
    "audience_hint": "<string>",
    "jurisdiction": "<string>",
    "product_class": "<string>",
    "document_type": "<string>",
    "product_types": [
      {}
    ]
  }
}
'

import requests

url = "https://api.example.com/api/validate_stream/"

payload = {
    "document_bytes": "<string>",
    "document_category": "<string>",
    "document_metadata": {
        "audience_hint": "<string>",
        "jurisdiction": "<string>",
        "product_class": "<string>",
        "document_type": "<string>",
        "product_types": [{}]
    }
}
headers = {"Content-Type": "application/json"}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: JSON.stringify({
    document_bytes: '<string>',
    document_category: '<string>',
    document_metadata: {
      audience_hint: '<string>',
      jurisdiction: '<string>',
      product_class: '<string>',
      document_type: '<string>',
      product_types: [{}]
    }
  })
};

fetch('https://api.example.com/api/validate_stream/', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.example.com/api/validate_stream/",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'document_bytes' => '<string>',
    'document_category' => '<string>',
    'document_metadata' => [
        'audience_hint' => '<string>',
        'jurisdiction' => '<string>',
        'product_class' => '<string>',
        'document_type' => '<string>',
        'product_types' => [
                [
                                
                ]
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample JSON validation request to the validate_stream
// endpoint and prints the raw response body to stdout. Any failure while
// building, sending, or reading the request is printed and ends the program.
func main() {

	url := "https://api.example.com/api/validate_stream/"

	payload := strings.NewReader("{\n  \"document_bytes\": \"<string>\",\n  \"document_category\": \"<string>\",\n  \"document_metadata\": {\n    \"audience_hint\": \"<string>\",\n    \"jurisdiction\": \"<string>\",\n    \"product_class\": \"<string>\",\n    \"document_type\": \"<string>\",\n    \"product_types\": [\n      {}\n    ]\n  }\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.example.com/api/validate_stream/")
  .header("Content-Type", "application/json")
  .body("{\n  \"document_bytes\": \"<string>\",\n  \"document_category\": \"<string>\",\n  \"document_metadata\": {\n    \"audience_hint\": \"<string>\",\n    \"jurisdiction\": \"<string>\",\n    \"product_class\": \"<string>\",\n    \"document_type\": \"<string>\",\n    \"product_types\": [\n      {}\n    ]\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.example.com/api/validate_stream/")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request.body = "{\n  \"document_bytes\": \"<string>\",\n  \"document_category\": \"<string>\",\n  \"document_metadata\": {\n    \"audience_hint\": \"<string>\",\n    \"jurisdiction\": \"<string>\",\n    \"product_class\": \"<string>\",\n    \"document_type\": \"<string>\",\n    \"product_types\": [\n      {}\n    ]\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "job_id": "abc123def456",
  "status": "accepted",
  "message": "Validation request queued for processing",
  "timestamp": "2026-01-08T10:30:00Z"
}

POST

api

validate_stream

Validate Document

curl --request POST \
  --url https://api.example.com/api/validate_stream/ \
  --header 'Content-Type: application/json' \
  --data '
{
  "document_bytes": "<string>",
  "document_category": "<string>",
  "document_metadata": {
    "audience_hint": "<string>",
    "jurisdiction": "<string>",
    "product_class": "<string>",
    "document_type": "<string>",
    "product_types": [
      {}
    ]
  }
}
'

import requests

url = "https://api.example.com/api/validate_stream/"

payload = {
    "document_bytes": "<string>",
    "document_category": "<string>",
    "document_metadata": {
        "audience_hint": "<string>",
        "jurisdiction": "<string>",
        "product_class": "<string>",
        "document_type": "<string>",
        "product_types": [{}]
    }
}
headers = {"Content-Type": "application/json"}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: JSON.stringify({
    document_bytes: '<string>',
    document_category: '<string>',
    document_metadata: {
      audience_hint: '<string>',
      jurisdiction: '<string>',
      product_class: '<string>',
      document_type: '<string>',
      product_types: [{}]
    }
  })
};

fetch('https://api.example.com/api/validate_stream/', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.example.com/api/validate_stream/",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'document_bytes' => '<string>',
    'document_category' => '<string>',
    'document_metadata' => [
        'audience_hint' => '<string>',
        'jurisdiction' => '<string>',
        'product_class' => '<string>',
        'document_type' => '<string>',
        'product_types' => [
                [
                                
                ]
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample JSON validation request to the validate_stream
// endpoint and prints the raw response body to stdout. Any failure while
// building, sending, or reading the request is printed and ends the program.
func main() {

	url := "https://api.example.com/api/validate_stream/"

	payload := strings.NewReader("{\n  \"document_bytes\": \"<string>\",\n  \"document_category\": \"<string>\",\n  \"document_metadata\": {\n    \"audience_hint\": \"<string>\",\n    \"jurisdiction\": \"<string>\",\n    \"product_class\": \"<string>\",\n    \"document_type\": \"<string>\",\n    \"product_types\": [\n      {}\n    ]\n  }\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.example.com/api/validate_stream/")
  .header("Content-Type", "application/json")
  .body("{\n  \"document_bytes\": \"<string>\",\n  \"document_category\": \"<string>\",\n  \"document_metadata\": {\n    \"audience_hint\": \"<string>\",\n    \"jurisdiction\": \"<string>\",\n    \"product_class\": \"<string>\",\n    \"document_type\": \"<string>\",\n    \"product_types\": [\n      {}\n    ]\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.example.com/api/validate_stream/")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request.body = "{\n  \"document_bytes\": \"<string>\",\n  \"document_category\": \"<string>\",\n  \"document_metadata\": {\n    \"audience_hint\": \"<string>\",\n    \"jurisdiction\": \"<string>\",\n    \"product_class\": \"<string>\",\n    \"document_type\": \"<string>\",\n    \"product_types\": [\n      {}\n    ]\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "job_id": "abc123def456",
  "status": "accepted",
  "message": "Validation request queued for processing",
  "timestamp": "2026-01-08T10:30:00Z"
}

Submit a base64-encoded document for asynchronous compliance validation.

Custom policy rules run automatically. Any rules you’ve activated via Activate Rules are applied on every validation for your API key — you do not select them via document_category or document_metadata. Those fields only choose which of ZeroDrift’s built-in (default) rule scenarios also run. To get your custom rules plus broad default coverage, use document_category: "scenario_all_general".

Request Body

document_bytes

string

required

Base64-encoded document content (PDF, DOCX, etc.)

document_category

string

Pre-defined category for the document. Required if document_metadata is not provided. Options: retail_investor_letter, retail_fact_sheet_registered_fund, retail_fact_sheet_non_registered, pitch_book_registered_fund, pitch_book_non_registered, scenario_retail_investor_letter, scenario_retail_fact_sheet_registered_fund, scenario_retail_fact_sheet_non_registered, scenario_pitch_book_registered_fund, scenario_pitch_book_non_registered, scenario_all_general, scenario_email_general, scenario_mnpi_focused. Use scenario_all_general for full coverage across all default rules (including MNPI detection).

document_metadata

object

Detailed metadata for precise rule matching. Required if document_category is not provided.

Show metadata properties

audience_hint

string

required

Target audience. Options: retail_US, retail_nonUS, institutional, accredited_investor, qualified_client_SEC, qualified_purchaser, professional_client_UK, eligible_counterparty, plan_participant_ERISA, plan_sponsor, intermediary_distributor, prospect, existing_client, former_client, HNW, UHNW, media_press, regulator_examiner, internal_only

jurisdiction

string

required

Regulatory jurisdiction: US

product_class

string

required

Product classification: registered_fund or non_registered

document_type

string

required

Document type: investor_letter, fact_sheet, pitch_book, presentation, email, website_copy, rfp_response, research_report, prospectus_extract, social_post, advisor_newsletter

product_types

array

required

Array of product types (e.g., ["Fund__mutual_fund"]). Common values include Fund__mutual_fund, Fund__etf, Fund__hedge_fund, Fund__private_equity_fund, Advisory_Account__sma, and many more.

At least one of document_category or document_metadata must be provided.

Scanned PDF Support (OCR)

The validation service automatically handles scanned PDFs using AWS Textract OCR. No additional parameters are needed — OCR is triggered transparently when text extraction yields insufficient content. How it works:

The service first attempts standard text extraction via pypdf
If a page yields fewer than 50 characters, it is classified as a scanned/image page
Scanned pages are automatically sent to AWS Textract for OCR
The OCR text is merged with any text-extracted pages before validation

Three PDF cases:

Case	Behavior
Text-only PDF	Standard text extraction, no OCR
Fully scanned PDF	All pages sent to Textract OCR
Mixed PDF (text + scanned pages)	Only scanned pages are OCR’d, text pages kept as-is

Limits:

Workflow	OCR Limit
`POST /api/validate_stream/` (direct base64 upload)	Up to 50 pages, 10MB per page (sync, page-by-page)
Presigned URL + `POST /api/validate_stream_start/`	Up to 3,000 pages, 500MB total (async via S3)

Scanned PDFs may take longer to process due to OCR. For direct uploads via this endpoint, OCR is performed page-by-page (sync). For large scanned documents (more than 50 pages), use the presigned URL workflow, which enables asynchronous Textract processing with higher limits.

Response

job_id

string

Unique identifier for the validation job

status

string

Job status: accepted

message

string

Status message

timestamp

string

ISO 8601 timestamp

{
  "job_id": "abc123def456",
  "status": "accepted",
  "message": "Validation request queued for processing",
  "timestamp": "2026-01-08T10:30:00Z"
}

Example

# Encode document to base64 as a single line.
# GNU base64 wraps its output at 76 columns by default; the embedded newlines
# would end up inside the JSON string and make the request body invalid, so
# strip them. Reading from stdin works with both the GNU and macOS tools.
DOC_BASE64=$(base64 < document.pdf | tr -d '\n')

curl -X POST "https://{api-url}/api/validate_stream/" \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d "{
    \"document_bytes\": \"$DOC_BASE64\",
    \"document_category\": \"retail_investor_letter\"
  }"

import requests
import base64

# Placeholders: substitute your own API key and API host before running.
API_KEY = "YOUR_API_KEY"
API_BASE = "https://{api-url}"

# Read the PDF as raw bytes and base64-encode it. b64encode() returns bytes,
# so .decode() converts the result to str for inclusion in the JSON body.
with open("document.pdf", "rb") as f:
    doc_bytes = base64.b64encode(f.read()).decode()

# Submit the encoded document together with a pre-defined document category.
response = requests.post(
    f"{API_BASE}/api/validate_stream/",
    headers={
        "x-api-key": API_KEY,
        "Content-Type": "application/json"
    },
    json={
        "document_bytes": doc_bytes,
        "document_category": "retail_investor_letter"
    }
)

# Print the parsed JSON response body.
print(response.json())

const fs = require('fs');
const axios = require('axios');

const API_KEY = 'YOUR_API_KEY';
const API_BASE = 'https://{api-url}';

/**
 * Reads document.pdf, base64-encodes it, submits it to the validate_stream
 * endpoint, and logs the parsed response body.
 *
 * The request lives in an async function because this script is CommonJS
 * (it uses require), where top-level await is a syntax error.
 */
async function main() {
  const docBytes = fs.readFileSync('document.pdf').toString('base64');

  const response = await axios.post(
    `${API_BASE}/api/validate_stream/`,
    {
      document_bytes: docBytes,
      document_category: 'retail_investor_letter'
    },
    {
      headers: {
        'x-api-key': API_KEY,
        'Content-Type': 'application/json'
      }
    }
  );

  console.log(response.data);
}

// Report failures (unreadable file, network error, non-2xx response) and
// exit with a non-zero status instead of leaving an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

Introduction Validate Text Snippet

Getting Started

Validation

Validation (v2)

Custom Policies

Validate Document

Request Body

Scanned PDF Support (OCR)

Response

Example

​Request Body

​Scanned PDF Support (OCR)

​Response

​Example

Request Body

Scanned PDF Support (OCR)

Response

Example