How to properly handle document upload and polling for extraction status
After uploading a document, you'll need to poll the get endpoint to check when processing is complete and retrieve the extracted data. This guide shows you how to implement a robust upload + polling workflow.
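The workflow has three steps:

1. Upload the document and keep the returned `documentId`.
2. Poll the get endpoint with that `documentId` until processing finishes.
3. Once the status is `INDEXED`, retrieve and use the extracted data.

The `status.extractionStatus` field indicates the current processing state: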
- `PENDING` - Document uploaded but not yet processed
- `PARTIAL` - Document classified but not yet extracted
- `EXTRACTED` - Document extracted but not yet indexed
- `INDEXED` - Document fully processed and ready (extraction data available)

interface UploadResponse {
success: boolean;
documentId?: string;
error?: string;
}
/**
 * JSON body returned by the document "get" endpoint.
 *
 * The nested `extraction` payload is optional; the guide describes it as
 * available once `status.extractionStatus` is `INDEXED`.
 */
interface GetResponse {
  /** Outcome flag reported by the API. */
  success: boolean;
  /** Identifier of the document this response describes. */
  documentId: string;
  /** Processing state of the document. */
  status: {
    /** Current stage: PENDING, then PARTIAL, then EXTRACTED, then INDEXED. */
    extractionStatus:
      | 'PENDING'
      | 'PARTIAL'
      | 'EXTRACTED'
      | 'INDEXED';
    /** Document type, when the API reports one. */
    docType?: string;
  };
  /** Document payload; its shape is not specified here. */
  document?: any;
  /** Extracted data; the shapes of `fields` and `tables` are not specified here. */
  extraction?: {
    fields?: any;
    tables?: any;
  };
}
/**
 * Submits a document (referenced by URL) to the upload webhook and returns
 * the id assigned to it.
 *
 * @param fileUrl - URL of the file; sent as `url` in the JSON request body.
 * @param webhookId - Webhook identifier; URL-encoded into the request path.
 * @param secret - Value sent verbatim in the `Authorization` header.
 * @returns The `documentId` reported by the API.
 * @throws Error when the response body is not valid JSON (the message
 *   includes the HTTP status), or when the API reports failure or omits
 *   `documentId` (the message is the API's `error`, else 'Upload failed').
 */
async function uploadDocument(
  fileUrl: string,
  webhookId: string,
  secret: string
): Promise<string> {
  const response = await fetch(
    `https://app.1flow.io/api/webhook/${encodeURIComponent(webhookId)}/document/upload`,
    {
      method: 'POST',
      headers: {
        'Authorization': secret,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ url: fileUrl }),
    }
  );

  // An HTML/empty error page would otherwise surface as a bare SyntaxError
  // with no hint about which request failed or with what status.
  let result: UploadResponse;
  try {
    result = await response.json();
  } catch {
    throw new Error(
      `Upload failed: HTTP ${response.status} with a non-JSON response body`
    );
  }

  // `?.` also covers a literal JSON `null` body.
  if (!result?.success || !result.documentId) {
    throw new Error(result?.error || 'Upload failed');
  }
  return result.documentId;
}
/**
 * Fetches the current state of a document from the "get" webhook endpoint.
 *
 * The parsed JSON body is returned as-is regardless of HTTP status; callers
 * are expected to inspect `success` themselves.
 *
 * @param docId - Document identifier; URL-encoded into the `docId` query parameter.
 * @param webhookId - Webhook identifier; URL-encoded into the request path.
 * @param secret - Value sent verbatim in the `Authorization` header.
 * @returns The parsed response body.
 * @throws Error when the response body is not valid JSON (the message
 *   includes the HTTP status).
 */
async function getDocument(
  docId: string,
  webhookId: string,
  secret: string
): Promise<GetResponse> {
  // Encode both values: an id containing `&`, `#`, `=` or whitespace would
  // otherwise change the meaning of the URL.
  const endpoint =
    `https://app.1flow.io/api/webhook/${encodeURIComponent(webhookId)}` +
    `/document/get?docId=${encodeURIComponent(docId)}`;

  const response = await fetch(endpoint, {
    method: 'GET',
    headers: {
      'Authorization': secret,
    },
  });

  try {
    return await response.json();
  } catch {
    // Typically an HTML or empty error page; report the status instead of
    // a bare JSON SyntaxError.
    throw new Error(
      `Failed to get document: HTTP ${response.status} with a non-JSON response body`
    );
  }
}
/**
 * Polls `getDocument` until the document's extraction status is `INDEXED`.
 *
 * The time budget is measured from the moment this function is called. At
 * least one status check is always made, and waits are shortened so that the
 * last check lands at the deadline instead of sleeping past it.
 *
 * @param docId - Document identifier passed through to `getDocument`.
 * @param webhookId - Webhook identifier passed through to `getDocument`.
 * @param secret - Credential passed through to `getDocument`.
 * @param options - Timing controls, all in milliseconds:
 *   `maxWaitTime` (default 180000) is the total time budget;
 *   `initialDelay` (default 5000) is the wait before the first check;
 *   `pollInterval` (default 10000) is the wait between checks.
 * @returns The first response whose status is `INDEXED`.
 * @throws Error 'Failed to get document status' when a response is not
 *   successful; `Unexpected extraction status: …` when the status is missing
 *   or not one of the four known values; `Polling timeout after …ms` when the
 *   budget runs out while the document is still processing.
 */
async function pollDocumentStatus(
  docId: string,
  webhookId: string,
  secret: string,
  options: {
    maxWaitTime?: number; // milliseconds
    initialDelay?: number; // milliseconds
    pollInterval?: number; // milliseconds
  } = {}
): Promise<GetResponse> {
  const {
    maxWaitTime = 180000, // 3 minutes default
    initialDelay = 5000, // 5 seconds default
    pollInterval = 10000, // 10 seconds default
  } = options;

  const sleep = (ms: number) =>
    new Promise<void>(resolve => setTimeout(resolve, ms));

  const deadline = Date.now() + maxWaitTime;

  // Wait before the first poll, but never longer than the whole budget, so a
  // large initialDelay cannot cause a timeout without a single status check.
  await sleep(Math.min(initialDelay, maxWaitTime));

  while (true) {
    const result = await getDocument(docId, webhookId, secret);
    if (!result?.success) {
      throw new Error('Failed to get document status');
    }

    // Optional chaining: a body without `status` is reported as an unexpected
    // status instead of crashing with a TypeError.
    const status = result.status?.extractionStatus;
    switch (status) {
      case 'INDEXED':
        return result; // Processing complete
      case 'PENDING':
      case 'PARTIAL':
      case 'EXTRACTED':
        break; // Still processing
      default:
        throw new Error(`Unexpected extraction status: ${status}`);
    }

    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      throw new Error(`Polling timeout after ${maxWaitTime}ms`);
    }
    // Shorten the final wait so the last check happens at the deadline.
    await sleep(Math.min(pollInterval, remaining));
  }
}
/**
 * Complete workflow example: upload a document, wait until it has been
 * processed, log what was extracted, and return the final response.
 *
 * Progress is written with `console.log`. Any failure is written with
 * `console.error` and then re-thrown to the caller.
 *
 * @param fileUrl - URL of the file, forwarded to `uploadDocument`.
 * @param webhookId - Webhook identifier, forwarded to both API helpers.
 * @param secret - Credential, forwarded to both API helpers.
 * @returns The response resolved by `pollDocumentStatus`.
 */
async function uploadAndWaitForExtraction(
  fileUrl: string,
  webhookId: string,
  secret: string
) {
  const pollingOptions = {
    maxWaitTime: 180000, // 3 minutes
    initialDelay: 5000, // wait 5 seconds before the first poll
    pollInterval: 10000, // then poll every 10 seconds
  };

  try {
    // Step 1: upload
    console.log('Uploading document...');
    const uploadedId = await uploadDocument(fileUrl, webhookId, secret);
    console.log(`Document uploaded: ${uploadedId}`);

    // Step 2: wait for processing to finish
    console.log('Waiting for extraction to complete...');
    const finished = await pollDocumentStatus(
      uploadedId,
      webhookId,
      secret,
      pollingOptions
    );

    // Step 3: report the extracted data
    console.log('Extraction complete!');
    console.log('Document type:', finished.status.docType);
    const { extraction } = finished;
    if (extraction) {
      console.log('Extracted fields:', extraction.fields);
      console.log('Extracted tables:', extraction.tables);
    }

    return finished;
  } catch (failure) {
    console.error('Error:', failure);
    throw failure;
  }
}

Instead of polling, you can set up webhook callbacks to receive notifications when documents are processed. This is more efficient for high-volume integrations and reduces API calls.