Architecture
┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Document │ ──▶ │ OCR │ ──▶ │ Embeddings │ ──▶ │ Search │
│ Upload │ │ Processing │ │ Generation │ │ Index │
└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘
Prerequisites
- Case.dev API key
- Node.js 18+ or Python 3.9+
- Documents to process (PDFs, images, Word docs)
Step 1: Create a vault
// SDK setup — the client authenticates with the API key from the environment.
import Casedev from 'casedev';
import fs from 'fs';
import path from 'path';
const client = new Casedev({ apiKey: process.env.CASEDEV_API_KEY });
// Create a vault to hold all documents for this matter; the returned
// vault.id is required by every subsequent upload/ingest/search call.
const vault = await client.vault.create({
name: 'Matter 2024-1234 - Discovery',
description: 'Documents received from opposing counsel'
});
console.log(`Created vault: ${vault.id}`);
Step 2: Subscribe to ingestion events
Use webhooks to get notified when documents finish processing instead of polling.
// Register a webhook endpoint so ingestion results are pushed to you.
// Subscribing to both completed and failed events means every document
// produces exactly one terminal notification — no polling loop needed.
const subscription = await client.vault.events.subscriptions.create(vault.id, {
callbackUrl: 'https://your-app.com/webhooks/case-vault',
eventTypes: ['vault.ingest.completed', 'vault.ingest.failed']
});
console.log(`Webhook subscription: ${subscription.id}`);
Your webhook endpoint receives events like:
{
"id": "evt_abc123",
"eventType": "vault.ingest.completed",
"vaultId": "vault_abc123",
"objectId": "obj_xyz789",
"data": { "status": "completed" }
}
Webhook delivery is at-least-once. Use the id field as an idempotency key and design handlers to safely process duplicates. See Vault Webhooks for signing and retry details.
Step 3: Batch upload documents
/**
 * Uploads every regular file in `documentsDir` to the vault.
 *
 * For each file: requests a presigned upload URL from the API, then PUTs
 * the raw bytes to that URL with a matching Content-Type header.
 *
 * @param vaultId - Target vault identifier.
 * @param documentsDir - Directory whose regular files will be uploaded.
 * @returns One `{ file, objectId }` entry per successfully uploaded file.
 * @throws If the PUT to the presigned URL returns a non-2xx status.
 */
async function uploadDocuments(
  vaultId: string,
  documentsDir: string
): Promise<{ file: string; objectId: string }[]> {
  // Extension -> MIME type; anything unrecognized falls back to a
  // generic binary stream below.
  const contentTypes: Record<string, string> = {
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.tiff': 'image/tiff',
    '.txt': 'text/plain',
  };

  const files = fs.readdirSync(documentsDir);
  const results: { file: string; objectId: string }[] = [];

  for (const file of files) {
    const filePath = path.join(documentsDir, file);
    const stat = fs.statSync(filePath);
    if (!stat.isFile()) continue; // skip subdirectories, sockets, etc.

    const ext = path.extname(file).toLowerCase();
    const contentType = contentTypes[ext] ?? 'application/octet-stream';

    // Get a presigned upload URL from the API.
    const upload = await client.vault.upload(vaultId, {
      filename: file,
      contentType,
      metadata: {
        source: 'discovery',
        original_path: filePath,
      }
    });

    // Upload the bytes. The Content-Type must match the one used when
    // requesting the presigned URL.
    const fileBuffer = fs.readFileSync(filePath);
    const response = await fetch(upload.uploadUrl, {
      method: 'PUT',
      headers: { 'Content-Type': contentType },
      body: fileBuffer
    });
    // Fix: fail loudly on a rejected upload instead of silently recording
    // the object as uploaded and later ingesting an empty/missing object.
    if (!response.ok) {
      throw new Error(`Upload failed for ${file}: HTTP ${response.status}`);
    }

    console.log(`Uploaded: ${file}`);
    results.push({ file, objectId: upload.objectId });
  }
  return results;
}
Step 4: Trigger ingestion
Ingestion runs OCR (if needed) and generates embeddings for search. Your webhook will fire when each document finishes.
/**
 * Queues each uploaded object for ingestion (OCR where needed, plus
 * embedding generation). Ingestion itself runs asynchronously — the
 * vault.ingest.completed / vault.ingest.failed webhook fires per document.
 *
 * @param vaultId - Vault the objects were uploaded to.
 * @param uploads - `{ file, objectId }` pairs returned by the upload step.
 */
async function ingestDocuments(vaultId: string, uploads: { file: string; objectId: string }[]) {
  for (const entry of uploads) {
    await client.vault.ingest(vaultId, entry.objectId);
    console.log(`Ingesting: ${entry.file}`);
  }
}
Step 5: Search your documents
Once your webhook confirms ingestion is complete, documents are searchable.
/**
 * Runs a hybrid (semantic + keyword) search over the vault, prints the
 * top matches with their scores and a 200-character text preview, and
 * returns the raw result set to the caller.
 *
 * @param vaultId - Vault to search.
 * @param query - Natural-language or keyword query string.
 */
async function searchDiscovery(vaultId: string, query: string) {
  const results = await client.vault.search(vaultId, {
    query,
    method: 'hybrid', // Combines semantic + keyword
    limit: 10
  });

  console.log(`\nResults for: "${query}"\n`);
  results.chunks.forEach((chunk) => {
    console.log(`${chunk.filename} (page ${chunk.page})`);
    console.log(` Score: ${chunk.hybridScore.toFixed(2)}`);
    console.log(` "${chunk.text.substring(0, 200)}..."\n`);
  });

  return results;
}
Complete example
import Casedev from 'casedev';
import fs from 'fs';
import path from 'path';

const client = new Casedev({ apiKey: process.env.CASEDEV_API_KEY });

// Extension -> MIME type; unrecognized extensions are uploaded as a
// generic binary stream.
const CONTENT_TYPES: Record<string, string> = {
  '.pdf': 'application/pdf',
  '.doc': 'application/msword',
  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.png': 'image/png',
  '.tiff': 'image/tiff',
  '.txt': 'text/plain',
};

/**
 * End-to-end pipeline: create a vault, subscribe to ingestion events,
 * upload + ingest every file in the discovery directory, then run a
 * sample search (valid once webhooks confirm ingestion has completed).
 */
async function main() {
  const documentsDir = './discovery_dump';

  // 1. Create vault
  const vault = await client.vault.create({
    name: 'Matter 2024-1234 - Discovery',
    description: 'Documents from opposing counsel'
  });

  // 2. Subscribe to ingestion events
  await client.vault.events.subscriptions.create(vault.id, {
    callbackUrl: 'https://your-app.com/webhooks/case-vault',
    eventTypes: ['vault.ingest.completed', 'vault.ingest.failed']
  });

  // 3. Upload and ingest all documents
  const files = fs.readdirSync(documentsDir);
  for (const file of files) {
    const filePath = path.join(documentsDir, file);
    if (!fs.statSync(filePath).isFile()) continue;

    // Fix: derive the content type from the file extension instead of
    // labelling every document (images, Word docs, ...) as a PDF.
    const contentType =
      CONTENT_TYPES[path.extname(file).toLowerCase()] ?? 'application/octet-stream';

    const upload = await client.vault.upload(vault.id, {
      filename: file,
      contentType
    });

    const res = await fetch(upload.uploadUrl, {
      method: 'PUT',
      // Fix: presigned URLs are typically signed for a specific
      // Content-Type, so send the same one used to request the URL.
      headers: { 'Content-Type': contentType },
      body: fs.readFileSync(filePath)
    });
    // Fix: surface upload failures instead of ingesting a missing object.
    if (!res.ok) {
      throw new Error(`Upload failed for ${file}: HTTP ${res.status}`);
    }

    await client.vault.ingest(vault.id, upload.objectId);
    console.log(`Queued: ${file}`);
  }

  // 4. Search (after webhook confirms ingestion is complete)
  const results = await client.vault.search(vault.id, {
    query: 'evidence of safety violations in 2023',
    method: 'hybrid',
    limit: 10
  });
  console.log(results.chunks);
}

// Fix: handle async failure explicitly rather than dying with an
// unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Production tip: For large document sets (1000+), use parallel uploads with a concurrency limit of 10-20 to maximize throughput while avoiding rate limits.