Export the variable; the SDK will read your API key from the TENSORLAKE_API_KEY environment variable:
Copy
Ask AI
export TENSORLAKE_API_KEY=your-api-key-here
3
Parse a document
quickstart.py
Copy
Ask AI
"""Quickstart: parse a signed real-estate contract and extract signer details."""

import json
import os
from typing import Optional

from pydantic import BaseModel, Field
from tensorlake.documentai import (
    DocumentAI,
    ParsingOptions,
    StructuredExtractionOptions,
    ChunkingStrategy,
)

# The client reads the API key exported in the previous step.
doc_ai = DocumentAI(api_key=os.getenv("TENSORLAKE_API_KEY"))

# Use a publicly accessible URL or upload a file to Tensorlake and use the file ID.
file_url = "https://pub-226479de18b2493f96b64c6674705dd8.r2.dev/real-estate-purchase-all-signed.pdf"


# Define a JSON schema using Pydantic.
# The structured extraction model identifies the properties we want pulled
# from the document — here, the names and signature dates of the buyer and
# the seller.
class Signers(BaseModel):
    buyer_name: Optional[str] = Field(
        default=None, description="The name of the buyer, do not extract initials"
    )
    buyer_signature_date: Optional[str] = Field(
        default=None, description="Date and time that the buyer signed."
    )
    seller_name: Optional[str] = Field(
        default=None, description="The name of the seller, do not extract initials"
    )
    seller_signature_date: Optional[str] = Field(
        default=None, description="Date and time that the seller signed."
    )


# Wrap the schema in a structured-extraction options object.
# You can send as many schemas as you want; the API returns structured data
# for each one, indexed by its schema name.
real_estate_agreement_extraction_options = StructuredExtractionOptions(
    schema_name="Signers",
    json_schema=Signers,
)

# Tune how data is located in the document.
# Sane defaults are provided, but every document is different, so every
# option can be adjusted. Here the PAGE chunking strategy makes each page
# of the document a separate chunk.
parsing_options = ParsingOptions(
    chunking_strategy=ChunkingStrategy.PAGE,
)

# Submit the parse operation; only pages 9-10 (the signature pages) are parsed.
parse_id = doc_ai.parse(
    file=file_url,
    page_range="9-10",
    parsing_options=parsing_options,
    structured_extraction_options=[real_estate_agreement_extraction_options],
)
4
Wait for the job to complete
quickstart.py
Copy
Ask AI
# Block until the parse operation identified by parse_id finishes.
# The returned result carries the parsed chunks and the structured data
# consumed in the next step.
result = doc_ai.wait_for_completion(parse_id)
5
Use the results
quickstart.py
Copy
Ask AI
# Persist the parse output: one markdown file with every chunk, plus a JSON
# file holding the structured extraction results.
markdown_chunks = result.chunks

with open("markdown_chunks.md", "w") as md_file:
    # Label each chunk with its index and the page it came from.
    for chunk_number, chunk in enumerate(markdown_chunks):
        md_file.write(f"## CHUNK NUMBER {chunk_number}\n\n")
        md_file.write(f"## Page {chunk.page_number}\n\n{chunk.content}\n\n")

# model_dump() converts the result into plain Python types so it can be
# serialized; only the structured-data portion is written out.
serializable_data = result.model_dump()
with open("structured_data.json", "w") as json_file:
    json.dump(serializable_data["structured_data"], json_file, indent=2)