Tensorlake Documentation

Accessing Bounding Boxes:

# Run the parse call and keep what it returns; the pages are read from it below.
result = doc_ai.parse_and_wait(file_id)

# Visit every fragment of every page and print its type plus the two
# bounding-box corners: (x1, y1) is the top-left, (x2, y2) the bottom-right.
for page in result.pages:
  for fragment in page.page_fragments:
    box = fragment.bbox
    print(f"Fragment type: {fragment.fragment_type}")
    top_left = f"({box['x1']}, {box['y1']})"
    print(f"Top-left: {top_left}")
    bottom_right = f"({box['x2']}, {box['y2']})"
    print(f"Bottom-right: {bottom_right}")

Citation with source location:

Show users exactly where information came from:

# Print the text of each text fragment, followed by the page number and the
# (x1, y1) corner of its bounding box.
for fragment in page.page_fragments:
  if fragment.fragment_type != "text":
    continue  # skip every fragment type other than "text"
  print(f"Content: {fragment.content.text}")
  origin = fragment.bbox
  print(f"Found on page {page.page_number} at ({origin['x1']}, {origin['y1']})")

Calculate dimensions:

# Derive each fragment's size from its bounding-box corners:
# width = x2 - x1, height = y2 - y1
for fragment in page.page_fragments:
  box = fragment.bbox
  width, height = box['x2'] - box['x1'], box['y2'] - box['y1']
  print(f"{fragment.fragment_type}: {width}x{height} pixels")

Filter by location:

# Keep only fragments from a specific region (e.g., ignore headers/footers).
# A fragment is kept when its y1 coordinate lies strictly between the bounds.
REGION_TOP_Y = 50      # fragments starting at or above this are excluded
REGION_BOTTOM_Y = 700  # fragments starting at or below this are excluded
main_content = [
  frag for frag in page.page_fragments
  if REGION_TOP_Y < frag.bbox['y1'] < REGION_BOTTOM_Y  # Exclude top/bottom margins
]

Visual debugging:

# Flag fragments that might have extraction issues: anything whose bounding
# box is narrower or shorter than the minimum side length gets a warning.
MIN_SIDE = 10
for fragment in page.page_fragments:
  box = fragment.bbox
  width, height = box['x2'] - box['x1'], box['y2'] - box['y1']

  # Width is checked first, then height; either one being too small is enough.
  if any(side < MIN_SIDE for side in (width, height)):
    print(f"Warning: Very small fragment at ({box['x1']}, {box['y1']})")

Build Clickable Citations:

# Build the data a document viewer needs to highlight matching content:
# one entry per fragment whose text contains "key information"
# (case-insensitive), carrying the page number, a copy of the four
# bounding-box coordinates, and the fragment text.
# NOTE(review): assumes every fragment's content exposes `.text` — confirm
# this holds for non-text fragment types.
citations = [
  {
    "page": page.page_number,
    "bbox": {corner: fragment.bbox[corner] for corner in ("x1", "y1", "x2", "y2")},
    "text": fragment.content.text,
  }
  for fragment in page.page_fragments
  if "key information" in fragment.content.text.lower()
]