openocr-skill
by @topdu
Extract text from images, documents, and scanned PDFs using OpenOCR - a lightweight and efficient OCR system with a document parsing model requiring only 0.1B parameters, capable of running recognition on personal PCs. It supports text detection, recognition, universal VLM recognition, and document parsing with layout analysis.
Example 1: Batch OCR with Progress
from openocr import OpenOCR
import os


def batch_ocr(image_dir, output_dir='./ocr_results'):
    """OCR all images in a directory.

    Args:
        image_dir: Directory whose image files (.jpg, .jpeg, .png, .bmp,
            .tiff; matched case-insensitively) are processed.
        output_dir: Directory passed to the OCR engine as ``save_dir`` and
            where the combined ``all_text.txt`` report is written. It is
            created if it does not exist.

    Returns:
        A dict mapping each processed image filename to the list of text
        lines extracted from it.
    """
    ocr = OpenOCR(task='ocr', mode='mobile')
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort so that the
    # progress output and the report file are stable between runs.
    image_files = sorted(
        f for f in os.listdir(image_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff'))
    )

    all_results = {}
    total = len(image_files)
    for i, filename in enumerate(image_files, start=1):
        filepath = os.path.join(image_dir, filename)
        print(f"[{i}/{total}] Processing: {filename}")

        # The second return value (timing information) is not needed here.
        results, _ = ocr(
            image_path=filepath,
            save_dir=output_dir,
            is_visualize=True,
        )

        # Each line is indexed as line[1][0] to get its text.
        # NOTE(review): presumably line is (box, (text, score)) -- confirm.
        # `or ()` tolerates an empty/None result for images without text
        # (assumption: the engine may return None in that case -- confirm).
        texts = [
            line[1][0]
            for result in (results or ())
            for line in (result or ())
        ]
        all_results[filename] = texts
        print(f" Found {len(texts)} text lines")

    # Save all text. Explicit UTF-8 so non-ASCII OCR output is written
    # correctly regardless of the platform's default encoding.
    report_path = os.path.join(output_dir, 'all_text.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        for filename, texts in all_results.items():
            f.write(f"--- {filename} ---\n")
            f.write('\n'.join(texts))
            f.write('\n\n')

    return all_results
results = batch_ocr('./images')
Example 2: Document to Markdown Converter
from openocr import OpenOCR
import os


def doc_to_markdown(input_path, output_dir='./markdown_output'):
    """Convert document images or PDFs to Markdown.

    Args:
        input_path: Path to the document image or PDF to convert.
        output_dir: Directory the Markdown output is saved to. It is
            created if it does not exist.
    """
    doc = OpenOCR(
        task='doc',
        use_layout_detection=True,
        use_chart_recognition=True,
    )
    os.makedirs(output_dir, exist_ok=True)

    result = doc(image_path=input_path)

    if isinstance(result, list):
        # PDF: multiple pages, one result per page.
        for page_result in result:
            doc.save_to_markdown(page_result, output_dir)
        print(f"Converted {len(result)} pages to Markdown")
    else:
        # Single image: one result object.
        doc.save_to_markdown(result, output_dir)
        print("Converted image to Markdown")

    print(f"Output saved to: {output_dir}")
# Convert a scanned PDF
doc_to_markdown('paper.pdf')

# Convert a document image
doc_to_markdown('page.jpg')
Example 3: Multi-Task Comparison
from openocr import OpenOCR


def compare_tasks(image_path):
    """Compare results from different OpenOCR tasks.

    Runs text detection, end-to-end OCR, and universal recognition on the
    same image and prints a short summary of each.

    Args:
        image_path: Path to the image to analyse.

    Returns:
        A dict with keys ``'det_boxes'`` (number of detected text regions),
        ``'ocr_texts'`` (list of OCR text lines) and ``'unirec_text'``
        (the universal-recognition output).
    """
    # 1. Detection only
    det = OpenOCR(task='det')
    det_result = det(image_path=image_path)
    num_boxes = len(det_result[0]['boxes'])
    print(f"Detection: Found {num_boxes} text regions")

    # 2. End-to-End OCR
    ocr = OpenOCR(task='ocr')
    ocr_results, _ = ocr(image_path=image_path)
    # Each line is indexed as line[1][0] to get its text.
    ocr_texts = [line[1][0] for result in ocr_results for line in result]
    print(f"OCR: Extracted {len(ocr_texts)} text lines")
    for t in ocr_texts[:5]:  # preview only the first five lines
        print(f" - {t}")

    # 3. Universal Recognition
    unirec = OpenOCR(task='unirec')
    text, _ = unirec(image_path=image_path)
    print(f"UniRec: {text[:200]}...")  # preview the first 200 characters

    return {
        'det_boxes': num_boxes,
        'ocr_texts': ocr_texts,
        'unirec_text': text,
    }
compare_tasks('document.jpg')
Example 4: Gradio Demo Launch
from openocr import launch_openocr_demo, launch_unirec_demo, launch_opendoc_demo

# NOTE(review): if each launch call blocks until its server stops, the later
# calls will not run in the same script -- run each demo separately. Confirm.

# Launch OCR demo
launch_openocr_demo(share=True, server_port=7860, server_name='0.0.0.0')

# Launch UniRec demo
launch_unirec_demo(share=True, server_port=7861)

# Launch OpenDoc demo
launch_opendoc_demo(share=True, server_port=7862)
1. Choose the Right Task: Use `ocr` for general text, `unirec` for formulas/tables, and `doc` for full documents.
2. Use Mobile Mode for Speed: `mode='mobile'` is much faster; use `mode='server'` only when accuracy is critical.
3. Use ONNX Backend: The default ONNX backend works on CPU without extra dependencies.
4. Set Appropriate Thresholds: Adjust `drop_score` (OCR) and `layout_threshold` (Doc) for your use case.
5. Enable Layout Detection: For documents with mixed content (text + formulas + tables), always enable `use_layout_detection`.
6. Batch Processing: Use `rec_batch_num` to control the recognition batch size for throughput optimization.
7. GPU Acceleration: Install `onnxruntime-gpu` or PyTorch with CUDA for a significant speedup.
clawhub install openocr-skill