Use cheaper models for simple tasks:
```python
def smart_route(message: str) -> str:
    # Simple queries -> fast/cheap model
    if len(message) < 50:
        model = "gpt-3.5-turbo"
    # Complex reasoning -> powerful model
    else:
        model = "gpt-4.1"
    return client.chat(model=model, messages=[{"role": "user", "content": message}])
```
### 2. Fallback Strategy
Automatic fallback on failure:
```python
def chat_with_fallback(message: str) -> str:
    models = ["gpt-4.1", "claude-sonnet-4-5", "gemini-2.5-flash"]
    for model in models:
        try:
            return client.chat(model=model, messages=[{"role": "user", "content": message}])
        except Exception:
            continue
    raise Exception("All models failed")
```
### 3. Model A/B Testing
Compare model outputs:
```python
results = client.compare_models(
    models=["gpt-4.1", "claude-opus-4-1"],
    message="Analyze this quarterly report..."
)
# Log for analysis
for model, result in results.items():
    log_response(model=model, latency=result["latency"], cost=result["cost"])
```
### 4. Specialized Model Selection
Choose the best model for each task:
```python
MODEL_MAP = {
    "code": "gpt-4.1",
    "creative": "claude-opus-4-1",
    "fast": "gemini-2.5-flash",
    "vision": "gpt-4o",
    "reasoning": "o1",
    "open_source": "llama-3.1-70b",
}

def route_by_task(task_type: str, message: str) -> str:
    model = MODEL_MAP.get(task_type, "gpt-4.1")
    return client.chat(model=model, messages=[{"role": "user", "content": message}])
```