"""
=============================================================================
AI ROUTER
=============================================================================
This router handles all AI-related endpoints.

Features:
- Text generation with OpenAI or Anthropic
- Streaming responses for real-time output
- Support for multiple AI models

Architecture:
- Endpoints are thin: they just validate input and return output
- Business logic lives in the services module
- This separation makes testing easier
"""

from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Optional

# Import our AI service
from services.ai_service import generate_text, stream_text, chat_completion

router = APIRouter()
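
# NOTE: The docstring examples below assume this router is mounted under the
# /api/ai prefix in the application entrypoint. A minimal sketch (the module
# path "routers.ai" and the tag are assumptions, not confirmed by this file):
#
#     from fastapi import FastAPI
#     from routers.ai import router as ai_router
#
#     app = FastAPI()
#     app.include_router(ai_router, prefix="/api/ai", tags=["ai"])
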
# =============================================================================
# REQUEST/RESPONSE MODELS
# =============================================================================
class GenerateRequest(BaseModel):
    """Request body for text generation."""

    prompt: str = Field(..., min_length=1, max_length=10000)
    max_tokens: Optional[int] = Field(default=500, ge=1, le=4000)
    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)


class GenerateResponse(BaseModel):
    """Response from text generation."""

    text: str
    model: str
    usage: Optional[dict] = None


class ChatMessage(BaseModel):
    """A single message in a chat conversation."""

    role: str = Field(..., pattern="^(user|assistant|system)$")
    content: str = Field(..., min_length=1)


class ChatRequest(BaseModel):
    """Request body for chat completion."""

    # min_length on a list constrains the number of items (Pydantic v2;
    # min_items is the deprecated v1 spelling).
    messages: list[ChatMessage] = Field(..., min_length=1)
    max_tokens: Optional[int] = Field(default=500, ge=1, le=4000)
    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)


class ChatResponse(BaseModel):
    """Response from chat completion."""

    message: ChatMessage
    model: str
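
# The service functions imported above are expected to return plain dicts that
# map onto these models. Inferred from how the endpoints below unpack their
# results (not verified against services/ai_service.py):
#
#     generate_text(...)   -> {"text": str, "model": str, "usage": dict | None}
#     chat_completion(...) -> {"message": {"role": str, "content": str}, "model": str}
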
# =============================================================================
# ENDPOINTS
# =============================================================================
@router.post("/generate", response_model=GenerateResponse)
async def generate_endpoint(request: GenerateRequest):
"""
Generate text from a prompt.
This is a simple completion endpoint - give it a prompt,
get back generated text.
Example:
POST /api/ai/generate
{"prompt": "Write a haiku about coding"}
"""
try:
result = await generate_text(
prompt=request.prompt,
max_tokens=request.max_tokens,
temperature=request.temperature,
)
return GenerateResponse(**result)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
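
# Example client call for /generate (illustrative only; httpx, the /api/ai
# mount prefix, and localhost:8000 are assumptions, not confirmed by this file):
#
#     import httpx
#
#     resp = httpx.post(
#         "http://localhost:8000/api/ai/generate",
#         json={"prompt": "Write a haiku about coding", "max_tokens": 100},
#     )
#     resp.raise_for_status()
#     print(resp.json()["text"])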
@router.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
"""
Chat completion with message history.
Send a list of messages (conversation history) and get
the assistant's response.
Example:
POST /api/ai/chat
{
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
]
}
"""
try:
# Convert Pydantic models to dicts
messages = [m.model_dump() for m in request.messages]
result = await chat_completion(
messages=messages,
max_tokens=request.max_tokens,
temperature=request.temperature,
)
return ChatResponse(
message=ChatMessage(**result["message"]),
model=result["model"],
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
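
# To continue a multi-turn conversation, the client is expected to append the
# returned assistant message to its local history and send the whole list back
# on the next request. Illustrative follow-up payload (field names come from
# ChatMessage above; the conversation content is made up):
#
#     {
#         "messages": [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": "Hello!"},
#             {"role": "assistant", "content": "<previous ChatResponse.message.content>"},
#             {"role": "user", "content": "Tell me more."}
#         ]
#     }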
@router.post("/stream")
async def stream_endpoint(request: GenerateRequest):
"""
Stream generated text in real-time.
Uses Server-Sent Events (SSE) to stream the response
token by token. Great for chat interfaces!
The frontend can read this with:
const response = await fetch("/api/ai/stream", {...});
const reader = response.body.getReader();
"""
try:
return StreamingResponse(
stream_text(
prompt=request.prompt,
max_tokens=request.max_tokens,
temperature=request.temperature,
),
media_type="text/event-stream",
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
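
# Note: the except clause above only covers errors raised while constructing the
# response; once streaming has started, exceptions inside stream_text() cannot
# be converted into an HTTP 500 because headers have already been sent.
#
# Example of consuming the stream from Python (illustrative only; httpx and the
# /api/ai prefix are assumptions, and stream_text() is assumed to yield
# SSE-formatted chunks such as "data: <token>\n\n"):
#
#     import httpx
#
#     with httpx.stream(
#         "POST",
#         "http://localhost:8000/api/ai/stream",
#         json={"prompt": "Write a haiku about coding"},
#         timeout=None,
#     ) as resp:
#         for chunk in resp.iter_text():
#             print(chunk, end="", flush=True)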
@router.get("/models")
async def list_models():
"""
List available AI models.
Useful for letting users choose which model to use.
"""
return {
"openai": [
{"id": "gpt-4-turbo", "name": "GPT-4 Turbo", "description": "Most capable model"},
{"id": "gpt-4", "name": "GPT-4", "description": "High quality responses"},
{"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo", "description": "Fast and efficient"},
],
"anthropic": [
{"id": "claude-3-opus-20240229", "name": "Claude 3 Opus", "description": "Most capable"},
{"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet", "description": "Balanced"},
{"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku", "description": "Fast"},
],
}