""" ============================================================================= AI ROUTER ============================================================================= This router handles all AI-related endpoints. Features: - Text generation with OpenAI or Anthropic - Streaming responses for real-time output - Multiple AI models support Architecture: - Endpoints are thin - they just validate input and return output - Business logic is in the services module - This separation makes testing easier """ from fastapi import APIRouter, HTTPException from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from typing import Optional # Import our AI service from services.ai_service import generate_text, stream_text, chat_completion router = APIRouter() # ============================================================================= # REQUEST/RESPONSE MODELS # ============================================================================= class GenerateRequest(BaseModel): """Request body for text generation.""" prompt: str = Field(..., min_length=1, max_length=10000) max_tokens: Optional[int] = Field(default=500, ge=1, le=4000) temperature: Optional[float] = Field(default=0.7, ge=0, le=2) class GenerateResponse(BaseModel): """Response from text generation.""" text: str model: str usage: Optional[dict] = None class ChatMessage(BaseModel): """A single message in a chat conversation.""" role: str = Field(..., pattern="^(user|assistant|system)$") content: str = Field(..., min_length=1) class ChatRequest(BaseModel): """Request body for chat completion.""" messages: list[ChatMessage] = Field(..., min_items=1) max_tokens: Optional[int] = Field(default=500, ge=1, le=4000) temperature: Optional[float] = Field(default=0.7, ge=0, le=2) class ChatResponse(BaseModel): """Response from chat completion.""" message: ChatMessage model: str # ============================================================================= # ENDPOINTS # 
# =============================================================================


@router.post("/generate", response_model=GenerateResponse)
async def generate_endpoint(request: GenerateRequest):
    """
    Generate text from a prompt.

    This is a simple completion endpoint - give it a prompt,
    get back generated text.

    Example:
        POST /api/ai/generate
        {"prompt": "Write a haiku about coding"}
    """
    try:
        result = await generate_text(
            prompt=request.prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
        )
        return GenerateResponse(**result)
    except Exception as e:
        # "from e" preserves the original traceback in server logs (B904).
        # NOTE(review): str(e) may leak provider internals to clients —
        # consider logging the exception and returning a generic message.
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """
    Chat completion with message history.

    Send a list of messages (conversation history) and get
    the assistant's response.

    Example:
        POST /api/ai/chat
        {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello!"}
            ]
        }
    """
    try:
        # Convert Pydantic models to dicts for the service layer.
        messages = [m.model_dump() for m in request.messages]

        result = await chat_completion(
            messages=messages,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
        )

        return ChatResponse(
            message=ChatMessage(**result["message"]),
            model=result["model"],
        )
    except Exception as e:
        # "from e" preserves the original traceback in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/stream")
async def stream_endpoint(request: GenerateRequest):
    """
    Stream generated text in real-time.

    Uses Server-Sent Events (SSE) to stream the response
    token by token. Great for chat interfaces!

    The frontend can read this with:
        const response = await fetch("/api/ai/stream", {...});
        const reader = response.body.getReader();
    """
    try:
        return StreamingResponse(
            stream_text(
                prompt=request.prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
            ),
            media_type="text/event-stream",
        )
    except Exception as e:
        # NOTE(review): this only catches errors raised while *constructing*
        # the response (e.g. if calling stream_text() raises immediately).
        # Exceptions raised mid-stream — after headers are sent — never reach
        # this handler; they must be handled inside the generator itself.
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/models")
async def list_models():
    """
    List available AI models.

    Useful for letting users choose which model to use.
    """
    # Static catalog — keep ids in sync with what the service layer accepts.
    return {
        "openai": [
            {"id": "gpt-4-turbo", "name": "GPT-4 Turbo", "description": "Most capable model"},
            {"id": "gpt-4", "name": "GPT-4", "description": "High quality responses"},
            {"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo", "description": "Fast and efficient"},
        ],
        "anthropic": [
            {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus", "description": "Most capable"},
            {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet", "description": "Balanced"},
            {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku", "description": "Fast"},
        ],
    }