"""
=============================================================================
AI ROUTER
=============================================================================

This router handles all AI-related endpoints.

Features:
- Text generation with OpenAI or Anthropic
- Streaming responses for real-time output
- Multiple AI models support

Architecture:
- Endpoints are thin - they just validate input and return output
- Business logic is in the services module
- This separation makes testing easier
"""
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel, Field
|
|
from typing import Optional
|
|
|
|
# Import our AI service
|
|
from services.ai_service import generate_text, stream_text, chat_completion
|
|
|
|
# Router instance the endpoints below register against; the application
# mounts it (the URL prefix is configured where this module is included).
router = APIRouter()
|
|
|
|
|
|
# =============================================================================
# REQUEST/RESPONSE MODELS
# =============================================================================
|
|
|
|
class GenerateRequest(BaseModel):
    """Input payload for the /generate and /stream endpoints."""

    # The user's prompt; bounded so a single request stays reasonable.
    prompt: str = Field(..., min_length=1, max_length=10000)
    # Generation knobs with sane defaults; ranges cap what callers may ask for.
    max_tokens: Optional[int] = Field(500, ge=1, le=4000)
    temperature: Optional[float] = Field(0.7, ge=0, le=2)
|
|
|
|
|
|
class GenerateResponse(BaseModel):
    """Payload returned by the /generate endpoint."""

    # The generated text.
    text: str
    # Identifier of the model that produced the text.
    model: str
    # Usage metadata from the service, if any was reported.
    usage: Optional[dict] = None
|
|
|
|
|
|
class ChatMessage(BaseModel):
    """One turn of a chat conversation."""

    # Restricted to the three roles the chat endpoint accepts.
    role: str = Field(..., pattern="^(user|assistant|system)$")
    # Message body; must be non-empty.
    content: str = Field(..., min_length=1)
|
|
|
|
|
|
class ChatRequest(BaseModel):
    """Request body for chat completion."""

    # Full conversation history, oldest first; at least one message required.
    # Fix: `min_items` is the deprecated Pydantic v1 spelling -- this file
    # already uses v2 APIs (model_dump, pattern=), so use `min_length`.
    messages: list[ChatMessage] = Field(..., min_length=1)
    # Generation knobs with sane defaults; ranges cap what callers may ask for.
    max_tokens: Optional[int] = Field(default=500, ge=1, le=4000)
    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)
|
|
|
|
|
|
class ChatResponse(BaseModel):
    """Payload returned by the /chat endpoint."""

    # The assistant's reply as a structured chat message.
    message: ChatMessage
    # Identifier of the model that produced the reply.
    model: str
|
|
|
|
|
|
# =============================================================================
# ENDPOINTS
# =============================================================================
|
|
|
|
@router.post("/generate", response_model=GenerateResponse)
|
|
async def generate_endpoint(request: GenerateRequest):
|
|
"""
|
|
Generate text from a prompt.
|
|
|
|
This is a simple completion endpoint - give it a prompt,
|
|
get back generated text.
|
|
|
|
Example:
|
|
POST /api/ai/generate
|
|
{"prompt": "Write a haiku about coding"}
|
|
"""
|
|
try:
|
|
result = await generate_text(
|
|
prompt=request.prompt,
|
|
max_tokens=request.max_tokens,
|
|
temperature=request.temperature,
|
|
)
|
|
return GenerateResponse(**result)
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/chat", response_model=ChatResponse)
|
|
async def chat_endpoint(request: ChatRequest):
|
|
"""
|
|
Chat completion with message history.
|
|
|
|
Send a list of messages (conversation history) and get
|
|
the assistant's response.
|
|
|
|
Example:
|
|
POST /api/ai/chat
|
|
{
|
|
"messages": [
|
|
{"role": "system", "content": "You are a helpful assistant."},
|
|
{"role": "user", "content": "Hello!"}
|
|
]
|
|
}
|
|
"""
|
|
try:
|
|
# Convert Pydantic models to dicts
|
|
messages = [m.model_dump() for m in request.messages]
|
|
|
|
result = await chat_completion(
|
|
messages=messages,
|
|
max_tokens=request.max_tokens,
|
|
temperature=request.temperature,
|
|
)
|
|
return ChatResponse(
|
|
message=ChatMessage(**result["message"]),
|
|
model=result["model"],
|
|
)
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/stream")
|
|
async def stream_endpoint(request: GenerateRequest):
|
|
"""
|
|
Stream generated text in real-time.
|
|
|
|
Uses Server-Sent Events (SSE) to stream the response
|
|
token by token. Great for chat interfaces!
|
|
|
|
The frontend can read this with:
|
|
const response = await fetch("/api/ai/stream", {...});
|
|
const reader = response.body.getReader();
|
|
"""
|
|
try:
|
|
return StreamingResponse(
|
|
stream_text(
|
|
prompt=request.prompt,
|
|
max_tokens=request.max_tokens,
|
|
temperature=request.temperature,
|
|
),
|
|
media_type="text/event-stream",
|
|
)
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/models")
|
|
async def list_models():
|
|
"""
|
|
List available AI models.
|
|
|
|
Useful for letting users choose which model to use.
|
|
"""
|
|
return {
|
|
"openai": [
|
|
{"id": "gpt-4-turbo", "name": "GPT-4 Turbo", "description": "Most capable model"},
|
|
{"id": "gpt-4", "name": "GPT-4", "description": "High quality responses"},
|
|
{"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo", "description": "Fast and efficient"},
|
|
],
|
|
"anthropic": [
|
|
{"id": "claude-3-opus-20240229", "name": "Claude 3 Opus", "description": "Most capable"},
|
|
{"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet", "description": "Balanced"},
|
|
{"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku", "description": "Fast"},
|
|
],
|
|
}
|