"""
=============================================================================
AI SERVICE
=============================================================================
This module handles all AI provider integrations.

Supported Providers:
- OpenAI (GPT-4, GPT-3.5)
- Anthropic (Claude) - stub only; raises NotImplementedError until enabled

Architecture:
- Provider-agnostic interface (same API regardless of provider)
- Easy to add new providers
- Streaming support for real-time responses

NOTE: Only generate_text() dispatches on settings.AI_PROVIDER.
chat_completion() and stream_text() currently always use the OpenAI client.

The actual API keys and configuration come from config.py
"""

from typing import AsyncGenerator, Optional

import openai

from config import settings

# =============================================================================
# INITIALIZE CLIENTS
# =============================================================================

# Initialize OpenAI client (shared by every OpenAI-backed function below)
openai_client = openai.AsyncOpenAI(api_key=settings.OPENAI_API_KEY)

# For Anthropic, you would:
# import anthropic
# anthropic_client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)

# =============================================================================
# MODEL METADATA
# =============================================================================

# Context window size (in tokens) per known model name.
# Built once at import time instead of on every lookup.
_MODEL_CONTEXT_LIMITS = {
    # OpenAI
    "gpt-4-turbo": 128000,
    "gpt-4": 8192,
    "gpt-3.5-turbo": 16385,
    # Anthropic
    "claude-3-opus-20240229": 200000,
    "claude-3-sonnet-20240229": 200000,
    "claude-3-haiku-20240307": 200000,
}

# Fallback returned for model names that are not in the table above.
_DEFAULT_CONTEXT_LIMIT = 4096


# =============================================================================
# TEXT GENERATION
# =============================================================================

async def generate_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate text from a prompt using the configured AI provider.

    The provider is selected by settings.AI_PROVIDER.

    Args:
        prompt: The input text to generate from
        max_tokens: Maximum tokens in the response
        temperature: Creativity level (0 = deterministic, 2 = very creative)
        model: Override the default model (falls back to settings.AI_MODEL)

    Returns:
        dict with keys: text, model, usage

    Raises:
        ValueError: If settings.AI_PROVIDER is not a known provider.
        NotImplementedError: If the provider is "anthropic" (not enabled yet).

    Example:
        result = await generate_text("Write a haiku about Python")
        print(result["text"])
    """
    model = model or settings.AI_MODEL
    provider = settings.AI_PROVIDER

    if provider == "openai":
        return await _generate_openai(prompt, max_tokens, temperature, model)
    if provider == "anthropic":
        return await _generate_anthropic(prompt, max_tokens, temperature, model)

    raise ValueError(f"Unknown AI provider: {settings.AI_PROVIDER}")


async def _generate_openai(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """
    Generate text using OpenAI.

    Sends the prompt as a single user message and returns the first choice.

    Returns:
        dict with keys: text, model, usage. The usage dict holds
        prompt_tokens and completion_tokens; both are None when the
        response carries no usage information.
    """
    response = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # The SDK types `usage` as optional, so do not dereference it blindly.
    usage = response.usage
    has_usage = usage is not None

    return {
        "text": response.choices[0].message.content,
        "model": model,
        "usage": {
            "prompt_tokens": usage.prompt_tokens if has_usage else None,
            "completion_tokens": usage.completion_tokens if has_usage else None,
        },
    }


async def _generate_anthropic(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """
    Generate text using Anthropic Claude.

    NOTE: Uncomment and install anthropic package to use.

    Raises:
        NotImplementedError: Always, until the template below is enabled.
    """
    # import anthropic
    # client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
    #
    # response = await client.messages.create(
    #     model=model,
    #     max_tokens=max_tokens,
    #     messages=[{"role": "user", "content": prompt}],
    # )
    #
    # return {
    #     "text": response.content[0].text,
    #     "model": model,
    #     "usage": {
    #         "prompt_tokens": response.usage.input_tokens,
    #         "completion_tokens": response.usage.output_tokens,
    #     },
    # }
    raise NotImplementedError("Anthropic provider not configured")


# =============================================================================
# CHAT COMPLETION
# =============================================================================

async def chat_completion(
    messages: list[dict],
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate a chat response from message history.

    NOTE: This always calls the OpenAI client; settings.AI_PROVIDER is not
    consulted here.

    Args:
        messages: List of message dicts with 'role' and 'content'
        max_tokens: Maximum tokens in the response
        temperature: Creativity level
        model: Override the default model (falls back to settings.AI_MODEL)

    Returns:
        dict with keys: message, model

    Example:
        result = await chat_completion([
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ])
        print(result["message"]["content"])
    """
    model = model or settings.AI_MODEL

    response = await openai_client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return {
        "message": {
            "role": "assistant",
            "content": response.choices[0].message.content,
        },
        "model": model,
    }


# =============================================================================
# STREAMING
# =============================================================================

async def stream_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """
    Stream generated text token by token.

    This is an async generator that yields text chunks as they're generated.
    Use this for real-time chat interfaces.

    NOTE: This always calls the OpenAI client; settings.AI_PROVIDER is not
    consulted here.

    Args:
        prompt: The input text to generate from
        max_tokens: Maximum tokens in the response
        temperature: Creativity level
        model: Override the default model (falls back to settings.AI_MODEL)

    Yields:
        Non-empty text fragments, in the order they arrive.

    Example usage in FastAPI:
        @app.post("/stream")
        async def stream_endpoint(request: Request):
            return StreamingResponse(
                stream_text(request.prompt),
                media_type="text/event-stream",
            )
    """
    model = model or settings.AI_MODEL

    stream = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,  # Enable streaming
    )

    # Yield each chunk as it arrives
    async for chunk in stream:
        # A chunk may arrive with an empty `choices` list (for example a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue

        piece = chunk.choices[0].delta.content
        if piece:
            yield piece


# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================

async def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """
    Count the number of tokens in a text string.

    Useful for:
    - Estimating costs before making API calls
    - Ensuring prompts don't exceed model limits

    Note: This is an approximation. For exact counts, use tiktoken library.
    The `model` argument is accepted but not used by the approximation, and
    text shorter than four characters counts as zero tokens.

    Args:
        text: The text to measure
        model: Model name (currently ignored)

    Returns:
        Estimated token count (character count divided by four, rounded down)
    """
    # Rough approximation: ~4 characters per token for English
    return len(text) // 4


def get_model_context_limit(model: str) -> int:
    """
    Get the context window size for a model.

    Useful for knowing how much text you can send/receive.

    Args:
        model: Exact model name to look up

    Returns:
        The context window size in tokens, or 4096 when the model name is
        not in the known-models table.
    """
    return _MODEL_CONTEXT_LIMITS.get(model, _DEFAULT_CONTEXT_LIMIT)