# hack-nation/backend/services/ai_service.py
"""
=============================================================================
AI SERVICE
=============================================================================
This module handles all AI provider integrations.
Supported Providers:
- OpenAI (GPT-4, GPT-3.5)
- Anthropic (Claude)
Architecture:
- Provider-agnostic interface (same API regardless of provider)
- Easy to add new providers
- Streaming support for real-time responses
The actual API keys and configuration come from config.py
"""
from typing import AsyncGenerator, Optional
import openai
from config import settings
# =============================================================================
# INITIALIZE CLIENTS
# =============================================================================
# Initialize OpenAI client
# Module-level async client, created once at import time and shared by every
# function below. The API key comes from config.settings.
openai_client = openai.AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
# For Anthropic, you would:
# import anthropic
# anthropic_client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
# =============================================================================
# TEXT GENERATION
# =============================================================================
async def generate_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate text from a single prompt via the configured AI provider.

    Dispatches to the provider selected by ``settings.AI_PROVIDER``; the
    provider-specific helpers all return the same dict shape so callers
    never need to know which backend answered.

    Args:
        prompt: The input text to generate from.
        max_tokens: Upper bound on tokens in the response.
        temperature: Creativity level (0 = deterministic, 2 = very creative).
        model: Optional override of ``settings.AI_MODEL``.

    Returns:
        dict with keys: text, model, usage.

    Raises:
        ValueError: If ``settings.AI_PROVIDER`` is not a known provider.

    Example:
        result = await generate_text("Write a haiku about Python")
        print(result["text"])
    """
    chosen_model = model or settings.AI_MODEL
    provider = settings.AI_PROVIDER
    if provider == "openai":
        return await _generate_openai(prompt, max_tokens, temperature, chosen_model)
    if provider == "anthropic":
        return await _generate_anthropic(prompt, max_tokens, temperature, chosen_model)
    raise ValueError(f"Unknown AI provider: {provider}")
async def _generate_openai(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """Run a single-turn chat completion against OpenAI and normalize the result."""
    completion = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # Flatten the SDK response object into the provider-agnostic dict shape
    # shared with the other _generate_* helpers.
    usage = completion.usage
    return {
        "text": completion.choices[0].message.content,
        "model": model,
        "usage": {
            "prompt_tokens": usage.prompt_tokens,
            "completion_tokens": usage.completion_tokens,
        },
    }
async def _generate_anthropic(
prompt: str,
max_tokens: int,
temperature: float,
model: str,
) -> dict:
"""
Generate text using Anthropic Claude.
NOTE: Uncomment and install anthropic package to use.
"""
# import anthropic
# client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
#
# response = await client.messages.create(
# model=model,
# max_tokens=max_tokens,
# messages=[{"role": "user", "content": prompt}],
# )
#
# return {
# "text": response.content[0].text,
# "model": model,
# "usage": {
# "prompt_tokens": response.usage.input_tokens,
# "completion_tokens": response.usage.output_tokens,
# },
# }
raise NotImplementedError("Anthropic provider not configured")
# =============================================================================
# CHAT COMPLETION
# =============================================================================
async def chat_completion(
    messages: list[dict],
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate one assistant reply from a full message history.

    Unlike ``generate_text``, the caller supplies the whole conversation
    (including any system prompt) as a list of role/content dicts.

    Args:
        messages: List of message dicts with 'role' and 'content'.
        max_tokens: Upper bound on tokens in the response.
        temperature: Creativity level.
        model: Optional override of ``settings.AI_MODEL``.

    Returns:
        dict with keys: message, model.

    Example:
        result = await chat_completion([
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ])
        print(result["message"]["content"])
    """
    chosen_model = model or settings.AI_MODEL
    completion = await openai_client.chat.completions.create(
        model=chosen_model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    reply_text = completion.choices[0].message.content
    return {
        "message": {"role": "assistant", "content": reply_text},
        "model": chosen_model,
    }
# =============================================================================
# STREAMING
# =============================================================================
async def stream_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """
    Stream generated text incrementally.

    Async generator that yields text fragments as the model produces them.
    Use this for real-time chat interfaces.

    Example usage in FastAPI:
        @app.post("/stream")
        async def stream_endpoint(request: Request):
            return StreamingResponse(
                stream_text(request.prompt),
                media_type="text/event-stream",
            )
    """
    chosen_model = model or settings.AI_MODEL
    stream = await openai_client.chat.completions.create(
        model=chosen_model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,  # ask the API for incremental chunks
    )
    async for part in stream:
        # Some chunks carry no content (e.g. role-only deltas); skip those.
        fragment = part.choices[0].delta.content
        if fragment:
            yield fragment
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
async def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """
    Estimate the number of tokens in a text string.

    Useful for:
    - Estimating costs before making API calls
    - Ensuring prompts don't exceed model limits

    Args:
        text: The string to estimate.
        model: Accepted for API symmetry; the heuristic ignores it.

    Returns:
        Approximate token count (0 only for an empty string).

    Note: This is an approximation. For exact counts, use tiktoken library.
    """
    # Heuristic: ~4 characters per token for English. Use ceiling division
    # so any non-empty text counts as at least 1 token — the previous floor
    # division (len(text) // 4) reported 0 tokens for strings shorter than
    # 4 characters, underestimating cost/limit checks.
    return -(-len(text) // 4)
def get_model_context_limit(model: str) -> int:
    """
    Return the context-window size (in tokens) for *model*.

    Useful for knowing how much text you can send/receive. Unknown model
    names fall back to a conservative 4096-token window.
    """
    known_limits = {
        # OpenAI models
        "gpt-4-turbo": 128000,
        "gpt-4": 8192,
        "gpt-3.5-turbo": 16385,
        # Anthropic models
        "claude-3-opus-20240229": 200000,
        "claude-3-sonnet-20240229": 200000,
        "claude-3-haiku-20240307": 200000,
    }
    return known_limits.get(model, 4096)