251 lines
7.1 KiB
Python
251 lines
7.1 KiB
Python
"""
|
|
=============================================================================
|
|
AI SERVICE
|
|
=============================================================================
|
|
|
|
This module handles all AI provider integrations.
|
|
|
|
Supported Providers:
|
|
- OpenAI (GPT-4, GPT-3.5)
|
|
- Anthropic (Claude)
|
|
|
|
Architecture:
|
|
- Provider-agnostic interface (same API regardless of provider)
|
|
- Easy to add new providers
|
|
- Streaming support for real-time responses
|
|
|
|
The actual API keys and configuration come from config.py
|
|
"""
|
|
|
|
from typing import AsyncGenerator, Optional
|
|
import openai
|
|
from config import settings
|
|
|
|
|
|
# =============================================================================
# INITIALIZE CLIENTS
# =============================================================================

# Initialize OpenAI client.
# Async client so request handlers can await completions without blocking
# the event loop. API key comes from config.py settings.
openai_client = openai.AsyncOpenAI(api_key=settings.OPENAI_API_KEY)

# For Anthropic, you would:
# import anthropic
# anthropic_client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
|
|
|
|
|
|
# =============================================================================
|
|
# TEXT GENERATION
|
|
# =============================================================================
|
|
|
|
async def generate_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate text from a prompt using the configured AI provider.

    Dispatches to the provider-specific helper selected by
    ``settings.AI_PROVIDER``; all helpers return the same dict shape.

    Args:
        prompt: The input text to generate from.
        max_tokens: Maximum tokens in the response.
        temperature: Creativity level (0 = deterministic, 2 = very creative).
        model: Override the default model from settings.

    Returns:
        dict with keys: text, model, usage.

    Raises:
        ValueError: If ``settings.AI_PROVIDER`` is not a recognized provider.

    Example:
        result = await generate_text("Write a haiku about Python")
        print(result["text"])
    """
    chosen_model = model or settings.AI_MODEL
    provider = settings.AI_PROVIDER

    if provider == "openai":
        return await _generate_openai(prompt, max_tokens, temperature, chosen_model)
    if provider == "anthropic":
        return await _generate_anthropic(prompt, max_tokens, temperature, chosen_model)

    raise ValueError(f"Unknown AI provider: {settings.AI_PROVIDER}")
|
|
|
|
|
|
async def _generate_openai(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """Generate text using OpenAI's chat completions API.

    Wraps the single prompt as a one-message chat conversation and
    normalizes the response into the provider-agnostic dict shape.
    """
    completion = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    usage = completion.usage
    return {
        "text": completion.choices[0].message.content,
        "model": model,
        "usage": {
            "prompt_tokens": usage.prompt_tokens,
            "completion_tokens": usage.completion_tokens,
        },
    }
|
|
|
|
|
|
async def _generate_anthropic(
|
|
prompt: str,
|
|
max_tokens: int,
|
|
temperature: float,
|
|
model: str,
|
|
) -> dict:
|
|
"""
|
|
Generate text using Anthropic Claude.
|
|
|
|
NOTE: Uncomment and install anthropic package to use.
|
|
"""
|
|
# import anthropic
|
|
# client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
|
|
#
|
|
# response = await client.messages.create(
|
|
# model=model,
|
|
# max_tokens=max_tokens,
|
|
# messages=[{"role": "user", "content": prompt}],
|
|
# )
|
|
#
|
|
# return {
|
|
# "text": response.content[0].text,
|
|
# "model": model,
|
|
# "usage": {
|
|
# "prompt_tokens": response.usage.input_tokens,
|
|
# "completion_tokens": response.usage.output_tokens,
|
|
# },
|
|
# }
|
|
|
|
raise NotImplementedError("Anthropic provider not configured")
|
|
|
|
|
|
# =============================================================================
|
|
# CHAT COMPLETION
|
|
# =============================================================================
|
|
|
|
async def chat_completion(
    messages: list[dict],
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate a chat response from message history.

    Args:
        messages: List of message dicts with 'role' and 'content'.
        max_tokens: Maximum tokens in the response.
        temperature: Creativity level.
        model: Override the default model.

    Returns:
        dict with keys: message, model.

    Example:
        result = await chat_completion([
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ])
        print(result["message"]["content"])
    """
    chosen_model = model or settings.AI_MODEL

    completion = await openai_client.chat.completions.create(
        model=chosen_model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    reply_text = completion.choices[0].message.content
    return {
        "message": {
            "role": "assistant",
            "content": reply_text,
        },
        "model": chosen_model,
    }
|
|
|
|
|
|
# =============================================================================
|
|
# STREAMING
|
|
# =============================================================================
|
|
|
|
async def stream_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """
    Stream generated text token by token.

    This is an async generator that yields text chunks as they're generated.
    Use this for real-time chat interfaces.

    Args:
        prompt: The input text to generate from.
        max_tokens: Maximum tokens in the response.
        temperature: Creativity level.
        model: Override the default model.

    Yields:
        Non-empty text fragments, in order, as the model produces them.

    Example usage in FastAPI:
        @app.post("/stream")
        async def stream_endpoint(request: Request):
            return StreamingResponse(
                stream_text(request.prompt),
                media_type="text/event-stream",
            )
    """
    model = model or settings.AI_MODEL

    response = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,  # Enable streaming: server sends incremental deltas
    )

    # Yield each chunk as it arrives.
    async for chunk in response:
        # Some stream events carry an empty `choices` list (e.g. trailing
        # usage-only chunks) — indexing [0] unguarded would raise IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        # Role-only / empty deltas have no content; skip them.
        if delta and delta.content:
            yield delta.content
|
|
|
|
|
|
# =============================================================================
|
|
# UTILITY FUNCTIONS
|
|
# =============================================================================
|
|
|
|
async def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """
    Count the approximate number of tokens in a text string.

    Useful for:
    - Estimating costs before making API calls
    - Ensuring prompts don't exceed model limits

    Note: This is an approximation (roughly 4 characters per token for
    English text). For exact counts, use the tiktoken library. The
    ``model`` argument is currently unused by the approximation but kept
    for interface compatibility with an exact implementation.
    """
    chars_per_token = 4  # rough English average
    return len(text) // chars_per_token
|
|
|
|
|
|
def get_model_context_limit(model: str) -> int:
    """
    Get the context window size (in tokens) for a model.

    Useful for knowing how much text you can send/receive. Unknown models
    fall back to a conservative 4096-token default.
    """
    default_limit = 4096

    openai_limits = {
        "gpt-4-turbo": 128000,
        "gpt-4": 8192,
        "gpt-3.5-turbo": 16385,
    }
    anthropic_limits = {
        "claude-3-opus-20240229": 200000,
        "claude-3-sonnet-20240229": 200000,
        "claude-3-haiku-20240307": 200000,
    }

    known_limits = {**openai_limits, **anthropic_limits}
    return known_limits.get(model, default_limit)
|