# hack-nation/backend/services/ai_service.py
"""
=============================================================================
AI SERVICE
=============================================================================
This module handles all AI provider integrations.
Supported Providers:
- OpenAI (GPT-4, GPT-3.5)
- Anthropic (Claude)
Architecture:
- Provider-agnostic interface (same API regardless of provider)
- Easy to add new providers
- Streaming support for real-time responses
The actual API keys and configuration come from config.py
"""
from typing import AsyncGenerator, Optional
import openai
from config import settings
# =============================================================================
# INITIALIZE CLIENTS
# =============================================================================
# Initialize OpenAI client
# Module-level async client, created once at import time and shared by every
# function below. The API key comes from config.settings.
openai_client = openai.AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
# For Anthropic, you would:
# import anthropic
# anthropic_client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
# =============================================================================
# TEXT GENERATION
# =============================================================================
async def generate_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate text from a single prompt via the configured AI provider.

    Dispatches to the provider selected by ``settings.AI_PROVIDER``; the
    provider-specific helpers all return the same dict shape so callers
    never need to know which backend answered.

    Args:
        prompt: The input text to generate from.
        max_tokens: Upper bound on tokens in the response.
        temperature: Creativity level (0 = deterministic, 2 = very creative).
        model: Optional override of ``settings.AI_MODEL``.

    Returns:
        dict with keys: text, model, usage.

    Raises:
        ValueError: If ``settings.AI_PROVIDER`` is not a known provider.

    Example:
        result = await generate_text("Write a haiku about Python")
        print(result["text"])
    """
    chosen_model = model or settings.AI_MODEL
    provider = settings.AI_PROVIDER
    if provider == "openai":
        return await _generate_openai(prompt, max_tokens, temperature, chosen_model)
    if provider == "anthropic":
        return await _generate_anthropic(prompt, max_tokens, temperature, chosen_model)
    raise ValueError(f"Unknown AI provider: {provider}")
async def _generate_openai(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """Run a single-turn chat completion against OpenAI and normalize the result."""
    completion = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # Flatten the SDK response object into the provider-agnostic dict shape
    # shared with the other _generate_* helpers.
    usage = completion.usage
    return {
        "text": completion.choices[0].message.content,
        "model": model,
        "usage": {
            "prompt_tokens": usage.prompt_tokens,
            "completion_tokens": usage.completion_tokens,
        },
    }
async def _generate_anthropic(
prompt: str,
max_tokens: int,
temperature: float,
model: str,
) -> dict:
"""
Generate text using Anthropic Claude.
NOTE: Uncomment and install anthropic package to use.
"""
# import anthropic
# client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
#
# response = await client.messages.create(
# model=model,
# max_tokens=max_tokens,
# messages=[{"role": "user", "content": prompt}],
# )
#
# return {
# "text": response.content[0].text,
# "model": model,
# "usage": {
# "prompt_tokens": response.usage.input_tokens,
# "completion_tokens": response.usage.output_tokens,
# },
# }
raise NotImplementedError("Anthropic provider not configured")
# =============================================================================
# CHAT COMPLETION
# =============================================================================
async def chat_completion(
    messages: list[dict],
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate one assistant reply from a full message history.

    Unlike ``generate_text``, the caller supplies the whole conversation
    (including any system prompt) as a list of role/content dicts.

    Args:
        messages: List of message dicts with 'role' and 'content'.
        max_tokens: Upper bound on tokens in the response.
        temperature: Creativity level.
        model: Optional override of ``settings.AI_MODEL``.

    Returns:
        dict with keys: message, model.

    Example:
        result = await chat_completion([
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ])
        print(result["message"]["content"])
    """
    chosen_model = model or settings.AI_MODEL
    completion = await openai_client.chat.completions.create(
        model=chosen_model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    reply_text = completion.choices[0].message.content
    return {
        "message": {"role": "assistant", "content": reply_text},
        "model": chosen_model,
    }
# =============================================================================
# STREAMING
# =============================================================================
async def stream_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """
    Stream generated text incrementally.

    Async generator that yields text fragments as the model produces them.
    Use this for real-time chat interfaces.

    Example usage in FastAPI:
        @app.post("/stream")
        async def stream_endpoint(request: Request):
            return StreamingResponse(
                stream_text(request.prompt),
                media_type="text/event-stream",
            )
    """
    chosen_model = model or settings.AI_MODEL
    stream = await openai_client.chat.completions.create(
        model=chosen_model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,  # ask the API for incremental chunks
    )
    async for part in stream:
        # Some chunks carry no content (e.g. role-only deltas); skip those.
        fragment = part.choices[0].delta.content
        if fragment:
            yield fragment
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
async def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """
    Estimate the number of tokens in a text string.

    Useful for:
    - Estimating costs before making API calls
    - Ensuring prompts don't exceed model limits

    Args:
        text: The string to estimate.
        model: Accepted for API symmetry; the heuristic ignores it.

    Returns:
        Approximate token count (0 only for an empty string).

    Note: This is an approximation. For exact counts, use tiktoken library.
    """
    # Heuristic: ~4 characters per token for English. Use ceiling division
    # so any non-empty text counts as at least 1 token — the previous floor
    # division (len(text) // 4) reported 0 tokens for strings shorter than
    # 4 characters, underestimating cost/limit checks.
    return -(-len(text) // 4)
def get_model_context_limit(model: str) -> int:
    """
    Return the context-window size (in tokens) for *model*.

    Useful for knowing how much text you can send/receive. Unknown model
    names fall back to a conservative 4096-token window.
    """
    known_limits = {
        # OpenAI models
        "gpt-4-turbo": 128000,
        "gpt-4": 8192,
        "gpt-3.5-turbo": 16385,
        # Anthropic models
        "claude-3-opus-20240229": 200000,
        "claude-3-sonnet-20240229": 200000,
        "claude-3-haiku-20240307": 200000,
    }
    return known_limits.get(model, 4096)