template
backend/services/ai_service.py (new file, +250 lines)
@@ -0,0 +1,250 @@
"""
=============================================================================
AI SERVICE
=============================================================================

This module handles all AI provider integrations.

Supported Providers:
- OpenAI (GPT-4, GPT-3.5)
- Anthropic (Claude)

Architecture:
- Provider-agnostic interface (same API regardless of provider)
- Easy to add new providers
- Streaming support for real-time responses

The actual API keys and configuration come from config.py.
"""

from typing import AsyncGenerator, Optional

import openai

from config import settings


# =============================================================================
# INITIALIZE CLIENTS
# =============================================================================

# Initialize OpenAI client
openai_client = openai.AsyncOpenAI(api_key=settings.OPENAI_API_KEY)

# For Anthropic, you would:
# import anthropic
# anthropic_client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
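

# A minimal startup sanity check (illustrative sketch, not part of the
# original module): fail fast when the configured provider has no matching
# credential. Assumes config.py exposes AI_PROVIDER plus the *_API_KEY
# fields referenced above; adjust names to your actual settings object.
def check_provider_config() -> None:
    if settings.AI_PROVIDER == "openai" and not settings.OPENAI_API_KEY:
        raise RuntimeError("AI_PROVIDER is 'openai' but OPENAI_API_KEY is not set")
    # getattr() because ANTHROPIC_API_KEY may not exist until Anthropic is enabled.
    if settings.AI_PROVIDER == "anthropic" and not getattr(settings, "ANTHROPIC_API_KEY", None):
        raise RuntimeError("AI_PROVIDER is 'anthropic' but ANTHROPIC_API_KEY is not set")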


# =============================================================================
# TEXT GENERATION
# =============================================================================

async def generate_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate text from a prompt using the configured AI provider.

    Args:
        prompt: The input text to generate from
        max_tokens: Maximum tokens in the response
        temperature: Sampling temperature (0 = deterministic, 2 = very random)
        model: Override the default model

    Returns:
        dict with keys: text, model, usage

    Example:
        result = await generate_text("Write a haiku about Python")
        print(result["text"])
    """
    model = model or settings.AI_MODEL

    if settings.AI_PROVIDER == "openai":
        return await _generate_openai(prompt, max_tokens, temperature, model)
    elif settings.AI_PROVIDER == "anthropic":
        return await _generate_anthropic(prompt, max_tokens, temperature, model)
    else:
        raise ValueError(f"Unknown AI provider: {settings.AI_PROVIDER}")
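

# Adding a provider (illustrative sketch): extend the dispatch in
# generate_text() with one more branch and implement a matching private
# helper that returns the same dict shape ({"text", "model", "usage"}).
# "mistral" below is a hypothetical example, not a provider this module ships:
#
#     elif settings.AI_PROVIDER == "mistral":
#         return await _generate_mistral(prompt, max_tokens, temperature, model)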


async def _generate_openai(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """Generate text using OpenAI."""
    response = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return {
        "text": response.choices[0].message.content,
        "model": model,
        "usage": {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
        },
    }


async def _generate_anthropic(
    prompt: str,
    max_tokens: int,
    temperature: float,
    model: str,
) -> dict:
    """
    Generate text using Anthropic Claude.

    NOTE: Uncomment and install the anthropic package to use.
    """
    # import anthropic
    # client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
    #
    # response = await client.messages.create(
    #     model=model,
    #     max_tokens=max_tokens,
    #     temperature=temperature,
    #     messages=[{"role": "user", "content": prompt}],
    # )
    #
    # return {
    #     "text": response.content[0].text,
    #     "model": model,
    #     "usage": {
    #         "prompt_tokens": response.usage.input_tokens,
    #         "completion_tokens": response.usage.output_tokens,
    #     },
    # }

    raise NotImplementedError("Anthropic provider not configured")


# =============================================================================
# CHAT COMPLETION
# =============================================================================

async def chat_completion(
    messages: list[dict],
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> dict:
    """
    Generate a chat response from message history.

    Args:
        messages: List of message dicts with 'role' and 'content'
        max_tokens: Maximum tokens in the response
        temperature: Sampling temperature
        model: Override the default model

    Returns:
        dict with keys: message, model

    Example:
        result = await chat_completion([
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ])
        print(result["message"]["content"])
    """
    model = model or settings.AI_MODEL

    # NOTE: unlike generate_text(), this calls OpenAI directly and does not
    # consult settings.AI_PROVIDER.
    response = await openai_client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return {
        "message": {
            "role": "assistant",
            "content": response.choices[0].message.content,
        },
        "model": model,
    }
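

# Multi-turn usage (illustrative sketch): append each assistant reply back
# onto the history before the next call, so the model sees the whole
# conversation. The message shape matches chat_completion()'s return value:
#
#     history = [{"role": "user", "content": "Hello!"}]
#     reply = await chat_completion(history)
#     history.append(reply["message"])
#     history.append({"role": "user", "content": "Tell me more."})
#     reply = await chat_completion(history)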


# =============================================================================
# STREAMING
# =============================================================================

async def stream_text(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """
    Stream generated text chunk by chunk (typically a few tokens at a time).

    This is an async generator that yields text chunks as they're generated.
    Use this for real-time chat interfaces.

    Example usage in FastAPI (assumes a Pydantic model `PromptRequest` with a
    `prompt` field; a raw `Request` object has no `.prompt` attribute):
        @app.post("/stream")
        async def stream_endpoint(body: PromptRequest):
            return StreamingResponse(
                stream_text(body.prompt),
                media_type="text/event-stream",
            )
    """
    model = model or settings.AI_MODEL

    response = await openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,  # Enable streaming
    )

    # Yield each chunk as it arrives (some chunks carry no content delta).
    async for chunk in response:
        if chunk.choices and chunk.choices[0].delta.content:
            yield chunk.choices[0].delta.content
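

# Server-Sent Events framing (illustrative sketch, not part of the original
# module): many "text/event-stream" clients expect each chunk wrapped as a
# "data: ...\n\n" event; the "[DONE]" terminator is a common convention, not
# a requirement. A production version would also split multi-line chunks
# across several "data:" lines.
async def stream_text_sse(
    prompt: str,
    max_tokens: int = 500,
    temperature: float = 0.7,
    model: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """Wrap stream_text() output in SSE event framing."""
    async for chunk in stream_text(prompt, max_tokens, temperature, model):
        # One SSE event per chunk: a "data:" line followed by a blank line.
        yield f"data: {chunk}\n\n"
    yield "data: [DONE]\n\n"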


# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================

async def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """
    Count the number of tokens in a text string.

    Useful for:
    - Estimating costs before making API calls
    - Ensuring prompts don't exceed model limits

    Note: This is an approximation. For exact counts, use the tiktoken library.
    """
    # Rough approximation: ~4 characters per token for English text.
    # The model argument is unused here; it only matters for exact tokenizers.
    return len(text) // 4
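

# Exact counting with tiktoken (illustrative sketch, not an original helper;
# requires `pip install tiktoken`):
def count_tokens_exact(text: str, model: str = "gpt-3.5-turbo") -> int:
    """Exact token count using tiktoken's encoding for the given model."""
    import tiktoken  # local import keeps the dependency optional

    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to a widely used base encoding.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))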


def get_model_context_limit(model: str) -> int:
    """
    Get the context window size (in tokens) for a model.

    Useful for knowing how much text you can send/receive.
    """
    limits = {
        # OpenAI
        "gpt-4-turbo": 128000,
        "gpt-4": 8192,
        "gpt-3.5-turbo": 16385,
        # Anthropic
        "claude-3-opus-20240229": 200000,
        "claude-3-sonnet-20240229": 200000,
        "claude-3-haiku-20240307": 200000,
    }
    return limits.get(model, 4096)
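

# Combining the utilities (illustrative sketch, added for illustration): a
# rough pre-flight check that a prompt plus the requested completion budget
# fits the model's context window. Uses the approximate counter above, so
# leave some headroom.
async def prompt_fits(prompt: str, model: str, max_tokens: int = 500) -> bool:
    """Return True if prompt + completion budget fits the context window."""
    used = await count_tokens(prompt, model)
    return used + max_tokens <= get_model_context_limit(model)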