Files

153 lines
6.5 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"context"
"log"
"strings"
"sync"
"time"
"ai-platform/internal/config"
"ai-platform/internal/model"
)
// Router is the LLM-based request router: it classifies a request with the
// model named by cfg.RouterModel and returns the name of the target model.
// Classification results are cached in memory, keyed by the user message that
// Route selects from the conversation, so repeated requests with the same key
// get the same model without another LLM call. Nothing in this file evicts
// cache entries.
type Router struct {
// client performs the classification call (Route uses CompleteWithSystem).
client *OllamaClient
// cfg supplies the router model name and the code/document/general model names.
cfg *config.Config
// Cache: selected user message → model name. mu guards cache.
mu sync.RWMutex
cache map[string]string
}
// NewRouter returns a Router that classifies through client and resolves model
// names from cfg. The classification cache starts out empty.
func NewRouter(client *OllamaClient, cfg *config.Config) *Router {
	router := new(Router)
	router.client = client
	router.cfg = cfg
	router.cache = map[string]string{}
	return router
}
// Route picks the target model for a request.
//
// An explicit modelName (anything other than "" or "auto") is returned as is.
// Otherwise the conversation is reduced to a single user message by
// firstRealUserMessage; that message is both the classifier input and the
// cache key. If no such message exists the general model is returned.
//
// On a cache miss the router model is asked to classify the message (with a
// 30-second timeout derived from ctx) and the resulting model name is cached.
// If the classification call fails, the general model is returned and nothing
// is cached, so the next request with the same key tries again.
func (r *Router) Route(ctx context.Context, modelName string, messages []model.Message) string {
	// The caller asked for a specific model: no routing needed.
	switch modelName {
	case "", "auto":
	default:
		return modelName
	}

	// The selected user message identifies the conversation in the cache.
	key := firstRealUserMessage(messages)
	if key == "" {
		return r.cfg.GeneralModel
	}

	// Fast path: this message has already been classified.
	r.mu.RLock()
	target, hit := r.cache[key]
	r.mu.RUnlock()
	if hit {
		log.Printf("router: cache hit → %s", target)
		return target
	}

	// Slow path: ask the router model. The lock is not held during the call.
	instructions := "You are a router in a multi-model AI system. Your job is to decide which specialist model should handle the user's request. There are three specialist models:\n\n" +
		"1. CODE model — a programming expert. It should handle ANY request where the end goal is producing, modifying, explaining, or debugging software. " +
		"This includes writing code, creating scripts, designing algorithms, working with APIs, databases, DevOps, infrastructure, configuration files, " +
		"or anything else where the output is meant to be executed by a computer. If the user's intent is to build or fix something technical, choose code.\n\n" +
		"2. DOCUMENT model — a text processing expert. It should handle ANY request where the user wants to work WITH existing text or produce written content that is NOT code. " +
		"This includes analyzing, summarizing, translating, rewriting, extracting key points, proofreading, comparing texts, writing essays, reports, articles, letters, " +
		"or any other task where the primary material is natural language text and the output is also natural language text. " +
		"The key distinction: if the user gives you text and wants you to DO something with that text (understand it, transform it, evaluate it), choose document.\n\n" +
		"3. GENERAL model — a conversational assistant. It handles everything that does not fit the other two categories: casual chat, factual questions about the world, " +
		"opinions, recommendations, brainstorming ideas, explanations of non-technical concepts. Choose general ONLY when the request is clearly neither about code nor about text processing.\n\n" +
		"DECISION LOGIC:\n" +
		"- Ask yourself: does the user want SOFTWARE as a result? → code\n" +
		"- Ask yourself: does the user want to PROCESS or PRODUCE written text? → document\n" +
		"- Neither of the above? → general\n" +
		"- When in doubt between code and general → choose code\n" +
		"- When in doubt between document and general → choose document\n" +
		"- general is the LAST resort, not the default\n\n" +
		"Reply with ONLY one word: code, document, or general. Nothing else."

	callCtx, stop := context.WithTimeout(ctx, 30*time.Second)
	defer stop()

	reply, err := r.client.CompleteWithSystem(callCtx, r.cfg.RouterModel, instructions, key)
	if err != nil {
		log.Printf("router: classification failed: %v, falling back to general", err)
		return r.cfg.GeneralModel
	}

	// Remember the decision so later requests with the same key reuse it.
	target = r.categoryToModel(reply)
	r.mu.Lock()
	r.cache[key] = target
	r.mu.Unlock()
	log.Printf("router: cached → %s", target)
	return target
}
// categoryToModel parses the classifier's raw reply and returns the matching
// model name from the configuration.
//
// The router prompt asks for exactly one word, so the first word of the reply
// (lower-cased, with surrounding punctuation/markdown stripped) is honoured
// when it is exactly "code", "document" or "general". This keeps a reply such
// as "general. This is not a code question." from being routed to the code
// model just because it mentions another label.
//
// If the first word is not a known label, the whole reply is scanned for
// English and Russian keywords, code before document; anything unrecognised
// (including an empty reply) maps to the general model.
func (r *Router) categoryToModel(response string) string {
	category := strings.ToLower(strings.TrimSpace(response))

	// Step 1: trust a well-formed answer — the leading word is the label.
	label := ""
	if words := strings.Fields(category); len(words) > 0 {
		switch first := strings.Trim(words[0], ".,:;!?\"'`*()[]"); first {
		case "code", "document", "general":
			label = first
		}
	}

	// Step 2: free-form answer — fall back to keyword search.
	if label == "" {
		switch {
		case strings.Contains(category, "code") ||
			strings.Contains(category, "код") ||
			strings.Contains(category, "програм"):
			label = "code"
		// "doc" also covers "document", so no separate check is needed.
		case strings.Contains(category, "doc") ||
			strings.Contains(category, "докум") ||
			strings.Contains(category, "текст") ||
			strings.Contains(category, "анализ"):
			label = "document"
		default:
			label = "general"
		}
	}

	switch label {
	case "code":
		log.Printf("router: classified as CODE → %s (raw: %q)", r.cfg.CodeModel, response)
		return r.cfg.CodeModel
	case "document":
		log.Printf("router: classified as DOCUMENT → %s (raw: %q)", r.cfg.DocModel, response)
		return r.cfg.DocModel
	default:
		log.Printf("router: classified as GENERAL → %s (raw: %q)", r.cfg.GeneralModel, response)
		return r.cfg.GeneralModel
	}
}
// firstRealUserMessage selects the user message that represents the
// conversation for routing purposes.
//
// It returns the content of the last user message that precedes the first
// assistant reply and is not recognised as boilerplate by isSystemBoilerplate
// (Codex CLI system instructions are skipped this way). If that yields nothing,
// it falls back to the most recent non-boilerplate user message anywhere in
// the conversation. An empty string means no usable user message was found.
func firstRealUserMessage(messages []model.Message) string {
	// Locate the end of the opening stretch: everything before the first
	// assistant turn (or the whole slice if the assistant has not spoken yet).
	head := len(messages)
	for i := range messages {
		if messages[i].Role == "assistant" {
			head = i
			break
		}
	}

	// Walk the opening stretch backwards; the first hit is its last real user
	// message. An empty hit counts as "nothing found" and triggers the fallback.
	for i := head - 1; i >= 0; i-- {
		msg := messages[i]
		if msg.Role != "user" || isSystemBoilerplate(msg.Content) {
			continue
		}
		if msg.Content != "" {
			return msg.Content
		}
		break
	}

	// Fallback: newest real user message in the entire conversation.
	for i := len(messages) - 1; i >= 0; i-- {
		msg := messages[i]
		if msg.Role == "user" && !isSystemBoilerplate(msg.Content) {
			return msg.Content
		}
	}
	return ""
}
// isSystemBoilerplate reports whether content looks like Codex CLI system
// instructions rather than a request typed by the user.
//
// A message is treated as boilerplate when it is longer than 500 characters
// or when it contains, case-insensitively, one of the known Codex CLI markers
// ("<instructions>", "<permissions", "<environment_context>",
// "<collaboration_mode>", "agents.md").
func isSystemBoilerplate(content string) bool {
	const maxChars = 500

	// Count characters (runes), not bytes: with a byte count, non-ASCII text
	// such as Cyrillic would hit the limit at roughly half the intended length.
	// The byte-length test first skips the rune count for short messages.
	if len(content) > maxChars && len([]rune(content)) > maxChars {
		return true
	}

	lower := strings.ToLower(content)
	for _, marker := range []string{
		"<instructions>",
		"<permissions",
		"<environment_context>",
		"<collaboration_mode>",
		"agents.md",
	} {
		if strings.Contains(lower, marker) {
			return true
		}
	}
	return false
}