feat: implement SLA compliance monitoring with latency alerting and global Toast notifications, along with related telemetry and tests.

This commit is contained in:
Tony_at_EON-DEV
2026-02-12 13:10:37 +09:00
parent ecacc58a21
commit b2bb2c38d5
15 changed files with 380 additions and 18 deletions

View File

@@ -0,0 +1,43 @@
# Walkthrough: Phase 8 Step 4 - SLA Compliance Monitoring & Latency Alerting
**Date**: 2026-02-12
**Status**: ✅ Completed
## Objective
Implement real-time latency alerting and refine SLA monitoring to ensure model responsiveness and user awareness of performance breaches.
## Implementation Details
### 1. Backend: Telemetry & SLA Breach Detection
- **[telemetry.py](file:///home/dev1/src/_GIT/awesome-agentic-ai/models/telemetry.py)**: Added a console alert `🚨 [SLA BREACH ALERT]` and database logging when latency exceeds `SLA_LATENCY_MAX_MS` (3000ms).
- **[MemoryManager.py](file:///home/dev1/src/_GIT/awesome-agentic-ai/memory/memory_manager.py)**: Integrated `model_router` to ensure all conversation LLM calls are tracked via telemetry. The `process` method now returns `sla_breach` information in the response if the last request breached the SLA.
### 2. Frontend: Real-time Alerting (Toasts)
- **[Toast.jsx](file:///home/dev1/src/_GIT/awesome-agentic-ai/web/src/components/Toast.jsx)**: A new reusable notification component with styles for `info`, `warning`, and `error` (SLA breach).
- **[PersonaContext.jsx](file:///home/dev1/src/_GIT/awesome-agentic-ai/web/src/context/PersonaContext.jsx)**: Added global toast management (`toast`, `setToast`, `showToast`) to allow any component to trigger alerts.
- **[App.jsx](file:///home/dev1/src/_GIT/awesome-agentic-ai/web/src/App.jsx)**: Updated to display global toasts.
- **[FollowUpChat.jsx](file:///home/dev1/src/_GIT/awesome-agentic-ai/web/src/chat/FollowUpChat.jsx)**: Automatically triggers an "error" toast if the backend response contains `sla_breach` data.
## Verification Results
### Automated Backend Tests
Ran `tests/verify_sla_alerting_p8_s4.py` which validates:
1. **SLA Breach Logging**: Confirmed that slow requests are correctly identified and recorded in the `sla_violations` table.
2. **MemoryManager Integration**: Confirmed that `MemoryManager.process` correctly detects breaches and returns them in the API-like result structure.
```bash
--- Testing Telemetry Violation Logging ---
Logging request with latency 3500.0ms (Threshold: 3000.0ms)
🚨 [SLA BREACH ALERT] Latency: 3500.0ms exceeds threshold: 3000.0ms
✅ Violation correctly recorded: 3500.0ms > 3000.0ms
--- Testing MemoryManager Integration ---
Processing conversation (mocked slow response > 3000.0ms)...
✅ SLA Breach detected in result: {'latency': 3500, 'threshold': 3000.0, 'type': 'LATENCY_BREACH'}
✅ SLA Alerting (Full Integration) Verification PASSED!
```
## Related Artifacts
- [Implementation Plan](file:///home/dev1/.gemini/antigravity/brain/afb17af0-1bf2-4eb0-8eef-42ba8ada470b/implementation_plan.md)
- [Task List](file:///home/dev1/.gemini/antigravity/brain/afb17af0-1bf2-4eb0-8eef-42ba8ada470b/task.md)

View File

@@ -5,7 +5,10 @@ from .config import (
SLM_ENGINE,
EMBEDDING_ENGINE,
EMBEDDING_MODEL_NAME,
DATA_DIR
DATA_DIR,
ANALYTICS_WINDOW,
LATENCY_THRESHOLD_MS,
SLA_LATENCY_MAX_MS
)
from .persona_presets import (

View File

@@ -14,6 +14,7 @@ OFFLINE_MODE = True # Set to True to enforce local LLM usage
# Analytics & Optimization
ANALYTICS_WINDOW = 5
LATENCY_THRESHOLD_MS = 2000.0 # Shift to zen if avg latency > 2s
SLA_LATENCY_MAX_MS = 3000.0 # Hard breach if latency > 3s
# Validation
if OFFLINE_MODE and LLM_ENGINE not in ["llama.cpp", "ollama", "vllm"]:

BIN
data/test_telemetry.db Normal file

Binary file not shown.

View File

@@ -66,8 +66,10 @@ from collections import deque
from utils.logger import logger
from models.llm_loader import get_llm
from agents.pattern_learner import PatternLearner
from config import PERSONA_PRESETS
from config import PERSONA_PRESETS, SLA_LATENCY_MAX_MS
from agents.persona_optimizer import optimizer as persona_optimizer
from models.model_router import get_routed_llm
from models.telemetry import UsageTracker
pattern_learner = PatternLearner()
pattern_learner.load()
@@ -170,17 +172,33 @@ class MemoryManager:
context_str = "\n".join([f'{m["role"]}: {m["content"]}' for m in messages])
prompt = f"다음을 고려하여 대화를 이어가세요 (Persona: {active_persona_id}):\n\n{context_str}"
response = self.retry_llm_sync(prompt).strip()
# Use routed LLM for telemetry support
routed_llm = get_routed_llm(prompt)
response = routed_llm.generate(prompt).strip()
messages.append({"role": "assistant", "content": response})
self.sessions[session_id] = messages
self._save_memory()
# Check for SLA breach in the last log
recent_violation = None
stats = UsageTracker.instance().get_recent_stats(limit=1)
if stats:
last_stat = stats[0]
if last_stat.get("latency_ms", 0) > SLA_LATENCY_MAX_MS:
recent_violation = {
"latency": round(last_stat["latency_ms"]),
"threshold": SLA_LATENCY_MAX_MS,
"type": "LATENCY_BREACH"
}
return {
"response": response,
"persona": active_persona_id,
"persona_shifted": optimization["shifted"],
"persona_shift_reason": optimization["reason"]
"persona_shift_reason": optimization["reason"],
"sla_breach": recent_violation
}
def get_summary(self, user_id: str = "default") -> str:

View File

@@ -4,14 +4,14 @@
"interests": [],
"history": [],
"sessions": {
"9da1f8e0aecc9d868bad115129706a77": [
"42b27efc1480b4fe6d7eaa5eec47424d": [
{
"role": "user",
"content": "Slow please"
"content": "Hello"
},
{
"role": "assistant",
"content": "Mocked Response"
"content": "Delayed Mocked Response"
}
]
}

View File

@@ -10,7 +10,7 @@ def get_llm():
elif LLM_ENGINE == "llama.cpp":
return LlamaCpp(model_path="./models/llama.bin")
elif LLM_ENGINE == "vllm":
return ChatOpenAI(openai_api_base="http://localhost:8000/v1", model="gpt-3.5-turbo")
return ChatOpenAI(openai_api_base="http://localhost:8000/v1", model="gpt-3.5-turbo", openai_api_key="mock")
else:
raise ValueError(f"Unsupported LLM engine: {LLM_ENGINE}")
@@ -20,6 +20,6 @@ def get_llm_by_name(name):
elif name == "llama.cpp":
return LlamaCpp(model_path="./models/llama.bin")
elif name == "vllm":
return ChatOpenAI(openai_api_base="http://localhost:8000/v1", model="gpt-3.5-turbo")
return ChatOpenAI(openai_api_base="http://localhost:8000/v1", model="gpt-3.5-turbo", openai_api_key="mock")
else:
raise ValueError(f"Unsupported LLM engine: {name}")

View File

@@ -40,6 +40,18 @@ class UsageTracker:
metadata TEXT
)
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS sla_violations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
log_id INTEGER,
timestamp TEXT NOT NULL,
alias TEXT NOT NULL,
latency_ms REAL,
threshold_ms REAL,
violation_type TEXT,
FOREIGN KEY(log_id) REFERENCES usage_logs(id)
)
''')
conn.commit()
def log_request(self,
@@ -65,6 +77,18 @@ class UsageTracker:
(timestamp, model_type, alias, prompt_tokens, completion_tokens, total_tokens, latency_ms, cost_usd, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (timestamp, model_type, alias, prompt_tokens, completion_tokens, total_tokens, latency_ms, cost, meta_json))
log_id = cursor.lastrowid
# SLA Check
from config.config import SLA_LATENCY_MAX_MS
if latency_ms > SLA_LATENCY_MAX_MS:
print(f"🚨 [SLA BREACH ALERT] Latency: {latency_ms:.1f}ms exceeds threshold: {SLA_LATENCY_MAX_MS}ms")
cursor.execute('''
INSERT INTO sla_violations
(log_id, timestamp, alias, latency_ms, threshold_ms, violation_type)
VALUES (?, ?, ?, ?, ?, ?)
''', (log_id, timestamp, alias, latency_ms, SLA_LATENCY_MAX_MS, "LATENCY_BREACH"))
conn.commit()
except Exception as e:
print(f"Error logging telemetry: {e}")

View File

@@ -73,3 +73,41 @@ async def get_model_performance():
return [dict(zip(columns, row)) for row in cursor.fetchall()]
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.get("/analytics/sla-violations")
async def get_sla_violations(limit: int = 10):
    """Returns recent SLA breaches."""
    try:
        with sqlite3.connect(tracker.db_path) as conn:
            # Row factory gives us name-addressable rows; dict(row) yields the
            # same column->value mapping the manual zip produced.
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT * FROM sla_violations ORDER BY id DESC LIMIT ?",
                (limit,),
            ).fetchall()
            return [dict(row) for row in rows]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/analytics/sla-compliance")
async def get_sla_compliance():
    """Returns SLA compliance percentage.

    Response shape: {"compliance_rate": float (0-100, 2 decimals),
    "total": int, "violations": int}. An empty usage log is reported
    as 100% compliant.
    """
    try:
        with sqlite3.connect(tracker.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM usage_logs')
            total = cursor.fetchone()[0]
            if total == 0:
                return {"compliance_rate": 100.0, "total": 0, "violations": 0}
            cursor.execute('SELECT COUNT(*) FROM sla_violations')
            violations = cursor.fetchone()[0]
            # Clamp at 0: stale violation rows (e.g. left behind after
            # usage_logs is purged) could otherwise drive the rate negative.
            rate = max(0.0, ((total - violations) / total) * 100)
            return {
                "compliance_rate": round(rate, 2),
                "total": total,
                "violations": violations,
            }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

View File

@@ -0,0 +1,119 @@
# tests/verify_sla_alerting_p8_s4.py
import sys
import os
import time
import sqlite3
import json  # NOTE(review): unused in this script — candidate for removal
import requests
from unittest.mock import patch, MagicMock
# Add project root to sys.path
sys.path.append(os.getcwd())
# MOCK LLM_LOADER BEFORE ANY OTHER IMPORTS
# This prevents model loading errors (like missing llama.bin)
import models.llm_loader
models.llm_loader.get_llm_by_name = MagicMock(return_value=lambda x: "Mocked Response")
models.llm_loader.get_llm = MagicMock(return_value=lambda x: "Mocked Response")
from models.telemetry import UsageTracker
from models.llm_handler import LLMHandler  # NOTE(review): only referenced via patch string below
from config.config import SLA_LATENCY_MAX_MS
def test_telemetry_violation_logging():
    """Verify that a request exceeding SLA_LATENCY_MAX_MS is recorded in
    the sla_violations table alongside its usage_logs entry."""
    print("--- Testing Telemetry Violation Logging ---")
    db_path = "data/test_telemetry.db"
    tracker = UsageTracker(db_path)

    # Wipe both tables so the assertions below see exactly one fresh row.
    if os.path.exists(db_path):
        with sqlite3.connect(db_path) as conn:
            conn.execute("DELETE FROM usage_logs")
            conn.execute("DELETE FROM sla_violations")

    # Log a single request guaranteed to breach the SLA threshold.
    breach_latency = SLA_LATENCY_MAX_MS + 500
    print(f"Logging request with latency {breach_latency}ms (Threshold: {SLA_LATENCY_MAX_MS}ms)")
    tracker.log_request(
        model_type="llm",
        alias="test-model",
        prompt_tokens=100,
        completion_tokens=50,
        latency_ms=breach_latency,
        metadata={"test": True},
    )

    # Read the DB back directly to confirm both the log and the violation.
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        logs = cursor.execute("SELECT * FROM usage_logs").fetchall()
        print(f"Usage logs found: {len(logs)}")
        violations = cursor.execute("SELECT * FROM sla_violations").fetchall()
        print(f"SLA Violations found: {len(violations)}")

    if len(violations) != 1:
        print("❌ Violation NOT recorded correctly")
        return False
    v = violations[0]
    # Columns 4/5 are latency_ms / threshold_ms in the sla_violations schema.
    print(f"✅ Violation correctly recorded: {v[4]}ms > {v[5]}ms")
    return True
def test_memory_manager_integration():
    """End-to-end check: a slow LLM response should surface as a non-empty
    `sla_breach` entry in the dict returned by MemoryManager.process()."""
    print("\n--- Testing MemoryManager Integration ---")
    # Imported lazily so the llm_loader mocks installed at module top take
    # effect before MemoryManager resolves its LLM.
    from memory.memory_manager import MemoryManager

    # We mock the LLM to control latency and avoid actual execution
    mm = MemoryManager()

    # Patch the LLMHandler.generate to inject high latency
    # NOTE(review): memory_manager routes calls through get_routed_llm(); this
    # patch assumes the routed LLM ultimately delegates to
    # LLMHandler.generate — confirm against models/model_router.py.
    with patch('models.llm_handler.LLMHandler.generate') as mock_gen:
        # We need to simulate the latency in telemetry
        def delayed_gen(*args, **kwargs):
            # Sleep just past the SLA threshold so telemetry logs a breach.
            time.sleep(SLA_LATENCY_MAX_MS / 1000 + 0.1)
            return "Delayed Mocked Response"
        mock_gen.side_effect = delayed_gen

        print(f"Processing conversation (mocked slow response > {SLA_LATENCY_MAX_MS}ms)...")
        result = mm.process("Hello", "test-user")
        print(f"Result keys: {result.keys()}")

        if "sla_breach" in result and result["sla_breach"]:
            print(f"✅ SLA Breach detected in result: {result['sla_breach']}")
        else:
            print("❌ SLA Breach NOT detected in result")
            return False
    return True
def test_api_violation_retrieval():
    """Best-effort probe of the /analytics/sla-violations endpoint.

    An unreachable server is reported as a warning and treated as success;
    only an explicit non-200 response counts as a failure."""
    print("\n--- Testing API Violation Retrieval ---")
    try:
        res = requests.get("http://localhost:8000/analytics/sla-violations")
        if res.status_code != 200:
            print(f"❌ API check failed with status {res.status_code}")
            return False
        payload = res.json()
        print(f"Recent violations from API: {len(payload)}")
        print("✅ API check successful")
    except Exception as e:
        print(f"⚠️ API server not reachable: {e}")
    return True
if __name__ == "__main__":
    success = test_telemetry_violation_logging()
    if success:
        success = test_memory_manager_integration()
    if success:
        # Fix: this check was defined but never invoked. It is non-fatal when
        # the API server is down (returns True with a warning).
        success = test_api_violation_retrieval()
    if success:
        print("\n✅ SLA Alerting (Full Integration) Verification PASSED!")
        sys.exit(0)
    else:
        print("\n❌ SLA Alerting (Full Integration) Verification FAILED!")
        sys.exit(1)

View File

@@ -64,9 +64,10 @@ import AssistantDashboard from "./dashboard/AssistantDashboard";
import MemoryMapDashboard from "./dashboard/MemoryMapDashboard";
import { PersonaProvider, usePersona } from "./context/PersonaContext";
import { LIGHT_MAP } from "./config/emotionEffects";
import Toast from "./components/Toast";
function AppContent() {
const { currentEmotion } = usePersona();
const { currentEmotion, toast, setToast } = usePersona();
const bgColor = LIGHT_MAP[currentEmotion] || "#f3f4f6";
return (
@@ -75,6 +76,13 @@ function AppContent() {
className="min-h-screen transition-colors duration-1000 p-6"
style={{ backgroundColor: bgColor }}
>
{toast && (
<Toast
message={toast.message}
type={toast.type}
onClose={() => setToast(null)}
/>
)}
<nav className="mb-6 flex flex-wrap gap-4 bg-white/50 p-4 rounded-xl backdrop-blur-sm sticky top-0 z-50">
<Link to="/" className="text-blue-600 font-semibold hover:text-blue-800 transition-colors">💬 Query</Link>
<Link to="/memory" className="text-blue-600 font-semibold hover:text-blue-800 transition-colors">🧠 Memory</Link>

View File

@@ -8,7 +8,7 @@ export default function FollowUpChat() {
const [chatHistory, setChatHistory] = useState([]);
const [input, setInput] = useState("");
const [loading, setLoading] = useState(false);
const { updatePersona, updateEmotion } = usePersona();
const { updatePersona, updateEmotion, showToast } = usePersona();
const sendMessage = async () => {
if (!input.trim()) return;
@@ -32,6 +32,14 @@ export default function FollowUpChat() {
};
setChatHistory((prev) => [...prev, assistantMessage]);
// SLA Alerting
if (res.data.sla_breach) {
showToast(
`SLA Breach! Latency: ${res.data.sla_breach.latency}ms (Limit: ${res.data.sla_breach.threshold}ms)`,
'error'
);
}
// Broadcast reaction to global context
if (res.data.emotion) {
updateEmotion(res.data.emotion.toLowerCase());

View File

@@ -0,0 +1,39 @@
import React, { useEffect } from 'react';
const Toast = ({ message, type = 'info', duration = 5000, onClose }) => {
useEffect(() => {
const timer = setTimeout(() => {
onClose();
}, duration);
return () => clearTimeout(timer);
}, [duration, onClose]);
const typeStyles = {
info: 'bg-blue-600 border-blue-400',
success: 'bg-emerald-600 border-emerald-400',
warning: 'bg-amber-600 border-amber-400',
error: 'bg-red-600 border-red-400',
};
return (
<div className={`fixed bottom-8 right-8 ${typeStyles[type]} border px-6 py-4 rounded-2xl shadow-2xl text-white transform transition-all duration-300 animate-slide-up flex items-center gap-4 z-[9999]`}>
<div className="text-2xl">
{type === 'error' ? '🚨' : type === 'warning' ? '⚠️' : ''}
</div>
<div>
<div className="font-bold text-sm uppercase tracking-wider opacity-80 mb-1">
{type === 'error' ? 'SLA Breach' : 'System Alert'}
</div>
<div className="text-white font-medium">{message}</div>
</div>
<button
onClick={onClose}
className="ml-4 hover:opacity-60 transition-opacity"
>
</button>
</div>
);
};
export default Toast;

View File

@@ -1,4 +1,4 @@
import React, { createContext, useContext, useState, useEffect } from 'react';
import React, { createContext, useContext, useState } from 'react';
const PersonaContext = createContext();
@@ -7,6 +7,7 @@ export const usePersona = () => useContext(PersonaContext);
export const PersonaProvider = ({ children }) => {
const [currentPersona, setCurrentPersona] = useState(null);
const [currentEmotion, setCurrentEmotion] = useState('neutral');
const [toast, setToast] = useState(null);
const updatePersona = (persona) => {
setCurrentPersona(persona);
@@ -19,8 +20,20 @@ export const PersonaProvider = ({ children }) => {
setCurrentEmotion(emotion);
};
const showToast = (message, type = 'info') => {
setToast({ message, type });
};
return (
<PersonaContext.Provider value={{ currentPersona, updatePersona, currentEmotion, updateEmotion }}>
<PersonaContext.Provider value={{
currentPersona,
updatePersona,
currentEmotion,
updateEmotion,
toast,
setToast,
showToast
}}>
{children}
</PersonaContext.Provider>
);

View File

@@ -30,19 +30,25 @@ const PersonaAnalyticsDashboard = () => {
const [usage, setUsage] = useState(null);
const [personaDist, setPersonaDist] = useState(null);
const [modelPerf, setModelPerf] = useState([]);
const [slaCompliance, setSlaCompliance] = useState(null);
const [slaViolations, setSlaViolations] = useState([]);
const [loading, setLoading] = useState(true);
useEffect(() => {
const fetchData = async () => {
try {
const [usageRes, distRes, perfRes] = await Promise.all([
const [usageRes, distRes, perfRes, slaCompRes, slaViolRes] = await Promise.all([
axios.get('/api/analytics/usage'),
axios.get('/api/analytics/persona-distribution'),
axios.get('/api/analytics/model-performance')
axios.get('/api/analytics/model-performance'),
axios.get('/api/analytics/sla-compliance'),
axios.get('/api/analytics/sla-violations')
]);
setUsage(usageRes.data);
setPersonaDist(distRes.data);
setModelPerf(perfRes.data);
setSlaCompliance(slaCompRes.data);
setSlaViolations(slaViolRes.data);
} catch (err) {
console.error("Failed to fetch analytics", err);
} finally {
@@ -78,11 +84,17 @@ const PersonaAnalyticsDashboard = () => {
<div className="p-8 bg-zinc-900 min-h-screen text-white">
<h1 className="text-3xl font-bold mb-8 text-indigo-400">Persona & Model Analytics</h1>
<div className="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
<div className="grid grid-cols-1 md:grid-cols-5 gap-6 mb-8">
<StatCard title="Total Tokens" value={usage?.total_tokens?.toLocaleString() || 0} icon="🪙" />
<StatCard title="Total Requests" value={usage?.total_requests || 0} icon="📩" />
<StatCard title="Avg Latency" value={`${Math.round(usage?.avg_latency || 0)}ms`} icon="⚡" />
<StatCard title="Total Cost" value={`$${usage?.total_cost?.toFixed(4) || 0}`} icon="💸" />
<StatCard
title="SLA Compliance"
value={`${slaCompliance?.compliance_rate || 100}%`}
icon="🛡️"
color={slaCompliance?.compliance_rate < 95 ? "text-red-400" : "text-emerald-400"}
/>
</div>
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
@@ -108,17 +120,53 @@ const PersonaAnalyticsDashboard = () => {
</div>
</div>
</div>
<div className="mt-8 bg-zinc-800 p-6 rounded-xl border border-zinc-700">
<h2 className="text-xl font-semibold mb-4">Recent SLA Violations</h2>
<div className="overflow-x-auto">
<table className="w-full text-left">
<thead>
<tr className="border-b border-zinc-700 text-zinc-400">
<th className="pb-3 pr-4">Timestamp</th>
<th className="pb-3 pr-4">Model</th>
<th className="pb-3 pr-4">Latency</th>
<th className="pb-3 pr-4">Threshold</th>
<th className="pb-3">Type</th>
</tr>
</thead>
<tbody className="divide-y divide-zinc-700/50">
{slaViolations.length > 0 ? slaViolations.map((v, i) => (
<tr key={i} className="text-sm">
<td className="py-3 pr-4 text-zinc-300">{new Date(v.timestamp).toLocaleString()}</td>
<td className="py-3 pr-4">{v.alias}</td>
<td className="py-3 pr-4 text-red-400">{Math.round(v.latency_ms)}ms</td>
<td className="py-3 pr-4 text-zinc-500">{v.threshold_ms}ms</td>
<td className="py-3">
<span className="bg-red-900/40 text-red-400 px-2 py-0.5 rounded border border-red-800/50 text-[10px] font-bold">
{v.violation_type}
</span>
</td>
</tr>
)) : (
<tr>
<td colSpan="5" className="py-8 text-center text-zinc-500">No violations recorded. System is healthy.</td>
</tr>
)}
</tbody>
</table>
</div>
</div>
</div>
);
};
const StatCard = ({ title, value, icon }) => (
const StatCard = ({ title, value, icon, color = "text-white" }) => (
<div className="bg-zinc-800 p-6 rounded-xl border border-zinc-700 hover:border-indigo-500 transition-colors">
<div className="flex justify-between items-center mb-2">
<span className="text-zinc-400 text-sm uppercase tracking-wider">{title}</span>
<span className="text-2xl">{icon}</span>
</div>
<div className="text-2xl font-bold">{value}</div>
<div className={`text-2xl font-bold ${color}`}>{value}</div>
</div>
);