Add/update security and governance related changes

This commit is contained in:
2025-12-05 19:03:04 +09:00
parent 73d941d98a
commit f773f3bae8
41 changed files with 1469 additions and 87 deletions

38
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
name: CI
on:
push:
branches: [ "main", "develop" ]
pull_request:
branches: [ "main", "develop" ]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest pytest-cov
# - name: Run tests
# run: pytest -v --maxfail=1 --disable-warnings -q
- name: Run tests with coverage
run: pytest --cov=models --cov=routes --cov-report=xml --cov-report=term-missing
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }} # add in repo secrets
files: ./coverage.xml
fail_ci_if_error: true

27
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,27 @@
stages:
- test
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
cache:
paths:
- .cache/pip
test:
stage: test
image: python:3.11
before_script:
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install pytest pytest-cov
script:
# - pytest -v --maxfail=1 --disable-warnings -q
- pytest --cov=models --cov=routes --cov-report=xml --cov-report=term-missing
artifacts:
paths:
- coverage.xml
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml

View File

@@ -13,5 +13,6 @@ COPY . .
# Expose FastAPI port
EXPOSE 8000
# Run FastAPI app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
# Default command runs main backend (FastAPI)
# Can be overridden in docker-compose for MoE router
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -1,2 +1,31 @@
# Awesome-Agentic-AI
## GitHub Badges (Actions + Codecov + Release + License)
[![CI](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/actions/workflows/ci.yml/badge.svg)](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/branch/main/graph/badge.svg?token=<CODECOV_TOKEN>)](https://codecov.io/gh/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)
[![GitHub release](https://img.shields.io/github/v/release/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>?include_prereleases&sort=semver)](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/releases)
[![License](https://img.shields.io/github/license/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/blob/main/LICENSE)
### Dependencies
#### Using Requires.io (tracks outdated packages)
[![Requirements Status](https://requires.io/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/requirements.svg?branch=main)](https://requires.io/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/requirements/?branch=main)
#### Using Snyk (tracks vulnerabilities)
[![Known Vulnerabilities](https://snyk.io/test/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/badge.svg)](https://snyk.io/test/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)
## GitLab Badges (Pipeline + Coverage + Release + License)
[![pipeline status](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/badges/main/pipeline.svg)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/pipelines)
[![coverage report](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/badges/main/coverage.svg)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/commits/main)
[![release](https://img.shields.io/gitlab/v/release/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>?sort=semver)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/releases)
[![License](https://img.shields.io/gitlab/license/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/blob/main/LICENSE)
### Dependencies
[![Requirements Status](https://requires.io/gitlab/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/requirements.svg?branch=main)](https://requires.io/gitlab/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/requirements/?branch=main)

View File

@@ -1,7 +1,10 @@
# agents/orchestrator.py
from agents.agent_registry import get_agent
from models.model_router import get_routed_llm
from utils.language_utils import detect_model_profile
# from deployment.deployment_router import route_deployment
from deployment.deployment_router import DeploymentRouter
class Orchestrator:
def __init__(self, memory):
@@ -9,6 +12,7 @@ class Orchestrator:
self.planner = get_agent("planner")
self.executor = get_agent("executor")
self.critic = get_agent("critic")
self.deployment_router = DeploymentRouter()
async def run_task(self, task: str, user_id: str):
context = self.memory.get_context()
@@ -16,10 +20,6 @@ class Orchestrator:
# executor = get_agent("executor")
# critic = get_agent("critic")
# plan = planner.run(task, context)
# result = executor.run(plan, context)
# feedback = critic.run(result, context)
plan = self.planner.run(task, context)
result = self.executor.run(plan, context)
feedback = self.critic.run(result, context)
@@ -42,10 +42,19 @@ class Orchestrator:
def route_request(prompt: str, context: dict):
profile = detect_model_profile(prompt)
model = get_routed_llm(prompt)
# # model = get_routed_llm(prompt)
# # return {
# # "model": str(model),
# # "profile": profile,
# # "response": f"[{model}] response to: {prompt}" # Replace with actual call
# # }
# routed = route_deployment(prompt, context)
# return {
# "profile": profile,
# **routed
# }
routed = self.deployment_router.route(prompt, context)
return {
"model": str(model),
"profile": profile,
"response": f"[{model}] response to: {prompt}" # Replace with actual call
**routed
}

View File

@@ -0,0 +1,28 @@
# deployment/deployment_router.py
from models.model_router import get_routed_llm
from models.moe_handler import MoEHandler
moe_handler = MoEHandler()
def route_deployment(prompt: str, context: dict):
    """Route an inference request to an edge, cloud, or hybrid deployment.

    Args:
        prompt: The user prompt to run inference on.
        context: Request context; ``deployment_mode`` selects the target
            ("edge", "cloud", or "hybrid"; defaults to "hybrid").

    Returns:
        dict with a "mode" key plus the mode-specific output field(s).
    """
    mode = context.get("deployment_mode", "hybrid")
    if mode == "edge":
        # Lightweight model for on-device / edge inference.
        model = get_routed_llm(prompt, lightweight=True)
        return {"mode": "edge", "response": model.generate(prompt)}
    elif mode == "cloud":
        # Full MoE routing in the cloud.
        output = moe_handler.generate(prompt)
        return {"mode": "cloud", "response": output}
    elif mode == "hybrid":
        # Quick edge answer plus a refined cloud answer.
        edge_model = get_routed_llm(prompt, lightweight=True)
        edge_output = edge_model.generate(prompt)
        cloud_output = moe_handler.generate(prompt)
        return {
            "mode": "hybrid",
            "edge_output": edge_output,
            "cloud_output": cloud_output,
        }
    # BUG FIX: previously an unknown mode fell through and returned None.
    # Fall back to the default-routed model (matches DeploymentRouter.route).
    model = get_routed_llm(prompt)
    return {"mode": "default", "response": model.generate(prompt)}

View File

@@ -0,0 +1,61 @@
# deployment/deployment_router.py
from models.model_router import get_routed_llm
from models.moe_handler import MoEHandler
class DeploymentRouter:
    """Routes inference requests across edge, cloud, and hybrid deployments."""

    def __init__(self):
        # One MoE handler instance, reused for every cloud-side request.
        self.moe_handler = MoEHandler()

    def route(self, prompt: str, context: dict = None):
        """Dispatch *prompt* according to ``context["deployment_mode"]``.

        Supported modes: "edge", "cloud", "hybrid"; any other value falls
        back to the default-routed model.
        """
        mode = (context or {}).get("deployment_mode", "hybrid")

        if mode == "edge":
            # Lightweight model suitable for edge/mobile devices.
            edge_llm = get_routed_llm(prompt, lightweight=True)
            return {
                "mode": "edge",
                "model": str(edge_llm),
                "response": edge_llm.generate(prompt),
            }

        if mode == "cloud":
            # Full mixture-of-experts routing in the cloud.
            return {
                "mode": "cloud",
                "response": self.moe_handler.generate(prompt),
            }

        if mode == "hybrid":
            # Fast edge answer first, then a refined cloud answer.
            edge_llm = get_routed_llm(prompt, lightweight=True)
            return {
                "mode": "hybrid",
                "edge_output": edge_llm.generate(prompt),
                "cloud_output": self.moe_handler.generate(prompt),
            }

        # Unknown mode — fall back to the default-routed model.
        fallback = get_routed_llm(prompt)
        return {
            "mode": "default",
            "model": str(fallback),
            "response": fallback.generate(prompt),
        }

View File

@@ -9,6 +9,7 @@ services:
volumes:
- .:/app
restart: always
command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
frontend:
build:
@@ -20,4 +21,23 @@ services:
volumes:
- ./web:/web
working_dir: /web
restart: always
restart: always
moe-router:
build: .
container_name: agentic-moe-router
ports:
- "8080:8080"
volumes:
- .:/app
restart: always
command: ["uvicorn", "models.moe_handler:app", "--host", "0.0.0.0", "--port", "8080"]  # NOTE(review): models/moe_handler.py does not define a FastAPI `app` in this commit — verify before deploying
prometheus:
image: prom/prometheus:latest
container_name: agentic-prometheus
ports:
- "9090:9090"
volumes:
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
restart: always

View File

@@ -1,10 +1,12 @@
# governance/policy_registry.py
##INFO:
import time
class PolicyRegistry:
def __init__(self):
self.policies = {} # {tenant_id: {allowed_roles, restricted_tasks, audit_level}}
self.global_policies = self._load_global_policies()
self.audit_log = [] # list of {timestamp, tenant_id, role, action, violation, reason}
def set_policy(self, tenant_id: str, allowed_roles: list, restricted_tasks: list, audit_level: str = "standard"):
self.policies[tenant_id] = {
@@ -60,4 +62,30 @@ class PolicyRegistry:
def list_global_policies(self):
return list(self.global_policies.keys())
# -----------------------------
# New Audit & Violation Logging
# -----------------------------
def log_violation(self, tenant_id: str, role: str, action: str, reason: str):
    """Record a policy violation in the in-memory audit log.

    Returns the appended audit record (timestamped, violation=True).
    """
    record = dict(
        timestamp=time.time(),
        tenant_id=tenant_id,
        role=role,
        action=action,
        violation=True,
        reason=reason,
    )
    self.audit_log.append(record)
    return record
def get_audit_log(self, tenant_id: str = None):
    """Return audit records, optionally filtered to a single tenant."""
    if not tenant_id:
        # No filter requested — expose the full log.
        return self.audit_log
    return [rec for rec in self.audit_log if rec["tenant_id"] == tenant_id]
def clear_audit_log(self, tenant_id: str = None):
    """Drop audit records — all of them, or only one tenant's entries."""
    if tenant_id:
        # Keep everything that does NOT belong to the given tenant.
        remaining = [rec for rec in self.audit_log if rec["tenant_id"] != tenant_id]
    else:
        remaining = []
    self.audit_log = remaining
policy_registry = PolicyRegistry()

View File

@@ -0,0 +1,62 @@
{
"id": null,
"title": "Combined Overview Dashboard",
"panels": [
{
"type": "piechart",
"title": "Request Distribution by Model Type & Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (model_type, deployment_mode)",
"legendFormat": "{{model_type}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "95th Percentile Latency by Model Type & Deployment Mode",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))",
"legendFormat": "{{model_type}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "stat",
"title": "Success Rate by Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"True\"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Error Rate Over Time by Agent & Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (agent_role, deployment_mode)",
"legendFormat": "{{agent_role}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Expert Usage by Tenant/Agent/Action/Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (tenant_id, agent_role, action, deployment_mode)",
"legendFormat": "{{tenant_id}}-{{agent_role}}-{{action}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-12h", "to": "now" }
}

View File

@@ -0,0 +1,28 @@
{
"title": "Deployment Mode Overview",
"panels": [
{
"type": "piechart",
"title": "Request Distribution by Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (deployment_mode)",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Latency Trends by Mode",
"targets": [
{
"expr": "avg(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) by (deployment_mode)",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-12h", "to": "now" }
}

View File

@@ -0,0 +1,18 @@
{
"id": null,
"title": "Expert Usage Dashboard",
"panels": [
{
"type": "graph",
"title": "Requests per Expert by Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (tenant_id, agent_role, action, deployment_mode)",
"legendFormat": "{{tenant_id}}-{{agent_role}}-{{action}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-6h", "to": "now" }
}

View File

@@ -0,0 +1,18 @@
{
"id": null,
"title": "Request Latency Dashboard",
"panels": [
{
"type": "graph",
"title": "Latency per Model Type & Deployment Mode",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))",
"legendFormat": "{{model_type}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-1h", "to": "now" }
}

View File

@@ -0,0 +1,55 @@
{
"id": null,
"title": "Policy Violations Dashboard",
"panels": [
{
"type": "graph",
"title": "Violations Over Time by Tenant",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (tenant_id)",
"legendFormat": "{{tenant_id}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Violations by Role",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (role)",
"legendFormat": "{{role}}"
}
],
"datasource": "Prometheus"
},
{
"type": "table",
"title": "Top Restricted Actions Triggered",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (action)",
"legendFormat": "{{action}}"
}
],
"datasource": "Prometheus",
"columns": [
{ "text": "Action", "value": "action" },
{ "text": "Violations", "value": "Value" }
]
},
{
"type": "stat",
"title": "Total Violations (24h)",
"targets": [
{
"expr": "sum(increase(policy_violations_total[24h]))",
"legendFormat": "Total"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-24h", "to": "now" }
}

View File

@@ -0,0 +1,79 @@
{
"id": null,
"title": "SLA Compliance Dashboard",
"panels": [
{
"type": "stat",
"title": "Success Rate per Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"True\"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Error Rate Over Time by Agent & Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (agent_role, deployment_mode)",
"legendFormat": "{{agent_role}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Latency vs SLA Thresholds",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, deployment_mode))",
"legendFormat": "Latency-{{deployment_mode}}"
},
{
"expr": "(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) > 500",
"legendFormat": "SLA Breach Threshold"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Policy Violations vs SLA Breaches",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (tenant_id)",
"legendFormat": "Violations-{{tenant_id}}"
},
{
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (tenant_id)",
"legendFormat": "Errors-{{tenant_id}}"
}
],
"datasource": "Prometheus"
},
{
"type": "table",
"title": "Top Tenants with SLA Breaches & Violations (24h)",
"targets": [
{
"expr": "sum(increase(policy_violations_total[24h])) by (tenant_id)",
"legendFormat": "Violations-{{tenant_id}}"
},
{
"expr": "sum(increase(requests_total{success=\"False\"}[24h])) by (tenant_id)",
"legendFormat": "Errors-{{tenant_id}}"
}
],
"datasource": "Prometheus",
"columns": [
{ "text": "Tenant", "value": "tenant_id" },
{ "text": "Violations (24h)", "value": "Value" },
{ "text": "Errors (24h)", "value": "Value" }
]
}
],
"time": { "from": "now-24h", "to": "now" }
}

View File

@@ -0,0 +1,3 @@
```
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.15.0/cert-manager.yaml
```

View File

@@ -0,0 +1,23 @@
# k8s/backend-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: agentic-backend
spec:
replicas: 2
selector:
matchLabels:
app: agentic-backend
template:
metadata:
labels:
app: agentic-backend
spec:
containers:
- name: backend
image: agentic-backend:latest
ports:
- containerPort: 8000
env:
- name: ENV
value: "production"

13
k8s/backend-service.yaml Normal file
View File

@@ -0,0 +1,13 @@
# k8s/backend-service.yaml
apiVersion: v1
kind: Service
metadata:
name: agentic-backend-service
spec:
selector:
app: agentic-backend
ports:
- protocol: TCP
port: 8000
targetPort: 8000
type: ClusterIP

15
k8s/cluster-issuer.yaml Normal file
View File

@@ -0,0 +1,15 @@
# k8s/cluster-issuer.yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-prod
spec:
acme:
email: your-email@example.com # replace with your email
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-prod-key
solvers:
- http01:
ingress:
class: nginx

View File

@@ -0,0 +1,20 @@
# k8s/frontend-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: agentic-frontend
spec:
replicas: 1
selector:
matchLabels:
app: agentic-frontend
template:
metadata:
labels:
app: agentic-frontend
spec:
containers:
- name: frontend
image: agentic-frontend:latest
ports:
- containerPort: 5173

13
k8s/frontend-service.yaml Normal file
View File

@@ -0,0 +1,13 @@
# k8s/frontend-service.yaml
apiVersion: v1
kind: Service
metadata:
name: agentic-frontend-service
spec:
selector:
app: agentic-frontend
ports:
- protocol: TCP
port: 5173
targetPort: 5173
type: NodePort

19
k8s/hpa-moe-router.yaml Normal file
View File

@@ -0,0 +1,19 @@
# k8s/hpa-moe-router.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: agentic-moe-router-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: agentic-moe-router
minReplicas: 2
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70

42
k8s/ingress.yaml Normal file
View File

@@ -0,0 +1,42 @@
# k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: agentic-ingress
annotations:
kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$1
# For Let's Encrypt TLS (using cert-manager)
# cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
# tls:
# - hosts:
# - agentic.example.com # replace with your domain
# secretName: agentic-tls
rules:
- host: agentic.local
# Using cert-manager
# - host: agentic.example.com # replace with your domain
http:
paths:
- path: /api(/|$)(.*)
pathType: Prefix
backend:
service:
name: agentic-backend-service
port:
number: 8000
- path: /moe(/|$)(.*)
pathType: Prefix
backend:
service:
name: agentic-moe-router-service
port:
number: 8080
- path: /()(.*)
pathType: Prefix
backend:
service:
name: agentic-frontend-service
port:
number: 5173

View File

@@ -0,0 +1,23 @@
# k8s/moe-router-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: agentic-moe-router
spec:
replicas: 2
selector:
matchLabels:
app: agentic-moe-router
template:
metadata:
labels:
app: agentic-moe-router
spec:
containers:
- name: moe-router
image: agentic-moe-router:latest
ports:
- containerPort: 8080
env:
- name: ROUTING_MODE
value: "capability_weighted"

View File

@@ -0,0 +1,13 @@
# k8s/moe-router-service.yaml
apiVersion: v1
kind: Service
metadata:
name: agentic-moe-router-service
spec:
selector:
app: agentic-moe-router
ports:
- protocol: TCP
port: 8080
targetPort: 8080
type: ClusterIP

View File

@@ -13,6 +13,7 @@ from fastapi.requests import Request
from fastapi.exception_handlers import http_exception_handler
from fastapi.exceptions import RequestValidationError
from routes.auth_routes import router as auth_router
from routes.api import router as api_router
from routes.data_routes import router as data_router
from routes.dataset_routes import router as dataset_router
@@ -118,6 +119,8 @@ async def global_exception_handler(request: Request, exc: Exception):
)
##INFO: ✅ Register routes
app.include_router(auth_router, prefix="/auth")
# app.include_router(api_router)
app.include_router(api_router, prefix="/api")
# data_router (/api/data/...), Means all endpoints in this router will be nested under /api
@@ -139,6 +142,8 @@ app.include_router(scheduler_router, prefix="/schedule")
app.include_router(calendar_router, prefix="/calendar")
app.include_router(group_router, prefix="/group")
# app.include_router(metrics_router, prefix="/metrics")
##INFO: API route to Trigger Orchestration
app.include_router(orchestration_router, prefix="/api")

View File

@@ -6,6 +6,9 @@ import 'package:http/http.dart' as http;
class SyncService {
static const String baseUrl = "https://your-api-endpoint.com/api/sync"; // Replace with actual base URL
// -----------------------------
// APIs for agent memory/state
// -----------------------------
static Future<List<Map<String, dynamic>>> getAgentMemory(String tenantId, String agentRole) async {
final uri = Uri.parse("$baseUrl/agent/memory?tenant_id=$tenantId&agent_role=$agentRole");
final res = await http.get(uri);
@@ -110,4 +113,56 @@ class SyncService {
}
}
// -----------------------------
// Hybrid Deployment Sync APIs
// -----------------------------
static final Map<String, dynamic> _localCache = {};
/// Run edge inference locally and cache the result.
///
/// [edgeModelFn] is the caller-supplied on-device model callback; its
/// output is stored in [_localCache] keyed by [prompt] and marked as not
/// yet synced with the cloud (`synced: false`, `cloud_output: null`).
static Future<Map<String, dynamic>> runEdgeInference(
    String prompt, Future<String> Function(String) edgeModelFn) async {
  final edgeOutput = await edgeModelFn(prompt);
  _localCache[prompt] = {
    "mode": "edge",
    "edge_output": edgeOutput,
    "cloud_output": null,
    "synced": false,
  };
  return _localCache[prompt]!;
}
/// Refine a cached edge result with a cloud inference pass.
///
/// Requires that [runEdgeInference] has already cached an entry for
/// [prompt]; throws otherwise. On HTTP 200 the cloud response is merged
/// into the cached entry and `synced` is set to true; any other status
/// code throws.
static Future<Map<String, dynamic>> syncWithCloud(String prompt) async {
  if (!_localCache.containsKey(prompt)) {
    throw Exception("No cached edge result for prompt: $prompt");
  }
  // NOTE(review): endpoint is hard-coded and duplicates baseUrl's host —
  // confirm whether it should be derived from baseUrl instead.
  final uri = Uri.parse("https://your-api-endpoint.com/api/inference");
  final res = await http.post(uri,
      headers: {"Content-Type": "application/json"},
      body: json.encode({"prompt": prompt, "deployment_mode": "cloud"})
  );
  if (res.statusCode == 200) {
    final cloudOutput = json.decode(res.body);
    _localCache[prompt]!["cloud_output"] = cloudOutput["response"];
    _localCache[prompt]!["synced"] = true;
    return _localCache[prompt]!;
  } else {
    throw Exception("Cloud sync failed: ${res.statusCode}");
  }
}
/// Return the cached edge/hybrid result for [prompt], or null if absent.
static Map<String, dynamic>? getCachedResult(String prompt) {
  return _localCache[prompt];
}
/// Drop all locally cached inference results.
static void clearCache() {
  _localCache.clear();
}
}

View File

@@ -1,16 +1,91 @@
# models/moe_handler.py
from models.base_model import BaseModel
import random
import time
from typing import List, Dict, Any, Optional
class MoEHandler(BaseModel):
def __init__(self):
# stub: would load multiple experts and router
self.experts = []
class Expert:
    """A single expert in the MoE pool, tagged with capabilities and a weight.

    BUG FIX: this span contained stale pre-merge stub lines interleaved with
    the new implementation (a leftover ``return "[MoE output stub]"`` and a
    duplicate ``embed`` definition); the dead duplicates are removed.
    """

    def __init__(self, name: str, capabilities: List[str], engine: str, weight: float = 1.0):
        self.name = name                  # unique expert identifier
        self.capabilities = capabilities  # e.g. ["reasoning"], ["code"]
        self.engine = engine              # backing engine type (llm, slm, vlm, ...)
        self.weight = weight              # sampling weight for capability_weighted routing

    def generate(self, prompt: str) -> str:
        # TODO: integrate with actual engine handler (llm, slm, vlm, etc.)
        return f"[{self.engine}:{self.name}] generated output for: {prompt}"

    def embed(self, text: str) -> List[float]:
        # TODO: integrate with embedding handler
        return [0.1, 0.2, 0.3]

    def analyze(self, input_data: dict) -> dict:
        return {"analysis": f"Stub analysis by {self.name}"}
# class MoEHandler(BaseModel):
# def __init__(self):
# # stub: would load multiple experts and router
# self.experts = []
# def generate(self, prompt: str) -> str:
# return "[MoE output stub]"
# def embed(self, text: str) -> list[float]:
# return []
# def analyze(self, input_data: dict) -> dict:
# return {"analysis": "MoE stub"}
class MoEHandler(BaseModel):
    """Mixture-of-Experts handler.

    Keeps a static registry of Expert instances and routes each call
    (generate/embed/analyze) to one expert according to the configured
    routing mode.
    """

    def __init__(self):
        # Load multiple experts into registry
        # Weights bias selection only in "capability_weighted" mode.
        self.experts: Dict[str, Expert] = {
            "expert_reasoning": Expert("expert_reasoning", ["reasoning"], "llm", weight=1.5),
            "expert_creative": Expert("expert_creative", ["creative"], "llm", weight=1.0),
            "expert_summary": Expert("expert_summary", ["summary"], "slm", weight=1.0),
            "expert_code": Expert("expert_code", ["code"], "llm", weight=0.8),
        }
        # Routing strategy: "capability_weighted" | "first_match" | "round_robin".
        self.routing_mode = "capability_weighted"
        # Fallback expert used when no candidate matches the requested capability.
        self.default_expert = "expert_reasoning"
        self._rr_index = 0  # cursor for round_robin routing

    def _route(self, capability: Optional[str] = None) -> Expert:
        """Pick one expert for *capability* using the active routing mode."""
        # Candidates: every expert, or only those advertising the capability.
        candidates = [e for e in self.experts.values() if capability is None or capability in e.capabilities]
        if not candidates:
            # No expert matches — fall back to the configured default.
            return self.experts[self.default_expert]
        if self.routing_mode == "capability_weighted":
            # Weighted random draw; higher weight => proportionally more traffic.
            weights = [e.weight for e in candidates]
            return random.choices(candidates, weights=weights, k=1)[0]
        elif self.routing_mode == "first_match":
            return candidates[0]
        elif self.routing_mode == "round_robin":
            expert = candidates[self._rr_index % len(candidates)]
            self._rr_index += 1
            return expert
        # Unknown routing mode — behave like first_match.
        return candidates[0]

    def generate(self, prompt: str, capability: Optional[str] = None) -> str:
        """Generate text via a routed expert; appends measured latency to the output."""
        expert = self._route(capability)
        start = time.time()
        output = expert.generate(prompt)
        latency = int((time.time() - start) * 1000)  # wall-clock milliseconds
        return f"{output} (latency={latency}ms)"

    def embed(self, text: str, capability: Optional[str] = None) -> List[float]:
        """Embed *text* via a routed expert."""
        expert = self._route(capability)
        return expert.embed(text)

    def analyze(self, input_data: dict, capability: Optional[str] = None) -> dict:
        """Analyze *input_data* via a routed expert."""
        expert = self._route(capability)
        return expert.analyze(input_data)

    def list_experts(self) -> List[Dict[str, Any]]:
        """Return a JSON-friendly description of every registered expert."""
        return [{"name": e.name, "capabilities": e.capabilities, "engine": e.engine, "weight": e.weight}
                for e in self.experts.values()]

    def update_config(self, default_expert: Optional[str] = None, routing_mode: Optional[str] = None) -> Dict[str, str]:
        """Update routing config; None/falsy arguments leave current values unchanged."""
        if default_expert:
            self.default_expert = default_expert
        if routing_mode:
            self.routing_mode = routing_mode
        return {"default_expert": self.default_expert, "routing_mode": self.routing_mode}

View File

@@ -0,0 +1,12 @@
# monitoring/metrics_endpoint.py
from fastapi import APIRouter, Response
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST

router = APIRouter()

# Shared metric handles, labelled by the model type serving the request.
REQUEST_COUNT = Counter("requests_total", "Total requests", ["model_type"])
LATENCY = Histogram("request_latency_ms", "Request latency (ms)", ["model_type"])


@router.get("/metrics")
def metrics():
    """Expose Prometheus metrics in the text exposition format.

    BUG FIX: returning the raw ``generate_latest()`` bytes let FastAPI
    JSON-encode them under ``application/json``; Prometheus requires the
    exposition content type, so wrap in an explicit Response.
    """
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)

View File

@@ -0,0 +1,49 @@
groups:
- name: agentic-alerts
interval: 30s
rules:
# High latency alert
- alert: HighLatency
expr: histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, deployment_mode)) > 500
for: 2m
labels:
severity: warning
annotations:
summary: "High latency detected"
description: "95th percentile latency > 500ms for 2 minutes"
# SLA violation (success rate < 95%)
- alert: SLAViolation
expr: (sum(rate(requests_total{success="True"}[5m])) / sum(rate(requests_total[5m]))) * 100 < 95
for: 5m
labels:
severity: critical
annotations:
summary: "SLA violation detected"
description: "Success rate dropped below 95%"
# Error spike alert
- alert: ErrorSpike
expr: sum(rate(requests_total{success="False"}[1m])) by (tenant_id, deployment_mode) > 10
for: 1m
labels:
severity: critical
annotations:
summary: "Error spike detected"
description: "More than 10 failed requests per minute"
# -----------------------------
# New Combined SLA + Policy Violation Alert
# -----------------------------
- alert: SLAWithPolicyViolation
expr: (
(sum(rate(requests_total{success="True"}[5m])) / sum(rate(requests_total[5m]))) * 100 < 95
)
and
(sum(rate(policy_violations_total[5m])) by (tenant_id) > 5)
for: 5m
labels:
severity: critical
annotations:
summary: "SLA violation with policy violations"
description: "Success rate <95% AND policy violations >5 in 5m window"

View File

@@ -0,0 +1,24 @@
inhibit_rules:
# Suppress SLA violation alerts if HighLatency is already firing
- source_match:
alertname: "HighLatency"
target_match:
alertname: "SLAViolation"
equal: ["deployment_mode"]
# Explanation: If latency is already high, SLA violation alerts are redundant.
# Suppress ErrorSpike alerts if SLAViolation is firing
- source_match:
alertname: "SLAViolation"
target_match:
alertname: "ErrorSpike"
equal: ["agent_role", "deployment_mode"]
# Explanation: SLA violation already covers error spikes, so suppress ErrorSpike.
# Suppress lower-severity alerts when critical ones are active
- source_match:
severity: "critical"
target_match:
severity: "warning"
equal: ["tenant_id", "deployment_mode"]
# Explanation: If a critical alert is firing, warnings for same tenant/mode are suppressed.

View File

@@ -0,0 +1,45 @@
global:
resolve_timeout: 5m
route:
receiver: 'default'
inhibit_rules:
- source_match:
alertname: "HighLatency"
target_match:
alertname: "SLAViolation"
equal: ["deployment_mode"]
- source_match:
alertname: "SLAViolation"
target_match:
alertname: "ErrorSpike"
equal: ["agent_role", "deployment_mode"]
- source_match:
severity: "critical"
target_match:
severity: "warning"
equal: ["tenant_id", "deployment_mode"]
# Suppress standalone SLA or Error alerts if combined SLA+PolicyViolation is firing
- source_match:
alertname: "SLAWithPolicyViolation"
target_match:
alertname: "SLAViolation"
equal: ["tenant_id"]
- source_match:
alertname: "SLAWithPolicyViolation"
target_match:
alertname: "ErrorSpike"
equal: ["tenant_id"]
receivers:
- name: 'default'
email_configs:
- to: 'ops-team@example.com'
from: 'alertmanager@example.com'
smarthost: 'smtp.example.com:587'
auth_username: 'alertmanager@example.com'
auth_password: 'yourpassword'  # FIXME: placeholder — load real credentials from a secret store; never commit them

View File

@@ -16,15 +16,30 @@ class MetricsStore:
"success": success,
"latency": latency
})
def log(self, tenant_id: str, agent_role: str, action: str, success: bool, latency: float,
    deployment_mode: str = "default"
):
    """Log a single metric record with deployment_mode awareness."""
    # Records are append-only flat dicts; note the agent role is stored
    # under the key "agent" (summaries below rely on that key).
    self.records.append({
        "timestamp": time.time(),
        "tenant_id": tenant_id,
        "agent": agent_role,
        "action": action,
        "success": success,
        "latency": latency,
        "deployment_mode": deployment_mode
    })
def get_all(self):
    """Return the raw list of metric records (the live list, not a copy)."""
    return self.records
def get_summary(self, tenant_id: str = None):
"""Summarize metrics, grouped by (agent, action, deployment_mode)."""
filtered = [r for r in self.records if not tenant_id or r["tenant_id"] == tenant_id]
summary = {}
for r in filtered:
key = (r["agent"], r["action"])
# key = (r["agent"], r["action"])
key = (r["agent"], r["action"], r.get("deployment_mode", "default"))
if key not in summary:
summary[key] = {"count": 0, "success": 0, "latency": 0.0}
summary[key]["count"] += 1
@@ -36,14 +51,38 @@ class MetricsStore:
return summary
def get_agent_metrics(self, agent_role: str):
    """Summarize metrics for a specific agent, broken down by deployment_mode.

    Returns:
        dict mapping deployment_mode -> {"total_actions", "avg_latency",
        "success_rate", "latency_distribution"}.
    """
    filtered = [r for r in self.records if r["agent"] == agent_role]

    # Aggregate per deployment mode. Uses dict.setdefault rather than
    # collections.defaultdict: no defaultdict import is visible in this
    # module, so the previous code risked a NameError. Also removes the
    # commented-out pre-refactor implementation that was left behind.
    metrics_by_mode = {}
    for r in filtered:
        mode = r.get("deployment_mode", "default")
        stats = metrics_by_mode.setdefault(mode, {"count": 0, "latencies": [], "success": 0})
        stats["count"] += 1
        stats["latencies"].append(r["latency"])
        stats["success"] += int(r["success"])

    summary = {}
    for mode, stats in metrics_by_mode.items():
        count = stats["count"]
        summary[mode] = {
            "total_actions": count,
            "avg_latency": sum(stats["latencies"]) / count if count else 0,
            "success_rate": stats["success"] / count if count else 0,
            "latency_distribution": stats["latencies"],
        }
    return summary
metrics_store = MetricsStore()

# --- new file: monitoring/prometheus.yml (15 lines) ---
# monitoring/prometheus.yml
global:
  scrape_interval: 15s

rule_files:
  - "alert-rules.yml"
  - "recording-rules.yml"

scrape_configs:
  - job_name: 'backend'
    # Prometheus `targets` must be host:port only; the scrape path belongs in
    # `metrics_path`.  Also fixes the "promethus" typo so the path matches the
    # FastAPI route /admin/metrics/prometheus.
    metrics_path: /admin/metrics/prometheus
    static_configs:
      - targets: ['agentic-backend:8000']
  - job_name: 'moe-router'
    static_configs:
      - targets: ['agentic-moe-router:8080']

# --- new file: monitoring/recording-rules.yml ---
groups:
  - name: agentic-recording-rules
    interval: 30s
    rules:
      # -----------------------------
      # Latency aggregations
      # -----------------------------
      # NOTE(review): these aggregate by `model_type`, but the backend's
      # request_latency_ms histogram is labelled with tenant_id / agent_role /
      # action / deployment_mode only — confirm model_type exists or the
      # recorded series will be empty.
      - record: request_latency_ms:avg_by_mode
        expr: avg(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) by (model_type, deployment_mode)
      - record: request_latency_ms:p95_by_mode
        expr: histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))
      # -----------------------------
      # Request counts
      # -----------------------------
      - record: requests_total:rate_by_mode
        expr: sum(rate(requests_total[5m])) by (model_type, deployment_mode)
      # success label values are Python str(bool): "True"/"False"
      - record: requests_total:success_rate_by_mode
        expr: sum(rate(requests_total{success="True"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100
      # -----------------------------
      # Error counts
      # -----------------------------
      - record: requests_total:error_rate_by_mode
        expr: sum(rate(requests_total{success="False"}[5m])) by (agent_role, deployment_mode)

# --- modified file: routes/auth_routes.py ---
from fastapi import APIRouter, HTTPException, Depends
# routes/auth_routes.py
from fastapi import APIRouter, HTTPException, Depends, status
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from datetime import datetime, timedelta
from jose import JWTError, jwt
from pydantic import BaseModel
from typing import List, Optional
# SECURITY(review): hard-coded signing secret — load from an environment
# variable / secret store before production; anyone holding this value can
# forge tokens for any tenant and role.
SECRET_KEY = "your-secret-key"
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 60  # reconstructed from the garbled diff hunk header

router = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="auth/token")
# Tenant-aware user registry (demo only; dead pre-tenant entries removed).
# SECURITY(review): plaintext passwords in source — use hashed credentials
# (e.g. passlib/bcrypt) and a real user store before production.
fake_users_db = {
    "tony": {"username": "tony", "password": "agentic123", "tenant_id": "tenantA", "roles": ["admin"]},
    "guest": {"username": "guest", "password": "guest123", "tenant_id": "tenantB", "roles": ["tenant"]},
}
def authenticate_user(username: str, password: str):
class AuthUser(BaseModel):
    """Authenticated principal decoded from a JWT's claims."""
    username: str
    tenant_id: Optional[str]  # None would mean an un-scoped/global account
    roles: List[str]  # e.g. ["admin"] or ["tenant"]
def authenticate_user(username: str, password: str) -> Optional[AuthUser]:
    """Return an AuthUser for valid credentials, or None on failure.

    NOTE(review): demo-only plaintext credential check against fake_users_db;
    replace with password hashing in production.  compare_digest keeps the
    comparison constant-time to avoid a timing side channel.
    """
    import secrets  # stdlib; local import keeps this helper self-contained

    user = fake_users_db.get(username)
    if not user or not secrets.compare_digest(user["password"].encode(), password.encode()):
        return None
    return AuthUser(username=user["username"], tenant_id=user["tenant_id"], roles=user["roles"])
def create_access_token(user: AuthUser, expires_delta: timedelta = None):
    """Build a signed JWT carrying sub / tenant_id / roles / exp claims.

    expires_delta defaults to ACCESS_TOKEN_EXPIRE_MINUTES when not given.
    """
    from datetime import timezone  # stdlib; module imports only datetime/timedelta

    # timezone-aware "now" — datetime.utcnow() is deprecated in modern Python
    expire = datetime.now(timezone.utc) + (expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
    to_encode = {"sub": user.username, "tenant_id": user.tenant_id, "roles": user.roles, "exp": expire}
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
def decode_token(token: str) -> AuthUser:
    """Decode and validate a JWT, returning its claims as an AuthUser.

    Raises HTTP 403 on any signature/expiry/format problem (JWTError).
    """
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        return AuthUser(username=payload.get("sub"), tenant_id=payload.get("tenant_id"), roles=payload.get("roles", []))
    except JWTError:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid token")
def get_current_user(token: str = Depends(oauth2_scheme)) -> AuthUser:
    """FastAPI dependency: resolve the bearer token to an AuthUser (403 on failure)."""
    return decode_token(token)
def require_roles(*required: str):
    """Dependency factory: admit the request only when the caller holds at
    least one of *required* roles; otherwise raise HTTP 403."""
    def wrapper(user: AuthUser = Depends(get_current_user)) -> AuthUser:
        granted = set(user.roles)
        if granted.isdisjoint(required):
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Insufficient privileges")
        return user
    return wrapper
@router.post("/auth/token")
def login(form_data: OAuth2PasswordRequestForm = Depends()):
    """OAuth2 password login: exchange username/password for a bearer JWT.

    Fix: the merged diff had left the old call
    `create_access_token(data={"sub": user["username"]})` in place; it executed
    before the correct call and raised TypeError (AuthUser is not a dict and
    the new signature takes the user object directly).
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(status_code=401, detail="Invalid credentials")
    token = create_access_token(user)
    return {"access_token": token, "token_type": "bearer"}
@router.get("/auth/me")
def read_users_me(user: AuthUser = Depends(get_current_user)):
    """Return the calling user's identity claims.

    Fix: the merged diff had attached @router.get to the OLD token-parsing
    handler and then redefined read_users_me without a decorator, so the new
    tenant/roles-aware handler was never routed.  Only the new handler is kept.
    """
    return {"username": user.username, "tenant_id": user.tenant_id, "roles": user.roles}

# --- modified file: routes/inference_routes.py ---
# routes/inference_routes.py
from fastapi import APIRouter
# from fastapi import APIRouter, Body, HTTPException
from fastapi import APIRouter, Body, HTTPException, Depends
from pydantic import BaseModel
from typing import Optional
from models.llm_loader import get_llm
from agents.orchestrator import route_request
from models.registry import get_model
from models.moe_handler import MoEHandler
from auth.security import require_roles, get_current_user
from auth.security import AuthUser # Optional: for typing
router = APIRouter()

# llm = get_llm()  # legacy eager model load — superseded by per-request get_model()
# Shared Mixture-of-Experts handler instance used by the /admin/moe/* routes.
moe_handler = MoEHandler()
class EdgePrompt(BaseModel):
    """Request body for /llm/infer-edge: a single prompt string."""
    prompt: str

# Legacy route kept for reference (used the old eager `llm` callable):
# @router.post("/llm/infer-edge")
# def infer_edge_label(data: EdgePrompt):
#     label = llm(data.prompt).strip()
#     return {"label": label}

class HybridPrompt(BaseModel):
    """Request body for deployment-aware inference."""
    prompt: str
    context: Optional[dict] = {}
    deployment_mode: Optional[str] = "hybrid"  # edge | cloud | hybrid
# Public or low-risk endpoint: deliberately left open (add tenant auth if needed).
@router.post("/llm/infer-edge")
def infer_edge_label(data: EdgePrompt):
    """Generate a short label for the prompt using the default LLM backend."""
    model = get_model("llm")
    return {"label": model.generate(data.prompt).strip()}
# Admin-only unified inference (no deployment awareness).
# Fix: the merged diff stacked a second, dependency-free @router.post on this
# handler, registering the same path twice (once without the admin check).
@router.post("/admin/infer", dependencies=[Depends(require_roles("admin"))])
def infer(prompt: str = Body(...), context: dict = Body(default={})):
    """Route a raw prompt (plus optional context) through the agent orchestrator."""
    return route_request(prompt, context)
# -----------------------------
# Deployment-aware inference (tenant allowed)
# -----------------------------
# Fix: a stray @router.post("/admin/infer/{model_type}") decorator was stacked
# on this handler by the diff merge.  It registered an UNauthenticated route at
# the {model_type} path, shadowing the secured infer_with_model route below
# (routes match in registration order).  Removed.
@router.post("/admin/infer/deployment", dependencies=[Depends(require_roles("admin", "tenant"))])
def infer_with_deployment(data: HybridPrompt, user: AuthUser = Depends(get_current_user)):
    """
    Run inference with deployment_mode awareness.
    deployment_mode can be: edge, cloud, hybrid.
    """
    try:
        # Enforce tenant scoping by injecting the caller's tenant into context.
        ctx = {**(data.context or {}), "deployment_mode": data.deployment_mode}
        if user.tenant_id:
            ctx["tenant_id"] = user.tenant_id
        result = route_request(data.prompt, ctx)
        return {"deployment_mode": data.deployment_mode, "tenant_id": user.tenant_id, "result": result}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Admin-only model-specific inference.
# Fix: the merged diff had left the OLD body (no moe branch, no error handling)
# ahead of the new try/except block; it returned early, making the moe branch
# and the 500 mapping unreachable.  Only the new body is kept.
@router.post("/admin/infer/{model_type}", dependencies=[Depends(require_roles("admin"))])
def infer_with_model(model_type: str, prompt: str = Body(...)):
    """
    Run inference with a specific model type (llm, slm, vlm, moe, lcm, lam, mlm, embed).
    """
    try:
        if model_type == "moe":
            output = moe_handler.generate(prompt)
            return {"model_type": "moe", "output": output, "experts": moe_handler.list_experts()}
        model = get_model(model_type)
        if model_type in ["embed", "vlm"]:
            # embedding-style backends return a vector rather than text
            vector = model.embed(prompt)
            return {"model_type": model_type, "vector": vector}
        else:
            output = model.generate(prompt)
            return {"model_type": model_type, "output": output}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------
# MoE-specific management routes
# -----------------------------
@router.get("/admin/moe/experts", dependencies=[Depends(require_roles("admin"))])
def list_moe_experts():
    """List the experts currently registered with the MoE handler (admin only)."""
    return moe_handler.list_experts()
@router.post("/admin/moe/config", dependencies=[Depends(require_roles("admin"))])
def update_moe_config(default_expert: Optional[str] = Body(None), routing_mode: Optional[str] = Body(None)):
    """Update MoE routing configuration; None leaves a setting unchanged (admin only)."""
    return moe_handler.update_config(default_expert=default_expert, routing_mode=routing_mode)

View File

@@ -1,20 +1,135 @@
# routes/metrics_routes.py
from fastapi import APIRouter
from fastapi import APIRouter, Response, Depends
from monitoring.metrics_store import metrics_store
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
from auth.security import require_roles, get_current_user, AuthUser
router = APIRouter()
# -----------------------------
# Prometheus metrics definitions
# -----------------------------
# Fixes from the botched diff merge:
#  * removed the legacy, UNauthenticated @router.post("/admin/metrics/log")
#    route that was still registered ahead of the secured handler below;
#  * removed a stray module-level POLICY_VIOLATIONS.labels(tenant_id=...)
#    call that referenced undefined names and raised NameError at import —
#    such calls belong inside the request handlers that detect violations.
REQUEST_COUNT = Counter(
    "requests_total",
    "Total requests handled",
    ["tenant_id", "agent_role", "action", "success", "deployment_mode"]
)
LATENCY = Histogram(
    "request_latency_ms",
    "Request latency in milliseconds",
    ["tenant_id", "agent_role", "action", "deployment_mode"]
)
POLICY_VIOLATIONS = Counter(
    "policy_violations_total",
    "Total policy violations",
    ["tenant_id", "role", "action"]
)
# -----------------------------
# Extended logging endpoint
# -----------------------------
# # @router.post("/admin/metrics/log")
# # def log_metric(tenant_id: str, agent_role: str, action: str, success: bool, latency: float):
# # metrics_store.log(tenant_id, agent_role, action, success, latency)
# # # Update Prometheus counters
# # REQUEST_COUNT.labels(
# # tenant_id=tenant_id, agent_role=agent_role, action=action, success=str(success)
# # ).inc()
# # LATENCY.labels(tenant_id=tenant_id, agent_role=agent_role, action=action).observe(latency)
# # return {"status": "logged"}
# @router.post("/admin/metrics/log")
# def log_metric(
# tenant_id: str,
# agent_role: str,
# action: str,
# success: bool,
# latency: float,
# deployment_mode: str = "default"
# ):
# # Log into custom store
# metrics_store.log(tenant_id, agent_role, action, success, latency)
# # Update Prometheus counters with deployment_mode
# REQUEST_COUNT.labels(
# tenant_id=tenant_id,
# agent_role=agent_role,
# action=action,
# success=str(success),
# deployment_mode=deployment_mode
# ).inc()
# LATENCY.labels(
# tenant_id=tenant_id,
# agent_role=agent_role,
# action=action,
# deployment_mode=deployment_mode
# ).observe(latency)
# return {"status": "logged"}
@router.post("/admin/metrics/log", dependencies=[Depends(require_roles("admin", "tenant"))])
def log_metric(
    tenant_id: str,
    agent_role: str,
    action: str,
    success: bool,
    latency: float,
    deployment_mode: str = "default",
    user: AuthUser = Depends(get_current_user)
):
    """Record one agent action in the in-memory store and Prometheus series.

    Tenant users may only log for their own tenant; admins may log for any.
    """
    # Enforce tenant scoping: only allow logging for own tenant unless admin
    if "admin" not in user.roles and user.tenant_id and user.tenant_id != tenant_id:
        # local import: HTTPException is not in this module's top-level imports
        from fastapi import HTTPException
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    # NOTE(review): assumes MetricsStore.log accepts a 6th deployment_mode
    # argument — confirm against monitoring/metrics_store.py.
    metrics_store.log(tenant_id, agent_role, action, success, latency, deployment_mode)
    # success is stringified ("True"/"False") to match the recording rules.
    REQUEST_COUNT.labels(
        tenant_id=tenant_id, agent_role=agent_role, action=action, success=str(success), deployment_mode=deployment_mode
    ).inc()
    LATENCY.labels(
        tenant_id=tenant_id, agent_role=agent_role, action=action, deployment_mode=deployment_mode
    ).observe(latency)
    return {"status": "logged"}
# -----------------------------
# Summary endpoints
# -----------------------------
# Fix: the diff merge stacked a second, dependency-free @router.get on this
# handler, registering the path twice; only the admin-guarded route is kept.
@router.get("/admin/metrics/all", dependencies=[Depends(require_roles("admin"))])
def get_all_metrics():
    """Dump every raw metric record (admin only)."""
    return {"metrics": metrics_store.get_all()}
# Fix: the legacy unauthenticated /admin/metrics/summary route was still
# registered FIRST, so it won route matching and bypassed auth entirely.
@router.get("/admin/metrics/summary", dependencies=[Depends(require_roles("admin", "tenant"))])
def get_summary(tenant_id: str = None, user: AuthUser = Depends(get_current_user)):
    """Summarize metrics.

    Admins may query any tenant (or all tenants with tenant_id=None); tenant
    users are always pinned to their own tenant_id.
    """
    if "admin" in user.roles:
        effective_tenant = tenant_id
    else:
        effective_tenant = user.tenant_id
        if effective_tenant is None:
            # A non-admin token without a tenant would otherwise see ALL
            # tenants via get_summary(None); reject that outright.
            from fastapi import HTTPException  # local: not in module imports
            raise HTTPException(status_code=403, detail="Tenant scope required")
    return {"summary": metrics_store.get_summary(effective_tenant)}
# -----------------------------
# Prometheus scrape endpoint — usually cluster-internal; keep admin-only
# -----------------------------
@router.get("/admin/metrics/prometheus", dependencies=[Depends(require_roles("admin"))])
def prometheus_metrics():
    """Expose all registered metrics in Prometheus text exposition format."""
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)

View File

@@ -1,35 +1,67 @@
# routes/security_routes.py
##INFO: Security Routes
from fastapi import APIRouter
from fastapi import APIRouter, Depends, HTTPException
from security.rbac_registry import rbac_registry
from security.action_validator import action_validator
from tenants.rbac_guard import check_access
from governance.policy_registry import policy_registry
from routes.auth_routes import require_roles, get_current_user, AuthUser
router = APIRouter()
@router.post("/admin/security/role", dependencies=[Depends(require_roles("admin"))])
def define_role(agent_role: str, allowed_actions: list[str]):
    """Create or replace an RBAC role definition (admin only)."""
    return rbac_registry.define_role(agent_role, allowed_actions)
# Fix: a stray second @router.get decorator (without the admin dependency)
# was stacked here by the diff merge, registering a duplicate route.
@router.get("/admin/security/roles", dependencies=[Depends(require_roles("admin"))])
def get_all_roles():
    """List all RBAC role definitions (admin only)."""
    return {"roles": rbac_registry.get_all_roles()}
# Fix: the diff merge left the old decorator + headerless def ahead of this
# handler, which both double-registered the path and broke the module syntax.
@router.post("/admin/security/validate", dependencies=[Depends(require_roles("admin", "tenant"))])
def validate_action(agent_role: str, action: str, tenant_id: str, user: AuthUser = Depends(get_current_user)):
    """Validate an agent action against tenant policy, logging violations.

    Tenant users may only validate within their own tenant; admins may
    validate for any tenant.
    """
    # Enforce tenant scoping
    if "admin" not in user.roles and user.tenant_id != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    # Check against policy registry.
    # NOTE(review): assumes get_policy always returns a dict containing
    # "allowed_roles" and "restricted_tasks" — confirm behavior for unknown
    # tenants before shipping.
    policy = policy_registry.get_policy(tenant_id)
    if agent_role not in policy["allowed_roles"]:
        return policy_registry.log_violation(tenant_id, agent_role, action, "Role not allowed")
    if action in policy["restricted_tasks"]:
        return policy_registry.log_violation(tenant_id, agent_role, action, "Action restricted")
    return action_validator.validate(agent_role, action, tenant_id)
# Fix: removed a stray duplicate @router.get decorator left by the diff merge.
@router.get("/admin/security/audit", dependencies=[Depends(require_roles("admin"))])
def get_audit_log(tenant_id: str):
    """Return the action-validation audit trail for a tenant (admin only)."""
    return {"audit": action_validator.get_audit(tenant_id)}
# Fix: the old decorator + headerless def were left ahead of this handler by
# the diff merge (double registration / broken syntax); only the new one kept.
@router.post("/admin/security/access-check", dependencies=[Depends(require_roles("admin", "tenant"))])
def check_rbac_access(tenant_id: str, role: str, action: str, resource: str, user: AuthUser = Depends(get_current_user)):
    """Check whether `role` may perform `action` within `tenant_id`.

    NOTE(review): `resource` is accepted for API compatibility but is not
    passed to check_access — confirm whether resource-level checks are needed.
    """
    if "admin" not in user.roles and user.tenant_id != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    allowed = check_access(tenant_id, role, action)
    return {"access_granted": allowed}
# Fix: removed a stray duplicate @router.get decorator left by the diff merge.
@router.get("/admin/security/global-policies", dependencies=[Depends(require_roles("admin"))])
def list_global_policies():
    """List platform-wide governance policies (admin only)."""
    return {"global_policies": policy_registry.list_global_policies()}

# --- new file: tests/test_inference_routes.py ---
# tests/test_inference_routes.py
import pytest
from fastapi.testclient import TestClient
from routes.inference_routes import router
from fastapi import FastAPI
app = FastAPI()
app.include_router(router)

# The /admin/* routes carry require_roles(...) dependencies; without a token
# every admin test would 401.  Overriding get_current_user is sufficient
# because each require_roles wrapper resolves the user through it.
from auth.security import get_current_user, AuthUser  # noqa: E402

app.dependency_overrides[get_current_user] = lambda: AuthUser(
    username="test-admin", tenant_id="tenantA", roles=["admin"]
)

client = TestClient(app)
def test_infer_edge_label():
    """Public edge-label route returns a JSON object with a "label" key."""
    resp = client.post("/llm/infer-edge", json={"prompt": "classify sentiment"})
    assert resp.status_code == 200
    data = resp.json()
    assert "label" in data

def test_admin_infer():
    """Unified admin inference returns a dict payload from the orchestrator."""
    resp = client.post("/admin/infer", json={"prompt": "Hello", "context": {}})
    assert resp.status_code == 200
    assert isinstance(resp.json(), dict)

def test_infer_with_llm():
    """Model-specific route echoes the requested model_type."""
    resp = client.post("/admin/infer/llm", json={"prompt": "Hello"})
    assert resp.status_code == 200
    data = resp.json()
    assert data["model_type"] == "llm"

def test_infer_with_moe():
    """MoE route returns generated output plus the expert roster."""
    resp = client.post("/admin/infer/moe", json={"prompt": "Explain transformers"})
    assert resp.status_code == 200
    data = resp.json()
    assert data["model_type"] == "moe"
    assert "output" in data

def test_list_moe_experts():
    """Expert listing is a list containing the reasoning expert."""
    resp = client.get("/admin/moe/experts")
    assert resp.status_code == 200
    experts = resp.json()
    assert isinstance(experts, list)
    assert any("expert_reasoning" in e["name"] for e in experts)

def test_update_moe_config():
    """Config update round-trips default_expert and routing_mode."""
    resp = client.post("/admin/moe/config", json={"default_expert": "expert_summary", "routing_mode": "first_match"})
    assert resp.status_code == 200
    cfg = resp.json()
    assert cfg["default_expert"] == "expert_summary"
    assert cfg["routing_mode"] == "first_match"

34
tests/test_moe_handler.py Normal file
View File

@@ -0,0 +1,34 @@
# tests/test_moe_handler.py
import pytest
from models.moe_handler import MoEHandler, ExpertSpec
@pytest.fixture
def moe():
    """Fresh MoEHandler per test (the handler holds mutable routing config)."""
    return MoEHandler()

def test_list_experts(moe):
    """Handler exposes a list of experts including expert_reasoning."""
    experts = moe.list_experts()
    assert isinstance(experts, list)
    assert any(e["name"] == "expert_reasoning" for e in experts)

def test_generate_default(moe):
    """Default generation yields a result mentioning output or latency."""
    output = moe.generate("Hello world")
    assert "output" in output or "latency" in output

def test_generate_with_capability(moe):
    """Capability hint routes the prompt toward the summary expert."""
    output = moe.generate("Summarize this text", capability="summary")
    assert "summary" in output.lower() or "latency" in output

def test_embed(moe):
    """Embedding returns a list of floats."""
    vector = moe.embed("Artificial intelligence")
    assert isinstance(vector, list)
    assert all(isinstance(v, float) for v in vector)

def test_analyze(moe):
    """Analysis result carries an "analysis" key."""
    result = moe.analyze({"data": "test"})
    assert "analysis" in result

def test_update_config(moe):
    """Config update round-trips default_expert and routing_mode."""
    cfg = moe.update_config(default_expert="expert_creative", routing_mode="round_robin")
    assert cfg["default_expert"] == "expert_creative"
    assert cfg["routing_mode"] == "round_robin"