forked from tonycho/Awesome-Agentic-AI
Add/Update Security & Governance related stuffs
This commit is contained in:
38
.github/workflows/ci.yml
vendored
Normal file
38
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "main", "develop" ]
|
||||
pull_request:
|
||||
branches: [ "main", "develop" ]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install pytest pytest-cov
|
||||
|
||||
# - name: Run tests
|
||||
# run: pytest -v --maxfail=1 --disable-warnings -q
|
||||
- name: Run tests with coverage
|
||||
run: pytest --cov=models --cov=routes --cov-report=xml --cov-report=term-missing
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }} # add in repo secrets
|
||||
files: ./coverage.xml
|
||||
fail_ci_if_error: true
|
||||
27
.gitlab-ci.yml
Normal file
27
.gitlab-ci.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
stages:
|
||||
- test
|
||||
|
||||
variables:
|
||||
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
|
||||
|
||||
cache:
|
||||
paths:
|
||||
- .cache/pip
|
||||
|
||||
test:
|
||||
stage: test
|
||||
image: python:3.11
|
||||
before_script:
|
||||
- pip install --upgrade pip
|
||||
- pip install -r requirements.txt
|
||||
- pip install pytest pytest-cov
|
||||
script:
|
||||
# - pytest -v --maxfail=1 --disable-warnings -q
|
||||
- pytest --cov=models --cov=routes --cov-report=xml --cov-report=term-missing
|
||||
artifacts:
|
||||
paths:
|
||||
- coverage.xml
|
||||
reports:
|
||||
coverage_report:
|
||||
coverage_format: cobertura
|
||||
path: coverage.xml
|
||||
@@ -13,5 +13,6 @@ COPY . .
|
||||
# Expose FastAPI port
|
||||
EXPOSE 8000
|
||||
|
||||
# Run FastAPI app
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
# Default command runs main backend (FastAPI)
|
||||
# Can be overridden in docker-compose for MoE router
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
29
README.md
29
README.md
@@ -1,2 +1,31 @@
|
||||
# Awesome-Agentic-AI
|
||||
|
||||
## GitHub Badges (Actions + Codecov + Release + License)
|
||||
|
||||
[](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/actions/workflows/ci.yml)
|
||||
[](https://codecov.io/gh/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)
|
||||
[](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/releases)
|
||||
[](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/blob/main/LICENSE)
|
||||
|
||||
### Dependencies
|
||||
|
||||
#### Using Requires.io (tracks outdated packages)
|
||||
|
||||
[](https://requires.io/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/requirements/?branch=main)
|
||||
|
||||
#### Using Snyk (tracks vulnerabilities)
|
||||
|
||||
[](https://snyk.io/test/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)
|
||||
|
||||
|
||||
## GitLab Badges (Pipeline + Coverage + Release + License)
|
||||
|
||||
[](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/pipelines)
|
||||
[](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/commits/main)
|
||||
[](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/releases)
|
||||
[](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/blob/main/LICENSE)
|
||||
|
||||
### Dependencies
|
||||
|
||||
[](https://requires.io/gitlab/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/requirements/?branch=main)
|
||||
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
# agents/orchestrator.py
|
||||
|
||||
from agents.agent_registry import get_agent
|
||||
from models.model_router import get_routed_llm
|
||||
from utils.language_utils import detect_model_profile
|
||||
|
||||
# from deployment.deployment_router import route_deployment
|
||||
from deployment.deployment_router import DeploymentRouter
|
||||
|
||||
class Orchestrator:
|
||||
def __init__(self, memory):
|
||||
@@ -9,6 +12,7 @@ class Orchestrator:
|
||||
self.planner = get_agent("planner")
|
||||
self.executor = get_agent("executor")
|
||||
self.critic = get_agent("critic")
|
||||
self.deployment_router = DeploymentRouter()
|
||||
|
||||
async def run_task(self, task: str, user_id: str):
|
||||
context = self.memory.get_context()
|
||||
@@ -16,10 +20,6 @@ class Orchestrator:
|
||||
# executor = get_agent("executor")
|
||||
# critic = get_agent("critic")
|
||||
|
||||
# plan = planner.run(task, context)
|
||||
# result = executor.run(plan, context)
|
||||
# feedback = critic.run(result, context)
|
||||
|
||||
plan = self.planner.run(task, context)
|
||||
result = self.executor.run(plan, context)
|
||||
feedback = self.critic.run(result, context)
|
||||
@@ -42,10 +42,19 @@ class Orchestrator:
|
||||
|
||||
def route_request(self, prompt: str, context: dict):
    """Route a single request through the model router and deployment router.

    Args:
        prompt: Raw user prompt to classify and route.
        context: Request context; passed through to the deployment router
            (it reads ``deployment_mode`` among other keys — TODO confirm
            full expected schema against DeploymentRouter.route).

    Returns:
        dict with the selected model, detected profile, a stub response,
        and the deployment-routing result merged in.
    """
    # NOTE(review): the surrounding hunk shows this function inside
    # ``class Orchestrator`` and the body references ``self.deployment_router``,
    # so a ``self`` parameter is required; without it this raised NameError.
    profile = detect_model_profile(prompt)
    model = get_routed_llm(prompt)
    routed = self.deployment_router.route(prompt, context)
    return {
        "model": str(model),
        "profile": profile,
        # Missing comma after this entry was a SyntaxError in the original.
        "response": f"[{model}] response to: {prompt}",  # TODO: replace with actual model call
        **routed,
    }
|
||||
|
||||
|
||||
28
deployment/OLD.deployment_router.py
Normal file
28
deployment/OLD.deployment_router.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# deployment/deployment_router.py
|
||||
from models.model_router import get_routed_llm
|
||||
from models.moe_handler import MoEHandler
|
||||
|
||||
moe_handler = MoEHandler()
|
||||
|
||||
def route_deployment(prompt: str, context: dict):
|
||||
mode = context.get("deployment_mode", "hybrid")
|
||||
|
||||
if mode == "edge":
|
||||
model = get_routed_llm(prompt, lightweight=True)
|
||||
return {"mode": "edge", "response": model.generate(prompt)}
|
||||
|
||||
elif mode == "cloud":
|
||||
output = moe_handler.generate(prompt)
|
||||
return {"mode": "cloud", "response": output}
|
||||
|
||||
elif mode == "hybrid":
|
||||
edge_model = get_routed_llm(prompt, lightweight=True)
|
||||
edge_output = edge_model.generate(prompt)
|
||||
|
||||
cloud_output = moe_handler.generate(prompt)
|
||||
|
||||
return {
|
||||
"mode": "hybrid",
|
||||
"edge_output": edge_output,
|
||||
"cloud_output": cloud_output,
|
||||
}
|
||||
61
deployment/deployment_router.py
Normal file
61
deployment/deployment_router.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# deployment/deployment_router.py
|
||||
|
||||
from models.model_router import get_routed_llm
|
||||
from models.moe_handler import MoEHandler
|
||||
|
||||
class DeploymentRouter:
    """
    Handles routing of inference requests across edge, cloud, and hybrid
    deployments.

    Edge uses a lightweight routed model, cloud uses the MoE handler, and
    hybrid returns both an edge and a cloud output.
    """

    def __init__(self):
        # Cloud-side Mixture-of-Experts backend, created once per router.
        self.moe_handler = MoEHandler()

    def route(self, prompt: str, context: dict = None):
        """Route a request based on ``deployment_mode`` in *context*.

        Args:
            prompt: Text prompt to run inference on.
            context: Optional request context; only ``deployment_mode``
                ("edge" | "cloud" | "hybrid") is read here. Defaults to
                hybrid when absent.

        Returns:
            dict describing the mode used and the generated output(s).
        """
        if context is None:
            context = {}

        mode = context.get("deployment_mode", "hybrid")

        if mode == "edge":
            # Lightweight model for edge/mobile devices.
            model = get_routed_llm(prompt, lightweight=True)
            return {
                "mode": "edge",
                "model": str(model),
                "response": model.generate(prompt),
            }

        elif mode == "cloud":
            # Full MoE router in the cloud.
            output = self.moe_handler.generate(prompt)
            return {
                "mode": "cloud",
                "response": output,
            }

        elif mode == "hybrid":
            # Quick edge response + refined cloud response.
            edge_model = get_routed_llm(prompt, lightweight=True)
            edge_output = edge_model.generate(prompt)

            cloud_output = self.moe_handler.generate(prompt)

            return {
                "mode": "hybrid",
                "edge_output": edge_output,
                "cloud_output": cloud_output,
            }

        else:
            # Fallback if mode is unknown.
            model = get_routed_llm(prompt)
            return {
                "mode": "default",
                "model": str(model),
                "response": model.generate(prompt),
            }
|
||||
@@ -9,6 +9,7 @@ services:
|
||||
volumes:
|
||||
- .:/app
|
||||
restart: always
|
||||
command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
frontend:
|
||||
build:
|
||||
@@ -20,4 +21,23 @@ services:
|
||||
volumes:
|
||||
- ./web:/web
|
||||
working_dir: /web
|
||||
restart: always
|
||||
|
||||
moe-router:
|
||||
build: .
|
||||
container_name: agentic-moe-router
|
||||
ports:
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- .:/app
|
||||
restart: always
|
||||
command: ["uvicorn", "models.moe_handler:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: agentic-prometheus
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
restart: always
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
# governance/policy_registry.py
|
||||
##INFO:
|
||||
|
||||
import time
|
||||
|
||||
class PolicyRegistry:
|
||||
def __init__(self):
|
||||
self.policies = {} # {tenant_id: {allowed_roles, restricted_tasks, audit_level}}
|
||||
self.global_policies = self._load_global_policies()
|
||||
self.audit_log = [] # list of {timestamp, tenant_id, role, action, violation, reason}
|
||||
|
||||
def set_policy(self, tenant_id: str, allowed_roles: list, restricted_tasks: list, audit_level: str = "standard"):
|
||||
self.policies[tenant_id] = {
|
||||
@@ -60,4 +62,30 @@ class PolicyRegistry:
|
||||
def list_global_policies(self):
|
||||
return list(self.global_policies.keys())
|
||||
|
||||
# -----------------------------
|
||||
# New Audit & Violation Logging
|
||||
# -----------------------------
|
||||
def log_violation(self, tenant_id: str, role: str, action: str, reason: str):
    """Record a policy violation in the in-memory audit log.

    Args:
        tenant_id: Tenant that triggered the violation.
        role: Role of the acting principal.
        action: The restricted action that was attempted.
        reason: Human-readable explanation of why it was blocked.

    Returns:
        The appended audit-log entry dict.
    """
    entry = {
        "timestamp": time.time(),  # epoch seconds
        "tenant_id": tenant_id,
        "role": role,
        "action": action,
        "violation": True,
        "reason": reason,
    }
    self.audit_log.append(entry)
    return entry
|
||||
|
||||
def get_audit_log(self, tenant_id: str = None):
    """Return audit entries, optionally filtered to a single tenant.

    Note: when *tenant_id* is None the live list is returned (not a copy),
    so callers can mutate it — presumably intentional; verify if not.
    """
    if tenant_id:
        return [r for r in self.audit_log if r["tenant_id"] == tenant_id]
    return self.audit_log
|
||||
|
||||
def clear_audit_log(self, tenant_id: str = None):
    """Clear audit entries for one tenant, or the whole log when tenant_id is None."""
    if tenant_id:
        # Keep everything that does NOT belong to the given tenant.
        self.audit_log = [r for r in self.audit_log if r["tenant_id"] != tenant_id]
    else:
        self.audit_log = []
|
||||
|
||||
policy_registry = PolicyRegistry()
|
||||
|
||||
62
grafana/combined-overview-dashboard.json
Normal file
62
grafana/combined-overview-dashboard.json
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"id": null,
|
||||
"title": "Combined Overview Dashboard",
|
||||
"panels": [
|
||||
{
|
||||
"type": "piechart",
|
||||
"title": "Request Distribution by Model Type & Deployment Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total[5m])) by (model_type, deployment_mode)",
|
||||
"legendFormat": "{{model_type}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "95th Percentile Latency by Model Type & Deployment Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))",
|
||||
"legendFormat": "{{model_type}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Success Rate by Deployment Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total{success=\"True\"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100",
|
||||
"legendFormat": "{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Error Rate Over Time by Agent & Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (agent_role, deployment_mode)",
|
||||
"legendFormat": "{{agent_role}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Expert Usage by Tenant/Agent/Action/Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total[5m])) by (tenant_id, agent_role, action, deployment_mode)",
|
||||
"legendFormat": "{{tenant_id}}-{{agent_role}}-{{action}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
}
|
||||
],
|
||||
"time": { "from": "now-12h", "to": "now" }
|
||||
}
|
||||
28
grafana/deployment-mode-dashboard.json
Normal file
28
grafana/deployment-mode-dashboard.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"title": "Deployment Mode Overview",
|
||||
"panels": [
|
||||
{
|
||||
"type": "piechart",
|
||||
"title": "Request Distribution by Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total[5m])) by (deployment_mode)",
|
||||
"legendFormat": "{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Latency Trends by Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) by (deployment_mode)",
|
||||
"legendFormat": "{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
}
|
||||
],
|
||||
"time": { "from": "now-12h", "to": "now" }
|
||||
}
|
||||
18
grafana/expert-usage-dashboard.json
Normal file
18
grafana/expert-usage-dashboard.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": null,
|
||||
"title": "Expert Usage Dashboard",
|
||||
"panels": [
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Requests per Expert by Deployment Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total[5m])) by (tenant_id, agent_role, action, deployment_mode)",
|
||||
"legendFormat": "{{tenant_id}}-{{agent_role}}-{{action}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
}
|
||||
],
|
||||
"time": { "from": "now-6h", "to": "now" }
|
||||
}
|
||||
18
grafana/latency-dashboard.json
Normal file
18
grafana/latency-dashboard.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": null,
|
||||
"title": "Request Latency Dashboard",
|
||||
"panels": [
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Latency per Model Type & Deployment Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))",
|
||||
"legendFormat": "{{model_type}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
}
|
||||
],
|
||||
"time": { "from": "now-1h", "to": "now" }
|
||||
}
|
||||
55
grafana/policy-violations-dashboard.json
Normal file
55
grafana/policy-violations-dashboard.json
Normal file
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"id": null,
|
||||
"title": "Policy Violations Dashboard",
|
||||
"panels": [
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Violations Over Time by Tenant",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(policy_violations_total[5m])) by (tenant_id)",
|
||||
"legendFormat": "{{tenant_id}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Violations by Role",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(policy_violations_total[5m])) by (role)",
|
||||
"legendFormat": "{{role}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "table",
|
||||
"title": "Top Restricted Actions Triggered",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(policy_violations_total[5m])) by (action)",
|
||||
"legendFormat": "{{action}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus",
|
||||
"columns": [
|
||||
{ "text": "Action", "value": "action" },
|
||||
{ "text": "Violations", "value": "Value" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Total Violations (24h)",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(policy_violations_total[24h]))",
|
||||
"legendFormat": "Total"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
}
|
||||
],
|
||||
"time": { "from": "now-24h", "to": "now" }
|
||||
}
|
||||
79
grafana/sla-compliance-dashboard.json
Normal file
79
grafana/sla-compliance-dashboard.json
Normal file
@@ -0,0 +1,79 @@
|
||||
{
|
||||
"id": null,
|
||||
"title": "SLA Compliance Dashboard",
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Success Rate per Deployment Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total{success=\"True\"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100",
|
||||
"legendFormat": "{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Error Rate Over Time by Agent & Mode",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (agent_role, deployment_mode)",
|
||||
"legendFormat": "{{agent_role}}-{{deployment_mode}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Latency vs SLA Thresholds",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, deployment_mode))",
|
||||
"legendFormat": "Latency-{{deployment_mode}}"
|
||||
},
|
||||
{
|
||||
"expr": "avg_over_time(request_latency_ms_sum[5m] / request_latency_ms_count[5m]) > 500",
|
||||
"legendFormat": "SLA Breach Threshold"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "graph",
|
||||
"title": "Policy Violations vs SLA Breaches",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(policy_violations_total[5m])) by (tenant_id)",
|
||||
"legendFormat": "Violations-{{tenant_id}}"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (tenant_id)",
|
||||
"legendFormat": "Errors-{{tenant_id}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus"
|
||||
},
|
||||
{
|
||||
"type": "table",
|
||||
"title": "Top Tenants with SLA Breaches & Violations (24h)",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(policy_violations_total[24h])) by (tenant_id)",
|
||||
"legendFormat": "Violations-{{tenant_id}}"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(requests_total{success=\"False\"}[24h])) by (tenant_id)",
|
||||
"legendFormat": "Errors-{{tenant_id}}"
|
||||
}
|
||||
],
|
||||
"datasource": "Prometheus",
|
||||
"columns": [
|
||||
{ "text": "Tenant", "value": "tenant_id" },
|
||||
{ "text": "Violations (24h)", "value": "Value" },
|
||||
{ "text": "Errors (24h)", "value": "Value" }
|
||||
]
|
||||
}
|
||||
],
|
||||
"time": { "from": "now-24h", "to": "now" }
|
||||
}
|
||||
3
k8s/README-cert-manager-install.md
Normal file
3
k8s/README-cert-manager-install.md
Normal file
@@ -0,0 +1,3 @@
|
||||
```
|
||||
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.15.0/cert-manager.yaml
|
||||
```
|
||||
23
k8s/backend-deployment.yaml
Normal file
23
k8s/backend-deployment.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
# k8s/backend-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: agentic-backend
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: agentic-backend
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: agentic-backend
|
||||
spec:
|
||||
containers:
|
||||
- name: backend
|
||||
image: agentic-backend:latest
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
env:
|
||||
- name: ENV
|
||||
value: "production"
|
||||
13
k8s/backend-service.yaml
Normal file
13
k8s/backend-service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# k8s/backend-service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: agentic-backend-service
|
||||
spec:
|
||||
selector:
|
||||
app: agentic-backend
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
type: ClusterIP
|
||||
15
k8s/cluster-issuer.yaml
Normal file
15
k8s/cluster-issuer.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
# k8s/cluster-issuer.yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-prod
|
||||
spec:
|
||||
acme:
|
||||
email: your-email@example.com # replace with your email
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-prod-key
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
20
k8s/frontend-deployment.yaml
Normal file
20
k8s/frontend-deployment.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# k8s/frontend-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: agentic-frontend
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: agentic-frontend
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: agentic-frontend
|
||||
spec:
|
||||
containers:
|
||||
- name: frontend
|
||||
image: agentic-frontend:latest
|
||||
ports:
|
||||
- containerPort: 5173
|
||||
13
k8s/frontend-service.yaml
Normal file
13
k8s/frontend-service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# k8s/frontend-service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: agentic-frontend-service
|
||||
spec:
|
||||
selector:
|
||||
app: agentic-frontend
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 5173
|
||||
targetPort: 5173
|
||||
type: NodePort
|
||||
19
k8s/hpa-moe-router.yaml
Normal file
19
k8s/hpa-moe-router.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# k8s/hpa-moe-router.yaml
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: agentic-moe-router-hpa
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: agentic-moe-router
|
||||
minReplicas: 2
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
42
k8s/ingress.yaml
Normal file
42
k8s/ingress.yaml
Normal file
@@ -0,0 +1,42 @@
|
||||
# k8s/ingress.yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: agentic-ingress
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx
|
||||
nginx.ingress.kubernetes.io/rewrite-target: /$1
|
||||
# For Let's Encrypt TLS certificates (using cert-manager)
|
||||
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
spec:
|
||||
# tls:
|
||||
# - hosts:
|
||||
# - agentic.example.com # replace with your domain
|
||||
# secretName: agentic-tls
|
||||
rules:
|
||||
- host: agentic.local
|
||||
# Using cert-manager
|
||||
# - host: agentic.example.com # replace with your domain
|
||||
http:
|
||||
paths:
|
||||
- path: /api(/|$)(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: agentic-backend-service
|
||||
port:
|
||||
number: 8000
|
||||
- path: /moe(/|$)(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: agentic-moe-router-service
|
||||
port:
|
||||
number: 8080
|
||||
- path: /()(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: agentic-frontend-service
|
||||
port:
|
||||
number: 5173
|
||||
23
k8s/moe-router-deployment.yaml
Normal file
23
k8s/moe-router-deployment.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
# k8s/moe-router-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: agentic-moe-router
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: agentic-moe-router
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: agentic-moe-router
|
||||
spec:
|
||||
containers:
|
||||
- name: moe-router
|
||||
image: agentic-moe-router:latest
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
env:
|
||||
- name: ROUTING_MODE
|
||||
value: "capability_weighted"
|
||||
13
k8s/moe-router-service.yaml
Normal file
13
k8s/moe-router-service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# k8s/moe-router-service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: agentic-moe-router-service
|
||||
spec:
|
||||
selector:
|
||||
app: agentic-moe-router
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
type: ClusterIP
|
||||
5
main.py
5
main.py
@@ -13,6 +13,7 @@ from fastapi.requests import Request
|
||||
from fastapi.exception_handlers import http_exception_handler
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
|
||||
from routes.auth_routes import router as auth_router
|
||||
from routes.api import router as api_router
|
||||
from routes.data_routes import router as data_router
|
||||
from routes.dataset_routes import router as dataset_router
|
||||
@@ -118,6 +119,8 @@ async def global_exception_handler(request: Request, exc: Exception):
|
||||
)
|
||||
|
||||
##INFO: ✅ Register routes
|
||||
app.include_router(auth_router, prefix="/auth")
|
||||
|
||||
# app.include_router(api_router)
|
||||
app.include_router(api_router, prefix="/api")
|
||||
# data_router (/api/data/...), Means all endpoints in this router will be nested under /api
|
||||
@@ -139,6 +142,8 @@ app.include_router(scheduler_router, prefix="/schedule")
|
||||
app.include_router(calendar_router, prefix="/calendar")
|
||||
app.include_router(group_router, prefix="/group")
|
||||
|
||||
# app.include_router(metrics_router, prefix="/metrics")
|
||||
|
||||
##INFO: API route to Trigger Orchestration
|
||||
app.include_router(orchestration_router, prefix="/api")
|
||||
|
||||
|
||||
@@ -6,6 +6,9 @@ import 'package:http/http.dart' as http;
|
||||
class SyncService {
|
||||
static const String baseUrl = "https://your-api-endpoint.com/api/sync"; // Replace with actual base URL
|
||||
|
||||
// -----------------------------
|
||||
// APIs for agent memory/state
|
||||
// -----------------------------
|
||||
static Future<List<Map<String, dynamic>>> getAgentMemory(String tenantId, String agentRole) async {
|
||||
final uri = Uri.parse("$baseUrl/agent/memory?tenant_id=$tenantId&agent_role=$agentRole");
|
||||
final res = await http.get(uri);
|
||||
@@ -110,4 +113,56 @@ class SyncService {
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------
|
||||
// Hybrid Deployment Sync APIs
|
||||
// -----------------------------
|
||||
static final Map<String, dynamic> _localCache = {};
|
||||
|
||||
/// Run edge inference and cache result
|
||||
static Future<Map<String, dynamic>> runEdgeInference(
|
||||
String prompt, Future<String> Function(String) edgeModelFn) async {
|
||||
final edgeOutput = await edgeModelFn(prompt);
|
||||
|
||||
_localCache[prompt] = {
|
||||
"mode": "edge",
|
||||
"edge_output": edgeOutput,
|
||||
"cloud_output": null,
|
||||
"synced": false,
|
||||
};
|
||||
|
||||
return _localCache[prompt]!;
|
||||
}
|
||||
|
||||
/// Sync with cloud inference when online
|
||||
static Future<Map<String, dynamic>> syncWithCloud(String prompt) async {
|
||||
if (!_localCache.containsKey(prompt)) {
|
||||
throw Exception("No cached edge result for prompt: $prompt");
|
||||
}
|
||||
|
||||
final uri = Uri.parse("https://your-api-endpoint.com/api/inference");
|
||||
final res = await http.post(uri,
|
||||
headers: {"Content-Type": "application/json"},
|
||||
body: json.encode({"prompt": prompt, "deployment_mode": "cloud"})
|
||||
);
|
||||
|
||||
if (res.statusCode == 200) {
|
||||
final cloudOutput = json.decode(res.body);
|
||||
_localCache[prompt]!["cloud_output"] = cloudOutput["response"];
|
||||
_localCache[prompt]!["synced"] = true;
|
||||
return _localCache[prompt]!;
|
||||
} else {
|
||||
throw Exception("Cloud sync failed: ${res.statusCode}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Get cached result (edge or hybrid)
|
||||
static Map<String, dynamic>? getCachedResult(String prompt) {
|
||||
return _localCache[prompt];
|
||||
}
|
||||
|
||||
/// Clear cache
|
||||
static void clearCache() {
|
||||
_localCache.clear();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,16 +1,91 @@
|
||||
# models/moe_handler.py
|
||||
from models.base_model import BaseModel
|
||||
import random
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
class MoEHandler(BaseModel):
|
||||
def __init__(self):
|
||||
# stub: would load multiple experts and router
|
||||
self.experts = []
|
||||
class Expert:
    """A single expert in the MoE registry.

    The original chunk interleaved the removed stub implementation with the
    new one (duplicate, unreachable ``return`` statements); this is the
    reconstructed new version.
    """

    def __init__(self, name: str, capabilities: List[str], engine: str, weight: float = 1.0):
        self.name = name
        self.capabilities = capabilities  # capability tags used for routing
        self.engine = engine              # backing engine kind, e.g. "llm" / "slm"
        self.weight = weight              # routing weight for weighted selection

    def generate(self, prompt: str) -> str:
        # TODO: integrate with actual engine handler (llm, slm, vlm, etc.)
        return f"[{self.engine}:{self.name}] generated output for: {prompt}"

    def embed(self, text: str) -> List[float]:
        # TODO: integrate with embedding handler
        return [0.1, 0.2, 0.3]

    def analyze(self, input_data: dict) -> dict:
        # Stub analysis; replace with real per-expert analysis.
        return {"analysis": f"Stub analysis by {self.name}"}
|
||||
|
||||
# class MoEHandler(BaseModel):
|
||||
# def __init__(self):
|
||||
# # stub: would load multiple experts and router
|
||||
# self.experts = []
|
||||
|
||||
# def generate(self, prompt: str) -> str:
|
||||
# return "[MoE output stub]"
|
||||
|
||||
# def embed(self, text: str) -> list[float]:
|
||||
# return []
|
||||
|
||||
# def analyze(self, input_data: dict) -> dict:
|
||||
# return {"analysis": "MoE stub"}
|
||||
class MoEHandler(BaseModel):
    """Mixture-of-Experts handler.

    Keeps a registry of named experts and routes each generate/embed/analyze
    call to one of them according to ``routing_mode``:
    - "capability_weighted": weighted random choice among capable experts
    - "first_match": first capable expert in registry order
    - "round_robin": cycle through capable experts
    """

    def __init__(self):
        # Load multiple experts into the registry.
        self.experts: Dict[str, Expert] = {
            "expert_reasoning": Expert("expert_reasoning", ["reasoning"], "llm", weight=1.5),
            "expert_creative": Expert("expert_creative", ["creative"], "llm", weight=1.0),
            "expert_summary": Expert("expert_summary", ["summary"], "slm", weight=1.0),
            "expert_code": Expert("expert_code", ["code"], "llm", weight=0.8),
        }
        self.routing_mode = "capability_weighted"
        self.default_expert = "expert_reasoning"
        self._rr_index = 0  # round-robin cursor

    def _route(self, capability: Optional[str] = None) -> Expert:
        """Select an expert for *capability*; fall back to the default expert."""
        candidates = [e for e in self.experts.values()
                      if capability is None or capability in e.capabilities]
        if not candidates:
            return self.experts[self.default_expert]

        if self.routing_mode == "capability_weighted":
            # NOTE: random.choices makes this selection non-deterministic.
            weights = [e.weight for e in candidates]
            return random.choices(candidates, weights=weights, k=1)[0]
        elif self.routing_mode == "first_match":
            return candidates[0]
        elif self.routing_mode == "round_robin":
            expert = candidates[self._rr_index % len(candidates)]
            self._rr_index += 1
            return expert
        # Unknown routing mode: degrade to first match.
        return candidates[0]

    def generate(self, prompt: str, capability: Optional[str] = None) -> str:
        """Generate text via a routed expert; appends measured latency in ms."""
        expert = self._route(capability)
        start = time.time()
        output = expert.generate(prompt)
        latency = int((time.time() - start) * 1000)
        return f"{output} (latency={latency}ms)"

    def embed(self, text: str, capability: Optional[str] = None) -> List[float]:
        """Embed text via a routed expert."""
        expert = self._route(capability)
        return expert.embed(text)

    def analyze(self, input_data: dict, capability: Optional[str] = None) -> dict:
        """Analyze structured input via a routed expert."""
        expert = self._route(capability)
        return expert.analyze(input_data)

    def list_experts(self) -> List[Dict[str, Any]]:
        """Return a serializable summary of the expert registry."""
        return [{"name": e.name, "capabilities": e.capabilities, "engine": e.engine, "weight": e.weight}
                for e in self.experts.values()]

    def update_config(self, default_expert: Optional[str] = None, routing_mode: Optional[str] = None) -> Dict[str, str]:
        """Update routing configuration; unset arguments leave values unchanged."""
        if default_expert:
            self.default_expert = default_expert
        if routing_mode:
            self.routing_mode = routing_mode
        return {"default_expert": self.default_expert, "routing_mode": self.routing_mode}
|
||||
|
||||
12
monitoring/OLD.metrics_endpoint.py
Normal file
12
monitoring/OLD.metrics_endpoint.py
Normal file
@@ -0,0 +1,12 @@
|
||||
# monitoring/metrics_endpoint.py
"""Legacy Prometheus scrape endpoint exposing request count/latency metrics."""

from fastapi import APIRouter, Response
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST

router = APIRouter()

# Per-model-type request metrics.
REQUEST_COUNT = Counter("requests_total", "Total requests", ["model_type"])
LATENCY = Histogram("request_latency_ms", "Request latency (ms)", ["model_type"])

@router.get("/metrics")
def metrics():
    """Expose metrics in Prometheus text exposition format.

    Fix: return an explicit Response with CONTENT_TYPE_LATEST; returning the
    raw bytes from generate_latest() lets FastAPI serialize/label the payload
    as JSON, which Prometheus scrapers reject.
    """
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
|
||||
49
monitoring/alert-rules.yml
Normal file
49
monitoring/alert-rules.yml
Normal file
@@ -0,0 +1,49 @@
|
||||
groups:
  - name: agentic-alerts
    interval: 30s
    rules:
      # High latency alert
      - alert: HighLatency
        expr: histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, deployment_mode)) > 500
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High latency detected"
          description: "95th percentile latency > 500ms for 2 minutes"

      # SLA violation (success rate < 95%)
      - alert: SLAViolation
        expr: (sum(rate(requests_total{success="True"}[5m])) / sum(rate(requests_total[5m]))) * 100 < 95
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "SLA violation detected"
          description: "Success rate dropped below 95%"

      # Error spike alert
      - alert: ErrorSpike
        expr: sum(rate(requests_total{success="False"}[1m])) by (tenant_id, deployment_mode) > 10
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Error spike detected"
          description: "More than 10 failed requests per minute"

      # -----------------------------
      # Combined SLA + Policy Violation Alert
      # -----------------------------
      # Fix: the original expression joined a label-less vector with a
      # "by (tenant_id)" vector using a bare "and"; PromQL set operations
      # match on identical label sets, so the two sides never matched and
      # the alert could never fire. The per-tenant side now drives the
      # result and the global SLA condition is joined with "and on()".
      - alert: SLAWithPolicyViolation
        expr: |
          (sum(rate(policy_violations_total[5m])) by (tenant_id) > 5)
          and on()
          ((sum(rate(requests_total{success="True"}[5m])) / sum(rate(requests_total[5m]))) * 100 < 95)
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "SLA violation with policy violations"
          description: "Success rate <95% AND policy violations >5 in 5m window"
|
||||
24
monitoring/alertmanager-inhibit.yml
Normal file
24
monitoring/alertmanager-inhibit.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
# Alertmanager inhibition rules: suppress redundant alerts when a broader
# or more severe alert for the same scope is already firing.
inhibit_rules:
  # Suppress SLA violation alerts if HighLatency is already firing
  - source_match:
      alertname: "HighLatency"
    target_match:
      alertname: "SLAViolation"
    equal: ["deployment_mode"]
    # Explanation: If latency is already high, SLA violation alerts are redundant.

  # Suppress ErrorSpike alerts if SLAViolation is firing
  - source_match:
      alertname: "SLAViolation"
    target_match:
      alertname: "ErrorSpike"
    equal: ["agent_role", "deployment_mode"]
    # Explanation: SLA violation already covers error spikes, so suppress ErrorSpike.

  # Suppress lower-severity alerts when critical ones are active
  - source_match:
      severity: "critical"
    target_match:
      severity: "warning"
    equal: ["tenant_id", "deployment_mode"]
    # Explanation: If a critical alert is firing, warnings for same tenant/mode are suppressed.
|
||||
45
monitoring/alertmanager.yml
Normal file
45
monitoring/alertmanager.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
# Alertmanager main configuration: routing, inhibition, and email receiver.
global:
  resolve_timeout: 5m

route:
  receiver: 'default'

inhibit_rules:
  # High latency already firing -> same-mode SLA alerts are redundant.
  - source_match:
      alertname: "HighLatency"
    target_match:
      alertname: "SLAViolation"
    equal: ["deployment_mode"]

  # SLA violation already firing -> suppress per-scope error spikes.
  - source_match:
      alertname: "SLAViolation"
    target_match:
      alertname: "ErrorSpike"
    equal: ["agent_role", "deployment_mode"]

  # A critical alert suppresses warnings for the same tenant/mode.
  - source_match:
      severity: "critical"
    target_match:
      severity: "warning"
    equal: ["tenant_id", "deployment_mode"]

  # Suppress standalone SLA or Error alerts if combined SLA+PolicyViolation is firing
  - source_match:
      alertname: "SLAWithPolicyViolation"
    target_match:
      alertname: "SLAViolation"
    equal: ["tenant_id"]
  - source_match:
      alertname: "SLAWithPolicyViolation"
    target_match:
      alertname: "ErrorSpike"
    equal: ["tenant_id"]

receivers:
  - name: 'default'
    email_configs:
      - to: 'ops-team@example.com'
        from: 'alertmanager@example.com'
        smarthost: 'smtp.example.com:587'
        auth_username: 'alertmanager@example.com'
        # NOTE(review): plaintext SMTP credential committed to the repo —
        # move to auth_password_file / a secret store before production use.
        auth_password: 'yourpassword'
|
||||
@@ -16,15 +16,30 @@ class MetricsStore:
|
||||
"success": success,
|
||||
"latency": latency
|
||||
})
|
||||
def log(self, tenant_id: str, agent_role: str, action: str, success: bool, latency: float,
|
||||
deployment_mode: str = "default"
|
||||
):
|
||||
"""Log a single metric record with deployment_mode awareness."""
|
||||
self.records.append({
|
||||
"timestamp": time.time(),
|
||||
"tenant_id": tenant_id,
|
||||
"agent": agent_role,
|
||||
"action": action,
|
||||
"success": success,
|
||||
"latency": latency,
|
||||
"deployment_mode": deployment_mode
|
||||
})
|
||||
|
||||
    def get_all(self):
        """Return the raw list of metric records (callers share the list)."""
        return self.records
|
||||
|
||||
def get_summary(self, tenant_id: str = None):
|
||||
"""Summarize metrics, grouped by (agent, action, deployment_mode)."""
|
||||
filtered = [r for r in self.records if not tenant_id or r["tenant_id"] == tenant_id]
|
||||
summary = {}
|
||||
for r in filtered:
|
||||
key = (r["agent"], r["action"])
|
||||
# key = (r["agent"], r["action"])
|
||||
key = (r["agent"], r["action"], r.get("deployment_mode", "default"))
|
||||
if key not in summary:
|
||||
summary[key] = {"count": 0, "success": 0, "latency": 0.0}
|
||||
summary[key]["count"] += 1
|
||||
@@ -36,14 +51,38 @@ class MetricsStore:
|
||||
return summary
|
||||
|
||||
def get_agent_metrics(self, agent_role: str):
|
||||
"""Summarize metrics for a specific agent, broken down by deployment_mode."""
|
||||
filtered = [r for r in self.records if r["agent"] == agent_role]
|
||||
latency_values = [r["latency"] for r in filtered]
|
||||
success_count = sum(1 for r in filtered if r["success"])
|
||||
return {
|
||||
"total_actions": len(filtered),
|
||||
"avg_latency": sum(latency_values) / len(latency_values) if latency_values else 0,
|
||||
"success_rate": success_count / len(filtered) if filtered else 0,
|
||||
"latency_distribution": latency_values
|
||||
}
|
||||
# latency_values = [r["latency"] for r in filtered]
|
||||
# success_count = sum(1 for r in filtered if r["success"])
|
||||
|
||||
# return {
|
||||
# "total_actions": len(filtered),
|
||||
# "avg_latency": sum(latency_values) / len(latency_values) if latency_values else 0,
|
||||
# "success_rate": success_count / len(filtered) if filtered else 0,
|
||||
# "latency_distribution": latency_values
|
||||
# }
|
||||
|
||||
metrics_by_mode = defaultdict(lambda: {"count": 0, "latencies": [], "success": 0})
|
||||
|
||||
for r in filtered:
|
||||
mode = r.get("deployment_mode", "default")
|
||||
metrics_by_mode[mode]["count"] += 1
|
||||
metrics_by_mode[mode]["latencies"].append(r["latency"])
|
||||
metrics_by_mode[mode]["success"] += int(r["success"])
|
||||
|
||||
summary = {}
|
||||
for mode, stats in metrics_by_mode.items():
|
||||
count = stats["count"]
|
||||
latencies = stats["latencies"]
|
||||
success = stats["success"]
|
||||
summary[mode] = {
|
||||
"total_actions": count,
|
||||
"avg_latency": sum(latencies) / count if count else 0,
|
||||
"success_rate": success / count if count else 0,
|
||||
"latency_distribution": latencies
|
||||
}
|
||||
|
||||
return summary
|
||||
|
||||
metrics_store = MetricsStore()
|
||||
|
||||
15
monitoring/prometheus.yml
Normal file
15
monitoring/prometheus.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
# monitoring/prometheus.yml
global:
  scrape_interval: 15s

rule_files:
  - "alert-rules.yml"
  - "recording-rules.yml"

scrape_configs:
  - job_name: 'backend'
    # Fix: Prometheus targets must be host:port only — the URL path belongs
    # in metrics_path. Also corrects the "promethus" typo so the path matches
    # the backend's actual /admin/metrics/prometheus endpoint.
    metrics_path: /admin/metrics/prometheus
    static_configs:
      - targets: ['agentic-backend:8000']
  - job_name: 'moe-router'
    static_configs:
      - targets: ['agentic-moe-router:8080']
|
||||
27
monitoring/recording-rules.yml
Normal file
27
monitoring/recording-rules.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Recording rules precompute deployment-mode aggregations used by dashboards
# and the alert rules (names follow the metric:aggregation_by_mode pattern).
groups:
  - name: agentic-recording-rules
    interval: 30s
    rules:
      # -----------------------------
      # Latency aggregations
      # -----------------------------
      - record: request_latency_ms:avg_by_mode
        expr: avg(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) by (model_type, deployment_mode)

      - record: request_latency_ms:p95_by_mode
        expr: histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))

      # -----------------------------
      # Request counts
      # -----------------------------
      - record: requests_total:rate_by_mode
        expr: sum(rate(requests_total[5m])) by (model_type, deployment_mode)

      - record: requests_total:success_rate_by_mode
        expr: sum(rate(requests_total{success="True"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100

      # -----------------------------
      # Error counts
      # -----------------------------
      - record: requests_total:error_rate_by_mode
        expr: sum(rate(requests_total{success="False"}[5m])) by (agent_role, deployment_mode)
|
||||
@@ -1,7 +1,11 @@
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
# routes/auth_routes.py
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, status
|
||||
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
||||
from datetime import datetime, timedelta
|
||||
from jose import JWTError, jwt
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
|
||||
SECRET_KEY = "your-secret-key"
|
||||
ALGORITHM = "HS256"
|
||||
@@ -10,35 +14,79 @@ ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
||||
router = APIRouter()
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="auth/token")
|
||||
|
||||
# Tenant-aware user registry
|
||||
fake_users_db = {
|
||||
"tony": {"username": "tony", "password": "agentic123"},
|
||||
"guest": {"username": "guest", "password": "guest123"}
|
||||
# "tony": {"username": "tony", "password": "agentic123"},
|
||||
# "guest": {"username": "guest", "password": "guest123"}
|
||||
"tony": {"username": "tony", "password": "agentic123", "tenant_id": "tenantA", "roles": ["admin"]},
|
||||
"guest": {"username": "guest", "password": "guest123", "tenant_id": "tenantB", "roles": ["tenant"]}
|
||||
}
|
||||
|
||||
def authenticate_user(username: str, password: str):
|
||||
class AuthUser(BaseModel):
    """Authenticated principal reconstructed from the JWT claims."""
    username: str
    tenant_id: Optional[str]  # None when the user has no tenant scope
    roles: List[str]  # e.g. ["admin"] or ["tenant"]; consumed by require_roles
||||
|
||||
# def authenticate_user(username: str, password: str):
|
||||
# user = fake_users_db.get(username)
|
||||
# if not user or user["password"] != password:
|
||||
# return None
|
||||
# return user
|
||||
def authenticate_user(username: str, password: str) -> Optional[AuthUser]:
    """Check credentials against the in-memory registry.

    Returns an AuthUser on success, None for an unknown user or wrong
    password.

    Fix: a stale "return user" (raw dict) was left above the AuthUser
    construction, making the typed return unreachable and breaking callers
    such as create_access_token(user), which expects AuthUser attributes.
    """
    user = fake_users_db.get(username)
    if not user or user["password"] != password:
        return None
    return AuthUser(username=user["username"], tenant_id=user["tenant_id"], roles=user["roles"])
|
||||
|
||||
def create_access_token(data: dict, expires_delta: timedelta = None):
|
||||
to_encode = data.copy()
|
||||
expire = datetime.utcnow() + (expires_delta or timedelta(minutes=15))
|
||||
to_encode.update({"exp": expire})
|
||||
# def create_access_token(data: dict, expires_delta: timedelta = None):
|
||||
# to_encode = data.copy()
|
||||
# expire = datetime.utcnow() + (expires_delta or timedelta(minutes=15))
|
||||
# to_encode.update({"exp": expire})
|
||||
# return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
|
||||
def create_access_token(user: AuthUser, expires_delta: timedelta = None):
    """Issue a signed JWT carrying the user's identity, tenant and roles.

    *expires_delta* overrides the default ACCESS_TOKEN_EXPIRE_MINUTES
    lifetime. Claims: sub (username), tenant_id, roles, exp.
    """
    # NOTE(review): datetime.utcnow() is naive and deprecated — consider
    # datetime.now(timezone.utc); confirm jose treats the naive exp as UTC.
    expire = datetime.utcnow() + (expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
    to_encode = {"sub": user.username, "tenant_id": user.tenant_id, "roles": user.roles, "exp": expire}
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
|
||||
|
||||
def decode_token(token: str) -> AuthUser:
    """Decode and validate a bearer JWT, rebuilding the AuthUser claims.

    Raises HTTP 403 on any signature/expiry/format error (JWTError).
    """
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        return AuthUser(username=payload.get("sub"), tenant_id=payload.get("tenant_id"), roles=payload.get("roles", []))
    except JWTError:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid token")
|
||||
|
||||
def get_current_user(token: str = Depends(oauth2_scheme)) -> AuthUser:
    """FastAPI dependency: resolve the bearer token to an AuthUser (403 on failure)."""
    return decode_token(token)
|
||||
|
||||
def require_roles(*required: str):
    """Dependency factory: grant access only to users holding at least one
    of *required* roles; otherwise raise HTTP 403."""
    allowed = set(required)

    def wrapper(user: AuthUser = Depends(get_current_user)) -> AuthUser:
        if allowed.isdisjoint(user.roles):
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Insufficient privileges")
        return user

    return wrapper
|
||||
|
||||
# @router.post("/auth/token")
|
||||
# def login(form_data: OAuth2PasswordRequestForm = Depends()):
|
||||
# user = authenticate_user(form_data.username, form_data.password)
|
||||
# if not user:
|
||||
# raise HTTPException(status_code=401, detail="Invalid credentials")
|
||||
# token = create_access_token(data={"sub": user["username"]})
|
||||
# return {"access_token": token, "token_type": "bearer"}
|
||||
@router.post("/auth/token")
def login(form_data: OAuth2PasswordRequestForm = Depends()):
    """OAuth2 password login: exchange username/password for a bearer JWT.

    Fix: a stale call `create_access_token(data={"sub": ...})` was left
    above the correct call — the current signature takes an AuthUser, so
    the stale line raised TypeError on every login.
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(status_code=401, detail="Invalid credentials")
    token = create_access_token(user)
    return {"access_token": token, "token_type": "bearer"}
|
||||
|
||||
# @router.get("/auth/me")
|
||||
# def read_users_me(token: str = Depends(oauth2_scheme)):
|
||||
# try:
|
||||
# payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
|
||||
# return {"username": payload.get("sub")}
|
||||
# except JWTError:
|
||||
# raise HTTPException(status_code=403, detail="Invalid token")
|
||||
@router.get("/auth/me")
def read_users_me(user: AuthUser = Depends(get_current_user)):
    """Return the authenticated caller's identity claims.

    Fix: a stale token-decoding implementation preceded this def and
    shadowed/duplicated it; only the dependency-injected version is kept
    (token validation lives in get_current_user/decode_token).
    """
    return {"username": user.username, "tenant_id": user.tenant_id, "roles": user.roles}
|
||||
|
||||
@@ -1,44 +1,119 @@
|
||||
# routes/inference_routes.py
|
||||
from fastapi import APIRouter
|
||||
|
||||
# from fastapi import APIRouter, Body, HTTPException
|
||||
from fastapi import APIRouter, Body, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from models.llm_loader import get_llm
|
||||
from agents.orchestrator import route_request
|
||||
from models.registry import get_model
|
||||
from models.moe_handler import MoEHandler
|
||||
from auth.security import require_roles, get_current_user
|
||||
from auth.security import AuthUser # Optional: for typing
|
||||
|
||||
router = APIRouter()
|
||||
# llm = get_llm()
|
||||
moe_handler = MoEHandler()
|
||||
|
||||
class EdgePrompt(BaseModel):
    """Request body for the lightweight edge-labelling endpoint."""
    prompt: str
||||
|
||||
# @router.post("/llm/infer-edge")
|
||||
# def infer_edge_label(data: EdgePrompt):
|
||||
# label = llm(data.prompt).strip()
|
||||
# return {"label": label}
|
||||
class HybridPrompt(BaseModel):
    """Request body for deployment-aware inference."""
    prompt: str
    # Pydantic deep-copies field defaults per instance, so the mutable {} is
    # safe here (unlike a plain-function default argument).
    context: Optional[dict] = {}
    deployment_mode: Optional[str] = "hybrid"  # edge | cloud | hybrid
||||
|
||||
# Public or low-risk: keep open, or add tenant auth if needed
|
||||
# Public or low-risk: keep open, or add tenant auth if needed
@router.post("/llm/infer-edge")
def infer_edge_label(data: EdgePrompt):
    """Generate a short label for the prompt using the registry's LLM.

    Intentionally unauthenticated (see note above).
    """
    llm = get_model("llm")
    label = llm.generate(data.prompt).strip()
    return {"label": label}
|
||||
|
||||
# @router.post("/admin/infer")
|
||||
# def infer(prompt: str, context: dict = {}):
|
||||
# return route_request(prompt, context)
|
||||
# Admin-only unified inference (no deployment awareness)
@router.post("/admin/infer", dependencies=[Depends(require_roles("admin"))])
def infer(prompt: str = Body(...), context: dict = Body(default={})):
    """Route a prompt through the orchestrator (admin only).

    Fix: removed the duplicate bare @router.post("/admin/infer") decorator,
    which registered the endpoint a second time WITHOUT the admin guard.
    """
    return route_request(prompt, context)
|
||||
|
||||
# -----------------------------
|
||||
# New unified inference route
|
||||
# -----------------------------
|
||||
# Deployment-aware inference (tenant allowed)
@router.post("/admin/infer/deployment", dependencies=[Depends(require_roles("admin", "tenant"))])
def infer_with_deployment(data: HybridPrompt, user: AuthUser = Depends(get_current_user)):
    """
    Run inference with deployment_mode awareness.
    deployment_mode can be: edge, cloud, hybrid.

    Fix: removed a stray @router.post("/admin/infer/{model_type}") decorator
    that was stacked on this function, which exposed it on the model-type
    path WITHOUT the admin-only role guard.
    """
    try:
        # Tenant scoping: propagate deployment mode and the caller's tenant
        # into the orchestrator context.
        ctx = {**(data.context or {}), "deployment_mode": data.deployment_mode}
        if user.tenant_id:
            ctx["tenant_id"] = user.tenant_id
        result = route_request(data.prompt, ctx)
        return {"deployment_mode": data.deployment_mode, "tenant_id": user.tenant_id, "result": result}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# Admin-only model-specific inference
|
||||
# @router.post("/admin/infer/{model_type}")
|
||||
# def infer_with_model(model_type: str, prompt: str = Body(...)):
|
||||
# """
|
||||
# Run inference with a specific model type (llm, slm, vlm, moe, lcm, lam, mlm, embed).
|
||||
# """
|
||||
# try:
|
||||
# if model_type == "moe":
|
||||
# output = moe_handler.generate(prompt)
|
||||
# return {"model_type": "moe", "output": output, "experts": moe_handler.list_experts()}
|
||||
# model = get_model(model_type)
|
||||
# if model_type in ["embed", "vlm"]:
|
||||
# vector = model.embed(prompt)
|
||||
# return {"model_type": model_type, "vector": vector}
|
||||
# else:
|
||||
# output = model.generate(prompt)
|
||||
# return {"model_type": model_type, "output": output}
|
||||
# except Exception as e:
|
||||
# raise HTTPException(status_code=500, detail=str(e))
|
||||
@router.post("/admin/infer/{model_type}", dependencies=[Depends(require_roles("admin"))])
def infer_with_model(model_type: str, prompt: str = Body(...)):
    """
    Run inference with a specific model type (llm, slm, vlm, moe, lcm, lam, mlm, embed).

    Fix: a duplicate, unguarded copy of the body was pasted before the try
    block; it returned early, skipping the MoE branch and the error handling
    below. The dead pre-try body is removed.
    """
    try:
        if model_type == "moe":
            output = moe_handler.generate(prompt)
            return {"model_type": "moe", "output": output, "experts": moe_handler.list_experts()}
        model = get_model(model_type)
        # Embedding-style models return a vector; the rest return text.
        if model_type in ["embed", "vlm"]:
            vector = model.embed(prompt)
            return {"model_type": model_type, "vector": vector}
        else:
            output = model.generate(prompt)
            return {"model_type": model_type, "output": output}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# -----------------------------
|
||||
# MoE-specific management routes
|
||||
# -----------------------------
|
||||
# @router.get("/admin/moe/experts")
|
||||
# def list_moe_experts():
|
||||
# return moe_handler.list_experts()
|
||||
@router.get("/admin/moe/experts", dependencies=[Depends(require_roles("admin"))])
def list_moe_experts():
    """List the MoE expert registry (admin only)."""
    return moe_handler.list_experts()
|
||||
|
||||
# @router.post("/admin/moe/config")
|
||||
# def update_moe_config(default_expert: Optional[str] = Body(None), routing_mode: Optional[str] = Body(None)):
|
||||
# return moe_handler.update_config(default_expert=default_expert, routing_mode=routing_mode)
|
||||
@router.post("/admin/moe/config", dependencies=[Depends(require_roles("admin"))])
def update_moe_config(default_expert: Optional[str] = Body(None), routing_mode: Optional[str] = Body(None)):
    """Update MoE routing config (admin only); returns the effective settings."""
    return moe_handler.update_config(default_expert=default_expert, routing_mode=routing_mode)
|
||||
|
||||
@@ -1,20 +1,135 @@
|
||||
# routes/metrics_routes.py
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, Response, Depends
|
||||
from monitoring.metrics_store import metrics_store
|
||||
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
|
||||
from auth.security import require_roles, get_current_user, AuthUser
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("/admin/metrics/log")
|
||||
def log_metric(tenant_id: str, agent_role: str, action: str, success: bool, latency: float):
|
||||
metrics_store.log(tenant_id, agent_role, action, success, latency)
|
||||
# -----------------------------
|
||||
# Prometheus metrics definitions
|
||||
# -----------------------------
|
||||
# -----------------------------
# Prometheus metrics definitions
# -----------------------------
REQUEST_COUNT = Counter(
    "requests_total",
    "Total requests handled",
    ["tenant_id", "agent_role", "action", "success", "deployment_mode"]
)
LATENCY = Histogram(
    "request_latency_ms",
    "Request latency in milliseconds",
    ["tenant_id", "agent_role", "action", "deployment_mode"]
)

POLICY_VIOLATIONS = Counter(
    "policy_violations_total",
    "Total policy violations",
    ["tenant_id", "role", "action"]
)


def record_policy_violation(tenant_id: str, role: str, action: str) -> None:
    """Increment the policy-violation counter for a single violation.

    Fix: the module previously executed POLICY_VIOLATIONS.labels(...).inc()
    at import time with undefined names (tenant_id/role/action), raising
    NameError as soon as the module was imported. The increment is wrapped
    in this helper so callers can invoke it with real values.
    """
    POLICY_VIOLATIONS.labels(tenant_id=tenant_id, role=role, action=action).inc()
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Extended logging endpoint
|
||||
# -----------------------------
|
||||
# # @router.post("/admin/metrics/log")
|
||||
# # def log_metric(tenant_id: str, agent_role: str, action: str, success: bool, latency: float):
|
||||
# # metrics_store.log(tenant_id, agent_role, action, success, latency)
|
||||
|
||||
# # # Update Prometheus counters
|
||||
# # REQUEST_COUNT.labels(
|
||||
# # tenant_id=tenant_id, agent_role=agent_role, action=action, success=str(success)
|
||||
# # ).inc()
|
||||
# # LATENCY.labels(tenant_id=tenant_id, agent_role=agent_role, action=action).observe(latency)
|
||||
|
||||
# # return {"status": "logged"}
|
||||
# @router.post("/admin/metrics/log")
|
||||
# def log_metric(
|
||||
# tenant_id: str,
|
||||
# agent_role: str,
|
||||
# action: str,
|
||||
# success: bool,
|
||||
# latency: float,
|
||||
# deployment_mode: str = "default"
|
||||
# ):
|
||||
# # Log into custom store
|
||||
# metrics_store.log(tenant_id, agent_role, action, success, latency)
|
||||
|
||||
# # Update Prometheus counters with deployment_mode
|
||||
# REQUEST_COUNT.labels(
|
||||
# tenant_id=tenant_id,
|
||||
# agent_role=agent_role,
|
||||
# action=action,
|
||||
# success=str(success),
|
||||
# deployment_mode=deployment_mode
|
||||
# ).inc()
|
||||
|
||||
# LATENCY.labels(
|
||||
# tenant_id=tenant_id,
|
||||
# agent_role=agent_role,
|
||||
# action=action,
|
||||
# deployment_mode=deployment_mode
|
||||
# ).observe(latency)
|
||||
|
||||
# return {"status": "logged"}
|
||||
@router.post("/admin/metrics/log", dependencies=[Depends(require_roles("admin", "tenant"))])
def log_metric(
    tenant_id: str,
    agent_role: str,
    action: str,
    success: bool,
    latency: float,
    deployment_mode: str = "default",
    user: AuthUser = Depends(get_current_user)
):
    """Record one request metric in the in-memory store and in Prometheus.

    Tenant users may only log for their own tenant; admins may log for any.
    """
    # Enforce tenant scoping: only allow logging for own tenant unless admin
    if "admin" not in user.roles and user.tenant_id and user.tenant_id != tenant_id:
        from fastapi import HTTPException
        raise HTTPException(status_code=403, detail="Tenant mismatch")

    metrics_store.log(tenant_id, agent_role, action, success, latency, deployment_mode)
    # success is stringified ("True"/"False") to match the label values the
    # alert rules query (requests_total{success="True"}).
    REQUEST_COUNT.labels(
        tenant_id=tenant_id, agent_role=agent_role, action=action, success=str(success), deployment_mode=deployment_mode
    ).inc()
    LATENCY.labels(
        tenant_id=tenant_id, agent_role=agent_role, action=action, deployment_mode=deployment_mode
    ).observe(latency)

    return {"status": "logged"}
|
||||
|
||||
# -----------------------------
# Summary endpoints
# -----------------------------
@router.get("/admin/metrics/all", dependencies=[Depends(require_roles("admin"))])
def get_all_metrics():
    """Return every raw metric record (admin only).

    Fix: removed the stale bare @router.get("/admin/metrics/all") decorator,
    which double-registered the route without the admin guard.
    """
    return {"metrics": metrics_store.get_all()}
|
||||
|
||||
@router.get("/admin/metrics/summary", dependencies=[Depends(require_roles("admin", "tenant"))])
def get_summary(tenant_id: str = None, user: AuthUser = Depends(get_current_user)):
    """Summarize metrics; tenant users are scoped to their own tenant.

    Fix: removed the stale unguarded def that preceded this one and
    registered the endpoint without authentication.
    """
    # Tenant users see only their tenant unless admin
    effective_tenant = tenant_id if "admin" in user.roles else user.tenant_id
    return {"summary": metrics_store.get_summary(effective_tenant)}
|
||||
|
||||
# -----------------------------
|
||||
# Prometheus scrape endpoint — usually cluster-internal; keep admin-only
|
||||
# -----------------------------
|
||||
# @router.get("/admin/metrics/prometheus")
|
||||
# def prometheus_metrics():
|
||||
# """Expose metrics in Prometheus format for scraping."""
|
||||
# return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
|
||||
# Prometheus scrape endpoint — usually cluster-internal; keep admin-only.
@router.get("/admin/metrics/prometheus", dependencies=[Depends(require_roles("admin"))])
def prometheus_metrics():
    """Expose metrics in Prometheus text format for scraping."""
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
@@ -1,35 +1,67 @@
|
||||
# routes/security_routes.py
|
||||
##INFO: Security Routes
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from security.rbac_registry import rbac_registry
|
||||
from security.action_validator import action_validator
|
||||
from tenants.rbac_guard import check_access
|
||||
from governance.policy_registry import policy_registry
|
||||
from routes.auth_routes import require_roles, get_current_user, AuthUser
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("/admin/security/role", dependencies=[Depends(require_roles("admin"))])
def define_role(agent_role: str, allowed_actions: list[str]):
    """Create or update an RBAC role definition (admin only).

    Fix: removed the stale bare @router.post("/admin/security/role")
    decorator from the pre-auth version, which registered the endpoint
    a second time without the admin guard.
    """
    return rbac_registry.define_role(agent_role, allowed_actions)
|
||||
|
||||
@router.get("/admin/security/roles", dependencies=[Depends(require_roles("admin"))])
def get_all_roles():
    """List all RBAC role definitions (admin only).

    Fix: removed the stale bare @router.get("/admin/security/roles")
    decorator, which double-registered the route without the admin guard.
    """
    return {"roles": rbac_registry.get_all_roles()}
|
||||
|
||||
@router.post("/admin/security/validate", dependencies=[Depends(require_roles("admin", "tenant"))])
def validate_action(agent_role: str, action: str, tenant_id: str, user: AuthUser = Depends(get_current_user)):
    """Validate whether *agent_role* may perform *action* for *tenant_id*.

    Fix: removed the stale unguarded decorator/def left above this secured
    version, which registered the endpoint twice (once without auth).
    """
    # Enforce tenant scoping: tenant users may only validate their own tenant.
    if "admin" not in user.roles and user.tenant_id != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    # Check against policy registry: a disallowed role or restricted task is
    # logged as a violation and the violation record is returned.
    policy = policy_registry.get_policy(tenant_id)
    if agent_role not in policy["allowed_roles"]:
        return policy_registry.log_violation(tenant_id, agent_role, action, "Role not allowed")
    if action in policy["restricted_tasks"]:
        return policy_registry.log_violation(tenant_id, agent_role, action, "Action restricted")
    return action_validator.validate(agent_role, action, tenant_id)
|
||||
|
||||
@router.get("/admin/security/audit", dependencies=[Depends(require_roles("admin"))])
def get_audit_log(tenant_id: str):
    """Return the action-validation audit trail for a tenant (admin only).

    Fix: removed the stale bare @router.get("/admin/security/audit")
    decorator, which double-registered the route without the admin guard.
    """
    return {"audit": action_validator.get_audit(tenant_id)}
|
||||
|
||||
@router.post("/admin/security/access-check", dependencies=[Depends(require_roles("admin", "tenant"))])
def check_rbac_access(tenant_id: str, role: str, action: str, resource: str, user: AuthUser = Depends(get_current_user)):
    """Check tenant-scoped RBAC access for (role, action).

    Fix: removed the stale unguarded decorator/def from the pre-auth version
    that double-registered this route without authentication.
    """
    # Tenant users may only query their own tenant.
    if "admin" not in user.roles and user.tenant_id != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    # NOTE(review): the `resource` parameter is accepted but not passed to
    # check_access — confirm whether resource-level checks are intended.
    allowed = check_access(tenant_id, role, action)
    return {"access_granted": allowed}
|
||||
|
||||
# NOTE(review): removed a stray unguarded @router.get duplicate decorator
# that would have exposed this admin-only listing without the role check.
@router.get("/admin/security/global-policies", dependencies=[Depends(require_roles("admin"))])
def list_global_policies():
    """List the platform-wide security policies (admin only)."""
    return {"global_policies": policy_registry.list_global_policies()}
|
||||
|
||||
47
tests/test_inference_routes.py
Normal file
47
tests/test_inference_routes.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# tests/test_inference_routes.py
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from routes.inference_routes import router
|
||||
from fastapi import FastAPI
|
||||
|
||||
# Build a throwaway FastAPI app that hosts only the inference routes, so the
# endpoints can be exercised in-process without a running server.
app = FastAPI()
app.include_router(router)
client = TestClient(app)
|
||||
|
||||
def test_infer_edge_label():
    """Edge inference should succeed and return a labelled classification."""
    response = client.post("/llm/infer-edge", json={"prompt": "classify sentiment"})
    assert response.status_code == 200
    assert "label" in response.json()
|
||||
|
||||
def test_admin_infer():
    """Admin inference accepts a prompt+context payload and returns a dict."""
    response = client.post("/admin/infer", json={"prompt": "Hello", "context": {}})
    assert response.status_code == 200
    assert isinstance(response.json(), dict)
|
||||
|
||||
def test_infer_with_llm():
    """Explicit LLM inference should report model_type == 'llm'."""
    response = client.post("/admin/infer/llm", json={"prompt": "Hello"})
    assert response.status_code == 200
    assert response.json()["model_type"] == "llm"
|
||||
|
||||
def test_infer_with_moe():
    """MoE inference reports its model type and includes an output field."""
    response = client.post("/admin/infer/moe", json={"prompt": "Explain transformers"})
    assert response.status_code == 200
    body = response.json()
    assert body["model_type"] == "moe"
    assert "output" in body
|
||||
|
||||
def test_list_moe_experts():
    """Expert listing returns a list that includes the reasoning expert."""
    response = client.get("/admin/moe/experts")
    assert response.status_code == 200
    listed = response.json()
    assert isinstance(listed, list)
    assert any("expert_reasoning" in entry["name"] for entry in listed)
|
||||
|
||||
def test_update_moe_config():
    """Posting new MoE settings echoes them back in the stored config."""
    payload = {"default_expert": "expert_summary", "routing_mode": "first_match"}
    response = client.post("/admin/moe/config", json=payload)
    assert response.status_code == 200
    stored = response.json()
    assert stored["default_expert"] == "expert_summary"
    assert stored["routing_mode"] == "first_match"
|
||||
34
tests/test_moe_handler.py
Normal file
34
tests/test_moe_handler.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# tests/test_moe_handler.py
|
||||
import pytest
|
||||
from models.moe_handler import MoEHandler, ExpertSpec
|
||||
|
||||
@pytest.fixture
def moe():
    """Return a fresh MoEHandler instance for each test (no shared state)."""
    return MoEHandler()
|
||||
|
||||
def test_list_experts(moe):
    """list_experts returns a list containing the reasoning expert."""
    available = moe.list_experts()
    assert isinstance(available, list)
    assert any(entry["name"] == "expert_reasoning" for entry in available)
|
||||
|
||||
def test_generate_default(moe):
    """Default generation yields a result mentioning output or latency."""
    result = moe.generate("Hello world")
    assert "output" in result or "latency" in result
|
||||
|
||||
def test_generate_with_capability(moe):
    """Capability routing should surface the summary expert's output."""
    result = moe.generate("Summarize this text", capability="summary")
    assert "summary" in result.lower() or "latency" in result
|
||||
|
||||
def test_embed(moe):
    """Embedding produces a vector represented as a list of floats."""
    embedding = moe.embed("Artificial intelligence")
    assert isinstance(embedding, list)
    assert all(isinstance(component, float) for component in embedding)
|
||||
|
||||
def test_analyze(moe):
    """analyze wraps its findings under an 'analysis' key."""
    report = moe.analyze({"data": "test"})
    assert "analysis" in report
|
||||
|
||||
def test_update_config(moe):
    """update_config applies and echoes the new expert and routing mode."""
    updated = moe.update_config(default_expert="expert_creative", routing_mode="round_robin")
    assert updated["default_expert"] == "expert_creative"
    assert updated["routing_mode"] == "round_robin"
|
||||
Reference in New Issue
Block a user