Add/update security and governance related changes

This commit is contained in:
2025-12-05 19:03:04 +09:00
parent 73d941d98a
commit f773f3bae8
41 changed files with 1469 additions and 87 deletions

38
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
name: CI
on:
push:
branches: [ "main", "develop" ]
pull_request:
branches: [ "main", "develop" ]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest pytest-cov
# - name: Run tests
# run: pytest -v --maxfail=1 --disable-warnings -q
- name: Run tests with coverage
run: pytest --cov=models --cov=routes --cov-report=xml --cov-report=term-missing
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }} # add in repo secrets
files: ./coverage.xml
fail_ci_if_error: true

27
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,27 @@
stages:
- test
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
cache:
paths:
- .cache/pip
test:
stage: test
image: python:3.11
before_script:
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install pytest pytest-cov
script:
# - pytest -v --maxfail=1 --disable-warnings -q
- pytest --cov=models --cov=routes --cov-report=xml --cov-report=term-missing
artifacts:
paths:
- coverage.xml
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml

View File

@@ -13,5 +13,6 @@ COPY . .
# Expose FastAPI port
EXPOSE 8000
# Run FastAPI app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
# Default command runs main backend (FastAPI)
# Can be overridden in docker-compose for MoE router
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -1,2 +1,31 @@
# Awesome-Agentic-AI
## GitHub Badges (Actions + Codecov + Release + License)
[![CI](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/actions/workflows/ci.yml/badge.svg)](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/branch/main/graph/badge.svg?token=<CODECOV_TOKEN>)](https://codecov.io/gh/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)
[![GitHub release](https://img.shields.io/github/v/release/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>?include_prereleases&sort=semver)](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/releases)
[![License](https://img.shields.io/github/license/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)](https://github.com/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/blob/main/LICENSE)
### Dependencies
#### Using Requires.io (tracks outdated packages)
[![Requirements Status](https://requires.io/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/requirements.svg?branch=main)](https://requires.io/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/requirements/?branch=main)
#### Using Snyk (tracks vulnerabilities)
[![Known Vulnerabilities](https://snyk.io/test/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>/badge.svg)](https://snyk.io/test/github/<YOUR_GITHUB_USERNAME>/<YOUR_REPO_NAME>)
## GitLab Badges (Pipeline + Coverage + Release + License)
[![pipeline status](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/badges/main/pipeline.svg)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/pipelines)
[![coverage report](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/badges/main/coverage.svg)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/commits/main)
[![release](https://img.shields.io/gitlab/v/release/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>?sort=semver)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/releases)
[![License](https://img.shields.io/gitlab/license/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>)](https://gitlab.com/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/-/blob/main/LICENSE)
### Dependencies
[![Requirements Status](https://requires.io/gitlab/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/requirements.svg?branch=main)](https://requires.io/gitlab/<YOUR_GITLAB_NAMESPACE>/<YOUR_REPO_NAME>/requirements/?branch=main)

View File

@@ -1,7 +1,10 @@
# agents/orchestrator.py
from agents.agent_registry import get_agent
from models.model_router import get_routed_llm
from utils.language_utils import detect_model_profile
# from deployment.deployment_router import route_deployment
from deployment.deployment_router import DeploymentRouter
class Orchestrator:
def __init__(self, memory):
@@ -9,6 +12,7 @@ class Orchestrator:
self.planner = get_agent("planner")
self.executor = get_agent("executor")
self.critic = get_agent("critic")
self.deployment_router = DeploymentRouter()
async def run_task(self, task: str, user_id: str):
context = self.memory.get_context()
@@ -16,10 +20,6 @@ class Orchestrator:
# executor = get_agent("executor")
# critic = get_agent("critic")
# plan = planner.run(task, context)
# result = executor.run(plan, context)
# feedback = critic.run(result, context)
plan = self.planner.run(task, context)
result = self.executor.run(plan, context)
feedback = self.critic.run(result, context)
@@ -42,10 +42,19 @@ class Orchestrator:
def route_request(prompt: str, context: dict):
profile = detect_model_profile(prompt)
model = get_routed_llm(prompt)
# # model = get_routed_llm(prompt)
# # return {
# # "model": str(model),
# # "profile": profile,
# # "response": f"[{model}] response to: {prompt}" # Replace with actual call
# # }
# routed = route_deployment(prompt, context)
# return {
# "profile": profile,
# **routed
# }
routed = self.deployment_router.route(prompt, context)
return {
"model": str(model),
"profile": profile,
"response": f"[{model}] response to: {prompt}" # Replace with actual call
**routed
}

View File

@@ -0,0 +1,28 @@
# deployment/deployment_router.py
from models.model_router import get_routed_llm
from models.moe_handler import MoEHandler
moe_handler = MoEHandler()
def route_deployment(prompt: str, context: dict):
    """Route an inference request to an edge, cloud, or hybrid deployment.

    Args:
        prompt: The user prompt to run inference on.
        context: Request context; ``deployment_mode`` selects the target
            ("edge", "cloud", or "hybrid"; defaults to "hybrid").

    Returns:
        dict with a "mode" key plus the mode-specific output field(s).
    """
    mode = context.get("deployment_mode", "hybrid")
    if mode == "edge":
        # Lightweight model for on-device / edge inference.
        model = get_routed_llm(prompt, lightweight=True)
        return {"mode": "edge", "response": model.generate(prompt)}
    elif mode == "cloud":
        # Full MoE routing in the cloud.
        output = moe_handler.generate(prompt)
        return {"mode": "cloud", "response": output}
    elif mode == "hybrid":
        # Quick edge answer plus a refined cloud answer.
        edge_model = get_routed_llm(prompt, lightweight=True)
        edge_output = edge_model.generate(prompt)
        cloud_output = moe_handler.generate(prompt)
        return {
            "mode": "hybrid",
            "edge_output": edge_output,
            "cloud_output": cloud_output,
        }
    # BUG FIX: previously an unknown mode fell through and returned None.
    # Fall back to the default-routed model (matches DeploymentRouter.route).
    model = get_routed_llm(prompt)
    return {"mode": "default", "response": model.generate(prompt)}

View File

@@ -0,0 +1,61 @@
# deployment/deployment_router.py
from models.model_router import get_routed_llm
from models.moe_handler import MoEHandler
class DeploymentRouter:
    """Routes inference requests across edge, cloud, and hybrid deployments."""

    def __init__(self):
        # One MoE handler instance, reused for every cloud-side request.
        self.moe_handler = MoEHandler()

    def route(self, prompt: str, context: dict = None):
        """Dispatch *prompt* according to ``context["deployment_mode"]``.

        Supported modes: "edge", "cloud", "hybrid"; any other value falls
        back to the default-routed model.
        """
        mode = (context or {}).get("deployment_mode", "hybrid")

        if mode == "edge":
            # Lightweight model suitable for edge/mobile devices.
            edge_llm = get_routed_llm(prompt, lightweight=True)
            return {
                "mode": "edge",
                "model": str(edge_llm),
                "response": edge_llm.generate(prompt),
            }

        if mode == "cloud":
            # Full mixture-of-experts routing in the cloud.
            return {
                "mode": "cloud",
                "response": self.moe_handler.generate(prompt),
            }

        if mode == "hybrid":
            # Fast edge answer first, then a refined cloud answer.
            edge_llm = get_routed_llm(prompt, lightweight=True)
            return {
                "mode": "hybrid",
                "edge_output": edge_llm.generate(prompt),
                "cloud_output": self.moe_handler.generate(prompt),
            }

        # Unknown mode — fall back to the default-routed model.
        fallback = get_routed_llm(prompt)
        return {
            "mode": "default",
            "model": str(fallback),
            "response": fallback.generate(prompt),
        }

View File

@@ -9,6 +9,7 @@ services:
volumes:
- .:/app
restart: always
command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
frontend:
build:
@@ -20,4 +21,23 @@ services:
volumes:
- ./web:/web
working_dir: /web
restart: always
restart: always
moe-router:
build: .
container_name: agentic-moe-router
ports:
- "8080:8080"
volumes:
- .:/app
restart: always
command: ["uvicorn", "models.moe_handler:app", "--host", "0.0.0.0", "--port", "8080"]  # NOTE(review): models/moe_handler.py does not define a FastAPI `app` in this commit — verify before deploying
prometheus:
image: prom/prometheus:latest
container_name: agentic-prometheus
ports:
- "9090:9090"
volumes:
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
restart: always

View File

@@ -1,10 +1,12 @@
# governance/policy_registry.py
##INFO:
import time
class PolicyRegistry:
def __init__(self):
self.policies = {} # {tenant_id: {allowed_roles, restricted_tasks, audit_level}}
self.global_policies = self._load_global_policies()
self.audit_log = [] # list of {timestamp, tenant_id, role, action, violation, reason}
def set_policy(self, tenant_id: str, allowed_roles: list, restricted_tasks: list, audit_level: str = "standard"):
self.policies[tenant_id] = {
@@ -60,4 +62,30 @@ class PolicyRegistry:
def list_global_policies(self):
return list(self.global_policies.keys())
# -----------------------------
# New Audit & Violation Logging
# -----------------------------
def log_violation(self, tenant_id: str, role: str, action: str, reason: str):
    """Record a policy violation in the in-memory audit log.

    Returns the appended audit record (timestamped, violation=True).
    """
    record = dict(
        timestamp=time.time(),
        tenant_id=tenant_id,
        role=role,
        action=action,
        violation=True,
        reason=reason,
    )
    self.audit_log.append(record)
    return record
def get_audit_log(self, tenant_id: str = None):
    """Return audit records, optionally filtered to a single tenant."""
    if not tenant_id:
        # No filter requested — expose the full log.
        return self.audit_log
    return [rec for rec in self.audit_log if rec["tenant_id"] == tenant_id]
def clear_audit_log(self, tenant_id: str = None):
    """Drop audit records — all of them, or only one tenant's entries."""
    if tenant_id:
        # Keep everything that does NOT belong to the given tenant.
        remaining = [rec for rec in self.audit_log if rec["tenant_id"] != tenant_id]
    else:
        remaining = []
    self.audit_log = remaining
policy_registry = PolicyRegistry()

View File

@@ -0,0 +1,62 @@
{
"id": null,
"title": "Combined Overview Dashboard",
"panels": [
{
"type": "piechart",
"title": "Request Distribution by Model Type & Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (model_type, deployment_mode)",
"legendFormat": "{{model_type}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "95th Percentile Latency by Model Type & Deployment Mode",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))",
"legendFormat": "{{model_type}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "stat",
"title": "Success Rate by Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"True\"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Error Rate Over Time by Agent & Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (agent_role, deployment_mode)",
"legendFormat": "{{agent_role}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Expert Usage by Tenant/Agent/Action/Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (tenant_id, agent_role, action, deployment_mode)",
"legendFormat": "{{tenant_id}}-{{agent_role}}-{{action}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-12h", "to": "now" }
}

View File

@@ -0,0 +1,28 @@
{
"title": "Deployment Mode Overview",
"panels": [
{
"type": "piechart",
"title": "Request Distribution by Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (deployment_mode)",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Latency Trends by Mode",
"targets": [
{
"expr": "avg(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) by (deployment_mode)",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-12h", "to": "now" }
}

View File

@@ -0,0 +1,18 @@
{
"id": null,
"title": "Expert Usage Dashboard",
"panels": [
{
"type": "graph",
"title": "Requests per Expert by Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total[5m])) by (tenant_id, agent_role, action, deployment_mode)",
"legendFormat": "{{tenant_id}}-{{agent_role}}-{{action}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-6h", "to": "now" }
}

View File

@@ -0,0 +1,18 @@
{
"id": null,
"title": "Request Latency Dashboard",
"panels": [
{
"type": "graph",
"title": "Latency per Model Type & Deployment Mode",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))",
"legendFormat": "{{model_type}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-1h", "to": "now" }
}

View File

@@ -0,0 +1,55 @@
{
"id": null,
"title": "Policy Violations Dashboard",
"panels": [
{
"type": "graph",
"title": "Violations Over Time by Tenant",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (tenant_id)",
"legendFormat": "{{tenant_id}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Violations by Role",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (role)",
"legendFormat": "{{role}}"
}
],
"datasource": "Prometheus"
},
{
"type": "table",
"title": "Top Restricted Actions Triggered",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (action)",
"legendFormat": "{{action}}"
}
],
"datasource": "Prometheus",
"columns": [
{ "text": "Action", "value": "action" },
{ "text": "Violations", "value": "Value" }
]
},
{
"type": "stat",
"title": "Total Violations (24h)",
"targets": [
{
"expr": "sum(increase(policy_violations_total[24h]))",
"legendFormat": "Total"
}
],
"datasource": "Prometheus"
}
],
"time": { "from": "now-24h", "to": "now" }
}

View File

@@ -0,0 +1,79 @@
{
"id": null,
"title": "SLA Compliance Dashboard",
"panels": [
{
"type": "stat",
"title": "Success Rate per Deployment Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"True\"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100",
"legendFormat": "{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Error Rate Over Time by Agent & Mode",
"targets": [
{
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (agent_role, deployment_mode)",
"legendFormat": "{{agent_role}}-{{deployment_mode}}"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Latency vs SLA Thresholds",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, deployment_mode))",
"legendFormat": "Latency-{{deployment_mode}}"
},
{
"expr": "(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) > 500",
"legendFormat": "SLA Breach Threshold"
}
],
"datasource": "Prometheus"
},
{
"type": "graph",
"title": "Policy Violations vs SLA Breaches",
"targets": [
{
"expr": "sum(rate(policy_violations_total[5m])) by (tenant_id)",
"legendFormat": "Violations-{{tenant_id}}"
},
{
"expr": "sum(rate(requests_total{success=\"False\"}[5m])) by (tenant_id)",
"legendFormat": "Errors-{{tenant_id}}"
}
],
"datasource": "Prometheus"
},
{
"type": "table",
"title": "Top Tenants with SLA Breaches & Violations (24h)",
"targets": [
{
"expr": "sum(increase(policy_violations_total[24h])) by (tenant_id)",
"legendFormat": "Violations-{{tenant_id}}"
},
{
"expr": "sum(increase(requests_total{success=\"False\"}[24h])) by (tenant_id)",
"legendFormat": "Errors-{{tenant_id}}"
}
],
"datasource": "Prometheus",
"columns": [
{ "text": "Tenant", "value": "tenant_id" },
{ "text": "Violations (24h)", "value": "Value" },
{ "text": "Errors (24h)", "value": "Value" }
]
}
],
"time": { "from": "now-24h", "to": "now" }
}

View File

@@ -0,0 +1,3 @@
```
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.15.0/cert-manager.yaml
```

View File

@@ -0,0 +1,23 @@
# k8s/backend-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: agentic-backend
spec:
replicas: 2
selector:
matchLabels:
app: agentic-backend
template:
metadata:
labels:
app: agentic-backend
spec:
containers:
- name: backend
image: agentic-backend:latest
ports:
- containerPort: 8000
env:
- name: ENV
value: "production"

13
k8s/backend-service.yaml Normal file
View File

@@ -0,0 +1,13 @@
# k8s/backend-service.yaml
apiVersion: v1
kind: Service
metadata:
name: agentic-backend-service
spec:
selector:
app: agentic-backend
ports:
- protocol: TCP
port: 8000
targetPort: 8000
type: ClusterIP

15
k8s/cluster-issuer.yaml Normal file
View File

@@ -0,0 +1,15 @@
# k8s/cluster-issuer.yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-prod
spec:
acme:
email: your-email@example.com # replace with your email
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-prod-key
solvers:
- http01:
ingress:
class: nginx

View File

@@ -0,0 +1,20 @@
# k8s/frontend-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: agentic-frontend
spec:
replicas: 1
selector:
matchLabels:
app: agentic-frontend
template:
metadata:
labels:
app: agentic-frontend
spec:
containers:
- name: frontend
image: agentic-frontend:latest
ports:
- containerPort: 5173

13
k8s/frontend-service.yaml Normal file
View File

@@ -0,0 +1,13 @@
# k8s/frontend-service.yaml
apiVersion: v1
kind: Service
metadata:
name: agentic-frontend-service
spec:
selector:
app: agentic-frontend
ports:
- protocol: TCP
port: 5173
targetPort: 5173
type: NodePort

19
k8s/hpa-moe-router.yaml Normal file
View File

@@ -0,0 +1,19 @@
# k8s/hpa-moe-router.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: agentic-moe-router-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: agentic-moe-router
minReplicas: 2
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70

42
k8s/ingress.yaml Normal file
View File

@@ -0,0 +1,42 @@
# k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: agentic-ingress
annotations:
kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$1
# For Let's Encrypt TLS (using cert-manager)
# cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
# tls:
# - hosts:
# - agentic.example.com # replace with your domain
# secretName: agentic-tls
rules:
- host: agentic.local
# Using cert-manager
# - host: agentic.example.com # replace with your domain
http:
paths:
- path: /api(/|$)(.*)
pathType: Prefix
backend:
service:
name: agentic-backend-service
port:
number: 8000
- path: /moe(/|$)(.*)
pathType: Prefix
backend:
service:
name: agentic-moe-router-service
port:
number: 8080
- path: /()(.*)
pathType: Prefix
backend:
service:
name: agentic-frontend-service
port:
number: 5173

View File

@@ -0,0 +1,23 @@
# k8s/moe-router-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: agentic-moe-router
spec:
replicas: 2
selector:
matchLabels:
app: agentic-moe-router
template:
metadata:
labels:
app: agentic-moe-router
spec:
containers:
- name: moe-router
image: agentic-moe-router:latest
ports:
- containerPort: 8080
env:
- name: ROUTING_MODE
value: "capability_weighted"

View File

@@ -0,0 +1,13 @@
# k8s/moe-router-service.yaml
apiVersion: v1
kind: Service
metadata:
name: agentic-moe-router-service
spec:
selector:
app: agentic-moe-router
ports:
- protocol: TCP
port: 8080
targetPort: 8080
type: ClusterIP

View File

@@ -13,6 +13,7 @@ from fastapi.requests import Request
from fastapi.exception_handlers import http_exception_handler
from fastapi.exceptions import RequestValidationError
from routes.auth_routes import router as auth_router
from routes.api import router as api_router
from routes.data_routes import router as data_router
from routes.dataset_routes import router as dataset_router
@@ -118,6 +119,8 @@ async def global_exception_handler(request: Request, exc: Exception):
)
##INFO: ✅ Register routes
app.include_router(auth_router, prefix="/auth")
# app.include_router(api_router)
app.include_router(api_router, prefix="/api")
# data_router (/api/data/...), Means all endpoints in this router will be nested under /api
@@ -139,6 +142,8 @@ app.include_router(scheduler_router, prefix="/schedule")
app.include_router(calendar_router, prefix="/calendar")
app.include_router(group_router, prefix="/group")
# app.include_router(metrics_router, prefix="/metrics")
##INFO: API route to Trigger Orchestration
app.include_router(orchestration_router, prefix="/api")

View File

@@ -6,6 +6,9 @@ import 'package:http/http.dart' as http;
class SyncService {
static const String baseUrl = "https://your-api-endpoint.com/api/sync"; // Replace with actual base URL
// -----------------------------
// APIs for agent memory/state
// -----------------------------
static Future<List<Map<String, dynamic>>> getAgentMemory(String tenantId, String agentRole) async {
final uri = Uri.parse("$baseUrl/agent/memory?tenant_id=$tenantId&agent_role=$agentRole");
final res = await http.get(uri);
@@ -110,4 +113,56 @@ class SyncService {
}
}
// -----------------------------
// Hybrid Deployment Sync APIs
// -----------------------------
static final Map<String, dynamic> _localCache = {};
/// Run edge inference locally and cache the result.
///
/// [edgeModelFn] is the caller-supplied on-device model callback; its
/// output is stored in [_localCache] keyed by [prompt] and marked as not
/// yet synced with the cloud (`synced: false`, `cloud_output: null`).
static Future<Map<String, dynamic>> runEdgeInference(
    String prompt, Future<String> Function(String) edgeModelFn) async {
  final edgeOutput = await edgeModelFn(prompt);
  _localCache[prompt] = {
    "mode": "edge",
    "edge_output": edgeOutput,
    "cloud_output": null,
    "synced": false,
  };
  return _localCache[prompt]!;
}
/// Refine a cached edge result with a cloud inference pass.
///
/// Requires that [runEdgeInference] has already cached an entry for
/// [prompt]; throws otherwise. On HTTP 200 the cloud response is merged
/// into the cached entry and `synced` is set to true; any other status
/// code throws.
static Future<Map<String, dynamic>> syncWithCloud(String prompt) async {
  if (!_localCache.containsKey(prompt)) {
    throw Exception("No cached edge result for prompt: $prompt");
  }
  // NOTE(review): endpoint is hard-coded and duplicates baseUrl's host —
  // confirm whether it should be derived from baseUrl instead.
  final uri = Uri.parse("https://your-api-endpoint.com/api/inference");
  final res = await http.post(uri,
      headers: {"Content-Type": "application/json"},
      body: json.encode({"prompt": prompt, "deployment_mode": "cloud"})
  );
  if (res.statusCode == 200) {
    final cloudOutput = json.decode(res.body);
    _localCache[prompt]!["cloud_output"] = cloudOutput["response"];
    _localCache[prompt]!["synced"] = true;
    return _localCache[prompt]!;
  } else {
    throw Exception("Cloud sync failed: ${res.statusCode}");
  }
}
/// Return the cached edge/hybrid result for [prompt], or null if absent.
static Map<String, dynamic>? getCachedResult(String prompt) {
  return _localCache[prompt];
}
/// Drop all locally cached inference results.
static void clearCache() {
  _localCache.clear();
}
}

View File

@@ -1,16 +1,91 @@
# models/moe_handler.py
from models.base_model import BaseModel
import random
import time
from typing import List, Dict, Any, Optional
class MoEHandler(BaseModel):
def __init__(self):
# stub: would load multiple experts and router
self.experts = []
class Expert:
    """A single expert in the MoE pool, tagged with capabilities and a weight.

    BUG FIX: this span contained stale pre-merge stub lines interleaved with
    the new implementation (a leftover ``return "[MoE output stub]"`` and a
    duplicate ``embed`` definition); the dead duplicates are removed.
    """

    def __init__(self, name: str, capabilities: List[str], engine: str, weight: float = 1.0):
        self.name = name                  # unique expert identifier
        self.capabilities = capabilities  # e.g. ["reasoning"], ["code"]
        self.engine = engine              # backing engine type (llm, slm, vlm, ...)
        self.weight = weight              # sampling weight for capability_weighted routing

    def generate(self, prompt: str) -> str:
        # TODO: integrate with actual engine handler (llm, slm, vlm, etc.)
        return f"[{self.engine}:{self.name}] generated output for: {prompt}"

    def embed(self, text: str) -> List[float]:
        # TODO: integrate with embedding handler
        return [0.1, 0.2, 0.3]

    def analyze(self, input_data: dict) -> dict:
        return {"analysis": f"Stub analysis by {self.name}"}
# class MoEHandler(BaseModel):
# def __init__(self):
# # stub: would load multiple experts and router
# self.experts = []
# def generate(self, prompt: str) -> str:
# return "[MoE output stub]"
# def embed(self, text: str) -> list[float]:
# return []
# def analyze(self, input_data: dict) -> dict:
# return {"analysis": "MoE stub"}
class MoEHandler(BaseModel):
    """Mixture-of-Experts handler.

    Keeps a static registry of Expert instances and routes each call
    (generate/embed/analyze) to one expert according to the configured
    routing mode.
    """

    def __init__(self):
        # Load multiple experts into registry
        # Weights bias selection only in "capability_weighted" mode.
        self.experts: Dict[str, Expert] = {
            "expert_reasoning": Expert("expert_reasoning", ["reasoning"], "llm", weight=1.5),
            "expert_creative": Expert("expert_creative", ["creative"], "llm", weight=1.0),
            "expert_summary": Expert("expert_summary", ["summary"], "slm", weight=1.0),
            "expert_code": Expert("expert_code", ["code"], "llm", weight=0.8),
        }
        # Routing strategy: "capability_weighted" | "first_match" | "round_robin".
        self.routing_mode = "capability_weighted"
        # Fallback expert used when no candidate matches the requested capability.
        self.default_expert = "expert_reasoning"
        self._rr_index = 0  # cursor for round_robin routing

    def _route(self, capability: Optional[str] = None) -> Expert:
        """Pick one expert for *capability* using the active routing mode."""
        # Candidates: every expert, or only those advertising the capability.
        candidates = [e for e in self.experts.values() if capability is None or capability in e.capabilities]
        if not candidates:
            # No expert matches — fall back to the configured default.
            return self.experts[self.default_expert]
        if self.routing_mode == "capability_weighted":
            # Weighted random draw; higher weight => proportionally more traffic.
            weights = [e.weight for e in candidates]
            return random.choices(candidates, weights=weights, k=1)[0]
        elif self.routing_mode == "first_match":
            return candidates[0]
        elif self.routing_mode == "round_robin":
            expert = candidates[self._rr_index % len(candidates)]
            self._rr_index += 1
            return expert
        # Unknown routing mode — behave like first_match.
        return candidates[0]

    def generate(self, prompt: str, capability: Optional[str] = None) -> str:
        """Generate text via a routed expert; appends measured latency to the output."""
        expert = self._route(capability)
        start = time.time()
        output = expert.generate(prompt)
        latency = int((time.time() - start) * 1000)  # wall-clock milliseconds
        return f"{output} (latency={latency}ms)"

    def embed(self, text: str, capability: Optional[str] = None) -> List[float]:
        """Embed *text* via a routed expert."""
        expert = self._route(capability)
        return expert.embed(text)

    def analyze(self, input_data: dict, capability: Optional[str] = None) -> dict:
        """Analyze *input_data* via a routed expert."""
        expert = self._route(capability)
        return expert.analyze(input_data)

    def list_experts(self) -> List[Dict[str, Any]]:
        """Return a JSON-friendly description of every registered expert."""
        return [{"name": e.name, "capabilities": e.capabilities, "engine": e.engine, "weight": e.weight}
                for e in self.experts.values()]

    def update_config(self, default_expert: Optional[str] = None, routing_mode: Optional[str] = None) -> Dict[str, str]:
        """Update routing config; None/falsy arguments leave current values unchanged."""
        if default_expert:
            self.default_expert = default_expert
        if routing_mode:
            self.routing_mode = routing_mode
        return {"default_expert": self.default_expert, "routing_mode": self.routing_mode}

View File

@@ -0,0 +1,12 @@
# monitoring/metrics_endpoint.py
from fastapi import APIRouter, Response
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST

router = APIRouter()

# Shared metric handles, labelled by the model type serving the request.
REQUEST_COUNT = Counter("requests_total", "Total requests", ["model_type"])
LATENCY = Histogram("request_latency_ms", "Request latency (ms)", ["model_type"])


@router.get("/metrics")
def metrics():
    """Expose Prometheus metrics in the text exposition format.

    BUG FIX: returning the raw ``generate_latest()`` bytes let FastAPI
    JSON-encode them under ``application/json``; Prometheus requires the
    exposition content type, so wrap in an explicit Response.
    """
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)

View File

@@ -0,0 +1,49 @@
groups:
- name: agentic-alerts
interval: 30s
rules:
# High latency alert
- alert: HighLatency
expr: histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, deployment_mode)) > 500
for: 2m
labels:
severity: warning
annotations:
summary: "High latency detected"
description: "95th percentile latency > 500ms for 2 minutes"
# SLA violation (success rate < 95%)
- alert: SLAViolation
expr: (sum(rate(requests_total{success="True"}[5m])) / sum(rate(requests_total[5m]))) * 100 < 95
for: 5m
labels:
severity: critical
annotations:
summary: "SLA violation detected"
description: "Success rate dropped below 95%"
# Error spike alert
- alert: ErrorSpike
expr: sum(rate(requests_total{success="False"}[1m])) by (tenant_id, deployment_mode) > 10
for: 1m
labels:
severity: critical
annotations:
summary: "Error spike detected"
description: "More than 10 failed requests per minute"
# -----------------------------
# New Combined SLA + Policy Violation Alert
# -----------------------------
- alert: SLAWithPolicyViolation
expr: (
(sum(rate(requests_total{success="True"}[5m])) / sum(rate(requests_total[5m]))) * 100 < 95
)
and
(sum(rate(policy_violations_total[5m])) by (tenant_id) > 5)
for: 5m
labels:
severity: critical
annotations:
summary: "SLA violation with policy violations"
description: "Success rate <95% AND policy violations >5 in 5m window"

View File

@@ -0,0 +1,24 @@
inhibit_rules:
# Suppress SLA violation alerts if HighLatency is already firing
- source_match:
alertname: "HighLatency"
target_match:
alertname: "SLAViolation"
equal: ["deployment_mode"]
# Explanation: If latency is already high, SLA violation alerts are redundant.
# Suppress ErrorSpike alerts if SLAViolation is firing
- source_match:
alertname: "SLAViolation"
target_match:
alertname: "ErrorSpike"
equal: ["agent_role", "deployment_mode"]
# Explanation: SLA violation already covers error spikes, so suppress ErrorSpike.
# Suppress lower-severity alerts when critical ones are active
- source_match:
severity: "critical"
target_match:
severity: "warning"
equal: ["tenant_id", "deployment_mode"]
# Explanation: If a critical alert is firing, warnings for same tenant/mode are suppressed.

View File

@@ -0,0 +1,45 @@
global:
resolve_timeout: 5m
route:
receiver: 'default'
inhibit_rules:
- source_match:
alertname: "HighLatency"
target_match:
alertname: "SLAViolation"
equal: ["deployment_mode"]
- source_match:
alertname: "SLAViolation"
target_match:
alertname: "ErrorSpike"
equal: ["agent_role", "deployment_mode"]
- source_match:
severity: "critical"
target_match:
severity: "warning"
equal: ["tenant_id", "deployment_mode"]
# Suppress standalone SLA or Error alerts if combined SLA+PolicyViolation is firing
- source_match:
alertname: "SLAWithPolicyViolation"
target_match:
alertname: "SLAViolation"
equal: ["tenant_id"]
- source_match:
alertname: "SLAWithPolicyViolation"
target_match:
alertname: "ErrorSpike"
equal: ["tenant_id"]
receivers:
- name: 'default'
email_configs:
- to: 'ops-team@example.com'
from: 'alertmanager@example.com'
smarthost: 'smtp.example.com:587'
auth_username: 'alertmanager@example.com'
auth_password: 'yourpassword'  # FIXME: placeholder — load real credentials from a secret store; never commit them

View File

@@ -16,15 +16,30 @@ class MetricsStore:
"success": success,
"latency": latency
})
def log(self, tenant_id: str, agent_role: str, action: str, success: bool, latency: float,
    deployment_mode: str = "default"
):
    """Log a single metric record with deployment_mode awareness."""
    # Records are append-only flat dicts; note the agent role is stored
    # under the key "agent" (summaries below rely on that key).
    self.records.append({
        "timestamp": time.time(),
        "tenant_id": tenant_id,
        "agent": agent_role,
        "action": action,
        "success": success,
        "latency": latency,
        "deployment_mode": deployment_mode
    })
def get_all(self):
    """Return the raw list of metric records (the live list, not a copy)."""
    return self.records
def get_summary(self, tenant_id: str = None):
"""Summarize metrics, grouped by (agent, action, deployment_mode)."""
filtered = [r for r in self.records if not tenant_id or r["tenant_id"] == tenant_id]
summary = {}
for r in filtered:
key = (r["agent"], r["action"])
# key = (r["agent"], r["action"])
key = (r["agent"], r["action"], r.get("deployment_mode", "default"))
if key not in summary:
summary[key] = {"count": 0, "success": 0, "latency": 0.0}
summary[key]["count"] += 1
@@ -36,14 +51,38 @@ class MetricsStore:
return summary
def get_agent_metrics(self, agent_role: str):
    """Summarize metrics for a specific agent, broken down by deployment_mode.

    Returns:
        dict mapping deployment_mode -> {"total_actions", "avg_latency",
        "success_rate", "latency_distribution"}.
    """
    filtered = [r for r in self.records if r["agent"] == agent_role]

    # Aggregate per deployment mode. Uses dict.setdefault rather than
    # collections.defaultdict: no defaultdict import is visible in this
    # module, so the previous code risked a NameError. Also removes the
    # commented-out pre-refactor implementation that was left behind.
    metrics_by_mode = {}
    for r in filtered:
        mode = r.get("deployment_mode", "default")
        stats = metrics_by_mode.setdefault(mode, {"count": 0, "latencies": [], "success": 0})
        stats["count"] += 1
        stats["latencies"].append(r["latency"])
        stats["success"] += int(r["success"])

    summary = {}
    for mode, stats in metrics_by_mode.items():
        count = stats["count"]
        summary[mode] = {
            "total_actions": count,
            "avg_latency": sum(stats["latencies"]) / count if count else 0,
            "success_rate": stats["success"] / count if count else 0,
            "latency_distribution": stats["latencies"],
        }
    return summary
metrics_store = MetricsStore()

# --- new file: monitoring/prometheus.yml (15 lines) ---
# monitoring/prometheus.yml
global:
  scrape_interval: 15s

rule_files:
  - "alert-rules.yml"
  - "recording-rules.yml"

scrape_configs:
  - job_name: 'backend'
    # Prometheus `targets` must be host:port only; the scrape path belongs in
    # `metrics_path`.  Also fixes the "promethus" typo so the path matches the
    # FastAPI route /admin/metrics/prometheus.
    metrics_path: /admin/metrics/prometheus
    static_configs:
      - targets: ['agentic-backend:8000']
  - job_name: 'moe-router'
    static_configs:
      - targets: ['agentic-moe-router:8080']

# --- new file: monitoring/recording-rules.yml ---
groups:
  - name: agentic-recording-rules
    interval: 30s
    rules:
      # -----------------------------
      # Latency aggregations
      # -----------------------------
      # NOTE(review): these aggregate by `model_type`, but the backend's
      # request_latency_ms histogram is labelled with tenant_id / agent_role /
      # action / deployment_mode only — confirm model_type exists or the
      # recorded series will be empty.
      - record: request_latency_ms:avg_by_mode
        expr: avg(rate(request_latency_ms_sum[5m]) / rate(request_latency_ms_count[5m])) by (model_type, deployment_mode)
      - record: request_latency_ms:p95_by_mode
        expr: histogram_quantile(0.95, sum(rate(request_latency_ms_bucket[5m])) by (le, model_type, deployment_mode))
      # -----------------------------
      # Request counts
      # -----------------------------
      - record: requests_total:rate_by_mode
        expr: sum(rate(requests_total[5m])) by (model_type, deployment_mode)
      # success label values are Python str(bool): "True"/"False"
      - record: requests_total:success_rate_by_mode
        expr: sum(rate(requests_total{success="True"}[5m])) by (deployment_mode) / sum(rate(requests_total[5m])) by (deployment_mode) * 100
      # -----------------------------
      # Error counts
      # -----------------------------
      - record: requests_total:error_rate_by_mode
        expr: sum(rate(requests_total{success="False"}[5m])) by (agent_role, deployment_mode)

# --- modified file: routes/auth_routes.py ---
from fastapi import APIRouter, HTTPException, Depends
# routes/auth_routes.py
from fastapi import APIRouter, HTTPException, Depends, status
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from datetime import datetime, timedelta
from jose import JWTError, jwt
from pydantic import BaseModel
from typing import List, Optional
# SECURITY(review): hard-coded signing secret — load from an environment
# variable / secret store before production; anyone holding this value can
# forge tokens for any tenant and role.
SECRET_KEY = "your-secret-key"
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 60  # reconstructed from the garbled diff hunk header

router = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="auth/token")
# Tenant-aware user registry (demo only; dead pre-tenant entries removed).
# SECURITY(review): plaintext passwords in source — use hashed credentials
# (e.g. passlib/bcrypt) and a real user store before production.
fake_users_db = {
    "tony": {"username": "tony", "password": "agentic123", "tenant_id": "tenantA", "roles": ["admin"]},
    "guest": {"username": "guest", "password": "guest123", "tenant_id": "tenantB", "roles": ["tenant"]},
}
def authenticate_user(username: str, password: str):
class AuthUser(BaseModel):
    """Authenticated principal decoded from a JWT's claims."""
    username: str
    tenant_id: Optional[str]  # None would mean an un-scoped/global account
    roles: List[str]  # e.g. ["admin"] or ["tenant"]
def authenticate_user(username: str, password: str) -> Optional[AuthUser]:
    """Return an AuthUser for valid credentials, or None on failure.

    NOTE(review): demo-only plaintext credential check against fake_users_db;
    replace with password hashing in production.  compare_digest keeps the
    comparison constant-time to avoid a timing side channel.
    """
    import secrets  # stdlib; local import keeps this helper self-contained

    user = fake_users_db.get(username)
    if not user or not secrets.compare_digest(user["password"].encode(), password.encode()):
        return None
    return AuthUser(username=user["username"], tenant_id=user["tenant_id"], roles=user["roles"])
def create_access_token(user: AuthUser, expires_delta: timedelta = None):
    """Build a signed JWT carrying sub / tenant_id / roles / exp claims.

    expires_delta defaults to ACCESS_TOKEN_EXPIRE_MINUTES when not given.
    """
    from datetime import timezone  # stdlib; module imports only datetime/timedelta

    # timezone-aware "now" — datetime.utcnow() is deprecated in modern Python
    expire = datetime.now(timezone.utc) + (expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
    to_encode = {"sub": user.username, "tenant_id": user.tenant_id, "roles": user.roles, "exp": expire}
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
def decode_token(token: str) -> AuthUser:
    """Decode and validate a JWT, returning its claims as an AuthUser.

    Raises HTTP 403 on any signature/expiry/format problem (JWTError).
    """
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        return AuthUser(username=payload.get("sub"), tenant_id=payload.get("tenant_id"), roles=payload.get("roles", []))
    except JWTError:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid token")
def get_current_user(token: str = Depends(oauth2_scheme)) -> AuthUser:
    """FastAPI dependency: resolve the bearer token to an AuthUser (403 on failure)."""
    return decode_token(token)
def require_roles(*required: str):
    """Dependency factory: admit the request only when the caller holds at
    least one of *required* roles; otherwise raise HTTP 403."""
    def wrapper(user: AuthUser = Depends(get_current_user)) -> AuthUser:
        granted = set(user.roles)
        if granted.isdisjoint(required):
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Insufficient privileges")
        return user
    return wrapper
@router.post("/auth/token")
def login(form_data: OAuth2PasswordRequestForm = Depends()):
    """OAuth2 password login: exchange username/password for a bearer JWT.

    Fix: the merged diff had left the old call
    `create_access_token(data={"sub": user["username"]})` in place; it executed
    before the correct call and raised TypeError (AuthUser is not a dict and
    the new signature takes the user object directly).
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(status_code=401, detail="Invalid credentials")
    token = create_access_token(user)
    return {"access_token": token, "token_type": "bearer"}
@router.get("/auth/me")
def read_users_me(user: AuthUser = Depends(get_current_user)):
    """Return the calling user's identity claims.

    Fix: the merged diff had attached @router.get to the OLD token-parsing
    handler and then redefined read_users_me without a decorator, so the new
    tenant/roles-aware handler was never routed.  Only the new handler is kept.
    """
    return {"username": user.username, "tenant_id": user.tenant_id, "roles": user.roles}

# --- modified file: routes/inference_routes.py ---
# routes/inference_routes.py
from fastapi import APIRouter
# from fastapi import APIRouter, Body, HTTPException
from fastapi import APIRouter, Body, HTTPException, Depends
from pydantic import BaseModel
from typing import Optional
from models.llm_loader import get_llm
from agents.orchestrator import route_request
from models.registry import get_model
from models.moe_handler import MoEHandler
from auth.security import require_roles, get_current_user
from auth.security import AuthUser # Optional: for typing
router = APIRouter()

# llm = get_llm()  # legacy eager model load — superseded by per-request get_model()
# Shared Mixture-of-Experts handler instance used by the /admin/moe/* routes.
moe_handler = MoEHandler()
class EdgePrompt(BaseModel):
    """Request body for /llm/infer-edge: a single prompt string."""
    prompt: str

# Legacy route kept for reference (used the old eager `llm` callable):
# @router.post("/llm/infer-edge")
# def infer_edge_label(data: EdgePrompt):
#     label = llm(data.prompt).strip()
#     return {"label": label}

class HybridPrompt(BaseModel):
    """Request body for deployment-aware inference."""
    prompt: str
    context: Optional[dict] = {}
    deployment_mode: Optional[str] = "hybrid"  # edge | cloud | hybrid
# Public or low-risk endpoint: deliberately left open (add tenant auth if needed).
@router.post("/llm/infer-edge")
def infer_edge_label(data: EdgePrompt):
    """Generate a short label for the prompt using the default LLM backend."""
    model = get_model("llm")
    return {"label": model.generate(data.prompt).strip()}
# Admin-only unified inference (no deployment awareness).
# Fix: the merged diff stacked a second, dependency-free @router.post on this
# handler, registering the same path twice (once without the admin check).
@router.post("/admin/infer", dependencies=[Depends(require_roles("admin"))])
def infer(prompt: str = Body(...), context: dict = Body(default={})):
    """Route a raw prompt (plus optional context) through the agent orchestrator."""
    return route_request(prompt, context)
# -----------------------------
# Deployment-aware inference (tenant allowed)
# -----------------------------
# Fix: a stray @router.post("/admin/infer/{model_type}") decorator was stacked
# on this handler by the diff merge.  It registered an UNauthenticated route at
# the {model_type} path, shadowing the secured infer_with_model route below
# (routes match in registration order).  Removed.
@router.post("/admin/infer/deployment", dependencies=[Depends(require_roles("admin", "tenant"))])
def infer_with_deployment(data: HybridPrompt, user: AuthUser = Depends(get_current_user)):
    """
    Run inference with deployment_mode awareness.
    deployment_mode can be: edge, cloud, hybrid.
    """
    try:
        # Enforce tenant scoping by injecting the caller's tenant into context.
        ctx = {**(data.context or {}), "deployment_mode": data.deployment_mode}
        if user.tenant_id:
            ctx["tenant_id"] = user.tenant_id
        result = route_request(data.prompt, ctx)
        return {"deployment_mode": data.deployment_mode, "tenant_id": user.tenant_id, "result": result}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Admin-only model-specific inference.
# Fix: the merged diff had left the OLD body (no moe branch, no error handling)
# ahead of the new try/except block; it returned early, making the moe branch
# and the 500 mapping unreachable.  Only the new body is kept.
@router.post("/admin/infer/{model_type}", dependencies=[Depends(require_roles("admin"))])
def infer_with_model(model_type: str, prompt: str = Body(...)):
    """
    Run inference with a specific model type (llm, slm, vlm, moe, lcm, lam, mlm, embed).
    """
    try:
        if model_type == "moe":
            output = moe_handler.generate(prompt)
            return {"model_type": "moe", "output": output, "experts": moe_handler.list_experts()}
        model = get_model(model_type)
        if model_type in ["embed", "vlm"]:
            # embedding-style backends return a vector rather than text
            vector = model.embed(prompt)
            return {"model_type": model_type, "vector": vector}
        else:
            output = model.generate(prompt)
            return {"model_type": model_type, "output": output}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------
# MoE-specific management routes
# -----------------------------
@router.get("/admin/moe/experts", dependencies=[Depends(require_roles("admin"))])
def list_moe_experts():
    """List the experts currently registered with the MoE handler (admin only)."""
    return moe_handler.list_experts()
@router.post("/admin/moe/config", dependencies=[Depends(require_roles("admin"))])
def update_moe_config(default_expert: Optional[str] = Body(None), routing_mode: Optional[str] = Body(None)):
    """Update MoE routing configuration; None leaves a setting unchanged (admin only)."""
    return moe_handler.update_config(default_expert=default_expert, routing_mode=routing_mode)

View File

@@ -1,20 +1,135 @@
# routes/metrics_routes.py
from fastapi import APIRouter
from fastapi import APIRouter, Response, Depends
from monitoring.metrics_store import metrics_store
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
from auth.security import require_roles, get_current_user, AuthUser
router = APIRouter()
# -----------------------------
# Prometheus metrics definitions
# -----------------------------
# Fixes from the botched diff merge:
#  * removed the legacy, UNauthenticated @router.post("/admin/metrics/log")
#    route that was still registered ahead of the secured handler below;
#  * removed a stray module-level POLICY_VIOLATIONS.labels(tenant_id=...)
#    call that referenced undefined names and raised NameError at import —
#    such calls belong inside the request handlers that detect violations.
REQUEST_COUNT = Counter(
    "requests_total",
    "Total requests handled",
    ["tenant_id", "agent_role", "action", "success", "deployment_mode"]
)
LATENCY = Histogram(
    "request_latency_ms",
    "Request latency in milliseconds",
    ["tenant_id", "agent_role", "action", "deployment_mode"]
)
POLICY_VIOLATIONS = Counter(
    "policy_violations_total",
    "Total policy violations",
    ["tenant_id", "role", "action"]
)
# -----------------------------
# Extended logging endpoint
# -----------------------------
# # @router.post("/admin/metrics/log")
# # def log_metric(tenant_id: str, agent_role: str, action: str, success: bool, latency: float):
# # metrics_store.log(tenant_id, agent_role, action, success, latency)
# # # Update Prometheus counters
# # REQUEST_COUNT.labels(
# # tenant_id=tenant_id, agent_role=agent_role, action=action, success=str(success)
# # ).inc()
# # LATENCY.labels(tenant_id=tenant_id, agent_role=agent_role, action=action).observe(latency)
# # return {"status": "logged"}
# @router.post("/admin/metrics/log")
# def log_metric(
# tenant_id: str,
# agent_role: str,
# action: str,
# success: bool,
# latency: float,
# deployment_mode: str = "default"
# ):
# # Log into custom store
# metrics_store.log(tenant_id, agent_role, action, success, latency)
# # Update Prometheus counters with deployment_mode
# REQUEST_COUNT.labels(
# tenant_id=tenant_id,
# agent_role=agent_role,
# action=action,
# success=str(success),
# deployment_mode=deployment_mode
# ).inc()
# LATENCY.labels(
# tenant_id=tenant_id,
# agent_role=agent_role,
# action=action,
# deployment_mode=deployment_mode
# ).observe(latency)
# return {"status": "logged"}
@router.post("/admin/metrics/log", dependencies=[Depends(require_roles("admin", "tenant"))])
def log_metric(
    tenant_id: str,
    agent_role: str,
    action: str,
    success: bool,
    latency: float,
    deployment_mode: str = "default",
    user: AuthUser = Depends(get_current_user)
):
    """Record one agent action in the in-memory store and Prometheus series.

    Tenant users may only log for their own tenant; admins may log for any.
    """
    # Enforce tenant scoping: only allow logging for own tenant unless admin
    if "admin" not in user.roles and user.tenant_id and user.tenant_id != tenant_id:
        # local import: HTTPException is not in this module's top-level imports
        from fastapi import HTTPException
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    # NOTE(review): assumes MetricsStore.log accepts a 6th deployment_mode
    # argument — confirm against monitoring/metrics_store.py.
    metrics_store.log(tenant_id, agent_role, action, success, latency, deployment_mode)
    # success is stringified ("True"/"False") to match the recording rules.
    REQUEST_COUNT.labels(
        tenant_id=tenant_id, agent_role=agent_role, action=action, success=str(success), deployment_mode=deployment_mode
    ).inc()
    LATENCY.labels(
        tenant_id=tenant_id, agent_role=agent_role, action=action, deployment_mode=deployment_mode
    ).observe(latency)
    return {"status": "logged"}
# -----------------------------
# Summary endpoints
# -----------------------------
# Fix: the diff merge stacked a second, dependency-free @router.get on this
# handler, registering the path twice; only the admin-guarded route is kept.
@router.get("/admin/metrics/all", dependencies=[Depends(require_roles("admin"))])
def get_all_metrics():
    """Dump every raw metric record (admin only)."""
    return {"metrics": metrics_store.get_all()}
# Fix: the legacy unauthenticated /admin/metrics/summary route was still
# registered FIRST, so it won route matching and bypassed auth entirely.
@router.get("/admin/metrics/summary", dependencies=[Depends(require_roles("admin", "tenant"))])
def get_summary(tenant_id: str = None, user: AuthUser = Depends(get_current_user)):
    """Summarize metrics.

    Admins may query any tenant (or all tenants with tenant_id=None); tenant
    users are always pinned to their own tenant_id.
    """
    if "admin" in user.roles:
        effective_tenant = tenant_id
    else:
        effective_tenant = user.tenant_id
        if effective_tenant is None:
            # A non-admin token without a tenant would otherwise see ALL
            # tenants via get_summary(None); reject that outright.
            from fastapi import HTTPException  # local: not in module imports
            raise HTTPException(status_code=403, detail="Tenant scope required")
    return {"summary": metrics_store.get_summary(effective_tenant)}
# -----------------------------
# Prometheus scrape endpoint — usually cluster-internal; keep admin-only
# -----------------------------
@router.get("/admin/metrics/prometheus", dependencies=[Depends(require_roles("admin"))])
def prometheus_metrics():
    """Expose all registered metrics in Prometheus text exposition format."""
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)

View File

@@ -1,35 +1,67 @@
# routes/security_routes.py
##INFO: Security Routes
from fastapi import APIRouter
from fastapi import APIRouter, Depends, HTTPException
from security.rbac_registry import rbac_registry
from security.action_validator import action_validator
from tenants.rbac_guard import check_access
from governance.policy_registry import policy_registry
from routes.auth_routes import require_roles, get_current_user, AuthUser
router = APIRouter()
@router.post("/admin/security/role", dependencies=[Depends(require_roles("admin"))])
def define_role(agent_role: str, allowed_actions: list[str]):
    """Create or replace an RBAC role definition (admin only)."""
    return rbac_registry.define_role(agent_role, allowed_actions)
# Fix: a stray second @router.get decorator (without the admin dependency)
# was stacked here by the diff merge, registering a duplicate route.
@router.get("/admin/security/roles", dependencies=[Depends(require_roles("admin"))])
def get_all_roles():
    """List all RBAC role definitions (admin only)."""
    return {"roles": rbac_registry.get_all_roles()}
# Fix: the diff merge left the old decorator + headerless def ahead of this
# handler, which both double-registered the path and broke the module syntax.
@router.post("/admin/security/validate", dependencies=[Depends(require_roles("admin", "tenant"))])
def validate_action(agent_role: str, action: str, tenant_id: str, user: AuthUser = Depends(get_current_user)):
    """Validate an agent action against tenant policy, logging violations.

    Tenant users may only validate within their own tenant; admins may
    validate for any tenant.
    """
    # Enforce tenant scoping
    if "admin" not in user.roles and user.tenant_id != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    # Check against policy registry.
    # NOTE(review): assumes get_policy always returns a dict containing
    # "allowed_roles" and "restricted_tasks" — confirm behavior for unknown
    # tenants before shipping.
    policy = policy_registry.get_policy(tenant_id)
    if agent_role not in policy["allowed_roles"]:
        return policy_registry.log_violation(tenant_id, agent_role, action, "Role not allowed")
    if action in policy["restricted_tasks"]:
        return policy_registry.log_violation(tenant_id, agent_role, action, "Action restricted")
    return action_validator.validate(agent_role, action, tenant_id)
# Fix: removed a stray duplicate @router.get decorator left by the diff merge.
@router.get("/admin/security/audit", dependencies=[Depends(require_roles("admin"))])
def get_audit_log(tenant_id: str):
    """Return the action-validation audit trail for a tenant (admin only)."""
    return {"audit": action_validator.get_audit(tenant_id)}
# Fix: the old decorator + headerless def were left ahead of this handler by
# the diff merge (double registration / broken syntax); only the new one kept.
@router.post("/admin/security/access-check", dependencies=[Depends(require_roles("admin", "tenant"))])
def check_rbac_access(tenant_id: str, role: str, action: str, resource: str, user: AuthUser = Depends(get_current_user)):
    """Check whether `role` may perform `action` within `tenant_id`.

    NOTE(review): `resource` is accepted for API compatibility but is not
    passed to check_access — confirm whether resource-level checks are needed.
    """
    if "admin" not in user.roles and user.tenant_id != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant mismatch")
    allowed = check_access(tenant_id, role, action)
    return {"access_granted": allowed}
# Fix: removed a stray duplicate @router.get decorator left by the diff merge.
@router.get("/admin/security/global-policies", dependencies=[Depends(require_roles("admin"))])
def list_global_policies():
    """List platform-wide governance policies (admin only)."""
    return {"global_policies": policy_registry.list_global_policies()}

# --- new file: tests/test_inference_routes.py ---
# tests/test_inference_routes.py
import pytest
from fastapi.testclient import TestClient
from routes.inference_routes import router
from fastapi import FastAPI
app = FastAPI()
app.include_router(router)

# The /admin/* routes carry require_roles(...) dependencies; without a token
# every admin test would 401.  Overriding get_current_user is sufficient
# because each require_roles wrapper resolves the user through it.
from auth.security import get_current_user, AuthUser  # noqa: E402

app.dependency_overrides[get_current_user] = lambda: AuthUser(
    username="test-admin", tenant_id="tenantA", roles=["admin"]
)

client = TestClient(app)
def test_infer_edge_label():
    """Public edge-label route returns a JSON object with a "label" key."""
    resp = client.post("/llm/infer-edge", json={"prompt": "classify sentiment"})
    assert resp.status_code == 200
    data = resp.json()
    assert "label" in data

def test_admin_infer():
    """Unified admin inference returns a dict payload from the orchestrator."""
    resp = client.post("/admin/infer", json={"prompt": "Hello", "context": {}})
    assert resp.status_code == 200
    assert isinstance(resp.json(), dict)

def test_infer_with_llm():
    """Model-specific route echoes the requested model_type."""
    resp = client.post("/admin/infer/llm", json={"prompt": "Hello"})
    assert resp.status_code == 200
    data = resp.json()
    assert data["model_type"] == "llm"

def test_infer_with_moe():
    """MoE route returns generated output plus the expert roster."""
    resp = client.post("/admin/infer/moe", json={"prompt": "Explain transformers"})
    assert resp.status_code == 200
    data = resp.json()
    assert data["model_type"] == "moe"
    assert "output" in data

def test_list_moe_experts():
    """Expert listing is a list containing the reasoning expert."""
    resp = client.get("/admin/moe/experts")
    assert resp.status_code == 200
    experts = resp.json()
    assert isinstance(experts, list)
    assert any("expert_reasoning" in e["name"] for e in experts)

def test_update_moe_config():
    """Config update round-trips default_expert and routing_mode."""
    resp = client.post("/admin/moe/config", json={"default_expert": "expert_summary", "routing_mode": "first_match"})
    assert resp.status_code == 200
    cfg = resp.json()
    assert cfg["default_expert"] == "expert_summary"
    assert cfg["routing_mode"] == "first_match"

34
tests/test_moe_handler.py Normal file
View File

@@ -0,0 +1,34 @@
# tests/test_moe_handler.py
import pytest
from models.moe_handler import MoEHandler, ExpertSpec
@pytest.fixture
def moe():
    """Fresh MoEHandler per test (the handler holds mutable routing config)."""
    return MoEHandler()

def test_list_experts(moe):
    """Handler exposes a list of experts including expert_reasoning."""
    experts = moe.list_experts()
    assert isinstance(experts, list)
    assert any(e["name"] == "expert_reasoning" for e in experts)

def test_generate_default(moe):
    """Default generation yields a result mentioning output or latency."""
    output = moe.generate("Hello world")
    assert "output" in output or "latency" in output

def test_generate_with_capability(moe):
    """Capability hint routes the prompt toward the summary expert."""
    output = moe.generate("Summarize this text", capability="summary")
    assert "summary" in output.lower() or "latency" in output

def test_embed(moe):
    """Embedding returns a list of floats."""
    vector = moe.embed("Artificial intelligence")
    assert isinstance(vector, list)
    assert all(isinstance(v, float) for v in vector)

def test_analyze(moe):
    """Analysis result carries an "analysis" key."""
    result = moe.analyze({"data": "test"})
    assert "analysis" in result

def test_update_config(moe):
    """Config update round-trips default_expert and routing_mode."""
    cfg = moe.update_config(default_expert="expert_creative", routing_mode="round_robin")
    assert cfg["default_expert"] == "expert_creative"
    assert cfg["routing_mode"] == "round_robin"