Add more updates for CI/CD Automation with application manifests

This commit is contained in:
2025-12-10 14:05:29 +09:00
parent 1ef833534c
commit b87b0930c7
18 changed files with 376 additions and 5 deletions

View File

@@ -65,3 +65,48 @@ jobs:
git config user.email "actions@github.com"
git commit -am "Update image tag to ${{ github.sha }}"
git push
canary-promote:
  # Canary rollout for tenant-a: shift 20% of traffic to v2, gate on the SLA
  # thresholds stored in the tenant values file, then promote v2 to 100%.
  # NOTE(review): the SLA gate runs right after the 80/20 commit is pushed;
  # confirm whether a soak period is needed before the metrics are read.
  runs-on: ubuntu-latest
  needs: [update-argocd] # run after image build + ArgoCD update
  env:
    VALUES_FILE: charts/umbrella-agentic/values-tenant-a.yaml
  steps:
    - uses: actions/checkout@v4
      with:
        # An explicit ref makes checkout fetch the branch tip rather than
        # github.sha. Presumably a job this one needs has already pushed a
        # commit, in which case pushing from github.sha would be rejected as
        # non-fast-forward.
        ref: ${{ github.ref }}
    - name: Start canary rollout (80/20)
      run: |
        # Rewrite only "<indent>v1: <number>" lines. The bare "v1:" mapping
        # key under components.inference and keys such as tagV1 are left
        # alone, and trailing comments are preserved.
        sed -i -E 's|^([[:space:]]*v1:)[[:space:]]+[0-9]+|\1 80|' "$VALUES_FILE"
        sed -i -E 's|^([[:space:]]*v2:)[[:space:]]+[0-9]+|\1 20|' "$VALUES_FILE"
        git config user.name "github-actions"
        git config user.email "actions@github.com"
        # Skip the commit when the weights are already 80/20 (re-run).
        git diff --quiet || git commit -am "Canary start: v2 20% for tenant-a"
        git push
    - name: Check SLA before promotion
      run: |
        : "${PROM_URL:?PROM_URL must be set to the Prometheus host[:port]}"

        # Run an instant PromQL query and print the first sample value.
        # -G with --data-urlencode stops curl from globbing {..} and [..]
        # and URL-encodes the query; jq -r prints the value without JSON
        # quotes ("null" when there is no sample).
        prom_query() {
          curl -fsS -G "http://$PROM_URL/api/v1/query" \
            --data-urlencode "query=$1" | jq -r '.data.result[0].value[1]'
        }
        # Abort unless $2 is a finite number (rejects empty, null, NaN, Inf)
        # so that a broken metric can never pass the gate.
        require_number() {
          printf '%s' "$2" \
            | grep -Eq '^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$' || {
              echo "$1 is not a finite number ('$2') — aborting promotion"
              exit 1
            }
        }
        # Succeeds when $1 > $2; awk copes with decimals and the scientific
        # notation Prometheus may return.
        exceeds() { awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'; }

        SUCCESS_THRESHOLD=$(yq '.sla.thresholds.successRate' "$VALUES_FILE")
        ERROR_THRESHOLD=$(yq '.sla.thresholds.errorRate' "$VALUES_FILE")
        LATENCY_THRESHOLD=$(yq '.sla.thresholds.latencyP95' "$VALUES_FILE")
        echo "Success threshold: $SUCCESS_THRESHOLD"
        echo "Error threshold: $ERROR_THRESHOLD"
        echo "Latency threshold: $LATENCY_THRESHOLD"

        # The thresholds are ratios, so divide by the total request rate
        # instead of comparing a raw per-second rate.
        TOTAL='sum(rate(http_requests_total{tenant="tenant-a"}[5m]))'
        SUCCESS=$(prom_query "sum(rate(http_requests_total{tenant=\"tenant-a\",status=~\"2..\"}[5m])) / $TOTAL")
        # "or vector(0)" keeps the ratio defined when no 5xx series exist.
        ERRORS=$(prom_query "(sum(rate(http_requests_total{tenant=\"tenant-a\",status=~\"5..\"}[5m])) or vector(0)) / $TOTAL")
        # The histogram is in seconds while latencyP95 is in ms: convert.
        LATENCY=$(prom_query 'histogram_quantile(0.95, sum(rate(request_duration_seconds_bucket{tenant="tenant-a"}[5m])) by (le)) * 1000')

        require_number "success threshold" "$SUCCESS_THRESHOLD"
        require_number "error threshold" "$ERROR_THRESHOLD"
        require_number "latency threshold" "$LATENCY_THRESHOLD"
        require_number "success ratio" "$SUCCESS"
        require_number "error ratio" "$ERRORS"
        require_number "p95 latency (ms)" "$LATENCY"

        if exceeds "$SUCCESS_THRESHOLD" "$SUCCESS" || \
           exceeds "$ERRORS" "$ERROR_THRESHOLD" || \
           exceeds "$LATENCY" "$LATENCY_THRESHOLD"; then
          echo "SLA check failed — aborting promotion"
          exit 1
        else
          echo "SLA check passed — proceeding with promotion"
        fi
    - name: Promote canary to full rollout (0/100)
      run: |
        # Same anchored rewrite as the canary start step.
        sed -i -E 's|^([[:space:]]*v1:)[[:space:]]+[0-9]+|\1 0|' "$VALUES_FILE"
        sed -i -E 's|^([[:space:]]*v2:)[[:space:]]+[0-9]+|\1 100|' "$VALUES_FILE"
        git diff --quiet || git commit -am "Canary complete: v2 100% for tenant-a"
        git push

View File

@@ -2,6 +2,7 @@ stages:
- test
- build
- deploy
- canary
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
@@ -47,3 +48,44 @@ deploy:
- git config user.email "ci@gitlab.com"
- git commit -am "Update image tag to $CI_COMMIT_SHA"
- git push
canary_start:
  # Manually triggered job that commits an 80/20 (v1/v2) traffic split to
  # the tenant-a values file.
  stage: canary
  when: manual # require operator approval
  variables:
    VALUES_FILE: charts/umbrella-agentic/values-tenant-a.yaml
  script:
    # Rewrite only "<indent>v1: <number>" lines so that other keys containing
    # "v1:" are untouched and trailing comments on the weight lines survive.
    - |
      sed -i -E 's|^([[:space:]]*v1:)[[:space:]]+[0-9]+|\1 80|' "$VALUES_FILE"
      sed -i -E 's|^([[:space:]]*v2:)[[:space:]]+[0-9]+|\1 20|' "$VALUES_FILE"
    - git config user.name "gitlab-ci"
    - git config user.email "ci@gitlab.com"
    # Skip the commit when the weights are already 80/20 (job re-run).
    - git diff --quiet || git commit -am "Canary start: v2 20% for tenant-a"
    - git push
canary_promote:
  # Manually triggered job that checks tenant-a's SLA thresholds against
  # Prometheus and, if they hold, commits a 0/100 (v1/v2) traffic split.
  stage: canary
  when: manual
  needs:
    - canary_start
  variables:
    VALUES_FILE: charts/umbrella-agentic/values-tenant-a.yaml
  script:
    - |
      : "${PROM_URL:?PROM_URL must be set to the Prometheus host[:port]}"

      # Run an instant PromQL query and print the first sample value.
      # -G with --data-urlencode stops curl from globbing {..} and [..] and
      # URL-encodes the query; jq -r prints the value without JSON quotes
      # ("null" when there is no sample).
      prom_query() {
        curl -fsS -G "http://$PROM_URL/api/v1/query" \
          --data-urlencode "query=$1" | jq -r '.data.result[0].value[1]'
      }
      # Abort unless $2 is a finite number (rejects empty, null, NaN, Inf)
      # so that a broken metric can never pass the gate.
      require_number() {
        printf '%s' "$2" \
          | grep -Eq '^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$' || {
            echo "$1 is not a finite number ('$2') — aborting promotion"
            exit 1
          }
      }
      # Succeeds when $1 > $2; awk copes with decimals and the scientific
      # notation Prometheus may return.
      exceeds() { awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'; }

      SUCCESS_THRESHOLD=$(yq '.sla.thresholds.successRate' "$VALUES_FILE")
      ERROR_THRESHOLD=$(yq '.sla.thresholds.errorRate' "$VALUES_FILE")
      LATENCY_THRESHOLD=$(yq '.sla.thresholds.latencyP95' "$VALUES_FILE")

      # The thresholds are ratios, so divide by the total request rate
      # instead of comparing a raw per-second rate.
      TOTAL='sum(rate(http_requests_total{tenant="tenant-a"}[5m]))'
      SUCCESS=$(prom_query "sum(rate(http_requests_total{tenant=\"tenant-a\",status=~\"2..\"}[5m])) / $TOTAL")
      # "or vector(0)" keeps the ratio defined when no 5xx series exist.
      ERRORS=$(prom_query "(sum(rate(http_requests_total{tenant=\"tenant-a\",status=~\"5..\"}[5m])) or vector(0)) / $TOTAL")
      # The histogram is in seconds while latencyP95 is in ms: convert.
      LATENCY=$(prom_query 'histogram_quantile(0.95, sum(rate(request_duration_seconds_bucket{tenant="tenant-a"}[5m])) by (le)) * 1000')

      require_number "success threshold" "$SUCCESS_THRESHOLD"
      require_number "error threshold" "$ERROR_THRESHOLD"
      require_number "latency threshold" "$LATENCY_THRESHOLD"
      require_number "success ratio" "$SUCCESS"
      require_number "error ratio" "$ERRORS"
      require_number "p95 latency (ms)" "$LATENCY"

      if exceeds "$SUCCESS_THRESHOLD" "$SUCCESS" || \
         exceeds "$ERRORS" "$ERROR_THRESHOLD" || \
         exceeds "$LATENCY" "$LATENCY_THRESHOLD"; then
        echo "SLA check failed — aborting promotion"
        exit 1
      else
        echo "SLA check passed — proceeding with promotion"
      fi
    # Rewrite only "<indent>v1: <number>" lines; the bare "v1:" mapping key
    # under components.inference must not be turned into "v1: 0".
    - |
      sed -i -E 's|^([[:space:]]*v1:)[[:space:]]+[0-9]+|\1 0|' "$VALUES_FILE"
      sed -i -E 's|^([[:space:]]*v2:)[[:space:]]+[0-9]+|\1 100|' "$VALUES_FILE"
    # Each job runs in its own environment, so the identity configured by
    # canary_start is not available here.
    - git config user.name "gitlab-ci"
    - git config user.email "ci@gitlab.com"
    - git diff --quiet || git commit -am "Canary complete: v2 100% for tenant-a"
    - git push

View File

@@ -12,6 +12,9 @@ spec:
source:
repoURL: https://github.com/your-org/awesome-agentic-ai
targetRevision: main
path: k8s
directory:
recurse: true
chart: charts/umbrella-agentic
helm:
valueFiles:

View File

@@ -11,6 +11,9 @@ spec:
source:
repoURL: https://github.com/your-org/awesome-agentic-ai
targetRevision: main
path: k8s
directory:
recurse: true
chart: charts/umbrella-agentic
helm:
valueFiles:

View File

@@ -11,6 +11,9 @@ spec:
source:
repoURL: https://github.com/your-org/awesome-agentic-ai
targetRevision: main
path: k8s
directory:
recurse: true
chart: charts/umbrella-agentic
helm:
valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
source:
repoURL: https://github.com/your-org/awesome-agentic-ai
targetRevision: main
path: kubernetes/namespaces
path: k8s/namespaces
destination:
server: https://kubernetes.default.svc
namespace: argocd

View File

@@ -0,0 +1,23 @@
# argocd/apps/tenant-a-inference-dr.yaml
# Argo CD Application that syncs the tenant-a inference DestinationRule
# manifest into the tenant-a namespace (sync wave 10).
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: tenant-a-inference-dr
  namespace: argocd
  annotations:
    argocd.argoproj.io/sync-wave: "10"
spec:
  project: agentic
  source:
    repoURL: https://github.com/your-org/awesome-agentic-ai
    targetRevision: main
    # `path` has to be a directory in the repository; the single manifest is
    # selected with directory.include instead of being used as the path.
    # NOTE(review): the manifest's own header comment says
    # kubernetes/istio/...; confirm the directory is really k8s/istio.
    path: k8s/istio
    directory:
      recurse: true
      include: inference-destinationrule.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: tenant-a
  syncPolicy:
    automated:
      prune: true
      selfHeal: true

View File

@@ -0,0 +1,23 @@
# argocd/apps/tenant-a-inference-vs.yaml
# Argo CD Application that syncs the tenant-a inference VirtualService
# manifest into the tenant-a namespace (sync wave 30).
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: tenant-a-inference-vs
  namespace: argocd
  annotations:
    argocd.argoproj.io/sync-wave: "30"
spec:
  project: agentic
  source:
    repoURL: https://github.com/your-org/awesome-agentic-ai
    targetRevision: main
    # `path` has to be a directory in the repository; the single manifest is
    # selected with directory.include instead of being used as the path.
    # NOTE(review): the manifest's own header comment says
    # kubernetes/istio/...; confirm the directory is really k8s/istio.
    path: k8s/istio
    directory:
      recurse: true
      include: inference-virtualservice.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: tenant-a
  syncPolicy:
    automated:
      prune: true
      selfHeal: true

View File

@@ -0,0 +1,24 @@
# argocd/apps/tenant-a-umbrella.yaml
# Argo CD Application that deploys the umbrella-agentic Helm chart with the
# tenant-a values file into the tenant-a namespace (sync wave 20).
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: tenant-a-umbrella
  namespace: argocd
  annotations:
    argocd.argoproj.io/sync-wave: "20"
spec:
  project: agentic
  source:
    repoURL: https://github.com/your-org/awesome-agentic-ai
    targetRevision: main
    # The chart lives in a Git repository, so it is addressed with `path`;
    # `chart` is only for sources that are Helm chart repositories.
    path: charts/umbrella-agentic
    helm:
      valueFiles:
        # Resolved relative to `path` (the chart directory), not the repo root.
        - values-tenant-a.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: tenant-a
  syncPolicy:
    automated:
      prune: true
      selfHeal: true

View File

@@ -0,0 +1,26 @@
# charts/agentic-platform/templates/deployment-inference-v1.yaml
{{- /*
Deployment for the v1 track of the inference component.
Values read: tenant, components.inference.v1.replicas (falls back to 2 when
the key is absent), components.inference.ports.http, image.repository,
image.tagV1.
*/}}
{{- $v1 := .Values.components.inference.v1 | default dict }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-v1
  namespace: {{ .Values.tenant | quote }}
spec:
  # hasKey instead of `default`: `default` treats 0 as empty, which would
  # make an explicit `replicas: 0` render as 2.
  replicas: {{ hasKey $v1 "replicas" | ternary (get $v1 "replicas") 2 }}
  selector:
    matchLabels:
      app: inference
      version: v1
      tenant: {{ .Values.tenant | quote }}
  template:
    metadata:
      labels:
        app: inference
        version: v1
        tenant: {{ .Values.tenant | quote }}
    spec:
      containers:
        - name: inference
          image: "{{ .Values.image.repository }}:{{ .Values.image.tagV1 }}"
          ports:
            - name: http
              containerPort: {{ .Values.components.inference.ports.http }}

View File

@@ -0,0 +1,26 @@
# charts/agentic-platform/templates/deployment-inference-v2.yaml
{{- /*
Deployment for the v2 track of the inference component.
Values read: tenant, components.inference.v2.replicas (falls back to 1 when
the key is absent), components.inference.ports.http, image.repository,
image.tagV2.
*/}}
{{- $v2 := .Values.components.inference.v2 | default dict }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-v2
  namespace: {{ .Values.tenant | quote }}
spec:
  # hasKey instead of `default`: `default` treats 0 as empty, which would
  # make an explicit `replicas: 0` render as 1.
  replicas: {{ hasKey $v2 "replicas" | ternary (get $v2 "replicas") 1 }}
  selector:
    matchLabels:
      app: inference
      version: v2
      tenant: {{ .Values.tenant | quote }}
  template:
    metadata:
      labels:
        app: inference
        version: v2
        tenant: {{ .Values.tenant | quote }}
    spec:
      containers:
        - name: inference
          image: "{{ .Values.image.repository }}:{{ .Values.image.tagV2 }}"
          ports:
            - name: http
              containerPort: {{ .Values.components.inference.ports.http }}

View File

@@ -0,0 +1,17 @@
# charts/agentic-platform/templates/istio-destinationrule.yaml
{{- /*
DestinationRule defining the v1 and v2 subsets of the inference host in the
tenant namespace. Rendered only when .Values.istio is set.
*/}}
{{- if .Values.istio }}
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
  name: inference
  # Templated scalars are quoted so the rendered value is always a string.
  namespace: {{ .Values.tenant | quote }}
spec:
  host: {{ printf "inference.%s.svc.cluster.local" .Values.tenant | quote }}
  subsets:
    - name: v1
      labels:
        version: v1
    - name: v2
      labels:
        version: v2
{{- end }}

View File

@@ -0,0 +1,22 @@
# charts/agentic-platform/templates/istio-virtualservice.yaml
{{- /*
VirtualService splitting mesh traffic for the inference host between the v1
and v2 subsets. Rendered only when .Values.istio is set. Weights come from
.Values.istio.weights and fall back to 100 (v1) / 0 (v2) when a key is absent.
*/}}
{{- if .Values.istio }}
{{- $weights := .Values.istio.weights | default dict }}
{{- $host := printf "inference.%s.svc.cluster.local" .Values.tenant }}
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: inference
  namespace: {{ .Values.tenant | quote }}
spec:
  hosts:
    - {{ $host | quote }}
  gateways: [mesh]
  http:
    - route:
        - destination:
            host: {{ $host | quote }}
            subset: v1
          # hasKey instead of `default`: `default` treats 0 as empty, so a
          # promoted canary (`v1: 0`) would otherwise render as weight 100.
          weight: {{ hasKey $weights "v1" | ternary (get $weights "v1") 100 }}
        - destination:
            host: {{ $host | quote }}
            subset: v2
          weight: {{ hasKey $weights "v2" | ternary (get $weights "v2") 0 }}
{{- end }}

View File

@@ -3,7 +3,7 @@ tenant: tenant-a
global:
image:
repository: your-registry/agentic-app
tag: v1.2.3
tag: v1.2.3 # default/stable tag
deploymentMode: hybrid
inference:
@@ -18,6 +18,28 @@ inference:
cpu: "4"
memory: "4Gi"
components:
inference:
ports:
http: 8000
v1:
replicas: 2
imageTag: v1.2.3 # explicit tag for v1
v2:
replicas: 1
imageTag: v1.2.4 # explicit tag for v2
image:
repository: your-registry/agentic-app
tagV1: v1.2.3
tagV2: v1.2.4
istio:
enabled: true
weights:
v1: 80 # % traffic to v1
v2: 20 # % traffic to v2
governance:
enabled: true
replicas: 1
@@ -38,4 +60,10 @@ ingress:
host: "agentic.tenant-a.example.com"
tls:
enabled: true
secretName: tenant-a-tls
secretName: tenant-a-tls
sla:
thresholds:
successRate: 0.95 # minimum acceptable success ratio
errorRate: 0.05 # maximum acceptable error ratio
latencyP95: 500 # optional: max 95th percentile latency in ms

View File

@@ -11,6 +11,23 @@ inference:
replicas: 2
tenant: tenant-b
components:
inference:
ports:
http: 8000
v1:
replicas: 2
v2:
replicas: 1
image:
repository: your-registry/agentic-app
tagV1: v1.2.3
tagV2: v1.2.4
istio:
weights:
v1: 80
v2: 20
governance:
enabled: false # disable governance for tenant B
tenant: tenant-b
@@ -30,4 +47,10 @@ ingress:
host: "agentic.tenant-b.example.com"
tls:
enabled: true
secretName: tenant-b-tls
secretName: tenant-b-tls
sla:
thresholds:
successRate: 0.95 # minimum acceptable success ratio
errorRate: 0.05 # maximum acceptable error ratio
latencyP95: 500 # optional: max 95th percentile latency in ms

View File

@@ -11,6 +11,23 @@ inference:
replicas: 4
tenant: tenant-c
components:
inference:
ports:
http: 8000
v1:
replicas: 2
v2:
replicas: 1
image:
repository: your-registry/agentic-app
tagV1: v1.2.3
tagV2: v1.2.4
istio:
weights:
v1: 80
v2: 20
governance:
enabled: true
replicas: 2
@@ -31,4 +48,10 @@ ingress:
host: "agentic.tenant-c.example.com"
tls:
enabled: true
secretName: tenant-c-tls
secretName: tenant-c-tls
sla:
thresholds:
successRate: 0.95 # minimum acceptable success ratio
errorRate: 0.05 # maximum acceptable error ratio
latencyP95: 500 # optional: max 95th percentile latency in ms

View File

@@ -0,0 +1,18 @@
# kubernetes/istio/inference-destinationrule.yaml
# DestinationRule for the tenant-a inference host: declares the v1 and v2
# subsets (selected by the `version` label) and sets the TLS mode to
# ISTIO_MUTUAL for the host.
---
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
  name: inference
  namespace: tenant-a
spec:
  host: inference.tenant-a.svc.cluster.local
  trafficPolicy:
    tls:
      mode: ISTIO_MUTUAL
  subsets:
    - name: v1
      labels: {version: v1}
    - name: v2
      labels: {version: v2}

View File

@@ -0,0 +1,22 @@
# kubernetes/istio/inference-virtualservice.yaml
# VirtualService for the tenant-a inference host: a single mesh-internal HTTP
# route named canary-split that sends 80% of traffic to subset v1 and 20% to
# subset v2.
# NOTE(review): the Helm template istio-virtualservice.yaml also renders a
# VirtualService named `inference` in the tenant namespace; confirm both are
# not applied to tenant-a at the same time.
---
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: inference
  namespace: tenant-a
spec:
  hosts: [inference.tenant-a.svc.cluster.local]
  gateways: [mesh]
  http:
    - name: canary-split
      route:
        - weight: 80
          destination:
            host: inference.tenant-a.svc.cluster.local
            subset: v1
        - weight: 20
          destination:
            host: inference.tenant-a.svc.cluster.local
            subset: v2