# dragonfly/.github/workflows/benchmark.yml
# (Copied from a web file viewer, which listed it as 196 lines, 6.8 KiB, YAML.)

name: benchmark-tests

# Triggers: one scheduled run per day, plus manual runs via workflow_dispatch.
on:
  schedule:
    # NOTE(review): the previous comment said "6 AM UTC" but the expression
    # fires at 09:00. The schedule is kept as-is; change the hour field to 6
    # if that was the intended time.
    - cron: "0 9 * * *" # run at 9 AM UTC
  workflow_dispatch:
jobs:
  # Deploys Dragonfly through the operator on the "dev" EKS cluster, runs the
  # memtier benchmark job across an image upgrade, runs post-run checks, and
  # always collects logs and tears the environment down afterwards.
  benchmark:
    strategy:
      matrix:
        config:
          # Dragonfly custom resource. It is serialized with toJson() and piped
          # to `kubectl apply` in the "Apply Configuration" step.
          - operator:
              apiVersion: "dragonflydb.io/v1alpha1"
              kind: "Dragonfly"
              metadata:
                labels:
                  app.kubernetes.io/name: "dragonfly"
                  app.kubernetes.io/instance: "dragonfly-sample"
                  app.kubernetes.io/part-of: "dragonfly-operator"
                  app.kubernetes.io/managed-by: "kustomize"
                  app.kubernetes.io/created-by: "dragonfly-operator"
                name: "dragonfly-sample"
              spec:
                image: "ghcr.io/dragonflydb/dragonfly:latest"
                args: ["--cache_mode"]
                replicas: 2
                resources:
                  requests:
                    cpu: "2"
                    memory: "2000Mi"
                  limits:
                    cpu: "2"
                    memory: "2000Mi"

    runs-on: ubuntu-latest

    container:
      image: ghcr.io/romange/benchmark-dev:latest
      options: --security-opt seccomp=unconfined

    permissions:
      # Lets configure-aws-credentials request an OIDC token for the role below.
      id-token: write
      # Listing any permission sets all unlisted ones to none, so the read
      # access that actions/checkout uses has to be granted explicitly.
      contents: read

    steps:
      # Every later step addresses the cluster through this per-run namespace.
      - name: Setup namespace name
        id: setup
        run: |
          echo "namespace=benchmark-$(date +"%Y-%m-%d-%s")" >> "$GITHUB_OUTPUT"

      - uses: actions/checkout@v4
        with:
          submodules: true

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
          aws-region: ${{ vars.AWS_REGION }}

      - name: Update kube config
        run: aws eks update-kubeconfig --name "$EKS_CLUSTER_NAME" --region "$AWS_REGION"
        env:
          AWS_REGION: ${{ vars.AWS_REGION }}
          EKS_CLUSTER_NAME: dev

      # Paired with "Scale down to zero" below, which resets the capacity to 0.
      - name: Scale up
        run: |
          set -x
          aws autoscaling set-desired-capacity --auto-scaling-group-name "$AUTOSCALING_GROUP" --desired-capacity "$DESIRED_CAPACITY"
        env:
          AUTOSCALING_GROUP: ${{ vars.DEV_EKS_AS_GROUP }}
          DESIRED_CAPACITY: "1"

      - name: Install the CRD and Operator
        run: |
          # Install the CRD and Operator
          kubectl apply -f https://raw.githubusercontent.com/dragonflydb/dragonfly-operator/main/manifests/dragonfly-operator.yaml

      - name: Apply Configuration
        run: |
          set -x
          # `|| true`: an already existing namespace is not treated as an error.
          kubectl create namespace ${{ steps.setup.outputs.namespace }} || true
          echo '${{ toJson(matrix.config.operator) }}' | kubectl apply -n ${{ steps.setup.outputs.namespace }} -f -

      - name: Wait For Service
        run: |
          set -x
          kubectl wait -n ${{ steps.setup.outputs.namespace }} dragonfly/dragonfly-sample --for=jsonpath='{.status.phase}'=ready --timeout=180s
          kubectl wait -n ${{ steps.setup.outputs.namespace }} pods --selector app=dragonfly-sample --for condition=Ready --timeout=120s
          kubectl describe -n ${{ steps.setup.outputs.namespace }} pod dragonfly-sample-0

      # Only submits the job; the image upgrade below happens while it runs.
      - name: Run Memtier Benchmark
        shell: bash
        run: |
          kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml

      - name: Version upgrade
        shell: bash
        run: |
          # benchmark is running, wait for 30 seconds before version upgrade
          sleep 30
          kubectl patch dragonfly dragonfly-sample -n ${{ steps.setup.outputs.namespace }} --type merge -p '{"spec":{"image":"ghcr.io/dragonflydb/dragonfly-weekly:latest"}}'

      - name: Wait for Memtier Benchmark fail
        shell: bash
        run: |
          # Memtier benchmark run will fail at some point because old master shutdown on version upgrade
          kubectl wait --for=condition=failed --timeout=120s -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null
          kubectl logs -n ${{ steps.setup.outputs.namespace }} -f jobs/memtier-benchmark
          # Remove the failed job so the same manifest can be applied again.
          kubectl delete -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark

      - name: Run Memtier Benchmark again
        shell: bash
        run: |
          kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml
          # Poll every 3 seconds until the job reports either terminal condition.
          # NOTE(review): the loop has no upper bound of its own; if the job
          # never reaches complete/failed it spins until the workflow is stopped.
          while true; do
            if kubectl wait --for=condition=complete --timeout=0 -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null; then
              job_result=0
              break
            fi
            if kubectl wait --for=condition=failed --timeout=0 -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null; then
              job_result=1
              break
            fi
            sleep 3
          done
          # Print the benchmark output before deciding the step result.
          kubectl logs -n ${{ steps.setup.outputs.namespace }} -f jobs/memtier-benchmark
          if [[ $job_result -eq 1 ]]; then
            exit 1
          fi

      - name: Server checks
        run: |
          # Port-forward runs in the background; presumably post_run_checks.py
          # connects to localhost:6379 - confirm against the script.
          nohup kubectl port-forward -n ${{ steps.setup.outputs.namespace }} service/dragonfly-sample 6379:6379 &
          pip install -r tools/requirements.txt
          python3 tools/benchmark/post_run_checks.py

      # The diagnostic steps below run regardless of earlier failures and are
      # retried up to 3 times with a 1 minute timeout per attempt.
      - name: Get Dragonfly logs
        uses: nick-fields/retry@v3
        if: always()
        with:
          timeout_minutes: 1
          max_attempts: 3
          command: |
            kubectl logs -n ${{ steps.setup.outputs.namespace }} dragonfly-sample-0

      - name: Get Dragonfly replica logs
        uses: nick-fields/retry@v3
        if: always()
        with:
          timeout_minutes: 1
          max_attempts: 3
          command: |
            kubectl logs -n ${{ steps.setup.outputs.namespace }} dragonfly-sample-1

      - name: Describe dragonflydb object
        uses: nick-fields/retry@v3
        if: always()
        with:
          timeout_minutes: 1
          max_attempts: 3
          command: |
            kubectl describe dragonflies.dragonflydb.io -n ${{ steps.setup.outputs.namespace }} dragonfly-sample

      - name: Scale down to zero
        if: always()
        run: |
          set -x
          aws autoscaling set-desired-capacity --auto-scaling-group-name "$AUTOSCALING_GROUP" --desired-capacity 0
        env:
          AUTOSCALING_GROUP: ${{ vars.DEV_EKS_AS_GROUP }}

      - name: Cleanup
        if: always()
        run: |
          set -x
          # Attempt both deletions even if the first one fails (the default
          # shell exits on the first error), then report the failure, so the
          # operator namespace is not left behind when the benchmark namespace
          # was never created or could not be removed.
          status=0
          kubectl delete namespace ${{ steps.setup.outputs.namespace }} --ignore-not-found || status=$?
          kubectl delete namespace dragonfly-operator-system --ignore-not-found || status=$?
          exit "$status"

      - name: Send notification on failure
        if: failure() && github.ref == 'refs/heads/main'
        shell: bash
        env:
          # Passed through the environment instead of being expanded into the
          # script text.
          WEBHOOK_URL: ${{ secrets.GSPACES_BOT_DF_BUILD }}
        run: |
          job_link="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
          message="Benchmark tests failed.\\n Job Link: ${job_link}\\n"
          curl -s \
            -X POST \
            -H 'Content-Type: application/json' \
            "$WEBHOOK_URL" \
            -d '{"text": "'"${message}"'"}'