microsoft
diff --git a/.github/workflows/badge-compat.yml b/.github/workflows/badge-compat.yml
Lines changed: 29 additions & 0 deletions
diff --git a/.github/workflows/badge-unit.yml b/.github/workflows/badge-unit.yml
Lines changed: 31 additions & 0 deletions
diff --git a/.github/workflows/dashboard.yml b/.github/workflows/dashboard.yml
Lines changed: 33 additions & 0 deletions
diff --git a/.github/workflows/examples-calc-x.yml b/.github/workflows/examples-calc-x.yml
Lines changed: 112 additions & 10 deletions
diff --git a/.github/workflows/pypi-nightly.yml b/.github/workflows/pypi-nightly.yml
Lines changed: 8 additions & 0 deletions
diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
Lines changed: 8 additions & 0 deletions
diff --git a/.github/workflows/tests-full.yml b/.github/workflows/tests-full.yml
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,29 @@
+name: Badge - Compatibility  # runs scripts/badge_aggregation.js against the examples-compat workflow
+
+on:
+  workflow_run:  # fires when a run of one of the workflows named below completes
+    workflows:
+      - Examples - Backward Compatibility  # matched by workflow name; presumably the `name:` of examples-compat.yml (TODO confirm)
+    types: [completed]
+
+  workflow_dispatch:  # manual trigger
+
+permissions:  # GITHUB_TOKEN limited to read access on actions and contents
+  actions: read
+  contents: read
+
+jobs:
+  badge:
+    if: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'workflow_run' && github.event.workflow_run.head_branch == 'main') }}  # always run when dispatched manually; for workflow_run, only when the triggering run's head branch is main
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4  # the script step requires ./scripts/badge_aggregation.js from the checked-out tree
+      - uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}  # token for the `github` client handed to the script
+          script: |
+            const badgeAggregation = require('./scripts/badge_aggregation.js');
+            const dependencies = [
+              { workflow: 'examples-compat.yml', label: 'examples-compat', variants: ['legacy', 'stable'] },
+            ];
+            await badgeAggregation({ github, context, core, dependencies });
@@ -0,0 +1,31 @@
+name: Badge - Unit Test  # runs scripts/badge_aggregation.js against the tests-full and tests workflows
+
+on:
+  workflow_run:  # fires when a run of one of the workflows named below completes
+    workflows:
+      - CPU Test  # matched by workflow name; presumably declared by one of the workflow files listed in the script (TODO confirm)
+      - GPU Test  # same assumption as above (TODO confirm)
+    types: [completed]
+
+  workflow_dispatch:  # manual trigger
+
+permissions:  # GITHUB_TOKEN limited to read access on actions and contents
+  actions: read
+  contents: read
+
+jobs:
+  badge:
+    if: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'workflow_run' && github.event.workflow_run.head_branch == 'main') }}  # always run when dispatched manually; for workflow_run, only when the triggering run's head branch is main
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4  # the script step requires ./scripts/badge_aggregation.js from the checked-out tree
+      - uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}  # token for the `github` client handed to the script
+          script: |
+            const badgeAggregation = require('./scripts/badge_aggregation.js');
+            const dependencies = [
+              { workflow: 'tests-full.yml', label: 'tests-full', variants: ['legacy', 'stable'] },
+              { workflow: 'tests.yml', label: 'tests', variants: ['legacy', 'stable', 'Lint', 'documentation', 'JavaScript'] },
+            ];
+            await badgeAggregation({ github, context, core, dependencies });
@@ -0,0 +1,33 @@
+name: Dashboard  # installs the dashboard/ project and runs Chromatic on it
+permissions:  # GITHUB_TOKEN limited to reading repository contents
+  contents: read
+on:
+  schedule:
+    # Every day at 5 AM UTC+8
+    - cron: '0 21 * * *'  # schedules are evaluated in UTC: 21:00 UTC is 05:00 the next day in UTC+8
+
+  workflow_dispatch:  # manual trigger
+
+  push:
+    branches: [ main, stable/**/* ]  # pushes to main or to branches matching the stable/**/* pattern
+
+jobs:
+  dashboard:
+    name: Chromatic
+    runs-on: ubuntu-latest
+    timeout-minutes: 15  # cancel the job if it runs longer than 15 minutes
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history instead of a shallow clone; Chromatic's setup docs ask for this
+      - uses: actions/setup-node@v6
+        with:
+          node-version: '22'
+      - name: Install JavaScript dependencies
+        run: cd dashboard && npm ci  # clean install of the dashboard's locked dependencies
+      - name: Run Chromatic
+        uses: chromaui/action@v13
+        with:
+          projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}  # read from the repository secret of the same name
+          workingDir: dashboard  # run Chromatic from the dashboard/ subdirectory
+          exitZeroOnChanges: false  # per the action's docs, false means detected changes fail the step; confirm this is wanted on scheduled runs too
@@ -22,12 +22,12 @@ run-name: >-
       || format('Calc-X - {0}', github.event_name) }}
 
 jobs:
-  calc-x:
+  calc-x-perf:
     if: >
       github.event_name != 'repository_dispatch' ||
       github.event.action == 'ci-calc-x' ||
       github.event.action == 'ci-all'
-    name: Calc-X (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
+    name: Calc-X Performance (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
     runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]
     timeout-minutes: 90
     strategy:
@@ -74,7 +74,7 @@ jobs:
       - name: Upload dependencies artifact
         uses: actions/upload-artifact@v4
         with:
-          name: dependencies-calc-x-${{ matrix.python-version }}-${{ matrix.setup-script }}
+          name: dependencies-calc-x-performance-${{ matrix.python-version }}-${{ matrix.setup-script }}
           path: requirements-freeze.txt
           compression-level: 0
 
@@ -116,13 +116,11 @@ jobs:
       # Don't ask why. Don't touch this.
       - name: Calc-X training
         run: |
-          set -ex
           source .venv/bin/activate
           cd examples/calc_x
           ../../scripts/restart_ray.sh
           sleep 5
-          PYTHONUNBUFFERED=1 python train_calc_agent.py --val-file data/test_mini.parquet --ci
-          sleep 10
+          python train_calc_agent.py --val-file data/test_mini.parquet --ci
         shell: bash
         env:
           WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
@@ -137,22 +135,126 @@ jobs:
           WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
           WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
 
-      - name: Calc-X training LLM Proxy
+  calc-x-variants:  # runs on every event except repository_dispatch events whose action is neither ci-calc-x nor ci-all
+    if: >
+      github.event_name != 'repository_dispatch' ||
+      github.event.action == 'ci-calc-x' ||
+      github.event.action == 'ci-all'
+    name: Calc-X Variants (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
+    runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]  # self-hosted runner pool; the first step runs nvidia-smi, so a GPU is expected
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        include:
+        - python-version: '3.10'
+          setup-script: 'legacy'  # synced with the torch-gpu-legacy dependency group
+        - python-version: '3.12'
+          setup-script: 'stable'  # synced with the torch-gpu-stable dependency group
+        - python-version: '3.13'
+          setup-script: 'latest'  # lockfile is upgraded first, then synced with torch-gpu-stable
+      fail-fast: false  # let the remaining matrix entries finish when one of them fails
+    steps:
+      - name: Check GPU status  # diagnostic output only
+        run: nvidia-smi
+      - name: Check disk space  # diagnostic output only
+        run: df -h
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.pr_ref || (github.event.pull_request.number && format('refs/pull/{0}/merge', github.event.pull_request.number)) || github.ref }}  # precedence: client_payload.pr_ref on repository_dispatch, else the PR merge ref when a PR number exists, else github.ref
+      - uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+          python-version: ${{ matrix.python-version }}
+      - name: Upgrade dependencies (latest)  # only for the 'latest' matrix entry: re-resolve the lockfile allowing upgrades
+        run: uv lock --upgrade
+        if: matrix.setup-script == 'latest'
+      - name: Sync dependencies (latest)  # 'latest' installs from the just-upgraded lockfile using the torch-gpu-stable group
+        run: |
+          uv sync --frozen --no-default-groups --extra verl \
+            --group dev --group experiment --group agents --group torch-gpu-stable
+        if: matrix.setup-script == 'latest'
+      - name: Sync dependencies (stable & legacy)  # the torch-gpu-* group name is taken from matrix.setup-script
+        run: |
+          uv sync --frozen --no-default-groups --extra verl \
+            --group dev --group experiment --group agents --group torch-gpu-${{ matrix.setup-script }}
+        if: matrix.setup-script != 'latest'
+      - name: Freeze dependencies  # writes requirements-freeze.txt and exports UV_LOCKED=1 / UV_NO_SYNC=1 to later steps via GITHUB_ENV
+        run: |
+          set -ex
+          uv pip freeze | tee requirements-freeze.txt
+          echo "UV_LOCKED=1" >> $GITHUB_ENV
+          echo "UV_NO_SYNC=1" >> $GITHUB_ENV
+      - name: Upload dependencies artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: dependencies-calc-x-variants-${{ matrix.python-version }}-${{ matrix.setup-script }}  # one distinct artifact name per matrix entry
+          path: requirements-freeze.txt
+          compression-level: 0  # upload without compression
+
+      - name: Launch LiteLLM Proxy  # runs scripts/litellm_run.sh with the Azure endpoint and key taken from repository secrets
+        run: |
+          ./scripts/litellm_run.sh
+        env:
+          AZURE_API_BASE: ${{ secrets.AZURE_GROUP_SUBSCRIPTION_API_BASE }}
+          AZURE_API_KEY: ${{ secrets.AZURE_GROUP_SUBSCRIPTION_API_KEY }}
+
+      - name: Prepare Calc-X dataset  # downloads calc-x-data.zip with gdown, unpacks it into examples/calc_x/data, then deletes the archive
+        run: |
+          set -ex
+          cd examples/calc_x
+          uv run gdown --fuzzy https://drive.google.com/file/d/1FQMyKLLd6hP9dw9rfZn1EZOWNvKaDsqw/view
+          unzip calc-x-data.zip -d data
+          rm calc-x-data.zip
+
+      - name: Calc-X MCP sanity check
+        run: |
+          set -ex
+          cd examples/calc_x
+          uv run tests/test_mcp_calculator.py
+        env:
+          OPENAI_API_BASE: http://localhost:12306/  # NOTE(review): the next step sets OPENAI_BASE_URL instead; confirm each script reads the variable it is given
+          OPENAI_API_KEY: dummy  # placeholder value; presumably the local endpoint does not validate it (TODO confirm)
+      - name: Calc-X sanity check
+        run: |
+          set -ex
+          cd examples/calc_x
+          uv run legacy_calc_agent_debug.py
+        env:
+          OPENAI_BASE_URL: http://localhost:12306/  # presumably the LiteLLM proxy launched earlier in this job (TODO confirm port)
+          OPENAI_API_KEY: dummy
+
+      - name: Training with local model  # restarts Ray, downloads Qwen/Qwen2.5-0.5B-Instruct into data/qwen_model, then trains with --ci-fast against that local path
+        run: |
+          set -ex
+          source .venv/bin/activate
+          cd examples/calc_x
+          ../../scripts/restart_ray.sh
+          sleep 5
+          hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir data/qwen_model
+          PYTHONUNBUFFERED=1 python train_calc_agent.py --val-file data/test_mini.parquet --ci-fast --model $(realpath data/qwen_model)
+          sleep 10
+        shell: bash
+        env:  # Weights & Biases endpoint and API key come from repository secrets
+          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
+          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
+        id: calc_x_train_local_model
+
+      - name: Training with LLM Proxy
         run: |
           set -ex
           source .venv/bin/activate
           cd examples/calc_x
           ../../scripts/restart_ray.sh
           sleep 5
-          PYTHONUNBUFFERED=1 python train_calc_agent.py --val-file data/test_mini.parquet --ci --llm-proxy
+          PYTHONUNBUFFERED=1 python train_calc_agent.py --val-file data/test_mini.parquet --ci-fast --llm-proxy
           sleep 10
         shell: bash
         env:
           WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
           WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
         id: calc_x_train_llm_proxy
 
-      - name: Calc-X training with external store
+      - name: Training with external store
         run: |
           set -euo pipefail
           source .venv/bin/activate
@@ -182,7 +284,7 @@ jobs:
           WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
         id: calc_x_train_external_store
 
-      - name: Calc-X training with role-based environment variables
+      - name: Training with role-based environment variables
         run: |
           set -euo pipefail
           source .venv/bin/activate
 
@@ -26,6 +26,14 @@ jobs:
       - name: Sync dependencies
         run: uv sync --frozen --no-default-groups --group dev
 
+      - uses: actions/setup-node@v6  # Node.js toolchain for the dashboard steps that follow
+        with:
+          node-version: '22'
+      - name: Install JavaScript dependencies
+        run: cd dashboard && npm ci  # clean install of the dashboard's locked dependencies
+      - name: Build dashboard
+        run: cd dashboard && npm run build  # runs the dashboard's `build` script; presumably its output is needed by the later version/packaging steps (TODO confirm)
+
       - name: Get current version
         id: get_version
         run: |
 
@@ -60,6 +60,14 @@ jobs:
       - name: Sync dependencies
         run: uv sync --frozen --no-default-groups --group dev
 
+      - uses: actions/setup-node@v6  # Node.js toolchain for the dashboard steps that follow
+        with:
+          node-version: '22'
+      - name: Install JavaScript dependencies
+        run: cd dashboard && npm ci  # clean install of the dashboard's locked dependencies
+      - name: Build dashboard
+        run: cd dashboard && npm run build  # runs the dashboard's `build` script before `uv build`; presumably so the built assets end up in the package (TODO confirm)
+
       - name: Build package
         run: |
           uv build
 
@@ -73,6 +73,14 @@ jobs:
           path: requirements-freeze.txt
           compression-level: 0
 
+      - uses: actions/setup-node@v6  # Node.js toolchain for the dashboard steps that follow
+        with:
+          node-version: '22'
+      - name: Install JavaScript dependencies
+        run: cd dashboard && npm ci  # clean install of the dashboard's locked dependencies
+      - name: Build dashboard
+        run: cd dashboard && npm run build  # runs the dashboard's `build` script; presumably later test steps rely on the built assets (TODO confirm)
+
       - name: Launch LiteLLM Proxy
         run: |
           ./scripts/litellm_run.sh