OpenHands · simonrosenberg · Dec 4, 2025 · Dec 6, 2025 · Dec 6, 2025
diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml
@@ -8,12 +8,20 @@ on:
         types: [published]
     workflow_dispatch:
         inputs:
+            benchmark:
+                description: Benchmark to evaluate
+                required: false
+                default: swebench
+                type: choice
+                options:
+                    - swebench
+                    - gaia
             sdk_ref:
                 description: SDK commit/ref to evaluate
                 required: true
                 default: main
             eval_limit:
-                description: Number of SWE-bench instances to run
+                description: Number of instances to run
                 required: true
                 default: '1'
                 type: choice
@@ -135,7 +143,8 @@ jobs:
                     EVAL_LIMIT="${{ github.event.inputs.eval_limit }}"
                     SDK_REF="${{ github.event.inputs.sdk_ref }}"
                     # Convert ref to SHA for manual dispatch
-                    SDK_SHA=$(git rev-parse "$SDK_REF" 2>/dev/null || echo "$SDK_REF")
+                    # Resolve to a commit SHA: try origin/<ref> (remote branch), then <ref> (local ref, tag or SHA), else keep the input. --verify --quiet prints nothing on a miss (plain rev-parse echoes the bad arg to stdout and would pollute SDK_SHA); ^{commit} peels annotated tags.
+                    SDK_SHA=$(git rev-parse --verify --quiet "origin/$SDK_REF^{commit}" 2>/dev/null || git rev-parse --verify --quiet "$SDK_REF^{commit}" 2>/dev/null || echo "$SDK_REF")
                     PR_NUMBER=""
                     REASON="${{ github.event.inputs.reason }}"
                     if [ -z "$REASON" ]; then
@@ -181,10 +190,11 @@ jobs:
                   EVAL_WORKFLOW: ${{ env.EVAL_WORKFLOW }}
                   EVAL_BRANCH: ${{ github.event.inputs.eval_branch || 'main' }}
                   BENCHMARKS_BRANCH: ${{ github.event.inputs.benchmarks_branch || 'main' }}
+                  BENCHMARK: ${{ github.event.inputs.benchmark || 'swebench' }}
                   TRIGGER_REASON: ${{ github.event.inputs.reason }}
                   PR_NUMBER: ${{ steps.params.outputs.pr_number }}
               run: |
-                  echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH)"
+                  echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH)"
                   PAYLOAD=$(jq -n \
                     --arg sdk "$SDK_SHA" \
                     --arg eval_limit "$EVAL_LIMIT" \
@@ -193,7 +203,8 @@ jobs:
                     --arg reason "$TRIGGER_REASON" \
                     --arg pr "$PR_NUMBER" \
                     --arg benchmarks "$BENCHMARKS_BRANCH" \
-                    '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks}}')
+                    --arg benchmark "$BENCHMARK" \
+                    '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark}}')
                   RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \
                     -H "Authorization: token $PAT_TOKEN" \
                     -H "Accept: application/vnd.github+json" \

diff --git a/openhands-agent-server/pyproject.toml b/openhands-agent-server/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openhands-agent-server"
-version = "1.4.0"
+version = "1.4.1"
 description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
 
 requires-python = ">=3.12"

diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openhands-sdk"
-version = "1.4.0"
+version = "1.4.1"
 description = "OpenHands SDK - Core functionality for building AI agents"
 
 requires-python = ">=3.12"

diff --git a/openhands-tools/pyproject.toml b/openhands-tools/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openhands-tools"
-version = "1.4.0"
+version = "1.4.1"
 description = "OpenHands Tools - Runtime tools for AI agents"
 
 requires-python = ">=3.12"

diff --git a/openhands-workspace/pyproject.toml b/openhands-workspace/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openhands-workspace"
-version = "1.4.0"
+version = "1.4.1"
 description = "OpenHands Workspace - Docker and container-based workspace implementations"
 
 requires-python = ">=3.12"

diff --git a/uv.lock b/uv.lock