From 37c4b350cf7bddb1134c63168bca5eaf34503467 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 4 Dec 2025 11:27:21 -0500 Subject: [PATCH 1/3] set version --- openhands-agent-server/pyproject.toml | 2 +- openhands-sdk/pyproject.toml | 2 +- openhands-tools/pyproject.toml | 2 +- openhands-workspace/pyproject.toml | 2 +- uv.lock | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/openhands-agent-server/pyproject.toml b/openhands-agent-server/pyproject.toml index 45513222b3..26582aa5b8 100644 --- a/openhands-agent-server/pyproject.toml +++ b/openhands-agent-server/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-agent-server" -version = "1.4.0" +version = "1.4.1" description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent" requires-python = ">=3.12" diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 717625a3ae..c3e61a1eb9 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-sdk" -version = "1.4.0" +version = "1.4.1" description = "OpenHands SDK - Core functionality for building AI agents" requires-python = ">=3.12" diff --git a/openhands-tools/pyproject.toml b/openhands-tools/pyproject.toml index 119c061cad..65c202f6e3 100644 --- a/openhands-tools/pyproject.toml +++ b/openhands-tools/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-tools" -version = "1.4.0" +version = "1.4.1" description = "OpenHands Tools - Runtime tools for AI agents" requires-python = ">=3.12" diff --git a/openhands-workspace/pyproject.toml b/openhands-workspace/pyproject.toml index 9777daab64..3165e634d4 100644 --- a/openhands-workspace/pyproject.toml +++ b/openhands-workspace/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-workspace" -version = "1.4.0" +version = "1.4.1" description = "OpenHands Workspace - Docker and container-based workspace implementations" requires-python = ">=3.12" diff --git a/uv.lock b/uv.lock 
index 60af2cca14..6c3b46c8e9 100644 --- a/uv.lock +++ b/uv.lock @@ -2065,7 +2065,7 @@ wheels = [ [[package]] name = "openhands-agent-server" -version = "1.4.0" +version = "1.4.1" source = { editable = "openhands-agent-server" } dependencies = [ { name = "aiosqlite" }, @@ -2096,7 +2096,7 @@ requires-dist = [ [[package]] name = "openhands-sdk" -version = "1.4.0" +version = "1.4.1" source = { editable = "openhands-sdk" } dependencies = [ { name = "deprecation" }, @@ -2134,7 +2134,7 @@ provides-extras = ["boto3"] [[package]] name = "openhands-tools" -version = "1.4.0" +version = "1.4.1" source = { editable = "openhands-tools" } dependencies = [ { name = "bashlex" }, @@ -2163,7 +2163,7 @@ requires-dist = [ [[package]] name = "openhands-workspace" -version = "1.4.0" +version = "1.4.1" source = { editable = "openhands-workspace" } dependencies = [ { name = "openhands-sdk" }, From da3297a6c881a0eeb72c46ceeb578263e97540c4 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 6 Dec 2025 10:52:14 +0000 Subject: [PATCH 2/3] Fix SDK ref resolution in run-eval workflow Try origin/ prefix first when resolving SDK ref to SHA, as the workflow checks out a specific branch and remote branches need the origin/ prefix. 
--- .github/workflows/run-eval.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml index 6d13e5303a..efd86cc594 100644 --- a/.github/workflows/run-eval.yml +++ b/.github/workflows/run-eval.yml @@ -135,7 +135,8 @@ jobs: EVAL_LIMIT="${{ github.event.inputs.eval_limit }}" SDK_REF="${{ github.event.inputs.sdk_ref }}" # Convert ref to SHA for manual dispatch - SDK_SHA=$(git rev-parse "$SDK_REF" 2>/dev/null || echo "$SDK_REF") + # Try origin/<ref> first (remote branches), then the bare ref, then fall back to the raw input; --verify --quiet keeps a failed lookup from echoing the unresolved name to stdout and polluting SDK_SHA + SDK_SHA=$(git rev-parse --verify --quiet "origin/$SDK_REF" 2>/dev/null || git rev-parse --verify --quiet "$SDK_REF" 2>/dev/null || echo "$SDK_REF") PR_NUMBER="" REASON="${{ github.event.inputs.reason }}" if [ -z "$REASON" ]; then From afb19bf2a97898a61b126551cc41cdc3395836aa Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 6 Dec 2025 10:53:14 +0000 Subject: [PATCH 3/3] Add benchmark parameter to run-eval workflow - Add benchmark input (swebench/gaia) with swebench as default - Pass benchmark parameter to evaluation workflow dispatch - Update eval_limit description to be benchmark-agnostic --- .github/workflows/run-eval.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml index efd86cc594..e4be1ecea0 100644 --- a/.github/workflows/run-eval.yml +++ b/.github/workflows/run-eval.yml @@ -8,12 +8,20 @@ on: types: [published] workflow_dispatch: inputs: + benchmark: + description: Benchmark to evaluate + required: false + default: swebench + type: choice + options: + - swebench + - gaia sdk_ref: description: SDK commit/ref to evaluate required: true default: main eval_limit: - description: Number of SWE-bench instances to run + description: Number of instances to run required: true default: '1' type: choice @@ -182,10 +190,11 @@ jobs: EVAL_WORKFLOW: ${{ env.EVAL_WORKFLOW }} EVAL_BRANCH: ${{ 
github.event.inputs.eval_branch || 'main' }} BENCHMARKS_BRANCH: ${{ github.event.inputs.benchmarks_branch || 'main' }} + BENCHMARK: ${{ github.event.inputs.benchmark || 'swebench' }} TRIGGER_REASON: ${{ github.event.inputs.reason }} PR_NUMBER: ${{ steps.params.outputs.pr_number }} run: | - echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH)" + echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH)" PAYLOAD=$(jq -n \ --arg sdk "$SDK_SHA" \ --arg eval_limit "$EVAL_LIMIT" \ @@ -194,7 +203,8 @@ jobs: --arg reason "$TRIGGER_REASON" \ --arg pr "$PR_NUMBER" \ --arg benchmarks "$BENCHMARKS_BRANCH" \ - '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks}}') + --arg benchmark "$BENCHMARK" \ + '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark}}') RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \ -H "Authorization: token $PAT_TOKEN" \ -H "Accept: application/vnd.github+json" \