ci : remove tag from build-self-hosted.yml

ci : update build-self-hosted.yml
2026-06-25 13:09:46 +02:00 · 2026-05-24 18:08:11 +03:00 · 2026-05-24 16:55:23 +03:00
4 changed files with 158 additions and 141 deletions
@@ -55,24 +55,7 @@ env:
  LLAMA_LOG_TIMESTAMPS: 1

 jobs:
-  determine-tag:
-    name: Determine tag name
-    runs-on: ubuntu-slim
-    outputs:
-      tag_name: ${{ steps.tag.outputs.name }}
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Determine tag name
-        id: tag
-        uses: ./.github/actions/get-tag-name
-        env:
-          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
-
  ggml-ci-nvidia-cuda:
-    needs: determine-tag
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
@@ -82,14 +65,11 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          nvidia-smi
-          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-nvidia-vulkan-cm:
-    needs: determine-tag
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
@@ -99,14 +79,11 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-nvidia-vulkan-cm2:
-    needs: determine-tag
    runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]

    steps:
@@ -116,14 +93,12 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-nvidia-webgpu:
-    runs-on: [self-hosted, Linux, NVIDIA]
+    runs-on: [self-hosted, Linux, NVIDIA, X64]

    steps:
      - name: Clone
@@ -149,7 +124,7 @@ jobs:
          GG_BUILD_WEBGPU=1 \
          GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
          GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
-            bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+            bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMX-compatible machine
  #ggml-ci-cpu-amx:
@@ -163,7 +138,7 @@ jobs:
  #    - name: Test
  #      id: ggml-ci
  #      run: |
-  #        bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  #        bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # ggml-ci-amd-vulkan:
@@ -178,7 +153,7 @@ jobs:
  #       id: ggml-ci
  #       run: |
  #         vulkaninfo --summary
-  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # ggml-ci-amd-rocm:
@@ -193,10 +168,9 @@ jobs:
  #       id: ggml-ci
  #       run: |
  #         amd-smi static
-  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-mac-metal:
-    needs: determine-tag
    runs-on: [self-hosted, macOS, ARM64]

    steps:
@@ -206,13 +180,10 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-mac-webgpu:
-    needs: determine-tag
    runs-on: [self-hosted, macOS, ARM64]

    steps:
@@ -235,14 +206,11 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
            bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-mac-vulkan:
-    needs: determine-tag
    runs-on: [self-hosted, macOS, ARM64]

    steps:
@@ -252,14 +220,11 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-linux-intel-vulkan:
-    needs: determine-tag
    runs-on: [self-hosted, Linux, Intel]

    steps:
@@ -271,14 +236,11 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-win-intel-vulkan:
-    needs: determine-tag
    runs-on: [self-hosted, Windows, X64, Intel]

    steps:
@@ -293,7 +255,6 @@ jobs:
          MSYSTEM: UCRT64
          CHERE_INVOKING: 1
          PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          # Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
@@ -301,7 +262,6 @@ jobs:
          LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

  ggml-ci-intel-openvino-gpu-low-perf:
-    needs: determine-tag
    runs-on: [self-hosted, Linux, Intel, OpenVINO]

    concurrency:
@@ -333,8 +293,64 @@ jobs:

      - name: Test
        id: ggml-ci
-        env:
-          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          source ./openvino_toolkit/setupvars.sh
-          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  ggml-ci-arm64-cpu-low-perf:
+    runs-on: [self-hosted, Linux, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  ggml-ci-arm64-cpu-high-perf:
+    runs-on: [self-hosted, Linux, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+# TODO: figure out how to detect ARM flags on DGX Spark. cmake currently emits this warning:
+#         CMake Warning at ggml/src/ggml-cpu/CMakeLists.txt:147 (message):
+#           ARM -march/-mcpu not found, -mcpu=native will be used
+#
+#       once this is resolved, the commented-out jobs below can be offloaded to the self-hosted runners
+#
+#  ggml-ci-arm64-cpu-high-perf-sve:
+#    runs-on: [self-hosted, Linux, NVIDIA, ARM64]
+#
+#    steps:
+#      - name: Clone
+#        id: checkout
+#        uses: actions/checkout@v6
+#
+#      - name: Test
+#        id: ggml-ci
+#        run: |
+#          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+#
+#  ggml-ci-arm64-cpu-kleidiai:
+#    runs-on: [self-hosted, Linux, NVIDIA, ARM64]
+#
+#    steps:
+#      - name: Clone
+#        id: checkout
+#        uses: actions/checkout@v6
+#
+#      - name: Test
+#        id: ggml-ci
+#        run: |
+#          GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
@@ -931,31 +931,32 @@ jobs:
        run: |
          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

-  ggml-ci-arm64-cpu-low-perf:
-    runs-on: ubuntu-22.04-arm
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.21
-        with:
-          key: ggml-ci-arm64-cpu-low-perf
-          evict-old-files: 1d
-          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+# note: this job moved to build-self-hosted.yml - remove this commented-out copy once the self-hosted version is stable
+#  ggml-ci-arm64-cpu-low-perf:
+#    runs-on: ubuntu-22.04-arm
+#
+#    steps:
+#      - name: Clone
+#        id: checkout
+#        uses: actions/checkout@v6
+#
+#      - name: ccache
+#        uses: ggml-org/ccache-action@v1.2.21
+#        with:
+#          key: ggml-ci-arm64-cpu-low-perf
+#          evict-old-files: 1d
+#          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+#
+#      - name: Dependencies
+#        id: depends
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#
+#      - name: Test
+#        id: ggml-ci
+#        run: |
+#          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

  ggml-ci-x64-cpu-high-perf:
    runs-on: ubuntu-22.04
@@ -983,31 +984,32 @@ jobs:
        run: |
          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

-  ggml-ci-arm64-cpu-high-perf:
-    runs-on: ubuntu-22.04-arm
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.21
-        with:
-          key: ggml-ci-arm64-cpu-high-perf
-          evict-old-files: 1d
-          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+# note: this job moved to build-self-hosted.yml - remove this commented-out copy once the self-hosted version is stable
+#  ggml-ci-arm64-cpu-high-perf:
+#    runs-on: ubuntu-22.04-arm
+#
+#    steps:
+#      - name: Clone
+#        id: checkout
+#        uses: actions/checkout@v6
+#
+#      - name: ccache
+#        uses: ggml-org/ccache-action@v1.2.21
+#        with:
+#          key: ggml-ci-arm64-cpu-high-perf
+#          evict-old-files: 1d
+#          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+#
+#      - name: Dependencies
+#        id: depends
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#
+#      - name: Test
+#        id: ggml-ci
+#        run: |
+#          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

  ggml-ci-arm64-cpu-high-perf-sve:
    runs-on: ubuntu-22.04-arm
@@ -91,45 +91,44 @@ jobs:
          export ${{ matrix.extra_args }}
          pytest -v -x -m "not slow"

-  # TODO: provision CUDA runner
-  #  server-cuda:
-  #    runs-on: [self-hosted, llama-server, Linux, NVIDIA]
-  #
-  #    name: server-cuda (${{ matrix.wf_name }})
-  #    strategy:
-  #      matrix:
-  #        build_type: [Release]
-  #        wf_name: ["GPUx1"]
-  #        include:
-  #          - build_type: Release
-  #            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
-  #            wf_name:    "GPUx1, backend-sampling"
-  #      fail-fast: false
-  #
-  #    steps:
-  #      - name: Clone
-  #        id: checkout
-  #        uses: actions/checkout@v6
-  #        with:
-  #          fetch-depth: 0
-  #          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
-  #
-  #      - name: Build
-  #        id: cmake_build
-  #        run: |
-  #          cmake -B build -DGGML_SCHED_NO_REALLOC=ON
-  #          cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server
-  #
-  #      - name: Tests
-  #        id: server_integration_tests
-  #        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
-  #        run: |
-  #          cd tools/server/tests
-  #          python3 -m venv venv
-  #          source venv/bin/activate
-  #          pip install -r requirements.txt
-  #          export ${{ matrix.extra_args }}
-  #          pytest -v -x -m "not slow"
+  server-cuda:
+    runs-on: [self-hosted, llama-server, Linux, NVIDIA]
+
+    name: server-cuda (${{ matrix.wf_name }})
+    strategy:
+      matrix:
+        build_type: [Release]
+        wf_name: ["GPUx1"]
+        include:
+          - build_type: Release
+            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
+            wf_name:    "GPUx1, backend-sampling"
+      fail-fast: false
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build -DGGML_CUDA=ON -DGGML_SCHED_NO_REALLOC=ON
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
+        run: |
+          cd tools/server/tests
+          python3 -m venv venv
+          source venv/bin/activate
+          pip install -r requirements.txt
+          export ${{ matrix.extra_args }}
+          pytest -v -x -m "not slow"

  server-kleidiai:
    runs-on: ah-ubuntu_22_04-c8g_8x
@@ -238,7 +238,7 @@ function gg_run_ctest_debug {
    (cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time cmake --build . --config Debug -j$(nproc)) 2>&1 | tee -a $OUT/${ci}-make.log

-    (time ctest -C Debug --output-on-failure -L main -E "test-opt|test-backend-ops" ${CTEST_EXTRA}) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    (time ctest -C Debug --output-on-failure -L main -E "test-opt|test-backend-ops|test-llama-archs" ${CTEST_EXTRA}) 2>&1 | tee -a $OUT/${ci}-ctest.log

    set +e
 }
Author	SHA1	Message	Date
Georgi Gerganov	ced88c03cb	ci : remove tag from build-self-hosted.yml	2026-05-24 18:08:11 +03:00
Georgi Gerganov	bb69b8f87b	ci : update build-self-hosted.yml	2026-05-24 16:55:23 +03:00