Compare commits

...

7 Commits
b9294 ... b9301

Author SHA1 Message Date
Yiwei Shao
1c0f6db545 hexagon: apply repl optimization in flash attn softmax as #22993 (#23455) 2026-05-23 19:56:59 -07:00
Aparna M P
cec51c7a7d snapdragon: update windows toolchain to use hsdk v6.6.0.0 (#23552) 2026-05-23 19:56:41 -07:00
Aldehir Rojas
b22ff4b7b4 cmake/ui : refactor the build (#23352) 2026-05-23 17:08:22 -04:00
Aditya Singh
c0c7e147e7 requirements : bump torch to 2.11.0 (#23503)
* requirements: relax torch~=2.6.0 to torch>=2.6.0 for convert_hf_to_gguf

The ~=2.6.0 operator resolves to >=2.6.0, <2.7.0, which fails on
PyPI for platform/CPython combinations where 2.6.x is not present.
The accompanying comment already says 'PyTorch 2.6.0 or later', so
the looser >=2.6.0 matches the documented intent and unblocks
pip install -r requirements/requirements-convert_hf_to_gguf.txt.

Fixes #23408

* requirements: bump torch floor to 2.11.0 per maintainer

* requirements: pin torch to ==2.11.0 per project policy

* requirements: pin mtmd torch and torchvision to 2.11.0/0.26.0 per project policy

* requirements: suppress check_requirements pin warning on mtmd

The check_requirements script flags '==' on lines in files matched by
*/**/requirements*.txt. Append the documented suppression comment to the
pinned torch and torchvision lines (and to the s390x platform marker lines)
so the check passes while keeping the pins required by project policy.

* ty: silence Tensor/Module union check on model[0].auto_model

With torch 2.11.0 stubs, nn.Sequential.__getitem__ now returns
Tensor | Module rather than Module, so model[0].auto_model fails ty
on the SentenceTransformer code path. The runtime behavior is
unchanged because SentenceTransformer always wraps a Module at
index 0. Adding a targeted unresolved-attribute ignore keeps the
type-check green without altering behavior. A follow-up issue
tracks typing the variable explicitly.
2026-05-23 18:24:39 +02:00
Michael Wand
b0df4c0cfd model : add NVFP4 MTP scale tensors (#23563)
* Add NVFP4 MTP scale tensors

* Link Qwen3.5 MTP tensors

* Aligned nullptr
2026-05-23 13:30:31 +02:00
dskwe
a497476330 ggml : Check the right iface method before using the fallback 2d get (#23514) 2026-05-23 12:49:24 +02:00
Jeff Bolz
95405ac65f vulkan: fix windows find_package of SPIRV-Headers (#23215)
* vulkan: fix windows find_package of SPIRV-Headers

* not windows-only
2026-05-23 09:44:46 +02:00
31 changed files with 716 additions and 481 deletions

View File

@@ -1234,6 +1234,9 @@ jobs:
path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
ui-build:
uses: ./.github/workflows/ui-build.yml
release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
@@ -1259,6 +1262,7 @@ jobs:
- macOS-cpu
- ios-xcode-build
- openEuler-cann
- ui-build
outputs:
tag_name: ${{ steps.tag.outputs.name }}
@@ -1318,6 +1322,18 @@ jobs:
mv -v artifact/*.zip release
mv -v artifact/*.tar.gz release
- name: Download UI build
id: download_ui
uses: actions/download-artifact@v7
with:
name: ui-build
path: ./ui-dist
- name: Package UI
id: package_ui
run: |
tar -czvf release/llama-${{ steps.tag.outputs.name }}-ui.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./ui-dist .
- name: Create release
id: create_release
uses: ggml-org/action-create-release@v1
@@ -1367,6 +1383,9 @@ jobs:
- [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz)
- [openEuler aarch64 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64-aclgraph.tar.gz)
**UI:**
- [UI](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-ui.tar.gz)
- name: Upload release
id: upload_release
uses: actions/github-script@v8

View File

@@ -54,8 +54,13 @@ concurrency:
cancel-in-progress: true
jobs:
ui-build:
name: Build Web UI
uses: ./.github/workflows/ui-build.yml
server:
runs-on: ubuntu-latest
needs: ui-build
name: server (${{ matrix.wf_name }})
strategy:
@@ -93,12 +98,11 @@ jobs:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v6
- name: Download built UI
uses: actions/download-artifact@v7
with:
node-version: "24"
cache: "npm"
cache-dependency-path: "tools/ui/package-lock.json"
name: ui-build
path: tools/ui/dist
- name: Build
id: cmake_build

View File

@@ -31,7 +31,7 @@ jobs:
- name: Generate checksums
run: |
cd build/tools/ui/dist
cd tools/ui/dist
for f in *; do
sha256sum "$f" | awk '{print $1, $2}' >> checksums.txt
done
@@ -40,5 +40,5 @@ jobs:
uses: actions/upload-artifact@v6
with:
name: ui-build
path: build/tools/ui/dist/
path: tools/ui/dist/
retention-days: 1

View File

@@ -38,7 +38,7 @@ jobs:
uses: actions/download-artifact@v7
with:
name: ui-build
path: build/tools/ui/dist/
path: tools/ui/dist/
- name: Install Hugging Face Hub CLI
run: pip install -U huggingface_hub
@@ -49,12 +49,12 @@ jobs:
- name: Sync built files to Hugging Face bucket (version tag)
run: |
# Upload the built files to the Hugging Face bucket under the release version
hf buckets sync build/tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/${{ inputs.version_tag }} --delete --quiet
hf buckets sync tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/${{ inputs.version_tag }} --delete --quiet
- name: Sync built files to Hugging Face bucket (latest)
run: |
# Also upload to the 'latest' directory for fallback downloads
hf buckets sync build/tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/latest --delete --quiet
hf buckets sync tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/latest --delete --quiet
- name: Verify upload
run: |

View File

@@ -112,16 +112,6 @@ option(LLAMA_BUILD_APP "llama: build the unified binary"
option(LLAMA_BUILD_UI "llama: build the embedded Web UI for server" ON)
option(LLAMA_USE_PREBUILT_UI "llama: use prebuilt UI from HF Bucket when available (requires LLAMA_BUILD_UI=ON)" ON)
# Backward compat: when old var is set but new one isn't, forward the value
if(DEFINED LLAMA_BUILD_WEBUI)
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
endif()
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
endif()
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)

View File

@@ -617,11 +617,7 @@ struct common_params {
std::map<std::string, std::string> default_template_kwargs;
// UI configs
#ifdef LLAMA_UI_DEFAULT_ENABLED
bool ui = LLAMA_UI_DEFAULT_ENABLED != 0;
#else
bool ui = true; // default to enabled when not set
#endif
bool ui = true;
// Deprecated: use ui, ui_mcp_proxy, ui_config_json instead
bool webui = ui;

View File

@@ -33,8 +33,8 @@
"name": "arm64-windows-snapdragon",
"inherits": [ "base", "arm64-windows-llvm" ],
"cacheVariables": {
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16+dotprod+i8mm -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16+dotprod+i8mm -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
"CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",

View File

@@ -24,7 +24,7 @@ Native Windows 11 arm64 builds has the following tools dependencies:
- UCRT and Driver Kit
- LLVM core libraries and Clang compiler (winget)
- CMake, Git, Python (winget)
- Hexagon SDK Community Edition 6.4 or later (see windows.md)
- Hexagon SDK Community Edition 6.6 or later (see windows.md)
- OpenCL SDK 2.3 or later (see windows.md)
Note: The rest of the **Windows** build process assumes that you're running natively in Powershell.
@@ -45,7 +45,7 @@ Preset CMake variables:
GGML_HEXAGON="ON"
GGML_OPENCL="ON"
GGML_OPENMP="OFF"
HEXAGON_SDK_ROOT="/opt/hexagon/6.4.0.2"
HEXAGON_SDK_ROOT="/opt/hexagon/6.6.0.0"
...
-- Including OpenCL backend
-- Including Hexagon backend

View File

@@ -28,15 +28,15 @@ c:\Qualcomm\OpenCL_SDK\2.3.2
Either use the trimmed down version (optimized for CI) from
https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz
https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.6.0.0/hexagon-sdk-v6.6.0.0-arm64-wos.tar.xz
Or download the complete official version from
https://softwarecenter.qualcomm.com/catalog/item/Hexagon_SDK?version=6.4.0.2
https://softwarecenter.qualcomm.com/catalog/item/Hexagon_SDK?version=6.6.0.0
Unzip/untar the archive into
```
c:\Qualcomm\Hexagon_SDK\6.4.0.2
c:\Qualcomm\Hexagon_SDK\6.6.0.0
```
## Install the latest Adreno GPU driver
@@ -123,10 +123,10 @@ The overall Hexagon backend build procedure for Windows on Snapdragon is the sam
However, additional settings are required for generating and signing HTP Ops libraries.
```
> $env:OPENCL_SDK_ROOT="C:\Qualcomm\OpenCL_SDK\2.3.2"
> $env:HEXAGON_SDK_ROOT="C:\Qualcomm\Hexagon_SDK\6.4.0.2"
> $env:HEXAGON_TOOLS_ROOT="C:\Qualcomm\Hexagon_SDK\6.4.0.2\tools\HEXAGON_Tools\19.0.04"
> $env:HEXAGON_SDK_ROOT="C:\Qualcomm\Hexagon_SDK\6.6.0.0"
> $env:HEXAGON_TOOLS_ROOT="C:\Qualcomm\Hexagon_SDK\6.6.0.0\tools\HEXAGON_Tools\19.0.07"
> $env:HEXAGON_HTP_CERT="c:\Users\MyUsers\Certs\ggml-htp-v1.pfx"
> $env:WINDOWS_SDK_BIN="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\arm64"
> $env:WINDOWS_SDK_BIN="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0"
> cmake --preset arm64-windows-snapdragon-release -B build-wos
...

View File

@@ -64,7 +64,7 @@ def load_model_and_tokenizer(model_path, use_sentence_transformers=False, device
print("Using SentenceTransformer to apply all numbered layers")
model = SentenceTransformer(model_path)
tokenizer = model.tokenizer
config = model[0].auto_model.config
config = model[0].auto_model.config # ty: ignore[unresolved-attribute]
else:
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

View File

@@ -306,7 +306,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
GGML_ASSERT(tensor);
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
if (n_copies <= 1 || backend->iface.set_tensor_2d_async == NULL) {
if (n_copies <= 1 || backend->iface.get_tensor_2d_async == NULL) {
for (size_t i = 0; i < n_copies; i++) {
ggml_backend_tensor_get_async(backend, tensor, (char *) data + i*stride_data, offset + i*stride_tensor, size);
}
@@ -317,7 +317,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
}
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
backend->iface.get_tensor_2d_async(backend, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
}

View File

@@ -852,9 +852,10 @@ static void fa_softmax_thread(unsigned int n, unsigned int i, void * data) {
v_s_rowmax1 = hvx_vec_reduce_max_f16(v_s_rowmax1);
// Splat m_prev[r], m_prev[r+1] from the per-row accumulator.
// vror brings the target lane to lane 0, then extract + re-splat.
HVX_Vector v_m_prev0 = hvx_vec_splat_f16(hvx_vec_get_f16(Q6_V_vror_VR(m_prev_v, r_vec_off * 2)));
HVX_Vector v_m_prev1 = hvx_vec_splat_f16(hvx_vec_get_f16(Q6_V_vror_VR(m_prev_v, (r_vec_off + 1) * 2)));
// vror brings the target lane to lane 0, then vdelta replicates it
// across all lanes — stays in the vector domain (no store/reload).
HVX_Vector v_m_prev0 = hvx_vec_repl_f16(Q6_V_vror_VR(m_prev_v, r_vec_off * 2));
HVX_Vector v_m_prev1 = hvx_vec_repl_f16(Q6_V_vror_VR(m_prev_v, (r_vec_off + 1) * 2));
// HVX max — both operands are splats, so result is splat of m_new.
HVX_Vector v_dup_m0 = Q6_Vhf_vmax_VhfVhf(v_m_prev0, v_s_rowmax0);

View File

@@ -8,7 +8,10 @@ endif()
find_package(Vulkan COMPONENTS glslc REQUIRED)
find_package(SPIRV-Headers REQUIRED)
if (DEFINED ENV{VULKAN_SDK})
list(APPEND CMAKE_PREFIX_PATH "$ENV{VULKAN_SDK}")
endif()
find_package(SPIRV-Headers CONFIG REQUIRED)
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Parallel build object files

View File

@@ -1,8 +1,8 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
## Embedding Gemma requires PyTorch 2.6.0 or later
torch~=2.6.0; platform_machine != "s390x"
## Embedding Gemma requires PyTorch 2.6.0 or later, bumped to 2.11.0 for compatibility
torch==2.11.0; platform_machine != "s390x"
# torch s390x packages can only be found from nightly builds
--extra-index-url https://download.pytorch.org/whl/nightly

View File

@@ -7,10 +7,10 @@ $ErrorActionPreference = "Stop"
$BaseDir = "C:\Qualcomm"
# SDK 1: Hexagon
$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.6.0.0/hexagon-sdk-v6.6.0.0-arm64-wos.tar.xz"
$HexagonParent = Join-Path $BaseDir "Hexagon_SDK"
$HexagonSdkVersion = "6.4.0.2"
$HexagonToolsVersion = "19.0.04"
$HexagonSdkVersion = "6.6.0.0"
$HexagonToolsVersion = "19.0.07"
$HexagonSdkTarget = Join-Path $HexagonParent $HexagonSdkVersion
$HexagonToolsTarget = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion"

342
scripts/ui-assets.cmake Normal file
View File

@@ -0,0 +1,342 @@
# Provision UI assets and generate ui.cpp/ui.h.
#
# Asset provisioning priority:
# 1. Pre-built assets in SRC_DIST_DIR (manually built by user)
# 2. If BUILD_UI=ON: npm build
# 3. If above did not produce assets and HF_ENABLED=ON: HF Bucket download
cmake_minimum_required(VERSION 3.16)
# Script inputs. Every entry defaults to the empty string; set(... CACHE ...)
# without FORCE does not overwrite an entry that already exists.
# NOTE(review): presumably the caller supplies the real values as -D
# definitions when invoking this script - confirm against the invoking
# CMakeLists.
set(UI_SOURCE_DIR "" CACHE STRING "UI source directory (to run npm build)")
set(UI_BINARY_DIR "" CACHE STRING "UI binary directory (to store generated files)")
set(LLAMA_SOURCE_DIR "" CACHE STRING "Project source root (to resolve version from git)")
set(HF_BUCKET "" CACHE STRING "Hugging Face bucket name")
set(HF_VERSION "" CACHE STRING "Version to download (empty = resolve from git)")
set(HF_ENABLED "" CACHE STRING "Whether to allow HF Bucket download (ON/OFF)")
set(BUILD_UI "" CACHE STRING "Build UI via npm (ON/OFF)")
set(LLAMA_UI_EMBED "" CACHE STRING "Path to llama-ui-embed helper")
# Files that together make up a complete UI build. The same list drives the
# presence checks, the copy from the source tree, the HF download and the
# arguments handed to the embed helper.
set(ASSETS
bundle.css
bundle.js
index.html
loading.html
)
# Directory inside the binary tree that the assets are embedded from.
set(DIST_DIR "${UI_BINARY_DIR}/dist")
# Optional pre-built assets inside the source tree; they take priority when complete.
set(SRC_DIST_DIR "${UI_SOURCE_DIR}/dist")
# Holds the version label of the last successful HF download; a successful npm
# build deletes it.
set(STAMP_FILE "${UI_BINARY_DIR}/.ui-stamp")
# Output paths passed to the embed helper as its first two arguments.
set(UI_CPP "${UI_BINARY_DIR}/ui.cpp")
set(UI_H "${UI_BINARY_DIR}/ui.h")
# assets_present(<out_var>)
#
# Sets <out_var> in the caller's scope to TRUE when every file named in ASSETS
# exists in DIST_DIR, and to FALSE as soon as one of them is missing.
function(assets_present out_var)
    # Report "missing" up front so the loop can bail out on the first gap.
    set(${out_var} FALSE PARENT_SCOPE)
    foreach(required IN LISTS ASSETS)
        if(NOT EXISTS "${DIST_DIR}/${required}")
            return()
        endif()
    endforeach()
    # The loop found nothing missing.
    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
# copy_src_dist(<out_var>)
#
# Uses pre-built assets from SRC_DIST_DIR when the full set is available there.
#
# Sets <out_var> in the caller's scope to TRUE only when every file in ASSETS
# exists in SRC_DIST_DIR and was copied into DIST_DIR successfully. In every
# other case <out_var> is FALSE so the caller can move on to the next
# provisioning source.
function(copy_src_dist out_var)
    set(${out_var} FALSE PARENT_SCOPE)

    # All-or-nothing: a partial set in the source tree is ignored.
    foreach(asset ${ASSETS})
        if(NOT EXISTS "${SRC_DIST_DIR}/${asset}")
            return()
        endif()
    endforeach()

    file(MAKE_DIRECTORY "${DIST_DIR}")
    message(STATUS "UI: using pre-built assets from ${SRC_DIST_DIR}")
    foreach(asset ${ASSETS})
        # copy_if_different leaves an identical destination file untouched.
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E copy_if_different
                "${SRC_DIST_DIR}/${asset}" "${DIST_DIR}/${asset}"
            RESULT_VARIABLE rc
        )
        # A failed copy must not be reported as success, otherwise the caller
        # would embed whatever happens to be in DIST_DIR and skip the other
        # provisioning sources.
        if(NOT rc EQUAL 0)
            message(STATUS "UI: failed to copy ${asset} from ${SRC_DIST_DIR} (${rc})")
            return()
        endif()
    endforeach()

    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
# npm_build_should_skip(<out_var>)
#
# Decides whether the assets already in DIST_DIR can be reused instead of
# running npm again.
#
# Sets <out_var> in the caller's scope to TRUE only when all of these hold:
#   - every file in ASSETS exists in DIST_DIR,
#   - STAMP_FILE does not exist (the stamp is written after an HF download, so
#     its presence means the assets did not come from npm),
#   - ${UI_SOURCE_DIR}/sources.cmake exists, and
#   - no listed source file has a modification time later than
#     ${DIST_DIR}/index.html.
# Otherwise <out_var> is FALSE.
function(npm_build_should_skip out_var)
    set(${out_var} FALSE PARENT_SCOPE)

    assets_present(present)
    if(NOT present)
        return()
    endif()
    if(EXISTS "${STAMP_FILE}")
        return()
    endif()
    if(NOT EXISTS "${UI_SOURCE_DIR}/sources.cmake")
        return()
    endif()

    # sources.cmake is expected to define UI_SOURCE_GLOBS (glob patterns) and
    # UI_SOURCE_FILES (explicit paths), both relative to UI_SOURCE_DIR.
    include("${UI_SOURCE_DIR}/sources.cmake")
    set(globs "")
    foreach(g ${UI_SOURCE_GLOBS})
        list(APPEND globs "${UI_SOURCE_DIR}/${g}")
    endforeach()
    file(GLOB_RECURSE sources ${globs})
    foreach(f ${UI_SOURCE_FILES})
        list(APPEND sources "${UI_SOURCE_DIR}/${f}")
    endforeach()

    # Timestamps are compared as strings, so they must be formatted so that
    # lexicographic order equals chronological order. Formatting in UTC keeps
    # that true across DST transitions and timezone changes, where local-time
    # strings can repeat or run backwards.
    set(ts_format "%Y-%m-%dT%H:%M:%S")
    file(TIMESTAMP "${DIST_DIR}/index.html" out_ts "${ts_format}" UTC)
    foreach(s ${sources})
        # Explicitly listed files may be absent; they cannot be newer.
        if(NOT EXISTS "${s}")
            continue()
        endif()
        file(TIMESTAMP "${s}" s_ts "${ts_format}" UTC)
        if(s_ts STRGREATER out_ts)
            # A source changed after the last build: a rebuild is needed.
            return()
        endif()
    endforeach()

    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
# npm_build(<out_var>)
#
# Produces the UI assets in DIST_DIR by running npm inside UI_SOURCE_DIR.
#
# Sets <out_var> in the caller's scope to TRUE when an existing build was
# judged up to date, or when "npm run build" succeeded and left every file of
# ASSETS in DIST_DIR. Sets it to FALSE when package.json or npm is missing,
# when an npm step exits non-zero, or when the build finished without the
# expected files. Failures are reported with STATUS messages only.
function(npm_build out_var)
    set(${out_var} FALSE PARENT_SCOPE)

    if(NOT EXISTS "${UI_SOURCE_DIR}/package.json")
        message(STATUS "UI: ${UI_SOURCE_DIR}/package.json not found, skipping npm")
        return()
    endif()

    npm_build_should_skip(up_to_date)
    if(up_to_date)
        message(STATUS "UI: npm output up-to-date, skipping build")
        set(${out_var} TRUE PARENT_SCOPE)
        return()
    endif()

    # On Windows hosts the launcher scripts are tried before the bare name.
    set(npm_names npm)
    if(CMAKE_HOST_WIN32)
        set(npm_names npm.cmd npm.bat npm)
    endif()
    find_program(NPM_EXECUTABLE NAMES ${npm_names})
    if(NOT NPM_EXECUTABLE)
        message(STATUS "UI: npm not found, skipping npm build")
        return()
    endif()

    # Dependencies are installed only when node_modules is absent.
    if(NOT EXISTS "${UI_SOURCE_DIR}/node_modules")
        message(STATUS "UI: running npm install (first time)")
        execute_process(
            COMMAND ${NPM_EXECUTABLE} install
            WORKING_DIRECTORY "${UI_SOURCE_DIR}"
            RESULT_VARIABLE install_rc
            ERROR_VARIABLE install_err
        )
        if(NOT install_rc EQUAL 0)
            message(STATUS "UI: npm install failed (${install_rc})")
            message(STATUS " stderr: ${install_err}")
            return()
        endif()
    endif()

    file(MAKE_DIRECTORY "${DIST_DIR}")
    message(STATUS "UI: running npm run build, output -> ${DIST_DIR}")
    # The output directory is handed to the build through the environment.
    execute_process(
        COMMAND ${CMAKE_COMMAND} -E env "LLAMA_UI_OUT_DIR=${DIST_DIR}"
            ${NPM_EXECUTABLE} run build
        WORKING_DIRECTORY "${UI_SOURCE_DIR}"
        RESULT_VARIABLE build_rc
        ERROR_VARIABLE build_err
    )
    if(NOT build_rc EQUAL 0)
        message(STATUS "UI: npm run build failed (${build_rc})")
        message(STATUS " stderr: ${build_err}")
        return()
    endif()

    # A zero exit code is not enough; the expected files must actually exist.
    assets_present(built)
    if(NOT built)
        message(STATUS "UI: npm build finished but assets missing in ${DIST_DIR}")
        return()
    endif()

    message(STATUS "UI: npm build succeeded")
    # The assets now come from npm, so any stamp from an HF download is dropped.
    file(REMOVE "${STAMP_FILE}")
    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
# resolve_version(<out_var>)
#
# Picks the version label used for the HF download and stores it in <out_var>
# in the caller's scope:
#   - HF_VERSION when it is non-empty;
#   - otherwise "b<BUILD_NUMBER>" when ${LLAMA_SOURCE_DIR}/cmake/build-info.cmake
#     exists and, once included, leaves BUILD_NUMBER non-empty and non-zero;
#   - otherwise the empty string.
function(resolve_version out_var)
    set(_ui_version "${HF_VERSION}")

    if("${_ui_version}" STREQUAL "")
        set(_ui_build_info "${LLAMA_SOURCE_DIR}/cmake/build-info.cmake")
        if(EXISTS "${_ui_build_info}")
            include("${_ui_build_info}")
            if(NOT "${BUILD_NUMBER}" STREQUAL "" AND NOT BUILD_NUMBER EQUAL 0)
                set(_ui_version "b${BUILD_NUMBER}")
            endif()
        endif()
    endif()

    set(${out_var} "${_ui_version}" PARENT_SCOPE)
endfunction()
# hf_download(<version> <out_var> <out_resolved>)
#
# Downloads every file in ASSETS from the Hugging Face bucket HF_BUCKET.
#
# Candidates are tried in order: <version> (skipped when empty), then "latest".
# For each candidate the files are fetched from
#   https://huggingface.co/buckets/ggml-org/<HF_BUCKET>/resolve/<candidate>/<asset>?download=true
# and, when checksums.txt can also be fetched, each asset's SHA-256 must appear
# in it next to the asset name. The checksum step is best effort: a missing
# checksums.txt does not reject the candidate, a mismatch does.
#
# Outputs, set in the caller's scope:
#   <out_var>      TRUE when one candidate was fully downloaded (and verified
#                  if checksums were available), FALSE otherwise.
#   <out_resolved> the candidate that succeeded, or "" on failure.
#
# Downloads go to a staging directory and are moved into DIST_DIR only after
# the whole candidate has been accepted. A failed transfer can leave an empty
# or truncated file at its destination, so writing straight into DIST_DIR
# would replace usable assets with broken ones whenever a download fails.
function(hf_download version out_var out_resolved)
    set(${out_var} FALSE PARENT_SCOPE)
    set(${out_resolved} "" PARENT_SCOPE)
    file(MAKE_DIRECTORY "${DIST_DIR}")

    # Kept next to DIST_DIR so the final move stays within one filesystem.
    set(staging "${UI_BINARY_DIR}/.ui-hf-staging")

    set(candidates "")
    if(NOT "${version}" STREQUAL "")
        list(APPEND candidates "${version}")
    endif()
    list(APPEND candidates "latest")

    foreach(resolved ${candidates})
        # Start every candidate from an empty staging directory.
        file(REMOVE_RECURSE "${staging}")
        file(MAKE_DIRECTORY "${staging}")

        set(base "https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/${resolved}")
        message(STATUS "UI: downloading from ${resolved}: ${base}")

        set(ok TRUE)
        foreach(asset ${ASSETS})
            file(DOWNLOAD "${base}/${asset}?download=true" "${staging}/${asset}"
                STATUS status TIMEOUT 60
            )
            # STATUS is a two-element list: numeric code, then message.
            list(GET status 0 rc)
            if(NOT rc EQUAL 0)
                list(GET status 1 errmsg)
                message(STATUS "UI: download ${asset} from ${resolved} failed: ${errmsg}")
                set(ok FALSE)
                break()
            endif()
            message(STATUS "UI: downloaded ${asset}")
        endforeach()
        if(NOT ok)
            continue()
        endif()

        # Best-effort checksum verification
        set(have_checksums FALSE)
        file(DOWNLOAD "${base}/checksums.txt?download=true" "${staging}/checksums.txt"
            STATUS cs_status TIMEOUT 30
        )
        list(GET cs_status 0 cs_rc)
        if(cs_rc EQUAL 0)
            set(have_checksums TRUE)
            message(STATUS "UI: verifying checksums")
            file(STRINGS "${staging}/checksums.txt" cs_lines)
            foreach(asset ${ASSETS})
                file(SHA256 "${staging}/${asset}" h)
                string(TOLOWER "${h}" h)
                # Looks for "<hash><blanks><asset>" anywhere in the file.
                string(REGEX MATCH "${h}[ \t]+${asset}" m "${cs_lines}")
                if(NOT m)
                    message(WARNING "UI: checksum verification failed for ${asset}")
                    set(ok FALSE)
                    break()
                endif()
            endforeach()
            if(ok)
                message(STATUS "UI: all checksums verified")
            endif()
        endif()

        if(ok)
            # Promote the accepted set; file(RENAME) replaces existing files.
            foreach(asset ${ASSETS})
                file(RENAME "${staging}/${asset}" "${DIST_DIR}/${asset}")
            endforeach()
            if(have_checksums)
                file(RENAME "${staging}/checksums.txt" "${DIST_DIR}/checksums.txt")
            endif()
            file(REMOVE_RECURSE "${staging}")
            set(${out_var} TRUE PARENT_SCOPE)
            set(${out_resolved} "${resolved}" PARENT_SCOPE)
            return()
        endif()
    endforeach()

    # Every candidate failed: discard the leftovers, DIST_DIR is untouched.
    file(REMOVE_RECURSE "${staging}")
endfunction()
# emit_files()
#
# Runs the LLAMA_UI_EMBED helper to generate UI_CPP and UI_H.
#
# The helper always receives the two output paths. When every file of ASSETS is
# present in DIST_DIR, a "<name> <path>" pair is appended for each asset;
# otherwise the helper is invoked with the output paths only. A non-zero exit
# status from the helper is a fatal error.
function(emit_files)
    set(embed_args "${UI_CPP}" "${UI_H}")

    assets_present(have_all)
    if(have_all)
        foreach(name IN LISTS ASSETS)
            list(APPEND embed_args "${name}" "${DIST_DIR}/${name}")
        endforeach()
    endif()

    execute_process(
        COMMAND "${LLAMA_UI_EMBED}" ${embed_args}
        RESULT_VARIABLE embed_rc
    )
    if(NOT embed_rc EQUAL 0)
        message(FATAL_ERROR "UI: llama-ui-embed failed (${embed_rc})")
    endif()
endfunction()
# ---------------------------------------------------------------------------
# Step 1: a complete set of pre-built assets in the source tree wins outright.
# Nothing else is attempted; the files are embedded and the script stops.
# ---------------------------------------------------------------------------
copy_src_dist(SRC_OK)
if(SRC_OK)
    emit_files()
    return()
endif()

# ---------------------------------------------------------------------------
# Step 2: build with npm, but only when BUILD_UI is on.
# ---------------------------------------------------------------------------
set(provisioned FALSE)
if(BUILD_UI)
    npm_build(NPM_OK)
    set(provisioned ${NPM_OK})
endif()

# ---------------------------------------------------------------------------
# Step 3: fetch from the HF bucket when npm produced nothing and HF_ENABLED
# is on. An earlier download is reused when its stamp names the wanted version
# and the files are still in place.
# ---------------------------------------------------------------------------
if(HF_ENABLED AND NOT provisioned)
    resolve_version(VERSION)

    set(reuse_download FALSE)
    assets_present(have_assets)
    if(have_assets AND EXISTS "${STAMP_FILE}" AND NOT "${VERSION}" STREQUAL "")
        file(READ "${STAMP_FILE}" stamped)
        string(STRIP "${stamped}" stamped)
        if("${stamped}" STREQUAL "${VERSION}")
            set(reuse_download TRUE)
        endif()
    endif()

    if(reuse_download)
        message(STATUS "UI: HF stamp '${stamped}' matches version, skipping HF fetch")
        set(provisioned TRUE)
    else()
        hf_download("${VERSION}" HF_OK HF_RESOLVED)
        if(NOT HF_OK)
            message(STATUS "UI: HF download failed")
        else()
            # Record which candidate was actually fetched.
            file(WRITE "${STAMP_FILE}" "${HF_RESOLVED}")
            message(STATUS "UI: HF download succeeded, stamp updated (${HF_RESOLVED})")
            set(provisioned TRUE)
        endif()
    endif()
endif()

# ---------------------------------------------------------------------------
# Step 4: nothing was provisioned. Warn, then embed whatever DIST_DIR holds
# (possibly nothing), so ui.cpp/ui.h are generated either way.
# ---------------------------------------------------------------------------
if(NOT provisioned)
    assets_present(have_assets)
    if(NOT have_assets)
        message(WARNING "UI: no assets available - building without an embedded UI. "
            "In a disconnected environment, download the pre-built UI "
            "from a llama.cpp release at "
            "https://github.com/ggml-org/llama.cpp/releases and "
            "extract to tools/ui/dist.")
    else()
        message(WARNING "UI: provisioning failed; embedding stale assets from ${DIST_DIR}")
    endif()
endif()

emit_files()

View File

@@ -1,223 +0,0 @@
# Download UI assets from Hugging Face Bucket at build time
# Usage: cmake -DPUBLIC_DIR=... -DHF_BUCKET=... -DHF_VERSION=... -DASSETS="a;b;c" -P scripts/ui-download.cmake
#
# Asset provisioning priority:
# 1. Pre-built assets already in PUBLIC_DIR (cached from a previous run)
# 2. Local npm build (if NPM_DIR is provided and has package.json)
# 3. Hugging Face Bucket download (version-specific, then 'latest' fallback)
cmake_minimum_required(VERSION 3.16)
set(PUBLIC_DIR "" CACHE STRING "Directory to store/download assets")
set(HF_BUCKET "" CACHE STRING "Hugging Face bucket name")
set(HF_VERSION "" CACHE STRING "Version to download (empty = resolve from git)")
set(ASSETS "" CACHE STRING "Plus-separated list of asset filenames (+)")
set(STAMP_FILE "" CACHE STRING "Stamp file to create on success (optional)")
set(SOURCE_DIR "" CACHE STRING "Project source root (to resolve version from git)")
set(NPM_DIR "" CACHE STRING "UI source directory (to run npm build)")
set(HF_ENABLED "" CACHE STRING "Whether to allow HF Bucket download (ON/OFF)")
# ---------------------------------------------------------------------------
# 1. Resolve version from git if not provided at configure time
# ---------------------------------------------------------------------------
set(RESOLVED_VERSION "${HF_VERSION}")
if("${RESOLVED_VERSION}" STREQUAL "" AND NOT "${SOURCE_DIR}" STREQUAL "")
if(EXISTS "${SOURCE_DIR}/cmake/build-info.cmake")
include("${SOURCE_DIR}/cmake/build-info.cmake")
if(NOT "${BUILD_NUMBER}" STREQUAL "" AND NOT BUILD_NUMBER EQUAL 0)
set(RESOLVED_VERSION "b${BUILD_NUMBER}")
message(STATUS "UI: resolved version from git: ${RESOLVED_VERSION}")
endif()
endif()
endif()
# Convert + back to CMake list (+ is used as separator instead of ; to
# avoid platform-specific escaping issues when passing via -D arguments)
string(REGEX REPLACE "\\+" ";" ASSETS "${ASSETS}")
# ---------------------------------------------------------------------------
# 2. Check stamp freshness — re-download if resolved version changed
# ---------------------------------------------------------------------------
set(FORCE_REBUILD FALSE)
if(NOT "${STAMP_FILE}" STREQUAL "" AND EXISTS "${STAMP_FILE}")
file(READ "${STAMP_FILE}" STAMPED_VERSION)
string(STRIP "${STAMPED_VERSION}" STAMPED_VERSION)
if(NOT "${STAMPED_VERSION}" STREQUAL "${RESOLVED_VERSION}")
message(STATUS "UI: version changed (${STAMPED_VERSION} -> ${RESOLVED_VERSION}), re-building")
set(FORCE_REBUILD TRUE)
endif()
endif()
# ---------------------------------------------------------------------------
# 3. Check if assets already exist (cached from a previous run)
# ---------------------------------------------------------------------------
set(ALL_EXISTS TRUE)
foreach(asset ${ASSETS})
if(NOT EXISTS "${PUBLIC_DIR}/${asset}")
set(ALL_EXISTS FALSE)
break()
endif()
endforeach()
if(ALL_EXISTS AND NOT FORCE_REBUILD)
message(STATUS "UI: all assets already exist in ${PUBLIC_DIR}, skipping")
return()
endif()
file(MAKE_DIRECTORY "${PUBLIC_DIR}")
# ---------------------------------------------------------------------------
# 4. Priority 2: build from source via npm (fast path for developers)
# ---------------------------------------------------------------------------
set(PROVISION_SUCCESS FALSE)
if(NOT PROVISION_SUCCESS AND NOT "${NPM_DIR}" STREQUAL "")
if(EXISTS "${NPM_DIR}/package.json")
# Check if npm is available before attempting npm build
find_program(NPM_EXECUTABLE npm)
if(NPM_EXECUTABLE)
message(STATUS "UI: building from source in ${NPM_DIR}")
# Run npm install if node_modules is missing
if(NOT EXISTS "${NPM_DIR}/node_modules")
message(STATUS "UI: running npm install (first time)")
execute_process(
COMMAND ${NPM_EXECUTABLE} install
WORKING_DIRECTORY "${NPM_DIR}"
RESULT_VARIABLE NPM_INSTALL_RESULT
OUTPUT_VARIABLE NPM_OUT
ERROR_VARIABLE NPM_ERR
)
if(NOT NPM_INSTALL_RESULT EQUAL 0)
message(STATUS "UI: npm install failed (${NPM_INSTALL_RESULT}), falling back to download")
message(STATUS " stderr: ${NPM_ERR}")
endif()
endif()
# Run the build
execute_process(
COMMAND ${NPM_EXECUTABLE} run build
WORKING_DIRECTORY "${NPM_DIR}"
RESULT_VARIABLE NPM_BUILD_RESULT
OUTPUT_VARIABLE NPM_OUT
ERROR_VARIABLE NPM_ERR
)
if(NPM_BUILD_RESULT EQUAL 0)
# Verify that the expected assets were produced
set(ALL_BUILT TRUE)
foreach(asset ${ASSETS})
if(NOT EXISTS "${PUBLIC_DIR}/${asset}")
set(ALL_BUILT FALSE)
break()
endif()
endforeach()
if(ALL_BUILT)
message(STATUS "UI: local npm build succeeded")
set(PROVISION_SUCCESS TRUE)
else()
message(STATUS "UI: npm build completed but assets missing from ${PUBLIC_DIR}, falling back to download")
endif()
else()
message(STATUS "UI: npm build failed (${NPM_BUILD_RESULT}), falling back to download")
message(STATUS " stderr: ${NPM_ERR}")
endif()
else()
message(STATUS "UI: npm not found, skipping npm build and trying HF Bucket download")
endif()
else()
message(STATUS "UI: NPM_DIR (${NPM_DIR}) has no package.json, skipping npm build")
endif()
endif()
# ---------------------------------------------------------------------------
# 5. Priority 3: download from Hugging Face Bucket (if enabled)
# ---------------------------------------------------------------------------
if(NOT PROVISION_SUCCESS AND HF_ENABLED)
# Build list of URLs to try — version-specific first, then 'latest'
set(URL_ENTRIES "")
if(NOT "${RESOLVED_VERSION}" STREQUAL "")
list(APPEND URL_ENTRIES
"version:https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/${RESOLVED_VERSION}")
endif()
list(APPEND URL_ENTRIES
"latest:https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/latest")
foreach(entry ${URL_ENTRIES})
string(REGEX REPLACE "^([^:]+):.*$" "\\1" url_label "${entry}")
string(REGEX REPLACE "^[^:]+:(.*)$" "\\1" base_url "${entry}")
message(STATUS "UI: downloading assets from ${url_label}: ${base_url}")
# Download each asset
set(ALL_OK TRUE)
foreach(asset ${ASSETS})
set(download_url "${base_url}/${asset}?download=true")
set(download_path "${PUBLIC_DIR}/${asset}")
file(DOWNLOAD "${download_url}" "${download_path}"
STATUS download_status TIMEOUT 60
)
list(GET download_status 0 download_result)
if(NOT download_result EQUAL 0)
list(GET download_status 1 error_message)
message(STATUS "UI: failed to download ${asset} from ${url_label}: ${error_message}")
set(ALL_OK FALSE)
break()
endif()
message(STATUS "UI: downloaded ${asset}")
endforeach()
if(NOT ALL_OK)
continue()
endif()
# Verify checksums if the server provides them
file(DOWNLOAD "${base_url}/checksums.txt?download=true"
"${PUBLIC_DIR}/checksums.txt"
STATUS checksum_status TIMEOUT 30
)
list(GET checksum_status 0 checksum_result)
if(checksum_result EQUAL 0)
message(STATUS "UI: verifying checksums...")
file(STRINGS "${PUBLIC_DIR}/checksums.txt" CHECKSUMS_CONTENT)
foreach(asset ${ASSETS})
set(download_path "${PUBLIC_DIR}/${asset}")
file(SHA256 "${download_path}" asset_hash)
string(TOLOWER "${asset_hash}" EXPECTED_HASH_LOWER)
string(REGEX MATCH "${EXPECTED_HASH_LOWER}[ \\t]+${asset}" CHECKSUM_LINE "${CHECKSUMS_CONTENT}")
if(NOT CHECKSUM_LINE)
message(WARNING "UI: checksum verification failed for ${asset}")
set(ALL_OK FALSE)
break()
endif()
endforeach()
if(ALL_OK)
message(STATUS "UI: all checksums verified")
endif()
endif()
if(ALL_OK)
set(PROVISION_SUCCESS TRUE)
break()
endif()
endforeach()
if(PROVISION_SUCCESS)
message(STATUS "UI: provisioning complete")
else()
message(WARNING "UI: failed to download assets from HF Bucket (${HF_BUCKET})")
endif()
endif()
# ---------------------------------------------------------------------------
# 6. Write stamp file on success (stores resolved version for freshness check)
# ---------------------------------------------------------------------------
if(PROVISION_SUCCESS)
if(NOT "${STAMP_FILE}" STREQUAL "")
file(WRITE "${STAMP_FILE}" "${RESOLVED_VERSION}")
endif()
else()
message(WARNING "UI: no source available. Neither local build (${NPM_DIR}) nor HF Bucket download succeeded.")
message(WARNING "UI: building server without embedded UI. Set LLAMA_BUILD_UI=OFF to suppress this warning.")
endif()

View File

@@ -1,16 +0,0 @@
# CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}`
# Usage: cmake -DINPUT=build/tools/ui/dist/index.html -DOUTPUT=build/tools/ui/dist/index.html.hpp -P scripts/xxd.cmake
SET(INPUT "" CACHE STRING "Input File")
SET(OUTPUT "" CACHE STRING "Output File")
get_filename_component(filename "${INPUT}" NAME)
string(REGEX REPLACE "\\.|-" "_" name "${filename}")
file(READ "${INPUT}" hex_data HEX)
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," hex_sequence "${hex_data}")
string(LENGTH ${hex_data} hex_len)
math(EXPR len "${hex_len} / 2")
file(WRITE "${OUTPUT}" "unsigned char ${name}[] = {${hex_sequence}};\nunsigned int ${name}_len = ${len};\n")

View File

@@ -1334,6 +1334,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
if (!layer.ssm_beta_s && layer.ssm_beta) {
layer.ssm_beta_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.eh_proj_s && layer.nextn.eh_proj) {
layer.nextn.eh_proj_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.shared_head_head_s && layer.nextn.shared_head_head) {
layer.nextn.shared_head_head_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
// input scales
if (!layer.wq_in_s && layer.wq) {
@@ -1393,6 +1399,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
if (!layer.ssm_beta_in_s && layer.ssm_beta) {
layer.ssm_beta_in_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.eh_proj_in_s && layer.nextn.eh_proj) {
layer.nextn.eh_proj_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.shared_head_head_in_s && layer.nextn.shared_head_head) {
layer.nextn.shared_head_head_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
}
}
// output scales
if (output && output->type == GGML_TYPE_NVFP4) {

View File

@@ -202,12 +202,16 @@ struct llama_layer_shortconv {
};
struct llama_layer_nextn {
struct ggml_tensor * eh_proj = nullptr;
struct ggml_tensor * embed_tokens = nullptr;
struct ggml_tensor * enorm = nullptr;
struct ggml_tensor * hnorm = nullptr;
struct ggml_tensor * shared_head_head = nullptr;
struct ggml_tensor * shared_head_norm = nullptr;
struct ggml_tensor * eh_proj = nullptr;
struct ggml_tensor * eh_proj_s = nullptr;
struct ggml_tensor * eh_proj_in_s = nullptr;
struct ggml_tensor * embed_tokens = nullptr;
struct ggml_tensor * enorm = nullptr;
struct ggml_tensor * hnorm = nullptr;
struct ggml_tensor * shared_head_head = nullptr;
struct ggml_tensor * shared_head_head_s = nullptr;
struct ggml_tensor * shared_head_head_in_s = nullptr;
struct ggml_tensor * shared_head_norm = nullptr;
};
struct llama_layer {

View File

@@ -538,7 +538,7 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
cb(concat, "mtp_concat", il);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
cb(cur, "mtp_eh_proj", il);
ggml_tensor * inpSA = cur;
@@ -626,8 +626,9 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
cb(cur, "mtp_shared_head_norm", -1);
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
GGML_ASSERT(head_w && "QWEN35 MTP: missing LM head (nextn.shared_head_head or model.output)");
cur = build_lora_mm(head_w, cur);
cur = build_lora_mm(head_w, cur, head_s);
cb(cur, "result_output", -1);
res->t_logits = cur;

View File

@@ -602,7 +602,7 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
cb(concat, "mtp_concat", il);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
cb(cur, "mtp_eh_proj", il);
ggml_tensor * inpSA = cur;
@@ -722,8 +722,9 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
cb(cur, "mtp_shared_head_norm", -1);
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
GGML_ASSERT(head_w && "QWEN35MOE MTP: missing LM head (nextn.shared_head_head or model.output)");
cur = build_lora_mm(head_w, cur);
cur = build_lora_mm(head_w, cur, head_s);
cb(cur, "result_output", -1);
res->t_logits = cur;

View File

@@ -1,5 +1,12 @@
-r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=11.3.0
torch~=2.6.0
torchvision~=0.21.0
## Embedding Gemma requires PyTorch 2.6.0 or later, bumped to 2.11.0 for compatibility
torch==2.11.0; platform_machine != "s390x" # check_requirements: ignore "=="
torchvision==0.26.0; platform_machine != "s390x" # check_requirements: ignore "=="
# torch s390x packages can only be found from nightly builds
--extra-index-url https://download.pytorch.org/whl/nightly
torch>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="
torchvision>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="

View File

@@ -231,16 +231,19 @@ bool server_http_context::init(const common_params & params) {
};
auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
(void)req; // suppress unused parameter warning when LLAMA_BUILD_UI is not defined
bool ready = is_ready.load();
if (!ready) {
#if defined(LLAMA_BUILD_UI)
#if defined(LLAMA_UI_HAS_ASSETS)
auto tmp = string_split<std::string>(req.path, '.');
if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
res.status = 503;
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
return false;
if (const llama_ui_asset * a = llama_ui_find_asset("loading.html")) {
res.status = 503;
res.set_content(reinterpret_cast<const char*>(a->data), a->size, "text/html; charset=utf-8");
return false;
}
}
#else
(void)req;
#endif
// no endpoints are allowed to be accessed when the server is not ready
// this is to prevent any data races or inconsistent states
@@ -312,23 +315,27 @@ bool server_http_context::init(const common_params & params) {
return 1;
}
} else {
#if defined(LLAMA_BUILD_UI)
// using embedded static index.html
srv->Get(params.api_prefix + "/", [](const httplib::Request & /*req*/, httplib::Response & res) {
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
res.set_content(reinterpret_cast<const char*>(index_html), index_html_len, "text/html; charset=utf-8");
return false;
});
srv->Get(params.api_prefix + "/bundle.js", [](const httplib::Request & /*req*/, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(bundle_js), bundle_js_len, "application/javascript; charset=utf-8");
return false;
});
srv->Get(params.api_prefix + "/bundle.css", [](const httplib::Request & /*req*/, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(bundle_css), bundle_css_len, "text/css; charset=utf-8");
return false;
});
#if defined(LLAMA_UI_HAS_ASSETS)
auto serve_asset = [](const std::string & name, const char * mime, bool with_isolation_headers) {
return [name, mime, with_isolation_headers](const httplib::Request & /*req*/, httplib::Response & res) {
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
if (!a) {
res.status = 404;
return false;
}
if (with_isolation_headers) {
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
}
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
return false;
};
};
srv->Get(params.api_prefix + "/", serve_asset("index.html", "text/html; charset=utf-8", true));
srv->Get(params.api_prefix + "/bundle.js", serve_asset("bundle.js", "application/javascript; charset=utf-8", false));
srv->Get(params.api_prefix + "/bundle.css", serve_asset("bundle.css", "text/css; charset=utf-8", false));
#endif
}
}

View File

@@ -1,150 +1,98 @@
set(TARGET llama-ui)
# Deprecated: use LLAMA_UI_HF_BUCKET instead
set(LLAMA_WEBUI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt webui assets (deprecated: use LLAMA_UI_HF_BUCKET)")
set(LLAMA_UI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt UI assets")
# Backward compat: forward old var to new one
if(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT DEFINED LLAMA_UI_HF_BUCKET)
if(DEFINED LLAMA_BUILD_WEBUI)
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
endif()
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
endif()
if(DEFINED LLAMA_WEBUI_HF_BUCKET)
set(LLAMA_UI_HF_BUCKET ${LLAMA_WEBUI_HF_BUCKET})
elseif(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT "${LLAMA_WEBUI_HF_BUCKET}" STREQUAL "${LLAMA_UI_HF_BUCKET}")
message(DEPRECATION "LLAMA_WEBUI_HF_BUCKET is deprecated, use LLAMA_UI_HF_BUCKET instead")
endif()
set(TARGET_SRCS "")
set(UI_COMPILE_DEFS "")
if(LLAMA_BUILD_UI)
set(PUBLIC_ASSETS
index.html
bundle.js
bundle.css
loading.html
)
# Determine source of UI assets (priority: local > HF Bucket)
set(UI_SOURCE "")
set(UI_SOURCE_DIR "")
# Priority 1: Check for local build output
set(LOCAL_UI_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
# Verify all required assets exist before declaring local source valid
set(ALL_ASSETS_PRESENT TRUE)
foreach(asset ${PUBLIC_ASSETS})
if(NOT EXISTS "${LOCAL_UI_DIR}/${asset}")
set(ALL_ASSETS_PRESENT FALSE)
break()
endif()
endforeach()
if(ALL_ASSETS_PRESENT)
set(UI_SOURCE "local")
set(UI_SOURCE_DIR "${LOCAL_UI_DIR}")
message(STATUS "UI: using local build from ${UI_SOURCE_DIR}")
endif()
# Priority 2: Build-time asset provisioning (npm build → HF Bucket fallback)
if(NOT UI_SOURCE_DIR)
# Environment variable takes precedence (e.g., from CI workflows)
# Deprecated: use HF_UI_VERSION instead
if(DEFINED ENV{HF_WEBUI_VERSION})
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
message(FATAL_ERROR "UI: invalid HF_WEBUI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
endif()
elseif(DEFINED ENV{HF_UI_VERSION})
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
endif()
elseif(DEFINED LLAMA_BUILD_NUMBER)
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
message(STATUS "UI: derived HF_UI_VERSION=b${LLAMA_BUILD_NUMBER}")
else()
set(HF_UI_VERSION "")
message(STATUS "UI: version not specified (will use HF 'latest')")
endif()
if("${HF_UI_VERSION}" STREQUAL "")
set(UI_VERSION_TAG "provisioned")
else()
set(UI_VERSION_TAG "${HF_UI_VERSION}")
endif()
set(UI_STAMP "${CMAKE_CURRENT_BINARY_DIR}/.ui-${UI_VERSION_TAG}.stamp")
string(REPLACE ";" "+" PUBLIC_ASSETS_JOINED "${PUBLIC_ASSETS}")
add_custom_command(
OUTPUT ${UI_STAMP}
COMMAND ${CMAKE_COMMAND}
"-DSOURCE_DIR=${PROJECT_SOURCE_DIR}"
"-DPUBLIC_DIR=${PROJECT_SOURCE_DIR}/build/tools/ui/dist"
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
"-DHF_VERSION=${HF_UI_VERSION}"
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
"-DASSETS=${PUBLIC_ASSETS_JOINED}"
"-DSTAMP_FILE=${UI_STAMP}"
"-DNPM_DIR=${PROJECT_SOURCE_DIR}/tools/ui"
-P ${PROJECT_SOURCE_DIR}/scripts/ui-download.cmake
COMMENT "Building/provisioning UI assets (npm build -> HF Bucket fallback)"
)
set(UI_SOURCE "provisioned")
set(UI_SOURCE_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
endif()
# Process assets from the determined source
if(UI_SOURCE_DIR)
foreach(asset ${PUBLIC_ASSETS})
set(input "${UI_SOURCE_DIR}/${asset}")
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
list(APPEND TARGET_SRCS ${output})
if(UI_SOURCE STREQUAL "local")
if(NOT EXISTS "${input}")
message(FATAL_ERROR "UI asset not found: ${input}")
endif()
set(dependency "${input}")
else()
set(dependency "${UI_STAMP}")
endif()
add_custom_command(
DEPENDS ${dependency}
OUTPUT "${output}"
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
)
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
endforeach()
list(APPEND UI_COMPILE_DEFS
LLAMA_BUILD_UI
LLAMA_UI_DEFAULT_ENABLED=1
)
message(STATUS "UI: embedded with source: ${UI_SOURCE}")
else()
message(WARNING "UI: no source available. Neither local build (build/tools/ui/dist/) nor HF Bucket download succeeded.")
message(WARNING "UI: building server without embedded UI. Set LLAMA_BUILD_UI=OFF to suppress this warning.")
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
endif()
# Resolve HF asset version: explicit env var > derived from build number > unset
if(DEFINED ENV{HF_WEBUI_VERSION})
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
elseif(DEFINED ENV{HF_UI_VERSION})
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
elseif(DEFINED LLAMA_BUILD_NUMBER)
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
else()
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
set(HF_UI_VERSION "")
endif()
# Build the static library
add_library(${TARGET} STATIC ui.cpp)
if(NOT "${HF_UI_VERSION}" STREQUAL "" AND NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
endif()
target_include_directories(${TARGET} PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
set(UI_CPP "${CMAKE_CURRENT_BINARY_DIR}/ui.cpp")
set(UI_H "${CMAKE_CURRENT_BINARY_DIR}/ui.h")
if(CMAKE_CROSSCOMPILING)
find_program(HOST_CXX_COMPILER NAMES g++ clang++ NO_CMAKE_FIND_ROOT_PATH)
if(NOT HOST_CXX_COMPILER)
message(FATAL_ERROR "UI: no host C++ compiler (g++/clang++) found to build llama-ui-embed; set -DHOST_CXX_COMPILER=<path>")
endif()
message(STATUS "UI: building llama-ui-embed with host compiler ${HOST_CXX_COMPILER}")
if(CMAKE_HOST_WIN32)
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed.exe")
else()
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed")
endif()
add_custom_command(
OUTPUT "${LLAMA_UI_EMBED_EXE}"
COMMAND "${HOST_CXX_COMPILER}" -O2 -std=c++17
-o "${LLAMA_UI_EMBED_EXE}" "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
COMMENT "Building llama-ui-embed (host)"
VERBATIM
)
add_custom_target(llama-ui-embed DEPENDS "${LLAMA_UI_EMBED_EXE}")
else()
add_executable(llama-ui-embed embed.cpp)
target_compile_features(llama-ui-embed PRIVATE cxx_std_17)
set_target_properties(llama-ui-embed PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
)
set(LLAMA_UI_EMBED_EXE "$<TARGET_FILE:llama-ui-embed>")
endif()
# Run the provisioning script every build so source changes in tools/ui/ are
# always picked up. The script uses copy_if_different for ui.cpp/ui.h, so the
# library only recompiles when contents actually change.
add_custom_target(llama-ui-assets ALL
BYPRODUCTS ${UI_CPP} ${UI_H}
COMMAND ${CMAKE_COMMAND}
"-DUI_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
"-DUI_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}"
"-DLLAMA_SOURCE_DIR=${PROJECT_SOURCE_DIR}"
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
"-DHF_VERSION=${HF_UI_VERSION}"
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
"-DBUILD_UI=${LLAMA_BUILD_UI}"
"-DLLAMA_UI_EMBED=${LLAMA_UI_EMBED_EXE}"
-P "${PROJECT_SOURCE_DIR}/scripts/ui-assets.cmake"
COMMENT "Provisioning UI assets"
VERBATIM
)
target_compile_definitions(${TARGET} PUBLIC ${UI_COMPILE_DEFS})
add_dependencies(llama-ui-assets llama-ui-embed)
if(TARGET_SRCS)
# List generated .hpp files as sources so CMake tracks them as build dependencies
target_sources(${TARGET} PRIVATE ${TARGET_SRCS})
set_source_files_properties(${TARGET_SRCS} PROPERTIES HEADER_FILE_ONLY TRUE)
endif()
set_source_files_properties(${UI_CPP} ${UI_H} PROPERTIES GENERATED TRUE)
add_library(${TARGET} STATIC ${UI_CPP} ${UI_H})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
add_dependencies(${TARGET} llama-ui-assets)
target_include_directories(${TARGET} PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
)

144
tools/ui/embed.cpp Normal file
View File

@@ -0,0 +1,144 @@
// llama-ui-embed: generate ui.cpp / ui.h that embed UI assets as C arrays.
//
// Usage:
// llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <fstream>
#include <string>
#include <vector>
// Load the entire file at `path` into `out` as raw bytes.
//
// Returns true on success (an empty file yields an empty vector). Returns
// false when the file cannot be opened (a message is printed to stderr), when
// its size cannot be determined, or when reading its contents fails.
static bool read_file(const std::string & path, std::vector<unsigned char> & out) {
    // Open positioned at the end so the first tellg() reports the file size.
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (in.fail()) {
        fprintf(stderr, "embed: cannot open %s\n", path.c_str());
        return false;
    }

    const std::streamoff n_bytes = in.tellg();
    if (n_bytes < 0) {
        return false;
    }

    in.seekg(0);
    out.resize(static_cast<size_t>(n_bytes));
    if (n_bytes == 0) {
        // Nothing to read; the (now empty) vector is the result.
        return true;
    }

    in.read(reinterpret_cast<char *>(out.data()), n_bytes);
    return !in.fail();
}
// Append every byte of `bytes` to `out` as a C array element of the form
// "0xNN," (two lowercase hex digits, trailing comma, no whitespace).
static void append_bytes_hex(std::string & out, const std::vector<unsigned char> & bytes) {
    static const char digits[] = "0123456789abcdef";

    // Each byte expands to exactly five characters: '0' 'x' hi lo ','.
    out.reserve(out.size() + bytes.size() * 5);

    char token[5] = { '0', 'x', '0', '0', ',' };
    for (size_t i = 0; i < bytes.size(); ++i) {
        token[2] = digits[bytes[i] >> 4];
        token[3] = digits[bytes[i] & 0xf];
        out.append(token, sizeof(token));
    }
}
// Write `content` to `path` unless the file already holds exactly those bytes.
//
// Leaving an up-to-date file untouched keeps its modification time, so build
// systems do not rebuild dependents needlessly.
//
// Returns true when the file already matched or was written successfully;
// returns false (with a message on stderr) when the file cannot be opened for
// writing or when writing/flushing it fails.
static bool write_if_different(const std::string & path, const std::string & content) {
    {
        // Compare against the existing file; opening at the end gives the size
        // so files of a different length are rejected without reading them.
        std::ifstream f(path, std::ios::binary | std::ios::ate);
        if (f) {
            const std::streamoff sz = f.tellg();
            if (sz >= 0 && static_cast<size_t>(sz) == content.size()) {
                std::string existing(static_cast<size_t>(sz), '\0');
                f.seekg(0);
                if ((sz == 0 || f.read(&existing[0], sz)) && existing == content) {
                    return true;
                }
            }
        }
    } // the reader is closed here, before the file is reopened for writing

    std::ofstream out(path, std::ios::binary | std::ios::trunc);
    if (!out) {
        fprintf(stderr, "embed: cannot write %s\n", path.c_str());
        return false;
    }
    if (!content.empty()) {
        out.write(content.data(), static_cast<std::streamsize>(content.size()));
    }

    // Close explicitly: buffered data is only flushed here, and a failure that
    // happens in the destructor instead would go unnoticed.
    out.close();
    if (out.fail()) {
        fprintf(stderr, "embed: failed writing %s\n", path.c_str());
        return false;
    }
    return true;
}
// printf-style formatting into a std::string.
//
// Short results are formatted into a stack buffer. vsnprintf() returns the
// length the full result *would* have, so when that does not fit the buffer
// the output is formatted a second time into a string of exactly that size
// (instead of reading past the end of the stack buffer).
//
// Returns an empty string when the result is empty or formatting fails.
static std::string fmt(const char * pattern, ...) {
    char tmp[512];

    va_list ap;
    va_start(ap, pattern);

    // A va_list may only be consumed once; keep a copy for the retry pass.
    va_list ap_retry;
    va_copy(ap_retry, ap);

    const int n = vsnprintf(tmp, sizeof(tmp), pattern, ap);
    va_end(ap);

    std::string result;
    if (n > 0) {
        const size_t len = static_cast<size_t>(n);
        if (len < sizeof(tmp)) {
            result.assign(tmp, len);
        } else {
            // Output was truncated: format again with room for len chars + NUL,
            // then drop the terminator vsnprintf wrote.
            result.resize(len + 1);
            vsnprintf(&result[0], len + 1, pattern, ap_retry);
            result.resize(len);
        }
    }
    va_end(ap_retry);

    return result;
}
// Entry point: llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
//
// Generates a header declaring `llama_ui_asset` / `llama_ui_find_asset()` and
// a source file that embeds every listed asset as a byte array plus a lookup
// table. With no assets, the header omits LLAMA_UI_HAS_ASSETS and the lookup
// function always returns nullptr.
//
// Exit code is 0 on success and 1 on a usage error, an unreadable asset, or a
// failed write. Both output files are only rewritten when their content changes.
int main(int argc, char ** argv) {
    // After the program name and the two output paths, arguments come in
    // <name> <path> pairs.
    const int n_extra = argc - 3;
    if (n_extra < 0 || n_extra % 2 != 0) {
        fprintf(stderr, "usage: %s <out_cpp> <out_h> [<name> <path>]...\n", argv[0]);
        return 1;
    }

    const std::string cpp_path   = argv[1];
    const std::string h_path     = argv[2];
    const int         n_assets   = n_extra / 2;
    const bool        has_assets = n_assets > 0;

    // --- header -------------------------------------------------------------
    std::string header = "#pragma once\n\n#include <stddef.h>\n\n";
    if (has_assets) {
        header += "#define LLAMA_UI_HAS_ASSETS 1\n\n";
    }
    header +=
        "struct llama_ui_asset {\n"
        " const char * name;\n"
        " const unsigned char * data;\n"
        " size_t size;\n"
        "};\n\n"
        "const llama_ui_asset * llama_ui_find_asset(const char * name);\n";

    // --- source -------------------------------------------------------------
    std::string source = "#include \"ui.h\"\n\n#include <string.h>\n\n";
    if (!has_assets) {
        // No assets: emit a stub so callers still link.
        source +=
            "const llama_ui_asset * llama_ui_find_asset(const char *) {\n"
            " return nullptr;\n"
            "}\n";
    } else {
        // The lookup table is built alongside the data arrays and appended
        // after the last array, so the generated file layout is unchanged.
        std::string table = "static const llama_ui_asset g_assets[] = {\n";

        for (int idx = 0; idx < n_assets; ++idx) {
            const char * asset_name = argv[3 + idx * 2];
            const char * asset_path = argv[3 + idx * 2 + 1];

            std::vector<unsigned char> payload;
            if (!read_file(asset_path, payload)) {
                return 1;
            }

            source += fmt("static const unsigned char asset_%d_data[] = {", idx);
            append_bytes_hex(source, payload);
            source += fmt("};\nstatic const size_t asset_%d_size = %lu;\n\n",
                          idx, static_cast<unsigned long>(payload.size()));

            table += fmt(" { \"%s\", asset_%d_data, asset_%d_size },\n", asset_name, idx, idx);
        }

        table += "};\n\n";
        source += table;
        source +=
            "const llama_ui_asset * llama_ui_find_asset(const char * name) {\n"
            " for (const auto & a : g_assets) {\n"
            " if (strcmp(a.name, name) == 0) {\n"
            " return &a;\n"
            " }\n"
            " }\n"
            " return nullptr;\n"
            "}\n";
    }

    // Attempt both writes even if the first one fails.
    const bool h_ok   = write_if_different(h_path, header);
    const bool cpp_ok = write_if_different(cpp_path, source);
    return (h_ok && cpp_ok) ? 0 : 1;
}

View File

@@ -19,7 +19,7 @@ const GUIDE_FOR_FRONTEND = `
-->
`.trim();
const OUTPUT_DIR = '../../build/tools/ui/dist';
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? './dist';
export function llamaCppBuildPlugin(): Plugin {
return {

15
tools/ui/sources.cmake Normal file
View File

@@ -0,0 +1,15 @@
# Inputs used to decide whether the npm build output is up-to-date.
# NOTE(review): entries look like paths relative to this file's directory
# (tools/ui) - confirm against the script that includes this file.

# Glob patterns for the UI source trees.
# NOTE(review): whether `*` also matches nested directories depends on how the
# consumer expands these (GLOB vs GLOB_RECURSE) - confirm.
set(UI_SOURCE_GLOBS
src/*
static/*
)
# Individual files tracked in addition to the globs above: package manifests,
# build/tooling configuration, and the Vite build plugin script.
set(UI_SOURCE_FILES
package.json
package-lock.json
vite.config.ts
svelte.config.js
tsconfig.json
scripts/vite-plugin-llama-cpp-build.ts
)

View File

@@ -2,6 +2,10 @@ import { mdsvex } from 'mdsvex';
import adapter from '@sveltejs/adapter-static';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
// CMake sets LLAMA_UI_OUT_DIR to the staging dir under the build tree. When the
// env var is unset (e.g. a manual `npm run build`), output defaults to ./dist.
const outDir = process.env.LLAMA_UI_OUT_DIR ?? './dist';
/** @type {import('@sveltejs/kit').Config} */
const config = {
// Consult https://svelte.dev/docs/kit/integrations
@@ -14,8 +18,8 @@ const config = {
},
router: { type: 'hash' },
adapter: adapter({
pages: '../../build/tools/ui/dist',
assets: '../../build/tools/ui/dist',
pages: outDir,
assets: outDir,
fallback: 'index.html',
precompress: false,
strict: true

View File

@@ -1,7 +0,0 @@
#ifdef LLAMA_BUILD_UI
// auto generated files (see README.md for details)
#include "index.html.hpp"
#include "bundle.js.hpp"
#include "bundle.css.hpp"
#include "loading.html.hpp"
#endif

View File

@@ -1,17 +0,0 @@
#pragma once
// TODO @ngxson : refactor, wrap these in a function
#ifdef LLAMA_BUILD_UI
extern unsigned char index_html[];
extern unsigned int index_html_len;
extern unsigned char bundle_js[];
extern unsigned int bundle_js_len;
extern unsigned char bundle_css[];
extern unsigned int bundle_css_len;
extern unsigned char loading_html[];
extern unsigned int loading_html_len;
#endif