mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-24 20:00:57 +02:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c0f6db545 | ||
|
|
cec51c7a7d | ||
|
|
b22ff4b7b4 | ||
|
|
c0c7e147e7 | ||
|
|
b0df4c0cfd | ||
|
|
a497476330 | ||
|
|
95405ac65f |
19
.github/workflows/release.yml
vendored
19
.github/workflows/release.yml
vendored
@@ -1234,6 +1234,9 @@ jobs:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
|
||||
name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
|
||||
|
||||
ui-build:
|
||||
uses: ./.github/workflows/ui-build.yml
|
||||
|
||||
release:
|
||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||
|
||||
@@ -1259,6 +1262,7 @@ jobs:
|
||||
- macOS-cpu
|
||||
- ios-xcode-build
|
||||
- openEuler-cann
|
||||
- ui-build
|
||||
|
||||
outputs:
|
||||
tag_name: ${{ steps.tag.outputs.name }}
|
||||
@@ -1318,6 +1322,18 @@ jobs:
|
||||
mv -v artifact/*.zip release
|
||||
mv -v artifact/*.tar.gz release
|
||||
|
||||
- name: Download UI build
|
||||
id: download_ui
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: ui-build
|
||||
path: ./ui-dist
|
||||
|
||||
- name: Package UI
|
||||
id: package_ui
|
||||
run: |
|
||||
tar -czvf release/llama-${{ steps.tag.outputs.name }}-ui.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./ui-dist .
|
||||
|
||||
- name: Create release
|
||||
id: create_release
|
||||
uses: ggml-org/action-create-release@v1
|
||||
@@ -1367,6 +1383,9 @@ jobs:
|
||||
- [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz)
|
||||
- [openEuler aarch64 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64-aclgraph.tar.gz)
|
||||
|
||||
**UI:**
|
||||
- [UI](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-ui.tar.gz)
|
||||
|
||||
- name: Upload release
|
||||
id: upload_release
|
||||
uses: actions/github-script@v8
|
||||
|
||||
14
.github/workflows/server.yml
vendored
14
.github/workflows/server.yml
vendored
@@ -54,8 +54,13 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
ui-build:
|
||||
name: Build Web UI
|
||||
uses: ./.github/workflows/ui-build.yml
|
||||
|
||||
server:
|
||||
runs-on: ubuntu-latest
|
||||
needs: ui-build
|
||||
|
||||
name: server (${{ matrix.wf_name }})
|
||||
strategy:
|
||||
@@ -93,12 +98,11 @@ jobs:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v6
|
||||
- name: Download built UI
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
node-version: "24"
|
||||
cache: "npm"
|
||||
cache-dependency-path: "tools/ui/package-lock.json"
|
||||
name: ui-build
|
||||
path: tools/ui/dist
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
|
||||
4
.github/workflows/ui-build.yml
vendored
4
.github/workflows/ui-build.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
|
||||
- name: Generate checksums
|
||||
run: |
|
||||
cd build/tools/ui/dist
|
||||
cd tools/ui/dist
|
||||
for f in *; do
|
||||
sha256sum "$f" | awk '{print $1, $2}' >> checksums.txt
|
||||
done
|
||||
@@ -40,5 +40,5 @@ jobs:
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ui-build
|
||||
path: build/tools/ui/dist/
|
||||
path: tools/ui/dist/
|
||||
retention-days: 1
|
||||
|
||||
6
.github/workflows/ui-publish.yml
vendored
6
.github/workflows/ui-publish.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: ui-build
|
||||
path: build/tools/ui/dist/
|
||||
path: tools/ui/dist/
|
||||
|
||||
- name: Install Hugging Face Hub CLI
|
||||
run: pip install -U huggingface_hub
|
||||
@@ -49,12 +49,12 @@ jobs:
|
||||
- name: Sync built files to Hugging Face bucket (version tag)
|
||||
run: |
|
||||
# Upload the built files to the Hugging Face bucket under the release version
|
||||
hf buckets sync build/tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/${{ inputs.version_tag }} --delete --quiet
|
||||
hf buckets sync tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/${{ inputs.version_tag }} --delete --quiet
|
||||
|
||||
- name: Sync built files to Hugging Face bucket (latest)
|
||||
run: |
|
||||
# Also upload to the 'latest' directory for fallback downloads
|
||||
hf buckets sync build/tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/latest --delete --quiet
|
||||
hf buckets sync tools/ui/dist hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/latest --delete --quiet
|
||||
|
||||
- name: Verify upload
|
||||
run: |
|
||||
|
||||
@@ -112,16 +112,6 @@ option(LLAMA_BUILD_APP "llama: build the unified binary"
|
||||
option(LLAMA_BUILD_UI "llama: build the embedded Web UI for server" ON)
|
||||
option(LLAMA_USE_PREBUILT_UI "llama: use prebuilt UI from HF Bucket when available (requires LLAMA_BUILD_UI=ON)" ON)
|
||||
|
||||
# Backward compat: when old var is set but new one isn't, forward the value
|
||||
if(DEFINED LLAMA_BUILD_WEBUI)
|
||||
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
|
||||
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
|
||||
endif()
|
||||
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
|
||||
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
|
||||
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
|
||||
endif()
|
||||
|
||||
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
|
||||
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)
|
||||
|
||||
|
||||
@@ -617,11 +617,7 @@ struct common_params {
|
||||
std::map<std::string, std::string> default_template_kwargs;
|
||||
|
||||
// UI configs
|
||||
#ifdef LLAMA_UI_DEFAULT_ENABLED
|
||||
bool ui = LLAMA_UI_DEFAULT_ENABLED != 0;
|
||||
#else
|
||||
bool ui = true; // default to enabled when not set
|
||||
#endif
|
||||
bool ui = true;
|
||||
|
||||
// Deprecated: use ui, ui_mcp_proxy, ui_config_json instead
|
||||
bool webui = ui;
|
||||
|
||||
@@ -33,8 +33,8 @@
|
||||
"name": "arm64-windows-snapdragon",
|
||||
"inherits": [ "base", "arm64-windows-llvm" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
|
||||
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
|
||||
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16+dotprod+i8mm -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
|
||||
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16+dotprod+i8mm -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
|
||||
"CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
|
||||
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
|
||||
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
||||
|
||||
@@ -24,7 +24,7 @@ Native Windows 11 arm64 builds have the following tool dependencies:
|
||||
- UCRT and Driver Kit
|
||||
- LLVM core libraries and Clang compiler (winget)
|
||||
- CMake, Git, Python (winget)
|
||||
- Hexagon SDK Community Edition 6.4 or later (see windows.md)
|
||||
- Hexagon SDK Community Edition 6.6 or later (see windows.md)
|
||||
- OpenCL SDK 2.3 or later (see windows.md)
|
||||
|
||||
Note: The rest of the **Windows** build process assumes that you're running natively in Powershell.
|
||||
@@ -45,7 +45,7 @@ Preset CMake variables:
|
||||
GGML_HEXAGON="ON"
|
||||
GGML_OPENCL="ON"
|
||||
GGML_OPENMP="OFF"
|
||||
HEXAGON_SDK_ROOT="/opt/hexagon/6.4.0.2"
|
||||
HEXAGON_SDK_ROOT="/opt/hexagon/6.6.0.0"
|
||||
...
|
||||
-- Including OpenCL backend
|
||||
-- Including Hexagon backend
|
||||
|
||||
@@ -28,15 +28,15 @@ c:\Qualcomm\OpenCL_SDK\2.3.2
|
||||
|
||||
Either use the trimmed down version (optimized for CI) from
|
||||
|
||||
https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz
|
||||
https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.6.0.0/hexagon-sdk-v6.6.0.0-arm64-wos.tar.xz
|
||||
|
||||
Or download the complete official version from
|
||||
|
||||
https://softwarecenter.qualcomm.com/catalog/item/Hexagon_SDK?version=6.4.0.2
|
||||
https://softwarecenter.qualcomm.com/catalog/item/Hexagon_SDK?version=6.6.0.0
|
||||
|
||||
Unzip/untar the archive into
|
||||
```
|
||||
c:\Qualcomm\Hexagon_SDK\6.4.0.2
|
||||
c:\Qualcomm\Hexagon_SDK\6.6.0.0
|
||||
```
|
||||
|
||||
## Install the latest Adreno GPU driver
|
||||
@@ -123,10 +123,10 @@ The overall Hexagon backend build procedure for Windows on Snapdragon is the sam
|
||||
However, additional settings are required for generating and signing HTP Ops libraries.
|
||||
```
|
||||
> $env:OPENCL_SDK_ROOT="C:\Qualcomm\OpenCL_SDK\2.3.2"
|
||||
> $env:HEXAGON_SDK_ROOT="C:\Qualcomm\Hexagon_SDK\6.4.0.2"
|
||||
> $env:HEXAGON_TOOLS_ROOT="C:\Qualcomm\Hexagon_SDK\6.4.0.2\tools\HEXAGON_Tools\19.0.04"
|
||||
> $env:HEXAGON_SDK_ROOT="C:\Qualcomm\Hexagon_SDK\6.6.0.0"
|
||||
> $env:HEXAGON_TOOLS_ROOT="C:\Qualcomm\Hexagon_SDK\6.6.0.0\tools\HEXAGON_Tools\19.0.07"
|
||||
> $env:HEXAGON_HTP_CERT="c:\Users\MyUsers\Certs\ggml-htp-v1.pfx"
|
||||
> $env:WINDOWS_SDK_BIN="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\arm64"
|
||||
> $env:WINDOWS_SDK_BIN="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0"
|
||||
|
||||
> cmake --preset arm64-windows-snapdragon-release -B build-wos
|
||||
...
|
||||
|
||||
@@ -64,7 +64,7 @@ def load_model_and_tokenizer(model_path, use_sentence_transformers=False, device
|
||||
print("Using SentenceTransformer to apply all numbered layers")
|
||||
model = SentenceTransformer(model_path)
|
||||
tokenizer = model.tokenizer
|
||||
config = model[0].auto_model.config
|
||||
config = model[0].auto_model.config # ty: ignore[unresolved-attribute]
|
||||
else:
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
||||
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
||||
|
||||
@@ -306,7 +306,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
|
||||
GGML_ASSERT(tensor);
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
|
||||
if (n_copies <= 1 || backend->iface.set_tensor_2d_async == NULL) {
|
||||
if (n_copies <= 1 || backend->iface.get_tensor_2d_async == NULL) {
|
||||
for (size_t i = 0; i < n_copies; i++) {
|
||||
ggml_backend_tensor_get_async(backend, tensor, (char *) data + i*stride_data, offset + i*stride_tensor, size);
|
||||
}
|
||||
@@ -317,7 +317,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
|
||||
}
|
||||
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
||||
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
||||
backend->iface.get_tensor_2d_async(backend, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
|
||||
}
|
||||
|
||||
|
||||
@@ -852,9 +852,10 @@ static void fa_softmax_thread(unsigned int n, unsigned int i, void * data) {
|
||||
v_s_rowmax1 = hvx_vec_reduce_max_f16(v_s_rowmax1);
|
||||
|
||||
// Splat m_prev[r], m_prev[r+1] from the per-row accumulator.
|
||||
// vror brings the target lane to lane 0, then extract + re-splat.
|
||||
HVX_Vector v_m_prev0 = hvx_vec_splat_f16(hvx_vec_get_f16(Q6_V_vror_VR(m_prev_v, r_vec_off * 2)));
|
||||
HVX_Vector v_m_prev1 = hvx_vec_splat_f16(hvx_vec_get_f16(Q6_V_vror_VR(m_prev_v, (r_vec_off + 1) * 2)));
|
||||
// vror brings the target lane to lane 0, then vdelta replicates it
|
||||
// across all lanes — stays in the vector domain (no store/reload).
|
||||
HVX_Vector v_m_prev0 = hvx_vec_repl_f16(Q6_V_vror_VR(m_prev_v, r_vec_off * 2));
|
||||
HVX_Vector v_m_prev1 = hvx_vec_repl_f16(Q6_V_vror_VR(m_prev_v, (r_vec_off + 1) * 2));
|
||||
|
||||
// HVX max — both operands are splats, so result is splat of m_new.
|
||||
HVX_Vector v_dup_m0 = Q6_Vhf_vmax_VhfVhf(v_m_prev0, v_s_rowmax0);
|
||||
|
||||
@@ -8,7 +8,10 @@ endif()
|
||||
|
||||
find_package(Vulkan COMPONENTS glslc REQUIRED)
|
||||
|
||||
find_package(SPIRV-Headers REQUIRED)
|
||||
if (DEFINED ENV{VULKAN_SDK})
|
||||
list(APPEND CMAKE_PREFIX_PATH "$ENV{VULKAN_SDK}")
|
||||
endif()
|
||||
find_package(SPIRV-Headers CONFIG REQUIRED)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
# Parallel build object files
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
-r ./requirements-convert_legacy_llama.txt
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
## Embedding Gemma requires PyTorch 2.6.0 or later
|
||||
torch~=2.6.0; platform_machine != "s390x"
|
||||
## Embedding Gemma requires PyTorch 2.6.0 or later, bumped to 2.11.0 for compatibility
|
||||
torch==2.11.0; platform_machine != "s390x"
|
||||
|
||||
# torch s390x packages can only be found from nightly builds
|
||||
--extra-index-url https://download.pytorch.org/whl/nightly
|
||||
|
||||
@@ -7,10 +7,10 @@ $ErrorActionPreference = "Stop"
|
||||
$BaseDir = "C:\Qualcomm"
|
||||
|
||||
# SDK 1: Hexagon
|
||||
$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
|
||||
$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.6.0.0/hexagon-sdk-v6.6.0.0-arm64-wos.tar.xz"
|
||||
$HexagonParent = Join-Path $BaseDir "Hexagon_SDK"
|
||||
$HexagonSdkVersion = "6.4.0.2"
|
||||
$HexagonToolsVersion = "19.0.04"
|
||||
$HexagonSdkVersion = "6.6.0.0"
|
||||
$HexagonToolsVersion = "19.0.07"
|
||||
$HexagonSdkTarget = Join-Path $HexagonParent $HexagonSdkVersion
|
||||
$HexagonToolsTarget = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion"
|
||||
|
||||
|
||||
342
scripts/ui-assets.cmake
Normal file
342
scripts/ui-assets.cmake
Normal file
@@ -0,0 +1,342 @@
|
||||
# Provisions the Web UI assets and generates ui.cpp / ui.h from them.
#
# Asset sources are tried in this order:
#   1. Pre-built assets already present in SRC_DIST_DIR (built by hand by the user)
#   2. An npm build, when BUILD_UI=ON
#   3. A Hugging Face Bucket download, when the steps above produced nothing
#      and HF_ENABLED=ON

cmake_minimum_required(VERSION 3.16)

# --- Inputs: every one of these defaults to an empty string when not supplied ---

# Locations
set(UI_SOURCE_DIR    "" CACHE STRING "UI source directory (to run npm build)")
set(UI_BINARY_DIR    "" CACHE STRING "UI binary directory (to store generated files)")
set(LLAMA_SOURCE_DIR "" CACHE STRING "Project source root (to resolve version from git)")

# Hugging Face Bucket download settings
set(HF_BUCKET        "" CACHE STRING "Hugging Face bucket name")
set(HF_VERSION       "" CACHE STRING "Version to download (empty = resolve from git)")
set(HF_ENABLED       "" CACHE STRING "Whether to allow HF Bucket download (ON/OFF)")

# Local build / code generation settings
set(BUILD_UI         "" CACHE STRING "Build UI via npm (ON/OFF)")
set(LLAMA_UI_EMBED   "" CACHE STRING "Path to llama-ui-embed helper")

# The complete set of files that make up a usable UI bundle.
set(ASSETS bundle.css bundle.js index.html loading.html)

# --- Derived paths ---

# Hand-built assets are looked up here ...
set(SRC_DIST_DIR "${UI_SOURCE_DIR}/dist")
# ... while everything this script produces or fetches lives under UI_BINARY_DIR.
set(DIST_DIR     "${UI_BINARY_DIR}/dist")
set(STAMP_FILE   "${UI_BINARY_DIR}/.ui-stamp")
set(UI_CPP       "${UI_BINARY_DIR}/ui.cpp")
set(UI_H         "${UI_BINARY_DIR}/ui.h")
|
||||
|
||||
# assets_present(<out_var>)
#
# Sets <out_var> in the caller's scope to TRUE when every file named in ASSETS
# exists inside DIST_DIR, and to FALSE as soon as one of them is missing.
function(assets_present out_var)
    # Optimistic default; overwritten below on the first missing file.
    set(${out_var} TRUE PARENT_SCOPE)

    foreach(name ${ASSETS})
        if(EXISTS "${DIST_DIR}/${name}")
            continue()
        endif()
        set(${out_var} FALSE PARENT_SCOPE)
        return()
    endforeach()
endfunction()
|
||||
|
||||
# copy_src_dist(<out_var>)
#
# Uses hand-built assets from SRC_DIST_DIR when the full set is available.
#
# Sets <out_var> in the caller's scope to:
#   TRUE  - every file in ASSETS existed in SRC_DIST_DIR and was copied
#           (or was already identical) into DIST_DIR
#   FALSE - at least one asset is missing from SRC_DIST_DIR (nothing is copied),
#           or one of the copies failed
function(copy_src_dist out_var)
    set(${out_var} FALSE PARENT_SCOPE)

    # All-or-nothing: a partial set of pre-built assets is ignored entirely.
    foreach(asset ${ASSETS})
        if(NOT EXISTS "${SRC_DIST_DIR}/${asset}")
            return()
        endif()
    endforeach()

    file(MAKE_DIRECTORY "${DIST_DIR}")
    message(STATUS "UI: using pre-built assets from ${SRC_DIST_DIR}")
    foreach(asset ${ASSETS})
        execute_process(
            COMMAND "${CMAKE_COMMAND}" -E copy_if_different
                    "${SRC_DIST_DIR}/${asset}" "${DIST_DIR}/${asset}"
            RESULT_VARIABLE rc
        )
        # Do not report success when a copy failed (read-only destination,
        # disk full, ...); leaving <out_var> FALSE lets the caller move on to
        # the next provisioning method instead of trusting a broken DIST_DIR.
        if(NOT rc EQUAL 0)
            message(WARNING "UI: failed to copy ${asset} from ${SRC_DIST_DIR} (${rc})")
            return()
        endif()
    endforeach()

    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
|
||||
|
||||
# npm_build_should_skip(<out_var>)
#
# Decides whether the npm build output in DIST_DIR can be reused as-is.
#
# Sets <out_var> in the caller's scope to TRUE only when all of these hold:
#   - every file in ASSETS exists in DIST_DIR
#   - STAMP_FILE does not exist
#   - ${UI_SOURCE_DIR}/sources.cmake exists
#   - no listed source file is newer than ${DIST_DIR}/index.html
# In every other case <out_var> is FALSE (i.e. "run the build").
function(npm_build_should_skip out_var)
    set(${out_var} FALSE PARENT_SCOPE)

    assets_present(present)
    if(NOT present)
        return()
    endif()

    # This script writes the stamp after a successful HF download and removes it
    # after a successful npm build, so a stamp means the current assets did not
    # come from npm and must not be treated as up-to-date npm output.
    if(EXISTS "${STAMP_FILE}")
        return()
    endif()

    # Without the source manifest there is nothing to compare against.
    if(NOT EXISTS "${UI_SOURCE_DIR}/sources.cmake")
        return()
    endif()
    # Expected to define UI_SOURCE_GLOBS and/or UI_SOURCE_FILES (paths relative
    # to UI_SOURCE_DIR); both are read below.
    include("${UI_SOURCE_DIR}/sources.cmake")

    set(globs "")
    foreach(g ${UI_SOURCE_GLOBS})
        list(APPEND globs "${UI_SOURCE_DIR}/${g}")
    endforeach()
    file(GLOB_RECURSE sources ${globs})
    foreach(f ${UI_SOURCE_FILES})
        list(APPEND sources "${UI_SOURCE_DIR}/${f}")
    endforeach()

    # Timestamps are compared as strings. Request UTC so the fixed-width
    # "%Y-%m-%dT%H:%M:%SZ" form orders chronologically; local-time strings are
    # not monotonic across DST or timezone changes.
    file(TIMESTAMP "${DIST_DIR}/index.html" out_ts UTC)

    foreach(s ${sources})
        if(NOT EXISTS "${s}")
            continue()
        endif()
        file(TIMESTAMP "${s}" s_ts UTC)
        if(s_ts STRGREATER out_ts)
            # A source changed after the last build: rebuild.
            return()
        endif()
    endforeach()

    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
|
||||
|
||||
# npm_build(<out_var>)
#
# Builds the UI from UI_SOURCE_DIR with npm, writing the output to DIST_DIR
# (the target directory is handed to the build through LLAMA_UI_OUT_DIR).
#
# Sets <out_var> in the caller's scope to TRUE when usable npm output exists
# afterwards (either freshly built or judged up-to-date), FALSE otherwise.
# Every failure is reported with a STATUS message; none of them is fatal.
function(npm_build out_var)
    set(${out_var} FALSE PARENT_SCOPE)

    # No package.json means there is nothing npm could build.
    if(NOT EXISTS "${UI_SOURCE_DIR}/package.json")
        message(STATUS "UI: ${UI_SOURCE_DIR}/package.json not found, skipping npm")
        return()
    endif()

    # Reuse an earlier build when nothing changed.
    npm_build_should_skip(up_to_date)
    if(up_to_date)
        message(STATUS "UI: npm output up-to-date, skipping build")
        set(${out_var} TRUE PARENT_SCOPE)
        return()
    endif()

    # On Windows hosts the script wrappers are searched before the bare name.
    set(npm_names npm)
    if(CMAKE_HOST_WIN32)
        set(npm_names npm.cmd npm.bat npm)
    endif()
    find_program(NPM_EXECUTABLE NAMES ${npm_names})
    if(NOT NPM_EXECUTABLE)
        message(STATUS "UI: npm not found, skipping npm build")
        return()
    endif()

    # Dependencies are installed only when node_modules is absent.
    if(NOT EXISTS "${UI_SOURCE_DIR}/node_modules")
        message(STATUS "UI: running npm install (first time)")
        execute_process(
            COMMAND ${NPM_EXECUTABLE} install
            WORKING_DIRECTORY "${UI_SOURCE_DIR}"
            RESULT_VARIABLE install_rc
            ERROR_VARIABLE install_err
        )
        if(NOT install_rc EQUAL 0)
            message(STATUS "UI: npm install failed (${install_rc})")
            message(STATUS " stderr: ${install_err}")
            return()
        endif()
    endif()

    file(MAKE_DIRECTORY "${DIST_DIR}")

    message(STATUS "UI: running npm run build, output -> ${DIST_DIR}")
    execute_process(
        COMMAND ${CMAKE_COMMAND} -E env "LLAMA_UI_OUT_DIR=${DIST_DIR}"
                ${NPM_EXECUTABLE} run build
        WORKING_DIRECTORY "${UI_SOURCE_DIR}"
        RESULT_VARIABLE build_rc
        ERROR_VARIABLE build_err
    )
    if(NOT build_rc EQUAL 0)
        message(STATUS "UI: npm run build failed (${build_rc})")
        message(STATUS " stderr: ${build_err}")
        return()
    endif()

    # A zero exit code is not enough: the expected files must actually exist.
    assets_present(built)
    if(NOT built)
        message(STATUS "UI: npm build finished but assets missing in ${DIST_DIR}")
        return()
    endif()

    message(STATUS "UI: npm build succeeded")
    # Drop any stamp so the assets are no longer attributed to an HF download.
    file(REMOVE "${STAMP_FILE}")
    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
|
||||
|
||||
# resolve_version(<out_var>)
#
# Determines which UI version to request from the bucket and stores it in
# <out_var> in the caller's scope:
#   - HF_VERSION, when it is non-empty
#   - otherwise "b<BUILD_NUMBER>", when ${LLAMA_SOURCE_DIR}/cmake/build-info.cmake
#     exists and leaves BUILD_NUMBER set to something other than "" or 0
#   - otherwise the empty string
function(resolve_version out_var)
    set(ui_resolved_version "${HF_VERSION}")

    # Only consult build-info.cmake when no explicit version was given.
    if("${ui_resolved_version}" STREQUAL ""
       AND EXISTS "${LLAMA_SOURCE_DIR}/cmake/build-info.cmake")
        include("${LLAMA_SOURCE_DIR}/cmake/build-info.cmake")
        if(NOT "${BUILD_NUMBER}" STREQUAL "" AND NOT BUILD_NUMBER EQUAL 0)
            set(ui_resolved_version "b${BUILD_NUMBER}")
        endif()
    endif()

    set(${out_var} "${ui_resolved_version}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# hf_download(<version> <out_var> <out_resolved>)
#
# Downloads every file in ASSETS from the Hugging Face bucket HF_BUCKET.
# <version> is tried first (when non-empty), then "latest".
#
# Outputs, set in the caller's scope:
#   <out_var>      - TRUE when a complete asset set was fetched and, if the
#                    bucket served a checksums.txt, verified; FALSE otherwise
#   <out_resolved> - the bucket directory that was used (<version> or
#                    "latest"); empty on failure
#
# Files are downloaded into a scratch directory and moved into DIST_DIR only
# once the whole set is complete and verified. A failed, partial or
# checksum-mismatching download therefore never replaces assets that are
# already in DIST_DIR, and can never be mistaken for usable assets later.
function(hf_download version out_var out_resolved)
    set(${out_var} FALSE PARENT_SCOPE)
    set(${out_resolved} "" PARENT_SCOPE)

    file(MAKE_DIRECTORY "${DIST_DIR}")

    # Kept under UI_BINARY_DIR, next to DIST_DIR, so the final move is a
    # rename rather than a copy.
    set(staging_dir "${UI_BINARY_DIR}/.ui-download")

    set(candidates "")
    if(NOT "${version}" STREQUAL "")
        list(APPEND candidates "${version}")
    endif()
    list(APPEND candidates "latest")
    # Avoid fetching "latest" twice when it was requested explicitly.
    list(REMOVE_DUPLICATES candidates)

    foreach(resolved ${candidates})
        set(base "https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/${resolved}")

        message(STATUS "UI: downloading from ${resolved}: ${base}")

        # Start every attempt from an empty scratch directory.
        file(REMOVE_RECURSE "${staging_dir}")
        file(MAKE_DIRECTORY "${staging_dir}")

        set(ok TRUE)
        foreach(asset ${ASSETS})
            file(DOWNLOAD "${base}/${asset}?download=true" "${staging_dir}/${asset}"
                STATUS status TIMEOUT 60
            )
            list(GET status 0 rc)
            if(NOT rc EQUAL 0)
                list(GET status 1 errmsg)
                message(STATUS "UI: download ${asset} from ${resolved} failed: ${errmsg}")
                set(ok FALSE)
                break()
            endif()
            message(STATUS "UI: downloaded ${asset}")
        endforeach()

        if(NOT ok)
            continue()
        endif()

        # Best-effort checksum verification: a missing checksums.txt is not an
        # error, but a checksum mismatch rejects this candidate.
        file(DOWNLOAD "${base}/checksums.txt?download=true" "${staging_dir}/checksums.txt"
            STATUS cs_status TIMEOUT 30
        )
        list(GET cs_status 0 cs_rc)
        if(cs_rc EQUAL 0)
            message(STATUS "UI: verifying checksums")
            file(STRINGS "${staging_dir}/checksums.txt" cs_lines)
            foreach(asset ${ASSETS})
                file(SHA256 "${staging_dir}/${asset}" h)
                string(TOLOWER "${h}" h)
                # Look for "<sha256><whitespace><asset>" anywhere in the file.
                string(REGEX MATCH "${h}[ \t]+${asset}" m "${cs_lines}")
                if(NOT m)
                    message(WARNING "UI: checksum verification failed for ${asset}")
                    set(ok FALSE)
                    break()
                endif()
            endforeach()
            if(ok)
                message(STATUS "UI: all checksums verified")
            endif()
        endif()

        if(ok)
            # Promote the verified set into DIST_DIR.
            foreach(asset ${ASSETS})
                file(RENAME "${staging_dir}/${asset}" "${DIST_DIR}/${asset}")
            endforeach()
            if(cs_rc EQUAL 0)
                file(RENAME "${staging_dir}/checksums.txt" "${DIST_DIR}/checksums.txt")
            endif()
            file(REMOVE_RECURSE "${staging_dir}")

            set(${out_var} TRUE PARENT_SCOPE)
            set(${out_resolved} "${resolved}" PARENT_SCOPE)
            return()
        endif()
    endforeach()

    # Every candidate failed: discard whatever was downloaded.
    file(REMOVE_RECURSE "${staging_dir}")
endfunction()
|
||||
|
||||
# emit_files()
#
# Runs the LLAMA_UI_EMBED helper to generate UI_CPP and UI_H.
#
# The helper always receives the two output paths. When the full asset set is
# present in DIST_DIR, one "<asset name> <asset path>" pair per entry of ASSETS
# is appended; otherwise no assets are passed at all.
# Stops with FATAL_ERROR when the helper does not exit with 0.
function(emit_files)
    set(embed_args "${UI_CPP}" "${UI_H}")

    assets_present(have_all)
    if(have_all)
        foreach(name ${ASSETS})
            list(APPEND embed_args "${name}" "${DIST_DIR}/${name}")
        endforeach()
    endif()

    execute_process(
        COMMAND "${LLAMA_UI_EMBED}" ${embed_args}
        RESULT_VARIABLE embed_rc
    )
    if(embed_rc EQUAL 0)
        return()
    endif()
    message(FATAL_ERROR "UI: llama-ui-embed failed (${embed_rc})")
endfunction()
|
||||
|
||||
# ===========================================================================
# Step 1 - hand-built assets found in the UI source tree's dist/ directory
#          take precedence over everything else
# ===========================================================================
copy_src_dist(SRC_OK)
if(SRC_OK)
    emit_files()
    return()
endif()

# ===========================================================================
# Step 2 - npm build, only when BUILD_UI is on
# ===========================================================================
set(provisioned FALSE)

if(BUILD_UI)
    # npm_build always sets NPM_OK to either TRUE or FALSE.
    npm_build(NPM_OK)
    set(provisioned "${NPM_OK}")
endif()

# ===========================================================================
# Step 3 - HF Bucket download, only when npm produced nothing and HF_ENABLED
#          is on
# ===========================================================================
if(HF_ENABLED AND NOT provisioned)
    resolve_version(VERSION)
    assets_present(have_assets)

    # A previous download can be reused when the assets are still there and the
    # stamp records exactly the version that would be requested now.
    set(cache_hit FALSE)
    if(have_assets AND EXISTS "${STAMP_FILE}" AND NOT "${VERSION}" STREQUAL "")
        file(READ "${STAMP_FILE}" stamped)
        string(STRIP "${stamped}" stamped)
        if("${stamped}" STREQUAL "${VERSION}")
            set(cache_hit TRUE)
        endif()
    endif()

    if(cache_hit)
        message(STATUS "UI: HF stamp '${stamped}' matches version, skipping HF fetch")
        set(provisioned TRUE)
    else()
        hf_download("${VERSION}" HF_OK HF_RESOLVED)
        if(NOT HF_OK)
            message(STATUS "UI: HF download failed")
        else()
            # Record which bucket directory the assets came from.
            file(WRITE "${STAMP_FILE}" "${HF_RESOLVED}")
            message(STATUS "UI: HF download succeeded, stamp updated (${HF_RESOLVED})")
            set(provisioned TRUE)
        endif()
    endif()
endif()

# ===========================================================================
# Step 4 - nothing worked: warn, then embed whatever is in DIST_DIR (possibly
#          nothing at all)
# ===========================================================================
if(NOT provisioned)
    assets_present(have_assets)
    if(NOT have_assets)
        message(WARNING "UI: no assets available - building without an embedded UI. "
                        "In a disconnected environment, download the pre-built UI "
                        "from a llama.cpp release at "
                        "https://github.com/ggml-org/llama.cpp/releases and "
                        "extract to tools/ui/dist.")
    else()
        message(WARNING "UI: provisioning failed; embedding stale assets from ${DIST_DIR}")
    endif()
endif()

emit_files()
|
||||
@@ -1,223 +0,0 @@
|
||||
# Fetches the UI assets from a Hugging Face Bucket at build time.
# Usage: cmake -DPUBLIC_DIR=... -DHF_BUCKET=... -DHF_VERSION=... -DASSETS="a+b+c" -P scripts/ui-download.cmake
#
# The assets are obtained from the first source that works:
#   1. Files already sitting in PUBLIC_DIR (left over from an earlier run)
#   2. A local npm build (needs NPM_DIR to point at a directory with package.json)
#   3. A Hugging Face Bucket download (specific version first, then 'latest')

cmake_minimum_required(VERSION 3.16)

# Where the assets live and which files are expected
set(PUBLIC_DIR "" CACHE STRING "Directory to store/download assets")
set(ASSETS     "" CACHE STRING "Plus-separated list of asset filenames (+)")
set(STAMP_FILE "" CACHE STRING "Stamp file to create on success (optional)")

# Local build inputs
set(SOURCE_DIR "" CACHE STRING "Project source root (to resolve version from git)")
set(NPM_DIR    "" CACHE STRING "UI source directory (to run npm build)")

# Hugging Face Bucket download settings
set(HF_ENABLED "" CACHE STRING "Whether to allow HF Bucket download (ON/OFF)")
set(HF_BUCKET  "" CACHE STRING "Hugging Face bucket name")
set(HF_VERSION "" CACHE STRING "Version to download (empty = resolve from git)")
|
||||
|
||||
# ===========================================================================
# 1. Work out the version: an explicit HF_VERSION wins, otherwise derive
#    "b<BUILD_NUMBER>" from the project's build-info.cmake when available
# ===========================================================================
set(RESOLVED_VERSION "${HF_VERSION}")

set(build_info_script "${SOURCE_DIR}/cmake/build-info.cmake")
if("${HF_VERSION}" STREQUAL ""
   AND NOT "${SOURCE_DIR}" STREQUAL ""
   AND EXISTS "${build_info_script}")
    include("${build_info_script}")
    # A BUILD_NUMBER of "" or 0 is not usable as a version.
    if(NOT "${BUILD_NUMBER}" STREQUAL "" AND NOT BUILD_NUMBER EQUAL 0)
        set(RESOLVED_VERSION "b${BUILD_NUMBER}")
        message(STATUS "UI: resolved version from git: ${RESOLVED_VERSION}")
    endif()
endif()

# ASSETS arrives '+'-separated (';' is awkward to escape portably in -D
# arguments); turn it into a regular CMake list.
string(REPLACE "+" ";" ASSETS "${ASSETS}")
|
||||
|
||||
# ===========================================================================
# 2. Stamp check: a stamp recording a different version than the one resolved
#    above forces the assets to be provisioned again
# ===========================================================================
set(FORCE_REBUILD FALSE)
if(NOT "${STAMP_FILE}" STREQUAL "" AND EXISTS "${STAMP_FILE}")
    file(READ "${STAMP_FILE}" STAMPED_VERSION)
    string(STRIP "${STAMPED_VERSION}" STAMPED_VERSION)
    if("${STAMPED_VERSION}" STREQUAL "${RESOLVED_VERSION}")
        # Stamp is current; nothing to force.
    else()
        message(STATUS "UI: version changed (${STAMPED_VERSION} -> ${RESOLVED_VERSION}), re-building")
        set(FORCE_REBUILD TRUE)
    endif()
endif()

# ===========================================================================
# 3. Cache check: are all expected files already in PUBLIC_DIR?
# ===========================================================================
set(ALL_EXISTS TRUE)
foreach(asset ${ASSETS})
    if(EXISTS "${PUBLIC_DIR}/${asset}")
        continue()
    endif()
    set(ALL_EXISTS FALSE)
    break()
endforeach()

# Nothing to do when the cached files are complete and not outdated.
if(NOT FORCE_REBUILD AND ALL_EXISTS)
    message(STATUS "UI: all assets already exist in ${PUBLIC_DIR}, skipping")
    return()
endif()

file(MAKE_DIRECTORY "${PUBLIC_DIR}")
|
||||
|
||||
# ---------------------------------------------------------------------------
# 4. Priority 2: build from source via npm (fast path for developers)
#
#    Sets PROVISION_SUCCESS to TRUE only when `npm run build` exits with 0 and
#    every file in ASSETS exists in PUBLIC_DIR afterwards. Every failure is
#    reported with a STATUS message and leaves PROVISION_SUCCESS FALSE so the
#    download step can take over.
# ---------------------------------------------------------------------------
set(PROVISION_SUCCESS FALSE)

if(NOT PROVISION_SUCCESS AND NOT "${NPM_DIR}" STREQUAL "")
    if(EXISTS "${NPM_DIR}/package.json")
        # Check if npm is available before attempting npm build.
        # On Windows hosts npm is a script wrapper (npm.cmd), so look for the
        # wrapper names first; the bare "npm" there cannot be executed directly.
        if(CMAKE_HOST_WIN32)
            find_program(NPM_EXECUTABLE NAMES npm.cmd npm.bat npm)
        else()
            find_program(NPM_EXECUTABLE npm)
        endif()
        if(NPM_EXECUTABLE)
            message(STATUS "UI: building from source in ${NPM_DIR}")

            # Run npm install if node_modules is missing
            set(NPM_INSTALL_RESULT 0)
            if(NOT EXISTS "${NPM_DIR}/node_modules")
                message(STATUS "UI: running npm install (first time)")
                execute_process(
                    COMMAND ${NPM_EXECUTABLE} install
                    WORKING_DIRECTORY "${NPM_DIR}"
                    RESULT_VARIABLE NPM_INSTALL_RESULT
                    OUTPUT_VARIABLE NPM_OUT
                    ERROR_VARIABLE NPM_ERR
                )
                if(NOT NPM_INSTALL_RESULT EQUAL 0)
                    message(STATUS "UI: npm install failed (${NPM_INSTALL_RESULT}), falling back to download")
                    message(STATUS " stderr: ${NPM_ERR}")
                endif()
            endif()

            # Run the build, but only when the dependencies are in place: a
            # build after a failed install cannot succeed and would only add a
            # second, misleading error.
            if(NPM_INSTALL_RESULT EQUAL 0)
                execute_process(
                    COMMAND ${NPM_EXECUTABLE} run build
                    WORKING_DIRECTORY "${NPM_DIR}"
                    RESULT_VARIABLE NPM_BUILD_RESULT
                    OUTPUT_VARIABLE NPM_OUT
                    ERROR_VARIABLE NPM_ERR
                )

                if(NPM_BUILD_RESULT EQUAL 0)
                    # Verify that the expected assets were produced
                    set(ALL_BUILT TRUE)
                    foreach(asset ${ASSETS})
                        if(NOT EXISTS "${PUBLIC_DIR}/${asset}")
                            set(ALL_BUILT FALSE)
                            break()
                        endif()
                    endforeach()

                    if(ALL_BUILT)
                        message(STATUS "UI: local npm build succeeded")
                        set(PROVISION_SUCCESS TRUE)
                    else()
                        message(STATUS "UI: npm build completed but assets missing from ${PUBLIC_DIR}, falling back to download")
                    endif()
                else()
                    message(STATUS "UI: npm build failed (${NPM_BUILD_RESULT}), falling back to download")
                    message(STATUS " stderr: ${NPM_ERR}")
                endif()
            endif()
        else()
            message(STATUS "UI: npm not found, skipping npm build and trying HF Bucket download")
        endif()
    else()
        message(STATUS "UI: NPM_DIR (${NPM_DIR}) has no package.json, skipping npm build")
    endif()
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
# 5. Priority 3: download from Hugging Face Bucket (if enabled)
#
#    Tries the version-specific directory first (when a version is known), then
#    'latest'. Sets PROVISION_SUCCESS to TRUE when one of them delivered every
#    file in ASSETS and, if the bucket served a checksums.txt, all checksums
#    matched.
#
#    Files are downloaded into a scratch directory and moved into PUBLIC_DIR
#    only once the whole set is complete and verified. A partial or
#    checksum-mismatching download therefore never lands in PUBLIC_DIR, where a
#    later run would accept it as "all assets already exist".
# ---------------------------------------------------------------------------
if(NOT PROVISION_SUCCESS AND HF_ENABLED)
    # Build list of "<label>:<base url>" entries to try, in order
    set(URL_ENTRIES "")
    if(NOT "${RESOLVED_VERSION}" STREQUAL "")
        list(APPEND URL_ENTRIES
            "version:https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/${RESOLVED_VERSION}")
    endif()
    list(APPEND URL_ENTRIES
        "latest:https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/latest")

    # Scratch area inside PUBLIC_DIR so the final move is a rename, not a copy.
    set(STAGING_DIR "${PUBLIC_DIR}/.ui-download")

    foreach(entry ${URL_ENTRIES})
        # Split at the first ':' into label and base URL.
        string(REGEX REPLACE "^([^:]+):.*$" "\\1" url_label "${entry}")
        string(REGEX REPLACE "^[^:]+:(.*)$" "\\1" base_url "${entry}")

        message(STATUS "UI: downloading assets from ${url_label}: ${base_url}")

        # Start every attempt from an empty scratch directory.
        file(REMOVE_RECURSE "${STAGING_DIR}")
        file(MAKE_DIRECTORY "${STAGING_DIR}")

        # Download each asset
        set(ALL_OK TRUE)
        foreach(asset ${ASSETS})
            set(download_url "${base_url}/${asset}?download=true")
            set(download_path "${STAGING_DIR}/${asset}")
            file(DOWNLOAD "${download_url}" "${download_path}"
                STATUS download_status TIMEOUT 60
            )
            list(GET download_status 0 download_result)
            if(NOT download_result EQUAL 0)
                list(GET download_status 1 error_message)
                message(STATUS "UI: failed to download ${asset} from ${url_label}: ${error_message}")
                set(ALL_OK FALSE)
                break()
            endif()
            message(STATUS "UI: downloaded ${asset}")
        endforeach()

        if(NOT ALL_OK)
            continue()
        endif()

        # Verify checksums if the server provides them; a missing checksums.txt
        # is tolerated, a mismatch rejects this entry.
        file(DOWNLOAD "${base_url}/checksums.txt?download=true"
            "${STAGING_DIR}/checksums.txt"
            STATUS checksum_status TIMEOUT 30
        )
        list(GET checksum_status 0 checksum_result)
        if(checksum_result EQUAL 0)
            message(STATUS "UI: verifying checksums...")
            file(STRINGS "${STAGING_DIR}/checksums.txt" CHECKSUMS_CONTENT)
            foreach(asset ${ASSETS})
                set(download_path "${STAGING_DIR}/${asset}")
                file(SHA256 "${download_path}" asset_hash)
                string(TOLOWER "${asset_hash}" EXPECTED_HASH_LOWER)
                string(REGEX MATCH "${EXPECTED_HASH_LOWER}[ \\t]+${asset}" CHECKSUM_LINE "${CHECKSUMS_CONTENT}")
                if(NOT CHECKSUM_LINE)
                    message(WARNING "UI: checksum verification failed for ${asset}")
                    set(ALL_OK FALSE)
                    break()
                endif()
            endforeach()
            if(ALL_OK)
                message(STATUS "UI: all checksums verified")
            endif()
        endif()

        if(ALL_OK)
            # Promote the verified set into PUBLIC_DIR.
            foreach(asset ${ASSETS})
                file(RENAME "${STAGING_DIR}/${asset}" "${PUBLIC_DIR}/${asset}")
            endforeach()
            if(checksum_result EQUAL 0)
                file(RENAME "${STAGING_DIR}/checksums.txt" "${PUBLIC_DIR}/checksums.txt")
            endif()
            set(PROVISION_SUCCESS TRUE)
            break()
        endif()
    endforeach()

    # Remove the scratch directory, including leftovers of failed attempts.
    file(REMOVE_RECURSE "${STAGING_DIR}")

    if(PROVISION_SUCCESS)
        message(STATUS "UI: provisioning complete")
    else()
        message(WARNING "UI: failed to download assets from HF Bucket (${HF_BUCKET})")
    endif()
endif()
|
||||
|
||||
# ===========================================================================
# 6. Wrap-up: warn when nothing could be provisioned, otherwise record the
#    resolved version in the stamp file (when one was requested) so the next
#    run can tell whether the assets are still current
# ===========================================================================
if(NOT PROVISION_SUCCESS)
    message(WARNING "UI: no source available. Neither local build (${NPM_DIR}) nor HF Bucket download succeeded.")
    message(WARNING "UI: building server without embedded UI. Set LLAMA_BUILD_UI=OFF to suppress this warning.")
elseif(NOT "${STAMP_FILE}" STREQUAL "")
    file(WRITE "${STAMP_FILE}" "${RESOLVED_VERSION}")
endif()
|
||||
@@ -1,16 +0,0 @@
|
||||
# CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}`
# Usage: cmake -DINPUT=build/tools/ui/dist/index.html -DOUTPUT=build/tools/ui/dist/index.html.hpp -P scripts/xxd.cmake
#
# Reads INPUT as raw bytes and writes OUTPUT containing two C definitions:
#   unsigned char <name>[]   - the bytes of INPUT as a 0x.. initializer list
#   unsigned int  <name>_len - the number of bytes
# where <name> is the file name of INPUT with every '.' and '-' replaced by '_'.

set(INPUT "" CACHE STRING "Input File")
set(OUTPUT "" CACHE STRING "Output File")

# Derive the C identifier from the input file name (directory part dropped).
get_filename_component(filename "${INPUT}" NAME)
string(REGEX REPLACE "\\.|-" "_" name "${filename}")

# HEX yields two lowercase hex digits per input byte; turn each pair into "0xNN,".
file(READ "${INPUT}" hex_data HEX)
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," hex_sequence "${hex_data}")

# The expansion is quoted so that an empty input file (hex_data == "") still
# passes an argument to string(LENGTH) instead of aborting the script.
string(LENGTH "${hex_data}" hex_len)
math(EXPR len "${hex_len} / 2")

file(WRITE "${OUTPUT}" "unsigned char ${name}[] = {${hex_sequence}};\nunsigned int ${name}_len = ${len};\n")
|
||||
@@ -1334,6 +1334,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
|
||||
if (!layer.ssm_beta_s && layer.ssm_beta) {
|
||||
layer.ssm_beta_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.eh_proj_s && layer.nextn.eh_proj) {
|
||||
layer.nextn.eh_proj_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.shared_head_head_s && layer.nextn.shared_head_head) {
|
||||
layer.nextn.shared_head_head_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
|
||||
// input scales
|
||||
if (!layer.wq_in_s && layer.wq) {
|
||||
@@ -1393,6 +1399,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
|
||||
if (!layer.ssm_beta_in_s && layer.ssm_beta) {
|
||||
layer.ssm_beta_in_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.eh_proj_in_s && layer.nextn.eh_proj) {
|
||||
layer.nextn.eh_proj_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.shared_head_head_in_s && layer.nextn.shared_head_head) {
|
||||
layer.nextn.shared_head_head_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
}
|
||||
// output scales
|
||||
if (output && output->type == GGML_TYPE_NVFP4) {
|
||||
|
||||
@@ -202,12 +202,16 @@ struct llama_layer_shortconv {
|
||||
};
|
||||
|
||||
struct llama_layer_nextn {
|
||||
struct ggml_tensor * eh_proj = nullptr;
|
||||
struct ggml_tensor * embed_tokens = nullptr;
|
||||
struct ggml_tensor * enorm = nullptr;
|
||||
struct ggml_tensor * hnorm = nullptr;
|
||||
struct ggml_tensor * shared_head_head = nullptr;
|
||||
struct ggml_tensor * shared_head_norm = nullptr;
|
||||
struct ggml_tensor * eh_proj = nullptr;
|
||||
struct ggml_tensor * eh_proj_s = nullptr;
|
||||
struct ggml_tensor * eh_proj_in_s = nullptr;
|
||||
struct ggml_tensor * embed_tokens = nullptr;
|
||||
struct ggml_tensor * enorm = nullptr;
|
||||
struct ggml_tensor * hnorm = nullptr;
|
||||
struct ggml_tensor * shared_head_head = nullptr;
|
||||
struct ggml_tensor * shared_head_head_s = nullptr;
|
||||
struct ggml_tensor * shared_head_head_in_s = nullptr;
|
||||
struct ggml_tensor * shared_head_norm = nullptr;
|
||||
};
|
||||
|
||||
struct llama_layer {
|
||||
|
||||
@@ -538,7 +538,7 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
|
||||
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
|
||||
cb(concat, "mtp_concat", il);
|
||||
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
|
||||
cb(cur, "mtp_eh_proj", il);
|
||||
|
||||
ggml_tensor * inpSA = cur;
|
||||
@@ -626,8 +626,9 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
|
||||
cb(cur, "mtp_shared_head_norm", -1);
|
||||
|
||||
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
|
||||
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
|
||||
GGML_ASSERT(head_w && "QWEN35 MTP: missing LM head (nextn.shared_head_head or model.output)");
|
||||
cur = build_lora_mm(head_w, cur);
|
||||
cur = build_lora_mm(head_w, cur, head_s);
|
||||
cb(cur, "result_output", -1);
|
||||
|
||||
res->t_logits = cur;
|
||||
|
||||
@@ -602,7 +602,7 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
|
||||
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
|
||||
cb(concat, "mtp_concat", il);
|
||||
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
|
||||
cb(cur, "mtp_eh_proj", il);
|
||||
|
||||
ggml_tensor * inpSA = cur;
|
||||
@@ -722,8 +722,9 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
|
||||
cb(cur, "mtp_shared_head_norm", -1);
|
||||
|
||||
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
|
||||
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
|
||||
GGML_ASSERT(head_w && "QWEN35MOE MTP: missing LM head (nextn.shared_head_head or model.output)");
|
||||
cur = build_lora_mm(head_w, cur);
|
||||
cur = build_lora_mm(head_w, cur, head_s);
|
||||
cb(cur, "result_output", -1);
|
||||
|
||||
res->t_logits = cur;
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
-r ../../requirements/requirements-convert_legacy_llama.txt
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
pillow~=11.3.0
|
||||
torch~=2.6.0
|
||||
torchvision~=0.21.0
|
||||
|
||||
## Embedding Gemma requires PyTorch 2.6.0 or later, bumped to 2.11.0 for compatibility
|
||||
torch==2.11.0; platform_machine != "s390x" # check_requirements: ignore "=="
|
||||
torchvision==0.26.0; platform_machine != "s390x" # check_requirements: ignore "=="
|
||||
|
||||
# torch s390x packages can only be found from nightly builds
|
||||
--extra-index-url https://download.pytorch.org/whl/nightly
|
||||
torch>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="
|
||||
torchvision>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="
|
||||
|
||||
@@ -231,16 +231,19 @@ bool server_http_context::init(const common_params & params) {
|
||||
};
|
||||
|
||||
auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
|
||||
(void)req; // suppress unused parameter warning when LLAMA_BUILD_UI is not defined
|
||||
bool ready = is_ready.load();
|
||||
if (!ready) {
|
||||
#if defined(LLAMA_BUILD_UI)
|
||||
#if defined(LLAMA_UI_HAS_ASSETS)
|
||||
auto tmp = string_split<std::string>(req.path, '.');
|
||||
if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
|
||||
res.status = 503;
|
||||
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
|
||||
return false;
|
||||
if (const llama_ui_asset * a = llama_ui_find_asset("loading.html")) {
|
||||
res.status = 503;
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, "text/html; charset=utf-8");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void)req;
|
||||
#endif
|
||||
// no endpoints are allowed to be accessed when the server is not ready
|
||||
// this is to prevent any data races or inconsistent states
|
||||
@@ -312,23 +315,27 @@ bool server_http_context::init(const common_params & params) {
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
#if defined(LLAMA_BUILD_UI)
|
||||
// using embedded static index.html
|
||||
srv->Get(params.api_prefix + "/", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
res.set_content(reinterpret_cast<const char*>(index_html), index_html_len, "text/html; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
srv->Get(params.api_prefix + "/bundle.js", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(bundle_js), bundle_js_len, "application/javascript; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
srv->Get(params.api_prefix + "/bundle.css", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(bundle_css), bundle_css_len, "text/css; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
#if defined(LLAMA_UI_HAS_ASSETS)
|
||||
auto serve_asset = [](const std::string & name, const char * mime, bool with_isolation_headers) {
|
||||
return [name, mime, with_isolation_headers](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
|
||||
if (!a) {
|
||||
res.status = 404;
|
||||
return false;
|
||||
}
|
||||
if (with_isolation_headers) {
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
}
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
srv->Get(params.api_prefix + "/", serve_asset("index.html", "text/html; charset=utf-8", true));
|
||||
srv->Get(params.api_prefix + "/bundle.js", serve_asset("bundle.js", "application/javascript; charset=utf-8", false));
|
||||
srv->Get(params.api_prefix + "/bundle.css", serve_asset("bundle.css", "text/css; charset=utf-8", false));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,150 +1,98 @@
|
||||
set(TARGET llama-ui)
|
||||
|
||||
# Deprecated: use LLAMA_UI_HF_BUCKET instead
|
||||
set(LLAMA_WEBUI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt webui assets (deprecated: use LLAMA_UI_HF_BUCKET)")
|
||||
set(LLAMA_UI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt UI assets")
|
||||
|
||||
# Backward compat: forward old var to new one
|
||||
if(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT DEFINED LLAMA_UI_HF_BUCKET)
|
||||
if(DEFINED LLAMA_BUILD_WEBUI)
|
||||
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
|
||||
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
|
||||
endif()
|
||||
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
|
||||
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
|
||||
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
|
||||
endif()
|
||||
if(DEFINED LLAMA_WEBUI_HF_BUCKET)
|
||||
set(LLAMA_UI_HF_BUCKET ${LLAMA_WEBUI_HF_BUCKET})
|
||||
elseif(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT "${LLAMA_WEBUI_HF_BUCKET}" STREQUAL "${LLAMA_UI_HF_BUCKET}")
|
||||
message(DEPRECATION "LLAMA_WEBUI_HF_BUCKET is deprecated, use LLAMA_UI_HF_BUCKET instead")
|
||||
endif()
|
||||
|
||||
set(TARGET_SRCS "")
|
||||
set(UI_COMPILE_DEFS "")
|
||||
|
||||
if(LLAMA_BUILD_UI)
|
||||
set(PUBLIC_ASSETS
|
||||
index.html
|
||||
bundle.js
|
||||
bundle.css
|
||||
loading.html
|
||||
)
|
||||
|
||||
# Determine source of UI assets (priority: local > HF Bucket)
|
||||
set(UI_SOURCE "")
|
||||
set(UI_SOURCE_DIR "")
|
||||
|
||||
# Priority 1: Check for local build output
|
||||
set(LOCAL_UI_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
|
||||
|
||||
# Verify all required assets exist before declaring local source valid
|
||||
set(ALL_ASSETS_PRESENT TRUE)
|
||||
foreach(asset ${PUBLIC_ASSETS})
|
||||
if(NOT EXISTS "${LOCAL_UI_DIR}/${asset}")
|
||||
set(ALL_ASSETS_PRESENT FALSE)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(ALL_ASSETS_PRESENT)
|
||||
set(UI_SOURCE "local")
|
||||
set(UI_SOURCE_DIR "${LOCAL_UI_DIR}")
|
||||
message(STATUS "UI: using local build from ${UI_SOURCE_DIR}")
|
||||
endif()
|
||||
|
||||
# Priority 2: Build-time asset provisioning (npm build → HF Bucket fallback)
|
||||
if(NOT UI_SOURCE_DIR)
|
||||
# Environment variable takes precedence (e.g., from CI workflows)
|
||||
# Deprecated: use HF_UI_VERSION instead
|
||||
if(DEFINED ENV{HF_WEBUI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
|
||||
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
|
||||
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
|
||||
message(FATAL_ERROR "UI: invalid HF_WEBUI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
|
||||
endif()
|
||||
elseif(DEFINED ENV{HF_UI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
|
||||
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
|
||||
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
|
||||
endif()
|
||||
elseif(DEFINED LLAMA_BUILD_NUMBER)
|
||||
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
|
||||
message(STATUS "UI: derived HF_UI_VERSION=b${LLAMA_BUILD_NUMBER}")
|
||||
else()
|
||||
set(HF_UI_VERSION "")
|
||||
message(STATUS "UI: version not specified (will use HF 'latest')")
|
||||
endif()
|
||||
|
||||
if("${HF_UI_VERSION}" STREQUAL "")
|
||||
set(UI_VERSION_TAG "provisioned")
|
||||
else()
|
||||
set(UI_VERSION_TAG "${HF_UI_VERSION}")
|
||||
endif()
|
||||
set(UI_STAMP "${CMAKE_CURRENT_BINARY_DIR}/.ui-${UI_VERSION_TAG}.stamp")
|
||||
|
||||
string(REPLACE ";" "+" PUBLIC_ASSETS_JOINED "${PUBLIC_ASSETS}")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${UI_STAMP}
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DSOURCE_DIR=${PROJECT_SOURCE_DIR}"
|
||||
"-DPUBLIC_DIR=${PROJECT_SOURCE_DIR}/build/tools/ui/dist"
|
||||
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
|
||||
"-DHF_VERSION=${HF_UI_VERSION}"
|
||||
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
|
||||
"-DASSETS=${PUBLIC_ASSETS_JOINED}"
|
||||
"-DSTAMP_FILE=${UI_STAMP}"
|
||||
"-DNPM_DIR=${PROJECT_SOURCE_DIR}/tools/ui"
|
||||
-P ${PROJECT_SOURCE_DIR}/scripts/ui-download.cmake
|
||||
COMMENT "Building/provisioning UI assets (npm build -> HF Bucket fallback)"
|
||||
)
|
||||
|
||||
set(UI_SOURCE "provisioned")
|
||||
set(UI_SOURCE_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
|
||||
endif()
|
||||
|
||||
# Process assets from the determined source
|
||||
if(UI_SOURCE_DIR)
|
||||
foreach(asset ${PUBLIC_ASSETS})
|
||||
set(input "${UI_SOURCE_DIR}/${asset}")
|
||||
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
|
||||
list(APPEND TARGET_SRCS ${output})
|
||||
|
||||
if(UI_SOURCE STREQUAL "local")
|
||||
if(NOT EXISTS "${input}")
|
||||
message(FATAL_ERROR "UI asset not found: ${input}")
|
||||
endif()
|
||||
set(dependency "${input}")
|
||||
else()
|
||||
set(dependency "${UI_STAMP}")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
DEPENDS ${dependency}
|
||||
OUTPUT "${output}"
|
||||
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
|
||||
)
|
||||
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
|
||||
endforeach()
|
||||
|
||||
list(APPEND UI_COMPILE_DEFS
|
||||
LLAMA_BUILD_UI
|
||||
LLAMA_UI_DEFAULT_ENABLED=1
|
||||
)
|
||||
message(STATUS "UI: embedded with source: ${UI_SOURCE}")
|
||||
else()
|
||||
message(WARNING "UI: no source available. Neither local build (build/tools/ui/dist/) nor HF Bucket download succeeded.")
|
||||
message(WARNING "UI: building server without embedded UI. Set LLAMA_BUILD_UI=OFF to suppress this warning.")
|
||||
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
|
||||
endif()
|
||||
# Resolve HF asset version: explicit env var > derived from build number > unset
|
||||
if(DEFINED ENV{HF_WEBUI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
|
||||
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
|
||||
elseif(DEFINED ENV{HF_UI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
|
||||
elseif(DEFINED LLAMA_BUILD_NUMBER)
|
||||
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
|
||||
else()
|
||||
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
|
||||
set(HF_UI_VERSION "")
|
||||
endif()
|
||||
|
||||
# Build the static library
|
||||
add_library(${TARGET} STATIC ui.cpp)
|
||||
if(NOT "${HF_UI_VERSION}" STREQUAL "" AND NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
|
||||
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
|
||||
endif()
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
set(UI_CPP "${CMAKE_CURRENT_BINARY_DIR}/ui.cpp")
|
||||
set(UI_H "${CMAKE_CURRENT_BINARY_DIR}/ui.h")
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
find_program(HOST_CXX_COMPILER NAMES g++ clang++ NO_CMAKE_FIND_ROOT_PATH)
|
||||
if(NOT HOST_CXX_COMPILER)
|
||||
message(FATAL_ERROR "UI: no host C++ compiler (g++/clang++) found to build llama-ui-embed; set -DHOST_CXX_COMPILER=<path>")
|
||||
endif()
|
||||
message(STATUS "UI: building llama-ui-embed with host compiler ${HOST_CXX_COMPILER}")
|
||||
|
||||
if(CMAKE_HOST_WIN32)
|
||||
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed.exe")
|
||||
else()
|
||||
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${LLAMA_UI_EMBED_EXE}"
|
||||
COMMAND "${HOST_CXX_COMPILER}" -O2 -std=c++17
|
||||
-o "${LLAMA_UI_EMBED_EXE}" "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
|
||||
COMMENT "Building llama-ui-embed (host)"
|
||||
VERBATIM
|
||||
)
|
||||
add_custom_target(llama-ui-embed DEPENDS "${LLAMA_UI_EMBED_EXE}")
|
||||
else()
|
||||
add_executable(llama-ui-embed embed.cpp)
|
||||
target_compile_features(llama-ui-embed PRIVATE cxx_std_17)
|
||||
set_target_properties(llama-ui-embed PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
)
|
||||
set(LLAMA_UI_EMBED_EXE "$<TARGET_FILE:llama-ui-embed>")
|
||||
endif()
|
||||
|
||||
# Run the provisioning script every build so source changes in tools/ui/ are
|
||||
# always picked up. The script uses copy_if_different for ui.cpp/ui.h, so the
|
||||
# library only recompiles when contents actually change.
|
||||
add_custom_target(llama-ui-assets ALL
|
||||
BYPRODUCTS ${UI_CPP} ${UI_H}
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DUI_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"-DUI_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}"
|
||||
"-DLLAMA_SOURCE_DIR=${PROJECT_SOURCE_DIR}"
|
||||
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
|
||||
"-DHF_VERSION=${HF_UI_VERSION}"
|
||||
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
|
||||
"-DBUILD_UI=${LLAMA_BUILD_UI}"
|
||||
"-DLLAMA_UI_EMBED=${LLAMA_UI_EMBED_EXE}"
|
||||
-P "${PROJECT_SOURCE_DIR}/scripts/ui-assets.cmake"
|
||||
COMMENT "Provisioning UI assets"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
target_compile_definitions(${TARGET} PUBLIC ${UI_COMPILE_DEFS})
|
||||
add_dependencies(llama-ui-assets llama-ui-embed)
|
||||
|
||||
if(TARGET_SRCS)
|
||||
# List generated .hpp files as sources so CMake tracks them as build dependencies
|
||||
target_sources(${TARGET} PRIVATE ${TARGET_SRCS})
|
||||
set_source_files_properties(${TARGET_SRCS} PROPERTIES HEADER_FILE_ONLY TRUE)
|
||||
endif()
|
||||
set_source_files_properties(${UI_CPP} ${UI_H} PROPERTIES GENERATED TRUE)
|
||||
|
||||
add_library(${TARGET} STATIC ${UI_CPP} ${UI_H})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||
add_dependencies(${TARGET} llama-ui-assets)
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
)
|
||||
|
||||
144
tools/ui/embed.cpp
Normal file
144
tools/ui/embed.cpp
Normal file
@@ -0,0 +1,144 @@
|
||||
// llama-ui-embed: generate ui.cpp / ui.h that embed UI assets as C arrays.
|
||||
//
|
||||
// Usage:
|
||||
// llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Load the complete contents of the file at `path` into `out` as raw bytes.
//
// Returns true on success. Returns false when the file cannot be opened (a
// message is printed to stderr in that case), when its size cannot be
// determined, or when the read fails.
static bool read_file(const std::string & path, std::vector<unsigned char> & out) {
    // Open positioned at the end so that tellg() reports the file size.
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (!in) {
        fprintf(stderr, "embed: cannot open %s\n", path.c_str());
        return false;
    }

    const std::streamoff n_bytes = in.tellg();
    if (n_bytes < 0) {
        return false;
    }

    in.seekg(0);
    out.resize(static_cast<size_t>(n_bytes));

    // Nothing to read for an empty file; `out` has already been cleared by resize(0).
    if (n_bytes == 0) {
        return true;
    }

    in.read(reinterpret_cast<char *>(out.data()), n_bytes);
    return !in.fail();
}
|
||||
|
||||
// Append every byte of `bytes` to `out` formatted as "0xNN," (lowercase hex,
// always two digits, trailing comma after each element).
static void append_bytes_hex(std::string & out, const std::vector<unsigned char> & bytes) {
    static const char digits[] = "0123456789abcdef";

    // Each byte expands to exactly five characters.
    out.reserve(out.size() + bytes.size() * 5);

    for (size_t i = 0; i < bytes.size(); ++i) {
        const unsigned char value = bytes[i];
        const char token[5] = { '0', 'x', digits[value >> 4], digits[value & 0xf], ',' };
        out.append(token, sizeof(token));
    }
}
|
||||
|
||||
// Write `content` to `path` unless the file already holds exactly those bytes.
//
// Leaving an unchanged file untouched keeps its timestamp, so anything that
// depends on it is not needlessly rebuilt. Returns true when the file is
// already up to date or was written successfully; false (with a message on
// stderr) when it cannot be opened for writing, or false when the write fails.
static bool write_if_different(const std::string & path, const std::string & content) {
    // Open at the end so tellg() gives the current size for a cheap first comparison.
    std::ifstream current(path, std::ios::binary | std::ios::ate);
    if (current) {
        const std::streamoff cur_size = current.tellg();
        const bool same_size = cur_size >= 0 && static_cast<size_t>(cur_size) == content.size();
        if (same_size) {
            std::string on_disk(static_cast<size_t>(cur_size), '\0');
            current.seekg(0);
            bool loaded = true;
            if (cur_size != 0) {
                current.read(on_disk.data(), cur_size);
                loaded = !current.fail();
            }
            if (loaded && on_disk == content) {
                return true;
            }
        }
    }

    std::ofstream sink(path, std::ios::binary | std::ios::trunc);
    if (!sink) {
        fprintf(stderr, "embed: cannot write %s\n", path.c_str());
        return false;
    }
    if (content.size() > 0) {
        sink.write(content.data(), static_cast<std::streamsize>(content.size()));
    }
    return sink.good();
}
|
||||
|
||||
// printf-style formatting into a std::string.
//
// `pattern` and the variadic arguments follow the vsnprintf conventions.
// Returns the formatted text, or an empty string when formatting fails or
// produces no output.
//
// vsnprintf returns the length the full output would have, which can exceed
// the size of the stack buffer; in that case the text is formatted a second
// time into a buffer of exactly the required size instead of reading past the
// end of the stack buffer.
static std::string fmt(const char * pattern, ...) {
    char tmp[512];

    va_list ap;
    va_start(ap, pattern);

    // A va_list may only be traversed once, so keep a copy for a possible second pass.
    va_list ap_retry;
    va_copy(ap_retry, ap);

    const int n = vsnprintf(tmp, sizeof(tmp), pattern, ap);
    va_end(ap);

    std::string result;
    if (n > 0) {
        const size_t len = static_cast<size_t>(n);
        if (len < sizeof(tmp)) {
            result.assign(tmp, len);
        } else {
            // Output was truncated: format again with room for the terminating NUL.
            result.resize(len + 1);
            vsnprintf(&result[0], result.size(), pattern, ap_retry);
            result.resize(len);
        }
    }
    va_end(ap_retry);

    return result;
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3 || ((argc - 3) % 2) != 0) {
|
||||
fprintf(stderr, "usage: %s <out_cpp> <out_h> [<name> <path>]...\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::string out_cpp = argv[1];
|
||||
const std::string out_h = argv[2];
|
||||
const int n_assets = (argc - 3) / 2;
|
||||
|
||||
std::string h;
|
||||
h += "#pragma once\n\n#include <stddef.h>\n\n";
|
||||
if (n_assets > 0) {
|
||||
h += "#define LLAMA_UI_HAS_ASSETS 1\n\n";
|
||||
}
|
||||
h +=
|
||||
"struct llama_ui_asset {\n"
|
||||
" const char * name;\n"
|
||||
" const unsigned char * data;\n"
|
||||
" size_t size;\n"
|
||||
"};\n\n"
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char * name);\n";
|
||||
|
||||
std::string cpp;
|
||||
cpp += "#include \"ui.h\"\n\n#include <string.h>\n\n";
|
||||
|
||||
if (n_assets > 0) {
|
||||
for (int i = 0; i < n_assets; i++) {
|
||||
const char * path = argv[3 + i * 2 + 1];
|
||||
std::vector<unsigned char> bytes;
|
||||
if (!read_file(path, bytes)) {
|
||||
return 1;
|
||||
}
|
||||
cpp += fmt("static const unsigned char asset_%d_data[] = {", i);
|
||||
append_bytes_hex(cpp, bytes);
|
||||
cpp += fmt("};\nstatic const size_t asset_%d_size = %lu;\n\n",
|
||||
i, static_cast<unsigned long>(bytes.size()));
|
||||
}
|
||||
|
||||
cpp += "static const llama_ui_asset g_assets[] = {\n";
|
||||
for (int i = 0; i < n_assets; i++) {
|
||||
const char * name = argv[3 + i * 2];
|
||||
cpp += fmt(" { \"%s\", asset_%d_data, asset_%d_size },\n", name, i, i);
|
||||
}
|
||||
cpp += "};\n\n";
|
||||
|
||||
cpp +=
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char * name) {\n"
|
||||
" for (const auto & a : g_assets) {\n"
|
||||
" if (strcmp(a.name, name) == 0) {\n"
|
||||
" return &a;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" return nullptr;\n"
|
||||
"}\n";
|
||||
} else {
|
||||
cpp +=
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char *) {\n"
|
||||
" return nullptr;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
bool ok = true;
|
||||
ok = write_if_different(out_h, h) && ok;
|
||||
ok = write_if_different(out_cpp, cpp) && ok;
|
||||
return ok ? 0 : 1;
|
||||
}
|
||||
@@ -19,7 +19,7 @@ const GUIDE_FOR_FRONTEND = `
|
||||
-->
|
||||
`.trim();
|
||||
|
||||
const OUTPUT_DIR = '../../build/tools/ui/dist';
|
||||
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? './dist';
|
||||
|
||||
export function llamaCppBuildPlugin(): Plugin {
|
||||
return {
|
||||
|
||||
15
tools/ui/sources.cmake
Normal file
15
tools/ui/sources.cmake
Normal file
@@ -0,0 +1,15 @@
|
||||
# Inputs used to decide whether the npm build output is up-to-date.

# Wildcard patterns covering whole source directories.
set(UI_SOURCE_GLOBS "src/*" "static/*")

# Individual files that are tracked in addition to the patterns above.
set(UI_SOURCE_FILES
    "package.json"
    "package-lock.json"
    "vite.config.ts"
    "svelte.config.js"
    "tsconfig.json"
    "scripts/vite-plugin-llama-cpp-build.ts"
)
|
||||
@@ -2,6 +2,10 @@ import { mdsvex } from 'mdsvex';
|
||||
import adapter from '@sveltejs/adapter-static';
|
||||
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
|
||||
|
||||
// CMake sets LLAMA_UI_OUT_DIR to the staging dir under the build tree; manual
|
||||
// `npm run build` runs without the env var default to ./dist.
|
||||
const outDir = process.env.LLAMA_UI_OUT_DIR ?? './dist';
|
||||
|
||||
/** @type {import('@sveltejs/kit').Config} */
|
||||
const config = {
|
||||
// Consult https://svelte.dev/docs/kit/integrations
|
||||
@@ -14,8 +18,8 @@ const config = {
|
||||
},
|
||||
router: { type: 'hash' },
|
||||
adapter: adapter({
|
||||
pages: '../../build/tools/ui/dist',
|
||||
assets: '../../build/tools/ui/dist',
|
||||
pages: outDir,
|
||||
assets: outDir,
|
||||
fallback: 'index.html',
|
||||
precompress: false,
|
||||
strict: true
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
#ifdef LLAMA_BUILD_UI
|
||||
// auto generated files (see README.md for details)
|
||||
#include "index.html.hpp"
|
||||
#include "bundle.js.hpp"
|
||||
#include "bundle.css.hpp"
|
||||
#include "loading.html.hpp"
|
||||
#endif
|
||||
@@ -1,17 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// TODO @ngxson : refactor, wrap these in a function
|
||||
|
||||
#ifdef LLAMA_BUILD_UI
|
||||
extern unsigned char index_html[];
|
||||
extern unsigned int index_html_len;
|
||||
|
||||
extern unsigned char bundle_js[];
|
||||
extern unsigned int bundle_js_len;
|
||||
|
||||
extern unsigned char bundle_css[];
|
||||
extern unsigned int bundle_css_len;
|
||||
|
||||
extern unsigned char loading_html[];
|
||||
extern unsigned int loading_html_len;
|
||||
#endif
|
||||
Reference in New Issue
Block a user