mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-13 14:32:59 +02:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
597b6672e8 | ||
|
|
57fe1f07c3 | ||
|
|
d8a24ccee2 | ||
|
|
c34b92235b | ||
|
|
e37abd6b5f |
5
.github/workflows/release.yml
vendored
5
.github/workflows/release.yml
vendored
@@ -1508,7 +1508,8 @@ jobs:
|
||||
- [Ubuntu arm64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-arm64.tar.gz)
|
||||
- [Ubuntu x64 (ROCm 7.2)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-7.2-x64.tar.gz)
|
||||
- [Ubuntu x64 (OpenVINO)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-${{ needs.ubuntu-24-openvino.outputs.openvino_version }}-x64.tar.gz)
|
||||
- Ubuntu x64 (SYCL FP32) [DISABLED](https://github.com/ggml-org/llama.cpp/pull/23705)
|
||||
- [Ubuntu x64 (SYCL FP32)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-sycl-fp32-x64.tar.gz)
|
||||
- [Ubuntu x64 (SYCL FP16)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-sycl-fp16-x64.tar.gz)
|
||||
|
||||
**Android:**
|
||||
- [Android arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-android-arm64.tar.gz)
|
||||
@@ -1519,7 +1520,7 @@ jobs:
|
||||
- [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
|
||||
- [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.3-x64.zip) - [CUDA 13.3 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.3-x64.zip)
|
||||
- [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
|
||||
- Windows x64 (SYCL) [DISABLED](https://github.com/ggml-org/llama.cpp/pull/23705)
|
||||
- [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
|
||||
- [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)
|
||||
|
||||
**openEuler:**
|
||||
|
||||
7
.github/workflows/ui-build-self-hosted.yml
vendored
7
.github/workflows/ui-build-self-hosted.yml
vendored
@@ -28,13 +28,6 @@ jobs:
|
||||
run: npm run build
|
||||
working-directory: tools/ui
|
||||
|
||||
- name: Generate checksums
|
||||
run: |
|
||||
cd tools/ui/dist
|
||||
for f in *; do
|
||||
sha256sum "$f" | awk '{print $1, $2}' >> checksums.txt
|
||||
done
|
||||
|
||||
- name: Upload built UI
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
|
||||
9
.github/workflows/ui-build.yml
vendored
9
.github/workflows/ui-build.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
- name: Build application
|
||||
env:
|
||||
HF_UI_VERSION: ${{ inputs.hf_ui_version || '' }}
|
||||
LLAMA_UI_VERSION: ${{ inputs.hf_ui_version || 'b0000' }}
|
||||
LLAMA_BUILD_NUMBER: ${{ inputs.hf_ui_version || 'b0000' }}
|
||||
run: npm run build
|
||||
working-directory: tools/ui
|
||||
|
||||
@@ -40,13 +40,6 @@ jobs:
|
||||
run: npx vitest --project=unit --run tests/unit/pwa.spec.ts
|
||||
working-directory: tools/ui
|
||||
|
||||
- name: Generate checksums
|
||||
run: |
|
||||
cd tools/ui/dist
|
||||
for f in *; do
|
||||
sha256sum "$f" | awk '{print $1, $2}' >> checksums.txt
|
||||
done
|
||||
|
||||
- name: Upload built UI
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
|
||||
6
.github/workflows/ui-publish.yml
vendored
6
.github/workflows/ui-publish.yml
vendored
@@ -40,6 +40,12 @@ jobs:
|
||||
name: ui-build
|
||||
path: tools/ui/dist/
|
||||
|
||||
- name: Create distribution archive
|
||||
run: |
|
||||
tar -czf dist.tar.gz -C tools/ui/dist .
|
||||
sha256sum dist.tar.gz > dist.tar.gz.sha256
|
||||
mv dist.tar.gz dist.tar.gz.sha256 tools/ui/dist/
|
||||
|
||||
- name: Install Hugging Face Hub CLI
|
||||
run: pip install -U huggingface_hub
|
||||
|
||||
|
||||
@@ -2243,6 +2243,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
params.image_max_tokens = value;
|
||||
}
|
||||
).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MAX_TOKENS"));
|
||||
add_opt(common_arg(
|
||||
{"--mtmd-batch-max-tokens"}, "N",
|
||||
string_format("maximum number of image tokens per batch when encoding images (default: %d)", params.mtmd_batch_max_tokens),
|
||||
[](common_params & params, int value) {
|
||||
params.mtmd_batch_max_tokens = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MTMD_BATCH_MAX_TOKENS"));
|
||||
if (llama_supports_rpc()) {
|
||||
add_opt(common_arg(
|
||||
{"--rpc"}, "SERVERS",
|
||||
|
||||
@@ -575,6 +575,7 @@ struct common_params {
|
||||
std::vector<std::string> image; // path to image file(s) ; TODO: change the name to "media"
|
||||
int image_min_tokens = -1;
|
||||
int image_max_tokens = -1;
|
||||
int mtmd_batch_max_tokens = 1024;
|
||||
|
||||
// finetune
|
||||
struct lr_opt lr;
|
||||
|
||||
@@ -26,7 +26,7 @@ class common_params_fit_exception : public std::runtime_error {
|
||||
using std::runtime_error::runtime_error;
|
||||
};
|
||||
|
||||
std::vector<llama_device_memory_data> common_get_device_memory_data(
|
||||
static std::vector<llama_device_memory_data> common_get_device_memory_data_impl(
|
||||
const char * path_model,
|
||||
const llama_model_params * mparams,
|
||||
const llama_context_params * cparams,
|
||||
@@ -150,6 +150,29 @@ std::vector<llama_device_memory_data> common_get_device_memory_data(
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Public entry point for querying per-device memory usage.
//
// Forwards all arguments unchanged to common_get_device_memory_data_impl() and
// converts every llama_device_memory_data entry it returns into a plain
// common_device_memory_data record: `total` and `free` are copied directly,
// `model`, `context` and `compute` are taken from the entry's `mb` breakdown.
// The returned vector has one element per entry produced by the impl, in the
// same order.
common_device_memory_data_vec common_get_device_memory_data(
        const char * path_model,
        const llama_model_params * mparams,
        const llama_context_params * cparams,
        std::vector<ggml_backend_dev_t> & devs,
        uint32_t & hp_ngl,
        uint32_t & hp_n_ctx_train,
        uint32_t & hp_n_expert,
        ggml_log_level log_level) {
    const std::vector<llama_device_memory_data> raw = common_get_device_memory_data_impl(
        path_model, mparams, cparams, devs, hp_ngl, hp_n_ctx_train, hp_n_expert, log_level);

    common_device_memory_data_vec converted;
    converted.reserve(raw.size());

    for (const auto & src : raw) {
        common_device_memory_data dst{};
        dst.total   = src.total;
        dst.free    = src.free;
        dst.model   = src.mb.model;
        dst.context = src.mb.context;
        dst.compute = src.mb.compute;
        converted.push_back(dst);
    }

    return converted;
}
|
||||
|
||||
static void common_params_fit_impl(
|
||||
const char * path_model, struct llama_model_params * mparams, struct llama_context_params * cparams,
|
||||
float * tensor_split, struct llama_model_tensor_buft_override * tensor_buft_overrides,
|
||||
@@ -169,7 +192,7 @@ static void common_params_fit_impl(
|
||||
// step 1: get data for default parameters and check whether any changes are necessary in the first place
|
||||
|
||||
LOG_TRC("%s: getting device memory data for initial parameters:\n", __func__);
|
||||
const dmds_t dmds_full = common_get_device_memory_data(path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, log_level);
|
||||
const dmds_t dmds_full = common_get_device_memory_data_impl(path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, log_level);
|
||||
const size_t nd = devs.size(); // number of devices
|
||||
|
||||
std::vector<int64_t> margins; // this function uses int64_t rather than size_t for memory sizes to more conveniently handle deficits
|
||||
@@ -304,7 +327,7 @@ static void common_params_fit_impl(
|
||||
|
||||
int64_t sum_projected_used_min_ctx = 0;
|
||||
cparams->n_ctx = n_ctx_min;
|
||||
const dmds_t dmds_min_ctx = common_get_device_memory_data(path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, log_level);
|
||||
const dmds_t dmds_min_ctx = common_get_device_memory_data_impl(path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, log_level);
|
||||
if (nd == 0) {
|
||||
sum_projected_used_min_ctx = dmds_min_ctx.back().mb.total();
|
||||
} else {
|
||||
@@ -482,7 +505,7 @@ static void common_params_fit_impl(
|
||||
llama_model_params mparams_copy = *mparams;
|
||||
set_ngl_tensor_split_tbo(ngl_per_device, overflow_bufts, mparams_copy);
|
||||
|
||||
const dmds_t dmd_nl = common_get_device_memory_data(
|
||||
const dmds_t dmd_nl = common_get_device_memory_data_impl(
|
||||
path_model, &mparams_copy, cparams, devs, hp_ngl, hp_nct, hp_nex, log_level);
|
||||
|
||||
LOG_TRC("%s: memory for test allocation by device:\n", func_name);
|
||||
@@ -510,7 +533,7 @@ static void common_params_fit_impl(
|
||||
mparams->tensor_buft_overrides = tensor_buft_overrides;
|
||||
|
||||
LOG_TRC("%s: getting device memory data with all MoE tensors moved to system memory:\n", __func__);
|
||||
const dmds_t dmds_cpu_moe = common_get_device_memory_data(
|
||||
const dmds_t dmds_cpu_moe = common_get_device_memory_data_impl(
|
||||
path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, log_level);
|
||||
|
||||
for (size_t id = 0; id < nd; id++) {
|
||||
@@ -940,7 +963,7 @@ void common_fit_print(
|
||||
uint32_t hp_nct = 0; // hparams.n_ctx_train
|
||||
uint32_t hp_nex = 0; // hparams.n_expert
|
||||
|
||||
auto dmd = common_get_device_memory_data(path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, GGML_LOG_LEVEL_ERROR);
|
||||
auto dmd = common_get_device_memory_data_impl(path_model, mparams, cparams, devs, hp_ngl, hp_nct, hp_nex, GGML_LOG_LEVEL_ERROR);
|
||||
GGML_ASSERT(dmd.size() == devs.size() + 1);
|
||||
|
||||
for (size_t id = 0; id < devs.size(); id++) {
|
||||
|
||||
14
common/fit.h
14
common/fit.h
@@ -34,12 +34,18 @@ void common_fit_print(
|
||||
|
||||
void common_memory_breakdown_print(const llama_context * ctx);
|
||||
|
||||
// TODO: convert this to common_device_memory_data that wraps llama_device_memory_data
|
||||
// add API for accessing the internal `llama-ext.h` information
|
||||
struct llama_device_memory_data;
|
||||
// Flat per-device memory record returned by common_get_device_memory_data().
// Each field is copied from the corresponding llama_device_memory_data entry.
// NOTE(review): values are presumably byte counts - confirm against llama-ext.h.
struct common_device_memory_data {
|
||||
int64_t total; // copied from llama_device_memory_data::total
|
||||
int64_t free; // copied from llama_device_memory_data::free
|
||||
size_t model; // copied from llama_device_memory_data::mb.model
|
||||
size_t context; // copied from llama_device_memory_data::mb.context
|
||||
size_t compute; // copied from llama_device_memory_data::mb.compute
|
||||
};
|
||||
|
||||
using common_device_memory_data_vec = std::vector<common_device_memory_data>;
|
||||
|
||||
// Load a model + context with no_alloc and return the per-device memory breakdown.
|
||||
std::vector<llama_device_memory_data> common_get_device_memory_data(
|
||||
common_device_memory_data_vec common_get_device_memory_data(
|
||||
const char * path_model,
|
||||
const llama_model_params * mparams,
|
||||
const llama_context_params * cparams,
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
# 1. Pre-built assets in SRC_DIST_DIR (manually built by user)
|
||||
# 2. If BUILD_UI=ON: npm build
|
||||
# 3. If above did not produce assets and HF_ENABLED=ON: HF Bucket download
|
||||
# of dist.tar.gz (verified against dist.tar.gz.sha256)
|
||||
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(UI_SOURCE_DIR "" CACHE STRING "UI source directory (to run npm build)")
|
||||
set(UI_BINARY_DIR "" CACHE STRING "UI binary directory (to store generated files)")
|
||||
@@ -16,124 +17,16 @@ set(HF_ENABLED "" CACHE STRING "Whether to allow HF Bucket download (ON/O
|
||||
set(BUILD_UI "" CACHE STRING "Build UI via npm (ON/OFF)")
|
||||
set(LLAMA_UI_EMBED "" CACHE STRING "Path to llama-ui-embed helper")
|
||||
|
||||
# IMPORTANT: When adding PWA assets, sync across all 3 places:
|
||||
# 1. tools/ui/src/lib/constants/pwa.ts (APPLE_DEVICES, PUBLIC_ENDPOINTS)
|
||||
# 2. tools/server/server-http.cpp (public_endpoints)
|
||||
# 3. scripts/ui-assets.cmake (ASSETS list)
|
||||
# - C++ (server-http.cpp) - public endpoints (splash screens generated via helper)
|
||||
# - TypeScript (constants/pwa.ts) - APPLE_DEVICES, PWA_MANIFEST, PUBLIC_ENDPOINTS
|
||||
#
|
||||
# When adding/changing PWA assets, update tools/ui/src/lib/constants/pwa.ts first,
|
||||
# then sync any new file names here and in server-http.cpp.
|
||||
set(ASSETS
|
||||
index.html
|
||||
loading.html
|
||||
# PWA assets
|
||||
favicon.ico
|
||||
favicon-dark.ico
|
||||
favicon.svg
|
||||
favicon-dark.svg
|
||||
pwa-64x64.png
|
||||
pwa-192x192.png
|
||||
pwa-512x512.png
|
||||
maskable-icon-512x512.png
|
||||
apple-touch-icon-180x180.png
|
||||
# iOS splash screens
|
||||
apple-splash-portrait-640x1136.png
|
||||
apple-splash-landscape-1136x640.png
|
||||
apple-splash-portrait-750x1334.png
|
||||
apple-splash-landscape-1334x750.png
|
||||
apple-splash-portrait-1170x2532.png
|
||||
apple-splash-landscape-2532x1170.png
|
||||
apple-splash-portrait-1179x2556.png
|
||||
apple-splash-landscape-2556x1179.png
|
||||
apple-splash-portrait-1206x2622.png
|
||||
apple-splash-landscape-2622x1206.png
|
||||
apple-splash-portrait-1284x2778.png
|
||||
apple-splash-landscape-2778x1284.png
|
||||
apple-splash-portrait-1290x2796.png
|
||||
apple-splash-landscape-2796x1290.png
|
||||
apple-splash-portrait-1320x2868.png
|
||||
apple-splash-landscape-2868x1320.png
|
||||
apple-splash-portrait-1488x2266.png
|
||||
apple-splash-landscape-2266x1488.png
|
||||
apple-splash-portrait-1640x2360.png
|
||||
apple-splash-landscape-2360x1640.png
|
||||
apple-splash-portrait-1668x2388.png
|
||||
apple-splash-landscape-2388x1668.png
|
||||
apple-splash-portrait-2048x2732.png
|
||||
apple-splash-landscape-2732x2048.png
|
||||
# iOS dark splash screens
|
||||
apple-splash-portrait-dark-640x1136.png
|
||||
apple-splash-landscape-dark-1136x640.png
|
||||
apple-splash-portrait-dark-750x1334.png
|
||||
apple-splash-landscape-dark-1334x750.png
|
||||
apple-splash-portrait-dark-1170x2532.png
|
||||
apple-splash-landscape-dark-2532x1170.png
|
||||
apple-splash-portrait-dark-1179x2556.png
|
||||
apple-splash-landscape-dark-2556x1179.png
|
||||
apple-splash-portrait-dark-1206x2622.png
|
||||
apple-splash-landscape-dark-2622x1206.png
|
||||
apple-splash-portrait-dark-1284x2778.png
|
||||
apple-splash-landscape-dark-2778x1284.png
|
||||
apple-splash-portrait-dark-1290x2796.png
|
||||
apple-splash-landscape-dark-2796x1290.png
|
||||
apple-splash-portrait-dark-1320x2868.png
|
||||
apple-splash-landscape-dark-2868x1320.png
|
||||
apple-splash-portrait-dark-1640x2360.png
|
||||
apple-splash-landscape-dark-2360x1640.png
|
||||
apple-splash-portrait-dark-1668x2388.png
|
||||
apple-splash-landscape-dark-2388x1668.png
|
||||
apple-splash-portrait-dark-2048x2732.png
|
||||
apple-splash-landscape-dark-2732x2048.png
|
||||
manifest.webmanifest
|
||||
sw.js
|
||||
_app/version.json
|
||||
build.json
|
||||
)
|
||||
|
||||
set(DIST_DIR "${UI_BINARY_DIR}/dist")
|
||||
set(SRC_DIST_DIR "${UI_SOURCE_DIR}/dist")
|
||||
set(STAMP_FILE "${UI_BINARY_DIR}/.ui-stamp")
|
||||
set(UI_CPP "${UI_BINARY_DIR}/ui.cpp")
|
||||
set(UI_H "${UI_BINARY_DIR}/ui.h")
|
||||
|
||||
# assets_present(<out_var>)
#
# Sets <out_var> in the caller's scope to TRUE when every entry of ASSETS
# exists under DIST_DIR, and to FALSE as soon as one entry is missing.
function(assets_present out_var)
    foreach(asset_name ${ASSETS})
        if(EXISTS "${DIST_DIR}/${asset_name}")
            continue()
        endif()

        # first missing asset decides the result
        set(${out_var} FALSE PARENT_SCOPE)
        return()
    endforeach()

    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
|
||||
|
||||
# copy_src_dist(<out_var>)
#
# Copies the pre-built UI assets listed in ASSETS from SRC_DIST_DIR to DIST_DIR.
# Nothing is copied unless every listed asset exists in SRC_DIST_DIR.
# <out_var> is set in the caller's scope: FALSE when at least one asset is
# missing from SRC_DIST_DIR, TRUE after the copy commands have been issued
# (the result of the individual copy commands is not checked).
function(copy_src_dist out_var)
    set(src_complete TRUE)
    foreach(asset_name ${ASSETS})
        if(NOT EXISTS "${SRC_DIST_DIR}/${asset_name}")
            set(src_complete FALSE)
            break()
        endif()
    endforeach()

    if(NOT src_complete)
        set(${out_var} FALSE PARENT_SCOPE)
        return()
    endif()

    file(MAKE_DIRECTORY "${DIST_DIR}")
    message(STATUS "UI: using pre-built assets from ${SRC_DIST_DIR}")

    foreach(asset_name ${ASSETS})
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E copy_if_different
                    "${SRC_DIST_DIR}/${asset_name}" "${DIST_DIR}/${asset_name}"
        )
    endforeach()

    set(${out_var} TRUE PARENT_SCOPE)
endfunction()
|
||||
|
||||
function(npm_build_should_skip out_var)
|
||||
set(${out_var} FALSE PARENT_SCOPE)
|
||||
|
||||
assets_present(present)
|
||||
if(NOT present)
|
||||
if(NOT EXISTS "${DIST_DIR}/index.html")
|
||||
return()
|
||||
endif()
|
||||
|
||||
@@ -240,8 +133,7 @@ function(npm_build out_var)
|
||||
return()
|
||||
endif()
|
||||
|
||||
assets_present(present)
|
||||
if(NOT present)
|
||||
if(NOT EXISTS "${DIST_DIR}/index.html")
|
||||
message(STATUS "UI: npm build finished but assets missing in ${DIST_DIR}")
|
||||
return()
|
||||
endif()
|
||||
@@ -272,7 +164,7 @@ function(hf_download version out_var out_resolved)
|
||||
set(${out_var} FALSE PARENT_SCOPE)
|
||||
set(${out_resolved} "" PARENT_SCOPE)
|
||||
|
||||
file(MAKE_DIRECTORY "${DIST_DIR}")
|
||||
set(archive "${UI_BINARY_DIR}/dist.tar.gz")
|
||||
|
||||
set(candidates "")
|
||||
if(NOT "${version}" STREQUAL "")
|
||||
@@ -281,97 +173,63 @@ function(hf_download version out_var out_resolved)
|
||||
list(APPEND candidates "latest")
|
||||
|
||||
foreach(resolved ${candidates})
|
||||
set(base "https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/${resolved}")
|
||||
set(base "https://huggingface.co/buckets/${HF_BUCKET}/resolve/${resolved}")
|
||||
|
||||
message(STATUS "UI: downloading from ${resolved}: ${base}")
|
||||
message(STATUS "UI: downloading from ${resolved}: ${base}/dist.tar.gz")
|
||||
|
||||
set(ok TRUE)
|
||||
foreach(asset ${ASSETS})
|
||||
file(DOWNLOAD "${base}/${asset}?download=true" "${DIST_DIR}/${asset}"
|
||||
STATUS status TIMEOUT 60
|
||||
)
|
||||
list(GET status 0 rc)
|
||||
if(NOT rc EQUAL 0)
|
||||
list(GET status 1 errmsg)
|
||||
message(STATUS "UI: download ${asset} from ${resolved} failed: ${errmsg}")
|
||||
set(ok FALSE)
|
||||
break()
|
||||
endif()
|
||||
message(STATUS "UI: downloaded ${asset}")
|
||||
endforeach()
|
||||
|
||||
if(NOT ok)
|
||||
file(DOWNLOAD "${base}/dist.tar.gz?download=true" "${archive}"
|
||||
STATUS status TIMEOUT 300
|
||||
)
|
||||
list(GET status 0 rc)
|
||||
if(NOT rc EQUAL 0)
|
||||
list(GET status 1 errmsg)
|
||||
message(STATUS "UI: download dist.tar.gz from ${resolved} failed: ${errmsg}")
|
||||
continue()
|
||||
endif()
|
||||
|
||||
# Best-effort checksum verification
|
||||
file(DOWNLOAD "${base}/checksums.txt?download=true" "${DIST_DIR}/checksums.txt"
|
||||
STATUS cs_status TIMEOUT 30
|
||||
file(DOWNLOAD "${base}/dist.tar.gz.sha256?download=true" "${archive}.sha256"
|
||||
STATUS status TIMEOUT 30
|
||||
)
|
||||
list(GET cs_status 0 cs_rc)
|
||||
if(cs_rc EQUAL 0)
|
||||
message(STATUS "UI: verifying checksums")
|
||||
file(STRINGS "${DIST_DIR}/checksums.txt" cs_lines)
|
||||
foreach(asset ${ASSETS})
|
||||
file(SHA256 "${DIST_DIR}/${asset}" h)
|
||||
string(TOLOWER "${h}" h)
|
||||
string(REGEX MATCH "${h}[ \t]+${asset}" m "${cs_lines}")
|
||||
if(NOT m)
|
||||
message(WARNING "UI: checksum verification failed for ${asset}")
|
||||
set(ok FALSE)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
if(ok)
|
||||
message(STATUS "UI: all checksums verified")
|
||||
endif()
|
||||
list(GET status 0 rc)
|
||||
if(NOT rc EQUAL 0)
|
||||
list(GET status 1 errmsg)
|
||||
message(STATUS "UI: download dist.tar.gz.sha256 from ${resolved} failed: ${errmsg}")
|
||||
continue()
|
||||
endif()
|
||||
|
||||
if(ok)
|
||||
set(${out_var} TRUE PARENT_SCOPE)
|
||||
set(${out_resolved} "${resolved}" PARENT_SCOPE)
|
||||
return()
|
||||
# Validate sha256 checksum
|
||||
file(READ "${archive}.sha256" expected)
|
||||
string(REGEX MATCH "^[0-9a-fA-F]+" expected "${expected}")
|
||||
string(TOLOWER "${expected}" expected)
|
||||
file(SHA256 "${archive}" actual)
|
||||
if("${expected}" STREQUAL "" OR NOT "${actual}" STREQUAL "${expected}")
|
||||
message(STATUS "UI: checksum mismatch for dist.tar.gz from ${resolved}")
|
||||
continue()
|
||||
endif()
|
||||
|
||||
# Clear DIST_DIR to remove stale files first
|
||||
file(REMOVE_RECURSE "${DIST_DIR}")
|
||||
|
||||
file(ARCHIVE_EXTRACT INPUT "${archive}" DESTINATION "${DIST_DIR}")
|
||||
|
||||
if(NOT EXISTS "${DIST_DIR}/index.html")
|
||||
message(STATUS "UI: archive from ${resolved} is missing required assets")
|
||||
continue()
|
||||
endif()
|
||||
|
||||
message(STATUS "UI: archive verified and extracted")
|
||||
set(${out_var} TRUE PARENT_SCOPE)
|
||||
set(${out_resolved} "${resolved}" PARENT_SCOPE)
|
||||
return()
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
function(emit_files)
|
||||
assets_present(present)
|
||||
|
||||
function(emit_files dist_dir)
|
||||
set(args "${UI_CPP}" "${UI_H}")
|
||||
if(present)
|
||||
foreach(asset ${ASSETS})
|
||||
list(APPEND args "${asset}" "${DIST_DIR}/${asset}")
|
||||
endforeach()
|
||||
|
||||
# Bundle files live in _app/immutable/ — vanilla SvelteKit output, no plugin
|
||||
# rewriting. Embedded names must match the exact _app/ paths that index.html
|
||||
# and sw.js reference.
|
||||
file(GLOB_RECURSE detected_bundle_js "${DIST_DIR}/_app/immutable/bundle.*.js")
|
||||
file(GLOB_RECURSE detected_bundle_css "${DIST_DIR}/_app/immutable/assets/bundle.*.css")
|
||||
file(GLOB_RECURSE detected_workbox "${DIST_DIR}/workbox-*.js")
|
||||
# Compute relative path from DIST_DIR to each found file.
|
||||
# e.g. /path/to/build/tools/ui/dist/_app/immutable/bundle.XXX.js
|
||||
# -> _app/immutable/bundle.XXX.js
|
||||
foreach(f ${detected_bundle_js})
|
||||
string(REPLACE "${DIST_DIR}/" "" rel "${f}")
|
||||
list(APPEND args "${rel}" "${f}")
|
||||
endforeach()
|
||||
foreach(f ${detected_bundle_css})
|
||||
string(REPLACE "${DIST_DIR}/" "" rel "${f}")
|
||||
list(APPEND args "${rel}" "${f}")
|
||||
endforeach()
|
||||
foreach(f ${detected_workbox})
|
||||
string(REPLACE "${DIST_DIR}/" "" rel "${f}")
|
||||
list(APPEND args "${rel}" "${f}")
|
||||
endforeach()
|
||||
if(EXISTS "${dist_dir}/index.html")
|
||||
list(APPEND args "${dist_dir}")
|
||||
endif()
|
||||
|
||||
# Create build.json with the llama.cpp build number for UI version display.
|
||||
# This is separate from SvelteKit's _app/version.json (used for SW cache invalidation).
|
||||
# build.json is generated by the vite plugin (buildInfoPlugin) during npm build.
|
||||
# CMake just embeds it from the dist that npm produced.
|
||||
|
||||
execute_process(
|
||||
COMMAND "${LLAMA_UI_EMBED}" ${args}
|
||||
RESULT_VARIABLE rc
|
||||
@@ -384,9 +242,9 @@ endfunction()
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Priority 1: pre-built assets supplied in tools/ui/dist
|
||||
# ---------------------------------------------------------------------------
|
||||
copy_src_dist(SRC_OK)
|
||||
if(SRC_OK)
|
||||
emit_files()
|
||||
if(EXISTS "${SRC_DIST_DIR}/index.html")
|
||||
message(STATUS "UI: using pre-built assets from ${SRC_DIST_DIR}")
|
||||
emit_files("${SRC_DIST_DIR}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
@@ -419,7 +277,10 @@ if(NOT provisioned AND HF_ENABLED)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
assets_present(have_assets)
|
||||
set(have_assets FALSE)
|
||||
if(EXISTS "${DIST_DIR}/index.html")
|
||||
set(have_assets TRUE)
|
||||
endif()
|
||||
if(stamp_ok AND have_assets)
|
||||
message(STATUS "UI: HF stamp '${stamped}' matches version, skipping HF fetch")
|
||||
set(provisioned TRUE)
|
||||
@@ -439,8 +300,7 @@ endif()
|
||||
# 4. Fallback: warn about stale or missing assets, then emit whatever we have
|
||||
# ---------------------------------------------------------------------------
|
||||
if(NOT provisioned)
|
||||
assets_present(have_assets)
|
||||
if(have_assets)
|
||||
if(EXISTS "${DIST_DIR}/index.html")
|
||||
message(WARNING "UI: provisioning failed; embedding stale assets from ${DIST_DIR}")
|
||||
else()
|
||||
message(WARNING "UI: no assets available - building without an embedded UI. "
|
||||
@@ -451,4 +311,4 @@ if(NOT provisioned)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
emit_files()
|
||||
emit_files("${DIST_DIR}")
|
||||
|
||||
@@ -54,6 +54,10 @@ struct clip_graph {
|
||||
virtual ggml_tensor * build_mm(ggml_tensor * w, ggml_tensor * x) const;
|
||||
// TODO: build_mm(w, b, x) to support bias
|
||||
|
||||
// Batching capability flag of this graph builder; the base implementation
// returns false.
// NOTE(review): presumably overridden by builders that can take several images
// in one graph - confirm against the derived clip_graph classes.
virtual bool support_batch() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// utility functions
|
||||
//
|
||||
|
||||
@@ -171,6 +171,8 @@ struct clip_ctx {
|
||||
std::map<ggml_backend_dev_t, size_t> mem_usage;
|
||||
std::map<ggml_backend_dev_t, size_t> mem_compute;
|
||||
|
||||
bool support_batch = false;
|
||||
|
||||
clip_ctx(clip_context_params & ctx_params) {
|
||||
flash_attn_type = ctx_params.flash_attn_type;
|
||||
no_alloc = ctx_params.no_alloc;
|
||||
@@ -314,7 +316,7 @@ ggml_tensor * clip_graph::build_vit(
|
||||
std::function<ggml_tensor *(ggml_tensor *, const clip_layer &)> add_pos,
|
||||
const build_vit_opts & opts
|
||||
) {
|
||||
// batch dim: inp is [n_embd, n_pos] (B==1) or [n_embd, n_pos, B] (multi-tile encode)
|
||||
// batch dim: inp is [n_embd, n_pos, B]
|
||||
const int64_t B = inp->ne[2];
|
||||
|
||||
if (learned_pos_embd) {
|
||||
@@ -862,7 +864,7 @@ ggml_tensor * clip_graph::build_patch_merge_permute(ggml_tensor * cur, int scale
|
||||
return cur;
|
||||
}
|
||||
|
||||
static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch & imgs) {
|
||||
static std::unique_ptr<clip_graph> clip_get_graph_builder(clip_ctx * ctx, const clip_image_f32_batch & imgs) {
|
||||
const clip_image_f32 & img = *imgs.entries[0];
|
||||
std::unique_ptr<clip_graph> builder;
|
||||
|
||||
@@ -1025,7 +1027,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
// TODO [QWEN_VIDEO]: improve this in the future
|
||||
builder->n_batch = imgs.entries.size();
|
||||
|
||||
return builder->build();
|
||||
return builder;
|
||||
}
|
||||
|
||||
//
|
||||
@@ -2819,7 +2821,7 @@ struct clip_model_loader {
|
||||
std::vector<support_info_op> ops;
|
||||
};
|
||||
|
||||
static void warmup(clip_ctx & ctx_clip) {
|
||||
static clip_image_f32_batch get_dummy_batch(clip_ctx & ctx_clip) {
|
||||
// create a fake batch
|
||||
const auto & hparams = ctx_clip.model.hparams;
|
||||
clip_image_f32_batch batch;
|
||||
@@ -2833,6 +2835,20 @@ struct clip_model_loader {
|
||||
LOG_INF("%s: warmup with audio size = %d\n", __func__, hparams.warmup_audio_size);
|
||||
}
|
||||
batch.entries.push_back(std::move(img));
|
||||
return batch;
|
||||
}
|
||||
|
||||
// One-time setup of a clip context:
//  1. sizes buf_compute_meta to hold max_nodes tensor headers plus one graph
//  2. builds a dummy batch, asks the matching graph builder whether it
//     supports batching, and stores the answer in ctx_clip.support_batch
static void init_ctx(clip_ctx & ctx_clip) {
    const auto meta_bytes = ctx_clip.max_nodes * ggml_tensor_overhead() + ggml_graph_overhead();
    ctx_clip.buf_compute_meta.resize(meta_bytes);

    // check batching support
    const auto dummy_batch   = get_dummy_batch(ctx_clip);
    const auto graph_builder = clip_get_graph_builder(&ctx_clip, dummy_batch);

    ctx_clip.support_batch = graph_builder->support_batch();
}
|
||||
|
||||
// Convenience overload: runs warmup(ctx_clip, batch) with the batch produced
// by get_dummy_batch().
static void warmup(clip_ctx & ctx_clip) {
    clip_image_f32_batch dummy_batch = get_dummy_batch(ctx_clip);
    warmup(ctx_clip, dummy_batch);
}
|
||||
|
||||
@@ -2905,9 +2921,7 @@ struct clip_model_loader {
|
||||
|
||||
// only initialize backend buffers, but do not allocate them yet
|
||||
static support_info_graph reserve_compute_meta(clip_ctx & ctx_clip, const clip_image_f32_batch & batch) {
|
||||
ctx_clip.buf_compute_meta.resize(ctx_clip.max_nodes * ggml_tensor_overhead() + ggml_graph_overhead());
|
||||
|
||||
ggml_cgraph * gf = clip_image_build_graph(&ctx_clip, batch);
|
||||
ggml_cgraph * gf = clip_get_graph_builder(&ctx_clip, batch)->build();
|
||||
ggml_backend_sched_reserve(ctx_clip.sched.get(), gf);
|
||||
|
||||
ctx_clip.mem_compute.clear();
|
||||
@@ -3070,6 +3084,7 @@ struct clip_init_result clip_init(const char * fname, struct clip_context_params
|
||||
ctx_vision = new clip_ctx(ctx_params);
|
||||
loader.load_hparams(ctx_vision->model, CLIP_MODALITY_VISION);
|
||||
loader.load_tensors(*ctx_vision);
|
||||
loader.init_ctx(*ctx_vision);
|
||||
if (ctx_params.warmup) {
|
||||
loader.warmup(*ctx_vision);
|
||||
}
|
||||
@@ -3083,6 +3098,7 @@ struct clip_init_result clip_init(const char * fname, struct clip_context_params
|
||||
ctx_audio = new clip_ctx(ctx_params);
|
||||
loader.load_hparams(ctx_audio->model, CLIP_MODALITY_AUDIO);
|
||||
loader.load_tensors(*ctx_audio);
|
||||
loader.init_ctx(*ctx_audio);
|
||||
if (ctx_params.warmup) {
|
||||
loader.warmup(*ctx_audio);
|
||||
}
|
||||
@@ -3484,25 +3500,22 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
|
||||
return n_patches;
|
||||
}
|
||||
|
||||
bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, float * vec) {
|
||||
bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, std::vector<float> & out_vec) {
|
||||
clip_image_f32_batch imgs;
|
||||
clip_image_f32_ptr img_copy(clip_image_f32_init());
|
||||
*img_copy = *img;
|
||||
imgs.entries.push_back(std::move(img_copy));
|
||||
|
||||
return clip_image_batch_encode(ctx, n_threads, &imgs, vec);
|
||||
return clip_image_batch_encode(ctx, n_threads, &imgs, out_vec);
|
||||
}
|
||||
|
||||
bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec) {
|
||||
bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, std::vector<float> & out_batch_embd) {
|
||||
const clip_image_f32_batch & imgs = *imgs_c_ptr;
|
||||
int n_batch_cur = imgs.entries.size();
|
||||
|
||||
// maximum supported batch size, usually == 2 for qwen-vl-based models
|
||||
int n_batch_max = clip_model_n_batch_max(ctx);
|
||||
|
||||
// TODO @ngxson : implement batch size > 1 as a loop
|
||||
// we don't need true batching support because the cgraph is going to be big anyway
|
||||
if (n_batch_cur > n_batch_max) {
|
||||
// [QWEN_VIDEO] for video models, the batch dimension is used as temporal dimension for merged frames
|
||||
if (!ctx->support_batch && n_batch_cur > clip_model_n_temporal_merge(ctx)) {
|
||||
LOG_ERR("%s: batch size %d exceeds maximum supported batch/temporal-merge size %d\n", __func__, n_batch_cur, clip_model_n_temporal_merge(ctx));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -3513,7 +3526,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
|
||||
// build the inference graph
|
||||
ggml_backend_sched_reset(ctx->sched.get());
|
||||
ggml_cgraph * gf = clip_image_build_graph(ctx, imgs);
|
||||
ggml_cgraph * gf = clip_get_graph_builder(ctx, imgs)->build();
|
||||
ggml_backend_sched_alloc_graph(ctx->sched.get(), gf);
|
||||
|
||||
// set inputs
|
||||
@@ -3582,6 +3595,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
const int n = nx * ny;
|
||||
|
||||
for (int b = 0; b < n_batch_cur; b++) {
|
||||
LOG_DBG("%s: copying image %d/%d to input buffer (nx=%d, ny=%d)\n", __func__, b+1, n_batch_cur, nx, ny);
|
||||
const auto & buf = imgs.entries[b]->get_ro_buf();
|
||||
float * batch_entry = inp_raw.data() + b * (3*n);
|
||||
for (int y = 0; y < ny; y++) {
|
||||
@@ -4416,7 +4430,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
// the last node is the embedding tensor
|
||||
ggml_tensor * embeddings = ggml_graph_node(gf, -1);
|
||||
|
||||
// sanity check (only support batch size of 1 for now)
|
||||
// sanity check (assuming that all images in batch have the same number of tokens, so we only check the first one)
|
||||
const int n_tokens_out = embeddings->ne[1];
|
||||
const int expected_n_tokens_out = clip_n_output_tokens(ctx, imgs.entries[0].get());
|
||||
if (n_tokens_out != expected_n_tokens_out) {
|
||||
@@ -4424,16 +4438,26 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
GGML_ABORT("Invalid number of output tokens");
|
||||
}
|
||||
|
||||
// copy the embeddings to the location passed by the user
|
||||
if (vec != nullptr) {
|
||||
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
|
||||
LOG_DBG("%s: output embedding shape [%d, %d, %d]\n", __func__,
|
||||
(int)embeddings->ne[0], (int)embeddings->ne[1], (int)embeddings->ne[2]);
|
||||
|
||||
// copy output to user buffer if provided
|
||||
// if output is empty, skip the copy
|
||||
if (!out_batch_embd.empty()) {
|
||||
if (out_batch_embd.size() != (size_t)ggml_nelements(embeddings)) {
|
||||
LOG_ERR("%s: output buffer has %zu elements but expected %zu\n", __func__, out_batch_embd.size(), (size_t)ggml_nelements(embeddings));
|
||||
GGML_ABORT("Output buffer size mismatch");
|
||||
}
|
||||
ggml_backend_tensor_get(embeddings, out_batch_embd.data(), 0, ggml_nbytes(embeddings));
|
||||
} else {
|
||||
LOG_WRN("%s: output buffer is empty, skipping copy\n", __func__);
|
||||
}
|
||||
|
||||
// Debug: dump final embeddings if MTMD_DEBUG_EMBEDDINGS is set
|
||||
if (ctx->debug_output_embeddings) {
|
||||
const int64_t n_embd = embeddings->ne[0];
|
||||
const int64_t n_tokens = embeddings->ne[1];
|
||||
std::vector<float> emb_data(n_embd * n_tokens);
|
||||
std::vector<float> emb_data(ggml_nelements(embeddings));
|
||||
ggml_backend_tensor_get(embeddings, emb_data.data(), 0, ggml_nbytes(embeddings));
|
||||
|
||||
LOG_INF("\n=== MTMD_DEBUG_EMBEDDINGS ===\n");
|
||||
@@ -4570,7 +4594,14 @@ bool clip_has_audio_encoder(const struct clip_ctx * ctx) {
|
||||
return ctx->model.modality == CLIP_MODALITY_AUDIO;
|
||||
}
|
||||
|
||||
int clip_model_n_batch_max(const struct clip_ctx * ctx) {
|
||||
bool clip_support_batch(const struct clip_ctx * ctx) {
|
||||
return ctx->support_batch;
|
||||
}
|
||||
|
||||
// TODO @ngxson : this is no longer correct with mtmd_batch API
|
||||
// this was only meant to be used by qwen-vl-based models, to fuse 2 input images into one (qwen-vl video support)
|
||||
// this logic should be refactored in near future to distinctly handle "merge frames" and "batching"
|
||||
int clip_model_n_temporal_merge(const struct clip_ctx * ctx) {
|
||||
switch (ctx->proj_type()) {
|
||||
case PROJECTOR_TYPE_QWEN2VL:
|
||||
case PROJECTOR_TYPE_QWEN25VL:
|
||||
|
||||
@@ -97,8 +97,8 @@ size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int id
|
||||
size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
|
||||
struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
|
||||
|
||||
bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
|
||||
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
|
||||
bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, std::vector<float> & out_vec);
|
||||
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, std::vector<float> & out_batch_embd);
|
||||
|
||||
bool clip_is_llava(const struct clip_ctx * ctx);
|
||||
// note for contributor: this clip_is_(model) pattern is deprecated
|
||||
@@ -107,7 +107,9 @@ bool clip_is_llava(const struct clip_ctx * ctx);
|
||||
bool clip_has_vision_encoder(const struct clip_ctx * ctx);
|
||||
bool clip_has_audio_encoder(const struct clip_ctx * ctx);
|
||||
|
||||
int clip_model_n_batch_max(const struct clip_ctx * ctx);
|
||||
bool clip_support_batch(const struct clip_ctx * ctx);
|
||||
|
||||
int clip_model_n_temporal_merge(const struct clip_ctx * ctx); // TODO @ngxson : remove, refactor this
|
||||
|
||||
std::map<ggml_backend_dev_t, size_t> clip_get_mem_usage(const struct clip_ctx * ctx);
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ ggml_cgraph * clip_graph_gemma4v::build() {
|
||||
ggml_set_name(inp_raw, "inp_raw_scaled");
|
||||
|
||||
ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
|
||||
inp = ggml_reshape_2d(ctx0, inp, n_patches, n_embd);
|
||||
inp = ggml_reshape_3d(ctx0, inp, n_patches, n_embd, n_batch);
|
||||
inp = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
|
||||
ggml_set_name(inp, "inp");
|
||||
// note: no patch bias
|
||||
@@ -51,10 +51,11 @@ ggml_cgraph * clip_graph_gemma4v::build() {
|
||||
// first half
|
||||
ggml_tensor * first;
|
||||
{
|
||||
first = ggml_view_3d(ctx0, cur,
|
||||
n_dim/2, n_head, n_pos,
|
||||
first = ggml_view_4d(ctx0, cur,
|
||||
n_dim/2, n_head, n_pos, n_batch,
|
||||
cur->nb[1],
|
||||
cur->nb[2],
|
||||
cur->nb[3],
|
||||
0);
|
||||
first = ggml_rope_ext(
|
||||
ctx0,
|
||||
@@ -70,10 +71,11 @@ ggml_cgraph * clip_graph_gemma4v::build() {
|
||||
// second half
|
||||
ggml_tensor * second;
|
||||
{
|
||||
second = ggml_view_3d(ctx0, cur,
|
||||
n_dim/2, n_head, n_pos,
|
||||
second = ggml_view_4d(ctx0, cur,
|
||||
n_dim/2, n_head, n_pos, n_batch,
|
||||
cur->nb[1],
|
||||
cur->nb[2],
|
||||
cur->nb[3],
|
||||
n_dim/2 * ggml_element_size(cur));
|
||||
second = ggml_rope_ext(
|
||||
ctx0,
|
||||
@@ -103,14 +105,14 @@ ggml_cgraph * clip_graph_gemma4v::build() {
|
||||
const int kernel_size = hparams.n_merge;
|
||||
GGML_ASSERT(kernel_size > 0);
|
||||
|
||||
// [n_embd, n_patches] -> [n_patches_x, n_patches_y, n_embd, 1]
|
||||
cur = ggml_cont_4d(ctx0, ggml_transpose(ctx0, cur), n_patches_x, n_patches_y, n_embd, 1);
|
||||
// [n_embd, n_patches] -> [n_patches_x, n_patches_y, n_embd, n_batch]
|
||||
cur = ggml_cont_4d(ctx0, ggml_transpose(ctx0, cur), n_patches_x, n_patches_y, n_embd, n_batch);
|
||||
cur = ggml_pool_2d(ctx0, cur, GGML_OP_POOL_AVG,
|
||||
kernel_size, kernel_size, kernel_size, kernel_size, 0, 0);
|
||||
const int out_x = n_patches_x / kernel_size;
|
||||
const int out_y = n_patches_y / kernel_size;
|
||||
// [out_x, out_y, n_embd, 1] -> [n_embd, out_x * out_y]
|
||||
cur = ggml_reshape_3d(ctx0, cur, out_x * out_y, n_embd, 1);
|
||||
// [out_x, out_y, n_embd, n_batch] -> [n_embd, out_x * out_y, n_batch]
|
||||
cur = ggml_reshape_3d(ctx0, cur, out_x * out_y, n_embd, n_batch);
|
||||
cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
|
||||
cur = ggml_scale(ctx0, cur, sqrtf((float)n_embd));
|
||||
cb(cur, "pooled", -1);
|
||||
|
||||
@@ -16,6 +16,7 @@ struct clip_graph_gemma4v : clip_graph {
|
||||
clip_graph_gemma4v(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
ggml_cgraph * build() override;
|
||||
ggml_tensor * build_mm(ggml_tensor * w, ggml_tensor * x) const override;
|
||||
bool support_batch() const override { return true; }
|
||||
};
|
||||
|
||||
struct clip_graph_gemma4uv : clip_graph {
|
||||
|
||||
@@ -67,8 +67,8 @@ MTMD_API void mtmd_helper_image_get_decoder_pos(const mtmd_image_tokens * image,
|
||||
|
||||
// helper function that automatically:
|
||||
// 1. run llama_decode() on text chunks
|
||||
// 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
|
||||
// if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error
|
||||
// 2. run mtmd_encode_chunk() on image chunks, then mtmd_get_output_embd() and then llama_decode()
|
||||
// if any of the mtmd_encode_chunk() or llama_decode() calls return non-zero, stop and forward the error
|
||||
// otherwise, returns 0 on success
|
||||
// this function is NOT thread-safe
|
||||
MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
|
||||
@@ -157,13 +157,16 @@ MTMD_API int32_t mtmd_helper_video_read_next(mtmd_helper_video * ctx,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <set>
|
||||
#include <memory>
|
||||
|
||||
namespace mtmd_helper {
|
||||
|
||||
//
|
||||
// C++ wrappers
|
||||
//
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace mtmd_helper {
|
||||
|
||||
// video-related C++ wrappers
|
||||
struct mtmd_helper_video_deleter {
|
||||
void operator()(mtmd_helper_video * val) { mtmd_helper_video_free(val); }
|
||||
|
||||
@@ -69,8 +69,8 @@ struct mtmd_bitmap {
|
||||
return data.size();
|
||||
}
|
||||
|
||||
bool can_batch_with(const mtmd_bitmap & other) const {
|
||||
// [QWEN_VIDEO] can batch if both are images with same size
|
||||
bool can_merge_with(const mtmd_bitmap & other) const {
|
||||
// [QWEN_VIDEO] can (temporal) merge if both are images with same size
|
||||
return !is_audio && !other.is_audio && nx == other.nx && ny == other.ny;
|
||||
}
|
||||
|
||||
@@ -90,12 +90,24 @@ struct mtmd_image_tokens {
|
||||
uint32_t ny = 0; // number of tokens in y direction
|
||||
mtmd_pos_type pos = MTMD_POS_TYPE_NORMAL;
|
||||
uint32_t image_idx = 0; // 0-based position of this image among image chunks in the prompt(used by pos == MTMD_POS_TYPE_HUNYUANVL)
|
||||
uint32_t n_temporal_merge = 1; // for qwen-vl style temporal merge
|
||||
uint32_t n_tokens() const {
|
||||
if (pos == MTMD_POS_TYPE_HUNYUANVL) {
|
||||
// [BOI] [row0 tokens + newline] ... [row(ny-1) tokens + newline] [EOI]
|
||||
return (nx + 1) * ny + 2;
|
||||
}
|
||||
return nx * ny;
|
||||
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
|
||||
if (batch_f32.entries.size() == 1 || n_temporal_merge == 1) {
|
||||
return nx * ny;
|
||||
}
|
||||
uint32_t nz = batch_f32.entries.size();
|
||||
// TODO: simplify this by repeating the last frame until it fits the temporal merge
|
||||
if (nz % n_temporal_merge != 0) {
|
||||
nz = nz / n_temporal_merge + 1;
|
||||
} else {
|
||||
nz = nz / n_temporal_merge;
|
||||
}
|
||||
return nx * ny * nz;
|
||||
}
|
||||
clip_image_f32_batch batch_f32; // preprocessed image patches
|
||||
std::string id; // optional user-defined ID, useful for KV cache tracking
|
||||
@@ -110,12 +122,17 @@ struct mtmd_image_tokens {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true when `other` has the same token grid (nx, ny) and the same
// position type (pos) as this object; those are the only fields compared.
bool can_batch_with(const mtmd_image_tokens & other) {
|
||||
return nx == other.nx && ny == other.ny && pos == other.pos;
|
||||
}
|
||||
|
||||
mtmd_image_tokens clone() {
|
||||
return mtmd_image_tokens{
|
||||
nx,
|
||||
ny,
|
||||
pos,
|
||||
image_idx,
|
||||
n_temporal_merge,
|
||||
batch_f32.clone(),
|
||||
id
|
||||
};
|
||||
@@ -153,12 +170,49 @@ struct mtmd_input_chunk {
|
||||
std::vector<llama_token> tokens_text;
|
||||
mtmd_image_tokens_ptr tokens_image;
|
||||
mtmd_audio_tokens_ptr tokens_audio;
|
||||
|
||||
// Decides whether this chunk and `other` may be placed in the same batch.
// Chunks of different types never batch; two image chunks defer to
// mtmd_image_tokens::can_batch_with(); every other combination (including
// audio with audio) currently returns false.
bool can_batch_with(const mtmd_input_chunk & other) const {
|
||||
if (type != other.type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// only taken when both chunks actually carry image tokens
if (tokens_image && other.tokens_image) {
|
||||
return tokens_image->can_batch_with(*other.tokens_image);
|
||||
}
|
||||
|
||||
// TODO: allow batching audio chunks of the same size
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Forwards to is_placeholder() of the image or audio tokens that match the
// chunk type. Returns false when the matching token pointer is null and for
// any other chunk type.
bool is_placeholder() const {
|
||||
if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||
return tokens_image && tokens_image->is_placeholder();
|
||||
} else if (type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
|
||||
return tokens_audio && tokens_audio->is_placeholder();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct mtmd_input_chunks {
|
||||
std::vector<mtmd_input_chunk> entries;
|
||||
};
|
||||
|
||||
// State of one batched encode: the context it belongs to, the chunks queued
// for encoding and the embeddings produced for them.
struct mtmd_batch {
|
||||
// context this batch was created for
mtmd_context * ctx;
|
||||
// queued chunks, stored as raw pointers; nothing in this struct frees them
std::vector<const mtmd_input_chunk *> entries;
|
||||
std::vector<float> output_embd; // aggregated output embedding for the whole batch
|
||||
mtmd_batch(mtmd_context * ctx): ctx(ctx) {}
|
||||
// sum of mtmd_input_chunk_get_n_tokens() over all queued chunks
int32_t n_tokens() const {
|
||||
int32_t n = 0;
|
||||
for (const auto * chunk : entries) {
|
||||
n += mtmd_input_chunk_get_n_tokens(chunk);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
};
|
||||
|
||||
// slice template, used by some llava-uhd models to correctly place the special tokens around image embeddings
|
||||
// models not having it (llava-1.6) will process embeddings without any special tokens in-between
|
||||
enum mtmd_slice_tmpl {
|
||||
@@ -197,6 +251,7 @@ mtmd_context_params mtmd_context_params_default() {
|
||||
/* image_max_tokens */ -1,
|
||||
/* cb_eval */ nullptr,
|
||||
/* cb_eval_user_data */ nullptr,
|
||||
/* batch_max_tokens */ 1024,
|
||||
};
|
||||
return params;
|
||||
}
|
||||
@@ -204,7 +259,7 @@ mtmd_context_params mtmd_context_params_default() {
|
||||
struct mtmd_context {
|
||||
struct clip_ctx * ctx_v; // vision
|
||||
struct clip_ctx * ctx_a; // audio
|
||||
std::vector<float> image_embd_v; // image embedding vector
|
||||
std::vector<float> out_embd; // image embedding vector
|
||||
|
||||
bool print_timings;
|
||||
int n_threads;
|
||||
@@ -239,17 +294,21 @@ struct mtmd_context {
|
||||
std::unique_ptr<mtmd_audio_preprocessor> audio_preproc;
|
||||
std::unique_ptr<mtmd_image_preprocessor> image_preproc;
|
||||
|
||||
// batching
|
||||
int32_t batch_max_tokens;
|
||||
|
||||
// TODO @ngxson : add timings
|
||||
|
||||
mtmd_context(const char * mmproj_fname,
|
||||
const llama_model * text_model,
|
||||
const mtmd_context_params & ctx_params,
|
||||
bool no_alloc = false) :
|
||||
print_timings(ctx_params.print_timings),
|
||||
n_threads (ctx_params.n_threads),
|
||||
media_marker (ctx_params.media_marker),
|
||||
n_embd_text (text_model ? llama_model_n_embd_inp(text_model) : -1),
|
||||
vocab (text_model ? llama_model_get_vocab(text_model) : nullptr)
|
||||
print_timings (ctx_params.print_timings),
|
||||
n_threads (ctx_params.n_threads),
|
||||
media_marker (ctx_params.media_marker),
|
||||
n_embd_text (text_model ? llama_model_n_embd_inp(text_model) : -1),
|
||||
vocab (text_model ? llama_model_get_vocab(text_model) : nullptr),
|
||||
batch_max_tokens(ctx_params.batch_max_tokens)
|
||||
{
|
||||
if (ctx_params.image_marker != nullptr) {
|
||||
throw std::runtime_error("custom image_marker is not supported anymore, use media_marker instead");
|
||||
@@ -680,6 +739,16 @@ struct mtmd_context {
|
||||
return ctx_a ? clip_get_projector_type(ctx_a) : PROJECTOR_TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
// Returns clip_n_mmproj_embd() of the loaded CLIP context.
// The vision context is checked first, so when both ctx_v and ctx_a are set
// the value comes from ctx_v. Throws std::runtime_error when neither is set.
int64_t n_embd_out() const {
|
||||
if (ctx_v) {
|
||||
return clip_n_mmproj_embd(ctx_v);
|
||||
} else if (ctx_a) {
|
||||
return clip_n_mmproj_embd(ctx_a);
|
||||
} else {
|
||||
throw std::runtime_error("no CLIP model loaded");
|
||||
}
|
||||
}
|
||||
|
||||
~mtmd_context() {
|
||||
clip_free(ctx_a);
|
||||
clip_free(ctx_v);
|
||||
@@ -845,7 +914,7 @@ struct mtmd_tokenizer {
|
||||
// [QWEN_VIDEO] handle frame merging for models that support it (i.e. qwen-vl)
|
||||
int n_merge_frames = 1;
|
||||
if (ctx->ctx_v) {
|
||||
n_merge_frames = clip_model_n_batch_max(ctx->ctx_v);
|
||||
n_merge_frames = clip_model_n_temporal_merge(ctx->ctx_v);
|
||||
GGML_ASSERT(n_merge_frames <= 2 && "we only support merging maximum 2 images for now; open an issue if this model supports merging more");
|
||||
}
|
||||
|
||||
@@ -860,7 +929,7 @@ struct mtmd_tokenizer {
|
||||
if (i + 1 < parts.size() && parts[i + 1].bitmap != nullptr) {
|
||||
const mtmd_bitmap * bm_a = parts[i].bitmap;
|
||||
const mtmd_bitmap * bm_b = parts[i + 1].bitmap;
|
||||
if (bm_a->can_batch_with(*bm_b)) {
|
||||
if (bm_a->can_merge_with(*bm_b)) {
|
||||
LOG_DBG("%s: merging 2 frames at part index %zu and %zu\n", __func__, i, i + 1);
|
||||
merged_bitmaps.push_back({bm_a, bm_b});
|
||||
parts.erase(parts.begin() + i + 1); // collapse the second bitmap part
|
||||
@@ -1103,13 +1172,17 @@ struct mtmd_tokenizer {
|
||||
size_t n_tokens = 0;
|
||||
for (const auto & e : batch_f32.entries) {
|
||||
n_tokens += clip_n_output_tokens(ctx->ctx_v, e.get());
|
||||
if (clip_model_n_batch_max(ctx->ctx_v) == 2) {
|
||||
if (clip_model_n_temporal_merge(ctx->ctx_v) == 2) {
|
||||
// [QWEN_VIDEO] pair input is merged to the same embd, so only count as one image
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens);
|
||||
|
||||
// [QWEN_VIDEO] improve this in the future
|
||||
image_tokens->n_temporal_merge = clip_model_n_temporal_merge(ctx->ctx_v);
|
||||
|
||||
if (mtmd_decode_use_mrope(ctx)) {
|
||||
// for Qwen2VL, we need this information for M-RoPE decoding positions
|
||||
image_tokens->nx = clip_n_output_tokens_x(ctx->ctx_v, batch_f32.entries[0].get());
|
||||
@@ -1327,60 +1400,18 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
|
||||
}
|
||||
}
|
||||
|
||||
int32_t mtmd_encode_chunk(mtmd_context * ctx, const mtmd_input_chunk * chunk) {
|
||||
if (chunk->type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||
LOG_WRN("mtmd_encode_chunk has no effect for text chunks\n");
|
||||
return 0;
|
||||
} else if (chunk->type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||
if (!ctx->ctx_v) {
|
||||
LOG_ERR("%s: model does not support vision input\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_image == nullptr) {
|
||||
LOG_ERR("%s: image tokens are null\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_image->is_placeholder()) {
|
||||
LOG_ERR("%s: image tokens batch is placeholder\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return mtmd_encode(ctx, chunk->tokens_image.get());
|
||||
} else if (chunk->type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
|
||||
if (!ctx->ctx_a) {
|
||||
LOG_ERR("%s: model does not support audio input\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_audio == nullptr) {
|
||||
LOG_ERR("%s: audio tokens are null\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_audio->is_placeholder()) {
|
||||
LOG_ERR("%s: audio tokens batch is placeholder\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
int n_mmproj_embd = ctx->n_embd_text;
|
||||
ctx->image_embd_v.resize(chunk->tokens_audio->n_tokens * n_mmproj_embd);
|
||||
bool ok = clip_image_batch_encode(
|
||||
ctx->ctx_a,
|
||||
ctx->n_threads,
|
||||
&chunk->tokens_audio->batch_f32,
|
||||
ctx->image_embd_v.data());
|
||||
return ok ? 0 : 1;
|
||||
}
|
||||
|
||||
LOG_ERR("%s: unknown chunk type %d\n", __func__, (int)chunk->type);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) {
|
||||
static int32_t mtmd_encode_impl(mtmd_context * ctx, const mtmd_image_tokens * image_tokens, std::vector<float> & out_embd) {
|
||||
clip_ctx * ctx_clip = ctx->ctx_v;
|
||||
if (!ctx_clip) {
|
||||
LOG_ERR("%s: this API does not support non-vision input, please use mtmd_encode_chunk instead\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
auto proj_type = clip_get_projector_type(ctx_clip);
|
||||
int n_mmproj_embd = clip_n_mmproj_embd(ctx_clip);
|
||||
ctx->image_embd_v.resize(image_tokens->n_tokens() * n_mmproj_embd);
|
||||
|
||||
int n_embd_out = ctx->n_embd_out();
|
||||
auto n_tokens_out = image_tokens->n_tokens();
|
||||
out_embd.resize((size_t)n_embd_out * n_tokens_out);
|
||||
|
||||
bool ok = false;
|
||||
|
||||
if (clip_is_llava(ctx_clip)
|
||||
@@ -1400,12 +1431,19 @@ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens)
|
||||
return 1;
|
||||
}
|
||||
int n_tokens_per_image = clip_n_output_tokens(ctx_clip, entries[i].get());
|
||||
ok = clip_image_encode(
|
||||
std::vector<float> tmp_embd((size_t)n_tokens_per_image * n_embd_out);
|
||||
bool ok_i = clip_image_encode(
|
||||
ctx_clip,
|
||||
ctx->n_threads,
|
||||
entries[i].get(),
|
||||
ctx->image_embd_v.data() + offset);
|
||||
offset += static_cast<size_t>(n_mmproj_embd) * n_tokens_per_image;
|
||||
tmp_embd);
|
||||
if (!ok_i) {
|
||||
LOG_ERR("%s: failed to encode image %zu\n", __func__, i);
|
||||
return 1;
|
||||
}
|
||||
ok = true;
|
||||
std::copy(tmp_embd.begin(), tmp_embd.end(), out_embd.begin() + offset);
|
||||
offset += static_cast<size_t>(n_embd_out) * n_tokens_per_image;
|
||||
}
|
||||
} else {
|
||||
if (image_tokens->is_placeholder()) {
|
||||
@@ -1416,14 +1454,206 @@ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens)
|
||||
ctx_clip,
|
||||
ctx->n_threads,
|
||||
&image_tokens->batch_f32,
|
||||
ctx->image_embd_v.data());
|
||||
out_embd);
|
||||
}
|
||||
|
||||
return ok ? 0 : 1;
|
||||
}
|
||||
|
||||
// Encodes one chunk and writes the embeddings into `out_embd`.
//   - text chunk : logs a warning and returns 0 without touching out_embd
//   - image chunk: validates the chunk, then delegates to mtmd_encode_impl()
//   - audio chunk: validates the chunk, resizes out_embd and calls
//                  clip_image_batch_encode() on the audio context
// Returns 0 on success and 1 on any failure (missing encoder, null or
// placeholder tokens, encoder failure, unknown chunk type).
static int32_t mtmd_encode_chunk_impl(mtmd_context * ctx, const mtmd_input_chunk * chunk, std::vector<float> & out_embd) {
|
||||
if (chunk->type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||
LOG_WRN("mtmd_encode_chunk has no effect for text chunks\n");
|
||||
return 0;
|
||||
} else if (chunk->type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||
if (!ctx->ctx_v) {
|
||||
LOG_ERR("%s: model does not support vision input\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_image == nullptr) {
|
||||
LOG_ERR("%s: image tokens are null\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_image->is_placeholder()) {
|
||||
LOG_ERR("%s: image tokens batch is placeholder\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return mtmd_encode_impl(ctx, chunk->tokens_image.get(), out_embd);
|
||||
} else if (chunk->type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
|
||||
if (!ctx->ctx_a) {
|
||||
LOG_ERR("%s: model does not support audio input\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_audio == nullptr) {
|
||||
LOG_ERR("%s: audio tokens are null\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
if (chunk->tokens_audio->is_placeholder()) {
|
||||
LOG_ERR("%s: audio tokens batch is placeholder\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
// output buffer is sized as (audio token count) x (embedding width from n_embd_out())
int n_mmproj_embd = ctx->n_embd_out();
|
||||
out_embd.resize((size_t)chunk->tokens_audio->n_tokens * n_mmproj_embd);
|
||||
bool ok = clip_image_batch_encode(
|
||||
ctx->ctx_a,
|
||||
ctx->n_threads,
|
||||
&chunk->tokens_audio->batch_f32,
|
||||
out_embd);
|
||||
return ok ? 0 : 1;
|
||||
}
|
||||
|
||||
// none of the three known chunk types matched
LOG_ERR("%s: unknown chunk type %d\n", __func__, (int)chunk->type);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Public single-chunk entry point: encodes `chunk` into the context-owned
// buffer ctx->out_embd. Any std::exception thrown by the implementation is
// logged and turned into return code 1.
int32_t mtmd_encode_chunk(mtmd_context * ctx, const mtmd_input_chunk * chunk) {
|
||||
// this is the non-batching version
|
||||
try {
|
||||
return mtmd_encode_chunk_impl(ctx, chunk, ctx->out_embd);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("%s: error: %s\n", __func__, e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Encodes image tokens into the context-owned buffer ctx->out_embd via
// mtmd_encode_impl(). Any std::exception thrown by the implementation is
// logged and turned into return code 1.
int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) {
|
||||
try {
|
||||
return mtmd_encode_impl(ctx, image_tokens, ctx->out_embd);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("%s: error: %s\n", __func__, e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
float * mtmd_get_output_embd(mtmd_context * ctx) {
|
||||
return ctx->image_embd_v.data();
|
||||
return ctx->out_embd.data();
|
||||
}
|
||||
|
||||
// Allocates an empty batch bound to `ctx` with `new`; mtmd_batch_free()
// releases it with the matching `delete`.
mtmd_batch * mtmd_batch_init(mtmd_context * ctx) {
|
||||
return new mtmd_batch(ctx);
|
||||
}
|
||||
|
||||
// Deletes the batch object; a null pointer is accepted and ignored.
// Only the batch itself is deleted here, not the chunks it points to.
void mtmd_batch_free(mtmd_batch * batch) {
|
||||
if (batch) {
|
||||
delete batch;
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to queue `chunk` in `batch`. Only the pointer is stored.
// Return codes:
//   0 - chunk was added
//   1 - generic error (text chunk, or no encoder for this chunk type)
//   2 - batch too large (encoder cannot batch, or token budget exceeded)
//   3 - chunk is not compatible with the first chunk already in the batch
// The checks run in the order below, so an empty batch accepts its first
// chunk before the batching-support and token-budget checks are reached.
int32_t mtmd_batch_add_chunk(mtmd_batch * batch, const mtmd_input_chunk * chunk) {
|
||||
if (chunk->type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||
LOG_ERR("%s: text chunk is not supported in batch\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// clip context responsible for this chunk; a null result is treated as "unsupported input"
auto * ctx = batch->ctx->get_clip_ctx(chunk);
|
||||
if (!ctx) {
|
||||
LOG_ERR("%s: model does not support input chunk type %d\n", __func__, (int)chunk->type);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (batch->entries.empty()) {
|
||||
// batch must have at least one chunk
|
||||
batch->entries.push_back(chunk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!clip_support_batch(ctx)) {
|
||||
// if no batching support, batch can only have one single chunk
|
||||
return 2; // "batch too large" error code
|
||||
}
|
||||
|
||||
// token count the batch would have after adding this chunk
int32_t new_n_tokens = batch->n_tokens() + (int32_t)mtmd_input_chunk_get_n_tokens(chunk);
|
||||
if (new_n_tokens > batch->ctx->batch_max_tokens) {
|
||||
return 2; // "batch too large" error code
|
||||
}
|
||||
|
||||
// compatibility is only checked against the first queued chunk
auto & first_chunk = batch->entries[0];
|
||||
if (first_chunk->can_batch_with(*chunk)) {
|
||||
batch->entries.push_back(chunk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 3; // "cannot batch" error code
|
||||
}
|
||||
|
||||
// Encodes every chunk queued in `batch` with a single encoder call.
// A copy of the first chunk is used as a container: cloned batch_f32 entries
// of all remaining chunks are appended to it, and the combined chunk is
// passed to mtmd_encode_chunk_impl(), which writes into batch->output_embd.
// Returns 0 on success, 1 when the batch is empty, contains a placeholder
// chunk, has neither image nor audio tokens, or the encoder fails.
static int32_t mtmd_batch_encode_impl(mtmd_batch * batch) {
|
||||
if (batch->entries.empty()) {
|
||||
LOG_ERR("%s: batch is empty\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
for (const auto * chunk : batch->entries) {
|
||||
if (chunk->is_placeholder()) {
|
||||
LOG_ERR("%s: chunk is placeholder\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// represent the whole batch as one single chunk
|
||||
mtmd::input_chunk_ptr batch_chunk(mtmd_input_chunk_copy(batch->entries[0]));
|
||||
if (batch_chunk->tokens_image) {
|
||||
auto & b0_f32 = batch_chunk->tokens_image->batch_f32;
|
||||
// copy all entries from other chunks into the first chunk's batch_f32
|
||||
// note: skip first entry because it's already in batch_chunk
|
||||
for (size_t ic = 1; ic < batch->entries.size(); ic++) {
|
||||
auto & chunk = batch->entries[ic];
|
||||
GGML_ASSERT(chunk->tokens_image);
|
||||
// clone first so the queued chunk keeps its own entries, then move the clones over
auto b1_f32 = chunk->tokens_image->batch_f32.clone();
|
||||
for (size_t i = 0; i < b1_f32.entries.size(); i++) {
|
||||
b0_f32.entries.push_back(std::move(b1_f32.entries[i]));
|
||||
}
|
||||
}
|
||||
} else if (batch_chunk->tokens_audio) {
|
||||
// NOTE(review): mtmd_input_chunk::can_batch_with() returns false for audio, so a batch
// filled through mtmd_batch_add_chunk() presumably holds a single audio chunk and the
// loop below does not run - confirm
auto & b0_f32 = batch_chunk->tokens_audio->batch_f32;
|
||||
// copy all entries from other chunks into the first chunk's batch_f32
|
||||
// note: skip first entry because it's already in batch_chunk
|
||||
for (size_t ic = 1; ic < batch->entries.size(); ic++) {
|
||||
auto & chunk = batch->entries[ic];
|
||||
GGML_ASSERT(chunk->tokens_audio);
|
||||
auto b1_f32 = chunk->tokens_audio->batch_f32.clone();
|
||||
for (size_t i = 0; i < b1_f32.entries.size(); i++) {
|
||||
b0_f32.entries.push_back(std::move(b1_f32.entries[i]));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LOG_ERR("%s: unsupported chunk type\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG_DBG("%s: encoding batch with %zu entries and total %zu tokens\n",
|
||||
__func__, batch->entries.size(), mtmd_input_chunk_get_n_tokens(batch_chunk.get()));
|
||||
// one encoder call for the merged chunk; results land in batch->output_embd
int32_t res = mtmd_encode_chunk_impl(
|
||||
batch->ctx,
|
||||
batch_chunk.get(),
|
||||
batch->output_embd);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Public entry point for batched encoding. Runs mtmd_batch_encode_impl() and
// converts any std::exception it throws into a logged error and return code 1.
int32_t mtmd_batch_encode(mtmd_batch * batch) {
|
||||
try {
|
||||
return mtmd_batch_encode_impl(batch);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("%s: error: %s\n", __func__, e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a pointer into batch->output_embd where the embeddings of `chunk`
// start, or nullptr when output_embd is empty or `chunk` is not one of the
// batch entries. Entries are matched by pointer identity.
// The lookup treats output_embd as the per-chunk outputs stored back to back
// in the order of batch->entries, each taking n_tokens * n_embd floats.
float * mtmd_batch_get_output_embd(mtmd_batch * batch, const mtmd_input_chunk * chunk) {
|
||||
if (batch->output_embd.empty()) {
|
||||
LOG_ERR("%s: batch has not been encoded yet\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
size_t offset = 0;
|
||||
const size_t n_embd = batch->ctx->n_embd_out();
|
||||
for (const auto * c : batch->entries) {
|
||||
// offset_prev = start of this entry, offset = one past its end
size_t offset_prev = offset;
|
||||
size_t n_tokens = mtmd_input_chunk_get_n_tokens(c);
|
||||
offset += n_tokens * n_embd;
|
||||
// abort if the computed range for this entry falls outside output_embd
GGML_ASSERT(offset_prev < batch->output_embd.size());
|
||||
GGML_ASSERT(offset <= batch->output_embd.size());
|
||||
if (c == chunk) {
|
||||
return &batch->output_embd.data()[offset_prev];
|
||||
}
|
||||
}
|
||||
return nullptr; // not found
|
||||
}
|
||||
|
||||
bool mtmd_decode_use_non_causal(const mtmd_context * ctx, const mtmd_input_chunk * chunk) {
|
||||
@@ -1801,7 +2031,7 @@ static void mtmd_debug_encode_impl(mtmd_context * ctx, clip_ctx * ctx_clip, clip
|
||||
ctx_clip,
|
||||
ctx->n_threads,
|
||||
&image,
|
||||
embd_output.data());
|
||||
embd_output);
|
||||
if (!ok) {
|
||||
LOG_ERR("%s: failed to encode image\n", __func__);
|
||||
}
|
||||
|
||||
@@ -63,6 +63,7 @@ struct mtmd_bitmap;
|
||||
struct mtmd_image_tokens;
|
||||
struct mtmd_input_chunk;
|
||||
struct mtmd_input_chunks;
|
||||
struct mtmd_batch;
|
||||
|
||||
struct mtmd_input_text {
|
||||
const char * text;
|
||||
@@ -80,6 +81,7 @@ typedef struct mtmd_image_tokens mtmd_image_tokens;
|
||||
typedef struct mtmd_input_chunk mtmd_input_chunk;
|
||||
typedef struct mtmd_input_chunks mtmd_input_chunks;
|
||||
typedef struct mtmd_input_text mtmd_input_text;
|
||||
typedef struct mtmd_batch mtmd_batch;
|
||||
|
||||
struct mtmd_context_params {
|
||||
bool use_gpu;
|
||||
@@ -97,6 +99,11 @@ struct mtmd_context_params {
|
||||
// callback function passed over to mtmd proper
|
||||
ggml_backend_sched_eval_callback cb_eval;
|
||||
void * cb_eval_user_data;
|
||||
|
||||
// batching params
|
||||
int32_t batch_max_tokens; // maximum number of output tokens in a batch
|
||||
// (note: this is not a hard limit; the first image will always be added even if it exceeds this limit)
|
||||
// (default: 1024)
|
||||
};
|
||||
|
||||
MTMD_API const char * mtmd_default_marker(void);
|
||||
@@ -265,12 +272,12 @@ MTMD_API int32_t mtmd_tokenize(mtmd_context * ctx,
|
||||
const mtmd_bitmap ** bitmaps,
|
||||
size_t n_bitmaps);
|
||||
|
||||
// returns 0 on success
|
||||
// TODO: deprecate
|
||||
MTMD_API int32_t mtmd_encode(mtmd_context * ctx,
|
||||
const mtmd_image_tokens * image_tokens);
|
||||
DEPRECATED(MTMD_API int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens),
|
||||
"use mtmd_encode_chunk() instead");
|
||||
|
||||
// text chunk will be ignored silently, only media chunk will be encoded
|
||||
// returns 0 on success
|
||||
// returns 1 on generic error
|
||||
MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
|
||||
const mtmd_input_chunk * chunk);
|
||||
|
||||
@@ -279,6 +286,26 @@ MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
|
||||
// llama_model_n_embd_inp(model) * mtmd_input_chunk_get_n_tokens(chunk) * sizeof(float)
|
||||
MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
|
||||
|
||||
|
||||
// batch encoding API
|
||||
// chunks are not owned by the batch, they will not be freed by mtmd_batch_free()
|
||||
// batch is valid for a given context, cannot be shared across contexts
|
||||
MTMD_API mtmd_batch * mtmd_batch_init(mtmd_context * ctx);
|
||||
MTMD_API void mtmd_batch_free(mtmd_batch * batch);
|
||||
|
||||
// only media chunks are allowed, text chunks will be rejected
|
||||
// returns 0 on success
|
||||
// returns 1 on generic error
|
||||
// returns 2 if the batch is too large (chunk won't be added)
|
||||
// returns 3 if it cannot be batched with the existing chunks in the batch
|
||||
MTMD_API int32_t mtmd_batch_add_chunk(mtmd_batch * batch, const mtmd_input_chunk * chunk);
|
||||
|
||||
// returns 0 on success
|
||||
// returns 1 on generic error
|
||||
MTMD_API int32_t mtmd_batch_encode(mtmd_batch * batch);
|
||||
MTMD_API float * mtmd_batch_get_output_embd(mtmd_batch * batch, const mtmd_input_chunk * chunk);
|
||||
|
||||
|
||||
// Set callback for all future logging events.
|
||||
// If this is not called, or NULL is supplied, everything is output on stderr.
|
||||
MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data);
|
||||
@@ -336,6 +363,11 @@ struct mtmd_input_chunk_deleter {
|
||||
};
|
||||
using input_chunk_ptr = std::unique_ptr<mtmd_input_chunk, mtmd_input_chunk_deleter>;
|
||||
|
||||
// Deleter functor that releases an mtmd_batch through mtmd_batch_free().
struct mtmd_batch_deleter {
|
||||
void operator()(mtmd_batch * val) { mtmd_batch_free(val); }
|
||||
};
|
||||
using batch_ptr = std::unique_ptr<mtmd_batch, mtmd_batch_deleter>;
|
||||
|
||||
struct bitmap {
|
||||
bitmap_ptr ptr;
|
||||
bitmap() : ptr(nullptr) {}
|
||||
|
||||
@@ -344,6 +344,14 @@ const mtmd::input_chunk_ptr & server_tokens::find_chunk(size_t idx) const {
|
||||
throw std::runtime_error("Chunk not found");
|
||||
}
|
||||
|
||||
// Looks up map_idx_to_media.upper_bound(idx) and returns a pointer to the
// mapped chunk together with its key, or { nullptr, 0 } when the lookup hits end().
// NOTE(review): presumably map_idx_to_media is an ordered map keyed by start
// index, so this is the first media chunk starting strictly after idx - confirm.
std::pair<const mtmd::input_chunk_ptr *, size_t> server_tokens::find_next_media_chunk(size_t idx) const {
|
||||
auto it = map_idx_to_media.upper_bound(idx);
|
||||
if (it != map_idx_to_media.end()) {
|
||||
return { &it->second, it->first };
|
||||
}
|
||||
return { nullptr, 0 };
|
||||
}
|
||||
|
||||
void server_tokens::push_back(llama_token tok) {
|
||||
if (tok == LLAMA_TOKEN_NULL) {
|
||||
throw std::runtime_error("Invalid token");
|
||||
|
||||
@@ -180,6 +180,10 @@ public:
|
||||
|
||||
const mtmd::input_chunk_ptr & find_chunk(size_t idx) const;
|
||||
|
||||
// find next media chunk after idx
|
||||
// returns a pair of pointer to the chunk (nullptr if not found) and its start index in tokens
|
||||
std::pair<const mtmd::input_chunk_ptr *, size_t> find_next_media_chunk(size_t idx) const;
|
||||
|
||||
void push_back(llama_token tok);
|
||||
|
||||
// will create a copy of the chunk if it contains non-text data
|
||||
|
||||
@@ -80,6 +80,8 @@ struct server_slot {
|
||||
|
||||
// multimodal
|
||||
mtmd_context * mctx = nullptr;
|
||||
mtmd::batch_ptr mbatch = nullptr;
|
||||
std::array<llama_context *, 2> mtgt = {nullptr, nullptr}; // [0] for main context, [1] for optional draft context
|
||||
|
||||
// speculative decoding
|
||||
common_speculative * spec;
|
||||
@@ -239,6 +241,18 @@ struct server_slot {
|
||||
|
||||
// clear alora start
|
||||
alora_invocation_start = -1;
|
||||
|
||||
// clear multimodal state
|
||||
mbatch.reset();
|
||||
mtgt[0] = ctx_tgt;
|
||||
mtgt[1] = nullptr;
|
||||
if (ctx_dft && llama_get_ctx_other(ctx_dft) != ctx_tgt) {
|
||||
// TODO: in the future, figure out how to infuse target embeddings to the images
|
||||
// for now, we re-decode the same chunk in both ctx_tgt and ctx_dft
|
||||
// maybe we simply need to call `common_speculative_process()` ?
|
||||
// [TAG_MTMD_DRAFT_PROCESSING]
|
||||
mtgt[1] = ctx_dft;
|
||||
}
|
||||
}
|
||||
|
||||
void init_sampler() const {
|
||||
@@ -578,6 +592,87 @@ struct server_slot {
|
||||
other.prompt = prompt.clone();
|
||||
other.init_sampler();
|
||||
}
|
||||
|
||||
// returns 0 on success
|
||||
// caller need to update prompt.tokens after a successful call to keep track of the processing progress
|
||||
int process_mtmd_chunk(size_t idx, size_t & n_tokens_out) {
|
||||
GGML_ASSERT(mctx);
|
||||
const auto & input_tokens = task->tokens;
|
||||
auto & chunk = input_tokens.find_chunk(idx);
|
||||
int32_t res = 0;
|
||||
|
||||
auto try_decode = [&]() -> int32_t {
|
||||
if (mbatch) {
|
||||
float * embd = mtmd_batch_get_output_embd(mbatch.get(), chunk.get());
|
||||
if (embd) {
|
||||
for (auto * lctx : mtgt) {
|
||||
if (lctx == nullptr) {
|
||||
continue;
|
||||
}
|
||||
llama_pos new_n_past; // unused for now
|
||||
res = mtmd_helper_decode_image_chunk(
|
||||
mctx,
|
||||
lctx,
|
||||
chunk.get(),
|
||||
embd,
|
||||
prompt.tokens.pos_next(),
|
||||
id,
|
||||
llama_n_batch(lctx),
|
||||
&new_n_past
|
||||
);
|
||||
if (res != 0) {
|
||||
SLT_ERR(*this, "failed to decode mtmd chunk, idx = %zu, res = %d\n", idx, res);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
n_tokens_out = mtmd_input_chunk_get_n_tokens(chunk.get());
|
||||
return 0; // success
|
||||
}
|
||||
}
|
||||
return 1; // (non-error) need to create & encode batch
|
||||
};
|
||||
|
||||
// if the batch is already exist, try searching & encode
|
||||
res = try_decode();
|
||||
if (res == 0) {
|
||||
return 0;
|
||||
} else if (res < 0) {
|
||||
// fatal error
|
||||
return res;
|
||||
}
|
||||
|
||||
// otherwise, the batch is either uninitialized or is used up
|
||||
// we need to create & encode a new batch
|
||||
mbatch.reset(mtmd_batch_init(mctx));
|
||||
res = mtmd_batch_add_chunk(mbatch.get(), chunk.get());
|
||||
GGML_ASSERT(res == 0); // we should never have an empty batch
|
||||
|
||||
// try batching as much as possible
|
||||
int n_added = 1;
|
||||
size_t idx_cur = idx;
|
||||
while (res == 0) {
|
||||
auto [next_chunk, next_idx] = input_tokens.find_next_media_chunk(idx_cur);
|
||||
if (next_chunk == nullptr) {
|
||||
break;
|
||||
}
|
||||
res = mtmd_batch_add_chunk(mbatch.get(), next_chunk->get());
|
||||
n_added += (res == 0 ? 1 : 0);
|
||||
idx_cur = next_idx;
|
||||
SLT_DBG(*this, "try adding media chunk idx = %zu to batch, res = %d\n", next_idx, res);
|
||||
// if res != 0, batch is full or chunk is not compatible -> this loop breaks
|
||||
}
|
||||
|
||||
// TODO @ngxson : move this log line to debug when it become more stable
|
||||
SLT_INF(*this, "encoding mtmd batch from idx = %zu, n_chunks = %d\n", idx, n_added);
|
||||
|
||||
res = mtmd_batch_encode(mbatch.get());
|
||||
if (res != 0) {
|
||||
SLT_ERR(*this, "failed to encode mtmd batch for chunk idx = %zu, res = %d\n", idx, res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return try_decode();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -781,6 +876,7 @@ private:
|
||||
mparams.warmup = params_base.warmup;
|
||||
mparams.image_min_tokens = params_base.image_min_tokens;
|
||||
mparams.image_max_tokens = params_base.image_max_tokens;
|
||||
mparams.batch_max_tokens = params_base.mtmd_batch_max_tokens;
|
||||
mparams.media_marker = get_media_marker();
|
||||
}
|
||||
|
||||
@@ -866,10 +962,7 @@ private:
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < devs.size(); ++j) {
|
||||
const size_t bytes =
|
||||
(measure_model_bytes ? dmd[j].mb.model : 0) +
|
||||
dmd[j].mb.context +
|
||||
dmd[j].mb.compute;
|
||||
const size_t bytes = (measure_model_bytes ? dmd[j].model : 0) + dmd[j].context + dmd[j].compute;
|
||||
total += bytes;
|
||||
for (size_t i = 0; i < tgt_devices.size(); i++) {
|
||||
if (tgt_devices[i] == devs[j]) {
|
||||
@@ -2928,7 +3021,7 @@ private:
|
||||
send_partial_response(slot, {}, false, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end of SLOT_STATE_STARTED
|
||||
|
||||
if (!slot.can_split()) {
|
||||
// cannot fit the prompt in the current batch - will try next iter
|
||||
@@ -2983,10 +3076,18 @@ private:
|
||||
bool has_mtmd = false;
|
||||
|
||||
// check if we should process the image
|
||||
while (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
|
||||
while (true) {
|
||||
auto cur_token_idx = slot.prompt.n_tokens();
|
||||
if (
|
||||
cur_token_idx >= slot.task->n_tokens() ||
|
||||
input_tokens[cur_token_idx] != LLAMA_TOKEN_NULL // encountered a text token
|
||||
) {
|
||||
break;
|
||||
}
|
||||
|
||||
// process the image
|
||||
size_t n_tokens_out = 0;
|
||||
int32_t res = input_tokens.process_chunk(ctx_tgt, mctx, slot.prompt.n_tokens(), slot.prompt.tokens.pos_next(), slot.id, n_tokens_out);
|
||||
int32_t res = slot.process_mtmd_chunk(cur_token_idx, n_tokens_out);
|
||||
if (res != 0) {
|
||||
SLT_ERR(slot, "failed to process image, res = %d\n", res);
|
||||
send_error(slot, "failed to process image", ERROR_TYPE_SERVER);
|
||||
@@ -2994,22 +3095,11 @@ private:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ctx_dft && llama_get_ctx_other(ctx_dft.get()) != ctx_tgt) {
|
||||
// TODO: in the future, figure out how to infuse target embeddings to the images
|
||||
// for now, we skip this for simplicity
|
||||
// maybe we simply need to call `common_speculative_process()` on the mtmd batches in the `process_chunk` above?
|
||||
// [TAG_MTMD_DRAFT_PROCESSING]
|
||||
res = input_tokens.process_chunk(ctx_dft.get(), mctx, slot.prompt.n_tokens(), slot.prompt.tokens.pos_next(), slot.id, n_tokens_out);
|
||||
if (res != 0) {
|
||||
GGML_ABORT("failed to process multi-modal data on draft context\n");
|
||||
}
|
||||
}
|
||||
|
||||
slot.n_prompt_tokens_processed += n_tokens_out;
|
||||
|
||||
// add the image chunk to cache
|
||||
{
|
||||
const auto & chunk = input_tokens.find_chunk(slot.prompt.n_tokens());
|
||||
const auto & chunk = input_tokens.find_chunk(cur_token_idx);
|
||||
slot.prompt.tokens.push_back(chunk.get()); // copy
|
||||
}
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@ bool server_http_context::init(const common_params & params) {
|
||||
#endif
|
||||
|
||||
srv->set_default_headers({{"Server", "llama.cpp"}});
|
||||
srv->set_logger(log_server_request);
|
||||
// srv->set_logger(log_server_request); // TODO @ngxson : this is too spammy, not very useful; improve it in the future
|
||||
srv->set_exception_handler([](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
|
||||
// this is fail-safe; exceptions should already handled by `ex_wrapper`
|
||||
|
||||
@@ -169,91 +169,21 @@ bool server_http_context::init(const common_params & params) {
|
||||
SRV_INF("api_keys: %zu keys loaded\n", params.api_keys.size());
|
||||
}
|
||||
|
||||
//
|
||||
// Helper: Generate iOS splash screen paths from device dimensions
|
||||
// This centralizes PWA asset paths to avoid duplication across CMake, C++, and TypeScript.
|
||||
// Source of truth: tools/ui/src/lib/constants/pwa.ts (APPLE_DEVICES)
|
||||
//
|
||||
auto generate_splash_endpoints = []() -> std::vector<std::string> {
|
||||
// Apple device dimensions (width x height) with orientation and color scheme
|
||||
// Format: "orientation-dimension1xdimension2.png" or "orientation-dark-dimension1xdimension2.png"
|
||||
// Based on https://developer.apple.com/design/human-interface-guidelines/app-icons
|
||||
static const std::vector<std::pair<std::string, std::string>> splash_specs = {
|
||||
// Portrait screens (light)
|
||||
{"portrait", "640x1136"}, {"portrait", "750x1334"},
|
||||
{"portrait", "1170x2532"}, {"portrait", "1179x2556"},
|
||||
{"portrait", "1206x2622"}, {"portrait", "1284x2778"},
|
||||
{"portrait", "1290x2796"}, {"portrait", "1320x2868"},
|
||||
{"portrait", "1488x2266"}, {"portrait", "1640x2360"},
|
||||
{"portrait", "1668x2388"}, {"portrait", "2048x2732"},
|
||||
// Landscape screens (light) - dimensions swapped
|
||||
{"landscape", "1136x640"}, {"landscape", "1334x750"},
|
||||
{"landscape", "2532x1170"}, {"landscape", "2556x1179"},
|
||||
{"landscape", "2622x1206"}, {"landscape", "2778x1284"},
|
||||
{"landscape", "2796x1290"}, {"landscape", "2868x1320"},
|
||||
{"landscape", "2266x1488"}, {"landscape", "2360x1640"},
|
||||
{"landscape", "2388x1668"}, {"landscape", "2732x2048"},
|
||||
// Portrait screens (dark)
|
||||
{"portrait-dark", "640x1136"}, {"portrait-dark", "750x1334"},
|
||||
{"portrait-dark", "1170x2532"}, {"portrait-dark", "1179x2556"},
|
||||
{"portrait-dark", "1206x2622"}, {"portrait-dark", "1284x2778"},
|
||||
{"portrait-dark", "1290x2796"}, {"portrait-dark", "1320x2868"},
|
||||
{"portrait-dark", "1488x2266"}, {"portrait-dark", "1640x2360"},
|
||||
{"portrait-dark", "1668x2388"}, {"portrait-dark", "2048x2732"},
|
||||
// Landscape screens (dark)
|
||||
{"landscape-dark", "1136x640"}, {"landscape-dark", "1334x750"},
|
||||
{"landscape-dark", "2532x1170"}, {"landscape-dark", "2556x1179"},
|
||||
{"landscape-dark", "2622x1206"}, {"landscape-dark", "2778x1284"},
|
||||
{"landscape-dark", "2796x1290"}, {"landscape-dark", "2868x1320"},
|
||||
{"landscape-dark", "2266x1488"}, {"landscape-dark", "2360x1640"},
|
||||
{"landscape-dark", "2388x1668"}, {"landscape-dark", "2732x2048"}
|
||||
};
|
||||
|
||||
std::vector<std::string> endpoints;
|
||||
endpoints.reserve(splash_specs.size());
|
||||
for (const auto & [orientation, dimensions] : splash_specs) {
|
||||
endpoints.push_back("/apple-splash-" + orientation + "-" + dimensions + ".png");
|
||||
}
|
||||
return endpoints;
|
||||
};
|
||||
|
||||
//
|
||||
// Middlewares
|
||||
//
|
||||
|
||||
// Public endpoints list - includes health, UI, and PWA assets
|
||||
// Source of truth for splash screen paths: tools/ui/src/lib/constants/pwa.ts (APPLE_DEVICES)
|
||||
static const std::unordered_set<std::string> get_public_endpoints = [generate_splash_endpoints]() {
|
||||
// Public endpoints - API routes plus all embedded UI assets
|
||||
static const std::unordered_set<std::string> get_public_endpoints = []() {
|
||||
std::unordered_set<std::string> endpoints {
|
||||
"/health",
|
||||
"/v1/health",
|
||||
"/models",
|
||||
"/v1/models",
|
||||
"/",
|
||||
"/index.html",
|
||||
// PWA assets
|
||||
"/favicon.ico",
|
||||
"/favicon-dark.ico",
|
||||
"/favicon.svg",
|
||||
"/favicon-dark.svg",
|
||||
"/pwa-64x64.png",
|
||||
"/pwa-192x192.png",
|
||||
"/pwa-512x512.png",
|
||||
"/maskable-icon-512x512.png",
|
||||
"/apple-touch-icon-180x180.png",
|
||||
// iOS splash screens (generated from APPLE_DEVICES in TypeScript)
|
||||
// PWA runtime files
|
||||
"/manifest.webmanifest",
|
||||
"/sw.js",
|
||||
"/version.json",
|
||||
"/workbox-<hash>.js",
|
||||
"/_app/version.json",
|
||||
"/build.json"
|
||||
};
|
||||
// Add all splash screen endpoints
|
||||
auto splash = generate_splash_endpoints();
|
||||
for (const auto & path : splash) {
|
||||
endpoints.insert(path);
|
||||
for (const llama_ui_asset & a : llama_ui_get_assets()) {
|
||||
endpoints.insert("/" + a.name);
|
||||
}
|
||||
return endpoints;
|
||||
}();
|
||||
@@ -264,13 +194,8 @@ bool server_http_context::init(const common_params & params) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If path is public or static file, skip validation
|
||||
if (get_public_endpoints.find(req.path) != get_public_endpoints.end()) {
|
||||
return true;
|
||||
}
|
||||
// Static assets (_app/ files, workbox runtime). These are embedded at build time
|
||||
// so no API key is needed — browsers fetch them directly.
|
||||
if (req.path.find("/_app/") == 0 || req.path.find("/workbox-") == 0) {
|
||||
// If path is public or a UI asset, skip validation
|
||||
if (get_public_endpoints.count(req.path)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -394,151 +319,62 @@ bool server_http_context::init(const common_params & params) {
|
||||
}
|
||||
} else {
|
||||
#if defined(LLAMA_UI_HAS_ASSETS)
|
||||
// Embedded assets are immutable — cache aggressively for PWA/sw offline support.
|
||||
// PWA runtime files (sw.js, manifest, version.json) use no-cache for revalidation.
|
||||
// Bundle files use Vite content hashes (bundle.<hash>.js/css) so each build
|
||||
// produces a different filename — browsers naturally get a fresh copy on upgrade.
|
||||
auto serve_asset_cached = [](const std::string & name, const char * mime, bool with_isolation_headers) {
|
||||
return [name, mime, with_isolation_headers](const httplib::Request & req, httplib::Response & res) {
|
||||
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
|
||||
if (!a) {
|
||||
res.status = 404;
|
||||
return false;
|
||||
}
|
||||
auto serve_asset_cached = [](const std::string & name, bool isolation) {
|
||||
return [name, isolation](const httplib::Request & req, httplib::Response & res) {
|
||||
const llama_ui_asset * a = llama_ui_find_asset(name);
|
||||
if (!a) { res.status = 404; return false; }
|
||||
res.set_header("ETag", a->etag);
|
||||
// Check If-None-Match for conditional GET (304 Not Modified)
|
||||
if (const std::string & inm = req.get_header_value("If-None-Match");
|
||||
!inm.empty() && (inm == a->etag || inm == std::string("W/") + a->etag)) {
|
||||
res.status = 304;
|
||||
return false;
|
||||
}
|
||||
if (with_isolation_headers) {
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
if (isolation) {
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
}
|
||||
res.set_header("Cache-Control", "public, max-age=31536000, immutable");
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, a->type.c_str());
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
auto serve_asset_nocache = [](const std::string & name, const char * mime, bool with_isolation_headers) {
|
||||
return [name, mime, with_isolation_headers](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
|
||||
auto serve_asset_nocache = [](const std::string & name) {
|
||||
return [name](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
const llama_ui_asset * a = llama_ui_find_asset(name);
|
||||
if (!a) {
|
||||
res.status = 404;
|
||||
return false;
|
||||
}
|
||||
if (with_isolation_headers) {
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
}
|
||||
res.set_header("Cache-Control", "no-cache");
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, a->type.c_str());
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
// Bundle files in _app/immutable/ — SvelteKit outputs them here and index.html
|
||||
// and sw.js reference them via these paths (vanilla build, no plugin).
|
||||
auto serve_bundle = [serve_asset_cached](const httplib::Request & req, httplib::Response & res) {
|
||||
std::string path = req.path;
|
||||
std::string name;
|
||||
const char * mime;
|
||||
if (path.rfind("/_app/immutable/bundle.", 0) == 0 && path.size() > 22) {
|
||||
name = path.substr(1); // strip leading /
|
||||
mime = "application/javascript; charset=utf-8";
|
||||
} else if (path.rfind("/_app/immutable/assets/bundle.", 0) == 0 && path.size() > 30) {
|
||||
name = path.substr(1); // strip leading /
|
||||
mime = "text/css; charset=utf-8";
|
||||
// main index file
|
||||
srv->Get(params.api_prefix + "/", serve_asset_cached("index.html", true));
|
||||
srv->Get(params.api_prefix + "/index.html", serve_asset_cached("index.html", true));
|
||||
|
||||
// All remaining assets registered directly from the embedded asset table.
|
||||
// PWA revalidation files (sw.js, manifest, version.json) use no-cache;
|
||||
// everything else is immutable.
|
||||
static const std::unordered_set<std::string> no_cache_names = {
|
||||
"sw.js",
|
||||
"manifest.webmanifest",
|
||||
"_app/version.json",
|
||||
"build.json"
|
||||
};
|
||||
|
||||
for (const auto & a : llama_ui_get_assets()) {
|
||||
if (a.name == "index.html") continue; // served at "/" and "/index.html" above
|
||||
if (no_cache_names.count(a.name)) {
|
||||
SRV_DBG("serve nocache for %s\n", a.name.c_str());
|
||||
srv->Get(params.api_prefix + "/" + a.name, serve_asset_nocache(a.name));
|
||||
} else {
|
||||
res.status = 404;
|
||||
return false;
|
||||
srv->Get(params.api_prefix + "/" + a.name, serve_asset_cached(a.name, false));
|
||||
}
|
||||
return serve_asset_cached(name, mime, false)(req, res);
|
||||
};
|
||||
|
||||
// _app/ paths — vanilla SvelteKit output, index.html and sw.js reference
|
||||
// bundles and version.json here directly.
|
||||
srv->Get(params.api_prefix + R"(/_app/immutable/bundle\.[^/]+\.js)", serve_bundle);
|
||||
srv->Get(params.api_prefix + R"(/_app/immutable/assets/bundle\.[^/]+\.css)", serve_bundle);
|
||||
srv->Get(params.api_prefix + "/_app/version.json", serve_asset_cached("_app/version.json", "application/json; charset=utf-8", false));
|
||||
|
||||
auto serve_workbox = [serve_asset_cached](const httplib::Request & req, httplib::Response & res) {
|
||||
std::string name = req.path.substr(1);
|
||||
if (name.rfind("workbox-", 0) == 0 && name.size() > 10) {
|
||||
return serve_asset_cached(name, "application/javascript; charset=utf-8", false)(req, res);
|
||||
}
|
||||
res.status = 404;
|
||||
return false;
|
||||
};
|
||||
srv->Get(params.api_prefix + R"(/workbox-[^/]+\.js)", serve_workbox);
|
||||
srv->Get(params.api_prefix + R"(/sw\.js)", serve_asset_cached("sw.js", "application/javascript; charset=utf-8", false));
|
||||
srv->Get(params.api_prefix + "/manifest.webmanifest", serve_asset_cached("manifest.webmanifest", "application/manifest+json; charset=utf-8", false));
|
||||
srv->Get(params.api_prefix + "/version.json", serve_asset_cached("_app/version.json", "application/json; charset=utf-8", false));
|
||||
srv->Get(params.api_prefix + "/build.json", serve_asset_cached("build.json", "application/json; charset=utf-8", false));
|
||||
|
||||
// Finally serve index.html for all other routes (SPA fallback)
|
||||
srv->Get(params.api_prefix + "/", serve_asset_cached("index.html", "text/html; charset=utf-8", true));
|
||||
srv->Get(params.api_prefix + "/favicon.ico", serve_asset_cached("favicon.ico", "image/x-icon", false));
|
||||
srv->Get(params.api_prefix + "/favicon-dark.ico", serve_asset_cached("favicon-dark.ico", "image/x-icon", false));
|
||||
srv->Get(params.api_prefix + "/favicon.svg", serve_asset_cached("favicon.svg", "image/svg+xml", false));
|
||||
srv->Get(params.api_prefix + "/favicon-dark.svg", serve_asset_cached("favicon-dark.svg", "image/svg+xml", false));
|
||||
srv->Get(params.api_prefix + "/pwa-64x64.png", serve_asset_cached("pwa-64x64.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/pwa-192x192.png", serve_asset_cached("pwa-192x192.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/pwa-512x512.png", serve_asset_cached("pwa-512x512.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/maskable-icon-512x512.png", serve_asset_cached("maskable-icon-512x512.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-touch-icon-180x180.png", serve_asset_cached("apple-touch-icon-180x180.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-640x1136.png", serve_asset_cached("apple-splash-portrait-640x1136.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-1136x640.png", serve_asset_cached("apple-splash-landscape-1136x640.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-750x1334.png", serve_asset_cached("apple-splash-portrait-750x1334.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-1334x750.png", serve_asset_cached("apple-splash-landscape-1334x750.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1170x2532.png", serve_asset_cached("apple-splash-portrait-1170x2532.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2532x1170.png", serve_asset_cached("apple-splash-landscape-2532x1170.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1179x2556.png", serve_asset_cached("apple-splash-portrait-1179x2556.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2556x1179.png", serve_asset_cached("apple-splash-landscape-2556x1179.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1206x2622.png", serve_asset_cached("apple-splash-portrait-1206x2622.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2622x1206.png", serve_asset_cached("apple-splash-landscape-2622x1206.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1284x2778.png", serve_asset_cached("apple-splash-portrait-1284x2778.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2778x1284.png", serve_asset_cached("apple-splash-landscape-2778x1284.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1290x2796.png", serve_asset_cached("apple-splash-portrait-1290x2796.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2796x1290.png", serve_asset_cached("apple-splash-landscape-2796x1290.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1320x2868.png", serve_asset_cached("apple-splash-portrait-1320x2868.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2868x1320.png", serve_asset_cached("apple-splash-landscape-2868x1320.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1488x2266.png", serve_asset_cached("apple-splash-portrait-1488x2266.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2266x1488.png", serve_asset_cached("apple-splash-landscape-2266x1488.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1640x2360.png", serve_asset_cached("apple-splash-portrait-1640x2360.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2360x1640.png", serve_asset_cached("apple-splash-landscape-2360x1640.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-1668x2388.png", serve_asset_cached("apple-splash-portrait-1668x2388.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2388x1668.png", serve_asset_cached("apple-splash-landscape-2388x1668.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-2048x2732.png", serve_asset_cached("apple-splash-portrait-2048x2732.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-2732x2048.png", serve_asset_cached("apple-splash-landscape-2732x2048.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-640x1136.png", serve_asset_cached("apple-splash-portrait-dark-640x1136.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-1136x640.png", serve_asset_cached("apple-splash-landscape-dark-1136x640.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-750x1334.png", serve_asset_cached("apple-splash-portrait-dark-750x1334.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-1334x750.png", serve_asset_cached("apple-splash-landscape-dark-1334x750.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1170x2532.png", serve_asset_cached("apple-splash-portrait-dark-1170x2532.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2532x1170.png", serve_asset_cached("apple-splash-landscape-dark-2532x1170.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1179x2556.png", serve_asset_cached("apple-splash-portrait-dark-1179x2556.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2556x1179.png", serve_asset_cached("apple-splash-landscape-dark-2556x1179.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1206x2622.png", serve_asset_cached("apple-splash-portrait-dark-1206x2622.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2622x1206.png", serve_asset_cached("apple-splash-landscape-dark-2622x1206.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1284x2778.png", serve_asset_cached("apple-splash-portrait-dark-1284x2778.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2778x1284.png", serve_asset_cached("apple-splash-landscape-dark-2778x1284.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1290x2796.png", serve_asset_cached("apple-splash-portrait-dark-1290x2796.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2796x1290.png", serve_asset_cached("apple-splash-landscape-dark-2796x1290.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1320x2868.png", serve_asset_cached("apple-splash-portrait-dark-1320x2868.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2868x1320.png", serve_asset_cached("apple-splash-landscape-dark-2868x1320.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1640x2360.png", serve_asset_cached("apple-splash-portrait-dark-1640x2360.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2360x1640.png", serve_asset_cached("apple-splash-landscape-dark-2360x1640.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1668x2388.png", serve_asset_cached("apple-splash-portrait-dark-1668x2388.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2388x1668.png", serve_asset_cached("apple-splash-landscape-dark-2388x1668.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-2048x2732.png", serve_asset_cached("apple-splash-portrait-dark-2048x2732.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2732x2048.png", serve_asset_cached("apple-splash-landscape-dark-2732x2048.png", "image/png", false));
|
||||
srv->Get(params.api_prefix + "/manifest.webmanifest", serve_asset_nocache("manifest.webmanifest", "application/manifest+json", false));
|
||||
srv->Get(params.api_prefix + "/sw.js", serve_asset_nocache("sw.js", "application/javascript; charset=utf-8", false));
|
||||
srv->Get(params.api_prefix + "/version.json", serve_asset_nocache("version.json", "application/json", false));
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
set(TARGET llama-ui)
|
||||
|
||||
set(LLAMA_UI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt UI assets")
|
||||
set(LLAMA_UI_HF_BUCKET "ggml-org/llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt UI assets")
|
||||
|
||||
# Backward compat: forward old var to new one
|
||||
if(DEFINED LLAMA_BUILD_WEBUI)
|
||||
|
||||
@@ -1,16 +1,44 @@
|
||||
// llama-ui-embed: generate ui.cpp / ui.h that embed UI assets as C arrays.
|
||||
//
|
||||
// Usage:
|
||||
// llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
|
||||
// llama-ui-embed <out_cpp> <out_h> [<asset_dir>]
|
||||
//
|
||||
// Recursively embeds every regular file under <asset_dir>.
|
||||
// Asset names are relative paths from <asset_dir> (e.g. "_app/immutable/bundle.HASH.js").
|
||||
// Without <asset_dir>, emits an empty asset table.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
// Maps an asset name to the MIME type derived from its file extension.
//
// The extension is the text after the last '.' in name and is matched
// ASCII-case-insensitively, so "LOGO.PNG" resolves to the same type as "logo.png".
// Names without a '.' and unknown extensions map to "application/octet-stream".
//
// The returned pointer refers to a string literal and is valid for the program lifetime.
static const char * mime_from_ext(const std::string & name) {
    static const struct {
        const char * ext;
        const char * mime;
    } known_types[] = {
        { "html",        "text/html; charset=utf-8"  },
        { "css",         "text/css"                  },
        { "js",          "application/javascript"    },
        { "json",        "application/json"          },
        { "webmanifest", "application/manifest+json" },
        { "svg",         "image/svg+xml"             },
        { "png",         "image/png"                 },
        { "jpg",         "image/jpeg"                },
        { "jpeg",        "image/jpeg"                },
        { "ico",         "image/x-icon"              },
        { "woff",        "font/woff"                 },
        { "woff2",       "font/woff2"                },
    };
    static const char * const fallback = "application/octet-stream";

    const auto dot = name.rfind('.');
    if (dot == std::string::npos) {
        return fallback;
    }

    // lowercase the extension (ASCII only) so that the lookup is case-insensitive
    std::string ext = name.substr(dot + 1);
    for (char & c : ext) {
        if (c >= 'A' && c <= 'Z') {
            c = static_cast<char>(c - 'A' + 'a');
        }
    }

    for (const auto & entry : known_types) {
        if (ext == entry.ext) {
            return entry.mime;
        }
    }
    return fallback;
}
|
||||
|
||||
// Computes FNV-1a hash of the data
|
||||
static uint64_t fnv_hash(const uint8_t * data, size_t len) {
|
||||
@@ -24,10 +52,10 @@ static uint64_t fnv_hash(const uint8_t * data, size_t len) {
|
||||
return hash;
|
||||
}
|
||||
|
||||
static bool read_file(const std::string & path, std::vector<unsigned char> & out) {
|
||||
static bool read_file(const std::filesystem::path & path, std::vector<unsigned char> & out) {
|
||||
std::ifstream f(path, std::ios::binary | std::ios::ate);
|
||||
if (!f) {
|
||||
fprintf(stderr, "embed: cannot open %s\n", path.c_str());
|
||||
fprintf(stderr, "embed: cannot open %s\n", path.string().c_str());
|
||||
return false;
|
||||
}
|
||||
const auto sz = f.tellg();
|
||||
@@ -77,7 +105,24 @@ static bool write_if_different(const std::string & path, const std::string & con
|
||||
if (!content.empty()) {
|
||||
out.write(content.data(), static_cast<std::streamsize>(content.size()));
|
||||
}
|
||||
return out.good();
|
||||
bool ok = out.good();
|
||||
if (ok) {
|
||||
printf("embed: write output file %s\n", path.c_str());
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
// Returns the last '/'-separated component of `name`.
// A name without any '/' is returned unchanged; a name ending in '/' yields an empty string.
// Only forward slashes are treated as separators.
static std::string path_basename(const std::string & name) {
    const size_t slash = name.rfind('/');
    if (slash == std::string::npos) {
        return name;
    }
    return name.substr(slash + 1);
}
|
||||
// Returns true when `s` begins with the NUL-terminated string `prefix`.
// An empty prefix matches every string. `prefix` must not be null.
static bool str_starts_with(const std::string & s, const char * prefix) {
    // rfind() limited to position 0 can only match at the very start of `s`
    return s.rfind(prefix, 0) == 0;
}
|
||||
// Returns true when `s` ends with the NUL-terminated string `suffix`.
// An empty suffix matches every string. `suffix` must not be null.
static bool str_ends_with(const std::string & s, const char * suffix) {
    const size_t suffix_len = strlen(suffix);
    if (suffix_len > s.size()) {
        return false;
    }
    const size_t offset = s.size() - suffix_len;
    return s.compare(offset, suffix_len, suffix) == 0;
}
|
||||
|
||||
static std::string fmt(const char * pattern, ...) {
|
||||
@@ -89,70 +134,164 @@ static std::string fmt(const char * pattern, ...) {
|
||||
return (n > 0) ? std::string(tmp, static_cast<size_t>(n)) : std::string();
|
||||
}
|
||||
|
||||
// One file to embed.
struct asset_entry {
    std::string           name; // asset name: path relative to the asset directory, with forward slashes
    std::filesystem::path path; // location of the file on disk, used to read its bytes
};
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3 || ((argc - 3) % 2) != 0) {
|
||||
fprintf(stderr, "usage: %s <out_cpp> <out_h> [<name> <path>]...\n", argv[0]);
|
||||
if (argc < 3 || argc > 4) {
|
||||
fprintf(stderr, "usage: %s <out_cpp> <out_h> [<asset_dir>]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::string out_cpp = argv[1];
|
||||
const std::string out_h = argv[2];
|
||||
const int n_assets = (argc - 3) / 2;
|
||||
const std::string in_dir = argv[3];
|
||||
|
||||
std::vector<asset_entry> assets;
|
||||
if (argc == 4) {
|
||||
const std::filesystem::path dir = in_dir;
|
||||
|
||||
std::error_code ec;
|
||||
std::filesystem::recursive_directory_iterator it(dir, ec);
|
||||
if (ec) {
|
||||
fprintf(stderr, "embed: cannot iterate %s: %s\n", argv[3], ec.message().c_str());
|
||||
return 1;
|
||||
}
|
||||
for (const auto & entry : it) {
|
||||
if (!entry.is_regular_file()) {
|
||||
continue;
|
||||
}
|
||||
// name is the relative path from dir, with forward slashes
|
||||
const std::string name = entry.path().lexically_relative(dir).generic_string();
|
||||
assets.push_back({ name, entry.path() });
|
||||
}
|
||||
|
||||
// directory iteration order is unspecified; sort for reproducible output
|
||||
std::sort(assets.begin(), assets.end(),
|
||||
[](const asset_entry & a, const asset_entry & b) { return a.name < b.name; });
|
||||
}
|
||||
|
||||
const int n_assets = static_cast<int>(assets.size());
|
||||
|
||||
if (n_assets > 0) {
|
||||
using match_fn = std::function<bool(const std::string &)>;
|
||||
auto exact = [](const char * name) -> match_fn {
|
||||
return [name](const std::string & base) { return base == name; };
|
||||
};
|
||||
|
||||
struct required_check { const char * label; match_fn match; bool found; };
|
||||
required_check checks[] = {
|
||||
{ "index.html", exact("index.html"), false },
|
||||
{ "loading.html", exact("loading.html"), false },
|
||||
{ "manifest.webmanifest", exact("manifest.webmanifest"), false },
|
||||
{ "sw.js", exact("sw.js"), false },
|
||||
{ "build.json", exact("build.json"), false },
|
||||
{ "version.json", exact("version.json"), false },
|
||||
{ "bundle[hash].js", [](const std::string & b) {
|
||||
return str_starts_with(b, "bundle") && str_ends_with(b, ".js");
|
||||
}, false },
|
||||
{ "bundle[hash].css", [](const std::string & b) {
|
||||
return str_starts_with(b, "bundle") && str_ends_with(b, ".css");
|
||||
}, false },
|
||||
{ "workbox[hash].js", [](const std::string & b) {
|
||||
return str_starts_with(b, "workbox") && str_ends_with(b, ".js");
|
||||
}, false },
|
||||
};
|
||||
|
||||
for (const auto & a : assets) {
|
||||
const std::string base = path_basename(a.name);
|
||||
for (auto & c : checks) {
|
||||
if (!c.found) { c.found = c.match(base); }
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char *> missing;
|
||||
for (const auto & c : checks) {
|
||||
if (!c.found) { missing.push_back(c.label); }
|
||||
}
|
||||
if (!missing.empty()) {
|
||||
fprintf(stderr, "\ncurrent asset files:\n");
|
||||
for (const auto & a : assets) {
|
||||
fprintf(stderr, " %s\n", a.name.c_str());
|
||||
}
|
||||
fprintf(stderr, "missing required asset(s):\n");
|
||||
for (const char * m : missing) {
|
||||
fprintf(stderr, " %s\n", m);
|
||||
}
|
||||
fprintf(stderr, "hint: try cleaning your build directory: %s\n", in_dir.c_str());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::string h;
|
||||
h += "#pragma once\n\n#include <stddef.h>\n\n";
|
||||
h += "#pragma once\n\n#include <array>\n#include <string>\n\n";
|
||||
if (n_assets > 0) {
|
||||
h += "#define LLAMA_UI_HAS_ASSETS 1\n\n";
|
||||
}
|
||||
h +=
|
||||
"struct llama_ui_asset {\n"
|
||||
" const char * name;\n"
|
||||
" std::string name;\n"
|
||||
" const unsigned char * data;\n"
|
||||
" size_t size;\n"
|
||||
" const char * etag;\n"
|
||||
" std::size_t size;\n"
|
||||
" std::string etag;\n"
|
||||
" std::string type;\n"
|
||||
"};\n\n"
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char * name);\n";
|
||||
"const llama_ui_asset * llama_ui_find_asset(const std::string & name);\n";
|
||||
h += fmt("const std::array<llama_ui_asset, %d> & llama_ui_get_assets();\n", n_assets);
|
||||
|
||||
std::string cpp;
|
||||
cpp += "#include \"ui.h\"\n\n#include <string.h>\n\n";
|
||||
cpp += "#include \"ui.h\"\n\n";
|
||||
|
||||
if (n_assets > 0) {
|
||||
for (int i = 0; i < n_assets; i++) {
|
||||
const char * path = argv[3 + i * 2 + 1];
|
||||
std::vector<unsigned char> bytes;
|
||||
if (!read_file(path, bytes)) {
|
||||
if (!read_file(assets[i].path, bytes)) {
|
||||
return 1;
|
||||
}
|
||||
if (bytes.empty()) {
|
||||
fprintf(stderr, "embed: empty file: %s\n", assets[i].path.generic_string().c_str());
|
||||
return 1;
|
||||
}
|
||||
cpp += fmt("static const unsigned char asset_%d_data[] = {", i);
|
||||
append_bytes_hex(cpp, bytes);
|
||||
const auto hash = fnv_hash(bytes.data(), bytes.size());
|
||||
|
||||
cpp += fmt("};\nstatic const size_t asset_%d_size = %zu;\n",
|
||||
cpp += fmt("};\nstatic const std::size_t asset_%d_size = %zu;\n",
|
||||
i, bytes.size());
|
||||
cpp += fmt("static const char asset_%d_etag[] = \"\\\"0x%016" PRIx64 "\\\"\";\n\n",
|
||||
cpp += fmt("static const char asset_%d_etag[] = \"\\\"0x%016" PRIx64 "\\\"\";\n\n",
|
||||
i, hash);
|
||||
}
|
||||
|
||||
cpp += "static const llama_ui_asset g_assets[] = {\n";
|
||||
cpp += fmt("static const std::array<llama_ui_asset, %d> g_assets = {{\n", n_assets);
|
||||
for (int i = 0; i < n_assets; i++) {
|
||||
cpp += fmt(" { \"%s\", asset_%d_data, asset_%d_size, asset_%d_etag },\n",
|
||||
argv[3 + i * 2], i, i, i);
|
||||
const std::string & name = assets[i].name;
|
||||
cpp += fmt(" { \"%s\", asset_%d_data, asset_%d_size, asset_%d_etag, \"%s\" },\n",
|
||||
name.c_str(), i, i, i, mime_from_ext(name));
|
||||
}
|
||||
cpp += "};\n\n";
|
||||
cpp += "}};\n\n";
|
||||
|
||||
cpp +=
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char * name) {\n"
|
||||
"const llama_ui_asset * llama_ui_find_asset(const std::string & name) {\n"
|
||||
" for (const auto & a : g_assets) {\n"
|
||||
" if (strcmp(a.name, name) == 0) {\n"
|
||||
" if (a.name == name) {\n"
|
||||
" return &a;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" return nullptr;\n"
|
||||
"}\n";
|
||||
cpp += fmt("const std::array<llama_ui_asset, %d> & llama_ui_get_assets() {\n", n_assets);
|
||||
cpp += " return g_assets;\n"
|
||||
"}\n";
|
||||
} else {
|
||||
cpp +=
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char *) {\n"
|
||||
"const llama_ui_asset * llama_ui_find_asset(const std::string &) {\n"
|
||||
" return nullptr;\n"
|
||||
"}\n"
|
||||
"const std::array<llama_ui_asset, 0> & llama_ui_get_assets() {\n"
|
||||
" static const std::array<llama_ui_asset, 0> empty{};\n"
|
||||
" return empty;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
|
||||
@@ -23,8 +23,7 @@ export function buildInfoPlugin(): Plugin {
|
||||
if (processed) return;
|
||||
processed = true;
|
||||
|
||||
const buildNumber = process.env.LLAMA_BUILD_NUMBER;
|
||||
if (!buildNumber) return;
|
||||
const buildNumber = process.env.LLAMA_BUILD_NUMBER || 'b0000';
|
||||
|
||||
const outDir = resolve(OUTPUT_DIR);
|
||||
const indexPath = resolve(outDir, 'index.html');
|
||||
|
||||
Reference in New Issue
Block a user