imatrix : warn when GGUF imatrix is saved without .gguf suffix (#15076 )

* imatrix : add warning when suffix is not .gguf for GGUF imatrix * imatrix : only warn about suffix when output format is unspecified
cmake: Add GGML_BACKEND_DIR option (#15074 )
2026-05-03 08:14:19 +02:00 · 2025-08-04 23:26:52 +02:00 · 2025-08-04 21:29:14 +02:00 · 2025-08-04 21:01:48 +02:00
8 changed files with 37 additions and 12 deletions
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2649,10 +2649,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
    add_opt(common_arg(
        {"--output-format"}, "{gguf,dat}",
-        string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
+        string_format("output format for imatrix file (default: %s)", params.imat_dat > 0 ? "dat" : "gguf"),
        [](common_params & params, const std::string & value) {
-            /**/ if (value == "gguf") { params.imat_dat = false; }
-            else if (value == "dat")  { params.imat_dat = true;  }
+            /**/ if (value == "gguf") { params.imat_dat = -1; }
+            else if (value == "dat")  { params.imat_dat = 1;  }
            else { throw std::invalid_argument("invalid output format"); }
        }
    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
--- a/common/common.h
+++ b/common/common.h
@@ -439,7 +439,7 @@ struct common_params {
    int32_t n_out_freq  = 10; // output the imatrix every n_out_freq iterations
    int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
    int32_t i_chunk     =  0; // start processing from this chunk
-    bool    imat_dat    = false; // whether the legacy imatrix.dat format should be output
+    int8_t  imat_dat    =  0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat)

    bool process_output  = false; // collect data for the output tensor
    bool compute_ppl     = true;  // whether to compute perplexity
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -39,8 +39,9 @@ if (WIN32)
    set(CMAKE_SHARED_MODULE_PREFIX  "")
 endif()

-option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
-option(GGML_BACKEND_DL   "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
+option(BUILD_SHARED_LIBS           "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
+option(GGML_BACKEND_DL             "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
+set(GGML_BACKEND_DIR "" CACHE PATH "ggml: directory to load dynamic backends from (requires GGML_BACKEND_DL")

 #
 # option list
--- a/ggml/cmake/ggml-config.cmake.in
+++ b/ggml/cmake/ggml-config.cmake.in
@@ -106,7 +106,7 @@ if(NOT TARGET ggml::ggml)

    find_library(GGML_LIBRARY ggml
        REQUIRED
-        HINTS ${GGML_LIB_DIR}
+        HINTS ${GGML_LIB_DIR} ${GGML_BACKEND_DIR}
        NO_CMAKE_FIND_ROOT_PATH)

    add_library(ggml::ggml UNKNOWN IMPORTED)
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -214,6 +214,13 @@ add_library(ggml
            ggml-backend-reg.cpp)
 add_library(ggml::ggml ALIAS ggml)

+if (GGML_BACKEND_DIR)
+    if (NOT GGML_BACKEND_DL)
+        message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
+    endif()
+    target_compile_definitions(ggml PUBLIC GGML_BACKEND_DIR="${GGML_BACKEND_DIR}")
+endif()
+
 target_link_libraries(ggml PUBLIC ggml-base)

 if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -227,7 +234,11 @@ function(ggml_add_backend_library backend)
        set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
        add_dependencies(ggml ${backend})
-        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
+        if (GGML_BACKEND_DIR)
+            install(TARGETS ${backend} LIBRARY DESTINATION ${GGML_BACKEND_DIR})
+        else()
+            install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
+        endif()
    else()
        add_library(${backend} ${ARGN})
        target_link_libraries(ggml PUBLIC ${backend})
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -498,6 +498,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,

    std::vector<fs::path> search_paths;
    if (user_search_path == nullptr) {
+#ifdef GGML_BACKEND_DIR
+        search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
+#endif
        // default search paths: executable directory, current directory
        search_paths.push_back(get_executable_path());
        search_paths.push_back(fs::current_path());
--- a/gguf-py/gguf/scripts/gguf_new_metadata.py
+++ b/gguf-py/gguf/scripts/gguf_new_metadata.py
@@ -111,6 +111,7 @@ def main() -> None:
    parser.add_argument("--general-description",                       type=str,  help="The models general.description", metavar='"Description ..."')
    parser.add_argument("--chat-template",                             type=str,  help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."')
    parser.add_argument("--chat-template-config",                      type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json')
+    parser.add_argument("--chat-template-file",                        type=Path, help="Jinja file containing chat template", metavar='chat_template.jinja')
    parser.add_argument("--pre-tokenizer",                             type=str,  help="The models tokenizer.ggml.pre", metavar='"pre tokenizer"')
    parser.add_argument("--remove-metadata",      action="append",     type=str,  help="Remove metadata (by key name) from output model", metavar='general.url')
    parser.add_argument("--special-token",        action="append",     type=str,  help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '"<token>"'))
@@ -134,12 +135,17 @@ def main() -> None:
        new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template)

    if args.chat_template_config:
-        with open(args.chat_template_config, 'r') as fp:
+        with open(args.chat_template_config, 'r', encoding='utf-8') as fp:
            config = json.load(fp)
            template = config.get('chat_template')
            if template:
                new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)

+    if args.chat_template_file:
+        with open(args.chat_template_file, 'r', encoding='utf-8') as fp:
+            template = fp.read()
+            new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
+
    if args.pre_tokenizer:
        new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer)

--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -506,13 +506,17 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {

 void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
    auto fname = m_params.out_file;
-    bool use_legacy_format = m_params.imat_dat;
+    int8_t use_legacy_format = m_params.imat_dat;

-    if (use_legacy_format) {
+    if (use_legacy_format > 0) {
        this->save_imatrix_legacy(n_chunk);
        return;
    }
-    // else, default to GGUF imatrix
+    // only warn when `--output-format gguf` is not specified
+    if (use_legacy_format == 0 && !string_ends_with(fname, ".gguf")) {
+        LOG_WRN("\n%s: saving imatrix using GGUF format with a different suffix than .gguf\n", __func__);
+        LOG_WRN("%s: if you want the previous imatrix format, use --output-format dat\n", __func__);
+    }

    if (n_chunk > 0) {
        fname += ".at_";
Author	SHA1	Message	Date
compilade	19f68fa5a4	imatrix : warn when GGUF imatrix is saved without .gguf suffix (#15076 ) * imatrix : add warning when suffix is not .gguf for GGUF imatrix * imatrix : only warn about suffix when output format is unspecified	2025-08-04 23:26:52 +02:00
Christian Kastner	41613437ff	cmake: Add GGML_BACKEND_DIR option (#15074 ) * cmake: Add GGML_BACKEND_DIR option This can be used by distributions to specify where to look for backends when ggml is built with GGML_BACKEND_DL=ON. * Fix phrasing	2025-08-04 21:29:14 +02:00
Sigbjørn Skjæret	e5bebe5251	gguf-py : add --chat-template-file to gguf_new_metadata (#15075 )	2025-08-04 21:01:48 +02:00