nits: change wording

server: bump timeout to 3600s
2026-05-31 07:06:56 +02:00 · 2026-05-29 00:50:44 +02:00 · 2026-05-28 23:45:03 +02:00
3 changed files with 7 additions and 3 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -587,7 +587,7 @@ struct common_params {
    // server params
    int32_t port                = 8080;          // server listens on this network port
    bool    reuse_port          = false;         // allow multiple sockets to bind to the same port
-    int32_t timeout_read        = 600;           // http read timeout in seconds
+    int32_t timeout_read        = 3600;          // http read timeout in seconds
    int32_t timeout_write       = timeout_read;  // http write timeout in seconds
    int32_t n_threads_http      = -1;    // number of threads to process HTTP requests (TODO: support threadpool)
    int32_t n_cache_reuse       = 0;     // min chunk size to reuse from the cache via KV shifting
--- a/tools/server/server-queue.cpp
+++ b/tools/server/server-queue.cpp
@@ -381,8 +381,10 @@ server_task_result_ptr server_response_reader::next(const std::function<bool()>
        if (result == nullptr) {
            // timeout, check stop condition
            if (should_stop()) {
-                SRV_WRN("%s", "stopping wait for next result due to should_stop condition (adjust the --timeout argument if needed)\n");
-                SRV_WRN("%s", "ref: https://github.com/ggml-org/llama.cpp/pull/22907\n");
+                const int64_t time_elapsed_ms = ggml_time_ms() - time_start_ms;
+                if (time_elapsed_ms > 30000) {
+                    SRV_WRN("%s", "request cancelled after 30s, potentially a client-side timeout; please check your client's code\n");
+                }
                return nullptr;
            }
        } else {
--- a/tools/server/server-queue.h
+++ b/tools/server/server-queue.h
@@ -169,6 +169,8 @@ struct server_response_reader {
    bool cancelled = false;
    int polling_interval_seconds;

+    const int64_t time_start_ms = ggml_time_ms();
+
    // tracking generation state and partial tool calls
    // only used by streaming completions
    std::vector<task_result_state> states;
Author	SHA1	Message	Date
Xuan Son Nguyen	9bc82402ee	nits: change wording	2026-05-29 00:50:44 +02:00
Xuan Son Nguyen	cb71f50783	server: bump timeout to 3600s	2026-05-28 23:45:03 +02:00