Compare commits

...

7 Commits
b3752 ... b3759

Author SHA1 Message Date
Michael Podvitskiy
6988da94a2 cmake : correct order of sycl flags (#9497) 2024-09-15 19:55:52 +03:00
Csaba Kecskemeti
3c7989fd29 py : add "LLaMAForCausalLM" conversion support (#9485)
Co-authored-by: Csaba Kecskemeti <csabakecskemeti@Csabas-Mac-Pro.local>
2024-09-15 10:48:25 +03:00
OSecret
d6b37c881f readme : update tools list (#9475)
* Added link to proprietary wrapper for Unity3d into README.md

Wrapper has prebuild library and was tested on iOS, Android, WebGL, PC, Mac platforms, has online demos like [this](https://d23myu0xfn2ttc.cloudfront.net/rich/index.html) and [that](https://d23myu0xfn2ttc.cloudfront.net/).

* Update README.md

Fixes upon review
2024-09-15 10:36:53 +03:00
Michael Podvitskiy
7596487beb cmake : try to fix sycl+intel build (#9487) 2024-09-15 10:06:38 +03:00
Yuri Khrustalev
822b6322de ggml : ggml_type_name return "NONE" for invalid values (#9458)
When running on Windows, the quantization utility attempts to print the types that are not set which leads to a crash.
2024-09-14 12:54:37 +03:00
VoidIsVoid
dcdcee3a74 server: add data: [DONE] to /chat/completions stream response (#9459) 2024-09-14 11:36:44 +02:00
Georgi Gerganov
1f4111e540 cmake : use list(APPEND ...) instead of set() + dedup linker (#9463)
* cmake : use list(APPEND ...) instead of set() + dedup linker

ggml-ci

* cmake : try fix sycl

* cmake : try to fix sycl 2

* cmake : fix sycl build (#9469)

* try fix sycl build

* use CMAKE_CXX_FLAGS as a string variable

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* one more CMAKE_CXX_FLAGS fix (#9471)

---------

Co-authored-by: Michael Podvitskiy <podvitskiymichael@gmail.com>
2024-09-14 10:55:05 +03:00
6 changed files with 55 additions and 38 deletions

View File

@@ -173,6 +173,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
- [akx/ggify](https://github.com/akx/ggify) download PyTorch models from HuggingFace Hub and convert them to GGML
- [crashr/gppm](https://github.com/crashr/gppm) launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
- [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage
- [Styled Lines](https://marketplace.unity.com/packages/tools/generative-ai/styled-lines-llama-cpp-model-292902) (proprietary licensed, async wrapper of inference part for game development in Unity3d with prebuild Mobile and Web platform wrappers and a model example)
**Infrastructure:**

View File

@@ -1487,7 +1487,7 @@ class StableLMModel(Model):
raise ValueError(f"Unprocessed norms: {norms}")
@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
class LlamaModel(Model):
model_arch = gguf.MODEL_ARCH.LLAMA

View File

@@ -2993,6 +2993,8 @@ int main(int argc, char ** argv) {
}, [&](json error_data) {
server_sent_event(sink, "error", error_data);
});
static const std::string ev_done = "data: [DONE]\n\n";
sink.write(ev_done.data(), ev_done.size());
sink.done();
return true;
};

View File

@@ -1020,6 +1020,8 @@ async def oai_chat_completions(user_prompt,
event_data = line.split(': ', 1)
assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```'
chunk_raw = event_data[1]
if chunk_raw == '[DONE]':
break
chunk = json.loads(chunk_raw)
assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```"

View File

@@ -26,6 +26,9 @@ if (NOT MSVC)
endif()
endif()
unset(GGML_EXTRA_LIBS_PRIVATE)
unset(GGML_EXTRA_LIBS_PUBLIC)
if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
@@ -35,7 +38,7 @@ if (APPLE AND GGML_ACCELERATE)
add_compile_definitions(ACCELERATE_NEW_LAPACK)
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
list(APPEND GGML_EXTRA_LIBS_PRIVATE ${ACCELERATE_FRAMEWORK})
else()
message(WARNING "Accelerate framework not found")
endif()
@@ -87,7 +90,7 @@ if (GGML_METAL)
COMMENT "Generate assembly for embedded Metal library"
)
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM})
else()
if (GGML_METAL_SHADER_DEBUG)
# custom command to do the following:
@@ -132,7 +135,7 @@ if (GGML_METAL)
)
endif() # GGML_METAL_EMBED_LIBRARY
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS}
list(APPEND GGML_EXTRA_LIBS_PRIVATE
${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK}
${METALKIT_FRAMEWORK}
@@ -157,11 +160,11 @@ if (GGML_OPENMP)
add_compile_definitions(GGML_USE_OPENMP)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
if (GGML_MUSA)
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} "/usr/lib/llvm-10/include/openmp")
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} "/usr/lib/llvm-10/lib/libomp.so")
list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp")
list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
endif()
else()
message(WARNING "OpenMP not found")
@@ -244,8 +247,8 @@ if (GGML_BLAS)
set(GGML_HEADERS_BLAS ../include/ggml-blas.h)
set(GGML_SOURCES_BLAS ggml-blas.cpp)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES})
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
list(APPEND GGML_EXTRA_LIBS_PRIVATE ${BLAS_LIBRARIES})
list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
@@ -368,19 +371,19 @@ if (GGML_CUDA)
if (GGML_STATIC)
if (WIN32)
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
else ()
if (GGML_MUSA)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart_static MUSA::mublas_static)
list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart_static MUSA::mublas_static)
else()
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
endif()
endif()
else()
if (GGML_MUSA)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart MUSA::mublas)
list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart MUSA::mublas)
else()
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
endif()
endif()
@@ -388,9 +391,9 @@ if (GGML_CUDA)
# No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
else()
if (GGML_MUSA)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
else()
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
endif()
endif()
else()
@@ -495,7 +498,7 @@ if (GGML_HIPBLAS)
if (CXX_IS_HIPCC)
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} hip::device)
list(APPEND GGML_EXTRA_LIBS_PRIVATE hip::device)
else()
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
endif()
@@ -504,7 +507,7 @@ if (GGML_HIPBLAS)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
endif()
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
list(APPEND GGML_EXTRA_LIBS_PUBLIC hip::host roc::rocblas roc::hipblas)
endif()
if (GGML_SYCL)
@@ -513,7 +516,8 @@ if (GGML_SYCL)
endif()
check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)
if ( DEFINED ENV{ONEAPI_ROOT})
if (DEFINED ENV{ONEAPI_ROOT})
message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
elseif(SUPPORTS_SYCL)
message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
@@ -551,26 +555,29 @@ if (GGML_SYCL)
find_package(DNNL)
message("-- DNNL found:" ${DNNL_FOUND})
if (GGML_SYCL_TARGET STREQUAL "INTEL")
add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
else()
add_compile_definitions(GGML_SYCL_DNNL=0)
endif()
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
list(APPEND GGML_EXTRA_LIBS_PRIVATE DNNL::dnnl)
endif()
if (WIN32)
find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
list(APPEND GGML_EXTRA_LIBS_PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
else()
if (GGML_SYCL_TARGET STREQUAL "INTEL")
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl pthread m dl onemkl)
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
endif()
endif()
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
list(APPEND GGML_EXTRA_LIBS DNNL::dnnl)
endif()
endif()
if (GGML_RPC)
@@ -579,7 +586,7 @@ if (GGML_RPC)
list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC)
if (WIN32)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32)
list(APPEND GGML_EXTRA_LIBS_PRIVATE ws2_32)
endif()
set(GGML_HEADERS_RPC ../include/ggml-rpc.h)
@@ -657,8 +664,8 @@ if (GGML_VULKAN)
set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header})
set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source})
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} Vulkan::Vulkan)
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
list(APPEND GGML_EXTRA_LIBS_PRIVATE Vulkan::Vulkan)
list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
else()
message(WARNING "Vulkan not found")
endif()
@@ -817,8 +824,8 @@ if (GGML_KOMPUTE)
list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} kompute)
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
list(APPEND GGML_EXTRA_LIBS_PRIVATE kompute)
list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
else()
message(WARNING "Kompute not found")
endif()
@@ -883,9 +890,10 @@ if (GGML_CANN)
message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
set(GGML_EXTRA_LIBDIRS ${GGML_EXTRA_LIBDIRS} ${CANN_INSTALL_DIR}/lib64)
list(APPEND GGML_EXTRA_LIBS_PRIVATE ${CANN_LIBRARIES} )
list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS})
list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64)
list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
endif()
else()
@@ -1322,13 +1330,13 @@ if (EMSCRIPTEN)
set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128")
endif()
target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
target_include_directories(ggml PUBLIC ../include)
target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
target_include_directories(ggml PUBLIC ../include)
target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
target_compile_features (ggml PRIVATE c_std_11) # don't bump
target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS})
list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads)
find_library(MATH_LIBRARY m)
if (MATH_LIBRARY)
@@ -1337,6 +1345,10 @@ if (MATH_LIBRARY)
endif()
endif()
list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PRIVATE)
list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PUBLIC)
target_link_libraries(ggml PRIVATE ${GGML_EXTRA_LIBS_PRIVATE} PUBLIC ${GGML_EXTRA_LIBS_PUBLIC})
if (BUILD_SHARED_LIBS)
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)

View File

@@ -3399,7 +3399,7 @@ double ggml_type_sizef(enum ggml_type type) {
}
GGML_CALL const char * ggml_type_name(enum ggml_type type) {
return type_traits[type].type_name;
return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
}
GGML_CALL bool ggml_is_quantized(enum ggml_type type) {