mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-10 19:45:56 +02:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2e9eab80c2 | ||
|
|
2fbe3b7bb7 | ||
|
|
63391852b0 | ||
|
|
086a63e3a5 | ||
|
|
b63509262a | ||
|
|
48f47565a7 | ||
|
|
02e409a5be | ||
|
|
6b82eb7883 | ||
|
|
86a3f0fad8 | ||
|
|
63908b631a |
@@ -15,6 +15,7 @@ The project differentiates between 3 levels of contributors:
|
||||
- If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
|
||||
- If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
|
||||
- Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR
|
||||
- When adding support for a new model or feature, focus on **CPU support only** in the initial PR unless you have a good reason not to. Add support for other backends like CUDA in follow-up PRs
|
||||
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
|
||||
- If your PR becomes stale, rebase it on top of latest `master` to get maintainers attention
|
||||
- Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
|
||||
|
||||
132
common/chat.cpp
132
common/chat.cpp
@@ -1,5 +1,6 @@
|
||||
#include "chat.h"
|
||||
#include "chat-parser.h"
|
||||
#include "chat-peg-parser.h"
|
||||
#include "common.h"
|
||||
#include "json-partial.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
@@ -150,6 +151,7 @@ struct templates_params {
|
||||
common_chat_tool_choice tool_choice;
|
||||
json json_schema;
|
||||
bool parallel_tool_calls;
|
||||
common_reasoning_format reasoning_format;
|
||||
bool stream;
|
||||
std::string grammar;
|
||||
bool add_generation_prompt = true;
|
||||
@@ -589,6 +591,16 @@ common_chat_templates_ptr common_chat_templates_init(
|
||||
"{%- if false %}");
|
||||
}
|
||||
|
||||
// TODO @aldehir : this is a temporary fix, pending Minja changes
|
||||
// Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
|
||||
if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
|
||||
// search for the error message and patch it
|
||||
&& default_template_src.find("if (message['content'] is none or") != std::string::npos) {
|
||||
string_replace_all(default_template_src,
|
||||
"{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
|
||||
"{%- if false %}");
|
||||
}
|
||||
|
||||
std::string token_bos = bos_token_override;
|
||||
std::string token_eos = eos_token_override;
|
||||
bool add_bos = false;
|
||||
@@ -987,6 +999,118 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
|
||||
return data;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
|
||||
// Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
|
||||
auto adjusted_messages = json::array();
|
||||
for (const auto & msg : inputs.messages) {
|
||||
auto role = msg.value("role", "");
|
||||
if (role != "system" && role != "assistant") {
|
||||
// Only adjust system and assistant messages. Interestingly, the system message may contain thinking.
|
||||
adjusted_messages.push_back(msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto content = json::array();
|
||||
|
||||
// If message contains `reasoning_content`, add it as a block of type `thinking`
|
||||
if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
|
||||
content.push_back({
|
||||
{"type", "thinking"},
|
||||
{"thinking", msg.at("reasoning_content").get<std::string>()},
|
||||
});
|
||||
}
|
||||
|
||||
// If message contains `content`, add it as a block of type `text`
|
||||
if (msg.contains("content")) {
|
||||
if (msg.at("content").is_string()) {
|
||||
content.push_back({
|
||||
{"type", "text"},
|
||||
{"text", msg.at("content").get<std::string>()},
|
||||
});
|
||||
} else if (msg.at("content").is_array()) {
|
||||
auto blocks = msg.at("content");
|
||||
content.insert(content.end(), blocks.begin(), blocks.end());
|
||||
}
|
||||
}
|
||||
|
||||
auto adjusted = msg;
|
||||
adjusted["content"] = content;
|
||||
adjusted.erase("reasoning_content");
|
||||
adjusted_messages.push_back(adjusted);
|
||||
}
|
||||
|
||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
auto include_grammar = true;
|
||||
|
||||
data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
|
||||
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
||||
data.preserved_tokens = {
|
||||
"[THINK]",
|
||||
"[/THINK]",
|
||||
"[TOOL_CALLS]",
|
||||
"[ARGS]",
|
||||
};
|
||||
|
||||
auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
|
||||
auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
|
||||
|
||||
// Response format parser
|
||||
if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
|
||||
// Ministral wants to emit json surrounded by code fences
|
||||
return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
|
||||
}
|
||||
|
||||
// Tool call parser
|
||||
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
|
||||
auto tool_choice = p.choice();
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool.at("function");
|
||||
std::string name = function.at("name");
|
||||
const auto & schema = function.at("parameters");
|
||||
|
||||
tool_choice |= p.rule("tool-" + name,
|
||||
p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
|
||||
+ p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
|
||||
);
|
||||
});
|
||||
|
||||
auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
|
||||
auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
|
||||
auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
|
||||
|
||||
return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
|
||||
}
|
||||
|
||||
// Content only parser
|
||||
include_grammar = false;
|
||||
return reasoning << p.content(p.rest());
|
||||
});
|
||||
|
||||
data.parser = parser.save();
|
||||
|
||||
if (include_grammar) {
|
||||
data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
|
||||
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool.at("function");
|
||||
auto schema = function.at("parameters");
|
||||
builder.resolve_refs(schema);
|
||||
});
|
||||
parser.build_grammar(builder, data.grammar_lazy);
|
||||
});
|
||||
|
||||
data.grammar_triggers = {
|
||||
{COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
|
||||
};
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
data.prompt = apply(tmpl, inputs);
|
||||
@@ -2341,6 +2465,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
||||
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
|
||||
params.add_generation_prompt = inputs.add_generation_prompt;
|
||||
params.tool_choice = inputs.tool_choice;
|
||||
params.reasoning_format = inputs.reasoning_format;
|
||||
params.enable_thinking = inputs.enable_thinking;
|
||||
params.grammar = inputs.grammar;
|
||||
params.now = inputs.now;
|
||||
@@ -2504,6 +2629,13 @@ static common_chat_params common_chat_templates_apply_jinja(
|
||||
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
|
||||
}
|
||||
|
||||
// Ministral/Mistral Large 3
|
||||
if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
|
||||
src.find("[TOOL_CALLS]") != std::string::npos &&
|
||||
src.find("[ARGS]") != std::string::npos) {
|
||||
return common_chat_params_init_ministral_3(tmpl, params);
|
||||
}
|
||||
|
||||
if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
|
||||
return common_chat_params_init_magistral(tmpl, params);
|
||||
}
|
||||
|
||||
11
docs/ops.md
11
docs/ops.md
@@ -36,14 +36,15 @@ Legend:
|
||||
| CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CUMSUM | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| CUMSUM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| DIAG | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ |
|
||||
| EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ |
|
||||
| EXPM1 | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| FILL | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| FILL | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ |
|
||||
| GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
@@ -102,7 +103,7 @@ Legend:
|
||||
| SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | ❌ |
|
||||
| SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| SOLVE_TRI | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ |
|
||||
| SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ |
|
||||
| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
@@ -115,8 +116,8 @@ Legend:
|
||||
| SWIGLU_OAI | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ |
|
||||
| TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ |
|
||||
| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| TOP_K | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| TRI | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| TOP_K | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| TRI | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ |
|
||||
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | ❌ |
|
||||
| XIELU | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
|
||||
496
docs/ops/CPU.csv
496
docs/ops/CPU.csv
@@ -4964,6 +4964,7 @@
|
||||
"CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CPU"
|
||||
"CPU","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","CPU"
|
||||
"CPU","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","CPU"
|
||||
"CPU","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","CPU"
|
||||
"CPU","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CPU"
|
||||
"CPU","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CPU"
|
||||
"CPU","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CPU"
|
||||
@@ -5419,17 +5420,45 @@
|
||||
"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
|
||||
"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
|
||||
"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
|
||||
"CPU","CPY","type_src=i32,type_dst=i32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
|
||||
"CPU","CPY","type_src=i32,type_dst=i32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
|
||||
"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[2,3,5,7]","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=i32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=f16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","CONT","type=bf16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CPU"
|
||||
"CPU","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
|
||||
"CPU","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
|
||||
"CPU","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
|
||||
@@ -5655,6 +5684,7 @@
|
||||
"CPU","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
|
||||
"CPU","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
|
||||
"CPU","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","ADD1","type=f32,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","CPU"
|
||||
"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CPU"
|
||||
"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","CPU"
|
||||
@@ -8644,9 +8674,13 @@
|
||||
"CPU","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CPU"
|
||||
"CPU","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CPU"
|
||||
"CPU","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CPU"
|
||||
"CPU","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
|
||||
@@ -8666,9 +8700,13 @@
|
||||
"CPU","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CPU"
|
||||
"CPU","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CPU"
|
||||
"CPU","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","FLOOR","type=f32,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","ROUND","type=f32,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
|
||||
"CPU","TRUNC","type=f32,ne=[1024,1024,1,1]","support","1","yes","CPU"
|
||||
"CPU","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CPU"
|
||||
"CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CPU"
|
||||
"CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CPU"
|
||||
@@ -9411,18 +9449,405 @@
|
||||
"CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","CPU"
|
||||
"CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","CPU"
|
||||
"CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1023,2,1,3],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1024,2,1,3],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1023,2,1,3],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1024,2,1,3],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","CPU"
|
||||
"CPU","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","CPU"
|
||||
@@ -9435,6 +9860,10 @@
|
||||
"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=0","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=1","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","1","yes","CPU"
|
||||
"CPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","1","yes","CPU"
|
||||
@@ -9463,15 +9892,30 @@
|
||||
"CPU","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CPU"
|
||||
"CPU","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CPU"
|
||||
"CPU","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","1","yes","CPU"
|
||||
"CPU","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CPU"
|
||||
"CPU","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","CPU"
|
||||
"CPU","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CPU"
|
||||
"CPU","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CPU"
|
||||
"CPU","ARANGE","type=f32,start=0.000000,stop=1048576.000000,step=1.000000","support","1","yes","CPU"
|
||||
"CPU","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CPU"
|
||||
"CPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[127,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[128,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[128,128,4,4]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[255,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[256,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[511,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[512,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[1023,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[1024,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[2047,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[2048,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[242004,1,1,1]","support","1","yes","CPU"
|
||||
"CPU","CUMSUM","type=f32,ne=[375960,1,1,1]","support","1","yes","CPU"
|
||||
"CPU","XIELU","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
|
||||
"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","1","yes","CPU"
|
||||
"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","1","yes","CPU"
|
||||
@@ -9480,6 +9924,10 @@
|
||||
"CPU","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","1","yes","CPU"
|
||||
"CPU","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","CPU"
|
||||
"CPU","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","CPU"
|
||||
"CPU","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","CPU"
|
||||
"CPU","DIAG","type=f32,ne=[10,1,4,3]","support","1","yes","CPU"
|
||||
"CPU","DIAG","type=f32,ne=[79,1,19,13]","support","1","yes","CPU"
|
||||
"CPU","DIAG","type=f32,ne=[256,1,8,16]","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","1","yes","CPU"
|
||||
@@ -9487,10 +9935,16 @@
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","1","yes","CPU"
|
||||
"CPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[300,64,4,4]","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","1","yes","CPU"
|
||||
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","1","yes","CPU"
|
||||
"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
|
||||
"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
|
||||
"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
|
||||
|
||||
|
Can't render this file because it is too large.
|
@@ -4964,6 +4964,7 @@
|
||||
"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","CUDA"
|
||||
"CUDA0","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","CUDA"
|
||||
"CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CUDA"
|
||||
@@ -5419,17 +5420,45 @@
|
||||
"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","CPY","type_src=i32,type_dst=i32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","CPY","type_src=i32,type_dst=i32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
|
||||
"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[2,3,5,7]","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=f16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","CONT","type=bf16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","CUDA"
|
||||
"CUDA0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
|
||||
"CUDA0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
|
||||
"CUDA0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
|
||||
@@ -5655,6 +5684,7 @@
|
||||
"CUDA0","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
|
||||
"CUDA0","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
|
||||
"CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","ADD1","type=f32,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","CUDA"
|
||||
"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA"
|
||||
"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","CUDA"
|
||||
@@ -8644,9 +8674,13 @@
|
||||
"CUDA0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
|
||||
"CUDA0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA"
|
||||
"CUDA0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CUDA"
|
||||
"CUDA0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
|
||||
@@ -8666,9 +8700,13 @@
|
||||
"CUDA0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
|
||||
"CUDA0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA"
|
||||
"CUDA0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","FLOOR","type=f32,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","ROUND","type=f32,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
|
||||
"CUDA0","TRUNC","type=f32,ne=[1024,1024,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CUDA"
|
||||
"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CUDA"
|
||||
"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CUDA"
|
||||
@@ -9411,18 +9449,405 @@
|
||||
"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","0","no","CUDA"
|
||||
"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","CUDA"
|
||||
"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","0","no","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1023,2,1,3],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1024,2,1,3],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1023,2,1,3],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1024,2,1,3],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","0","no","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","CUDA"
|
||||
@@ -9435,6 +9860,10 @@
|
||||
"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=0","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=1","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","1","yes","CUDA"
|
||||
"CUDA0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","1","yes","CUDA"
|
||||
@@ -9463,34 +9892,59 @@
|
||||
"CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CUDA"
|
||||
"CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CUDA"
|
||||
"CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CUDA"
|
||||
"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","CUDA"
|
||||
"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CUDA"
|
||||
"CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CUDA"
|
||||
"CUDA0","ARANGE","type=f32,start=0.000000,stop=1048576.000000,step=1.000000","support","1","yes","CUDA"
|
||||
"CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CUDA"
|
||||
"CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[127,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[128,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[128,128,4,4]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[255,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[256,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[511,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[512,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[1023,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[1024,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[2047,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[2048,5,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[242004,1,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","CUMSUM","type=f32,ne=[375960,1,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","XIELU","type=f32,ne=[10,5,4,3]","support","0","no","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","0","no","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","0","no","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","0","no","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","0","no","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","0","no","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","1","yes","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","1","yes","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","1","yes","CUDA"
|
||||
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","1","yes","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","1","yes","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","CUDA"
|
||||
"CUDA0","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","CUDA"
|
||||
"CUDA0","DIAG","type=f32,ne=[10,1,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","DIAG","type=f32,ne=[79,1,19,13]","support","1","yes","CUDA"
|
||||
"CUDA0","DIAG","type=f32,ne=[256,1,8,16]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","1","yes","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","CUDA"
|
||||
"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[300,64,4,4]","support","0","no","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","1","yes","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","0","no","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","CUDA"
|
||||
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","CUDA"
|
||||
"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
|
||||
"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
|
||||
"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
|
||||
|
||||
|
Can't render this file because it is too large.
|
@@ -25,6 +25,7 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
|
||||
// ops that return true for this function must not use restrict pointers for their backend implementations
|
||||
bool ggml_op_can_inplace(enum ggml_op op) {
|
||||
switch (op) {
|
||||
case GGML_OP_FILL:
|
||||
case GGML_OP_SCALE:
|
||||
case GGML_OP_DIAG_MASK_ZERO:
|
||||
case GGML_OP_DIAG_MASK_INF:
|
||||
|
||||
77
ggml/src/ggml-cuda/diag.cu
Normal file
77
ggml/src/ggml-cuda/diag.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
#include "convert.cuh"
|
||||
#include "diag.cuh"
|
||||
#include "ggml.h"
|
||||
|
||||
template <typename T>
|
||||
static __global__ void diag_kernel(T * __restrict__ dst,
|
||||
const T * __restrict__ src,
|
||||
const int64_t ne0,
|
||||
const int64_t ne1,
|
||||
const int64_t ne2,
|
||||
const int64_t ne3,
|
||||
const int64_t total_elements) {
|
||||
const int64_t global_idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (global_idx >= total_elements) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int64_t i0 = global_idx % ne0;
|
||||
const int64_t i1 = (global_idx / ne0) % ne1;
|
||||
const int64_t i2 = (global_idx / (ne0 * ne1)) % ne2;
|
||||
const int64_t i3 = global_idx / (ne0 * ne1 * ne2);
|
||||
|
||||
const int64_t dst_idx = ((i3 * ne2 + i2) * ne1 + i1) * ne0 + i0;
|
||||
|
||||
if (i0 == i1) {
|
||||
const int64_t batch_idx = i3 * ne2 + i2;
|
||||
const int64_t src_idx = batch_idx * ne0 + i0;
|
||||
dst[dst_idx] = src[src_idx];
|
||||
} else {
|
||||
dst[dst_idx] = ggml_cuda_cast<T>(0);
|
||||
}
|
||||
GGML_UNUSED_VARS(ne3);
|
||||
}
|
||||
|
||||
void ggml_cuda_op_diag(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
void * dst_d = dst->data;
|
||||
const void * src0_d = src0->data;
|
||||
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(dst));
|
||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
||||
|
||||
const int64_t ne00 = src0->ne[0];
|
||||
const int64_t ne01 = src0->ne[1];
|
||||
const int64_t ne02 = src0->ne[2];
|
||||
const int64_t ne03 = src0->ne[3];
|
||||
|
||||
const int64_t ne0 = dst->ne[0];
|
||||
const int64_t ne1 = dst->ne[1];
|
||||
const int64_t ne2 = dst->ne[2];
|
||||
const int64_t ne3 = dst->ne[3];
|
||||
|
||||
GGML_ASSERT(ne00 == ne0);
|
||||
GGML_ASSERT(ne01 == 1);
|
||||
GGML_ASSERT(ne02 == ne2);
|
||||
GGML_ASSERT(ne03 == ne3);
|
||||
|
||||
const int64_t n_elems = ggml_nelements(dst);
|
||||
const int64_t num_blocks = (n_elems + CUDA_DIAG_BLOCK_SIZE - 1) / CUDA_DIAG_BLOCK_SIZE;
|
||||
|
||||
switch (dst->type) {
|
||||
case GGML_TYPE_F32:
|
||||
diag_kernel<<<num_blocks, CUDA_DIAG_BLOCK_SIZE, 0, stream>>>((float *) dst_d, (const float *) src0_d, ne0,
|
||||
ne1, ne2, ne3, n_elems);
|
||||
break;
|
||||
case GGML_TYPE_F16:
|
||||
diag_kernel<<<num_blocks, CUDA_DIAG_BLOCK_SIZE, 0, stream>>>((half *) dst_d, (const half *) src0_d, ne0,
|
||||
ne1, ne2, ne3, n_elems);
|
||||
break;
|
||||
default:
|
||||
GGML_ABORT("unsupported type");
|
||||
}
|
||||
}
|
||||
5
ggml/src/ggml-cuda/diag.cuh
Normal file
5
ggml/src/ggml-cuda/diag.cuh
Normal file
@@ -0,0 +1,5 @@
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_DIAG_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_diag(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
@@ -4,7 +4,7 @@
|
||||
#define CUDA_FILL_BLOCK_SIZE 256
|
||||
|
||||
template <typename T>
|
||||
static __global__ void fill_kernel(T * __restrict__ dst, const int64_t k, const T value) {
|
||||
static __global__ void fill_kernel(T * dst, const int64_t k, const T value) {
|
||||
const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (i >= k) {
|
||||
return;
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "ggml-cuda/cpy.cuh"
|
||||
#include "ggml-cuda/cross-entropy-loss.cuh"
|
||||
#include "ggml-cuda/diagmask.cuh"
|
||||
#include "ggml-cuda/diag.cuh"
|
||||
#include "ggml-cuda/fattn.cuh"
|
||||
#include "ggml-cuda/getrows.cuh"
|
||||
#include "ggml-cuda/im2col.cuh"
|
||||
@@ -2641,6 +2642,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
break;
|
||||
case GGML_OP_DIAG:
|
||||
ggml_cuda_op_diag(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_DIAG_MASK_INF:
|
||||
ggml_cuda_op_diag_mask_inf(ctx, dst);
|
||||
break;
|
||||
@@ -4624,6 +4628,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
case GGML_OP_FILL:
|
||||
case GGML_OP_CUMSUM:
|
||||
case GGML_OP_TRI:
|
||||
case GGML_OP_DIAG:
|
||||
return true;
|
||||
case GGML_OP_SOLVE_TRI:
|
||||
return op->src[0]->ne[0] <= 64 && op->src[1]->ne[0] <= 32;
|
||||
|
||||
@@ -411,6 +411,38 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_conv(ggml_me
|
||||
return res;
|
||||
}
|
||||
|
||||
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_conv_batched(ggml_metal_library_t lib, const ggml_tensor * op, int ssm_conv_bs) {
|
||||
GGML_ASSERT(op->src[0]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(op->src[0]));
|
||||
GGML_ASSERT(ggml_is_contiguous(op->src[1]));
|
||||
|
||||
char base[256];
|
||||
char name[256];
|
||||
|
||||
const char * suffix = "";
|
||||
if (op->src[1]->ne[0] % 4 == 0) {
|
||||
suffix = "_4";
|
||||
}
|
||||
|
||||
snprintf(base, 256, "kernel_ssm_conv_%s_%s_batched%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type), suffix);
|
||||
snprintf(name, 256, "%s_ssm_conv_bs=%d", base, ssm_conv_bs);
|
||||
|
||||
ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
|
||||
if (!res.pipeline) {
|
||||
ggml_metal_cv_t cv = ggml_metal_cv_init();
|
||||
|
||||
ggml_metal_cv_set_int16(cv, ssm_conv_bs, FC_SSM_CONV + 0);
|
||||
|
||||
res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
|
||||
|
||||
ggml_metal_cv_free(cv);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_scan(ggml_metal_library_t lib, const ggml_tensor * op) {
|
||||
GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
|
||||
|
||||
@@ -427,7 +459,12 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_scan(ggml_me
|
||||
res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
|
||||
}
|
||||
|
||||
res.smem = 32*sizeof(float)*nsg;
|
||||
// Shared memory layout:
|
||||
// - sgptg * NW floats for partial sums (nsg * 32)
|
||||
// - sgptg floats for shared_x_dt (nsg)
|
||||
// - sgptg floats for shared_dA (nsg)
|
||||
// Total: nsg * (32 + 2) floats
|
||||
res.smem = (32 + 2)*sizeof(float)*nsg;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -117,6 +117,7 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_cumsum_ad
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_tri (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_soft_max (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_conv (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_conv_batched (ggml_metal_library_t lib, const struct ggml_tensor * op, int ssm_conv_bs);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_ssm_scan (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_rwkv (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_mul_mv_ext (ggml_metal_library_t lib, enum ggml_type tsrc0, enum ggml_type tsrc1, int nsg, int nxpsg, int r1ptg);
|
||||
|
||||
@@ -77,6 +77,7 @@
|
||||
#define FC_MUL_MV 600
|
||||
#define FC_MUL_MM 700
|
||||
#define FC_ROPE 800
|
||||
#define FC_SSM_CONV 900
|
||||
|
||||
// op-specific constants
|
||||
#define OP_FLASH_ATTN_EXT_NQPTG 8
|
||||
|
||||
@@ -221,7 +221,7 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
|
||||
}
|
||||
|
||||
if (ctx->debug_graph > 0) {
|
||||
GGML_LOG_DEBUG("%s: node[%5d] - %-12s %s\n", __func__, idx, ggml_op_name(node->op), is_concurrent ? "(concurrent)" : "");
|
||||
GGML_LOG_DEBUG("%s: node[%5d] - %-12s %-12s %s\n", __func__, idx, ggml_op_name(node->op), ggml_get_name(node), is_concurrent ? "(concurrent)" : "");
|
||||
}
|
||||
if (ctx->debug_graph > 1) {
|
||||
GGML_TENSOR_LOCALS( int64_t, ne0, node->src[0], ne);
|
||||
@@ -1365,15 +1365,43 @@ int ggml_metal_op_ssm_conv(ggml_metal_op_t ctx, int idx) {
|
||||
/*.nb2 =*/ nb2,
|
||||
};
|
||||
|
||||
auto pipeline = ggml_metal_library_get_pipeline_ssm_conv(lib, op);
|
||||
// Use batched kernel for prefill (ne1 > 1) to reduce threadgroup dispatch overhead
|
||||
const bool use_batched = (ne1 > 1);
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3);
|
||||
if (use_batched) {
|
||||
// Determine the smallest power of 2 that's >= ne1, but <= 256
|
||||
int BATCH_SIZE;
|
||||
if (ne1 > 128) BATCH_SIZE = 256;
|
||||
else if (ne1 > 64 ) BATCH_SIZE = 128;
|
||||
else if (ne1 > 32 ) BATCH_SIZE = 64;
|
||||
else if (ne1 > 16 ) BATCH_SIZE = 32;
|
||||
else if (ne1 > 8 ) BATCH_SIZE = 16;
|
||||
else if (ne1 > 4 ) BATCH_SIZE = 8;
|
||||
else BATCH_SIZE = 2;
|
||||
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne1, ne02, 1, 1, 1);
|
||||
auto pipeline = ggml_metal_library_get_pipeline_ssm_conv_batched(lib, op, BATCH_SIZE);
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3);
|
||||
|
||||
// Dispatch: ne01 rows, ceil(ne1/BATCH_SIZE) token batches, ne02 sequences
|
||||
// Each threadgroup has BATCH_SIZE threads, each handling one token
|
||||
const int n_token_batches = (ne1 + BATCH_SIZE - 1) / BATCH_SIZE;
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, ne01, n_token_batches, ne02, BATCH_SIZE, 1, 1);
|
||||
} else {
|
||||
auto pipeline = ggml_metal_library_get_pipeline_ssm_conv(lib, op);
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3);
|
||||
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne1, ne02, 1, 1, 1);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -2343,7 +2343,102 @@ kernel void kernel_ssm_conv_f32_f32_4(
|
||||
x[0] = sumf;
|
||||
}
|
||||
|
||||
constant short FC_ssm_conv_bs [[function_constant(FC_SSM_CONV + 0)]];
|
||||
|
||||
// Batched version: each threadgroup processes multiple tokens for better efficiency
|
||||
// Thread layout: each thread handles one token, threadgroup covers BATCH_SIZE tokens
|
||||
kernel void kernel_ssm_conv_f32_f32_batched(
|
||||
constant ggml_metal_kargs_ssm_conv & args,
|
||||
device const void * src0,
|
||||
device const void * src1,
|
||||
device float * dst,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
// tgpig.x = row index (ir)
|
||||
// tgpig.y = batch of tokens (i2_base / BATCH_SIZE)
|
||||
// tgpig.z = sequence index (i3)
|
||||
// tpitg.x = thread within batch (0..BATCH_SIZE-1)
|
||||
const short BATCH_SIZE = FC_ssm_conv_bs;
|
||||
|
||||
const int64_t ir = tgpig.x;
|
||||
const int64_t i2_base = tgpig.y * BATCH_SIZE;
|
||||
const int64_t i3 = tgpig.z;
|
||||
const int64_t i2_off = tpitg.x;
|
||||
const int64_t i2 = i2_base + i2_off;
|
||||
|
||||
const int64_t nc = args.ne10; // conv kernel size (typically 4)
|
||||
const int64_t n_t = args.ne1; // number of tokens
|
||||
|
||||
// Bounds check for partial batches at the end
|
||||
if (i2 >= n_t) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Load conv weights (shared across all tokens for this row)
|
||||
device const float * c = (device const float *) ((device const char *) src1 + ir*args.nb11);
|
||||
|
||||
// Load source for this specific token
|
||||
device const float * s = (device const float *) ((device const char *) src0 + ir*args.nb01 + i2*args.nb00 + i3*args.nb02);
|
||||
|
||||
// Output location for this token
|
||||
device float * x = (device float *) ((device char *) dst + ir*args.nb0 + i2*args.nb1 + i3*args.nb2);
|
||||
|
||||
float sumf = 0.0f;
|
||||
for (int64_t i0 = 0; i0 < nc; ++i0) {
|
||||
sumf += s[i0] * c[i0];
|
||||
}
|
||||
|
||||
x[0] = sumf;
|
||||
}
|
||||
|
||||
kernel void kernel_ssm_conv_f32_f32_batched_4(
|
||||
constant ggml_metal_kargs_ssm_conv & args,
|
||||
device const void * src0,
|
||||
device const void * src1,
|
||||
device float * dst,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
// tgpig.x = row index (ir)
|
||||
// tgpig.y = batch of tokens (i2_base / BATCH_SIZE)
|
||||
// tgpig.z = sequence index (i3)
|
||||
// tpitg.x = thread within batch (0..BATCH_SIZE-1)
|
||||
const short BATCH_SIZE = FC_ssm_conv_bs;
|
||||
|
||||
const int64_t ir = tgpig.x;
|
||||
const int64_t i2_base = tgpig.y * BATCH_SIZE;
|
||||
const int64_t i3 = tgpig.z;
|
||||
const int64_t i2_off = tpitg.x;
|
||||
const int64_t i2 = i2_base + i2_off;
|
||||
|
||||
const int64_t nc = args.ne10; // conv kernel size (typically 4)
|
||||
const int64_t n_t = args.ne1; // number of tokens
|
||||
|
||||
// Bounds check for partial batches at the end
|
||||
if (i2 >= n_t) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Load conv weights (shared across all tokens for this row)
|
||||
device const float4 * c = (device const float4 *) ((device const char *) src1 + ir*args.nb11);
|
||||
|
||||
// Load source for this specific token
|
||||
device const float4 * s = (device const float4 *) ((device const char *) src0 + ir*args.nb01 + i2*args.nb00 + i3*args.nb02);
|
||||
|
||||
// Output location for this token
|
||||
device float * x = (device float *) ((device char *) dst + ir*args.nb0 + i2*args.nb1 + i3*args.nb2);
|
||||
|
||||
float sumf = 0.0f;
|
||||
for (int64_t i0 = 0; i0 < nc/4; ++i0) {
|
||||
sumf += dot(s[i0], c[i0]);
|
||||
}
|
||||
|
||||
x[0] = sumf;
|
||||
}
|
||||
|
||||
// ref: ggml.c:ggml_compute_forward_ssm_scan_f32, Mamba-2 part
|
||||
// Optimized version: reduces redundant memory loads by having one thread load shared values
|
||||
kernel void kernel_ssm_scan_f32(
|
||||
constant ggml_metal_kargs_ssm_scan & args,
|
||||
device const void * src0,
|
||||
@@ -2363,7 +2458,15 @@ kernel void kernel_ssm_scan_f32(
|
||||
uint3 tgpg[[threadgroups_per_grid]]) {
|
||||
constexpr short NW = N_SIMDWIDTH;
|
||||
|
||||
shared[tpitg.x] = 0.0f;
|
||||
// Shared memory layout:
|
||||
// [0..sgptg*NW-1]: partial sums for reduction (existing)
|
||||
// [sgptg*NW..sgptg*NW+sgptg-1]: pre-computed x_dt values for each token in batch
|
||||
// [sgptg*NW+sgptg..sgptg*NW+2*sgptg-1]: pre-computed dA values for each token in batch
|
||||
threadgroup float * shared_sums = shared;
|
||||
threadgroup float * shared_x_dt = shared + sgptg * NW;
|
||||
threadgroup float * shared_dA = shared + sgptg * NW + sgptg;
|
||||
|
||||
shared_sums[tpitg.x] = 0.0f;
|
||||
|
||||
const int32_t i0 = tpitg.x;
|
||||
const int32_t i1 = tgpig.x;
|
||||
@@ -2403,32 +2506,47 @@ kernel void kernel_ssm_scan_f32(
|
||||
for (int i2 = 0; i2 < n_t; i2 += sgptg) {
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
for (int t = 0; t < sgptg && i2 + t < n_t; t++) {
|
||||
const float dt0 = dt[0];
|
||||
// Pre-compute x_dt and dA for this batch of tokens
|
||||
// Only first sgptg threads do the loads and expensive math
|
||||
if (i0 < sgptg && i2 + i0 < n_t) {
|
||||
// ns12 and ns21 are element strides (nb12/nb10, nb21/nb20)
|
||||
device const float * x_t = x + i0 * args.ns12;
|
||||
device const float * dt_t = dt + i0 * args.ns21;
|
||||
|
||||
const float dt0 = dt_t[0];
|
||||
const float dtsp = dt0 <= 20.0f ? log(1.0f + exp(dt0)) : dt0;
|
||||
const float x_dt = x[0] * dtsp;
|
||||
const float dA = exp(dtsp * A0);
|
||||
shared_x_dt[i0] = x_t[0] * dtsp;
|
||||
shared_dA[i0] = dtsp; // Store dtsp, compute exp(dtsp * A0) per-thread since A0 varies
|
||||
}
|
||||
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
for (int t = 0; t < sgptg && i2 + t < n_t; t++) {
|
||||
const float x_dt = shared_x_dt[t];
|
||||
const float dA = exp(shared_dA[t] * A0);
|
||||
|
||||
s = (s0 * dA) + (B[i0] * x_dt);
|
||||
|
||||
const float sumf = simd_sum(s * C[i0]);
|
||||
|
||||
if (tiisg == 0) {
|
||||
shared[t*NW + sgitg] = sumf;
|
||||
shared_sums[t*NW + sgitg] = sumf;
|
||||
}
|
||||
|
||||
// recurse
|
||||
s0 = s;
|
||||
|
||||
x += args.ns12;
|
||||
dt += args.ns21;
|
||||
B += args.ns42;
|
||||
C += args.ns52;
|
||||
}
|
||||
|
||||
// Advance pointers for next batch
|
||||
x += sgptg * args.ns12;
|
||||
dt += sgptg * args.ns21;
|
||||
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
const float sumf = simd_sum(shared[sgitg*NW + tiisg]);
|
||||
const float sumf = simd_sum(shared_sums[sgitg*NW + tiisg]);
|
||||
|
||||
if (tiisg == 0 && i2 + sgitg < n_t) {
|
||||
y[sgitg*nh*nr] = sumf;
|
||||
|
||||
@@ -277,7 +277,7 @@ static void soft_max_f32_sycl(const float *x, const T *mask,
|
||||
const int id = get_current_device_id();
|
||||
const size_t smpbo = ggml_sycl_info().devices[id].smpbo;
|
||||
|
||||
if (nbytes_shared <= smpbo) {
|
||||
if (nbytes_shared <= smpbo && ncols_x <= max_block_size) {
|
||||
launch_soft_max_kernels<32, 64, 128, 256, 512, 1024, 2048, 4096>(
|
||||
x, mask, sinks, dst, params, stream, block_dims, block_nums,
|
||||
nbytes_shared);
|
||||
|
||||
@@ -124,6 +124,13 @@ static void ggml_print_backtrace_symbols(void) {
|
||||
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
||||
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
#include <execinfo.h>
|
||||
static void ggml_print_backtrace_symbols(void) {
|
||||
void * trace[100];
|
||||
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
||||
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
||||
}
|
||||
#else
|
||||
static void ggml_print_backtrace_symbols(void) {
|
||||
// platform not supported
|
||||
@@ -135,6 +142,20 @@ void ggml_print_backtrace(void) {
|
||||
if (GGML_NO_BACKTRACE) {
|
||||
return;
|
||||
}
|
||||
#if defined(__APPLE__)
|
||||
// On macOS, fork+debugger attachment is problematic due to:
|
||||
// 1. libdispatch "poisons" forked child processes
|
||||
// 2. lldb has issues attaching to parent from forked child
|
||||
// Use simple backtrace() instead to avoid Terminal.app crashes
|
||||
const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
|
||||
if (!GGML_BACKTRACE_LLDB) {
|
||||
fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
|
||||
fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
|
||||
fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
|
||||
ggml_print_backtrace_symbols();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if defined(__linux__)
|
||||
FILE * f = fopen("/proc/self/status", "r");
|
||||
size_t size = 0;
|
||||
|
||||
126
models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja
Normal file
126
models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja
Normal file
@@ -0,0 +1,126 @@
|
||||
{#- Default system message if no system prompt is passed. #}
|
||||
{%- set default_system_message = '# HOW YOU SHOULD THINK AND ANSWER\n\nFirst draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.\n\nYour thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.' %}
|
||||
|
||||
{#- Begin of sequence token. #}
|
||||
{{- bos_token }}
|
||||
|
||||
{#- Handle system prompt if it exists. #}
|
||||
{#- System prompt supports text content or text and thinking chunks. #}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- '[SYSTEM_PROMPT]' -}}
|
||||
{%- if messages[0]['content'] is string %}
|
||||
{{- messages[0]['content'] -}}
|
||||
{%- else %}
|
||||
{%- for block in messages[0]['content'] %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- elif block['type'] == 'thinking' %}
|
||||
{{- '[THINK]' + block['thinking'] + '[/THINK]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text and thinking chunks are supported in system message contents.') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '[/SYSTEM_PROMPT]' -}}
|
||||
{%- set loop_messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set loop_messages = messages %}
|
||||
{%- if default_system_message != '' %}
|
||||
{{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
|
||||
|
||||
{#- Tools definition #}
|
||||
{%- set tools_definition = '' %}
|
||||
{%- set has_tools = false %}
|
||||
{%- if tools is defined and tools is not none and tools|length > 0 %}
|
||||
{%- set has_tools = true %}
|
||||
{%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
|
||||
{{- tools_definition }}
|
||||
{%- endif %}
|
||||
|
||||
{#- Checks for alternating user/assistant messages. #}
|
||||
{%- set ns = namespace(index=0) %}
|
||||
{%- for message in loop_messages %}
|
||||
{%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
|
||||
{%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
|
||||
{{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
|
||||
{%- endif %}
|
||||
{%- set ns.index = ns.index + 1 %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
|
||||
{#- Handle conversation messages. #}
|
||||
{%- for message in loop_messages %}
|
||||
|
||||
{#- User messages supports text content or text and image chunks. #}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[INST]' + message['content'] + '[/INST]' }}
|
||||
{%- elif message['content'] | length > 0 %}
|
||||
{{- '[INST]' }}
|
||||
{%- if message['content'] | length == 2 %}
|
||||
{%- set blocks = message['content'] | sort(attribute='type') %}
|
||||
{%- else %}
|
||||
{%- set blocks = message['content'] %}
|
||||
{%- endif %}
|
||||
{%- for block in blocks %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- elif block['type'] in ['image', 'image_url'] %}
|
||||
{{- '[IMG]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- '[/INST]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('User message must have a string or a list of chunks in content') }}
|
||||
{%- endif %}
|
||||
|
||||
{#- Assistant messages supports text content or text, image and thinking chunks. #}
|
||||
{%- elif message['role'] == 'assistant' %}
|
||||
{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
|
||||
{{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
|
||||
{%- endif %}
|
||||
|
||||
{%- if message['content'] is string and message['content'] != '' %}
|
||||
{{- message['content'] }}
|
||||
{%- elif message['content'] | length > 0 %}
|
||||
{%- for block in message['content'] %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- elif block['type'] == 'thinking' %}
|
||||
{{- '[THINK]' + block['thinking'] + '[/THINK]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text and thinking chunks are supported in assistant message contents.') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
|
||||
{%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
|
||||
{%- for tool in message['tool_calls'] %}
|
||||
{{- '[TOOL_CALLS]' }}
|
||||
{%- set name = tool['function']['name'] %}
|
||||
{%- set arguments = tool['function']['arguments'] %}
|
||||
{%- if arguments is not string %}
|
||||
{%- set arguments = arguments|tojson|safe %}
|
||||
{%- elif arguments == '' %}
|
||||
{%- set arguments = '{}' %}
|
||||
{%- endif %}
|
||||
{{- name + '[ARGS]' + arguments }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
|
||||
{{- eos_token }}
|
||||
|
||||
{#- Tool messages only supports text content. #}
|
||||
{%- elif message['role'] == 'tool' %}
|
||||
{{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
|
||||
|
||||
{#- Raise exception for unsupported roles. #}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
@@ -139,6 +139,7 @@ add_library(llama
|
||||
set_target_properties(llama PROPERTIES
|
||||
VERSION ${LLAMA_INSTALL_VERSION}
|
||||
SOVERSION 0
|
||||
MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number
|
||||
)
|
||||
|
||||
target_include_directories(llama PRIVATE .)
|
||||
|
||||
@@ -6253,6 +6253,31 @@ struct test_solve_tri : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_DIAG
|
||||
struct test_diag : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
|
||||
std::string vars() override { return VARS_TO_STR2(type, ne); }
|
||||
|
||||
test_diag(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = { 10, 1, 4, 3 })
|
||||
: type(type), ne(ne) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
GGML_ASSERT(ne[1] == 1);
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * out = ggml_diag(ctx, a);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
enum llm_norm_type {
|
||||
LLM_NORM,
|
||||
LLM_NORM_RMS,
|
||||
@@ -7826,6 +7851,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_fill(-152.0f, GGML_TYPE_F32, { 800, 600, 4, 4 }));
|
||||
test_cases.emplace_back(new test_fill(3.5f, GGML_TYPE_F32, { 2048, 512, 2, 2 }));
|
||||
|
||||
test_cases.emplace_back(new test_diag());
|
||||
test_cases.emplace_back(new test_diag(GGML_TYPE_F32, { 79, 1, 19, 13 }));
|
||||
test_cases.emplace_back(new test_diag(GGML_TYPE_F32, { 256, 1, 8, 16 }));
|
||||
|
||||
test_cases.emplace_back(new test_solve_tri());
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 11, 11, 1, 1 }, { 5, 11, 1, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 17, 17, 2, 4 }, { 9, 17, 2, 4 }));
|
||||
@@ -8164,6 +8193,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||
}
|
||||
}
|
||||
|
||||
// Examples from granite-4.0-h-1b/ggml-model-Q8_0.gguf
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1})); // prefill
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 3328, 1, 1}, {4, 3328, 1, 1})); // generate
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1, 1)); // generate
|
||||
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
|
||||
@@ -539,6 +539,71 @@ const common_chat_msg message_assist_call_python_lines = simple_assist
|
||||
const common_chat_msg message_assist_call_python_lines_unclosed = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
|
||||
const common_chat_msg message_assist_call_code_interpreter = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
|
||||
|
||||
// Use for PEG parser implementations
|
||||
struct peg_test_case {
|
||||
common_chat_templates_inputs params;
|
||||
std::string input;
|
||||
common_chat_msg expect;
|
||||
};
|
||||
|
||||
struct make_peg_parser {
|
||||
common_chat_params params_;
|
||||
common_peg_arena arena_;
|
||||
|
||||
make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
|
||||
params_ = common_chat_templates_apply(tmpls, inputs);
|
||||
arena_.load(params_.parser);
|
||||
}
|
||||
|
||||
common_chat_msg parse(const std::string & msg, bool is_partial) {
|
||||
return common_chat_peg_parse(arena_, msg, is_partial, /* syntax = */ {params_.format});
|
||||
}
|
||||
};
|
||||
|
||||
static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
|
||||
peg_test_case tc;
|
||||
init(tc);
|
||||
if (tc.params.messages.empty()) {
|
||||
tc.params.messages = {message_user};
|
||||
}
|
||||
if (tc.expect.role.empty()) {
|
||||
tc.expect.role = "assistant";
|
||||
}
|
||||
|
||||
auto parser = make_peg_parser(tmpls, tc.params);
|
||||
|
||||
common_chat_msg msg_accum;
|
||||
common_chat_msg msg_prev;
|
||||
msg_accum.role = msg_prev.role = "assistant";
|
||||
|
||||
for (size_t i = 1; i <= tc.input.size(); ++i) {
|
||||
auto is_partial = i < tc.input.size();
|
||||
common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);
|
||||
|
||||
for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
|
||||
if (!diff.reasoning_content_delta.empty()) {
|
||||
msg_accum.reasoning_content += diff.reasoning_content_delta;
|
||||
}
|
||||
if (!diff.content_delta.empty()) {
|
||||
msg_accum.content += diff.content_delta;
|
||||
}
|
||||
if (diff.tool_call_index != std::string::npos) {
|
||||
if (!diff.tool_call_delta.name.empty()) {
|
||||
msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
|
||||
}
|
||||
if (!diff.tool_call_delta.arguments.empty()) {
|
||||
msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert_msg_equals(msg_current, msg_accum, true);
|
||||
msg_prev = msg_current;
|
||||
}
|
||||
|
||||
assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
|
||||
assert_msg_equals(tc.expect, msg_accum, true);
|
||||
}
|
||||
|
||||
static void test_msgs_oaicompat_json_conversion() {
|
||||
printf("[%s]\n", __func__);
|
||||
std::vector<common_chat_msg> msgs{
|
||||
@@ -3434,7 +3499,95 @@ Hey there!<|im_end|>
|
||||
auto grammar = build_grammar(params.grammar);
|
||||
GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
|
||||
}
|
||||
}
|
||||
|
||||
static void test_template_output_peg_parsers() {
|
||||
printf("[%s]\n", __func__);
|
||||
|
||||
// JSON schemas
|
||||
const char * invoice_schema = R"({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"amount": {"type": "number"},
|
||||
"date": {"type": "string"}
|
||||
}
|
||||
})";
|
||||
|
||||
{
|
||||
// Ministral-3-14B-Reasoning-2512
|
||||
auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
|
||||
|
||||
// Test basic message
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "Hello, world!\nWhat's up?";
|
||||
t.expect = message_assist;
|
||||
});
|
||||
|
||||
// Test basic message and reasoning with reasoning_format = none
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
|
||||
t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
|
||||
});
|
||||
|
||||
// Test basic message and reasoning with reasoning_format = auto
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
|
||||
t.expect = message_assist_thoughts;
|
||||
});
|
||||
|
||||
// Test tool call
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.tools = {special_function_tool};
|
||||
|
||||
t.expect = message_assist_call;
|
||||
});
|
||||
|
||||
// Test tool call with reasoning
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "[THINK]I'm\nthinking[/THINK]"
|
||||
R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.tools = {special_function_tool};
|
||||
|
||||
t.expect = message_assist_call_thoughts;
|
||||
});
|
||||
|
||||
// Test parallel tool calls
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
|
||||
R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.parallel_tool_calls = true;
|
||||
t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "special_function",
|
||||
/* .arguments = */ R"({"arg1": 1})",
|
||||
/* .id = */ {},
|
||||
}, {
|
||||
/* .name = */ "special_function_with_opt",
|
||||
/* .arguments = */ R"({"arg1": 1, "arg2": 2})",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test response format
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
|
||||
"```json\n"
|
||||
R"({"amount": 123.45, "date": "2025-12-03"})"
|
||||
"\n```";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.json_schema = invoice_schema;
|
||||
|
||||
t.expect.reasoning_content = "I need to output the invoice details in JSON";
|
||||
t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void test_msg_diffs_compute() {
|
||||
@@ -3560,6 +3713,7 @@ int main(int argc, char ** argv) {
|
||||
test_msgs_oaicompat_json_conversion();
|
||||
test_tools_oaicompat_json_conversion();
|
||||
test_template_output_parsers();
|
||||
test_template_output_peg_parsers();
|
||||
std::cout << "\n[chat] All tests passed!" << '\n';
|
||||
}
|
||||
return 0;
|
||||
|
||||
@@ -16,6 +16,7 @@ add_library(mtmd
|
||||
set_target_properties(mtmd PROPERTIES
|
||||
VERSION ${LLAMA_INSTALL_VERSION}
|
||||
SOVERSION 0
|
||||
MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number
|
||||
)
|
||||
|
||||
target_link_libraries (mtmd PUBLIC ggml llama)
|
||||
|
||||
@@ -972,6 +972,9 @@ json oaicompat_chat_params_parse(
|
||||
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
|
||||
inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
|
||||
inputs.reasoning_format = opt.reasoning_format;
|
||||
if (body.contains("reasoning_format")) {
|
||||
inputs.reasoning_format = common_reasoning_format_from_name(body.at("reasoning_format").get<std::string>());
|
||||
}
|
||||
inputs.enable_thinking = opt.enable_thinking;
|
||||
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
|
||||
if (body.contains("grammar")) {
|
||||
|
||||
Reference in New Issue
Block a user