update tokenizer

2025-12-12 21:38:58 +00:00 · 2025-12-07 02:44:41 +08:00 · 2025-12-07 02:44:41 +08:00 · 3002c55d2d
commit 3002c55d2d
parent 8b1c8d659e
2 changed files with 11 additions and 5 deletions
--- a/conditioner.hpp
+++ b/conditioner.hpp
@@ -1728,6 +1728,7 @@ struct LLMEmbedder : public Conditioner {
        std::vector<std::pair<int, ggml_tensor*>> image_embeds;
        std::pair<int, int> prompt_attn_range;
        int prompt_template_encode_start_idx = 34;
+        int max_length = 0;
        std::set<int> out_layers;
        if (llm->enable_vision && conditioner_params.ref_images.size() > 0) {
            LOG_INFO("QwenImageEditPlusPipeline");
@@ -1827,11 +1828,12 @@ struct LLMEmbedder : public Conditioner {
            prompt += "[/INST]";
        } else if (version == VERSION_OVIS_IMAGE) {
            prompt_template_encode_start_idx = 28;
+            max_length = prompt_template_encode_start_idx + 256;

-            prompt = "<|im_start|>system\nDescribe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: <|im_end|>\n<|im_start|>user\n";
+            prompt = "<|im_start|>user\nDescribe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background:";

            prompt_attn_range.first = static_cast<int>(prompt.size());
-            prompt += conditioner_params.text;
+            prompt += " " + conditioner_params.text;
            prompt_attn_range.second = static_cast<int>(prompt.size());

            prompt += "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n";
@@ -1847,7 +1849,7 @@ struct LLMEmbedder : public Conditioner {
            prompt += "<|im_end|>\n<|im_start|>assistant\n";
        }

-        auto tokens_and_weights = tokenize(prompt, prompt_attn_range, 0, false);
+        auto tokens_and_weights = tokenize(prompt, prompt_attn_range, max_length, max_length > 0);
        auto& tokens            = std::get<0>(tokens_and_weights);
        auto& weights           = std::get<1>(tokens_and_weights);

@@ -1883,8 +1885,6 @@ struct LLMEmbedder : public Conditioner {
        int64_t min_length = 0;
        if (sd_version_is_flux2(version)) {
            min_length = 512;
-        } else if (version == VERSION_OVIS_IMAGE) {
-            min_length = 256;
        }

        int64_t zero_pad_len = 0;
@@ -1908,6 +1908,8 @@ struct LLMEmbedder : public Conditioner {
            ggml_ext_tensor_set_f32(new_hidden_states, value, i0, i1, i2, i3);
        });

+        // print_ggml_tensor(new_hidden_states);
+
        int64_t t1 = ggml_time_ms();
        LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
        return {new_hidden_states, nullptr, nullptr};
--- a/llm.hpp
+++ b/llm.hpp
@@ -356,6 +356,10 @@ namespace LLM {
                "<|fim_pad|>",
                "<|repo_name|>",
                "<|file_sep|>",
+                "<tool_response>",
+                "</tool_response>",
+                "<think>",
+                "</think>",
            };

            if (merges_utf8_str.size() > 0) {