diff --git a/conditioner.hpp b/conditioner.hpp index 5be33be..2e5972c 100644 --- a/conditioner.hpp +++ b/conditioner.hpp @@ -1728,6 +1728,7 @@ struct LLMEmbedder : public Conditioner { std::vector> image_embeds; std::pair prompt_attn_range; int prompt_template_encode_start_idx = 34; + int max_length = 0; std::set out_layers; if (llm->enable_vision && conditioner_params.ref_images.size() > 0) { LOG_INFO("QwenImageEditPlusPipeline"); @@ -1827,11 +1828,12 @@ struct LLMEmbedder : public Conditioner { prompt += "[/INST]"; } else if (version == VERSION_OVIS_IMAGE) { prompt_template_encode_start_idx = 28; + max_length = prompt_template_encode_start_idx + 256; - prompt = "<|im_start|>system\nDescribe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: <|im_end|>\n<|im_start|>user\n"; + prompt = "<|im_start|>user\nDescribe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background:"; prompt_attn_range.first = static_cast(prompt.size()); - prompt += conditioner_params.text; + prompt += " " + conditioner_params.text; prompt_attn_range.second = static_cast(prompt.size()); prompt += "<|im_end|>\n<|im_start|>assistant\n\n\n\n\n"; @@ -1847,7 +1849,7 @@ struct LLMEmbedder : public Conditioner { prompt += "<|im_end|>\n<|im_start|>assistant\n"; } - auto tokens_and_weights = tokenize(prompt, prompt_attn_range, 0, false); + auto tokens_and_weights = tokenize(prompt, prompt_attn_range, max_length, max_length > 0); auto& tokens = std::get<0>(tokens_and_weights); auto& weights = std::get<1>(tokens_and_weights); @@ -1883,8 +1885,6 @@ struct LLMEmbedder : public Conditioner { int64_t min_length = 0; if (sd_version_is_flux2(version)) { min_length = 512; - } else if (version == VERSION_OVIS_IMAGE) { - min_length = 256; } int64_t zero_pad_len = 0; @@ -1908,6 +1908,8 @@ struct LLMEmbedder : public Conditioner { ggml_ext_tensor_set_f32(new_hidden_states, value, i0, i1, i2, i3); }); + // print_ggml_tensor(new_hidden_states); + int64_t t1 = ggml_time_ms(); LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0); return {new_hidden_states, nullptr, nullptr}; diff --git a/llm.hpp b/llm.hpp index aa5e025..af73591 100644 --- a/llm.hpp +++ b/llm.hpp @@ -356,6 +356,10 @@ namespace LLM { "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", + "", + "", + "", + "", }; if (merges_utf8_str.size() > 0) {