Mirror of https://github.com/leejet/stable-diffusion.cpp.git

commit fe4e73156f (parent f88daa5114)

    add qwen2.5 vl support
@@ -1142,17 +1142,7 @@ int main(int argc, const char* argv[]) {
     SDParams params;
     params.verbose = true;
     sd_set_log_callback(sd_log_cb, (void*)&params);
-    auto on_new_token_cb = [&](std::string& str, std::vector<int32_t>& bpe_tokens) -> bool {
-        return false;
-    };
-    // auto tokenizer = CLIPTokenizer();
-    auto tokenizer = Qwen::Qwen2Tokenizer();
-    std::string text("a lovely cat");
-    auto tokens = tokenizer.encode(text, on_new_token_cb);
-    for (auto token : tokens) {
-        std::cout << token << " ";
-    }
-    std::cout << std::endl;
+    Qwen::Qwen2_5_VLEmbedder::load_from_file_and_test(argv[1]);
     exit(1);
     parse_args(argc, argv, params);
     params.sample_params.guidance.slg.layers = params.skip_layers.data();
@@ -1119,9 +1119,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
     return kqv;
 }
 
-// q: [N, L_q, C] or [N*n_head, L_q, d_head]
-// k: [N, L_k, C] or [N*n_head, L_k, d_head]
-// v: [N, L_k, C] or [N, L_k, n_head, d_head]
+// q: [N, L_q, C(n_head*d_head)] or [N*n_head, L_q, d_head]
+// k: [N, L_k, n_kv_head*d_head] or [N*n_kv_head, L_k, d_head]
+// v: [N, L_k, n_kv_head*d_head] or [N, L_k, n_kv_head, d_head]
 // mask: [N, L_q, L_k]
 // return: [N, L_q, C]
 __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context* ctx,
@@ -1139,26 +1139,30 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
     int64_t C;
     int64_t N;
     int64_t d_head;
+    int64_t n_kv_head;
     if (!skip_reshape) {
         L_q = q->ne[1];
         L_k = k->ne[1];
         C = q->ne[0];
         N = q->ne[2];
         d_head = C / n_head;
+        n_kv_head = k->ne[0] / d_head;
+
         q = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N);     // [N, L_q, n_head, d_head]
         q = ggml_nn_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, L_q, d_head]
         q = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N);    // [N * n_head, L_q, d_head]
 
-        k = ggml_reshape_4d(ctx, k, d_head, n_head, L_k, N);     // [N, L_k, n_head, d_head]
-        k = ggml_nn_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
-        k = ggml_reshape_3d(ctx, k, d_head, L_k, n_head * N);    // [N * n_head, L_k, d_head]
+        k = ggml_reshape_4d(ctx, k, d_head, n_kv_head, L_k, N);  // [N, L_k, n_kv_head, d_head]
+        k = ggml_nn_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3)); // [N, n_kv_head, L_k, d_head]
+        k = ggml_reshape_3d(ctx, k, d_head, L_k, n_kv_head * N); // [N * n_kv_head, L_k, d_head]
 
-        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N);     // [N, L_k, n_head, d_head]
+        v = ggml_reshape_4d(ctx, v, d_head, n_kv_head, L_k, N);  // [N, L_k, n_kv_head, d_head]
     } else {
         L_q = q->ne[1];
         L_k = k->ne[1];
         d_head = v->ne[0];
         N = v->ne[3];
+        n_kv_head = k->ne[2] / N;
         C = d_head * n_head;
     }
 
@@ -1174,7 +1178,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
         k_in = ggml_cast(ctx, k_in, GGML_TYPE_F16);
 
         v_in = ggml_nn_cont(ctx, ggml_permute(ctx, v_in, 0, 2, 1, 3));
-        v_in = ggml_reshape_3d(ctx, v_in, d_head, L_k, n_head * N);
+        v_in = ggml_reshape_3d(ctx, v_in, d_head, L_k, n_kv_head * N);
         if (kv_pad != 0) {
             v_in = ggml_pad(ctx, v_in, 0, kv_pad, 0, 0);
         }
@@ -1232,8 +1236,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
         // if (flash_attn) {
         //     LOG_DEBUG("fallback to default attention, L_q:%d L_k:%d n_head:%d C:%d d_head:%d N:%d", L_q, L_k, n_head, C, d_head, N);
         // }
-        v = ggml_nn_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, L_k]
-        v = ggml_reshape_3d(ctx, v, L_k, d_head, n_head * N);    // [N * n_head, d_head, L_k]
+        v = ggml_nn_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_kv_head, d_head, L_k]
+        v = ggml_reshape_3d(ctx, v, L_k, d_head, n_kv_head * N); // [N * n_kv_head, d_head, L_k]
 
         auto kq = ggml_mul_mat(ctx, k, q); // [N * n_head, L_q, L_k]
         kq = ggml_scale_inplace(ctx, kq, scale);
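Note (annotation, not part of the diff): the hunks above generalize ggml_nn_attention_ext from plain multi-head attention to grouped-query attention (GQA), where K and V carry fewer heads (n_kv_head) than Q (n_head), and n_kv_head is derived from the tensor shapes rather than passed in. A standalone sketch of that shape arithmetic, using the Qwen2.5-VL text-model numbers that appear later in this commit (hidden_size 3584, 28 query heads, 4 KV heads):

// gqa_shapes.cpp - illustrates only the n_kv_head bookkeeping, nothing else
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t hidden_size = 3584;                  // C = n_head * d_head
    const int64_t n_head = 28;
    const int64_t d_head = hidden_size / n_head;       // 128
    const int64_t kv_channels = 4 * d_head;            // k->ne[0] under GQA: 512
    const int64_t n_kv_head = kv_channels / d_head;    // what the patch computes: 4
    assert(n_head % n_kv_head == 0);                   // each KV head serves a group of queries
    printf("d_head=%lld n_kv_head=%lld queries_per_kv_head=%lld\n",
           (long long)d_head, (long long)n_kv_head, (long long)(n_head / n_kv_head));
    return 0;
}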
model.cpp (31 changes)
@@ -110,6 +110,9 @@ const char* unused_tensors[] = {
     "embedding_manager",
     "denoiser.sigmas",
     "text_encoders.t5xxl.transformer.encoder.embed_tokens.weight", // only used during training
+    "qwen2vl.output.weight",
+    "qwen2vl.lm_head.",
+    "qwen2vl.visual.",
 };
 
 bool is_unused_tensor(std::string name) {
@@ -193,6 +196,21 @@ std::unordered_map<std::string, std::string> pmid_v2_name_map = {
      "pmid.qformer_perceiver.token_proj.fc2.weight"},
 };
 
+std::unordered_map<std::string, std::string> qwenvl_name_map{
+    {"token_embd.", "model.embed_tokens."},
+    {"blk.", "model.layers."},
+    {"attn_q.", "self_attn.q_proj."},
+    {"attn_k.", "self_attn.k_proj."},
+    {"attn_v.", "self_attn.v_proj."},
+    {"attn_output.", "self_attn.o_proj."},
+    {"attn_norm.", "input_layernorm."},
+    {"ffn_down.", "mlp.down_proj."},
+    {"ffn_gate.", "mlp.gate_proj."},
+    {"ffn_up.", "mlp.up_proj."},
+    {"ffn_norm.", "post_attention_layernorm."},
+    {"output_norm.", "model.norm."},
+};
+
 std::string convert_cond_model_name(const std::string& name) {
     std::string new_name = name;
     std::string prefix;
@@ -250,6 +268,13 @@ std::string convert_cond_model_name(const std::string& name) {
         if (pos != std::string::npos) {
             new_name.replace(pos, 11, "layer.0.SelfAttention.relative_attention_bias.");
         }
+    } else if (contains(name, "qwen2vl")) {
+        for (auto kv : qwenvl_name_map) {
+            size_t pos = new_name.find(kv.first);
+            if (pos != std::string::npos) {
+                new_name.replace(pos, kv.first.size(), kv.second);
+            }
+        }
     } else if (name == "text_encoders.t5xxl.transformer.token_embd.weight") {
         new_name = "text_encoders.t5xxl.transformer.shared.weight";
     }
@@ -580,7 +605,11 @@ std::string convert_tensor_name(std::string name) {
     //         name.replace(pos, strlen("lora_B"), "lora_down");
     //     }
     // }
     std::string new_name = name;
-    if (starts_with(name, "cond_stage_model.") || starts_with(name, "conditioner.embedders.") || starts_with(name, "text_encoders.") || ends_with(name, ".vision_model.visual_projection.weight")) {
+    if (starts_with(name, "cond_stage_model.") ||
+        starts_with(name, "conditioner.embedders.") ||
+        starts_with(name, "text_encoders.") ||
+        ends_with(name, ".vision_model.visual_projection.weight") ||
+        starts_with(name, "qwen2vl")) {
         new_name = convert_cond_model_name(name);
     } else if (starts_with(name, "first_stage_model.decoder")) {
         new_name = convert_vae_decoder_name(name);
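Note (annotation, not part of the diff): qwenvl_name_map rewrites GGUF-style tensor names to the HF-style names the new qwen.hpp blocks register, so the loader can match tensors from either naming scheme. A standalone sketch of the substitution loop added to convert_cond_model_name, run on one hypothetical tensor name with an abbreviated map:

// name_map_demo.cpp - abbreviated map; the diff's full map has 12 entries
#include <cstdio>
#include <string>
#include <unordered_map>

int main() {
    std::unordered_map<std::string, std::string> qwenvl_name_map{
        {"token_embd.", "model.embed_tokens."},
        {"blk.", "model.layers."},
        {"attn_q.", "self_attn.q_proj."},
    };
    std::string name = "qwen2vl.blk.0.attn_q.weight";  // hypothetical input
    for (const auto& kv : qwenvl_name_map) {
        size_t pos = name.find(kv.first);
        if (pos != std::string::npos) {
            name.replace(pos, kv.first.size(), kv.second);
        }
    }
    // prints: qwen2vl.model.layers.0.self_attn.q_proj.weight
    printf("%s\n", name.c_str());
    return 0;
}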
qwen.hpp (502 changes)
@@ -3,25 +3,25 @@
 
 #include "ggml_extend.hpp"
 
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <optional>
+#include <regex>
+#include <set>
+#include <sstream>
 #include <string>
 #include <vector>
-#include <map>
-#include <set>
-#include <regex>
-#include <fstream>
-#include <sstream>
-#include <algorithm>
-#include <optional>
-#include <iostream>
 
-#include "json.hpp"
 #include "clip.hpp"
+#include "json.hpp"
 #include "tokenize_util.h"
 
 namespace Qwen {
 
 class Qwen2Tokenizer {
 private:
     std::map<int, std::u32string> byte_encoder;
     std::map<std::u32string, int> byte_decoder;
     std::map<std::u32string, int> encoder;
@@ -31,7 +31,7 @@ private:
     int encoder_len;
     int bpe_len;
 
 public:
     const std::string UNK_TOKEN = "<|endoftext|>";
     const std::string EOS_TOKEN = "<|endoftext|>";
     const std::string PAD_TOKEN = "<|endoftext|>";
@@ -40,7 +40,32 @@ public:
     const int EOS_TOKEN_ID = 151643;
     const int PAD_TOKEN_ID = 151643;
 
-private:
+    std::vector<std::string> special_tokens = {
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|object_ref_start|>",
+        "<|object_ref_end|>",
+        "<|box_start|>",
+        "<|box_end|>",
+        "<|quad_start|>",
+        "<|quad_end|>",
+        "<|vision_start|>",
+        "<|vision_end|>",
+        "<|vision_pad|>",
+        "<|image_pad|>",
+        "<|video_pad|>",
+        "<tool_call>",
+        "</tool_call>",
+        "<|fim_prefix|>",
+        "<|fim_middle|>",
+        "<|fim_suffix|>",
+        "<|fim_pad|>",
+        "<|repo_name|>",
+        "<|file_sep|>",
+    };
+
+private:
     static std::string strip(const std::string& str) {
         std::string::size_type start = str.find_first_not_of(" \t\n\r\v\f");
         std::string::size_type end = str.find_last_not_of(" \t\n\r\v\f");
@@ -74,7 +99,16 @@ private:
         return pairs;
     }
 
-public:
+    bool is_special_token(const std::string& token) {
+        for (auto& special_token : special_tokens) {
+            if (special_token == token) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+public:
     explicit Qwen2Tokenizer(const std::string& merges_utf8_str = "") {
         if (merges_utf8_str.size() > 0) {
             load_from_merges(merges_utf8_str);
@@ -102,7 +136,6 @@ public:
             start = pos + 1;
         }
         LOG_DEBUG("merges size %llu", merges.size());
-        // GGML_ASSERT(merges.size() == 48895);
         merges = std::vector<std::u32string>(merges.begin(), merges.end());
         std::vector<std::pair<std::u32string, std::u32string>> merge_pairs;
         for (const auto& merge : merges) {
@@ -120,28 +153,9 @@ public:
         for (const auto& merge : merge_pairs) {
             vocab.push_back(merge.first + merge.second);
         }
-        vocab.push_back(utf8_to_utf32("<|endoftext|>"));
-        vocab.push_back(utf8_to_utf32("<|im_start|>"));
-        vocab.push_back(utf8_to_utf32("<|im_end|>"));
-        vocab.push_back(utf8_to_utf32("<|object_ref_start|>"));
-        vocab.push_back(utf8_to_utf32("<|object_ref_end|>"));
-        vocab.push_back(utf8_to_utf32("<|box_start|>"));
-        vocab.push_back(utf8_to_utf32("<|box_end|>"));
-        vocab.push_back(utf8_to_utf32("<|quad_start|>"));
-        vocab.push_back(utf8_to_utf32("<|quad_end|>"));
-        vocab.push_back(utf8_to_utf32("<|vision_start|>"));
-        vocab.push_back(utf8_to_utf32("<|vision_end|>"));
-        vocab.push_back(utf8_to_utf32("<|vision_pad|>"));
-        vocab.push_back(utf8_to_utf32("<|image_pad|>"));
-        vocab.push_back(utf8_to_utf32("<|video_pad|>"));
-        vocab.push_back(utf8_to_utf32("<tool_call>"));
-        vocab.push_back(utf8_to_utf32("</tool_call>"));
-        vocab.push_back(utf8_to_utf32("<|fim_prefix|>"));
-        vocab.push_back(utf8_to_utf32("<|fim_middle|>"));
-        vocab.push_back(utf8_to_utf32("<|fim_suffix|>"));
-        vocab.push_back(utf8_to_utf32("<|fim_pad|>"));
-        vocab.push_back(utf8_to_utf32("<|repo_name|>"));
-        vocab.push_back(utf8_to_utf32("<|file_sep|>"));
+        for (auto& special_token : special_tokens) {
+            vocab.push_back(utf8_to_utf32(special_token));
+        }
 
         LOG_DEBUG("vocab size: %llu", vocab.size());
         int i = 0;
@@ -234,17 +248,20 @@ public:
     }
 
     std::vector<int> tokenize(std::string text,
-                              on_new_token_cb_t on_new_token_cb,
+                              on_new_token_cb_t on_new_token_cb = nullptr,
                               size_t max_length = 0,
                               bool padding = false) {
         std::vector<int32_t> tokens = encode(text, on_new_token_cb);
 
         if (max_length > 0) {
+            if (tokens.size() > max_length) {
                 tokens.resize(max_length);
+            } else {
                 if (padding) {
                     tokens.insert(tokens.end(), max_length - tokens.size(), PAD_TOKEN_ID);
                 }
+            }
         }
 
         return tokens;
     }
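Note (annotation, not part of the diff): with the new default arguments, tokenize() can now be called without a callback; max_length then truncates overlong prompts, and padding right-fills short ones with PAD_TOKEN_ID (151643). A usage sketch, assuming a Qwen2Tokenizer already populated via load_from_merges() with the model's merges:

// tokenize_usage.cpp - sketch only; tokenizer setup is elided
#include <vector>
#include "qwen.hpp"  // this commit's header

std::vector<int> demo(Qwen::Qwen2Tokenizer& tokenizer) {
    // 8-token window: longer prompts are cut, shorter ones padded with 151643.
    return tokenizer.tokenize("a lovely cat", nullptr, /*max_length=*/8, /*padding=*/true);
}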
@@ -265,17 +282,28 @@ public:
         }
     }
 
-    std::vector<int> encode(std::string text, on_new_token_cb_t on_new_token_cb) {
+    std::vector<int> encode(std::string text, on_new_token_cb_t on_new_token_cb = nullptr) {
         std::string original_text = text;
         std::vector<int32_t> bpe_tokens;
 
-        auto tokens = token_split(text);
         std::vector<std::string> token_strs;
 
+        auto splited_texts = split_with_special_tokens(text, special_tokens);
+
+        for (auto& splited_text : splited_texts) {
+            if (is_special_token(splited_text)) {
+                bpe_tokens.push_back(encoder[utf8_to_utf32(splited_text)]);
+                token_strs.push_back(splited_text);
+                continue;
+            }
+            auto tokens = token_split(splited_text);
             for (auto& token : tokens) {
+                if (on_new_token_cb != nullptr) {
                     bool skip = on_new_token_cb(token, bpe_tokens);
                     if (skip) {
                         continue;
                     }
+                }
 
                 std::string token_str = token;
                 std::u32string utf32_token;
                 for (int i = 0; i < token_str.length(); i++) {
@@ -296,6 +324,7 @@ public:
                 bpe_tokens.push_back(encoder[bpe_str]);
                 token_strs.push_back(utf32_to_utf8(bpe_str));
             }
+        }
 
         std::stringstream ss;
         ss << "[";
@@ -307,10 +336,397 @@ public:
         // printf("split prompt \"%s\" to tokens %s \n", original_text.c_str(), ss.str().c_str());
         return bpe_tokens;
     }
 };
 
-};
+struct Qwen2_5_VLMLP : public GGMLBlock {
+public:
+    Qwen2_5_VLMLP(int64_t hidden_size, int64_t intermediate_size, bool bias = false) {
+        blocks["gate_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, intermediate_size, false));
+        blocks["up_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, intermediate_size, false));
+        blocks["down_proj"] = std::shared_ptr<GGMLBlock>(new Linear(intermediate_size, hidden_size, false));
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
+        // x: [N, n_token, hidden_size]
+        auto gate_proj = std::dynamic_pointer_cast<Linear>(blocks["gate_proj"]);
+        auto up_proj = std::dynamic_pointer_cast<Linear>(blocks["up_proj"]);
+        auto down_proj = std::dynamic_pointer_cast<Linear>(blocks["down_proj"]);
+
+        auto h = gate_proj->forward(ctx, x);
+        h = ggml_silu_inplace(ctx, h);
+        h = ggml_mul_inplace(ctx, h, up_proj->forward(ctx, x));
+        h = down_proj->forward(ctx, h);
+        return h;
+    }
+};
+
+class Qwen2_5_VLAttention : public GGMLBlock {
+protected:
+    int64_t head_dim;
+    int64_t num_heads;
+    int64_t num_kv_heads;
+
+public:
+    Qwen2_5_VLAttention(int64_t hidden_size,
+                        int64_t num_heads,
+                        int64_t num_kv_heads)
+        : num_heads(num_heads), num_kv_heads(num_kv_heads) {
+        head_dim = hidden_size / num_heads;
+        GGML_ASSERT(num_heads * head_dim == hidden_size);
+        blocks["q_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, num_heads * head_dim));
+        blocks["k_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, num_kv_heads * head_dim));
+        blocks["v_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, num_kv_heads * head_dim));
+        blocks["o_proj"] = std::shared_ptr<GGMLBlock>(new Linear(num_heads * head_dim, hidden_size, false));
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx,
+                                ggml_backend_t backend,
+                                struct ggml_tensor* x,
+                                struct ggml_tensor* input_pos) {
+        // x: [N, n_token, hidden_size]
+        int64_t n_token = x->ne[1];
+        int64_t N = x->ne[2];
+        auto q_proj = std::dynamic_pointer_cast<Linear>(blocks["q_proj"]);
+        auto k_proj = std::dynamic_pointer_cast<Linear>(blocks["k_proj"]);
+        auto v_proj = std::dynamic_pointer_cast<Linear>(blocks["v_proj"]);
+        auto out_proj = std::dynamic_pointer_cast<Linear>(blocks["o_proj"]);
+
+        auto q = q_proj->forward(ctx, x); // [N, n_token, num_heads*head_dim]
+        auto k = k_proj->forward(ctx, x); // [N, n_token, num_kv_heads*head_dim]
+        auto v = v_proj->forward(ctx, x); // [N, n_token, num_kv_heads*head_dim]
+
+        q = ggml_reshape_4d(ctx, q, head_dim, num_heads, n_token, N);    // [N, n_token, num_heads, head_dim]
+        k = ggml_reshape_4d(ctx, k, head_dim, num_kv_heads, n_token, N); // [N, n_token, num_kv_heads, head_dim]
+        v = ggml_reshape_4d(ctx, v, head_dim, num_kv_heads, n_token, N); // [N, n_token, num_kv_heads, head_dim]
+
+        int sections[4] = {16, 24, 24, 0};
+        q = ggml_rope_multi(ctx, q, input_pos, nullptr, head_dim, sections, GGML_ROPE_TYPE_MROPE, 128000, 1000000.f, 1.f, 0.f, 1.f, 32.f, 1.f);
+        k = ggml_rope_multi(ctx, k, input_pos, nullptr, head_dim, sections, GGML_ROPE_TYPE_MROPE, 128000, 1000000.f, 1.f, 0.f, 1.f, 32.f, 1.f);
+
+        q = ggml_cont(ctx, ggml_torch_permute(ctx, q, 0, 2, 1, 3));           // [N, num_heads, n_token, head_dim]
+        q = ggml_reshape_3d(ctx, q, q->ne[0], q->ne[1], q->ne[2] * q->ne[3]); // [N*num_heads, n_token, head_dim]
+
+        k = ggml_cont(ctx, ggml_torch_permute(ctx, k, 0, 2, 1, 3));           // [N, num_kv_heads, n_token, head_dim]
+        k = ggml_reshape_3d(ctx, k, k->ne[0], k->ne[1], k->ne[2] * k->ne[3]); // [N*num_kv_heads, n_token, head_dim]
+
+        x = ggml_nn_attention_ext(ctx, backend, q, k, v, num_heads, nullptr, true, true, false); // [N, n_token, hidden_size]
+
+        x = out_proj->forward(ctx, x); // [N, n_token, hidden_size]
+        return x;
+    }
+};
+
+struct Qwen2_5_VLBlock : public GGMLBlock {
+public:
+    Qwen2_5_VLBlock(int64_t hidden_size,
+                    int64_t intermediate_size,
+                    int64_t num_heads,
+                    int64_t num_kv_heads,
+                    float eps = 1e-6f) {
+        blocks["self_attn"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLAttention(hidden_size, num_heads, num_kv_heads));
+        blocks["mlp"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLMLP(hidden_size, intermediate_size));
+        blocks["input_layernorm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(hidden_size, eps));
+        blocks["post_attention_layernorm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(hidden_size, eps));
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx,
+                                ggml_backend_t backend,
+                                struct ggml_tensor* x,
+                                struct ggml_tensor* input_pos) {
+        // x: [N, n_token, hidden_size]
+        auto self_attn = std::dynamic_pointer_cast<Qwen2_5_VLAttention>(blocks["self_attn"]);
+        auto mlp = std::dynamic_pointer_cast<Qwen2_5_VLMLP>(blocks["mlp"]);
+        auto input_layernorm = std::dynamic_pointer_cast<RMSNorm>(blocks["input_layernorm"]);
+        auto post_attention_layernorm = std::dynamic_pointer_cast<RMSNorm>(blocks["post_attention_layernorm"]);
+
+        auto residual = x;
+        x = input_layernorm->forward(ctx, x);
+        x = self_attn->forward(ctx, backend, x, input_pos);
+        x = ggml_add_inplace(ctx, x, residual);
+
+        residual = x;
+        x = post_attention_layernorm->forward(ctx, x);
+        x = mlp->forward(ctx, x);
+        x = ggml_add_inplace(ctx, x, residual);
+
+        return x;
+    }
+};
+
+struct Qwen2_5_VLTextModel : public GGMLBlock {
+protected:
+    int64_t num_layers;
+
+public:
+    Qwen2_5_VLTextModel(int64_t num_layers,
+                        int64_t vocab_size,
+                        int64_t hidden_size,
+                        int64_t intermediate_size,
+                        int64_t num_heads,
+                        int64_t num_kv_heads,
+                        float eps = 1e-6f)
+        : num_layers(num_layers) {
+        blocks["embed_tokens"] = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, hidden_size));
+        for (int i = 0; i < num_layers; i++) {
+            blocks["layers." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLBlock(hidden_size,
+                                                                                                   intermediate_size,
+                                                                                                   num_heads,
+                                                                                                   num_kv_heads));
+        }
+        blocks["norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(hidden_size, eps));
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx,
+                                ggml_backend_t backend,
+                                struct ggml_tensor* input_ids,
+                                struct ggml_tensor* input_pos) {
+        // input_ids: [N, n_token]
+        // return: [N, n_token, hidden_size]
+
+        auto embed_tokens = std::dynamic_pointer_cast<Embedding>(blocks["embed_tokens"]);
+        auto norm = std::dynamic_pointer_cast<RMSNorm>(blocks["norm"]);
+
+        auto x = embed_tokens->forward(ctx, input_ids);
+
+        for (int i = 0; i < num_layers; i++) {
+            auto block = std::dynamic_pointer_cast<Qwen2_5_VLBlock>(blocks["layers." + std::to_string(i)]);
+
+            x = block->forward(ctx, backend, x, input_pos);
+        }
+
+        x = norm->forward(ctx, x);
+        return x;
+    }
+};
+
+struct Qwen2_5_VLParams {
+    int64_t num_layers = 28;
+    int64_t hidden_size = 3584;
+    int64_t intermediate_size = 18944;
+    int64_t num_heads = 28;
+    int64_t num_kv_heads = 4;
+    int64_t vocab_size = 152064;
+    float rms_norm_eps = 1e-06f;
+};
+
+struct Qwen2_5_VL : public GGMLBlock {
+    Qwen2_5_VLParams params;
+
+public:
+    Qwen2_5_VL() {}
+    Qwen2_5_VL(Qwen2_5_VLParams params)
+        : params(params) {
+        blocks["model"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLTextModel(params.num_layers,
+                                                                             params.vocab_size,
+                                                                             params.hidden_size,
+                                                                             params.intermediate_size,
+                                                                             params.num_heads,
+                                                                             params.num_kv_heads,
+                                                                             params.rms_norm_eps));
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx,
+                                ggml_backend_t backend,
+                                struct ggml_tensor* input_ids,
+                                struct ggml_tensor* input_pos) {
+        // input_ids: [N, n_token]
+        auto model = std::dynamic_pointer_cast<Qwen2_5_VLTextModel>(blocks["model"]);
+
+        auto x = model->forward(ctx, backend, input_ids, input_pos);
+        return x;
+    }
+};
+
+struct Qwen2_5_VLRunner : public GGMLRunner {
+    Qwen2_5_VLParams params;
+    Qwen2_5_VL model;
+
+    std::vector<int> input_pos_vec;
+
+    Qwen2_5_VLRunner(ggml_backend_t backend,
+                     bool offload_params_to_cpu,
+                     const String2GGMLType& tensor_types,
+                     const std::string prefix)
+        : GGMLRunner(backend, offload_params_to_cpu) {
+        model = Qwen2_5_VL(params);
+        model.init(params_ctx, tensor_types, prefix);
+    }
+
+    std::string get_desc() {
+        return "qwenvl2.5";
+    }
+
+    void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
+        model.get_param_tensors(tensors, prefix);
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx,
+                                ggml_backend_t backend,
+                                struct ggml_tensor* input_ids,
+                                struct ggml_tensor* input_pos) {
+        auto hidden_states = model.forward(ctx, backend, input_ids, input_pos); // [N, n_token, hidden_size]
+        return hidden_states;
+    }
+
+    struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids) {
+        struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
+
+        input_ids = to_backend(input_ids);
+
+        int64_t n_tokens = input_ids->ne[0];
+        input_pos_vec.resize(n_tokens * 4);
+        for (int i = 0; i < n_tokens; ++i) {
+            input_pos_vec[i] = i;
+            input_pos_vec[n_tokens + i] = i;
+            input_pos_vec[2 * n_tokens + i] = i;
+            input_pos_vec[3 * n_tokens + i] = 0;
+        }
+
+        auto input_pos = ggml_new_tensor_1d(compute_ctx,
+                                            GGML_TYPE_I32,
+                                            n_tokens * 4);
+        set_backend_tensor_data(input_pos, input_pos_vec.data());
+
+        struct ggml_tensor* hidden_states = forward(compute_ctx, runtime_backend, input_ids, input_pos);
+
+        ggml_build_forward_expand(gf, hidden_states);
+
+        return gf;
+    }
+
+    void compute(const int n_threads,
+                 struct ggml_tensor* input_ids,
+                 ggml_tensor** output,
+                 ggml_context* output_ctx = NULL) {
+        auto get_graph = [&]() -> struct ggml_cgraph* {
+            return build_graph(input_ids);
+        };
+        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+    }
+};
+
+struct Qwen2_5_VLEmbedder {
+    Qwen2Tokenizer tokenizer;
+    Qwen2_5_VLRunner model;
+
+    Qwen2_5_VLEmbedder(ggml_backend_t backend,
+                       bool offload_params_to_cpu,
+                       const String2GGMLType& tensor_types = {},
+                       const std::string prefix = "")
+        : model(backend, offload_params_to_cpu, tensor_types, prefix) {
+    }
+
+    void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
+        model.get_param_tensors(tensors, prefix);
+    }
+
+    void alloc_params_buffer() {
+        model.alloc_params_buffer();
+    }
+
+    std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
+                                                              size_t max_length = 0,
+                                                              bool padding = false) {
+        auto parsed_attention = parse_prompt_attention(text);
+
+        {
+            std::stringstream ss;
+            ss << "[";
+            for (const auto& item : parsed_attention) {
+                ss << "['" << item.first << "', " << item.second << "], ";
+            }
+            ss << "]";
+            LOG_DEBUG("parse '%s' to %s", text.c_str(), ss.str().c_str());
+        }
+
+        std::vector<int> tokens;
+        std::vector<float> weights;
+        for (const auto& item : parsed_attention) {
+            const std::string& curr_text = item.first;
+            float curr_weight = item.second;
+            std::vector<int> curr_tokens = tokenizer.tokenize(curr_text, nullptr);
+            tokens.insert(tokens.end(), curr_tokens.begin(), curr_tokens.end());
+            weights.insert(weights.end(), curr_tokens.size(), curr_weight);
+        }
+
+        tokenizer.pad_tokens(tokens, weights, max_length, padding);
+
+        // for (int i = 0; i < tokens.size(); i++) {
+        //     std::cout << tokens[i] << ":" << weights[i] << ", ";
+        // }
+        // std::cout << std::endl;
+
+        return {tokens, weights};
+    }
+
+    void test() {
+        struct ggml_init_params params;
+        params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1GB
+        params.mem_buffer = NULL;
+        params.no_alloc = false;
+
+        struct ggml_context* work_ctx = ggml_init(params);
+        GGML_ASSERT(work_ctx != NULL);
+
+        {
+            std::string text("<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\na lovely cat<|im_end|>\n<|im_start|>assistant\n");
+            auto tokens_and_weights = tokenize(text, 0, false);
+            std::vector<int>& tokens = std::get<0>(tokens_and_weights);
+            std::vector<float>& weights = std::get<1>(tokens_and_weights);
+            for (auto token : tokens) {
+                printf("%d ", token);
+            }
+            printf("\n");
+            auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
+            struct ggml_tensor* out = NULL;
+
+            int t0 = ggml_time_ms();
+            model.compute(8, input_ids, &out, work_ctx);
+            int t1 = ggml_time_ms();
+
+            print_ggml_tensor(out);
+            LOG_DEBUG("qwen2vl test done in %dms", t1 - t0);
+        }
+    }
+
+    static void load_from_file_and_test(const std::string& file_path) {
+        // cpu f16: pass
+        // ggml_backend_t backend = ggml_backend_cuda_init(0);
+        ggml_backend_t backend = ggml_backend_cpu_init();
+        ggml_type model_data_type = GGML_TYPE_Q8_0;
+
+        ModelLoader model_loader;
+        if (!model_loader.init_from_file(file_path, "qwen2vl.")) {
+            LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
+            return;
+        }
+
+        auto tensor_types = model_loader.tensor_storages_types;
+        for (auto& item : tensor_types) {
+            // LOG_DEBUG("%s %u", item.first.c_str(), item.second);
+            if (ends_with(item.first, "weight")) {
+                item.second = model_data_type;
+            }
+        }
+
+        std::shared_ptr<Qwen2_5_VLEmbedder> qwenvl = std::shared_ptr<Qwen2_5_VLEmbedder>(new Qwen2_5_VLEmbedder(backend, false, tensor_types, "qwen2vl"));
+
+        qwenvl->alloc_params_buffer();
+        std::map<std::string, ggml_tensor*> tensors;
+        qwenvl->get_param_tensors(tensors, "qwen2vl");
+
+        bool success = model_loader.load_tensors(tensors);
+
+        if (!success) {
+            LOG_ERROR("load tensors from model loader failed");
+            return;
+        }
+
+        LOG_INFO("qwenvl model loaded");
+        qwenvl->test();
+    }
+};
+
+}; // Qwen
 
 #endif // __QWEN_HPP__
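Note (annotation, not part of the diff): build_graph() above feeds ggml_rope_multi a flat position tensor with four sections per token. For text-only input, the first three sections (reading them as the usual M-RoPE temporal/height/width axes is an assumption here) all carry 0..n_tokens-1 and the fourth stays zero. A standalone sketch of that layout:

// mrope_positions.cpp - reproduces only the input_pos_vec fill loop
#include <cstdio>
#include <vector>

int main() {
    const int n_tokens = 4;
    std::vector<int> input_pos(n_tokens * 4);
    for (int i = 0; i < n_tokens; ++i) {
        input_pos[i] = i;                  // section 0 (temporal, assumed)
        input_pos[n_tokens + i] = i;       // section 1 (height, assumed)
        input_pos[2 * n_tokens + i] = i;   // section 2 (width, assumed)
        input_pos[3 * n_tokens + i] = 0;   // section 3, unused ({16, 24, 24, 0})
    }
    for (int v : input_pos) printf("%d ", v);  // 0 1 2 3 0 1 2 3 0 1 2 3 0 0 0 0
    printf("\n");
    return 0;
}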
@@ -1,7 +1,7 @@
+#include <algorithm>
 #include <iostream>
 #include <string>
 #include <vector>
-#include <algorithm>
 
 #include "tokenize_util.h"
 
@@ -697,8 +697,9 @@ bool is_letter(char32_t ch) {
         {0x31350, 0x33479},
     };
 
-    for (const auto &r : ranges) {
-        if (ch >= r.start && ch <= r.end) return true;
+    for (const auto& r : ranges) {
+        if (ch >= r.start && ch <= r.end)
+            return true;
     }
     return false;
 }
@@ -736,7 +737,7 @@ bool is_space(char32_t cp) {
 std::string str_to_lower(const std::string& input) {
     std::string result = input;
     std::transform(result.begin(), result.end(), result.begin(),
-                   [](unsigned char c){ return std::tolower(c); });
+                   [](unsigned char c) { return std::tolower(c); });
     return result;
 }
 
@@ -749,13 +750,24 @@ std::vector<char32_t> utf8_to_codepoints(const std::string& str) {
         char32_t cp = 0;
         size_t extra_bytes = 0;
 
-        if ((c & 0x80) == 0) cp = c;
-        else if ((c & 0xE0) == 0xC0) { cp = c & 0x1F; extra_bytes = 1; }
-        else if ((c & 0xF0) == 0xE0) { cp = c & 0x0F; extra_bytes = 2; }
-        else if ((c & 0xF8) == 0xF0) { cp = c & 0x07; extra_bytes = 3; }
-        else { ++i; continue; } // Invalid UTF-8
+        if ((c & 0x80) == 0)
+            cp = c;
+        else if ((c & 0xE0) == 0xC0) {
+            cp = c & 0x1F;
+            extra_bytes = 1;
+        } else if ((c & 0xF0) == 0xE0) {
+            cp = c & 0x0F;
+            extra_bytes = 2;
+        } else if ((c & 0xF8) == 0xF0) {
+            cp = c & 0x07;
+            extra_bytes = 3;
+        } else {
+            ++i;
+            continue;
+        } // Invalid UTF-8
 
-        if (i + extra_bytes >= str.size()) break;
+        if (i + extra_bytes >= str.size())
+            break;
 
         for (size_t j = 1; j <= extra_bytes; ++j)
             cp = (cp << 6) | (str[i + j] & 0x3F);
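Note (annotation, not part of the diff): the reformatted decoder above reads a leading byte to get the payload bits and the continuation count, then folds in six bits per continuation byte. A standalone check with a two-byte sequence:

// utf8_decode_check.cpp - "é" is 0xC3 0xA9, i.e. 110xxxxx 10xxxxxx -> U+00E9
#include <cstdio>

int main() {
    const unsigned char s[] = {0xC3, 0xA9};
    char32_t cp = s[0] & 0x1F;       // leading byte contributes 5 payload bits
    cp = (cp << 6) | (s[1] & 0x3F);  // continuation byte contributes 6 more
    printf("U+%04X\n", (unsigned)cp);  // prints U+00E9
    return 0;
}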
@@ -769,7 +781,8 @@ std::vector<char32_t> utf8_to_codepoints(const std::string& str) {
 // Unicode code point -> UTF-8
 std::string codepoint_to_utf8(char32_t cp) {
     std::string out;
-    if (cp <= 0x7F) out.push_back(static_cast<char>(cp));
+    if (cp <= 0x7F)
+        out.push_back(static_cast<char>(cp));
     else if (cp <= 0x7FF) {
         out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
         out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
@@ -786,6 +799,17 @@ std::string codepoint_to_utf8(char32_t cp) {
     return out;
 }
 
+bool starts_with(const std::vector<char32_t>& text,
+                 const std::vector<char32_t>& prefix,
+                 std::size_t index) {
+    if (index > text.size()) {
+        return false;
+    }
+    if (prefix.size() > text.size() - index) {
+        return false;
+    }
+    return std::equal(prefix.begin(), prefix.end(), text.begin() + index);
+}
+
 std::vector<std::string> token_split(const std::string& text) {
     std::vector<std::string> tokens;
@@ -797,14 +821,14 @@ std::vector<std::string> token_split(const std::string& text) {
 
         // `(?i:'s|'t|'re|'ve|'m|'ll|'d)`
         if (cp == U'\'' && i + 1 < cps.size()) {
-            std::string next = str_to_lower(codepoint_to_utf8(cps[i+1]));
+            std::string next = str_to_lower(codepoint_to_utf8(cps[i + 1]));
             if (next == "s" || next == "t" || next == "m") {
                 tokens.push_back("'" + next);
                 i += 2;
                 continue;
             }
             if (i + 2 < cps.size()) {
-                next += str_to_lower(codepoint_to_utf8(cps[i+2]));
+                next += str_to_lower(codepoint_to_utf8(cps[i + 2]));
                 if (next == "re" || next == "ve" || next == "ll" || next == "d") {
                     tokens.push_back("'" + next);
                     i += 3;
@@ -823,7 +847,7 @@ std::vector<std::string> token_split(const std::string& text) {
         // `[^\r\n\p{L}\p{N}]?\p{L}+`
         {
             // `[^\r\n\p{L}\p{N}]\p{L}+`
-            if (!is_letter(cp) && cp != U'\r' && cp != U'\n' && i + 1 < cps.size() && is_letter(cps[i+1])) {
+            if (!is_letter(cp) && cp != U'\r' && cp != U'\n' && i + 1 < cps.size() && is_letter(cps[i + 1])) {
                 std::string token = codepoint_to_utf8(cp);
                 ++i;
 
@@ -851,10 +875,10 @@ std::vector<std::string> token_split(const std::string& text) {
         // ` ?[^\s\p{L}\p{N}]+[\r\n]*`
         {
             // ` [^\s\p{L}\p{N}]+[\r\n]*`
-            if (cp == U' ' && i + 1 < cps.size() && !isspace(cps[i+1]) && !is_letter(cps[i+1]) && !is_number(cps[i+1])) {
+            if (cp == U' ' && i + 1 < cps.size() && !isspace(cps[i + 1]) && !is_letter(cps[i + 1]) && !is_number(cps[i + 1])) {
                 std::string token = codepoint_to_utf8(cp);
-                token += codepoint_to_utf8(cps[i+1]);
-                i+=2;
+                token += codepoint_to_utf8(cps[i + 1]);
+                i += 2;
 
                 while (i < cps.size() && !is_letter(cps[i]) && !is_number(cps[i]) && !isspace(cps[i])) {
                     token += codepoint_to_utf8(cps[i]);
@@ -915,6 +939,40 @@ std::vector<std::string> token_split(const std::string& text) {
     return tokens;
 }
 
+std::vector<std::string> split_with_special_tokens(
+    const std::string& text,
+    const std::vector<std::string>& special_tokens) {
+    std::vector<std::string> result;
+    size_t pos = 0;
+    size_t text_len = text.size();
+
+    while (pos < text_len) {
+        size_t next_pos = text_len;
+        std::string matched_token;
+
+        for (const auto& token : special_tokens) {
+            size_t token_pos = text.find(token, pos);
+            if (token_pos != std::string::npos && token_pos < next_pos) {
+                next_pos = token_pos;
+                matched_token = token;
+            }
+        }
+
+        if (next_pos > pos) {
+            result.push_back(text.substr(pos, next_pos - pos));
+        }
+
+        if (!matched_token.empty()) {
+            result.push_back(matched_token);
+            pos = next_pos + matched_token.size();
+        } else {
+            break;
+        }
+    }
+
+    return result;
+}
+
 // int main() {
 //     std::string text = "I'm testing C++ token_split function. 你好,世界! 123";
 //     auto tokens = token_split(text);
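Note (annotation, not part of the diff): split_with_special_tokens() scans for the leftmost occurrence of any special token at or after the current position, emits the plain text before it, then the token itself, and finishes with the plain-text tail once no token matches. A usage sketch, assuming this commit's tokenize_util.h/.cpp are compiled in:

// split_demo.cpp
#include <cstdio>
#include <string>
#include <vector>
#include "tokenize_util.h"

int main() {
    std::vector<std::string> specials = {"<|im_start|>", "<|im_end|>"};
    auto parts = split_with_special_tokens("<|im_start|>user\na lovely cat<|im_end|>", specials);
    for (const auto& p : parts) {
        printf("[%s]\n", p.c_str());
    }
    // expected parts: "<|im_start|>", "user\na lovely cat", "<|im_end|>"
    return 0;
}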
@@ -5,5 +5,6 @@
 #include <vector>
 
 std::vector<std::string> token_split(const std::string& text);
+std::vector<std::string> split_with_special_tokens(const std::string& text, const std::vector<std::string>& special_tokens);
 
 #endif // __TOKENIZE_UTIL__