add umt5 support

commit bace0a08c4
parent 5f7d98884c
Author: leejet
Date:   2025-08-09 16:07:04 +08:00

12 changed files with 762430 additions and 81 deletions


@@ -1223,20 +1223,21 @@ struct FluxCLIPEmbedder : public Conditioner {
     }
 };
 
-struct PixArtCLIPEmbedder : public Conditioner {
+struct T5CLIPEmbedder : public Conditioner {
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<T5Runner> t5;
     size_t chunk_len = 512;
     bool use_mask    = false;
     int mask_pad     = 1;
 
-    PixArtCLIPEmbedder(ggml_backend_t backend,
-                       const String2GGMLType& tensor_types = {},
-                       int clip_skip                       = -1,
-                       bool use_mask                       = false,
-                       int mask_pad                        = 1)
-        : use_mask(use_mask), mask_pad(mask_pad) {
-        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
+    T5CLIPEmbedder(ggml_backend_t backend,
+                   const String2GGMLType& tensor_types = {},
+                   int clip_skip                       = -1,
+                   bool use_mask                       = false,
+                   int mask_pad                        = 1,
+                   bool is_umt5                        = false)
+        : use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
+        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer", is_umt5);
     }
 
     void set_clip_skip(int clip_skip) {
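
For context, a minimal usage sketch of the renamed embedder with its new flag; the backend handle and tensor-type map below are placeholders rather than code from this commit:

    // Hedged sketch: constructing T5CLIPEmbedder for a UMT5-based checkpoint.
    ggml_backend_t backend = ggml_backend_cpu_init();
    String2GGMLType tensor_types;  // normally taken from ModelLoader::tensor_storages_types
    auto cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend,
                                                             tensor_types,
                                                             -1,     // clip_skip
                                                             false,  // use_mask
                                                             1,      // mask_pad
                                                             true);  // is_umt5: select the UMT5 tokenizer and runner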


@@ -27,7 +27,7 @@
 #define SAFE_STR(s) ((s) ? (s) : "")
 #define BOOL_STR(b) ((b) ? "true" : "false")
 
-#include "wan.hpp"
+#include "t5.hpp"
 
 const char* modes_str[] = {
     "img_gen",

@@ -746,11 +746,11 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
 int main(int argc, const char* argv[]) {
     SDParams params;
 
-    params.verbose = true;
-    sd_set_log_callback(sd_log_cb, (void*)&params);
-    WAN::WanRunner::load_from_file_and_test(argv[1]);
-    return 0;
+    // params.verbose = true;
+    // sd_set_log_callback(sd_log_cb, (void*)&params);
+    // T5Embedder::load_from_file_and_test(argv[1]);
+    // return 0;
 
     parse_args(argc, argv, params);


@@ -896,7 +896,7 @@ namespace Flux {
         }
         for (auto pair : tensor_types) {
             std::string tensor_name = pair.first;
-            if (tensor_name.find("model.diffusion_model.") == std::string::npos)
+            if (!starts_with(tensor_name, prefix))
                 continue;
             if (tensor_name.find("guidance_in.in_layer.weight") != std::string::npos) {
                 // not schnell


@@ -1,2 +1,5 @@
-clang-format -style=file -i *.cpp *.h *.hpp
-clang-format -style=file -i examples/cli/*.cpp
+for f in *.cpp *.h *.hpp examples/cli/*.cpp; do
+    [[ "$f" == vocab* ]] && continue
+    echo "formatting '$f'"
+    clang-format -style=file -i "$f"
+done

ggml

@@ -1 +1 @@
-Subproject commit b96890f3ab5ffbdbe56bc126df5366c34bd08d39
+Subproject commit e89bc7e8625f59145ee8c0b09383009c47752cd8


@@ -10,6 +10,7 @@
 #include "stable-diffusion.h"
 #include "util.h"
 #include "vocab.hpp"
+#include "vocab_umt5.hpp"
 
 #include "ggml-alloc.h"
 #include "ggml-backend.h"

@@ -1157,6 +1158,10 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
             std::string dtype    = tensor_info["dtype"];
             nlohmann::json shape = tensor_info["shape"];
 
+            if (dtype == "U8") {
+                continue;
+            }
+
             size_t begin = tensor_info["data_offsets"][0].get<size_t>();
             size_t end   = tensor_info["data_offsets"][1].get<size_t>();

@@ -1856,6 +1861,11 @@ std::string ModelLoader::load_t5_tokenizer_json() {
     return json_str;
 }
 
+std::string ModelLoader::load_umt5_tokenizer_json() {
+    std::string json_str(reinterpret_cast<const char*>(umt5_tokenizer_json_str), sizeof(umt5_tokenizer_json_str));
+    return json_str;
+}
+
 std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& vec) {
     std::vector<TensorStorage> res;
     std::unordered_map<std::string, size_t> name_to_index_map;


@@ -258,6 +258,7 @@ public:
     static std::string load_merges();
     static std::string load_t5_tokenizer_json();
+    static std::string load_umt5_tokenizer_json();
 };
 
 #endif  // __MODEL_H__


@@ -249,4 +249,4 @@ struct Rope {
     }
 };  // struct Rope
 
-#endif __ROPE_HPP__
+#endif  // __ROPE_HPP__


@@ -344,11 +344,11 @@ public:
             }
         }
         if (is_chroma) {
-            cond_stage_model = std::make_shared<PixArtCLIPEmbedder>(clip_backend,
+            cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
                                                                 model_loader.tensor_storages_types,
                                                                 -1,
                                                                 sd_ctx_params->chroma_use_t5_mask,
                                                                 sd_ctx_params->chroma_t5_mask_pad);
         } else {
             cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
         }

t5.hpp

@@ -124,7 +124,10 @@ protected:
                 return;
             }
             std::string piece = item[0];
-            float score       = item[1];
+            if (piece.empty()) {
+                piece = "<empty_token>";
+            }
+            float score = item[1];
             piece_score_pairs.emplace_back(piece, score);
         }
     }
@@ -147,6 +150,7 @@ protected:
         std::vector<const char*> key(pieces->size());
         std::vector<int> value(pieces->size());
         for (size_t i = 0; i < pieces->size(); ++i) {
+            // LOG_DEBUG("%s %d", (*pieces)[i].first.c_str(), (*pieces)[i].second);
             key[i]   = (*pieces)[i].first.data();  // sorted piece.
             value[i] = (*pieces)[i].second;        // vocab_id
         }
@@ -335,9 +339,9 @@ protected:
     }
 
 public:
-    explicit T5UniGramTokenizer(const std::string& json_str = "") {
-        if (json_str.size() != 0) {
-            InitializePieces(json_str);
+    explicit T5UniGramTokenizer(bool is_umt5 = false) {
+        if (is_umt5) {
+            InitializePieces(ModelLoader::load_umt5_tokenizer_json());
         } else {
             InitializePieces(ModelLoader::load_t5_tokenizer_json());
         }
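
The tokenizer now selects its embedded vocabulary by flag instead of accepting a JSON string. A small sketch of the two variants (vocab sizes taken from the T5Params defaults and the UMT5 override later in this file):

    T5UniGramTokenizer t5_tok;          // default: embedded T5 tokenizer JSON (vocab_size 32128)
    T5UniGramTokenizer umt5_tok(true);  // is_umt5 = true: embedded UMT5 tokenizer JSON (vocab_size 256384)
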
@@ -673,10 +677,11 @@
             int64_t model_dim,
             int64_t inner_dim,
             int64_t ff_dim,
-            int64_t num_heads)
+            int64_t num_heads,
+            bool relative_attention = true)
         : num_layers(num_layers) {
         for (int i = 0; i < num_layers; i++) {
-            blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, i == 0));
+            blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, (!relative_attention || i == 0)));
         }
         blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new T5LayerNorm(model_dim));
@@ -703,15 +708,30 @@
     }
 };
 
+struct T5Params {
+    int64_t num_layers      = 24;
+    int64_t model_dim       = 4096;
+    int64_t ff_dim          = 10240;
+    int64_t num_heads       = 64;
+    int64_t vocab_size      = 32128;
+    bool relative_attention = true;
+};
+
 struct T5 : public GGMLBlock {
+    T5Params params;
+
 public:
-    T5(int64_t num_layers,
-       int64_t model_dim,
-       int64_t ff_dim,
-       int64_t num_heads,
-       int64_t vocab_size) {
-        blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
-        blocks["shared"]  = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
+    T5() {}
+    T5(T5Params params)
+        : params(params) {
+        blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(params.num_layers,
+                                                                   params.model_dim,
+                                                                   params.model_dim,
+                                                                   params.ff_dim,
+                                                                   params.num_heads,
+                                                                   params.relative_attention));
+        blocks["shared"]  = std::shared_ptr<GGMLBlock>(new Embedding(params.vocab_size,
+                                                                     params.model_dim));
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
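
With the hyperparameters gathered into T5Params, the two configurations can be expressed as below; this is a sketch using the defaults above plus the UMT5 override that T5Runner applies in the next hunk:

    T5Params t5xxl_params;                    // 24 layers, model_dim 4096, ff_dim 10240, 64 heads, vocab 32128
    T5Params umt5_params;
    umt5_params.vocab_size         = 256384;  // UMT5 multilingual SentencePiece vocab
    umt5_params.relative_attention = false;   // routed through the T5Stack change above
    T5 umt5_model(umt5_params);
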
@@ -731,18 +751,20 @@
 };
 
 struct T5Runner : public GGMLRunner {
+    T5Params params;
     T5 model;
     std::vector<int> relative_position_bucket_vec;
 
     T5Runner(ggml_backend_t backend,
              const String2GGMLType& tensor_types,
              const std::string prefix,
-             int64_t num_layers = 24,
-             int64_t model_dim  = 4096,
-             int64_t ff_dim     = 10240,
-             int64_t num_heads  = 64,
-             int64_t vocab_size = 32128)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+             bool is_umt5 = false)
+        : GGMLRunner(backend) {
+        if (is_umt5) {
+            params.vocab_size         = 256384;
+            params.relative_attention = false;
+        }
+        model = T5(params);
         model.init(params_ctx, tensor_types, prefix);
     }
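
Callers only pass the flag; a hedged sketch of the two runner variants, reusing the tensor prefix that the conditioner hunk above passes in both cases:

    T5Runner t5_runner(backend, tensor_types, "text_encoders.t5xxl.transformer");         // T5-XXL defaults
    T5Runner umt5_runner(backend, tensor_types, "text_encoders.t5xxl.transformer", true); // UMT5: vocab_size/relative_attention overridden internally
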
@@ -769,7 +791,8 @@
                                     struct ggml_tensor* attention_mask = NULL) {
         struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
 
         input_ids = to_backend(input_ids);
+        attention_mask = to_backend(attention_mask);
 
         relative_position_bucket_vec = compute_relative_position_bucket(input_ids->ne[0], input_ids->ne[0]);
@@ -879,12 +902,8 @@ struct T5Embedder {
     T5Embedder(ggml_backend_t backend,
                const String2GGMLType& tensor_types = {},
                const std::string prefix            = "",
-               int64_t num_layers                  = 24,
-               int64_t model_dim                   = 4096,
-               int64_t ff_dim                      = 10240,
-               int64_t num_heads                   = 64,
-               int64_t vocab_size                  = 32128)
-        : model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+               bool is_umt5                        = false)
+        : model(backend, tensor_types, prefix, is_umt5), tokenizer(is_umt5) {
     }
 
     void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -946,25 +965,22 @@
         GGML_ASSERT(work_ctx != NULL);
 
         {
-            // cpu f16: pass
-            // cpu f32: pass
-            // cuda f16: nan
-            // cuda f32: pass
-            // cuda q8_0: nan
-            // TODO: fix cuda nan
             std::string text("a lovely cat");
-            auto tokens_and_weights = tokenize(text, 77, true);
+            // std::string text("一只可爱的猫");  // umt5 chinese test
+            auto tokens_and_weights = tokenize(text, 512, true);
             std::vector<int>& tokens    = std::get<0>(tokens_and_weights);
             std::vector<float>& weights = std::get<1>(tokens_and_weights);
+            std::vector<float>& masks   = std::get<2>(tokens_and_weights);
             for (auto token : tokens) {
                 printf("%d ", token);
             }
             printf("\n");
             auto input_ids      = vector_to_ggml_tensor_i32(work_ctx, tokens);
+            auto attention_mask = vector_to_ggml_tensor(work_ctx, masks);
             struct ggml_tensor* out = NULL;
             int t0                  = ggml_time_ms();
-            model.compute(8, input_ids, NULL, &out, work_ctx);
+            model.compute(8, input_ids, attention_mask, &out, work_ctx);
             int t1 = ggml_time_ms();
             print_ggml_tensor(out);
@@ -973,32 +989,43 @@
     }
 
     static void load_from_file_and_test(const std::string& file_path) {
-        // ggml_backend_t backend    = ggml_backend_cuda_init(0);
-        ggml_backend_t backend         = ggml_backend_cpu_init();
-        ggml_type model_data_type      = GGML_TYPE_F32;
-        std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend));
-        {
-            LOG_INFO("loading from '%s'", file_path.c_str());
-
-            t5->alloc_params_buffer();
-            std::map<std::string, ggml_tensor*> tensors;
-            t5->get_param_tensors(tensors, "");
-
-            ModelLoader model_loader;
-            if (!model_loader.init_from_file(file_path)) {
-                LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
-                return;
-            }
-
-            bool success = model_loader.load_tensors(tensors, backend);
-            if (!success) {
-                LOG_ERROR("load tensors from model loader failed");
-                return;
-            }
-
-            LOG_INFO("t5 model loaded");
-        }
+        // cpu f16: pass
+        // cpu f32: pass
+        // cuda f16: pass
+        // cuda f32: pass
+        // cuda q8_0: pass
+        ggml_backend_t backend = ggml_backend_cuda_init(0);
+        // ggml_backend_t backend = ggml_backend_cpu_init();
+        ggml_type model_data_type = GGML_TYPE_F16;
+
+        ModelLoader model_loader;
+        if (!model_loader.init_from_file(file_path)) {
+            LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
+            return;
+        }
+
+        auto tensor_types = model_loader.tensor_storages_types;
+        for (auto& item : tensor_types) {
+            // LOG_DEBUG("%s %u", item.first.c_str(), item.second);
+            if (ends_with(item.first, "weight")) {
+                item.second = model_data_type;
+            }
+        }
+
+        std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend, tensor_types, "", true));
+
+        t5->alloc_params_buffer();
+        std::map<std::string, ggml_tensor*> tensors;
+        t5->get_param_tensors(tensors, "");
+
+        bool success = model_loader.load_tensors(tensors, backend);
+        if (!success) {
+            LOG_ERROR("load tensors from model loader failed");
+            return;
+        }
+
+        LOG_INFO("t5 model loaded");
+
         t5->test();
     }
 };
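
The commented-out call in the CLI hunk above pairs with this test entry point. A hypothetical standalone driver (not part of this commit) might look like:

    #include <cstdio>
    #include "t5.hpp"

    int main(int argc, const char* argv[]) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s <t5_or_umt5_checkpoint.safetensors>\n", argv[0]);
            return 1;
        }
        T5Embedder::load_from_file_and_test(argv[1]);  // loads the checkpoint, then runs the embedding test
        return 0;
    }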

thirdparty/darts.h vendored

@@ -4,6 +4,7 @@
 #include <cstdio>
 #include <exception>
 #include <new>
+#include <iostream>
 
 #define DARTS_VERSION "0.32"

@@ -1140,9 +1141,11 @@ inline void DawgBuilder::insert(const char *key, std::size_t length,
   if (value < 0) {
     DARTS_THROW("failed to insert key: negative value");
   } else if (length == 0) {
+    std::cout << value << std::endl;
     DARTS_THROW("failed to insert key: zero-length key");
   }
 
   id_type id = 0;
   std::size_t key_pos = 0;

vocab_umt5.hpp (new file, 762304 lines added)

File diff suppressed because it is too large.