Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2025-12-13 05:48:56 +00:00)

Commit bace0a08c4: "add umt5 support"
Parent: 5f7d98884c
@@ -1223,20 +1223,21 @@ struct FluxCLIPEmbedder : public Conditioner {
     }
 };
 
-struct PixArtCLIPEmbedder : public Conditioner {
+struct T5CLIPEmbedder : public Conditioner {
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<T5Runner> t5;
     size_t chunk_len = 512;
     bool use_mask    = false;
     int mask_pad     = 1;
 
-    PixArtCLIPEmbedder(ggml_backend_t backend,
-                       const String2GGMLType& tensor_types = {},
-                       int clip_skip = -1,
-                       bool use_mask = false,
-                       int mask_pad  = 1)
-        : use_mask(use_mask), mask_pad(mask_pad) {
-        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
+    T5CLIPEmbedder(ggml_backend_t backend,
+                   const String2GGMLType& tensor_types = {},
+                   int clip_skip = -1,
+                   bool use_mask = false,
+                   int mask_pad  = 1,
+                   bool is_umt5  = false)
+        : use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
+        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer", is_umt5);
     }
 
     void set_clip_skip(int clip_skip) {
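Note: this hunk renames PixArtCLIPEmbedder to T5CLIPEmbedder and threads a new is_umt5 flag through both the tokenizer and the runner. A minimal call-site sketch, assuming a backend and tensor-type map already exist; the argument values are illustrative, not part of this commit:

    // Build a UMT5-flavoured T5 text-encoder conditioner (hypothetical usage).
    auto cond_stage_model = std::make_shared<T5CLIPEmbedder>(
        clip_backend,
        model_loader.tensor_storages_types,
        /*clip_skip*/ -1,
        /*use_mask*/ false,
        /*mask_pad*/ 1,
        /*is_umt5*/ true);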
@@ -27,7 +27,7 @@
 #define SAFE_STR(s) ((s) ? (s) : "")
 #define BOOL_STR(b) ((b) ? "true" : "false")
 
-#include "wan.hpp"
+#include "t5.hpp"
 
 const char* modes_str[] = {
     "img_gen",
@@ -746,11 +746,11 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
 
 int main(int argc, const char* argv[]) {
     SDParams params;
-    params.verbose = true;
-    sd_set_log_callback(sd_log_cb, (void*)&params);
+    // params.verbose = true;
+    // sd_set_log_callback(sd_log_cb, (void*)&params);
 
-    WAN::WanRunner::load_from_file_and_test(argv[1]);
-    return 0;
+    // T5Embedder::load_from_file_and_test(argv[1]);
+    // return 0;
 
     parse_args(argc, argv, params);
 
flux.hpp (2 changes)

@@ -896,7 +896,7 @@ namespace Flux {
         }
         for (auto pair : tensor_types) {
             std::string tensor_name = pair.first;
-            if (tensor_name.find("model.diffusion_model.") == std::string::npos)
+            if (!starts_with(tensor_name, prefix))
                 continue;
             if (tensor_name.find("guidance_in.in_layer.weight") != std::string::npos) {
                 // not schnell
@@ -1,2 +1,5 @@
-clang-format -style=file -i *.cpp *.h *.hpp
-clang-format -style=file -i examples/cli/*.cpp
+for f in *.cpp *.h *.hpp examples/cli/*.cpp; do
+    [[ "$f" == vocab* ]] && continue
+    echo "formatting '$f'"
+    clang-format -style=file -i "$f"
+done
ggml (submodule, 2 changes)

@@ -1 +1 @@
-Subproject commit b96890f3ab5ffbdbe56bc126df5366c34bd08d39
+Subproject commit e89bc7e8625f59145ee8c0b09383009c47752cd8
model.cpp (10 changes)

@@ -10,6 +10,7 @@
 #include "stable-diffusion.h"
 #include "util.h"
 #include "vocab.hpp"
+#include "vocab_umt5.hpp"
 
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
@@ -1157,6 +1158,10 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
         std::string dtype    = tensor_info["dtype"];
         nlohmann::json shape = tensor_info["shape"];
 
+        if (dtype == "U8") {
+            continue;
+        }
+
         size_t begin = tensor_info["data_offsets"][0].get<size_t>();
         size_t end   = tensor_info["data_offsets"][1].get<size_t>();
 
@@ -1856,6 +1861,11 @@ std::string ModelLoader::load_t5_tokenizer_json() {
     return json_str;
 }
 
+std::string ModelLoader::load_umt5_tokenizer_json() {
+    std::string json_str(reinterpret_cast<const char*>(umt5_tokenizer_json_str), sizeof(umt5_tokenizer_json_str));
+    return json_str;
+}
+
 std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& vec) {
     std::vector<TensorStorage> res;
     std::unordered_map<std::string, size_t> name_to_index_map;
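Note: the UMT5 tokenizer JSON is baked into the binary. The generated vocab_umt5.hpp (its diff is suppressed below) evidently defines the byte array umt5_tokenizer_json_str, which the loader re-wraps as a std::string. A self-contained sketch of the same pattern, with a toy two-byte array standing in for the generated header:

    #include <string>

    // Stand-in for the generated vocab_umt5.hpp; the real array holds the
    // whole UMT5 tokenizer.json (array name taken from the diff, contents toy).
    static const unsigned char umt5_tokenizer_json_str[] = {'{', '}'};

    // Same re-wrapping as ModelLoader::load_umt5_tokenizer_json() above;
    // constructing from pointer + sizeof keeps any embedded NUL bytes intact.
    std::string load_umt5_tokenizer_json() {
        return std::string(reinterpret_cast<const char*>(umt5_tokenizer_json_str),
                           sizeof(umt5_tokenizer_json_str));
    }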
model.h (1 change)

@@ -258,6 +258,7 @@ public:
 
     static std::string load_merges();
     static std::string load_t5_tokenizer_json();
+    static std::string load_umt5_tokenizer_json();
 };
 
 #endif  // __MODEL_H__
rope.hpp (2 changes)

@@ -249,4 +249,4 @@ struct Rope {
     }
 };  // struct Rope
 
-#endif __ROPE_HPP__
+#endif  // __ROPE_HPP__
@@ -344,11 +344,11 @@ public:
             }
         }
         if (is_chroma) {
-            cond_stage_model = std::make_shared<PixArtCLIPEmbedder>(clip_backend,
-                                                                    model_loader.tensor_storages_types,
-                                                                    -1,
-                                                                    sd_ctx_params->chroma_use_t5_mask,
-                                                                    sd_ctx_params->chroma_t5_mask_pad);
+            cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
+                                                                model_loader.tensor_storages_types,
+                                                                -1,
+                                                                sd_ctx_params->chroma_use_t5_mask,
+                                                                sd_ctx_params->chroma_t5_mask_pad);
         } else {
             cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
         }
t5.hpp (143 changes)

@@ -124,7 +124,10 @@ protected:
             return;
         }
         std::string piece = item[0];
-        float score       = item[1];
+        if (piece.empty()) {
+            piece = "<empty_token>";
+        }
+        float score = item[1];
         piece_score_pairs.emplace_back(piece, score);
     }
 }
@@ -147,6 +150,7 @@ protected:
     std::vector<const char*> key(pieces->size());
     std::vector<int> value(pieces->size());
     for (size_t i = 0; i < pieces->size(); ++i) {
+        // LOG_DEBUG("%s %d", (*pieces)[i].first.c_str(), (*pieces)[i].second);
         key[i]   = (*pieces)[i].first.data();  // sorted piece.
         value[i] = (*pieces)[i].second;        // vocab_id
     }
@@ -335,9 +339,9 @@ protected:
     }
 
 public:
-    explicit T5UniGramTokenizer(const std::string& json_str = "") {
-        if (json_str.size() != 0) {
-            InitializePieces(json_str);
+    explicit T5UniGramTokenizer(bool is_umt5 = false) {
+        if (is_umt5) {
+            InitializePieces(ModelLoader::load_umt5_tokenizer_json());
         } else {
             InitializePieces(ModelLoader::load_t5_tokenizer_json());
         }
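Note: the tokenizer no longer takes a raw JSON string; it now picks one of the two vocabularies embedded in the binary. A usage sketch against the signature above:

    T5UniGramTokenizer t5_tokenizer;          // Unigram tokenizer, T5 vocab (32128 pieces)
    T5UniGramTokenizer umt5_tokenizer(true);  // same algorithm, UMT5 vocab (256384 pieces)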
@@ -673,10 +677,11 @@ public:
             int64_t model_dim,
             int64_t inner_dim,
             int64_t ff_dim,
-            int64_t num_heads)
+            int64_t num_heads,
+            bool relative_attention = true)
         : num_layers(num_layers) {
         for (int i = 0; i < num_layers; i++) {
-            blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, i == 0));
+            blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, (!relative_attention || i == 0)));
         }
 
         blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new T5LayerNorm(model_dim));
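Note: the last T5Block argument marks whether a block owns its own relative-position-bias table. Vanilla T5 stores the table only in block 0 and shares it downstream, while UMT5 gives every layer its own table, which is presumably why the flag is named from the T5 perspective. Assuming that reading, the condition works out to:

    // has_relative_attention_bias for block i, per the condition above:
    //   T5   (relative_attention = true):  true only for i == 0
    //   UMT5 (relative_attention = false): true for every block
    bool has_relative_attention_bias = (!relative_attention || i == 0);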
@@ -703,15 +708,30 @@ public:
     }
 };
 
+struct T5Params {
+    int64_t num_layers      = 24;
+    int64_t model_dim       = 4096;
+    int64_t ff_dim          = 10240;
+    int64_t num_heads       = 64;
+    int64_t vocab_size      = 32128;
+    bool relative_attention = true;
+};
+
 struct T5 : public GGMLBlock {
+    T5Params params;
+
 public:
-    T5(int64_t num_layers,
-       int64_t model_dim,
-       int64_t ff_dim,
-       int64_t num_heads,
-       int64_t vocab_size) {
-        blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
-        blocks["shared"]  = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
+    T5() {}
+    T5(T5Params params)
+        : params(params) {
+        blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(params.num_layers,
+                                                                   params.model_dim,
+                                                                   params.model_dim,
+                                                                   params.ff_dim,
+                                                                   params.num_heads,
+                                                                   params.relative_attention));
+        blocks["shared"]  = std::shared_ptr<GGMLBlock>(new Embedding(params.vocab_size,
+                                                                     params.model_dim));
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
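Note: with the hyperparameters gathered into T5Params, the UMT5 variant reduces to two overrides on the T5-XXL defaults. A sketch mirroring what T5Runner does in the next hunk (values taken from this diff):

    T5Params params;                     // defaults describe T5-XXL
    params.vocab_size         = 256384;  // UMT5 SentencePiece vocab
    params.relative_attention = false;   // per-layer position bias, see T5Stack
    T5 model(params);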
@@ -731,18 +751,20 @@ public:
 };
 
 struct T5Runner : public GGMLRunner {
+    T5Params params;
     T5 model;
     std::vector<int> relative_position_bucket_vec;
 
     T5Runner(ggml_backend_t backend,
             const String2GGMLType& tensor_types,
             const std::string prefix,
-             int64_t num_layers = 24,
-             int64_t model_dim  = 4096,
-             int64_t ff_dim     = 10240,
-             int64_t num_heads  = 64,
-             int64_t vocab_size = 32128)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+             bool is_umt5 = false)
+        : GGMLRunner(backend) {
+        if (is_umt5) {
+            params.vocab_size         = 256384;
+            params.relative_attention = false;
+        }
+        model = T5(params);
         model.init(params_ctx, tensor_types, prefix);
     }
 
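Note: callers now pass only the flag and the runner derives the architecture; the tensor-name prefix stays the same for both encoders. A hedged usage sketch (backend and tensor_types assumed to exist):

    auto t5 = std::make_shared<T5Runner>(backend,
                                         tensor_types,
                                         "text_encoders.t5xxl.transformer",
                                         /*is_umt5*/ true);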
@@ -769,7 +791,8 @@ struct T5Runner : public GGMLRunner {
                                     struct ggml_tensor* attention_mask = NULL) {
         struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
 
-        input_ids = to_backend(input_ids);
+        input_ids      = to_backend(input_ids);
+        attention_mask = to_backend(attention_mask);
 
         relative_position_bucket_vec = compute_relative_position_bucket(input_ids->ne[0], input_ids->ne[0]);
 
@@ -879,12 +902,8 @@ struct T5Embedder {
     T5Embedder(ggml_backend_t backend,
                const String2GGMLType& tensor_types = {},
                const std::string prefix = "",
-               int64_t num_layers = 24,
-               int64_t model_dim  = 4096,
-               int64_t ff_dim     = 10240,
-               int64_t num_heads  = 64,
-               int64_t vocab_size = 32128)
-        : model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+               bool is_umt5 = false)
+        : model(backend, tensor_types, prefix, is_umt5), tokenizer(is_umt5) {
     }
 
     void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -946,25 +965,22 @@ struct T5Embedder {
         GGML_ASSERT(work_ctx != NULL);
 
         {
-            // cpu f16: pass
-            // cpu f32: pass
-            // cuda f16: nan
-            // cuda f32: pass
-            // cuda q8_0: nan
-            // TODO: fix cuda nan
             std::string text("a lovely cat");
-            auto tokens_and_weights = tokenize(text, 77, true);
+            // std::string text("一只可爱的猫");  // umt5 chinese test
+            auto tokens_and_weights = tokenize(text, 512, true);
             std::vector<int>& tokens    = std::get<0>(tokens_and_weights);
             std::vector<float>& weights = std::get<1>(tokens_and_weights);
             std::vector<float>& masks   = std::get<2>(tokens_and_weights);
             for (auto token : tokens) {
                 printf("%d ", token);
             }
             printf("\n");
             auto input_ids      = vector_to_ggml_tensor_i32(work_ctx, tokens);
             auto attention_mask = vector_to_ggml_tensor(work_ctx, masks);
             struct ggml_tensor* out = NULL;
 
             int t0 = ggml_time_ms();
-            model.compute(8, input_ids, NULL, &out, work_ctx);
+            model.compute(8, input_ids, attention_mask, &out, work_ctx);
             int t1 = ggml_time_ms();
 
             print_ggml_tensor(out);
@@ -973,32 +989,43 @@ struct T5Embedder {
     }
 
     static void load_from_file_and_test(const std::string& file_path) {
-        // ggml_backend_t backend = ggml_backend_cuda_init(0);
-        ggml_backend_t backend         = ggml_backend_cpu_init();
-        ggml_type model_data_type      = GGML_TYPE_F32;
-        std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend));
-        {
-            LOG_INFO("loading from '%s'", file_path.c_str());
+        // cpu f16: pass
+        // cpu f32: pass
+        // cuda f16: pass
+        // cuda f32: pass
+        // cuda q8_0: pass
+        ggml_backend_t backend = ggml_backend_cuda_init(0);
+        // ggml_backend_t backend = ggml_backend_cpu_init();
+        ggml_type model_data_type = GGML_TYPE_F16;
 
-            t5->alloc_params_buffer();
-            std::map<std::string, ggml_tensor*> tensors;
-            t5->get_param_tensors(tensors, "");
-
-            ModelLoader model_loader;
-            if (!model_loader.init_from_file(file_path)) {
-                LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
-                return;
-            }
-
-            bool success = model_loader.load_tensors(tensors, backend);
-
-            if (!success) {
-                LOG_ERROR("load tensors from model loader failed");
-                return;
-            }
-
-            LOG_INFO("t5 model loaded");
+        ModelLoader model_loader;
+        if (!model_loader.init_from_file(file_path)) {
+            LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
+            return;
+        }
+
+        auto tensor_types = model_loader.tensor_storages_types;
+        for (auto& item : tensor_types) {
+            // LOG_DEBUG("%s %u", item.first.c_str(), item.second);
+            if (ends_with(item.first, "weight")) {
+                item.second = model_data_type;
+            }
+        }
+
+        std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend, tensor_types, "", true));
+
+        t5->alloc_params_buffer();
+        std::map<std::string, ggml_tensor*> tensors;
+        t5->get_param_tensors(tensors, "");
+
+        bool success = model_loader.load_tensors(tensors, backend);
+
+        if (!success) {
+            LOG_ERROR("load tensors from model loader failed");
+            return;
+        }
+
+        LOG_INFO("t5 model loaded");
         t5->test();
     }
 };
 
thirdparty/darts.h (vendored, 3 changes)

@@ -4,6 +4,7 @@
 #include <cstdio>
 #include <exception>
 #include <new>
+#include <iostream>
 
 #define DARTS_VERSION "0.32"
 
@@ -1140,9 +1141,11 @@ inline void DawgBuilder::insert(const char *key, std::size_t length,
   if (value < 0) {
     DARTS_THROW("failed to insert key: negative value");
   } else if (length == 0) {
+    std::cout << value << std::endl;
    DARTS_THROW("failed to insert key: zero-length key");
   }
 
+
   id_type id = 0;
   std::size_t key_pos = 0;
 
vocab_umt5.hpp (new file, 762304 lines added)

File diff suppressed because it is too large.