mirror of https://github.com/leejet/stable-diffusion.cpp.git

commit bace0a08c4 (parent 5f7d98884c)

    add umt5 support
@@ -1223,20 +1223,21 @@ struct FluxCLIPEmbedder : public Conditioner {
     }
 };
 
-struct PixArtCLIPEmbedder : public Conditioner {
+struct T5CLIPEmbedder : public Conditioner {
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<T5Runner> t5;
     size_t chunk_len = 512;
     bool use_mask = false;
     int mask_pad = 1;
 
-    PixArtCLIPEmbedder(ggml_backend_t backend,
+    T5CLIPEmbedder(ggml_backend_t backend,
                    const String2GGMLType& tensor_types = {},
                    int clip_skip = -1,
                    bool use_mask = false,
-                   int mask_pad = 1)
-        : use_mask(use_mask), mask_pad(mask_pad) {
-        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
+                   int mask_pad = 1,
+                   bool is_umt5 = false)
+        : use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
+        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer", is_umt5);
     }
 
     void set_clip_skip(int clip_skip) {
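For orientation, a minimal usage sketch (not part of the diff, assuming the usual backend and ModelLoader setup) of how a caller opts into the UMT5 path via the new trailing flag; the flag feeds both the tokenizer and the runner:

    // Hypothetical caller, not part of this commit. `is_umt5 = true` makes
    // the embedder load the UMT5 sentencepiece vocab and configure T5Runner
    // for UMT5 weights.
    ggml_backend_t backend = ggml_backend_cpu_init();
    String2GGMLType tensor_types;  // normally model_loader.tensor_storages_types
    auto cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend,
                                                             tensor_types,
                                                             -1,     // clip_skip
                                                             false,  // use_mask
                                                             1,      // mask_pad
                                                             true);  // is_umt5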
@@ -27,7 +27,7 @@
 #define SAFE_STR(s) ((s) ? (s) : "")
 #define BOOL_STR(b) ((b) ? "true" : "false")
 
-#include "wan.hpp"
+#include "t5.hpp"
 
 const char* modes_str[] = {
     "img_gen",
@@ -746,11 +746,11 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
 
 int main(int argc, const char* argv[]) {
     SDParams params;
-    params.verbose = true;
-    sd_set_log_callback(sd_log_cb, (void*)&params);
+    // params.verbose = true;
+    // sd_set_log_callback(sd_log_cb, (void*)&params);
 
-    WAN::WanRunner::load_from_file_and_test(argv[1]);
-    return 0;
+    // T5Embedder::load_from_file_and_test(argv[1]);
+    // return 0;
 
     parse_args(argc, argv, params);
 
flux.hpp (2 changed lines)
@@ -896,7 +896,7 @@ namespace Flux {
         }
         for (auto pair : tensor_types) {
             std::string tensor_name = pair.first;
-            if (tensor_name.find("model.diffusion_model.") == std::string::npos)
+            if (!starts_with(tensor_name, prefix))
                 continue;
             if (tensor_name.find("guidance_in.in_layer.weight") != std::string::npos) {
                 // not schnell
@@ -1,2 +1,5 @@
-clang-format -style=file -i *.cpp *.h *.hpp
-clang-format -style=file -i examples/cli/*.cpp
+for f in *.cpp *.h *.hpp examples/cli/*.cpp; do
+    [[ "$f" == vocab* ]] && continue
+    echo "formatting '$f'"
+    clang-format -style=file -i "$f"
+done
ggml (submodule, 2 changed lines)
@@ -1 +1 @@
-Subproject commit b96890f3ab5ffbdbe56bc126df5366c34bd08d39
+Subproject commit e89bc7e8625f59145ee8c0b09383009c47752cd8
model.cpp (10 changed lines)
@@ -10,6 +10,7 @@
 #include "stable-diffusion.h"
 #include "util.h"
 #include "vocab.hpp"
+#include "vocab_umt5.hpp"
 
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
@@ -1157,6 +1158,10 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
             std::string dtype = tensor_info["dtype"];
             nlohmann::json shape = tensor_info["shape"];
 
+            if (dtype == "U8") {
+                continue;
+            }
+
             size_t begin = tensor_info["data_offsets"][0].get<size_t>();
             size_t end = tensor_info["data_offsets"][1].get<size_t>();
 
@@ -1856,6 +1861,11 @@ std::string ModelLoader::load_t5_tokenizer_json() {
     return json_str;
 }
 
+std::string ModelLoader::load_umt5_tokenizer_json() {
+    std::string json_str(reinterpret_cast<const char*>(umt5_tokenizer_json_str), sizeof(umt5_tokenizer_json_str));
+    return json_str;
+}
+
 std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& vec) {
     std::vector<TensorStorage> res;
     std::unordered_map<std::string, size_t> name_to_index_map;
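load_umt5_tokenizer_json() reads a byte array named umt5_tokenizer_json_str, so the new vocab_umt5.hpp (diff suppressed at the bottom of this page) is presumably a generated header embedding the UMT5 tokenizer JSON. A minimal stand-in under that assumption:

    // Assumed shape of the generated vocab_umt5.hpp -- the real array holds
    // the full tokenizer JSON (~762k lines); these bytes are placeholders.
    static const unsigned char umt5_tokenizer_json_str[] = {
        '{', '}',
    };
    // Consumed verbatim by ModelLoader::load_umt5_tokenizer_json():
    //   std::string json_str(reinterpret_cast<const char*>(umt5_tokenizer_json_str),
    //                        sizeof(umt5_tokenizer_json_str));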
model.h (1 changed line)
@@ -258,6 +258,7 @@ public:
 
     static std::string load_merges();
     static std::string load_t5_tokenizer_json();
+    static std::string load_umt5_tokenizer_json();
 };
 
 #endif  // __MODEL_H__
rope.hpp (2 changed lines)
@@ -249,4 +249,4 @@ struct Rope {
     }
 };  // struct Rope
 
-#endif __ROPE_HPP__
+#endif  // __ROPE_HPP__
@@ -344,7 +344,7 @@ public:
             }
         }
         if (is_chroma) {
-            cond_stage_model = std::make_shared<PixArtCLIPEmbedder>(clip_backend,
+            cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
                                                                 model_loader.tensor_storages_types,
                                                                 -1,
                                                                 sd_ctx_params->chroma_use_t5_mask,
t5.hpp (113 changed lines)
@@ -124,6 +124,9 @@ protected:
                 return;
             }
             std::string piece = item[0];
+            if (piece.empty()) {
+                piece = "<empty_token>";
+            }
             float score = item[1];
             piece_score_pairs.emplace_back(piece, score);
         }
@@ -147,6 +150,7 @@ protected:
         std::vector<const char*> key(pieces->size());
         std::vector<int> value(pieces->size());
         for (size_t i = 0; i < pieces->size(); ++i) {
+            // LOG_DEBUG("%s %d", (*pieces)[i].first.c_str(), (*pieces)[i].second);
             key[i] = (*pieces)[i].first.data();  // sorted piece.
             value[i] = (*pieces)[i].second;      // vocab_id
         }
@@ -335,9 +339,9 @@ protected:
     }
 
 public:
-    explicit T5UniGramTokenizer(const std::string& json_str = "") {
-        if (json_str.size() != 0) {
-            InitializePieces(json_str);
+    explicit T5UniGramTokenizer(bool is_umt5 = false) {
+        if (is_umt5) {
+            InitializePieces(ModelLoader::load_umt5_tokenizer_json());
         } else {
             InitializePieces(ModelLoader::load_t5_tokenizer_json());
         }
@@ -673,10 +677,11 @@ public:
             int64_t model_dim,
             int64_t inner_dim,
             int64_t ff_dim,
-            int64_t num_heads)
+            int64_t num_heads,
+            bool relative_attention = true)
         : num_layers(num_layers) {
         for (int i = 0; i < num_layers; i++) {
-            blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, i == 0));
+            blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, (!relative_attention || i == 0)));
         }
 
         blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new T5LayerNorm(model_dim));
@@ -703,15 +708,30 @@ public:
     }
 };
 
+struct T5Params {
+    int64_t num_layers = 24;
+    int64_t model_dim = 4096;
+    int64_t ff_dim = 10240;
+    int64_t num_heads = 64;
+    int64_t vocab_size = 32128;
+    bool relative_attention = true;
+};
+
 struct T5 : public GGMLBlock {
+    T5Params params;
+
 public:
-    T5(int64_t num_layers,
-       int64_t model_dim,
-       int64_t ff_dim,
-       int64_t num_heads,
-       int64_t vocab_size) {
-        blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
-        blocks["shared"] = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
+    T5() {}
+    T5(T5Params params)
+        : params(params) {
+        blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(params.num_layers,
+                                                                   params.model_dim,
+                                                                   params.model_dim,
+                                                                   params.ff_dim,
+                                                                   params.num_heads,
+                                                                   params.relative_attention));
+        blocks["shared"] = std::shared_ptr<GGMLBlock>(new Embedding(params.vocab_size,
+                                                                    params.model_dim));
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
@@ -731,18 +751,20 @@ public:
 };
 
 struct T5Runner : public GGMLRunner {
+    T5Params params;
     T5 model;
     std::vector<int> relative_position_bucket_vec;
 
     T5Runner(ggml_backend_t backend,
              const String2GGMLType& tensor_types,
              const std::string prefix,
-             int64_t num_layers = 24,
-             int64_t model_dim = 4096,
-             int64_t ff_dim = 10240,
-             int64_t num_heads = 64,
-             int64_t vocab_size = 32128)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+             bool is_umt5 = false)
+        : GGMLRunner(backend) {
+        if (is_umt5) {
+            params.vocab_size = 256384;
+            params.relative_attention = false;
+        }
+        model = T5(params);
         model.init(params_ctx, tensor_types, prefix);
     }
 
@@ -770,6 +792,7 @@ struct T5Runner : public GGMLRunner {
         struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
 
         input_ids = to_backend(input_ids);
+        attention_mask = to_backend(attention_mask);
 
         relative_position_bucket_vec = compute_relative_position_bucket(input_ids->ne[0], input_ids->ne[0]);
 
@@ -879,12 +902,8 @@ struct T5Embedder {
     T5Embedder(ggml_backend_t backend,
                const String2GGMLType& tensor_types = {},
                const std::string prefix = "",
-               int64_t num_layers = 24,
-               int64_t model_dim = 4096,
-               int64_t ff_dim = 10240,
-               int64_t num_heads = 64,
-               int64_t vocab_size = 32128)
-        : model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+               bool is_umt5 = false)
+        : model(backend, tensor_types, prefix, is_umt5), tokenizer(is_umt5) {
     }
 
     void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -946,25 +965,22 @@ struct T5Embedder {
         GGML_ASSERT(work_ctx != NULL);
 
         {
-            // cpu f16: pass
-            // cpu f32: pass
-            // cuda f16: nan
-            // cuda f32: pass
-            // cuda q8_0: nan
-            // TODO: fix cuda nan
             std::string text("a lovely cat");
-            auto tokens_and_weights = tokenize(text, 77, true);
+            // std::string text("一只可爱的猫");  // umt5 chinese test
+            auto tokens_and_weights = tokenize(text, 512, true);
             std::vector<int>& tokens = std::get<0>(tokens_and_weights);
             std::vector<float>& weights = std::get<1>(tokens_and_weights);
+            std::vector<float>& masks = std::get<2>(tokens_and_weights);
             for (auto token : tokens) {
                 printf("%d ", token);
             }
             printf("\n");
             auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
+            auto attention_mask = vector_to_ggml_tensor(work_ctx, masks);
             struct ggml_tensor* out = NULL;
 
             int t0 = ggml_time_ms();
-            model.compute(8, input_ids, NULL, &out, work_ctx);
+            model.compute(8, input_ids, attention_mask, &out, work_ctx);
             int t1 = ggml_time_ms();
 
             print_ggml_tensor(out);
@@ -973,16 +989,14 @@ struct T5Embedder {
     }
 
     static void load_from_file_and_test(const std::string& file_path) {
-        // ggml_backend_t backend = ggml_backend_cuda_init(0);
-        ggml_backend_t backend = ggml_backend_cpu_init();
-        ggml_type model_data_type = GGML_TYPE_F32;
-        std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend));
-        {
-            LOG_INFO("loading from '%s'", file_path.c_str());
-
-            t5->alloc_params_buffer();
-            std::map<std::string, ggml_tensor*> tensors;
-            t5->get_param_tensors(tensors, "");
+        // cpu f16: pass
+        // cpu f32: pass
+        // cuda f16: pass
+        // cuda f32: pass
+        // cuda q8_0: pass
+        ggml_backend_t backend = ggml_backend_cuda_init(0);
+        // ggml_backend_t backend = ggml_backend_cpu_init();
+        ggml_type model_data_type = GGML_TYPE_F16;
 
         ModelLoader model_loader;
         if (!model_loader.init_from_file(file_path)) {
@@ -990,6 +1004,20 @@ struct T5Embedder {
             return;
         }
 
+        auto tensor_types = model_loader.tensor_storages_types;
+        for (auto& item : tensor_types) {
+            // LOG_DEBUG("%s %u", item.first.c_str(), item.second);
+            if (ends_with(item.first, "weight")) {
+                item.second = model_data_type;
+            }
+        }
+
+        std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend, tensor_types, "", true));
+
+        t5->alloc_params_buffer();
+        std::map<std::string, ggml_tensor*> tensors;
+        t5->get_param_tensors(tensors, "");
+
         bool success = model_loader.load_tensors(tensors, backend);
 
         if (!success) {
@@ -998,7 +1026,6 @@ struct T5Embedder {
         }
 
         LOG_INFO("t5 model loaded");
-        }
         t5->test();
     }
 };
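Taken together, the UMT5 variant differs from stock T5-XXL only in the two fields T5Runner overrides; with relative_attention set to false, the predicate (!relative_attention || i == 0) becomes true for every block, so each encoder layer keeps its own relative-attention bias table (UMT5 style) instead of sharing block 0's (T5 style). A sketch of the two effective configurations:

    // Effective configs selected by T5Runner (values from the hunks above).
    T5Params t5_cfg;                      // defaults: 24 layers, model_dim 4096,
                                          // ff_dim 10240, 64 heads, vocab 32128,
                                          // shared relative-attention bias
    T5Params umt5_cfg;
    umt5_cfg.vocab_size = 256384;         // larger multilingual vocab
    umt5_cfg.relative_attention = false;  // per-layer bias in every T5Block
    T5 model = T5(umt5_cfg);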
thirdparty/darts.h (vendored, 3 changed lines)
@@ -4,6 +4,7 @@
 #include <cstdio>
 #include <exception>
 #include <new>
+#include <iostream>
 
 #define DARTS_VERSION "0.32"
 
@@ -1140,9 +1141,11 @@ inline void DawgBuilder::insert(const char *key, std::size_t length,
   if (value < 0) {
     DARTS_THROW("failed to insert key: negative value");
   } else if (length == 0) {
+    std::cout << value << std::endl;
     DARTS_THROW("failed to insert key: zero-length key");
   }
 
+
   id_type id = 0;
   std::size_t key_pos = 0;
vocab_umt5.hpp (new file, 762304 lines)
File diff suppressed because it is too large