mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-23 22:56:42 +00:00
Compare commits
3 Commits
5a34bc7f6e
...
7f0e728b7d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7f0e728b7d | ||
|
|
92a3b73cdb | ||
|
|
710bc91c8f |
@ -1 +1 @@
|
||||
Subproject commit 797ccf80825cc035508ba9b599b2a21953e7f835
|
||||
Subproject commit c4bce3d6b3f236614cca21014f076083b7270ba8
|
||||
@ -142,8 +142,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: version(version), tokenizer(sd_version_is_sd2(version) ? 0 : 49407) {
|
||||
for (const auto& kv : orig_embedding_map) {
|
||||
std::string name = kv.first;
|
||||
std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); });
|
||||
std::string name = normalize_embedding_name(kv.first);
|
||||
embedding_map[name] = kv.second;
|
||||
tokenizer.add_special_token(name);
|
||||
}
|
||||
@ -278,17 +277,23 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Lower-case an embedding name so it can serve as a case-insensitive key.
// The string is taken by value, converted in place one byte at a time with
// std::tolower, and returned. The lambda takes unsigned char so that bytes
// >= 0x80 are never handed to std::tolower as negative values.
static std::string normalize_embedding_name(std::string name) {
    std::transform(name.begin(), name.end(), name.begin(),
                   // std::tolower returns int; convert back to char explicitly
                   // instead of relying on an implicit narrowing in transform.
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return name;
}
|
||||
|
||||
bool append_embedding_tokens(std::string str, std::vector<int32_t>& bpe_tokens) {
|
||||
std::string name = normalize_embedding_name(std::move(str));
|
||||
auto iter = embedding_map.find(name);
|
||||
if (iter == embedding_map.end()) {
|
||||
return false;
|
||||
}
|
||||
return load_embedding(name, iter->second, bpe_tokens);
|
||||
}
|
||||
|
||||
std::vector<int> convert_token_to_id(std::string text) {
|
||||
auto on_new_token_cb = [&](std::string& str, std::vector<int32_t>& bpe_tokens) -> bool {
|
||||
auto iter = embedding_map.find(str);
|
||||
if (iter == embedding_map.end()) {
|
||||
return false;
|
||||
}
|
||||
std::string embedding_path = iter->second;
|
||||
if (load_embedding(str, embedding_path, bpe_tokens)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return append_embedding_tokens(str, bpe_tokens);
|
||||
};
|
||||
std::vector<int> curr_tokens = tokenizer.encode(text, on_new_token_cb);
|
||||
return curr_tokens;
|
||||
@ -315,15 +320,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
||||
}
|
||||
|
||||
auto on_new_token_cb = [&](std::string& str, std::vector<int32_t>& bpe_tokens) -> bool {
|
||||
auto iter = embedding_map.find(str);
|
||||
if (iter == embedding_map.end()) {
|
||||
return false;
|
||||
}
|
||||
std::string embedding_path = iter->second;
|
||||
if (load_embedding(str, embedding_path, bpe_tokens)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return append_embedding_tokens(str, bpe_tokens);
|
||||
};
|
||||
|
||||
std::vector<int> tokens;
|
||||
|
||||
@ -99,7 +99,7 @@ bool convert(const char* input_path,
|
||||
model_loader.convert_tensors_name();
|
||||
}
|
||||
|
||||
ggml_type type = (ggml_type)output_type;
|
||||
ggml_type type = sd_type_to_ggml_type(output_type);
|
||||
bool output_is_safetensors = ends_with(output_path, ".safetensors");
|
||||
TensorTypeRules type_rules = parse_tensor_type_rules(tensor_type_rules);
|
||||
|
||||
|
||||
@ -406,6 +406,15 @@ std::vector<std::string> split_string(const std::string& str, char delimiter) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convert an sd_type_t value to the ggml_type that has the same numeric value.
// Values outside [0, min(SD_TYPE_COUNT, GGML_TYPE_COUNT)) have no ggml
// counterpart and yield GGML_TYPE_COUNT instead.
ggml_type sd_type_to_ggml_type(sd_type_t sdtype) {
    const int type_value = static_cast<int>(sdtype);
    const int type_limit = std::min<int>(SD_TYPE_COUNT, GGML_TYPE_COUNT);
    // The lower bound matters too: a negative input would otherwise be cast
    // to a ggml_type value that names no enumerator.
    if (type_value < 0 || type_value >= type_limit) {
        return GGML_TYPE_COUNT;
    }
    return static_cast<ggml_type>(type_value);
}
|
||||
|
||||
KeyValueArgs parse_key_value_args(const char* args, const char* context) {
|
||||
KeyValueArgs pairs;
|
||||
|
||||
|
||||
@ -80,6 +80,8 @@ void pretty_bytes_progress(int step, int steps, uint64_t bytes_processed, float
|
||||
|
||||
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
|
||||
|
||||
// Maps an sd_type_t to the ggml_type with the same numeric value; yields
// GGML_TYPE_COUNT when the value is not below min(SD_TYPE_COUNT, GGML_TYPE_COUNT).
ggml_type sd_type_to_ggml_type(sd_type_t sdtype);
|
||||
|
||||
std::string trim(const std::string& s);
|
||||
|
||||
std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::string& text);
|
||||
|
||||
@ -522,9 +522,7 @@ public:
|
||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||
|
||||
LOG_INFO("Version: %s ", model_version_to_str[version]);
|
||||
ggml_type wtype = (int)sd_ctx_params->wtype < std::min<int>(SD_TYPE_COUNT, GGML_TYPE_COUNT)
|
||||
? (ggml_type)sd_ctx_params->wtype
|
||||
: GGML_TYPE_COUNT;
|
||||
ggml_type wtype = sd_type_to_ggml_type(sd_ctx_params->wtype);
|
||||
std::string tensor_type_rules = SAFE_STR(sd_ctx_params->tensor_type_rules);
|
||||
if (wtype != GGML_TYPE_COUNT || tensor_type_rules.size() > 0) {
|
||||
model_loader.set_wtype_override(wtype, tensor_type_rules);
|
||||
|
||||
@ -134,7 +134,8 @@ std::vector<int> BPETokenizer::encode(const std::string& text, on_new_token_cb_t
|
||||
std::vector<int32_t> bpe_tokens;
|
||||
std::vector<std::string> token_strs;
|
||||
|
||||
auto splited_texts = split_with_special_tokens(text, special_tokens);
|
||||
std::string normalized_text = normalize_before_split ? normalize(text) : text;
|
||||
auto splited_texts = split_with_special_tokens(normalized_text, special_tokens);
|
||||
|
||||
for (auto& splited_text : splited_texts) {
|
||||
if (is_special_token(splited_text)) {
|
||||
@ -159,7 +160,7 @@ std::vector<int> BPETokenizer::encode(const std::string& text, on_new_token_cb_t
|
||||
}
|
||||
}
|
||||
|
||||
std::string token_str = normalize(token);
|
||||
std::string token_str = normalize_before_split ? token : normalize(token);
|
||||
std::u32string utf32_token;
|
||||
if (byte_level_bpe) {
|
||||
for (int i = 0; i < token_str.length(); i++) {
|
||||
|
||||
@ -22,9 +22,10 @@ CLIPTokenizer::CLIPTokenizer(int pad_token_id, const std::string& merges_utf8_st
|
||||
EOS_TOKEN_ID = 49407;
|
||||
PAD_TOKEN_ID = pad_token_id;
|
||||
|
||||
end_of_word_suffix = "</w>";
|
||||
add_bos_token = true;
|
||||
add_eos_token = true;
|
||||
end_of_word_suffix = "</w>";
|
||||
add_bos_token = true;
|
||||
add_eos_token = true;
|
||||
normalize_before_split = true;
|
||||
|
||||
if (merges_utf8_str.size() > 0) {
|
||||
load_from_merges(merges_utf8_str);
|
||||
|
||||
@ -12,9 +12,10 @@ using on_new_token_cb_t = std::function<bool(std::string&, std::vector<int32_t>&
|
||||
class Tokenizer {
|
||||
protected:
|
||||
std::vector<std::string> special_tokens;
|
||||
bool add_bos_token = false;
|
||||
bool add_eos_token = false;
|
||||
bool pad_left = false;
|
||||
bool add_bos_token = false;
|
||||
bool add_eos_token = false;
|
||||
bool pad_left = false;
|
||||
bool normalize_before_split = false;
|
||||
std::string end_of_word_suffix;
|
||||
|
||||
virtual std::string decode_token(int token_id) const = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user