mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-05-08 16:28:53 +00:00
feat: add left padding support to tokenizers (#1424)
This commit is contained in:
parent
9ac7b672c2
commit
c41c5ded7a
@ -107,13 +107,24 @@ void Tokenizer::pad_tokens(std::vector<int>& tokens,
|
|||||||
|
|
||||||
if (final_length > out_tokens.size()) {
|
if (final_length > out_tokens.size()) {
|
||||||
const size_t pad_count = final_length - out_tokens.size();
|
const size_t pad_count = final_length - out_tokens.size();
|
||||||
out_tokens.insert(out_tokens.end(), pad_count, PAD_TOKEN_ID);
|
if (pad_left) {
|
||||||
|
out_tokens.insert(out_tokens.begin(), pad_count, PAD_TOKEN_ID);
|
||||||
|
|
||||||
if (use_weights) {
|
if (use_weights) {
|
||||||
out_weights.insert(out_weights.end(), pad_count, 1.0f);
|
out_weights.insert(out_weights.begin(), pad_count, 1.0f);
|
||||||
}
|
}
|
||||||
if (use_mask) {
|
if (use_mask) {
|
||||||
out_mask.insert(out_mask.end(), pad_count, 0.0f);
|
out_mask.insert(out_mask.begin(), pad_count, 0.0f);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
out_tokens.insert(out_tokens.end(), pad_count, PAD_TOKEN_ID);
|
||||||
|
|
||||||
|
if (use_weights) {
|
||||||
|
out_weights.insert(out_weights.end(), pad_count, 1.0f);
|
||||||
|
}
|
||||||
|
if (use_mask) {
|
||||||
|
out_mask.insert(out_mask.end(), pad_count, 0.0f);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -14,6 +14,7 @@ protected:
|
|||||||
std::vector<std::string> special_tokens;
|
std::vector<std::string> special_tokens;
|
||||||
bool add_bos_token = false;
|
bool add_bos_token = false;
|
||||||
bool add_eos_token = false;
|
bool add_eos_token = false;
|
||||||
|
bool pad_left = false;
|
||||||
std::string end_of_word_suffix;
|
std::string end_of_word_suffix;
|
||||||
|
|
||||||
virtual std::string decode_token(int token_id) const = 0;
|
virtual std::string decode_token(int token_id) const = 0;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user