mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-05-08 08:18:51 +00:00
feat: add left padding support to tokenizers (#1424)
This commit is contained in:
parent
9ac7b672c2
commit
c41c5ded7a
@@ -107,13 +107,24 @@ void Tokenizer::pad_tokens(std::vector<int>& tokens,
|
||||
|
||||
if (final_length > out_tokens.size()) {
|
||||
const size_t pad_count = final_length - out_tokens.size();
|
||||
out_tokens.insert(out_tokens.end(), pad_count, PAD_TOKEN_ID);
|
||||
if (pad_left) {
|
||||
out_tokens.insert(out_tokens.begin(), pad_count, PAD_TOKEN_ID);
|
||||
|
||||
if (use_weights) {
|
||||
out_weights.insert(out_weights.end(), pad_count, 1.0f);
|
||||
}
|
||||
if (use_mask) {
|
||||
out_mask.insert(out_mask.end(), pad_count, 0.0f);
|
||||
if (use_weights) {
|
||||
out_weights.insert(out_weights.begin(), pad_count, 1.0f);
|
||||
}
|
||||
if (use_mask) {
|
||||
out_mask.insert(out_mask.begin(), pad_count, 0.0f);
|
||||
}
|
||||
} else {
|
||||
out_tokens.insert(out_tokens.end(), pad_count, PAD_TOKEN_ID);
|
||||
|
||||
if (use_weights) {
|
||||
out_weights.insert(out_weights.end(), pad_count, 1.0f);
|
||||
}
|
||||
if (use_mask) {
|
||||
out_mask.insert(out_mask.end(), pad_count, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -14,6 +14,7 @@ protected:
|
||||
std::vector<std::string> special_tokens;
|
||||
bool add_bos_token = false;
|
||||
bool add_eos_token = false;
|
||||
bool pad_left = false;
|
||||
std::string end_of_word_suffix;
|
||||
|
||||
virtual std::string decode_token(int token_id) const = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user