style: format code

This commit is contained in:
leejet 2025-06-29 23:40:55 +08:00
parent b1cc40c35c
commit 45d0ebb30c
11 changed files with 92 additions and 99 deletions

View File

@ -1224,14 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
T5UniGramTokenizer t5_tokenizer;
std::shared_ptr<T5Runner> t5;
size_t chunk_len = 512;
bool use_mask = false;
int mask_pad = 1;
bool use_mask = false;
int mask_pad = 1;
PixArtCLIPEmbedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
int clip_skip = -1,
bool use_mask = false,
int mask_pad = 1) : use_mask(use_mask), mask_pad(mask_pad) {
int mask_pad = 1)
: use_mask(use_mask), mask_pad(mask_pad) {
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
}

View File

@ -1019,7 +1019,7 @@ static void sample_k_diffusion(sample_method_t method,
// also needed to invert the behavior of CompVisDenoiser
// (k-diffusion's LMSDiscreteScheduler)
float beta_start = 0.00085f;
float beta_end = 0.0120f;
float beta_end = 0.0120f;
std::vector<double> alphas_cumprod;
std::vector<double> compvis_sigmas;
@ -1030,8 +1030,9 @@ static void sample_k_diffusion(sample_method_t method,
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f -
std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)), 2));
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) /
alphas_cumprod[i]);
@ -1061,7 +1062,8 @@ static void sample_k_diffusion(sample_method_t method,
// - pred_prev_sample -> "x_t-1"
int timestep =
roundf(TIMESTEPS -
i * ((float)TIMESTEPS / steps)) - 1;
i * ((float)TIMESTEPS / steps)) -
1;
// 1. get previous step value (=t-1)
int prev_timestep = timestep - TIMESTEPS / steps;
// The sigma here is chosen to cause the
@ -1086,10 +1088,9 @@ static void sample_k_diffusion(sample_method_t method,
float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
sigma;
sigma;
}
}
else {
} else {
// For the subsequent steps after the first one,
// at this point x = latents or x = sample, and
// needs to be prescaled with x <- sample / c_in
@ -1127,9 +1128,8 @@ static void sample_k_diffusion(sample_method_t method,
float alpha_prod_t = alphas_cumprod[timestep];
// Note final_alpha_cumprod = alphas_cumprod[0] due to
// trailing timestep spacing
float alpha_prod_t_prev = prev_timestep >= 0 ?
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
float beta_prod_t = 1 - alpha_prod_t;
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
float beta_prod_t = 1 - alpha_prod_t;
// 3. compute predicted original sample from predicted
// noise also called "predicted x_0" of formula (12)
// from https://arxiv.org/pdf/2010.02502.pdf
@ -1145,7 +1145,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_pred_original_sample[j] =
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
std::sqrt(beta_prod_t) *
vec_model_output[j]) *
vec_model_output[j]) *
(1 / std::sqrt(alpha_prod_t));
}
}
@ -1159,8 +1159,8 @@ static void sample_k_diffusion(sample_method_t method,
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
// sqrt(1 - alpha_t/alpha_t-1)
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
float variance = (beta_prod_t_prev / beta_prod_t) *
(1 - alpha_prod_t / alpha_prod_t_prev);
float variance = (beta_prod_t_prev / beta_prod_t) *
(1 - alpha_prod_t / alpha_prod_t_prev);
float std_dev_t = eta * std::sqrt(variance);
// 6. compute "direction pointing to x_t" of formula
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@ -1179,8 +1179,8 @@ static void sample_k_diffusion(sample_method_t method,
std::pow(std_dev_t, 2)) *
vec_model_output[j];
vec_x[j] = std::sqrt(alpha_prod_t_prev) *
vec_pred_original_sample[j] +
pred_sample_direction;
vec_pred_original_sample[j] +
pred_sample_direction;
}
}
if (eta > 0) {
@ -1208,7 +1208,7 @@ static void sample_k_diffusion(sample_method_t method,
// by Semi-Linear Consistency Function with Trajectory
// Mapping", arXiv:2402.19159 [cs.CV]
float beta_start = 0.00085f;
float beta_end = 0.0120f;
float beta_end = 0.0120f;
std::vector<double> alphas_cumprod;
std::vector<double> compvis_sigmas;
@ -1219,8 +1219,9 @@ static void sample_k_diffusion(sample_method_t method,
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f -
std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)), 2));
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) /
alphas_cumprod[i]);
@ -1235,13 +1236,10 @@ static void sample_k_diffusion(sample_method_t method,
for (int i = 0; i < steps; i++) {
// Analytic form for TCD timesteps
int timestep = TIMESTEPS - 1 -
(TIMESTEPS / original_steps) *
(int)floor(i * ((float)original_steps / steps));
(TIMESTEPS / original_steps) *
(int)floor(i * ((float)original_steps / steps));
// 1. get previous step value
int prev_timestep = i >= steps - 1 ? 0 :
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
(int)floor((i + 1) *
((float)original_steps / steps));
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
// Here timestep_s is tau_n' in Algorithm 4. The _s
// notation appears to be that from C. Lu,
// "DPM-Solver: A Fast ODE Solver for Diffusion
@ -1258,10 +1256,9 @@ static void sample_k_diffusion(sample_method_t method,
float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
sigma;
sigma;
}
}
else {
} else {
float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1);
@ -1294,15 +1291,14 @@ static void sample_k_diffusion(sample_method_t method,
// DPM-Solver. In fact, we have alpha_{t_n} =
// \sqrt{\hat{alpha_n}}, [...]"
float alpha_prod_t = alphas_cumprod[timestep];
float beta_prod_t = 1 - alpha_prod_t;
float beta_prod_t = 1 - alpha_prod_t;
// Note final_alpha_cumprod = alphas_cumprod[0] since
// TCD is always "trailing"
float alpha_prod_t_prev = prev_timestep >= 0 ?
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
// The subscript _s are the only portion in this
// section (2) unique to TCD
float alpha_prod_s = alphas_cumprod[timestep_s];
float beta_prod_s = 1 - alpha_prod_s;
float beta_prod_s = 1 - alpha_prod_s;
// 3. Compute the predicted noised sample x_s based on
// the model parameterization
//
@ -1317,7 +1313,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_pred_original_sample[j] =
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
std::sqrt(beta_prod_t) *
vec_model_output[j]) *
vec_model_output[j]) *
(1 / std::sqrt(alpha_prod_t));
}
}
@ -1339,9 +1335,9 @@ static void sample_k_diffusion(sample_method_t method,
// pred_epsilon = model_output
vec_x[j] =
std::sqrt(alpha_prod_s) *
vec_pred_original_sample[j] +
vec_pred_original_sample[j] +
std::sqrt(beta_prod_s) *
vec_model_output[j];
vec_model_output[j];
}
}
// 4. Sample and inject noise z ~ N(0, I) for
@ -1357,7 +1353,7 @@ static void sample_k_diffusion(sample_method_t method,
// In this case, x is still pred_noised_sample,
// continue in-place
ggml_tensor_set_f32_randn(noise, rng);
float* vec_x = (float*)x->data;
float* vec_x = (float*)x->data;
float* vec_noise = (float*)noise->data;
for (int j = 0; j < ggml_nelements(x); j++) {
// Corresponding to (35) in Zheng et
@ -1366,10 +1362,10 @@ static void sample_k_diffusion(sample_method_t method,
vec_x[j] =
std::sqrt(alpha_prod_t_prev /
alpha_prod_s) *
vec_x[j] +
vec_x[j] +
std::sqrt(1 - alpha_prod_t_prev /
alpha_prod_s) *
vec_noise[j];
alpha_prod_s) *
vec_noise[j];
}
}
}

View File

@ -13,7 +13,7 @@ struct DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -69,7 +69,7 @@ struct UNetModel : public DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -120,7 +120,7 @@ struct MMDiTModel : public DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -173,7 +173,7 @@ struct FluxModel : public DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,

View File

@ -133,9 +133,9 @@ struct SDParams {
float skip_layer_start = 0.01f;
float skip_layer_end = 0.2f;
bool chroma_use_dit_mask = true;
bool chroma_use_t5_mask = false;
int chroma_t5_mask_pad = 1;
bool chroma_use_dit_mask = true;
bool chroma_use_t5_mask = false;
int chroma_t5_mask_pad = 1;
};
void print_params(SDParams params) {
@ -919,7 +919,7 @@ int main(int argc, const char* argv[]) {
input_image_buffer = resized_image_buffer;
}
} else if (params.mode == EDIT) {
vae_decode_only = false;
vae_decode_only = false;
for (auto& path : params.ref_image_paths) {
int c = 0;
int width = 0;
@ -1113,7 +1113,7 @@ int main(int argc, const char* argv[]) {
params.skip_layer_start,
params.skip_layer_end);
}
} else { // EDIT
} else { // EDIT
results = edit(sd_ctx,
ref_images.data(),
ref_images.size(),
@ -1176,11 +1176,11 @@ int main(int argc, const char* argv[]) {
std::string dummy_name, ext, lc_ext;
bool is_jpg;
size_t last = params.output_path.find_last_of(".");
size_t last = params.output_path.find_last_of(".");
size_t last_path = std::min(params.output_path.find_last_of("/"),
params.output_path.find_last_of("\\"));
if (last != std::string::npos // filename has extension
&& (last_path == std::string::npos || last > last_path)) {
if (last != std::string::npos // filename has extension
&& (last_path == std::string::npos || last > last_path)) {
dummy_name = params.output_path.substr(0, last);
ext = lc_ext = params.output_path.substr(last);
std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
@ -1188,7 +1188,7 @@ int main(int argc, const char* argv[]) {
} else {
dummy_name = params.output_path;
ext = lc_ext = "";
is_jpg = false;
is_jpg = false;
}
// appending ".png" to absent or unknown extension
if (!is_jpg && lc_ext != ".png") {
@ -1200,7 +1200,7 @@ int main(int argc, const char* argv[]) {
continue;
}
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
if(is_jpg) {
if (is_jpg) {
stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
results[i].data, 90, get_image_params(params, params.seed + i).c_str());
printf("save result JPEG image to '%s'\n", final_image_path.c_str());

View File

@ -512,7 +512,8 @@ namespace Flux {
LastLayer(int64_t hidden_size,
int64_t patch_size,
int64_t out_channels,
bool prune_mod = false) : prune_mod(prune_mod) {
bool prune_mod = false)
: prune_mod(prune_mod) {
blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
if (!prune_mod) {
@ -723,7 +724,7 @@ namespace Flux {
auto txt_ids = gen_txt_ids(bs, context_len);
auto img_ids = gen_img_ids(h, w, patch_size, bs);
auto ids = concat_ids(txt_ids, img_ids, bs);
auto ids = concat_ids(txt_ids, img_ids, bs);
uint64_t curr_h_offset = 0;
uint64_t curr_w_offset = 0;
for (ggml_tensor* ref : ref_latents) {
@ -736,7 +737,7 @@ namespace Flux {
}
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
ids = concat_ids(ids, ref_ids, bs);
ids = concat_ids(ids, ref_ids, bs);
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
@ -744,7 +745,6 @@ namespace Flux {
return ids;
}
// Generate positional embeddings
std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
@ -872,8 +872,8 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
struct ggml_tensor* pe,
struct ggml_tensor* mod_index_arange = NULL,
std::vector<int> skip_layers = {}) {
struct ggml_tensor* mod_index_arange = NULL,
std::vector<int> skip_layers = {}) {
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]);
@ -887,7 +887,7 @@ namespace Flux {
auto distill_timestep = ggml_nn_timestep_embedding(ctx, timesteps, 16, 10000, 1000.f);
auto distill_guidance = ggml_nn_timestep_embedding(ctx, guidance, 16, 10000, 1000.f);
// auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
// auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
// ggml_arange is not working on a lot of backends, so it is precomputed on the CPU instead
GGML_ASSERT(arange != NULL);
auto modulation_index = ggml_nn_timestep_embedding(ctx, mod_index_arange, 32, 10000, 1000.f); // [1, 344, 32]
@ -962,7 +962,6 @@ namespace Flux {
struct ggml_tensor* process_img(struct ggml_context* ctx,
struct ggml_tensor* x) {
int64_t W = x->ne[0];
int64_t H = x->ne[1];
int64_t patch_size = 2;
@ -983,9 +982,9 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
struct ggml_tensor* pe,
struct ggml_tensor* mod_index_arange = NULL,
struct ggml_tensor* mod_index_arange = NULL,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}) {
std::vector<int> skip_layers = {}) {
// Forward pass of DiT.
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
// timestep: (N,) tensor of diffusion timesteps
@ -1005,7 +1004,7 @@ namespace Flux {
int pad_h = (patch_size - H % patch_size) % patch_size;
int pad_w = (patch_size - W % patch_size) % patch_size;
auto img = process_img(ctx, x);
auto img = process_img(ctx, x);
uint64_t img_tokens = img->ne[1];
if (c_concat != NULL) {
@ -1013,7 +1012,7 @@ namespace Flux {
ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
masked = process_img(ctx, masked);
mask = process_img(ctx, mask);
mask = process_img(ctx, mask);
img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
}
@ -1027,9 +1026,9 @@ namespace Flux {
auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
if (out->ne[1] > img_tokens) {
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
}
// rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)
@ -1120,7 +1119,7 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}) {
std::vector<int> skip_layers = {}) {
GGML_ASSERT(x->ne[3] == 1);
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
@ -1139,8 +1138,8 @@ namespace Flux {
}
// ggml_arange is not working on some backends, precompute it
mod_index_arange_vec = arange(0, 344);
mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
mod_index_arange_vec = arange(0, 344);
mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
}
y = to_backend(y);
@ -1187,9 +1186,9 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
struct ggml_tensor** output = NULL,
struct ggml_context* output_ctx = NULL,
std::vector<int> skip_layers = std::vector<int>()) {
struct ggml_tensor** output = NULL,
struct ggml_context* output_ctx = NULL,
std::vector<int> skip_layers = std::vector<int>()) {
// x: [N, in_channels, h, w]
// timesteps: [N, ]
// context: [N, max_position, hidden_size]

View File

@ -291,7 +291,6 @@ struct LoraModel : public GGMLRunner {
std::string hada_2_down_name = "";
std::string hada_2_up_name = "";
hada_1_down_name = fk + ".hada_w1_b";
hada_1_up_name = fk + ".hada_w1_a";
hada_1_mid_name = fk + ".hada_t1";
@ -414,7 +413,7 @@ struct LoraModel : public GGMLRunner {
}
lokr_w2 = ggml_merge_lora(compute_ctx, down, up);
}
// Technically it might be unused, but I believe it's the expected behavior
applied_lora_tensors.insert(alpha_name);

View File

@ -12,9 +12,9 @@
#include "ggml-backend.h"
#include "ggml.h"
#include "gguf.h"
#include "json.hpp"
#include "zip.h"
#include "gguf.h"
#define SD_MAX_DIMS 5

View File

@ -48,8 +48,7 @@ const char* sampling_methods_str[] = {
"iPNDM_v",
"LCM",
"DDIM \"trailing\"",
"TCD"
};
"TCD"};
/*================================================== Helper Functions ================================================*/
@ -696,7 +695,7 @@ public:
float curr_multiplier = kv.second;
lora_state_diff[lora_name] -= curr_multiplier;
}
size_t rm = lora_state_diff.size() - lora_state.size();
if (rm != 0) {
LOG_INFO("Attempting to apply %lu LoRAs (removing %lu applied LoRAs)", lora_state.size(), rm);
@ -815,11 +814,11 @@ public:
int start_merge_step,
SDCondition id_cond,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {},
float slg_scale = 0,
float skip_layer_start = 0.01,
float skip_layer_end = 0.2,
ggml_tensor* noise_mask = nullptr) {
std::vector<int> skip_layers = {},
float slg_scale = 0,
float skip_layer_start = 0.01,
float skip_layer_end = 0.2,
ggml_tensor* noise_mask = nullptr) {
LOG_DEBUG("Sample");
struct ggml_init_params params;
size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@ -1973,7 +1972,6 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
return result_images;
}
sd_image_t* edit(sd_ctx_t* sd_ctx,
sd_image_t* ref_images,
int ref_images_count,
@ -2062,7 +2060,7 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
}
ref_latents.push_back(latent);
}
size_t t1 = ggml_time_ms();
LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);

View File

@ -61,10 +61,10 @@ enum schedule_t {
// same as enum ggml_type
enum sd_type_t {
SD_TYPE_F32 = 0,
SD_TYPE_F16 = 1,
SD_TYPE_Q4_0 = 2,
SD_TYPE_Q4_1 = 3,
SD_TYPE_F32 = 0,
SD_TYPE_F16 = 1,
SD_TYPE_Q4_0 = 2,
SD_TYPE_Q4_1 = 3,
// SD_TYPE_Q4_2 = 4, support has been removed
// SD_TYPE_Q4_3 = 5, support has been removed
SD_TYPE_Q5_0 = 6,
@ -95,12 +95,12 @@ enum sd_type_t {
// SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
// SD_TYPE_Q4_0_4_8 = 32,
// SD_TYPE_Q4_0_8_8 = 33,
SD_TYPE_TQ1_0 = 34,
SD_TYPE_TQ2_0 = 35,
SD_TYPE_TQ1_0 = 34,
SD_TYPE_TQ2_0 = 35,
// SD_TYPE_IQ4_NL_4_4 = 36,
// SD_TYPE_IQ4_NL_4_8 = 37,
// SD_TYPE_IQ4_NL_8_8 = 38,
SD_TYPE_COUNT = 39,
SD_TYPE_COUNT = 39,
};
SD_API const char* sd_type_name(enum sd_type_t type);

4
t5.hpp
View File

@ -434,7 +434,7 @@ public:
tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);
weights.insert(weights.end(), length - weights.size(), 1.0);
if (attention_mask != nullptr) {
// maybe keep some padding tokens unmasked?
// maybe keep some padding tokens unmasked?
attention_mask->insert(attention_mask->end(), length - attention_mask->size(), -HUGE_VALF);
}
}
@ -797,7 +797,7 @@ struct T5Runner : public GGMLRunner {
struct ggml_tensor* input_ids,
struct ggml_tensor* attention_mask,
ggml_tensor** output,
ggml_context* output_ctx = NULL) {
ggml_context* output_ctx = NULL) {
auto get_graph = [&]() -> struct ggml_cgraph* {
return build_graph(input_ids, attention_mask);
};

View File

@ -112,7 +112,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
// Find the first file in the directory
hFind = FindFirstFile(directoryPath, &findFileData);
hFind = FindFirstFile(directoryPath, &findFileData);
bool isAbsolutePath = false;
// Check if the directory was found
if (hFind == INVALID_HANDLE_VALUE) {
@ -121,7 +121,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
char directoryPathAbsolute[MAX_PATH];
sprintf(directoryPathAbsolute, "%s*", dir.c_str());
hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
isAbsolutePath = true;
if (hFind == INVALID_HANDLE_VALUE) {
printf("Absolute path was also wrong.\n");