#ifndef __CACHE_DIT_HPP__
#define __CACHE_DIT_HPP__

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>
#include <string>
#include <unordered_map>
#include <vector>

#include "ggml_extend.hpp"

struct DBCacheConfig {
    bool enabled                        = false;
    int Fn_compute_blocks               = 8;
    int Bn_compute_blocks               = 0;
    float residual_diff_threshold       = 0.08f;
    int max_warmup_steps                = 8;
    int max_cached_steps                = -1;
    int max_continuous_cached_steps     = -1;
    float max_accumulated_residual_diff = -1.0f;
    std::vector<int> steps_computation_mask;
    bool scm_policy_dynamic             = true;
};

struct TaylorSeerConfig {
    bool enabled            = false;
    int n_derivatives       = 1;
    int max_warmup_steps    = 2;
    int skip_interval_steps = 1;
};

struct CacheDitConfig {
    DBCacheConfig dbcache;
    TaylorSeerConfig taylorseer;
    // Per-block-type overrides; -1 means "fall back to dbcache.Fn/Bn_compute_blocks".
    int double_Fn_blocks = -1;
    int double_Bn_blocks = -1;
    int single_Fn_blocks = -1;
    int single_Bn_blocks = -1;
};

struct TaylorSeerState {
    int n_derivatives      = 1;
    int current_step       = -1;
    int last_computed_step = -1;
    // dY_*[o] holds the o-th finite-difference derivative of the hidden state
    // (order 0 is the raw value).
    std::vector<std::vector<float>> dY_prev;
    std::vector<std::vector<float>> dY_current;

    void init(int n_deriv, size_t hidden_size) {
        (void)hidden_size;  // buffers are sized lazily on first update
        n_derivatives = n_deriv;
        int order = n_derivatives + 1;
        dY_prev.resize(order);
        dY_current.resize(order);
        for (int i = 0; i < order; i++) {
            dY_prev[i].clear();
            dY_current[i].clear();
        }
        current_step       = -1;
        last_computed_step = -1;
    }

    void reset() {
        for (auto& v : dY_prev) v.clear();
        for (auto& v : dY_current) v.clear();
        current_step       = -1;
        last_computed_step = -1;
    }

    bool can_approximate() const {
        return last_computed_step >= n_derivatives && !dY_prev.empty() && !dY_prev[0].empty();
    }

    void update_derivatives(const float* Y, size_t size, int step) {
        dY_prev = dY_current;
        dY_current[0].resize(size);
        for (size_t i = 0; i < size; i++) {
            dY_current[0][i] = Y[i];
        }
        int window = step - last_computed_step;
        if (window <= 0) window = 1;
        // Build higher-order derivatives by repeated finite differences.
        for (int d = 0; d < n_derivatives; d++) {
            if (!dY_prev[d].empty() && dY_prev[d].size() == size) {
                dY_current[d + 1].resize(size);
                for (size_t i = 0; i < size; i++) {
                    dY_current[d + 1][i] = (dY_current[d][i] - dY_prev[d][i]) / static_cast<float>(window);
                }
            } else {
                dY_current[d + 1].clear();
            }
        }
        current_step       = step;
        last_computed_step = step;
    }

    void approximate(float* output, size_t size, int target_step) const {
        if (!can_approximate() || dY_prev[0].size() != size) {
            return;
        }
        int elapsed = target_step - last_computed_step;
        if (elapsed <= 0) elapsed = 1;
        std::fill(output, output + size, 0.0f);
        // Taylor expansion: Y(t + e) ~= sum_o dY[o] * e^o / o!
        float factorial = 1.0f;
        int order       = static_cast<int>(dY_prev.size());
        for (int o = 0; o < order; o++) {
            if (dY_prev[o].empty() || dY_prev[o].size() != size) continue;
            if (o > 0) factorial *= static_cast<float>(o);
            float coeff = std::pow(static_cast<float>(elapsed), o) / factorial;
            for (size_t i = 0; i < size; i++) {
                output[i] += coeff * dY_prev[o][i];
            }
        }
    }
};

struct BlockCacheEntry {
    // double-block (img/txt) residuals
    std::vector<float> residual_img;
    std::vector<float> residual_txt;
    // single-block residual
    std::vector<float> residual;
    std::vector<float> prev_img;
    std::vector<float> prev_txt;
    std::vector<float> prev_output;
    bool has_prev = false;
};
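// Usage sketch (illustrative only, not called anywhere in this header): how a
// sampler loop might drive TaylorSeerState on its own. The buffer size (4096)
// and the "approximate every other step" cadence are made-up placeholders; in
// CacheDitState the real cadence comes from should_use_taylor_this_step().
inline void cache_dit_taylorseer_example() {
    TaylorSeerState ts;
    ts.init(/*n_deriv=*/1, /*hidden_size=*/0);
    std::vector<float> hidden(4096, 0.0f);
    for (int step = 0; step < 8; step++) {
        if (ts.can_approximate() && (step % 2) == 1) {
            // Extrapolate this step's hidden state from cached derivatives.
            ts.approximate(hidden.data(), hidden.size(), step);
        } else {
            // ... run the real transformer to fill `hidden`, then record it:
            ts.update_derivatives(hidden.data(), hidden.size(), step);
        }
    }
}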
struct CacheDitState {
    CacheDitConfig config;
    bool initialized = false;

    int total_double_blocks = 0;
    int total_single_blocks = 0;
    size_t hidden_size      = 0;

    int current_step     = -1;
    int total_steps      = 0;
    int warmup_remaining = 0;
    std::vector<int> cached_steps;
    int continuous_cached_steps     = 0;
    float accumulated_residual_diff = 0.0f;

    std::vector<BlockCacheEntry> double_block_cache;
    std::vector<BlockCacheEntry> single_block_cache;

    // Residual after the first Fn compute blocks; compared across steps to
    // decide whether the middle blocks can be replayed from cache.
    std::vector<float> Fn_residual_img;
    std::vector<float> Fn_residual_txt;
    std::vector<float> prev_Fn_residual_img;
    std::vector<float> prev_Fn_residual_txt;
    bool has_prev_Fn_residual = false;

    std::vector<float> Bn_buffer_img;
    std::vector<float> Bn_buffer_txt;
    std::vector<float> Bn_buffer;
    bool has_Bn_buffer = false;

    TaylorSeerState taylor_state;

    bool can_cache_this_step  = false;
    bool is_caching_this_step = false;
    int total_blocks_computed = 0;
    int total_blocks_cached   = 0;

    void init(const CacheDitConfig& cfg, int num_double_blocks, int num_single_blocks, size_t h_size) {
        config              = cfg;
        total_double_blocks = num_double_blocks;
        total_single_blocks = num_single_blocks;
        hidden_size         = h_size;
        initialized         = cfg.dbcache.enabled || cfg.taylorseer.enabled;
        if (!initialized) return;
        warmup_remaining = cfg.dbcache.max_warmup_steps;
        double_block_cache.resize(total_double_blocks);
        single_block_cache.resize(total_single_blocks);
        if (cfg.taylorseer.enabled) {
            taylor_state.init(cfg.taylorseer.n_derivatives, h_size);
        }
        reset_runtime();
    }

    void reset_runtime() {
        current_step     = -1;
        total_steps      = 0;
        warmup_remaining = config.dbcache.max_warmup_steps;
        cached_steps.clear();
        continuous_cached_steps   = 0;
        accumulated_residual_diff = 0.0f;
        for (auto& entry : double_block_cache) {
            entry.residual_img.clear();
            entry.residual_txt.clear();
            entry.prev_img.clear();
            entry.prev_txt.clear();
            entry.has_prev = false;
        }
        for (auto& entry : single_block_cache) {
            entry.residual.clear();
            entry.prev_output.clear();
            entry.has_prev = false;
        }
        Fn_residual_img.clear();
        Fn_residual_txt.clear();
        prev_Fn_residual_img.clear();
        prev_Fn_residual_txt.clear();
        has_prev_Fn_residual = false;
        Bn_buffer_img.clear();
        Bn_buffer_txt.clear();
        Bn_buffer.clear();
        has_Bn_buffer = false;
        taylor_state.reset();
        can_cache_this_step   = false;
        is_caching_this_step  = false;
        total_blocks_computed = 0;
        total_blocks_cached   = 0;
    }

    bool enabled() const {
        return initialized && (config.dbcache.enabled || config.taylorseer.enabled);
    }

    void begin_step(int step_index, float sigma = 0.0f) {
        (void)sigma;  // unused in this variant
        if (!enabled()) return;
        if (step_index == current_step) return;
        current_step = step_index;
        total_steps++;

        bool in_warmup = warmup_remaining > 0;
        if (in_warmup) {
            warmup_remaining--;
        }

        // Steps-computation mask (SCM): 1 = must compute, 0 = may cache.
        bool scm_allows_cache = true;
        if (!config.dbcache.steps_computation_mask.empty()) {
            if (step_index < static_cast<int>(config.dbcache.steps_computation_mask.size())) {
                scm_allows_cache = (config.dbcache.steps_computation_mask[step_index] == 0);
                if (!config.dbcache.scm_policy_dynamic && scm_allows_cache) {
                    // Static policy: the mask alone decides, skip the limit checks.
                    can_cache_this_step  = true;
                    is_caching_this_step = false;
                    return;
                }
            }
        }

        bool max_cached_ok = (config.dbcache.max_cached_steps < 0) ||
                             (static_cast<int>(cached_steps.size()) < config.dbcache.max_cached_steps);
        bool max_cont_ok = (config.dbcache.max_continuous_cached_steps < 0) ||
                           (continuous_cached_steps < config.dbcache.max_continuous_cached_steps);
        bool accum_ok = (config.dbcache.max_accumulated_residual_diff < 0.0f) ||
                        (accumulated_residual_diff < config.dbcache.max_accumulated_residual_diff);

        can_cache_this_step = !in_warmup && scm_allows_cache && max_cached_ok &&
                              max_cont_ok && accum_ok && has_prev_Fn_residual;
        is_caching_this_step = false;
    }

    void end_step(bool was_cached) {
        if (was_cached) {
            cached_steps.push_back(current_step);
            continuous_cached_steps++;
        } else {
            continuous_cached_steps = 0;
        }
    }

    // Relative L1 difference: sum|prev - curr| / (sum|prev| + eps).
    static float calculate_residual_diff(const float* prev, const float* curr, size_t size) {
        if (size == 0) return 0.0f;
        float sum_diff = 0.0f;
        float sum_abs  = 0.0f;
        for (size_t i = 0; i < size; i++) {
            sum_diff += std::fabs(prev[i] - curr[i]);
            sum_abs += std::fabs(prev[i]);
        }
        return sum_diff / (sum_abs + 1e-6f);
    }

    static float calculate_residual_diff(const std::vector<float>& prev, const std::vector<float>& curr) {
        if (prev.size() != curr.size() || prev.empty()) return 1.0f;
        return calculate_residual_diff(prev.data(), curr.data(), prev.size());
    }
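    // Block partition used by DBCache: the first Fn blocks and last Bn blocks
    // of each block type are always computed, while the Mn blocks in between
    // may be replayed from cached residuals:
    //
    //   [ 0 .. Fn-1 ]      compute -> residual probed for the cache decision
    //   [ Fn .. N-Bn-1 ]   Mn      -> cached (output = input + stored residual)
    //   [ N-Bn .. N-1 ]    compute
    //
    // A value of -1 in CacheDitConfig falls back to the shared DBCache setting.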
    int get_double_Fn_blocks() const {
        return (config.double_Fn_blocks >= 0) ? config.double_Fn_blocks : config.dbcache.Fn_compute_blocks;
    }
    int get_double_Bn_blocks() const {
        return (config.double_Bn_blocks >= 0) ? config.double_Bn_blocks : config.dbcache.Bn_compute_blocks;
    }
    int get_single_Fn_blocks() const {
        return (config.single_Fn_blocks >= 0) ? config.single_Fn_blocks : config.dbcache.Fn_compute_blocks;
    }
    int get_single_Bn_blocks() const {
        return (config.single_Bn_blocks >= 0) ? config.single_Bn_blocks : config.dbcache.Bn_compute_blocks;
    }

    bool is_Fn_double_block(int block_idx) const {
        return block_idx < get_double_Fn_blocks();
    }
    bool is_Bn_double_block(int block_idx) const {
        int Bn = get_double_Bn_blocks();
        return Bn > 0 && block_idx >= (total_double_blocks - Bn);
    }
    bool is_Mn_double_block(int block_idx) const {
        return !is_Fn_double_block(block_idx) && !is_Bn_double_block(block_idx);
    }
    bool is_Fn_single_block(int block_idx) const {
        return block_idx < get_single_Fn_blocks();
    }
    bool is_Bn_single_block(int block_idx) const {
        int Bn = get_single_Bn_blocks();
        return Bn > 0 && block_idx >= (total_single_blocks - Bn);
    }
    bool is_Mn_single_block(int block_idx) const {
        return !is_Fn_single_block(block_idx) && !is_Bn_single_block(block_idx);
    }

    void store_Fn_residual(const float* img, const float* txt, size_t img_size, size_t txt_size,
                           const float* input_img, const float* input_txt) {
        Fn_residual_img.resize(img_size);
        Fn_residual_txt.resize(txt_size);
        for (size_t i = 0; i < img_size; i++) {
            Fn_residual_img[i] = img[i] - input_img[i];
        }
        for (size_t i = 0; i < txt_size; i++) {
            Fn_residual_txt[i] = txt[i] - input_txt[i];
        }
    }

    bool check_cache_decision() {
        if (!can_cache_this_step) {
            is_caching_this_step = false;
            return false;
        }
        if (!has_prev_Fn_residual || prev_Fn_residual_img.empty()) {
            is_caching_this_step = false;
            return false;
        }
        float diff_img = calculate_residual_diff(prev_Fn_residual_img, Fn_residual_img);
        float diff_txt = calculate_residual_diff(prev_Fn_residual_txt, Fn_residual_txt);
        float diff     = (diff_img + diff_txt) / 2.0f;
        if (diff < config.dbcache.residual_diff_threshold) {
            is_caching_this_step = true;
            accumulated_residual_diff += diff;
            return true;
        }
        is_caching_this_step = false;
        return false;
    }

    void update_prev_Fn_residual() {
        prev_Fn_residual_img = Fn_residual_img;
        prev_Fn_residual_txt = Fn_residual_txt;
        has_prev_Fn_residual = !prev_Fn_residual_img.empty();
    }

    void store_double_block_residual(int block_idx, const float* img, const float* txt,
                                     size_t img_size, size_t txt_size,
                                     const float* prev_img, const float* prev_txt) {
        if (block_idx < 0 || block_idx >= static_cast<int>(double_block_cache.size())) return;
        BlockCacheEntry& entry = double_block_cache[block_idx];
        entry.residual_img.resize(img_size);
        entry.residual_txt.resize(txt_size);
        for (size_t i = 0; i < img_size; i++) {
            entry.residual_img[i] = img[i] - prev_img[i];
        }
        for (size_t i = 0; i < txt_size; i++) {
            entry.residual_txt[i] = txt[i] - prev_txt[i];
        }
        entry.prev_img.resize(img_size);
        entry.prev_txt.resize(txt_size);
        for (size_t i = 0; i < img_size; i++) {
            entry.prev_img[i] = img[i];
        }
        for (size_t i = 0; i < txt_size; i++) {
            entry.prev_txt[i] = txt[i];
        }
        entry.has_prev = true;
    }

    void apply_double_block_cache(int block_idx, float* img, float* txt, size_t img_size, size_t txt_size) {
        if (block_idx < 0 || block_idx >= static_cast<int>(double_block_cache.size())) return;
        const BlockCacheEntry& entry = double_block_cache[block_idx];
        if (entry.residual_img.size() != img_size || entry.residual_txt.size() != txt_size) return;
        for (size_t i = 0; i < img_size; i++) {
            img[i] += entry.residual_img[i];
        }
        for (size_t i = 0; i < txt_size; i++) {
            txt[i] += entry.residual_txt[i];
        }
        total_blocks_cached++;
    }
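    // Single-block analogue of the above: one fused hidden state instead of
    // separate img/txt streams. Replay costs one vector add, e.g. with a
    // stored residual {0.5, -0.25} a cached block maps input {1.0, 2.0} to
    // output {1.5, 1.75} instead of running the block's attention/MLP.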
    void store_single_block_residual(int block_idx, const float* output, size_t size, const float* input) {
        if (block_idx < 0 || block_idx >= static_cast<int>(single_block_cache.size())) return;
        BlockCacheEntry& entry = single_block_cache[block_idx];
        entry.residual.resize(size);
        for (size_t i = 0; i < size; i++) {
            entry.residual[i] = output[i] - input[i];
        }
        entry.prev_output.resize(size);
        for (size_t i = 0; i < size; i++) {
            entry.prev_output[i] = output[i];
        }
        entry.has_prev = true;
    }

    void apply_single_block_cache(int block_idx, float* output, size_t size) {
        if (block_idx < 0 || block_idx >= static_cast<int>(single_block_cache.size())) return;
        const BlockCacheEntry& entry = single_block_cache[block_idx];
        if (entry.residual.size() != size) return;
        for (size_t i = 0; i < size; i++) {
            output[i] += entry.residual[i];
        }
        total_blocks_cached++;
    }

    void store_Bn_buffer(const float* img, const float* txt, size_t img_size, size_t txt_size,
                         const float* Bn_start_img, const float* Bn_start_txt) {
        Bn_buffer_img.resize(img_size);
        Bn_buffer_txt.resize(txt_size);
        for (size_t i = 0; i < img_size; i++) {
            Bn_buffer_img[i] = img[i] - Bn_start_img[i];
        }
        for (size_t i = 0; i < txt_size; i++) {
            Bn_buffer_txt[i] = txt[i] - Bn_start_txt[i];
        }
        has_Bn_buffer = true;
    }

    void apply_Bn_buffer(float* img, float* txt, size_t img_size, size_t txt_size) {
        if (!has_Bn_buffer) return;
        if (Bn_buffer_img.size() != img_size || Bn_buffer_txt.size() != txt_size) return;
        for (size_t i = 0; i < img_size; i++) {
            img[i] += Bn_buffer_img[i];
        }
        for (size_t i = 0; i < txt_size; i++) {
            txt[i] += Bn_buffer_txt[i];
        }
    }

    void taylor_update(const float* hidden_state, size_t size) {
        if (!config.taylorseer.enabled) return;
        taylor_state.update_derivatives(hidden_state, size, current_step);
    }

    bool taylor_can_approximate() const {
        return config.taylorseer.enabled && taylor_state.can_approximate();
    }

    void taylor_approximate(float* output, size_t size) {
        if (!config.taylorseer.enabled) return;
        taylor_state.approximate(output, size, current_step);
    }

    bool should_use_taylor_this_step() const {
        if (!config.taylorseer.enabled) return false;
        if (current_step < config.taylorseer.max_warmup_steps) return false;
        int interval = config.taylorseer.skip_interval_steps;
        if (interval <= 0) interval = 1;
        return (current_step % (interval + 1)) != 0;
    }
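    // Cadence example: with skip_interval_steps = 1 and max_warmup_steps = 2,
    // steps 0 and 1 always compute (warmup); afterwards every step whose index
    // is not a multiple of 2 is approximated, i.e. compute steps 2, 4, 6, ...
    // and approximate steps 3, 5, 7, ...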
    void log_metrics() const {
        if (!enabled()) return;
        int total_blocks  = total_blocks_computed + total_blocks_cached;
        float cache_ratio = (total_blocks > 0)
                                ? (static_cast<float>(total_blocks_cached) / total_blocks * 100.0f)
                                : 0.0f;
        float step_cache_ratio = (total_steps > 0)
                                     ? (static_cast<float>(cached_steps.size()) / total_steps * 100.0f)
                                     : 0.0f;
        LOG_INFO("CacheDIT: steps_cached=%zu/%d (%.1f%%), blocks_cached=%d/%d (%.1f%%), accum_diff=%.4f",
                 cached_steps.size(), total_steps, step_cache_ratio,
                 total_blocks_cached, total_blocks, cache_ratio, accumulated_residual_diff);
    }

    std::string get_summary() const {
        char buf[256];
        snprintf(buf, sizeof(buf), "CacheDIT[thresh=%.2f]: cached %zu/%d steps, %d/%d blocks",
                 config.dbcache.residual_diff_threshold, cached_steps.size(), total_steps,
                 total_blocks_cached, total_blocks_computed + total_blocks_cached);
        return std::string(buf);
    }
};

// Parse a comma-separated steps-computation mask, e.g. "1,1,0,0,1".
inline std::vector<int> parse_scm_mask(const std::string& mask_str) {
    std::vector<int> mask;
    if (mask_str.empty()) return mask;
    size_t pos   = 0;
    size_t start = 0;
    while ((pos = mask_str.find(',', start)) != std::string::npos) {
        std::string token = mask_str.substr(start, pos - start);
        mask.push_back(std::stoi(token));
        start = pos + 1;
    }
    if (start < mask_str.length()) {
        mask.push_back(std::stoi(mask_str.substr(start)));
    }
    return mask;
}

// Expand alternating compute/cache run lengths into a 0/1 step mask
// (1 = compute, 0 = may cache). The final emitted step is forced to compute.
inline std::vector<int> generate_scm_mask(const std::vector<int>& compute_bins,
                                          const std::vector<int>& cache_bins,
                                          int total_steps) {
    std::vector<int> mask;
    size_t c_idx = 0, cache_idx = 0;
    while (static_cast<int>(mask.size()) < total_steps) {
        if (c_idx < compute_bins.size()) {
            for (int i = 0; i < compute_bins[c_idx] && static_cast<int>(mask.size()) < total_steps; i++) {
                mask.push_back(1);
            }
            c_idx++;
        }
        if (cache_idx < cache_bins.size()) {
            for (int i = 0; i < cache_bins[cache_idx] && static_cast<int>(mask.size()) < total_steps; i++) {
                mask.push_back(0);
            }
            cache_idx++;
        }
        if (c_idx >= compute_bins.size() && cache_idx >= cache_bins.size()) break;
    }
    if (!mask.empty()) {
        mask.back() = 1;
    }
    return mask;
}

inline std::vector<int> get_scm_preset(const std::string& preset, int total_steps) {
    struct Preset {
        std::vector<int> compute_bins;
        std::vector<int> cache_bins;
    };
    // Bin patterns are tuned for 28-step schedules and rescaled otherwise.
    Preset slow   = {{8, 3, 3, 2, 1, 1}, {1, 2, 2, 2, 3}};
    Preset medium = {{6, 2, 2, 2, 2, 1}, {1, 3, 3, 3, 3}};
    Preset fast   = {{6, 1, 1, 1, 1, 1}, {1, 3, 4, 5, 4}};
    Preset ultra  = {{4, 1, 1, 1, 1}, {2, 5, 6, 7}};
    Preset* p = nullptr;
    if (preset == "slow" || preset == "s" || preset == "S")
        p = &slow;
    else if (preset == "medium" || preset == "m" || preset == "M")
        p = &medium;
    else if (preset == "fast" || preset == "f" || preset == "F")
        p = &fast;
    else if (preset == "ultra" || preset == "u" || preset == "U")
        p = &ultra;
    else
        return {};
    if (total_steps != 28 && total_steps > 0) {
        float scale = static_cast<float>(total_steps) / 28.0f;
        std::vector<int> scaled_compute, scaled_cache;
        for (int v : p->compute_bins) {
            scaled_compute.push_back(std::max(1, static_cast<int>(v * scale + 0.5f)));
        }
        for (int v : p->cache_bins) {
            scaled_cache.push_back(std::max(1, static_cast<int>(v * scale + 0.5f)));
        }
        return generate_scm_mask(scaled_compute, scaled_cache, total_steps);
    }
    return generate_scm_mask(p->compute_bins, p->cache_bins, total_steps);
}

inline float get_preset_threshold(const std::string& preset) {
    if (preset == "slow" || preset == "s" || preset == "S") return 0.20f;
    if (preset == "medium" || preset == "m" || preset == "M") return 0.25f;
    if (preset == "fast" || preset == "f" || preset == "F") return 0.30f;
    if (preset == "ultra" || preset == "u" || preset == "U") return 0.34f;
    return 0.08f;
}

inline int get_preset_warmup(const std::string& preset) {
    if (preset == "slow" || preset == "s" || preset == "S") return 8;
    if (preset == "medium" || preset == "m" || preset == "M") return 6;
    if (preset == "fast" || preset == "f" || preset == "F") return 6;
    if (preset == "ultra" || preset == "u" || preset == "U") return 4;
    return 8;
}
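// Worked example of the mask helpers (bin values made up for illustration):
//   generate_scm_mask({2, 1}, {3, 4}, 12)
// interleaves the run lengths as compute,cache,compute,cache and then forces
// the last emitted step to compute, yielding
//   1 1 0 0 0 1 0 0 0 1
// (10 entries: both bin lists are exhausted before total_steps is reached).
// parse_scm_mask("1,1,0,0,1") builds the same kind of vector from a CLI string.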
inline int get_preset_Fn(const std::string& preset) {
    if (preset == "slow" || preset == "s" || preset == "S") return 8;
    if (preset == "medium" || preset == "m" || preset == "M") return 8;
    if (preset == "fast" || preset == "f" || preset == "F") return 6;
    if (preset == "ultra" || preset == "u" || preset == "U") return 4;
    return 8;
}

inline int get_preset_Bn(const std::string& preset) {
    (void)preset;
    return 0;
}

// Parse "Fn,Bn,threshold,warmup,max_cached,max_continuous"; missing trailing
// fields keep their defaults.
inline void parse_dbcache_options(const std::string& opts, DBCacheConfig& cfg) {
    if (opts.empty()) return;
    int Fn = 8, Bn = 0, warmup = 8, max_cached = -1, max_cont = -1;
    float thresh = 0.08f;
    sscanf(opts.c_str(), "%d,%d,%f,%d,%d,%d", &Fn, &Bn, &thresh, &warmup, &max_cached, &max_cont);
    cfg.Fn_compute_blocks           = Fn;
    cfg.Bn_compute_blocks           = Bn;
    cfg.residual_diff_threshold     = thresh;
    cfg.max_warmup_steps            = warmup;
    cfg.max_cached_steps            = max_cached;
    cfg.max_continuous_cached_steps = max_cont;
}

// Parse "n_derivatives,warmup,skip_interval".
inline void parse_taylorseer_options(const std::string& opts, TaylorSeerConfig& cfg) {
    if (opts.empty()) return;
    int n_deriv = 1, warmup = 2, interval = 1;
    sscanf(opts.c_str(), "%d,%d,%d", &n_deriv, &warmup, &interval);
    cfg.n_derivatives       = n_deriv;
    cfg.max_warmup_steps    = warmup;
    cfg.skip_interval_steps = interval;
}
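// Usage sketch (illustrative only): wiring an option string into a config.
// The option string below is an arbitrary example, not a recommended setting.
inline DBCacheConfig cache_dit_options_example() {
    DBCacheConfig cfg;
    cfg.enabled = true;
    // Fn=8, Bn=0, threshold=0.12, warmup=4, unlimited cached steps,
    // at most 3 consecutive cached steps:
    parse_dbcache_options("8,0,0.12,4,-1,3", cfg);
    cfg.steps_computation_mask = get_scm_preset("fast", /*total_steps=*/28);
    return cfg;
}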
struct CacheDitConditionState {
    DBCacheConfig config;
    TaylorSeerConfig taylor_config;
    bool initialized = false;

    int current_step_index    = -1;
    bool step_active          = false;
    bool skip_current_step    = false;
    bool allow_skip_this_step = false;  // set by begin_step() once all guards pass
    bool initial_step         = true;
    int warmup_remaining      = 0;
    std::vector<int> cached_steps;
    int continuous_cached_steps     = 0;
    float accumulated_residual_diff = 0.0f;
    int total_steps_skipped         = 0;

    // The first condition seen each run; its residual drives the skip decision
    // for the whole step.
    const void* anchor_condition = nullptr;

    struct CacheEntry {
        std::vector<float> diff;
        std::vector<float> prev_input;
        std::vector<float> prev_output;
        bool has_prev = false;
    };
    std::unordered_map<const void*, CacheEntry> cache_diffs;

    TaylorSeerState taylor_state;

    float start_sigma = std::numeric_limits<float>::max();
    float end_sigma   = 0.0f;

    void reset_runtime() {
        current_step_index   = -1;
        step_active          = false;
        skip_current_step    = false;
        allow_skip_this_step = false;
        initial_step         = true;
        warmup_remaining     = config.max_warmup_steps;
        cached_steps.clear();
        continuous_cached_steps   = 0;
        accumulated_residual_diff = 0.0f;
        total_steps_skipped       = 0;
        anchor_condition          = nullptr;
        cache_diffs.clear();
        taylor_state.reset();
    }

    void init(const DBCacheConfig& dbcfg, const TaylorSeerConfig& tcfg) {
        config        = dbcfg;
        taylor_config = tcfg;
        initialized   = dbcfg.enabled || tcfg.enabled;
        reset_runtime();
        if (taylor_config.enabled) {
            taylor_state.init(taylor_config.n_derivatives, 0);
        }
    }

    // Restrict caching to the middle of the schedule: skip the first 15% and
    // last 5% of steps, where outputs change the fastest.
    void set_sigmas(const std::vector<float>& sigmas) {
        if (!initialized || sigmas.size() < 2) return;
        float start_percent = 0.15f;
        float end_percent   = 0.95f;
        size_t n_steps      = sigmas.size() - 1;
        size_t start_step   = static_cast<size_t>(start_percent * n_steps);
        size_t end_step     = static_cast<size_t>(end_percent * n_steps);
        if (start_step >= n_steps) start_step = n_steps - 1;
        if (end_step >= n_steps) end_step = n_steps - 1;
        start_sigma = sigmas[start_step];
        end_sigma   = sigmas[end_step];
        if (start_sigma < end_sigma) {
            std::swap(start_sigma, end_sigma);
        }
    }

    bool enabled() const {
        return initialized && (config.enabled || taylor_config.enabled);
    }

    void begin_step(int step_index, float sigma) {
        if (!enabled()) return;
        if (step_index == current_step_index) return;
        current_step_index   = step_index;
        skip_current_step    = false;
        step_active          = false;
        allow_skip_this_step = false;
        // Only active within the configured sigma window.
        if (sigma > start_sigma) return;
        if (!(sigma > end_sigma)) return;
        step_active = true;
        if (warmup_remaining > 0) {
            warmup_remaining--;
            return;
        }
        if (!config.steps_computation_mask.empty()) {
            if (step_index < static_cast<int>(config.steps_computation_mask.size())) {
                if (config.steps_computation_mask[step_index] == 1) {
                    return;  // mask forces this step to compute
                }
            }
        }
        if (config.max_cached_steps >= 0 &&
            static_cast<int>(cached_steps.size()) >= config.max_cached_steps) {
            return;
        }
        if (config.max_continuous_cached_steps >= 0 &&
            continuous_cached_steps >= config.max_continuous_cached_steps) {
            return;
        }
        // All guards passed; before_condition() may decide to skip this step.
        allow_skip_this_step = true;
    }

    bool step_is_active() const {
        return enabled() && step_active;
    }

    bool is_step_skipped() const {
        return enabled() && step_active && skip_current_step;
    }

    bool has_cache(const void* cond) const {
        auto it = cache_diffs.find(cond);
        return it != cache_diffs.end() && !it->second.diff.empty();
    }

    void update_cache(const void* cond, const float* input, const float* output, size_t size) {
        CacheEntry& entry = cache_diffs[cond];
        entry.diff.resize(size);
        for (size_t i = 0; i < size; i++) {
            entry.diff[i] = output[i] - input[i];
        }
        entry.prev_input.resize(size);
        entry.prev_output.resize(size);
        for (size_t i = 0; i < size; i++) {
            entry.prev_input[i]  = input[i];
            entry.prev_output[i] = output[i];
        }
        entry.has_prev = true;
    }

    void apply_cache(const void* cond, const float* input, float* output, size_t size) {
        auto it = cache_diffs.find(cond);
        if (it == cache_diffs.end() || it->second.diff.empty()) return;
        if (it->second.diff.size() != size) return;
        for (size_t i = 0; i < size; i++) {
            output[i] = input[i] + it->second.diff[i];
        }
    }

    // Returns true if `output` was produced from cache and the model call can
    // be skipped for this condition.
    bool before_condition(const void* cond, struct ggml_tensor* input, struct ggml_tensor* output,
                          float sigma, int step_index) {
        if (!enabled() || step_index < 0) return false;
        if (step_index != current_step_index) {
            begin_step(step_index, sigma);
        }
        if (!step_active) return false;
        if (initial_step) {
            anchor_condition = cond;
            initial_step     = false;
        }
        bool is_anchor = (cond == anchor_condition);
        if (skip_current_step) {
            if (has_cache(cond)) {
                apply_cache(cond, (float*)input->data, (float*)output->data,
                            static_cast<size_t>(ggml_nelements(output)));
                return true;
            }
            return false;
        }
        if (!is_anchor || !allow_skip_this_step) return false;
        auto it = cache_diffs.find(cond);
        if (it == cache_diffs.end() || !it->second.has_prev) return false;
        size_t ne = static_cast<size_t>(ggml_nelements(input));
        if (it->second.prev_input.size() != ne) return false;
        float* input_data = (float*)input->data;
        float diff = CacheDitState::calculate_residual_diff(it->second.prev_input.data(), input_data, ne);
        // Heuristic threshold modulation: more Fn compute blocks raise
        // confidence (looser threshold); Bn blocks trade it back for quality.
        float effective_threshold = config.residual_diff_threshold;
        if (config.Fn_compute_blocks > 0) {
            float fn_confidence = 1.0f + 0.02f * (config.Fn_compute_blocks - 8);
            fn_confidence       = std::max(0.5f, std::min(2.0f, fn_confidence));
            effective_threshold *= fn_confidence;
        }
        if (config.Bn_compute_blocks > 0) {
            float bn_quality = 1.0f - 0.03f * config.Bn_compute_blocks;
            bn_quality       = std::max(0.5f, std::min(1.0f, bn_quality));
            effective_threshold *= bn_quality;
        }
        if (diff < effective_threshold) {
            skip_current_step = true;
            total_steps_skipped++;
            cached_steps.push_back(current_step_index);
            continuous_cached_steps++;
            accumulated_residual_diff += diff;
            apply_cache(cond, input_data, (float*)output->data, ne);
            return true;
        }
        continuous_cached_steps = 0;
        return false;
    }

    void after_condition(const void* cond, struct ggml_tensor* input, struct ggml_tensor* output) {
        if (!step_is_active()) return;
        size_t ne = static_cast<size_t>(ggml_nelements(output));
        update_cache(cond, (float*)input->data, (float*)output->data, ne);
        if (cond == anchor_condition && taylor_config.enabled) {
            taylor_state.update_derivatives((float*)output->data, ne, current_step_index);
        }
    }

    void log_metrics() const {
        if (!enabled()) return;
        LOG_INFO("CacheDIT: steps_skipped=%d/%d (%.1f%%), accum_residual_diff=%.4f",
                 total_steps_skipped, current_step_index + 1,
                 (current_step_index > 0) ? (100.0f * total_steps_skipped / (current_step_index + 1)) : 0.0f,
                 accumulated_residual_diff);
    }
};
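// Usage sketch (illustrative only, not part of the public API): how a
// sampler's denoise callback might wrap each conditioning pass with the
// before/after hooks. `run_transformer` stands in for the real model call
// and is left as a comment so this header stays self-contained.
inline bool cache_dit_condition_hook_example(CacheDitConditionState& state, const void* cond,
                                             struct ggml_tensor* input, struct ggml_tensor* output,
                                             float sigma, int step_index) {
    if (state.before_condition(cond, input, output, sigma, step_index)) {
        return true;  // output was filled from cache; skip the model
    }
    // run_transformer(input, output);  // compute the real output here
    state.after_condition(cond, input, output);
    return false;
}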
#endif  // __CACHE_DIT_HPP__