mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-10 00:06:38 +00:00
107 lines
4.3 KiB
C++
107 lines
4.3 KiB
C++
#ifndef __SD_GGML_GRAPH_CUT_H__
|
|
#define __SD_GGML_GRAPH_CUT_H__
|
|
|
|
#include <array>
|
|
#include <string>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include "ggml-backend.h"
|
|
#include "ggml.h"
|
|
|
|
namespace sd::ggml_graph_cut {
|
|
|
|
struct Segment {
|
|
enum InputType {
|
|
INPUT_EXTERNAL = 0,
|
|
INPUT_PREVIOUS_CUT,
|
|
INPUT_PARAM,
|
|
};
|
|
|
|
struct InputRef {
|
|
InputType type = INPUT_EXTERNAL;
|
|
std::string display_name;
|
|
int leaf_index = -1;
|
|
int node_index = -1;
|
|
};
|
|
|
|
size_t compute_buffer_size = 0;
|
|
size_t output_bytes = 0;
|
|
size_t input_external_bytes = 0;
|
|
size_t input_previous_cut_bytes = 0;
|
|
size_t input_param_bytes = 0;
|
|
std::string group_name;
|
|
std::vector<int> internal_node_indices;
|
|
std::vector<int> output_node_indices;
|
|
std::vector<InputRef> input_refs;
|
|
};
|
|
|
|
struct Plan {
|
|
struct InputShape {
|
|
int leaf_index = -1;
|
|
ggml_type type = GGML_TYPE_COUNT;
|
|
std::array<int64_t, GGML_MAX_DIMS> ne = {0, 0, 0, 0};
|
|
};
|
|
|
|
bool available = false;
|
|
bool has_cuts = false;
|
|
bool valid = true;
|
|
int n_nodes = 0;
|
|
int n_leafs = 0;
|
|
std::vector<InputShape> input_shapes;
|
|
std::vector<Segment> segments;
|
|
};
|
|
|
|
struct PlanCache {
|
|
Plan graph_cut_plan;
|
|
Plan budgeted_graph_cut_plan;
|
|
size_t budgeted_graph_cut_plan_max_vram_bytes = 0;
|
|
};
|
|
|
|
static constexpr const char* GGML_RUNNER_CUT_PREFIX = "ggml_runner_cut:";
|
|
|
|
bool is_graph_cut_tensor(const ggml_tensor* tensor);
|
|
std::string make_graph_cut_name(const std::string& group, const std::string& output);
|
|
void mark_graph_cut(ggml_tensor* tensor, const std::string& group, const std::string& output);
|
|
int leaf_count(ggml_cgraph* gf);
|
|
ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index);
|
|
ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor);
|
|
ggml_tensor* cache_source_tensor(ggml_tensor* tensor);
|
|
size_t cache_tensor_bytes(const ggml_tensor* tensor);
|
|
bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan);
|
|
ggml_tensor* output_tensor(ggml_cgraph* gf, const Segment& segment, size_t output_index);
|
|
ggml_tensor* input_tensor(ggml_cgraph* gf, const Segment::InputRef& input_ref);
|
|
std::vector<ggml_tensor*> param_tensors(ggml_cgraph* gf, const Segment& segment);
|
|
std::vector<ggml_tensor*> runtime_param_tensors(ggml_cgraph* gf, const Segment& segment, const char* log_desc);
|
|
std::unordered_set<std::string> collect_future_input_names(ggml_cgraph* gf,
|
|
const Plan& plan,
|
|
size_t current_segment_index);
|
|
ggml_cgraph* build_segment_graph(ggml_cgraph* gf,
|
|
const Segment& segment,
|
|
ggml_context** graph_ctx_out);
|
|
size_t measure_segment_compute_buffer(ggml_backend_t backend,
|
|
ggml_cgraph* gf,
|
|
const Segment& segment,
|
|
const char* log_desc);
|
|
size_t max_vram_gib_to_bytes(float max_vram);
|
|
float resolve_max_vram_gib(float max_vram, ggml_backend_t backend);
|
|
Plan build_plan(ggml_backend_t backend,
|
|
ggml_cgraph* gf,
|
|
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
|
|
const char* log_desc);
|
|
Plan apply_max_vram_budget(ggml_cgraph* gf,
|
|
const Plan& base_plan,
|
|
size_t max_graph_vram_bytes,
|
|
ggml_backend_t backend,
|
|
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
|
|
const char* log_desc);
|
|
Plan resolve_plan(ggml_backend_t backend,
|
|
ggml_cgraph* gf,
|
|
PlanCache* cache,
|
|
size_t max_graph_vram_bytes,
|
|
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
|
|
const char* log_desc);
|
|
} // namespace sd::ggml_graph_cut
|
|
|
|
#endif
|