mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-10 00:06:38 +00:00
433 lines
18 KiB
C++
433 lines
18 KiB
C++
#include <algorithm>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include "ggml.h"
|
|
#include "tensor.hpp"
|
|
|
|
// Latent-to-RGB preview weights, 128 rows x 3 columns. The table has the shape
// of the `latent_rgb_proj` argument of preview_latent_video(), which multiplies
// latent channel d by row d and accumulates columns 0/1/2 into R/G/B.
// NOTE(review): presumably the table for the "ltxav" model family (inferred
// from the identifier only) - confirm at the call site.
const float ltxav_latent_rgb_proj[128][3] = {
|
|
{-0.0293802f, -0.0362516f, -0.0291386f},
|
|
{0.0117735f, 0.0223435f, 0.018856f},
|
|
{0.00922335f, 0.0145666f, 0.0038772f},
|
|
{0.0227299f, 0.0109122f, 0.0131384f},
|
|
{0.00192413f, 0.0024648f, 0.00689245f},
|
|
{-0.0105576f, -0.0135933f, -0.00873841f},
|
|
{-0.0310222f, -0.0396358f, -0.0408445f},
|
|
{0.0149737f, 0.0316323f, 0.03415f},
|
|
{0.0027752f, 0.00814889f, 0.0108575f},
|
|
{-0.000678017f, -0.00180589f, -0.0161684f},
|
|
{0.0153964f, 0.0159774f, 0.0186479f},
|
|
{-0.0222799f, -0.0202068f, -0.0181082f},
|
|
{0.0128696f, 0.00754416f, -0.00673279f},
|
|
{0.0142729f, 0.00448099f, -0.00193934f},
|
|
{-0.014066f, -0.0193755f, -0.0160104f},
|
|
{-0.0176785f, -0.015903f, -0.0152621f},
|
|
{0.0307381f, 0.0292082f, 0.0328668f},
|
|
{0.0332928f, 0.0368629f, 0.0440893f},
|
|
{0.0186304f, 0.0124069f, 0.0160734f},
|
|
{0.00477787f, -0.00315658f, -0.000145702f},
|
|
{0.0183099f, 0.0122593f, 0.00599732f},
|
|
{-0.0194551f, -0.0183924f, -0.0147465f},
|
|
{0.0025732f, 0.00442582f, 0.0173176f},
|
|
{-0.0169423f, -0.0293863f, -0.0225908f},
|
|
{-0.021228f, -0.0265094f, -0.0253049f},
|
|
{0.0327111f, 0.0187133f, 0.0266184f},
|
|
{-0.0226425f, -0.0313781f, -0.0414356f},
|
|
{-0.0163142f, -0.0146144f, -0.0171793f},
|
|
{0.0192183f, 0.0108411f, 0.00829186f},
|
|
{-0.032246f, -0.0274846f, -0.0287434f},
|
|
{0.00345399f, 0.0115567f, 0.015288f},
|
|
{0.000972292f, 0.00331303f, 0.0110501f},
|
|
{0.000939494f, -0.00705084f, -0.00979449f},
|
|
{0.0405155f, 0.0339534f, 0.0419513f},
|
|
{0.0198596f, 0.0186626f, 0.0213766f},
|
|
{-0.00982375f, -0.00880439f, -0.00470429f},
|
|
{-0.0313707f, -0.0258098f, -0.0211663f},
|
|
{0.0144159f, 0.0117896f, 0.0141573f},
|
|
{0.0164571f, 0.0149178f, 0.00921599f},
|
|
{0.0436184f, 0.0346583f, 0.0360647f},
|
|
{-0.00289744f, -0.000752502f, 0.000675415f},
|
|
{-0.00621715f, -0.000558851f, 0.0135814f},
|
|
{-0.00817579f, -0.0113584f, -0.00556793f},
|
|
{0.00965067f, 0.0178221f, 0.015821f},
|
|
{0.0211832f, 0.0180827f, 0.0154707f},
|
|
{-0.00412858f, -0.00374182f, 0.0029568f},
|
|
{-0.0175603f, -0.0226242f, -0.0279012f},
|
|
{-0.00437471f, -0.00668329f, 0.000164887f},
|
|
{-0.0355983f, -0.0419093f, -0.0383065f},
|
|
{0.0144314f, 0.0192514f, 0.0175639f},
|
|
{-0.0130693f, -0.00569884f, -0.00341647f},
|
|
{-0.00184689f, 0.00189034f, -0.00190561f},
|
|
{0.019457f, 0.00842282f, 0.0123738f},
|
|
{-0.00477146f, -0.00206932f, 0.00283336f},
|
|
{-0.0364544f, -0.0256141f, -0.0322336f},
|
|
{-0.0295634f, -0.0295048f, -0.021057f},
|
|
{0.0144484f, 0.0191862f, 0.0112445f},
|
|
{0.0536406f, 0.0582376f, 0.0570966f},
|
|
{0.0085178f, 0.00748455f, 0.00995162f},
|
|
{-0.0136637f, -0.0172914f, -0.0195978f},
|
|
{-0.0339128f, -0.0392692f, -0.0355216f},
|
|
{0.00612855f, 0.00568303f, -0.00212333f},
|
|
{-0.0029225f, 0.00668819f, 0.0122131f},
|
|
{0.00841843f, 0.000181587f, -0.00650644f},
|
|
{-0.00514432f, 0.0127043f, 0.0168049f},
|
|
{-0.00997384f, -0.00602262f, -0.0164031f},
|
|
{0.0233226f, 0.033254f, 0.0307266f},
|
|
{-0.0110201f, -0.0164169f, -0.0161829f},
|
|
{-0.0195952f, -0.0177943f, -0.0115377f},
|
|
{-0.00523918f, -0.00452043f, 0.00267397f},
|
|
{0.0313464f, 0.0288241f, 0.0262496f},
|
|
{0.0324018f, 0.0339792f, 0.0312209f},
|
|
{-0.0163247f, -0.0230503f, -0.0263239f},
|
|
{0.000420577f, -0.00535659f, -0.00663426f},
|
|
{-0.012897f, -0.00203767f, -0.000622678f},
|
|
{-0.0632956f, -0.0651325f, -0.0584479f},
|
|
{-0.00426634f, -0.0150098f, -0.00719348f},
|
|
{0.00476109f, 0.00674315f, 0.00895472f},
|
|
{0.0129384f, 0.0158352f, 0.00963773f},
|
|
{-0.0333379f, -0.0410522f, -0.0317462f},
|
|
{0.00344054f, 0.00275915f, 0.00355732f},
|
|
{0.0209062f, 0.0273453f, 0.0222967f},
|
|
{0.00827287f, 0.00223045f, 0.00325844f},
|
|
{-0.0149132f, -0.0183973f, -0.0199781f},
|
|
{-0.0100786f, -0.0103681f, -0.00218224f},
|
|
{-0.00791409f, -0.00405153f, -0.00599893f},
|
|
{0.0176126f, 0.00618342f, -6.6569e-05f},
|
|
{0.00942486f, -0.00206494f, -0.00580324f},
|
|
{0.00678093f, -0.00291742f, -0.000921195f},
|
|
{-0.0221992f, -0.00483162f, -0.000848514f},
|
|
{-0.0151587f, -0.0157166f, -0.0107302f},
|
|
{0.00909646f, 0.0171985f, 0.0169785f},
|
|
{0.0127224f, 0.0170612f, 0.0303428f},
|
|
{0.0196562f, 0.00212451f, 0.0127744f},
|
|
{0.0233013f, 0.0228994f, 0.0108387f},
|
|
{0.00520761f, 0.00992992f, 0.0066267f},
|
|
{-3.77736e-05f, 0.00460229f, -0.00475132f},
|
|
{-0.0311763f, -0.0453566f, -0.0486901f},
|
|
{0.0195798f, 0.0281246f, 0.0180102f},
|
|
{-0.0174149f, -0.0240867f, -0.0188785f},
|
|
{0.000104658f, 0.00659008f, 0.0144594f},
|
|
{-0.00311086f, -0.0241426f, -0.0244164f},
|
|
{0.0336462f, 0.0305173f, 0.0331101f},
|
|
{0.0613625f, 0.066561f, 0.0610198f},
|
|
{-0.0286757f, -0.0325401f, -0.0338036f},
|
|
{0.0141534f, 0.0188266f, 0.0253059f},
|
|
{-0.00548197f, -0.00170198f, 0.00561745f},
|
|
{-0.0117872f, -0.00763218f, -0.0145037f},
|
|
{-0.0253304f, -0.0245217f, -0.0144905f},
|
|
{-0.00393624f, 0.00350048f, 0.00765561f},
|
|
{0.0113625f, 0.00561576f, -0.0113672f},
|
|
{-0.0301278f, -0.0261472f, -0.0301903f},
|
|
{0.016863f, 0.0173781f, 0.0170916f},
|
|
{-0.00495108f, 0.00686749f, 0.00282767f},
|
|
{0.00125409f, -0.00378072f, -0.00264117f},
|
|
{-0.00264001f, -0.00529772f, -0.0113109f},
|
|
{-0.054888f, -0.0575461f, -0.0509146f},
|
|
{-0.019442f, -0.0232916f, -0.0258637f},
|
|
{0.0133362f, 0.0161808f, 0.00917951f},
|
|
{-0.0349002f, -0.0372642f, -0.0466206f},
|
|
{-0.00216926f, 0.00208738f, 0.00766492f},
|
|
{0.0268528f, 0.0301179f, 0.0228579f},
|
|
{0.0226176f, 0.021536f, 0.023152f},
|
|
{-0.0110646f, -0.00511349f, -0.0137346f},
|
|
{-0.0098424f, -0.00218176f, 0.00414545f},
|
|
{0.00200216f, 0.00441732f, -0.0136515f},
|
|
{0.00695946f, 0.00313109f, -0.00379435f},
|
|
{0.0188377f, 0.0144059f, 0.0229724f},
|
|
};
|
|
// RGB offset triple shaped like the `latent_rgb_bias` argument of
// preview_latent_video(), which adds elements 0/1/2 to R/G/B after the
// projection. Presumably paired with ltxav_latent_rgb_proj - confirm at the call site.
float ltxav_latent_rgb_bias[3] = {0.043849f, 0.0201085f, 0.0150286f};
|
|
|
|
// Latent-to-RGB preview weights, 16 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
// NOTE(review): presumably for Wan 2.1 latents (inferred from the name) - confirm.
const float wan_21_latent_rgb_proj[16][3] = {
|
|
{0.015123f, -0.148418f, 0.479828f},
|
|
{0.003652f, -0.010680f, -0.037142f},
|
|
{0.212264f, 0.063033f, 0.016779f},
|
|
{0.232999f, 0.406476f, 0.220125f},
|
|
{-0.051864f, -0.082384f, -0.069396f},
|
|
{0.085005f, -0.161492f, 0.010689f},
|
|
{-0.245369f, -0.506846f, -0.117010f},
|
|
{-0.151145f, 0.017721f, 0.007207f},
|
|
{-0.293239f, -0.207936f, -0.421135f},
|
|
{-0.187721f, 0.050783f, 0.177649f},
|
|
{-0.013067f, 0.265964f, 0.166578f},
|
|
{0.028327f, 0.109329f, 0.108642f},
|
|
{-0.205343f, 0.043991f, 0.148914f},
|
|
{0.014307f, -0.048647f, -0.007219f},
|
|
{0.217150f, 0.053074f, 0.319923f},
|
|
{0.155357f, 0.083156f, 0.064780f}};
|
|
// RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias` (added to R/G/B before the range remap).
// Presumably paired with wan_21_latent_rgb_proj - confirm at the call site.
float wan_21_latent_rgb_bias[3] = {-0.270270f, -0.234976f, -0.456853f};
|
|
|
|
// Latent-to-RGB preview weights, 48 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
// NOTE(review): presumably for Wan 2.2 latents (inferred from the name) - confirm.
const float wan_22_latent_rgb_proj[48][3] = {
|
|
{0.017126f, -0.027230f, -0.019257f},
|
|
{-0.113739f, -0.028715f, -0.022885f},
|
|
{-0.000106f, 0.021494f, 0.004629f},
|
|
{-0.013273f, -0.107137f, -0.033638f},
|
|
{-0.000381f, 0.000279f, 0.025877f},
|
|
{-0.014216f, -0.003975f, 0.040528f},
|
|
{0.001638f, -0.000748f, 0.011022f},
|
|
{0.029238f, -0.006697f, 0.035933f},
|
|
{0.021641f, -0.015874f, 0.040531f},
|
|
{-0.101984f, -0.070160f, -0.028855f},
|
|
{0.033207f, -0.021068f, 0.002663f},
|
|
{-0.104711f, 0.121673f, 0.102981f},
|
|
{0.082647f, -0.004991f, 0.057237f},
|
|
{-0.027375f, 0.031581f, 0.006868f},
|
|
{-0.045434f, 0.029444f, 0.019287f},
|
|
{-0.046572f, -0.012537f, 0.006675f},
|
|
{0.074709f, 0.033690f, 0.025289f},
|
|
{-0.008251f, -0.002745f, -0.006999f},
|
|
{0.012685f, -0.061856f, -0.048658f},
|
|
{0.042304f, -0.007039f, 0.000295f},
|
|
{-0.007644f, -0.060843f, -0.033142f},
|
|
{0.159909f, 0.045628f, 0.367541f},
|
|
{0.095171f, 0.086438f, 0.010271f},
|
|
{0.006812f, 0.019643f, 0.029637f},
|
|
{0.003467f, -0.010705f, 0.014252f},
|
|
{-0.099681f, -0.066272f, -0.006243f},
|
|
{0.047357f, 0.037040f, 0.000185f},
|
|
{-0.041797f, -0.089225f, -0.032257f},
|
|
{0.008928f, 0.017028f, 0.018684f},
|
|
{-0.042255f, 0.016045f, 0.006849f},
|
|
{0.011268f, 0.036462f, 0.037387f},
|
|
{0.011553f, -0.016375f, -0.048589f},
|
|
{0.046266f, -0.027189f, 0.056979f},
|
|
{0.009640f, -0.017576f, 0.030324f},
|
|
{-0.045794f, -0.036083f, -0.010616f},
|
|
{0.022418f, 0.039783f, -0.032939f},
|
|
{-0.052714f, -0.015525f, 0.007438f},
|
|
{0.193004f, 0.223541f, 0.264175f},
|
|
{-0.059406f, -0.008188f, 0.022867f},
|
|
{-0.156742f, -0.263791f, -0.007385f},
|
|
{-0.015717f, 0.016570f, 0.033969f},
|
|
{0.037969f, 0.109835f, 0.200449f},
|
|
{-0.000782f, -0.009566f, -0.008058f},
|
|
{0.010709f, 0.052960f, -0.044195f},
|
|
{0.017271f, 0.045839f, 0.034569f},
|
|
{0.009424f, 0.013088f, -0.001714f},
|
|
{-0.024805f, -0.059378f, -0.033756f},
|
|
{-0.078293f, 0.029070f, 0.026129f}};
|
|
// RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias` (added to R/G/B before the range remap).
// Presumably paired with wan_22_latent_rgb_proj - confirm at the call site.
float wan_22_latent_rgb_bias[3] = {0.013160f, -0.096492f, -0.071323f};
|
|
|
|
// Latent-to-RGB preview weights, 16 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
// NOTE(review): presumably for Flux latents (inferred from the name) - confirm.
const float flux_latent_rgb_proj[16][3] = {
|
|
{-0.041168f, 0.019917f, 0.097253f},
|
|
{0.028096f, 0.026730f, 0.129576f},
|
|
{0.065618f, -0.067950f, -0.014651f},
|
|
{-0.012998f, -0.014762f, 0.081251f},
|
|
{0.078567f, 0.059296f, -0.024687f},
|
|
{-0.015987f, -0.003697f, 0.005012f},
|
|
{0.033605f, 0.138999f, 0.068517f},
|
|
{-0.024450f, -0.063567f, -0.030101f},
|
|
{-0.040194f, -0.016710f, 0.127185f},
|
|
{0.112681f, 0.088764f, -0.041940f},
|
|
{-0.023498f, 0.093664f, 0.025543f},
|
|
{0.082899f, 0.048320f, 0.007491f},
|
|
{0.075712f, 0.074139f, 0.081965f},
|
|
{-0.143501f, 0.018263f, -0.136138f},
|
|
{-0.025767f, -0.082035f, -0.040023f},
|
|
{-0.111849f, -0.055589f, -0.032361f}};
|
|
// RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias` (added to R/G/B before the range remap).
// Presumably paired with flux_latent_rgb_proj - confirm at the call site.
float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f};
|
|
|
|
// Latent-to-RGB preview weights, 32 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
// NOTE(review): presumably for Flux 2 latents (inferred from the name) - confirm.
const float flux2_latent_rgb_proj[32][3] = {
|
|
{0.000736f, -0.008385f, -0.019710f},
|
|
{-0.001352f, -0.016392f, 0.020693f},
|
|
{-0.006376f, 0.002428f, 0.036736f},
|
|
{0.039384f, 0.074167f, 0.119789f},
|
|
{0.007464f, -0.005705f, -0.004734f},
|
|
{-0.004086f, 0.005287f, -0.000409f},
|
|
{-0.032835f, 0.050802f, -0.028120f},
|
|
{-0.003158f, -0.000835f, 0.000406f},
|
|
{-0.112840f, -0.084337f, -0.023083f},
|
|
{0.001462f, -0.006656f, 0.000549f},
|
|
{-0.009980f, -0.007480f, 0.009702f},
|
|
{0.032540f, 0.000214f, -0.061388f},
|
|
{0.011023f, 0.000694f, 0.007143f},
|
|
{-0.001468f, -0.006723f, -0.001678f},
|
|
{-0.005921f, -0.010320f, -0.003907f},
|
|
{-0.028434f, 0.027584f, 0.018457f},
|
|
{0.014349f, 0.011523f, 0.000441f},
|
|
{0.009874f, 0.003081f, 0.001507f},
|
|
{0.002218f, 0.005712f, 0.001563f},
|
|
{0.053010f, -0.019844f, 0.008683f},
|
|
{-0.002507f, 0.005384f, 0.000938f},
|
|
{-0.002177f, -0.011366f, 0.003559f},
|
|
{-0.000261f, 0.015121f, -0.003240f},
|
|
{-0.003944f, -0.002083f, 0.005043f},
|
|
{-0.009138f, 0.011336f, 0.003781f},
|
|
{0.011429f, 0.003985f, -0.003855f},
|
|
{0.010518f, -0.005586f, 0.010131f},
|
|
{0.007883f, 0.002912f, -0.001473f},
|
|
{-0.003318f, -0.003160f, 0.003684f},
|
|
{-0.034560f, -0.008740f, 0.012996f},
|
|
{0.000166f, 0.001079f, -0.012153f},
|
|
{0.017772f, 0.000937f, -0.011953f}};
|
|
// RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias` (added to R/G/B before the range remap).
// Presumably paired with flux2_latent_rgb_proj - confirm at the call site.
float flux2_latent_rgb_bias[3] = {-0.028738f, -0.098463f, -0.107619f};
|
|
|
|
// This one was taken straight from
|
|
// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
|
|
// (MIT License)
|
|
// Latent-to-RGB preview weights, 16 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
const float sd3_latent_rgb_proj[16][3] = {
|
|
{-0.0645f, 0.0177f, 0.1052f},
|
|
{0.0028f, 0.0312f, 0.0650f},
|
|
{0.1848f, 0.0762f, 0.0360f},
|
|
{0.0944f, 0.0360f, 0.0889f},
|
|
{0.0897f, 0.0506f, -0.0364f},
|
|
{-0.0020f, 0.1203f, 0.0284f},
|
|
{0.0855f, 0.0118f, 0.0283f},
|
|
{-0.0539f, 0.0658f, 0.1047f},
|
|
{-0.0057f, 0.0116f, 0.0700f},
|
|
{-0.0412f, 0.0281f, -0.0039f},
|
|
{0.1106f, 0.1171f, 0.1220f},
|
|
{-0.0248f, 0.0682f, -0.0481f},
|
|
{0.0815f, 0.0846f, 0.1207f},
|
|
{-0.0120f, -0.0055f, -0.0867f},
|
|
{-0.0749f, -0.0634f, -0.0456f},
|
|
{-0.1418f, -0.1457f, -0.1259f},
|
|
};
|
|
// All-zero RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias`; adding it leaves the projected colour unchanged.
// Presumably paired with sd3_latent_rgb_proj - confirm at the call site.
float sd3_latent_rgb_bias[3] = {0, 0, 0};
|
|
|
|
// Latent-to-RGB preview weights, 4 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
// NOTE(review): presumably for SDXL latents (inferred from the name) - confirm.
const float sdxl_latent_rgb_proj[4][3] = {
|
|
{0.258303f, 0.277640f, 0.329699f},
|
|
{-0.299701f, 0.105446f, 0.014194f},
|
|
{0.050522f, 0.186163f, -0.143257f},
|
|
{-0.211938f, -0.149892f, -0.080036f}};
|
|
// RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias` (added to R/G/B before the range remap).
// Presumably paired with sdxl_latent_rgb_proj - confirm at the call site.
float sdxl_latent_rgb_bias[3] = {0.144381f, -0.033313f, 0.007061f};
|
|
|
|
// Latent-to-RGB preview weights, 4 rows x 3 columns: row d carries the
// (R, G, B) factors that preview_latent_video() applies to latent channel d
// when a table of this shape is passed as `latent_rgb_proj`.
// NOTE(review): presumably for the original Stable Diffusion latents
// (inferred from the name) - confirm.
const float sd_latent_rgb_proj[4][3] = {
|
|
{0.337366f, 0.216344f, 0.257386f},
|
|
{0.165636f, 0.386828f, 0.046994f},
|
|
{-0.267803f, 0.237036f, 0.223517f},
|
|
{-0.178022f, -0.200862f, -0.678514f}};
|
|
// RGB offset triple in the shape preview_latent_video() takes as
// `latent_rgb_bias` (added to R/G/B before the range remap).
// Presumably paired with sd_latent_rgb_proj - confirm at the call site.
float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f};
|
|
|
|
void preview_latent_video(uint8_t* buffer, ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int patch_size) {
|
|
size_t buffer_head = 0;
|
|
|
|
uint32_t latent_width = static_cast<uint32_t>(latents->ne[0]);
|
|
uint32_t latent_height = static_cast<uint32_t>(latents->ne[1]);
|
|
uint32_t dim = static_cast<uint32_t>(latents->ne[ggml_n_dims(latents) - 1]);
|
|
uint32_t frames = 1;
|
|
if (ggml_n_dims(latents) == 4) {
|
|
frames = static_cast<uint32_t>(latents->ne[2]);
|
|
}
|
|
|
|
uint32_t rgb_width = latent_width * patch_size;
|
|
uint32_t rgb_height = latent_height * patch_size;
|
|
|
|
uint32_t unpatched_dim = dim / (patch_size * patch_size);
|
|
|
|
for (uint32_t k = 0; k < frames; k++) {
|
|
for (uint32_t rgb_x = 0; rgb_x < rgb_width; rgb_x++) {
|
|
for (uint32_t rgb_y = 0; rgb_y < rgb_height; rgb_y++) {
|
|
int latent_x = rgb_x / patch_size;
|
|
int latent_y = rgb_y / patch_size;
|
|
|
|
int channel_offset = 0;
|
|
if (patch_size > 1) {
|
|
channel_offset = ((rgb_y % patch_size) * patch_size + (rgb_x % patch_size));
|
|
}
|
|
|
|
size_t latent_id = (latent_x * latents->nb[0] + latent_y * latents->nb[1] + k * latents->nb[2]);
|
|
|
|
// should be incremented by 1 for each pixel
|
|
size_t pixel_id = k * rgb_width * rgb_height + rgb_y * rgb_width + rgb_x;
|
|
|
|
float r = 0, g = 0, b = 0;
|
|
if (latent_rgb_proj != nullptr) {
|
|
for (uint32_t d = 0; d < unpatched_dim; d++) {
|
|
float value = *(float*)((char*)latents->data + latent_id + (d * patch_size * patch_size + channel_offset) * latents->nb[ggml_n_dims(latents) - 1]);
|
|
r += value * latent_rgb_proj[d][0];
|
|
g += value * latent_rgb_proj[d][1];
|
|
b += value * latent_rgb_proj[d][2];
|
|
}
|
|
} else {
|
|
// interpret first 3 channels as RGB
|
|
r = *(float*)((char*)latents->data + latent_id + 0 * latents->nb[ggml_n_dims(latents) - 1]);
|
|
g = *(float*)((char*)latents->data + latent_id + 1 * latents->nb[ggml_n_dims(latents) - 1]);
|
|
b = *(float*)((char*)latents->data + latent_id + 2 * latents->nb[ggml_n_dims(latents) - 1]);
|
|
}
|
|
if (latent_rgb_bias != nullptr) {
|
|
// bias
|
|
r += latent_rgb_bias[0];
|
|
g += latent_rgb_bias[1];
|
|
b += latent_rgb_bias[2];
|
|
}
|
|
// change range
|
|
r = r * .5f + .5f;
|
|
g = g * .5f + .5f;
|
|
b = b * .5f + .5f;
|
|
|
|
// clamp rgb values to [0,1] range
|
|
r = r >= 0 ? r <= 1 ? r : 1 : 0;
|
|
g = g >= 0 ? g <= 1 ? g : 1 : 0;
|
|
b = b >= 0 ? b <= 1 ? b : 1 : 0;
|
|
|
|
buffer[pixel_id * 3 + 0] = (uint8_t)(r * 255);
|
|
buffer[pixel_id * 3 + 1] = (uint8_t)(g * 255);
|
|
buffer[pixel_id * 3 + 2] = (uint8_t)(b * 255);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Tells whether `latents` is handled as a video latent by the preview code:
// true exactly when the tensor reports five dimensions through dim().
static inline bool preview_latent_tensor_is_video(const sd::Tensor<float>& latents) {
    const bool has_five_dims = (latents.dim() == 5);
    return has_five_dims;
}
|
|
|
|
// Renders a quick RGB preview of an sd::Tensor latent into `buffer`.
//
// For every output pixel the latent channels at the matching latent position
// are combined linearly: channel d adds value * latent_rgb_proj[d][c] to colour
// component c (0 = R, 1 = G, 2 = B). The optional bias is then added, the
// result is remapped with x * 0.5 + 0.5, clamped to [0, 1] and stored as an
// 8-bit value (truncated, not rounded).
//
// Tensor layout as read here: shape()[0] = width and shape()[1] = height. When
// preview_latent_tensor_is_video() is true, shape()[2] = frames and
// shape()[3] = channels; otherwise shape()[2] = channels and a single frame is
// rendered. values() is indexed as a flat array with width varying fastest,
// then height, then frame, then channel.
//
// With patch_size > 1 each latent position expands to a patch_size x patch_size
// block of pixels; the pixel at (px, py) inside the block reads latent channel
// d * patch_size^2 + py * patch_size + px for projection row d.
//
// Parameters:
//   buffer           output, packed 8-bit RGB (3 bytes per pixel), frame by
//                    frame and row by row; the caller must provide
//                    frames * height * width * patch_size^2 * 3 bytes.
//   latents          tensor to visualise (see layout above).
//   latent_rgb_proj  projection rows, one per unpatched channel; when nullptr
//                    the first three channels are used directly as R, G, B, so
//                    the tensor must then hold at least three channels.
//   latent_rgb_bias  optional RGB offset added before the remap; may be nullptr.
//   patch_size       edge length of the pixel block per latent position.
//
// Returns without touching `buffer` when `buffer` is null or patch_size is not
// positive (a zero patch size would otherwise divide by zero).
void preview_latent_video(uint8_t* buffer, const sd::Tensor<float>& latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int patch_size) {
    if (buffer == nullptr || patch_size <= 0) {
        return;
    }

    // Bind once; `const auto&` is valid whether the accessors return by value
    // or by reference, and avoids calling them again for every latent read.
    const auto& shape  = latents.shape();
    const auto& values = latents.values();

    const bool is_video = preview_latent_tensor_is_video(latents);

    // All index arithmetic is done in size_t so large previews cannot wrap a
    // 32-bit intermediate.
    const size_t patch         = static_cast<size_t>(patch_size);
    const size_t patch_area    = patch * patch;
    const size_t latent_width  = static_cast<uint32_t>(shape[0]);
    const size_t latent_height = static_cast<uint32_t>(shape[1]);
    const size_t frames        = is_video ? static_cast<uint32_t>(shape[2]) : 1;
    const size_t dim           = is_video ? static_cast<uint32_t>(shape[3]) : static_cast<uint32_t>(shape[2]);

    const size_t rgb_width     = latent_width * patch;
    const size_t rgb_height    = latent_height * patch;
    const size_t unpatched_dim = dim / patch_area;

    // Pixels are produced in exactly the order they are stored (frame, row,
    // column), so the output cursor simply advances by one RGB triple each time.
    uint8_t* out = buffer;
    for (size_t k = 0; k < frames; k++) {
        for (size_t rgb_y = 0; rgb_y < rgb_height; rgb_y++) {
            const size_t latent_y = rgb_y / patch;
            const size_t patch_y  = rgb_y % patch;

            for (size_t rgb_x = 0; rgb_x < rgb_width; rgb_x++) {
                const size_t latent_x = rgb_x / patch;
                // Position inside the patch; always 0 when patch_size == 1.
                const size_t channel_offset = patch_y * patch + (rgb_x % patch);

                // One formula covers both layouts: for images frames == 1 and
                // k == 0, which reduces it to x + W * (y + H * channel).
                const auto latent_value = [&](size_t latent_channel) -> float {
                    return values[latent_x + latent_width * (latent_y + latent_height * (k + frames * latent_channel))];
                };

                float rgb[3] = {0.f, 0.f, 0.f};
                if (latent_rgb_proj != nullptr) {
                    for (size_t d = 0; d < unpatched_dim; d++) {
                        const float value = latent_value(d * patch_area + channel_offset);
                        rgb[0] += value * latent_rgb_proj[d][0];
                        rgb[1] += value * latent_rgb_proj[d][1];
                        rgb[2] += value * latent_rgb_proj[d][2];
                    }
                } else {
                    // interpret first 3 channels as RGB
                    for (size_t c = 0; c < 3; c++) {
                        rgb[c] = latent_value(c);
                    }
                }

                for (size_t c = 0; c < 3; c++) {
                    float component = rgb[c];
                    if (latent_rgb_bias != nullptr) {
                        component += latent_rgb_bias[c];
                    }
                    // Remap, then clamp to [0, 1]; a NaN input ends up as 0.
                    component = std::min(1.0f, std::max(0.0f, component * .5f + .5f));
                    out[c]    = static_cast<uint8_t>(component * 255);
                }
                out += 3;
            }
        }
    }
}
|