From dbc1f18fdbcfeeeef1e7c81f20084a36db9c10d4 Mon Sep 17 00:00:00 2001 From: Pedro Cabrera Date: Thu, 16 Oct 2025 21:27:06 +0200 Subject: [PATCH 1/3] Support Custom ESRGAN tile size --- esrgan.hpp | 3 ++- examples/cli/main.cpp | 12 +++++++++++- stable-diffusion.h | 3 ++- upscaler.cpp | 14 +++++++++----- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/esrgan.hpp b/esrgan.hpp index fe5f16d27..344fb927f 100644 --- a/esrgan.hpp +++ b/esrgan.hpp @@ -156,9 +156,10 @@ struct ESRGAN : public GGMLRunner { ESRGAN(ggml_backend_t backend, bool offload_params_to_cpu, + int tile_size = 128, const String2GGMLType& tensor_types = {}) : GGMLRunner(backend, offload_params_to_cpu) { - // rrdb_net will be created in load_from_file + this->tile_size = tile_size; } void enable_conv2d_direct() { diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index ff36cea25..c3c8b521b 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -116,6 +116,7 @@ struct SDParams { bool canny_preprocess = false; bool color = false; int upscale_repeats = 1; + int upscale_tile = 128; // Photo Maker std::string photo_maker_path; @@ -201,6 +202,7 @@ void print_params(SDParams params) { printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false"); printf(" force_sdxl_vae_conv_scale: %s\n", params.force_sdxl_vae_conv_scale ? "true" : "false"); printf(" upscale_repeats: %d\n", params.upscale_repeats); + printf(" upscale_tile: %d\n", params.upscale_tile); printf(" chroma_use_dit_mask: %s\n", params.chroma_use_dit_mask ? "true" : "false"); printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false"); printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad); @@ -235,6 +237,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n"); printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. 
For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n"); printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n"); + printf(" --upscale-tile Tile size for the ESRGAN upscaler (default 128)\n"); printf(" --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n"); printf(" If not specified, the default is the type of the weight file\n"); printf(" --tensor-type-rules [EXPRESSION] weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")\n"); @@ -527,6 +530,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { options.int_options = { {"-t", "--threads", "", &params.n_threads}, {"", "--upscale-repeats", "", &params.upscale_repeats}, + {"","--upscale-tile", "", &params.upscale_tile}, {"-H", "--height", "", &params.height}, {"-W", "--width", "", &params.width}, {"", "--steps", "", &params.sample_params.sample_steps}, @@ -917,6 +921,11 @@ void parse_args(int argc, const char** argv, SDParams& params) { exit(1); } + if (params.upscale_tile < 1) { + fprintf(stderr, "error: upscale tile size must be at least 1\n"); + exit(1); + } + if (params.mode == UPSCALE) { if (params.esrgan_path.length() == 0) { fprintf(stderr, "error: upscale mode needs an upscaler model (--upscale-model)\n"); @@ -1486,7 +1495,8 @@ int main(int argc, const char* argv[]) { upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), params.offload_params_to_cpu, params.diffusion_conv_direct, - params.n_threads); + params.n_threads, + params.upscale_tile); if (upscaler_ctx == NULL) { printf("new_upscaler_ctx failed\n"); diff --git a/stable-diffusion.h b/stable-diffusion.h index a891a58f1..925e83ca9 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -292,7 +292,8 @@ typedef struct upscaler_ctx_t upscaler_ctx_t; SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path, bool offload_params_to_cpu, bool direct, - int n_threads); + int n_threads, + int tile_size); 
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx); SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, diff --git a/upscaler.cpp b/upscaler.cpp index d30423723..205094e78 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -10,11 +10,14 @@ struct UpscalerGGML { std::string esrgan_path; int n_threads; bool direct = false; + int tile_size = 128; UpscalerGGML(int n_threads, - bool direct = false) + bool direct = false, + int tile_size = 128) : n_threads(n_threads), - direct(direct) { + direct(direct), + tile_size(tile_size) { } bool load_from_file(const std::string& esrgan_path, @@ -51,7 +54,7 @@ struct UpscalerGGML { backend = ggml_backend_cpu_init(); } LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type)); - esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types); + esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.tensor_storages_types); if (direct) { esrgan_upscaler->enable_conv2d_direct(); } @@ -113,14 +116,15 @@ struct upscaler_ctx_t { upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str, bool offload_params_to_cpu, bool direct, - int n_threads) { + int n_threads, + int tile_size) { upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t)); if (upscaler_ctx == NULL) { return NULL; } std::string esrgan_path(esrgan_path_c_str); - upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct); + upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct, tile_size); if (upscaler_ctx->upscaler == NULL) { return NULL; } From 80268c58ab47f585c54f6d641dbfb615926ea68b Mon Sep 17 00:00:00 2001 From: Pedro Cabrera Date: Thu, 16 Oct 2025 21:27:06 +0200 Subject: [PATCH 2/3] Support Custom ESRGAN tile size --- esrgan.hpp | 3 ++- examples/cli/main.cpp | 14 +++++++++++++- stable-diffusion.h | 3 ++- upscaler.cpp | 14 +++++++++----- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/esrgan.hpp b/esrgan.hpp index 
21689ffa4..fa18532df 100644 --- a/esrgan.hpp +++ b/esrgan.hpp @@ -156,9 +156,10 @@ struct ESRGAN : public GGMLRunner { ESRGAN(ggml_backend_t backend, bool offload_params_to_cpu, + int tile_size = 128, const String2GGMLType& tensor_types = {}) : GGMLRunner(backend, offload_params_to_cpu) { - // rrdb_net will be created in load_from_file + this->tile_size = tile_size; } void enable_conv2d_direct() { diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 8f938c9b4..7bc956255 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -118,6 +118,7 @@ struct SDParams { bool canny_preprocess = false; bool color = false; int upscale_repeats = 1; + int upscale_tile_size = 128; // Photo Maker std::string photo_maker_path; @@ -204,6 +205,7 @@ void print_params(SDParams params) { printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false"); printf(" force_sdxl_vae_conv_scale: %s\n", params.force_sdxl_vae_conv_scale ? "true" : "false"); printf(" upscale_repeats: %d\n", params.upscale_repeats); + printf(" upscale_tile_size: %d\n", params.upscale_tile_size); printf(" chroma_use_dit_mask: %s\n", params.chroma_use_dit_mask ? "true" : "false"); printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? 
"true" : "false"); printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad); @@ -605,6 +607,10 @@ void parse_args(int argc, const char** argv, SDParams& params) { "--upscale-repeats", "Run the ESRGAN upscaler this many times (default: 1)", &params.upscale_repeats}, + {"", + "--upscale-tile", + "tile size for ESRGAN upscaling (default: 128)", + &params.upscale_tile_size}, {"-H", "--height", "image height, in pixel space (default: 512)", @@ -1187,6 +1193,11 @@ void parse_args(int argc, const char** argv, SDParams& params) { exit(1); } + if (params.upscale_tile_size < 1) { + fprintf(stderr, "error: upscale tile size must be at least 1\n"); + exit(1); + } + if (params.mode == UPSCALE) { if (params.esrgan_path.length() == 0) { fprintf(stderr, "error: upscale mode needs an upscaler model (--upscale-model)\n"); @@ -1757,7 +1768,8 @@ int main(int argc, const char* argv[]) { upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), params.offload_params_to_cpu, params.diffusion_conv_direct, - params.n_threads); + params.n_threads, + params.upscale_tile_size); if (upscaler_ctx == nullptr) { printf("new_upscaler_ctx failed\n"); diff --git a/stable-diffusion.h b/stable-diffusion.h index f618d457b..59a25cdc5 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -293,7 +293,8 @@ typedef struct upscaler_ctx_t upscaler_ctx_t; SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path, bool offload_params_to_cpu, bool direct, - int n_threads); + int n_threads, + int tile_size); SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx); SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, diff --git a/upscaler.cpp b/upscaler.cpp index 459b0ee49..68eb50ef3 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -10,11 +10,14 @@ struct UpscalerGGML { std::string esrgan_path; int n_threads; bool direct = false; + int tile_size = 128; UpscalerGGML(int n_threads, - bool direct = false) + bool direct = false, + int tile_size = 128) : n_threads(n_threads), - 
direct(direct) { + direct(direct), + tile_size(tile_size) { } bool load_from_file(const std::string& esrgan_path, @@ -51,7 +54,7 @@ struct UpscalerGGML { backend = ggml_backend_cpu_init(); } LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type)); - esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types); + esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.tensor_storages_types); if (direct) { esrgan_upscaler->enable_conv2d_direct(); } @@ -113,14 +116,15 @@ struct upscaler_ctx_t { upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str, bool offload_params_to_cpu, bool direct, - int n_threads) { + int n_threads, + int tile_size) { upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t)); if (upscaler_ctx == nullptr) { return nullptr; } std::string esrgan_path(esrgan_path_c_str); - upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct); + upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct, tile_size); if (upscaler_ctx->upscaler == nullptr) { return nullptr; } From ef38a2e2cd99344b7dd242b983cf35b48e8fe7a1 Mon Sep 17 00:00:00 2001 From: Pedro Cabrera Date: Sun, 19 Oct 2025 13:09:42 +0200 Subject: [PATCH 3/3] upscale-tile-size naming fix --- examples/cli/main.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index bad492008..24f810329 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -608,10 +608,9 @@ void parse_args(int argc, const char** argv, SDParams& params) { "Run the ESRGAN upscaler this many times (default: 1)", &params.upscale_repeats}, {"", - "--upscale-tile", + "--upscale-tile-size", "tile size for ESRGAN upscaling (default: 128)", &params.upscale_tile_size}, - {"","--upscale-tile", "", &params.upscale_tile_size}, {"-H", "--height", "image height, in pixel space (default: 512)",