支持 z_image

shell-nlp · shell-nlp · commit e396f79c997d · 2025-11-30T21:43:00.000+08:00
diff --git a/gpt_server/model_worker/qwen_image.py b/gpt_server/model_worker/qwen_image.py
@@ -68,20 +68,19 @@ def __init__(
     async def get_image_output(self, params):
         self.call_ct += 1
         prompt = params["prompt"]
-        if contains_chinese(prompt):
-            prompt += positive_magic["zh"]
-        else:
-            prompt += positive_magic["en"]
         response_format = params.get("response_format", "b64_json")
         inputs = {
             "prompt": prompt,
             "negative_prompt": " ",
-            "height": height,
-            "width": width,
             "num_inference_steps": 50,
             "true_cfg_scale": 4.0,
             "generator": torch.Generator(self.device).manual_seed(0),
         }
+        size = params.get("size", None)
+        if size:
+            size_split = size.split("x")
+            width, height = int(size_split[0]), int(size_split[1])
+            inputs.update({"width": width, "height": height})
         output = await asyncio.to_thread(self.pipe, **inputs)
         image = output.images[0]
         result = {}
diff --git a/gpt_server/model_worker/z_image.py b/gpt_server/model_worker/z_image.py
@@ -0,0 +1,118 @@
+import asyncio
+import os
+from typing import List
+import uuid
+from loguru import logger
+import shortuuid
+from gpt_server.model_worker.base.model_worker_base import ModelWorkerBase
+from gpt_server.model_worker.utils import pil_to_base64
+import torch
+from diffusers import ZImagePipeline
+from gpt_server.utils import STATIC_DIR
+
+# Repository root: three directory levels above this file.
+root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+
+# Preset (width, height) sizes keyed by aspect-ratio label.
+# NOTE(review): the visible code only reads this table for the module-level
+# default just below — confirm whether the presets are still needed.
+aspect_ratios = {
+    "1:1": (1328, 1328),
+    "16:9": (1664, 928),
+    "9:16": (928, 1664),
+    "4:3": (1472, 1140),
+    "3:4": (1140, 1472),
+    "3:2": (1584, 1056),
+    "2:3": (1056, 1584),
+}
+
+# Module-level default size taken from the 16:9 preset.
+# NOTE(review): no visible code in this file reads these two names; the
+# request handler binds its own local width/height — confirm before relying
+# on them.
+width, height = aspect_ratios["16:9"]
+import re
+
+
+def contains_chinese(text):
+    """Return True if ``text`` has at least one character in U+4E00-U+9FFF."""
+    return re.search(r"[\u4e00-\u9fff]", text) is not None
+
+
+class ZImageWorker(ModelWorkerBase):
+    """Image-generation worker backed by a diffusers ``ZImagePipeline``.
+
+    The pipeline is loaded once in ``__init__``; each request is served by
+    ``get_image_output``.
+    """
+
+    def __init__(
+        self,
+        controller_addr: str,
+        worker_addr: str,
+        worker_id: str,
+        model_path: str,
+        model_names: List[str],
+        limit_worker_concurrency: int,
+        conv_template: str = None,  # type: ignore
+    ):
+        """Initialise the base worker and load the pipeline from ``model_path``.
+
+        All arguments are forwarded unchanged to ``ModelWorkerBase`` together
+        with ``model_type="image"``.
+        """
+        super().__init__(
+            controller_addr,
+            worker_addr,
+            worker_id,
+            model_path,
+            model_names,
+            limit_worker_concurrency,
+            conv_template,
+            model_type="image",
+        )
+        # Use the GPU when torch reports one, otherwise fall back to the CPU.
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.pipe = ZImagePipeline.from_pretrained(
+            model_path, torch_dtype=torch.bfloat16
+        ).to(self.device)
+
+        logger.warning(f"模型：{model_names[0]}")
+
+    @staticmethod
+    def _parse_size(size):
+        """Parse a ``"<width>x<height>"`` string into a ``(width, height)`` pair.
+
+        The separator is matched case-insensitively and surrounding
+        whitespace is ignored.
+
+        Raises:
+            ValueError: if ``size`` is not exactly two positive integers
+                joined by ``x``.
+        """
+        parts = str(size).strip().lower().split("x")
+        try:
+            # Unpacking also rejects too few / too many components.
+            width, height = (int(part) for part in parts)
+        except ValueError:
+            raise ValueError(
+                f"invalid size {size!r}: expected '<width>x<height>', "
+                "e.g. '1664x928'"
+            ) from None
+        if width <= 0 or height <= 0:
+            raise ValueError(
+                f"invalid size {size!r}: width and height must be positive"
+            )
+        return width, height
+
+    async def get_image_output(self, params):
+        """Generate one image for ``params["prompt"]`` and package the result.
+
+        Args:
+            params: request dict. Reads ``prompt`` (required),
+                ``response_format`` (defaults to ``"b64_json"``) and ``size``
+                (optional ``"<width>x<height>"`` string; when absent or
+                empty, no width/height is passed to the pipeline).
+
+        Returns:
+            A dict with ``created``, ``data`` and ``usage`` keys. For
+            ``"b64_json"`` the single ``data`` entry holds the base64-encoded
+            image; for ``"url"`` the image is saved as a PNG under
+            ``STATIC_DIR`` and the entry holds a URL built from the
+            ``WORKER_HOST`` / ``WORKER_PORT`` environment variables. Any
+            other ``response_format`` yields an empty dict.
+
+        Raises:
+            KeyError: if ``prompt`` is missing, or if ``WORKER_HOST`` /
+                ``WORKER_PORT`` are unset for the ``"url"`` format.
+            ValueError: if ``size`` is present but malformed.
+        """
+        self.call_ct += 1
+        response_format = params.get("response_format", "b64_json")
+        inputs = {
+            "prompt": params["prompt"],
+            "negative_prompt": " ",
+            "num_inference_steps": 8,
+            "guidance_scale": 0.0,
+            # The generator is seeded with a fixed value on every request.
+            "generator": torch.Generator(self.device).manual_seed(42),
+        }
+        size = params.get("size", None)
+        if size:
+            width, height = self._parse_size(size)
+            inputs.update({"width": width, "height": height})
+        # The pipeline call is synchronous; run it in a worker thread so the
+        # event loop is not blocked while it executes.
+        output = await asyncio.to_thread(self.pipe, **inputs)
+        image = output.images[0]
+
+        if response_format == "b64_json":
+            # Convert the PIL image to base64.
+            data = [{"b64_json": pil_to_base64(pil_img=image)}]
+        elif response_format == "url":
+            # Generate a unique file name (avoids collisions).
+            file_name = str(uuid.uuid4()) + ".png"
+            save_path = STATIC_DIR / file_name
+            image.save(save_path, format="PNG")
+            WORKER_PORT = os.environ["WORKER_PORT"]
+            WORKER_HOST = os.environ["WORKER_HOST"]
+            url = f"http://{WORKER_HOST}:{WORKER_PORT}/static/{file_name}"
+            data = [{"url": url}]
+        else:
+            # Unrecognised formats keep returning an empty payload.
+            return {}
+
+        return {
+            "created": shortuuid.random(),
+            "data": data,
+            # Token usage is not tracked; every counter is reported as 0.
+            "usage": {
+                "total_tokens": 0,
+                "input_tokens": 0,
+                "output_tokens": 0,
+                "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
+            },
+        }
+
+
+# Script entry point: start the worker through the class-level run().
+# NOTE(review): run() is not defined in this file; presumably inherited from
+# ModelWorkerBase — confirm.
+if __name__ == "__main__":
+    ZImageWorker.run()
diff --git a/gpt_server/openai_api_protocol/custom_api_protocol.py b/gpt_server/openai_api_protocol/custom_api_protocol.py
@@ -274,6 +274,7 @@ class ImagesGenRequest(BaseModel):
         default="url",
         description="生成图像时返回的格式。必须为“ur”或“b64_json”之一。URL仅在图像生成后60分钟内有效。",
     )
+    size: str | None = None
 
 
 # copy from https://github.com/remsky/Kokoro-FastAPI/blob/master/api/src/routers/openai_compatible.py
diff --git a/gpt_server/serving/openai_api_server.py b/gpt_server/serving/openai_api_server.py
@@ -1274,6 +1274,7 @@ async def images_generations(request: ImagesGenRequest):
         "prompt": request.prompt,
         "output_format": request.output_format,
         "response_format": request.response_format,
+        "size": request.size,
     }
     result = await get_images_gen(payload=payload)
     return result
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "gpt_server"
-version = "0.6.7"
+version = "0.6.8"
 description = "gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架。"
 readme = "README.md"
 license = { text = "Apache 2.0" }
@@ -55,6 +55,9 @@ gpt_server = "gpt_server.cli:main"
 url = "https://pypi.tuna.tsinghua.edu.cn/simple"
 default = true
 
+[tool.uv.sources]
+diffusers = { git = "https://gitee.com/liuyu_1997/diffusers.git" }
+
 # [[tool.uv.index]]
 # name = "vllm-custom"
 # url = "https://wheels.vllm.ai/006e7a34aeb3e905ca4131a3251fe079f0511e2f"
diff --git a/tests/test_image_gen.py b/tests/test_image_gen.py
@@ -4,12 +4,14 @@
 client = OpenAI(api_key="EMPTY", base_url="http://localhost:8082/v1")
 # 两种响应方式
 ## response_format = "url"    默认为 url
-img = client.images.generate(model="flux", prompt="A red pig", response_format="url")
-print(img.data[0])
-## response_format = "b64_json"
+prompt = "身着粉色汉服、精致刺绣的中国年轻女子。无可挑剔的妆容，额头上的红色花卉图案。精致的高髻，金凤头饰，红花，珠子。持有圆形折扇，上面有女士、树木、鸟。霓虹灯闪电灯（⚡️），明亮的黄色光芒，位于伸出的左手掌上方。室外夜景柔和，剪影的西安大雁塔，远处的七彩灯光模糊。"
+model = "z_image"
 img = client.images.generate(
-    model="flux", prompt="A red pig", response_format="b64_json"
+    model=model, prompt=prompt, response_format="url", size="1664x928"
 )
+print(img.data[0])
+# response_format = "b64_json"
+img = client.images.generate(model=model, prompt=prompt, response_format="b64_json")
 image_bytes = base64.b64decode(img.data[0].b64_json)
 with open("output.png", "wb") as f:
     f.write(image_bytes)
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -274,6 +274,7 @@ class ImagesGenRequest(BaseModel):`
`274`	`274`	`default="url",`
`275`	`275`	`description="生成图像时返回的格式。必须为“ur”或“b64_json”之一。URL仅在图像生成后60分钟内有效。",`
`276`	`276`	`)`
	`277`	`+ size: str \| None = None`
`277`	`278`
`278`	`279`
`279`	`280`	`# copy from https://github.com/remsky/Kokoro-FastAPI/blob/master/api/src/routers/openai_compatible.py`
Original file line number	Diff line number	Diff line change
`@@ -1274,6 +1274,7 @@ async def images_generations(request: ImagesGenRequest):`
`1274`	`1274`	`"prompt": request.prompt,`
`1275`	`1275`	`"output_format": request.output_format,`
`1276`	`1276`	`"response_format": request.response_format,`
	`1277`	`+ "size": request.size,`
`1277`	`1278`	`}`
`1278`	`1279`	`result = await get_images_gen(payload=payload)`
`1279`	`1280`	`return result`