
 QK4_0 = 32
 def quantize_q4_0(x):
-    assert x.shape[-1] % QK4_0 == 0
+    assert x.shape[-1] % QK4_0 == 0 and x.shape[-1] > QK4_0
     x = x.reshape(-1, QK4_0)
     max = np.take_along_axis(x, np.argmax(np.abs(x), axis=-1)[:, np.newaxis], axis=-1)
     d = max / -8
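
As an aside, here is a minimal NumPy sketch of what a complete Q4_0 block quantizer plausibly looks like. The scale computation repeats the hunk above; the nibble packing (element j in the low nibble, element j + 16 in the high nibble) is assumed from ggml's Q4_0 block layout and is not part of this diff.

import numpy as np

QK4_0 = 32

def quantize_q4_0_sketch(x):
    # Illustrative sketch only, not the code from this PR.
    x = x.reshape(-1, QK4_0)
    # signed value with the largest magnitude in each block (as in the hunk above)
    max = np.take_along_axis(x, np.argmax(np.abs(x), axis=-1)[:, np.newaxis], axis=-1)
    d = max / -8
    # map each value to an unsigned 4-bit quant in [0, 15]
    with np.errstate(divide="ignore", invalid="ignore"):
        qs = np.where(d != 0, x / d, 0)
    qs = np.clip(qs + 8.5, 0, 15).astype(np.uint8)
    # assumed ggml Q4_0 packing: element j in the low nibble, element j + 16 in the high nibble
    packed = qs[:, :QK4_0 // 2] | (qs[:, QK4_0 // 2:] << 4)
    # each block is stored as a float16 scale followed by 16 packed bytes
    return d.astype(np.float16), packed
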
@@ -44,7 +44,7 @@ def quantize_q4_0(x):

 QK4_1 = 32
 def quantize_q4_1(x):
-    assert x.shape[-1] % QK4_1 == 0
+    assert x.shape[-1] % QK4_1 == 0 and x.shape[-1] > QK4_1
     x = x.reshape(-1, QK4_1)
     min = np.min(x, axis=-1, keepdims=True)
     max = np.max(x, axis=-1, keepdims=True)
@@ -59,7 +59,7 @@ def quantize_q4_1(x):

 QK5_0 = 32
 def quantize_q5_0(x):
-    assert x.shape[1] % QK5_0 == 0
+    assert x.shape[-1] % QK5_0 == 0 and x.shape[-1] > QK5_0
     x = x.reshape(-1, QK5_0)
     max = np.take_along_axis(x, np.argmax(np.abs(x), axis=-1)[:, np.newaxis], axis=-1)
     d = max / -16
@@ -76,7 +76,7 @@ def quantize_q5_0(x):

 QK5_1 = 32
 def quantize_q5_1(x):
-    assert x.shape[-1] % QK5_1 == 0
+    assert x.shape[-1] % QK5_1 == 0 and x.shape[-1] > QK5_1
     x = x.reshape(-1, QK5_1)
     min = np.min(x, axis=-1, keepdims=True)
     max = np.max(x, axis=-1, keepdims=True)
@@ -95,7 +95,7 @@ def quantize_q5_1(x):

 QK8_0 = 32
 def quantize_q8_0(x):
-    assert x.shape[-1] % QK8_0 == 0
+    assert x.shape[-1] % QK8_0 == 0 and x.shape[-1] > QK8_0
     x = x.reshape(-1, QK8_0)
     amax = np.max(np.abs(x), axis=-1, keepdims=True)
     d = amax / ((1 << 7) - 1)
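
In the same spirit, a hedged sketch of a complete Q8_0 block quantizer, assuming ggml's layout of one float16 scale plus 32 signed 8-bit quants per block; only the reshape and scale lines are taken from the hunk above.

import numpy as np

QK8_0 = 32

def quantize_q8_0_sketch(x):
    # Illustrative sketch only; rounding and storage details are assumed.
    x = x.reshape(-1, QK8_0)
    amax = np.max(np.abs(x), axis=-1, keepdims=True)
    d = amax / ((1 << 7) - 1)            # per-block scale: amax maps to 127
    with np.errstate(divide="ignore", invalid="ignore"):
        qs = np.where(d != 0, x / d, 0)
    qs = np.rint(qs).astype(np.int8)     # 32 signed 8-bit quants per block
    # assumed storage: one float16 scale followed by the 32 int8 values per block
    return d.astype(np.float16), qs
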
@@ -156,7 +156,10 @@ def get_alpha_comprod(linear_start=0.00085, linear_end=0.0120, timesteps=1000):
156156 "posterior_mean_coef2" ,
157157 "cond_stage_model.transformer.text_model.embeddings.position_ids" ,
158158 "model_ema.decay" ,
159- "model_ema.num_updates"
159+ "model_ema.num_updates" ,
160+ "control_model" ,
161+ "lora_te_text_model" ,
162+ "embedding_manager"
160163]
161164
162165def convert (model_path , out_type = None , out_file = None ):
@@ -182,6 +185,10 @@ def convert(model_path, out_type = None, out_file=None):
             out_type = "f32"
         elif weight.dtype == np.float16:
             out_type = "f16"
+        elif weight.dtype == np.float64:
+            out_type = "f32"
+        else:
+            raise Exception("unsupported weight type %s" % weight.dtype)
     if out_file == None:
         out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-model-{out_type}.bin"
         out_file = os.path.join(os.getcwd(), out_file)
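
The new float64 branch only chooses the output type string; presumably the tensor data itself still has to be narrowed to 32-bit floats before it is written, since GGML has no 64-bit float tensor type. A minimal sketch of that assumption (the helper name is invented):

import numpy as np

# Hypothetical helper, not from the diff: narrow float64 weights so they can be
# stored as f32 tensors in the output file.
def to_storable(data: np.ndarray) -> np.ndarray:
    if data.dtype == np.float64:
        return data.astype(np.float32)
    return data
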
@@ -207,6 +214,13 @@ def convert(model_path, out_type = None, out_file=None):
     for name in state_dict.keys():
         if not isinstance(state_dict[name], torch.Tensor):
             continue
+        skip = False
+        for unused_tensor in unused_tensors:
+            if name.startswith(unused_tensor):
+                skip = True
+                break
+        if skip:
+            continue
         if name in unused_tensors:
             continue
         data = state_dict[name].numpy()
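
The prefix loop added here runs before the existing exact-match check, so list entries such as "control_model", "lora_te_text_model" and "embedding_manager" now skip whole families of tensors. The same test could be written more compactly; a hypothetical equivalent:

def is_unused(name: str, unused_tensors: list[str]) -> bool:
    # True when the tensor name starts with any unused prefix,
    # e.g. "control_model", "lora_te_text_model" or "embedding_manager".
    return any(name.startswith(prefix) for prefix in unused_tensors)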