AnswerDotAI
diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp
Lines changed: 3 additions & 5 deletions
diff --git a/examples/matmul/run.cpp b/examples/matmul/run.cpp
Lines changed: 33 additions & 12 deletions
diff --git a/examples/webgpu_from_scratch/CMakeLists.txt b/examples/webgpu_from_scratch/CMakeLists.txt
Lines changed: 0 additions & 21 deletions
diff --git a/examples/webgpu_from_scratch/Makefile b/examples/webgpu_from_scratch/Makefile
Lines changed: 0 additions & 8 deletions
@@ -3,9 +3,7 @@
 #include <cstdio>
 #include <future>
 
-using namespace gpu; // createContext, createTensor, createKernel,
-                     // createShader, dispatchKernel, wait, toCPU
-                     // Tensor, Kernel, Context, Shape, kf32
+using namespace gpu;
 
 static const char *kGelu = R"(
 const GELU_SCALING_FACTOR: f32 = 0.7978845608028654; // sqrt(2.0 / PI)
@@ -29,6 +27,7 @@ int main(int argc, char **argv) {
   printf("\nHello gpu.cpp!\n");
   printf("--------------\n\n");
 
+  // std::unique_ptr<Context> ctx = createContext();
   Context ctx = createContext();
   static constexpr size_t N = 10000;
   std::array<float, N> inputArr, outputArr;
@@ -41,7 +40,7 @@ int main(int argc, char **argv) {
   std::future<void> future = promise.get_future();
   Kernel op = createKernel(ctx, {kGelu, 256, kf32},
                            Bindings{input, output},
-                           /* nWorkgroups */ {cdiv(N, 256), 1, 1});
+                           {cdiv(N, 256), 1, 1});
   dispatchKernel(ctx, op, promise);
   wait(ctx, future);
   toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
@@ -50,5 +49,4 @@ int main(int argc, char **argv) {
   }
   printf("  ...\n\n");
   printf("Computed %zu values of GELU(x)\n\n", N);
-  return 0;
 }
@@ -792,13 +792,40 @@ void runTest(int version, size_t M, size_t K, size_t N,
   }
 
   // Allocate GPU buffers and copy data
-  Context ctx = createContext(
-      {}, {},
-      /*device descriptor, enabling f16 in WGSL*/
-      {
+  WGPUDeviceDescriptor devDescriptor = {};
+  devDescriptor.requiredFeatureCount = 1;
+  devDescriptor.requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data();
+
+  Context ctx;
+  if (numtype == kf16) {
+    ctx = createContext(
+        {}, {},
+        /*device descriptor, enabling f16 in WGSL*/
+        {
           .requiredFeatureCount = 1,
-          .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data(),
-      });
+          .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data()
+        });
+    if (ctx.adapterStatus != WGPURequestAdapterStatus_Success) {
+      LOG(kDefLog, kError, "Failed to create adapter with f16 support, try running an f32 test instead (`export MATMUL_VERSION=9).");
+      exit(1);
+    }
+    if (ctx.deviceStatus != WGPURequestDeviceStatus_Success) {
+      LOG(kDefLog, kError, "Failed to create device with f16 support, try running an f32 test instead. (`export MATMUL_VERSION=9)");
+      exit(1);
+    }
+  }
+
+  if (numtype == kf32) {
+    ctx = createContext({}, {}, {});
+    if (ctx.adapterStatus != WGPURequestAdapterStatus_Success ||
+        ctx.deviceStatus != WGPURequestDeviceStatus_Success) {
+      LOG(kDefLog, kError, "Failed to create adapter or device");
+      // stop execution
+      exit(1);
+    } else {
+      LOG(kDefLog, kInfo, "Successfully created adapter and device");
+    }
+  } 
 
   Tensor input = createTensor(ctx, Shape{M, K}, numtype, inputPtr.get());
   Tensor weights = createTensor(ctx, Shape{N, K}, numtype, weightsPtr.get()); // column-major
@@ -810,8 +837,6 @@ void runTest(int version, size_t M, size_t K, size_t N,
 #endif
 
   // Initialize Kernel and bind GPU buffers
-
-
   // pre-allocate for async dispatch
   std::array<std::promise<void>, nIter> promises;
   std::array<std::future<void>, nIter> futures;
@@ -823,10 +848,6 @@ void runTest(int version, size_t M, size_t K, size_t N,
     kernels[i] = selectMatmul(ctx, version, {input, weights, outputs[i]}, M, K, N, numtype);
   }
 
-#ifndef METAL_PROFILER
-  printf("[ Press enter to start tests ... ]\n");
-  getchar();
-#endif
   LOG(kDefLog, kInfo, "Dispatching Kernel version %d: %s, %d iterations ...",
       version, versionToStr(version).c_str(), nIter);