
Commit e61e809

format
1 parent 16feb9e commit e61e809

1 file changed: +149 −145 lines changed


test/test_gpu.cpp

Lines changed: 149 additions & 145 deletions
@@ -1,7 +1,7 @@
 #include "gpu.hpp"
 #include <array>
-#include <cstdio>
 #include <cassert>
+#include <cstdio>
 #include <cstring>
 #include <future>
 #include <vector>
@@ -24,169 +24,173 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
 
 // Test using the overload that takes a Tensor.
 void testToCPUWithTensor() {
-  LOG(kDefLog, kInfo, "Running testToCPUWithTensor...");
-
-  // Create a real GPU context.
-#ifdef USE_DAWN_API
-  Context ctx = createContextByGpuIdx(0);
-#else
-  Context ctx = createContext();
-#endif
-
-  constexpr size_t N = 1024;
-  std::array<float, N> inputData, outputData;
-  for (size_t i = 0; i < N; ++i) {
-    inputData[i] = static_cast<float>(i);
-    outputData[i] = 0.0f;
-  }
-
-  // Create input and output tensors.
-  Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data());
-  Tensor outputTensor = createTensor(ctx, Shape{N}, kf32);
-
-  // Create and dispatch the copy kernel.
-  Kernel copyKernel = createKernel(ctx, {kCopyKernel, 256, kf32},
-                                   Bindings{inputTensor, outputTensor},
-                                   {cdiv(N, 256), 1, 1});
-  dispatchKernel(ctx, copyKernel);
-
-  // Synchronously copy GPU output to CPU using the tensor overload.
-  toCPU(ctx, outputTensor, outputData.data(), sizeof(outputData));
-
-  // Verify the output matches the input.
-  for (size_t i = 0; i < N; ++i) {
-    LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]);
-    LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
-    assert(outputData[i] == inputData[i]);
-  }
-  LOG(kDefLog, kInfo, "testToCPUWithTensor passed.");
+  LOG(kDefLog, kInfo, "Running testToCPUWithTensor...");
+
+  // Create a real GPU context.
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<float, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<float>(i);
+    outputData[i] = 0.0f;
+  }
+
+  // Create input and output tensors.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data());
+  Tensor outputTensor = createTensor(ctx, Shape{N}, kf32);
+
+  // Create and dispatch the copy kernel.
+  Kernel copyKernel =
+      createKernel(ctx, {kCopyKernel, 256, kf32},
+                   Bindings{inputTensor, outputTensor}, {cdiv(N, 256), 1, 1});
+  dispatchKernel(ctx, copyKernel);
+
+  // Synchronously copy GPU output to CPU using the tensor overload.
+  toCPU(ctx, outputTensor, outputData.data(), sizeof(outputData));
+
+  // Verify the output matches the input.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithTensor passed.");
 }
 
 // Test using the overload that takes a raw GPU buffer.
 // We reuse the Tensor's underlying buffer for this test.
 void testToCPUWithBuffer() {
-  LOG(kDefLog, kInfo, "Running testToCPUWithBuffer...");
-
-#ifdef USE_DAWN_API
-  Context ctx = createContextByGpuIdx(0);
-#else
-  Context ctx = createContext();
-#endif
-
-  constexpr size_t N = 1024;
-  std::array<float, N> data, outputData;
-  for (size_t i = 0; i < N; ++i) {
-    data[i] = static_cast<float>(i * 2);
-    outputData[i] = 0.0f;
-  }
-
-  // Create a tensor to allocate a GPU buffer and initialize it.
-  Tensor tensor = createTensor(ctx, Shape{N}, kf32, data.data());
-
-  // Now extract the raw GPU buffer from the tensor.
-  WGPUBuffer gpuBuffer = tensor.data.buffer;
-
-  // Use the WGPUBuffer overload. This call returns a future.
-  auto future = toCPUAsync(ctx, gpuBuffer, outputData.data(), sizeof(outputData), 0);
-  wait(ctx, future);
-
-  // Verify that the CPU output matches the original data.
-  for (size_t i = 0; i < N; ++i) {
-    LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
-    assert(outputData[i] == data[i]);
-  }
-  LOG(kDefLog, kInfo, "testToCPUWithBuffer passed.");
+  LOG(kDefLog, kInfo, "Running testToCPUWithBuffer...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<float, N> data, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    data[i] = static_cast<float>(i * 2);
+    outputData[i] = 0.0f;
+  }
+
+  // Create a tensor to allocate a GPU buffer and initialize it.
+  Tensor tensor = createTensor(ctx, Shape{N}, kf32, data.data());
+
+  // Now extract the raw GPU buffer from the tensor.
+  WGPUBuffer gpuBuffer = tensor.data.buffer;
+
+  // Use the WGPUBuffer overload. This call returns a future.
+  auto future =
+      toCPUAsync(ctx, gpuBuffer, outputData.data(), sizeof(outputData), 0);
+  wait(ctx, future);
+
+  // Verify that the CPU output matches the original data.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
+    assert(outputData[i] == data[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithBuffer passed.");
 }
 
 void testToCPUWithTensorSourceOffset() {
-  LOG(kDefLog, kInfo, "Running testToCPUWithTensorSourceOffset...");
+  LOG(kDefLog, kInfo, "Running testToCPUWithTensorSourceOffset...");
 #ifdef USE_DAWN_API
-  Context ctx = createContextByGpuIdx(0);
+  Context ctx = createContextByGpuIdx(0);
 #else
-  Context ctx = createContext();
+  Context ctx = createContext();
 #endif
 
-  constexpr size_t numElements = 25;
-  constexpr size_t sourceOffsetElements = 5; // Skip first 5 elements
-  constexpr size_t copyCount = 10; // Number of floats to copy
-  size_t copySize = copyCount * sizeof(float);
-
-  // Create an input array with known data.
-  std::array<float, numElements> inputData{};
-  for (size_t i = 0; i < numElements; ++i) {
-    inputData[i] = static_cast<float>(i + 50); // Arbitrary values
-  }
-  // Create a tensor from the full data.
-  Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data());
-
-  // Allocate a destination CPU buffer exactly as large as the data we want to copy.
-  std::vector<float> cpuOutput(copyCount, -1.0f);
-
-  // Set sourceOffset to skip the first few float elements
-  size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float);
-  // Call the tensor overload with sourceOffset and destOffset = 0.
-  auto future = toCPUAsync(ctx, tensor, cpuOutput.data(), copySize, sourceOffsetBytes);
-  wait(ctx, future);
-
-  // Verify the copied data matches the expected subset.
-  for (size_t i = 0; i < copyCount; ++i) {
-    float expected = inputData[sourceOffsetElements + i];
-    float actual = cpuOutput[i];
-    LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
-    LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
-    assert(expected == actual);
-  }
-  LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed.");
+  constexpr size_t numElements = 25;
+  constexpr size_t sourceOffsetElements = 5; // Skip first 5 elements
+  constexpr size_t copyCount = 10; // Number of floats to copy
+  size_t copySize = copyCount * sizeof(float);
+
+  // Create an input array with known data.
+  std::array<float, numElements> inputData{};
+  for (size_t i = 0; i < numElements; ++i) {
+    inputData[i] = static_cast<float>(i + 50); // Arbitrary values
+  }
+  // Create a tensor from the full data.
+  Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data());
+
+  // Allocate a destination CPU buffer exactly as large as the data we want to
+  // copy.
+  std::vector<float> cpuOutput(copyCount, -1.0f);
+
+  // Set sourceOffset to skip the first few float elements
+  size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float);
+  // Call the tensor overload with sourceOffset and destOffset = 0.
+  auto future =
+      toCPUAsync(ctx, tensor, cpuOutput.data(), copySize, sourceOffsetBytes);
+  wait(ctx, future);
+
+  // Verify the copied data matches the expected subset.
+  for (size_t i = 0; i < copyCount; ++i) {
+    float expected = inputData[sourceOffsetElements + i];
+    float actual = cpuOutput[i];
+    LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
+    LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
+    assert(expected == actual);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed.");
 }
 
 void testToCPUWithBufferSourceOffset() {
-  LOG(kDefLog, kInfo, "Running testToCPUWithBufferSourceOffset...");
+  LOG(kDefLog, kInfo, "Running testToCPUWithBufferSourceOffset...");
 #ifdef USE_DAWN_API
-  Context ctx = createContextByGpuIdx(0);
+  Context ctx = createContextByGpuIdx(0);
 #else
-  Context ctx = createContext();
+  Context ctx = createContext();
 #endif
 
-  constexpr size_t numElements = 30;
-  constexpr size_t sourceOffsetElements = 7; // Skip first 7 elements
-  constexpr size_t copyCount = 12; // Number of floats to copy
-  size_t copySize = copyCount * sizeof(float);
-
-  // Create an input array with arbitrary data.
-  std::array<float, numElements> inputData{};
-  for (size_t i = 0; i < numElements; ++i) {
-    inputData[i] = static_cast<float>(i + 100);
-  }
-  // Create a tensor to initialize a GPU buffer.
-  Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data());
-  // Extract the raw GPU buffer from the tensor.
-  WGPUBuffer buffer = tensor.data.buffer;
-
-  // Allocate a destination CPU buffer exactly as large as needed.
-  std::vector<float> cpuOutput(copyCount, -2.0f);
-  size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float);
-
-  // Call the buffer overload with sourceOffset and destOffset = 0.
-  auto future = toCPUAsync(ctx, buffer, cpuOutput.data(), copySize, sourceOffsetBytes);
-  wait(ctx, future);
-
-  // Verify that the copied data matches the expected subset.
-  for (size_t i = 0; i < copyCount; ++i) {
-    float expected = inputData[sourceOffsetElements + i];
-    float actual = cpuOutput[i];
-    LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
-    LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
-    assert(expected == actual);
-  }
-  LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed.");
+  constexpr size_t numElements = 30;
+  constexpr size_t sourceOffsetElements = 7; // Skip first 7 elements
+  constexpr size_t copyCount = 12; // Number of floats to copy
+  size_t copySize = copyCount * sizeof(float);
+
+  // Create an input array with arbitrary data.
+  std::array<float, numElements> inputData{};
+  for (size_t i = 0; i < numElements; ++i) {
+    inputData[i] = static_cast<float>(i + 100);
+  }
+  // Create a tensor to initialize a GPU buffer.
+  Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data());
+  // Extract the raw GPU buffer from the tensor.
+  WGPUBuffer buffer = tensor.data.buffer;
+
+  // Allocate a destination CPU buffer exactly as large as needed.
+  std::vector<float> cpuOutput(copyCount, -2.0f);
+  size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float);
+
+  // Call the buffer overload with sourceOffset and destOffset = 0.
+  auto future =
+      toCPUAsync(ctx, buffer, cpuOutput.data(), copySize, sourceOffsetBytes);
+  wait(ctx, future);
+
+  // Verify that the copied data matches the expected subset.
+  for (size_t i = 0; i < copyCount; ++i) {
+    float expected = inputData[sourceOffsetElements + i];
+    float actual = cpuOutput[i];
+    LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
+    LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
+    assert(expected == actual);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed.");
 }
 
 int main() {
-  LOG(kDefLog, kInfo, "Running GPU integration tests...");
-  testToCPUWithTensor();
-  testToCPUWithBuffer();
-  testToCPUWithTensorSourceOffset();
-  testToCPUWithBufferSourceOffset();
-  LOG(kDefLog, kInfo, "All tests passed.");
-  return 0;
-}
+  LOG(kDefLog, kInfo, "Running GPU integration tests...");
+  testToCPUWithTensor();
+  testToCPUWithBuffer();
+  testToCPUWithTensorSourceOffset();
+  testToCPUWithBufferSourceOffset();
+  LOG(kDefLog, kInfo, "All tests passed.");
+  return 0;
+}
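
For orientation, the read-back pattern these tests exercise (stage data in a Tensor, then copy a byte range back to the CPU, optionally starting at a source offset) can be condensed into a short sketch. This is not part of the commit; it only reuses calls exactly as they appear in the diff (createContext, createTensor, toCPUAsync, wait), assumes the same declarations the elided top of test_gpu.cpp already brings into scope, and the local names (n, offsetElems, count, out) are illustrative.

// Hypothetical standalone sketch of the offset read-back pattern, not part of
// the commit. Kernel dispatch and the USE_DAWN_API path are omitted.
#include "gpu.hpp"
#include <array>
#include <cassert>
#include <vector>

int main() {
  Context ctx = createContext();

  // Stage 16 known floats in a GPU tensor.
  constexpr size_t n = 16;
  std::array<float, n> host{};
  for (size_t i = 0; i < n; ++i) {
    host[i] = static_cast<float>(i);
  }
  Tensor t = createTensor(ctx, Shape{n}, kf32, host.data());

  // Read back 4 floats starting at element 8; size and offset are in bytes.
  constexpr size_t offsetElems = 8;
  constexpr size_t count = 4;
  std::vector<float> out(count, 0.0f);
  auto future = toCPUAsync(ctx, t, out.data(), count * sizeof(float),
                           offsetElems * sizeof(float));
  wait(ctx, future);

  // The copied range should match the staged data starting at the offset.
  for (size_t i = 0; i < count; ++i) {
    assert(out[i] == host[offsetElems + i]);
  }
  return 0;
}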
