Skip to content

Commit 28ccbc1

Browse files
authored
fixed offset bug in IndexToGrid - see PR2013 (#2021)
* fixed offset bug in IndexToGrid - see PR2013 Signed-off-by: Ken <ken.museth@gmail.com> * added pendingchanges Signed-off-by: Ken <ken.museth@gmail.com> * improved documentation Signed-off-by: Ken <ken.museth@gmail.com> * minor update to documentation Signed-off-by: Ken <ken.museth@gmail.com> * addressed review comments Signed-off-by: Ken <ken.museth@gmail.com> --------- Signed-off-by: Ken <ken.museth@gmail.com>
1 parent 09f728e commit 28ccbc1

File tree

3 files changed

+63
-41
lines changed

3 files changed

+63
-41
lines changed

nanovdb/nanovdb/tools/cuda/IndexToGrid.cuh

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -176,16 +176,25 @@ template<typename SrcBuildT, typename DstBuildT>
176176
__global__ void processRootTilesKernel(typename IndexToGrid<SrcBuildT>::NodeAccessor *nodeAcc,
177177
const typename BuildToValueMap<DstBuildT>::type *srcValues)
178178
{
179-
const auto tid = blockIdx.x;
179+
const auto tileID = blockIdx.x, tileCount = nodeAcc->nodeCount[3];// note: tileID != childID!
180+
NANOVDB_ASSERT(tileID < tileCount);
180181

181-
// Process children and tiles
182-
const auto &srcTile = *nodeAcc->srcRoot().tile(tid);
183-
auto &dstTile = *nodeAcc->template dstRoot<DstBuildT>().tile(tid);
182+
// Process child nodes and tiles of the root node
183+
const auto &srcTile = *nodeAcc->srcRoot().tile(tileID);
184+
auto &dstTile = *nodeAcc->template dstRoot<DstBuildT>().tile(tileID);
184185
dstTile.key = srcTile.key;
185186
if (srcTile.child) {
186-
dstTile.child = sizeof(NanoRoot<DstBuildT>) + sizeof(NanoRoot<DstBuildT>::Tile)*((srcTile.child - sizeof(NanoRoot<SrcBuildT>))/sizeof(NanoRoot<SrcBuildT>::Tile));
187-
dstTile.value = srcValues[0];// set to background
188-
dstTile.state = false;
187+
// |<--NanoRoot-->|<--Tile[0]...Tile[tileCount-1]-->|<--Child[0]...child[childID-1]-->|
188+
// |<-------------------- offset -------------------|
189+
// |<-------------------------------- Tile::child ------------------------------------|
190+
// |<------ Tile::child-offset ----->|
191+
// |<--- childID x sizeof(ChildT) -->|
192+
uint64_t offset = sizeof(NanoRoot<SrcBuildT>) + tileCount*sizeof(NanoRoot<SrcBuildT>::Tile);// source offset
193+
const uint64_t childID = (srcTile.child - offset)/sizeof(NanoRoot<SrcBuildT>::ChildNodeType);// derived from source offset
194+
offset = sizeof(NanoRoot<DstBuildT>) + tileCount*sizeof(NanoRoot<DstBuildT>::Tile);// destination offset
195+
dstTile.child = offset + childID*sizeof(NanoRoot<DstBuildT>::ChildNodeType);
196+
dstTile.value = srcValues[0];// set to background
197+
dstTile.state = false;
189198
} else {
190199
dstTile.child = 0;// i.e. no child node
191200
dstTile.value = srcValues[srcTile.value];

nanovdb/nanovdb/unittest/TestNanoVDB.cu

Lines changed: 45 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,41 +1062,52 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueOnIndex)
10621062
idxHdl.deviceUpload();
10631063
auto *idxGrid = idxHdl.grid<BuildT>();
10641064
EXPECT_TRUE(idxGrid);
1065-
//timer.restart("Create value list on CPU");
1066-
float *values = new float[idxGrid->valueCount()], *d_values = nullptr;
1067-
values[0] = floatGrid->tree().root().background();
1068-
for (auto it = floatGrid->indexBBox().begin(); it; ++it) {
1069-
EXPECT_EQ(acc.isActive(*it), idxGrid->tree().isActive(*it));
1070-
if (acc.isActive(*it)) {
1071-
const uint64_t idx = idxGrid->tree().getValue(*it);
1072-
EXPECT_TRUE(idx < idxGrid->valueCount());
1073-
values[idx] = acc.getValue(*it);
1065+
1066+
auto checkConvertedValueGrid = [&](const auto& fromOriginalData, const auto& toOriginalData) {
1067+
using FloatType = decltype(fromOriginalData(0.0f));
1068+
1069+
//timer.restart("Create value list on CPU");
1070+
FloatType *values = new FloatType[idxGrid->valueCount()], *d_values = nullptr;
1071+
values[0] = fromOriginalData(floatGrid->tree().root().background());
1072+
for (auto it = floatGrid->indexBBox().begin(); it; ++it) {
1073+
EXPECT_EQ(acc.isActive(*it), idxGrid->tree().isActive(*it));
1074+
if (acc.isActive(*it)) {
1075+
const uint64_t idx = idxGrid->tree().getValue(*it);
1076+
EXPECT_TRUE(idx < idxGrid->valueCount());
1077+
values[idx] = fromOriginalData(acc.getValue(*it));
1078+
}
10741079
}
1075-
}
1076-
//timer.restart("Allocate and copy values from CPU to GPU");
1077-
cudaCheck(cudaMalloc((void**)&d_values, idxGrid->valueCount()*sizeof(float)));
1078-
cudaCheck(cudaMemcpy(d_values, values, idxGrid->valueCount()*sizeof(float), cudaMemcpyHostToDevice));
1079-
EXPECT_FALSE(idxHdl.deviceGrid<float>());
1080-
auto *d_idxGrid = idxHdl.deviceGrid<BuildT>();
1081-
EXPECT_TRUE(d_idxGrid);
1082-
//timer.restart("Call CudaIndexToGrid");
1083-
auto hdl = nanovdb::tools::cuda::indexToGrid<float>(d_idxGrid, d_values);
1084-
//timer.restart("unit-test");
1085-
EXPECT_FALSE(hdl.grid<float>());// no host grid
1086-
EXPECT_TRUE(hdl.deviceGrid<float>());
1087-
hdl.deviceDownload();
1088-
auto *floatGrid2 = hdl.grid<float>();
1089-
EXPECT_TRUE(floatGrid2);
1090-
auto acc2 = floatGrid2->getAccessor();
1091-
EXPECT_EQ(floatGrid->indexBBox(), floatGrid2->indexBBox());
1092-
EXPECT_EQ(floatGrid->worldBBox(), floatGrid2->worldBBox());
1093-
EXPECT_EQ(floatGrid->tree().root().background(), floatGrid2->tree().root().background());
1094-
for (auto it = floatGrid->indexBBox().begin(); it; ++it) {
1095-
EXPECT_EQ(acc.isActive(*it), acc2.isActive(*it));
1096-
if (acc.isActive(*it)) EXPECT_EQ(acc.getValue(*it), acc2.getValue(*it));
1097-
}
1098-
//timer.stop();
1099-
cudaFree(d_values);
1080+
//timer.restart("Allocate and copy values from CPU to GPU");
1081+
cudaCheck(cudaMalloc((void**)&d_values, idxGrid->valueCount()*sizeof(FloatType)));
1082+
cudaCheck(cudaMemcpy(d_values, values, idxGrid->valueCount()*sizeof(FloatType), cudaMemcpyHostToDevice));
1083+
EXPECT_FALSE(idxHdl.deviceGrid<FloatType>());
1084+
auto *d_idxGrid = idxHdl.deviceGrid<BuildT>();
1085+
EXPECT_TRUE(d_idxGrid);
1086+
//timer.restart("Call CudaIndexToGrid");
1087+
auto hdl = nanovdb::tools::cuda::indexToGrid<FloatType>(d_idxGrid, d_values);
1088+
//timer.restart("unit-test");
1089+
EXPECT_FALSE(hdl.template grid<FloatType>());// no host grid
1090+
EXPECT_TRUE(hdl.template deviceGrid<FloatType>());
1091+
hdl.deviceDownload();
1092+
auto *floatGrid2 = hdl.template grid<FloatType>();
1093+
EXPECT_TRUE(floatGrid2);
1094+
auto acc2 = floatGrid2->getAccessor();
1095+
EXPECT_EQ(floatGrid->indexBBox(), floatGrid2->indexBBox());
1096+
EXPECT_EQ(floatGrid->worldBBox(), floatGrid2->worldBBox());
1097+
EXPECT_EQ(floatGrid->tree().root().background(), toOriginalData(floatGrid2->tree().root().background()));
1098+
for (auto it = floatGrid->indexBBox().begin(); it; ++it) {
1099+
EXPECT_EQ(acc.isActive(*it), acc2.isActive(*it));
1100+
if (acc.isActive(*it)) EXPECT_EQ(acc.getValue(*it), toOriginalData(acc2.getValue(*it)));
1101+
}
1102+
//timer.stop();
1103+
cudaFree(d_values);
1104+
};// checkConvertedValueGrid lambda
1105+
checkConvertedValueGrid([](float x) { return x; }, [](float x) { return x; });
1106+
checkConvertedValueGrid([](double x) { return float(x); }, [](float x) {return double(x); });
1107+
1108+
// Convert index grid to grid of Vec3fs, whereas the original float data is just stored in all components of a Vec3f.
1109+
// This test covers code in indexToGrid that is only relevant if the size of grid data changes and does not align.
1110+
checkConvertedValueGrid([](float x) { return nanovdb::Vec3f(x); }, [](const nanovdb::Vec3f& x) { return x[0]; });
11001111
}// CudaPointToGrid_ValueOnIndex
11011112

11021113
TEST(TestNanoVDBCUDA, CudaSignedFloodFill)

pendingchanges/nanovdb.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ Added functions to independently stop and compute the elapsed time for timer cla
66

77
Fixed ostream specializations being hidden within the nanovdb namespace
88
Replaced CUB's CachingDeviceAllocator with the default asynchronous stream ordered allocator in PointsToGrid for improved performance
9+
10+
Fixed offset bug in nanovdb::tools::cuda::IndexToGrid related to non-POD build types (contributed by Andreas Henne)

0 commit comments

Comments
 (0)