options support; correctness across dimensions; more testing

liaopeiyuan · liaopeiyuan · commit 1111319de36d · 2020-05-21T07:49:06.000-04:00
diff --git a/csrc/cpu/radius_cpu.cpp b/csrc/cpu/radius_cpu.cpp
@@ -1,27 +1,12 @@
 #include "radius_cpu.h"
-
+#include <algorithm>
 #include "utils.h"
 
-torch::Tensor radius_cpu(torch::Tensor q, torch::Tensor s, 
-             torch::Tensor ptr_x, torch::Tensor ptr_y, 
+torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support, 
 			 float radius, int max_num){
 
-	CHECK_CPU(q);
-	CHECK_CPU(s);
-
-	/*
-	x = torch.cat([x, 2 * r * batch_x.view(-1, 1).to(x.dtype)], dim=-1)
-    y = torch.cat([y, 2 * r * batch_y.view(-1, 1).to(y.dtype)], dim=-1)
-	*/
-
-	auto batch_x = ptr_x.clone().reshape({-1, 1});
-	auto batch_y = ptr_y.clone().reshape({-1, 1});
-
-	batch_x.mul_(2*radius);
-	batch_y.mul_(2*radius);
-
-	auto query = torch::cat({q,batch_x},-1);
-	auto support = torch::cat({s,batch_y},-1);
+	CHECK_CPU(query);
+	CHECK_CPU(support);
 
 	torch::Tensor out;
 	std::vector<long> neighbors_indices;
@@ -58,6 +43,7 @@ torch::Tensor radius_cpu(torch::Tensor q, torch::Tensor s,
 	return result;
 }
 
+
 void get_size_batch(const vector<long>& batch, vector<long>& res){
 
 	res.resize(batch[batch.size()-1]-batch[0]+1, 0);
@@ -74,4 +60,54 @@ void get_size_batch(const vector<long>& batch, vector<long>& res){
 		}
 	}
 	res[ind-batch[0]] = incr;
+}
+
+torch::Tensor batch_radius_cpu(torch::Tensor query,
+			       torch::Tensor support,
+			       torch::Tensor query_batch,
+			       torch::Tensor support_batch,
+			       float radius, int max_num) {
+
+	torch::Tensor out;
+	auto data_qb = query_batch.data_ptr<long>();
+	auto data_sb = support_batch.data_ptr<long>();
+	std::vector<long> query_batch_stl = std::vector<long>(data_qb, data_qb+query_batch.size(0));
+	std::vector<long> size_query_batch_stl;
+	get_size_batch(query_batch_stl, size_query_batch_stl);
+	std::vector<long> support_batch_stl = std::vector<long>(data_sb, data_sb+support_batch.size(0));
+	std::vector<long> size_support_batch_stl;
+	get_size_batch(support_batch_stl, size_support_batch_stl);
+	std::vector<long> neighbors_indices;
+	auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
+	int max_count = 0;
+
+	
+	AT_DISPATCH_ALL_TYPES(query.scalar_type(), "batch_radius_search", [&] {
+	auto data_q = query.data_ptr<scalar_t>();
+	auto data_s = support.data_ptr<scalar_t>();
+	std::vector<scalar_t> queries_stl = std::vector<scalar_t>(data_q,
+								  data_q + query.size(0)*query.size(1));
+	std::vector<scalar_t> supports_stl = std::vector<scalar_t>(data_s,
+								   data_s + support.size(0)*support.size(1));
+
+	int dim = torch::size(query, 1);
+	max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
+							    supports_stl,
+							    size_query_batch_stl,
+							    size_support_batch_stl,
+							    neighbors_indices,
+							    radius,
+								dim,
+							    max_num
+							    );
+	});
+
+	long* neighbors_indices_ptr = neighbors_indices.data();
+
+
+	const long long tsize = static_cast<long long>(neighbors_indices.size()/2);
+	out = torch::from_blob(neighbors_indices_ptr, {tsize, 2}, options=options);
+	out = out.t();
+
+	return out.clone();
 }
diff --git a/csrc/cpu/radius_cpu.h b/csrc/cpu/radius_cpu.h
@@ -7,5 +7,10 @@
 #include "compat.h"
 
 torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support,
-                         torch::Tensor ptr_x, torch::Tensor ptr_y, 
-			 			 float radius, int max_num);
+			 			 float radius, int max_num);
+
+torch::Tensor batch_radius_cpu(torch::Tensor query,
+			       torch::Tensor support,
+			       torch::Tensor query_batch,
+			       torch::Tensor support_batch,
+			       float radius, int max_num);
diff --git a/csrc/cpu/utils/cloud.h b/csrc/cpu/utils/cloud.h
@@ -24,18 +24,13 @@ struct PointCloud
 
 	void set(std::vector<scalar_t> new_pts, int dim){
 
-		// pts = std::vector<Point>((Point*)new_pts, (Point*)new_pts+new_pts.size()/3);
 		std::vector<std::vector<scalar_t>> temp(new_pts.size()/dim);
 		for(size_t i=0; i < new_pts.size(); i++){
 			if(i%dim == 0){
-
-				//Point point;
 				std::vector<scalar_t> point(dim);
-				//std::vector<scalar_t> vect(sizeof(scalar_t)*dim, 0)
-				//point.pt = temp;
+
 				for (size_t j = 0; j < (size_t)dim; j++) {
 					point[j]=new_pts[i+j];
-					//point.pt[j] = new_pts[i+j];
 				}
 				temp[i/dim] = point;
 			}
@@ -46,7 +41,6 @@ struct PointCloud
 	void set_batch(std::vector<scalar_t> new_pts, int begin, int size, int dim){
 		std::vector<std::vector<scalar_t>> temp(size);
 		for(int i=0; i < size; i++){
-			//std::vector<scalar_t> temp(sizeof(scalar_t)*dim, 0);
 			std::vector<scalar_t> point(dim);
 			for (size_t j = 0; j < (size_t)dim; j++) {
 					point[j] = new_pts[dim*(begin+i)+j];
diff --git a/csrc/cpu/utils/neighbors.cpp b/csrc/cpu/utils/neighbors.cpp
@@ -40,10 +40,10 @@ int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
 
 	// Search params
 	nanoflann::SearchParams search_params;
-	search_params.sorted = true;
+	// search_params.sorted = true;
 	std::vector< std::vector<std::pair<size_t, scalar_t> > > list_matches(pcd_query.pts.size());
 
-	float eps = 0.00001;
+	float eps = 0.000001;
 
 	// indices
 	size_t i0 = 0;
@@ -61,6 +61,12 @@ int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
 		std::vector<std::pair<size_t, scalar_t> > ret_matches;
 
 		const size_t nMatches = index->radiusSearch(query_pt, search_radius+eps, ret_matches, search_params);
+		
+		//cout << "radiusSearch(): radius=" << search_radius << " -> " << nMatches << " matches\n";
+		//for (size_t i = 0; i < nMatches; i++)
+		//	cout << "idx["<< i << "]=" << ret_matches[i].first << " dist["<< i << "]=" << ret_matches[i].second << endl;
+		//cout << "\n";
+		
 		list_matches[i0] = ret_matches;
 		if(max_count < nMatches) max_count = nMatches;
 		i0++;
@@ -107,4 +113,139 @@ int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
 
 
 
-}
+}
+
+template<typename scalar_t>
+int batch_nanoflann_neighbors (vector<scalar_t>& queries,
+                               vector<scalar_t>& supports,
+                               vector<long>& q_batches,
+                               vector<long>& s_batches,
+                               vector<long>& neighbors_indices,
+                               float radius, int dim, int max_num){
+
+
+// Initiate variables
+// ******************
+// indices
+	int i0 = 0;
+
+// Square radius
+	const scalar_t r2 = static_cast<scalar_t>(radius*radius);
+
+	// Counting vector
+	int max_count = 0;
+	float d2;
+
+
+	// batch index
+	long b = 0;
+	long sum_qb = 0;
+	long sum_sb = 0;
+
+	float eps = 0.000001;
+	// Nanoflann related variables
+	// ***************************
+
+	// CLoud variable
+	PointCloud<scalar_t> current_cloud;
+	PointCloud<scalar_t> query_pcd;
+	query_pcd.set(queries, dim);
+	vector<vector<pair<size_t, scalar_t> > > all_inds_dists(query_pcd.pts.size());
+
+	// Tree parameters
+	nanoflann::KDTreeSingleIndexAdaptorParams tree_params(10 /* max leaf */);
+
+	// KDTree type definition
+	typedef nanoflann::KDTreeSingleIndexAdaptor< nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t> > , PointCloud<scalar_t>> my_kd_tree_t;
+
+// Pointer to trees
+	my_kd_tree_t* index;
+    // Build KDTree for the first batch element
+	current_cloud.set_batch(supports, sum_sb, s_batches[b], dim);
+	index = new my_kd_tree_t(dim, current_cloud, tree_params);
+	index->buildIndex();
+// Search neigbors indices
+// ***********************
+// Search params
+	nanoflann::SearchParams search_params;
+	search_params.sorted = true;
+
+	for (auto& p0 : query_pcd.pts){
+// Check if we changed batch
+
+		scalar_t query_pt[dim];
+		std::copy(p0.begin(), p0.end(), query_pt); 
+
+		/*
+		std::cout << "\n ========== \n";
+		for(int i=0; i < dim; i++)
+			std::cout << query_pt[i] << '\n';
+		std::cout << "\n ========== \n";
+		*/
+	
+		if (i0 == sum_qb + q_batches[b]){
+			sum_qb += q_batches[b];
+			sum_sb += s_batches[b];
+			b++;
+
+// Change the points
+			current_cloud.pts.clear();
+			current_cloud.set_batch(supports, sum_sb, s_batches[b], dim);
+// Build KDTree of the current element of the batch
+			delete index;
+			index = new my_kd_tree_t(dim, current_cloud, tree_params);
+			index->buildIndex();
+		}
+// Initial guess of neighbors size
+		all_inds_dists[i0].reserve(max_count);
+// Find neighbors
+		size_t nMatches = index->radiusSearch(query_pt, r2+eps, all_inds_dists[i0], search_params);
+// Update max count
+
+		std::vector<std::pair<size_t, float> > indices_dists;
+		nanoflann::RadiusResultSet<float,size_t> resultSet(r2, indices_dists);
+
+		index->findNeighbors(resultSet, query_pt, search_params);
+
+		if (nMatches > max_count)
+			max_count = nMatches;
+// Increment query idx
+		i0++;
+	}
+	// how many neighbors do we keep
+	if(max_num > 0) {
+		max_count = max_num;
+	}
+// Reserve the memory
+	
+	int size = 0; // total number of edges
+	for (auto& inds_dists : all_inds_dists){
+		if(inds_dists.size() <= max_count)
+			size += inds_dists.size();
+		else
+			size += max_count;
+	}
+	neighbors_indices.resize(size * 2);
+	i0 = 0;
+	sum_sb = 0;
+	sum_qb = 0;
+	b = 0;
+	int u = 0;
+	for (auto& inds_dists : all_inds_dists){
+		if (i0 == sum_qb + q_batches[b]){
+			sum_qb += q_batches[b];
+			sum_sb += s_batches[b];
+			b++;
+		}
+		for (int j = 0; j < max_count; j++){
+			if (j < inds_dists.size()){
+				neighbors_indices[u] = inds_dists[j].first + sum_sb;
+				neighbors_indices[u + 1] = i0;
+				u += 2;
+			}
+		}
+		i0++;
+	}
+	
+	return max_count;
+}
diff --git a/csrc/cpu/utils/neighbors.h b/csrc/cpu/utils/neighbors.h
@@ -10,4 +10,12 @@ using namespace std;
 
 template<typename scalar_t>
 int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
-			vector<long>& neighbors_indices, float radius, int dim, int max_num, int mode);
+			vector<long>& neighbors_indices, float radius, int dim, int max_num);
+
+template<typename scalar_t>
+int batch_nanoflann_neighbors (vector<scalar_t>& queries,
+                               vector<scalar_t>& supports,
+                               vector<long>& q_batches,
+                               vector<long>& s_batches,
+                               vector<long>& neighbors_indices,
+                               float radius, int dim, int max_num);
diff --git a/csrc/radius.cpp b/csrc/radius.cpp
@@ -10,16 +10,48 @@
 PyMODINIT_FUNC PyInit__radius(void) { return NULL; }
 #endif
 
-torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
-                     torch::Tensor ptr_y, double r, int64_t max_num_neighbors) {
+torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::optional<torch::Tensor> ptr_x,
+                     torch::optional<torch::Tensor> ptr_y, double r, int64_t max_num_neighbors) {
   if (x.device().is_cuda()) {
 #ifdef WITH_CUDA
-    return radius_cuda(x, y, ptr_x, ptr_y, r, max_num_neighbors);
+    if (!(ptr_x.has_value()) && !(ptr_y.has_value())) {
+      auto batch_x = torch::tensor({0,torch::size(x,0)}).to(torch::kLong).to(torch::kCUDA);
+      auto batch_y = torch::tensor({0,torch::size(y,0)}).to(torch::kLong).to(torch::kCUDA);
+      return radius_cuda(x, y, batch_x, batch_y, r, max_num_neighbors);
+    }
+    else if (!(ptr_x.has_value())) {
+      auto batch_x = torch::tensor({0,torch::size(x,0)}).to(torch::kLong).to(torch::kCUDA);
+      auto batch_y = ptr_y.value();
+      return radius_cuda(x, y, batch_x, batch_y, r, max_num_neighbors);
+    }
+    else if (!(ptr_y.has_value())) {
+      auto batch_x = ptr_x.value();
+      auto batch_y = torch::tensor({0,torch::size(y,0)}).to(torch::kLong).to(torch::kCUDA);
+      return radius_cuda(x, y, batch_x, batch_y, r, max_num_neighbors);
+    }
+    auto batch_x = ptr_x.value();
+    auto batch_y = ptr_y.value();
+    return radius_cuda(x, y, batch_x, batch_y, r, max_num_neighbors);
 #else
     AT_ERROR("Not compiled with CUDA support");
 #endif
   } else {
-    return radius_cpu(x, y, ptr_x, ptr_y, r, max_num_neighbors);
+    if (!(ptr_x.has_value()) && !(ptr_y.has_value())) {
+      return radius_cpu(x,y,r,max_num_neighbors);
+    }
+    if (!(ptr_x.has_value())) {
+      auto batch_x = torch::zeros({torch::size(x,0)}).to(torch::kLong);
+      auto batch_y = ptr_y.value();
+      return batch_radius_cpu(x, y, batch_x, batch_y, r, max_num_neighbors);
+    }
+    else if (!(ptr_y.has_value())) {
+      auto batch_x = ptr_x.value();
+      auto batch_y = torch::zeros({torch::size(y,0)}).to(torch::kLong);
+      return batch_radius_cpu(x, y, batch_x, batch_y, r, max_num_neighbors);
+    }
+    auto batch_x = ptr_x.value();
+    auto batch_y = ptr_y.value();
+    return batch_radius_cpu(x, y, batch_x, batch_y, r, max_num_neighbors);
   }
 }
 
diff --git a/test/test_radius.py b/test/test_radius.py
diff --git a/torch_cluster/radius.py b/torch_cluster/radius.py