rusty1s
diff --git a/‎csrc/cpu/radius_cpu.cpp‎
Lines changed: 3 additions & 3 deletions b/‎csrc/cpu/radius_cpu.cpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎csrc/cpu/radius_cpu.h‎
Lines changed: 2 additions & 2 deletions b/‎csrc/cpu/radius_cpu.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎csrc/cpu/utils/neighbors.cpp‎
Lines changed: 175 additions & 37 deletions b/‎csrc/cpu/utils/neighbors.cpp‎
Lines changed: 175 additions & 37 deletions
diff --git a/‎csrc/cpu/utils/neighbors.h‎
Lines changed: 0 additions & 22 deletions b/‎csrc/cpu/utils/neighbors.h‎
Lines changed: 0 additions & 22 deletions
diff --git a/‎csrc/radius.cpp‎
Lines changed: 3 additions & 2 deletions b/‎csrc/radius.cpp‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎test/radius_test_large.pkl‎
960 KB b/‎test/radius_test_large.pkl‎
960 KB
@@ -5,7 +5,7 @@
 
 
 torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support, 
-			 double radius, int64_t max_num){
+			 double radius, int64_t max_num, int64_t n_threads){
 
 	CHECK_CPU(query);
 	CHECK_CPU(support);
@@ -26,7 +26,7 @@ torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support,
 
 	int dim = torch::size(query, 1);
 
-	max_count = nanoflann_neighbors<scalar_t>(queries_stl, supports_stl ,neighbors_indices, radius, dim, max_num);
+	max_count = nanoflann_neighbors<scalar_t>(queries_stl, supports_stl ,neighbors_indices, radius, dim, max_num, n_threads);
 
 	});
 
@@ -40,7 +40,7 @@ torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support,
 }
 
 
-void get_size_batch(const vector<long>& batch, vector<long>& res){
+void get_size_batch(const std::vector<long>& batch, std::vector<long>& res){
 
 	res.resize(batch[batch.size()-1]-batch[0]+1, 0);
 	long ind = batch[0];
 
@@ -1,13 +1,13 @@
 #pragma once
 
 #include <torch/extension.h>
-#include "utils/neighbors.h"
+//#include "utils/neighbors.h"
 #include "utils/neighbors.cpp"
 #include <iostream>
 #include "compat.h"
 
 torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support,
-			 			 double radius, int64_t max_num);
+			 			 double radius, int64_t max_num, int64_t n_threads);
 
 torch::Tensor batch_radius_cpu(torch::Tensor query,
 			       torch::Tensor support,
 
@@ -1,29 +1,94 @@
+#include "cloud.h"
+#include "nanoflann.hpp"
+#include <cstdint>
+#include <mutex>
+#include <set>
+#include <thread>
+
+typedef struct thread_struct { // job description handed to one thread_routine worker; the void* members are cast back to typed pointers there
+	void* kd_tree; // cast by thread_routine to the nanoflann KDTreeSingleIndexAdaptor that is searched
+	void* matches; // cast by thread_routine to vector<vector<pair<size_t, scalar_t>>>; slot i receives the matches of query i
+	void* queries; // cast by thread_routine to PointCloud<scalar_t>*; its pts[i] is dereferenced to get query point i
+	size_t* max_count; // shared running maximum of the per-query match counts
+	std::mutex* ct_m; // locked by thread_routine while it compares and updates *max_count
+	std::mutex* tree_m; // locked by thread_routine around each radiusSearch call on kd_tree
+	size_t start; // first query index this worker processes (inclusive)
+	size_t end; // one past the last query index this worker processes
+	double search_radius; // radius handed to radiusSearch; nanoflann_neighbors stores radius*radius here
+	bool small; // when true, thread_routine adds an eps of 0.000001 to search_radius before searching
+} thread_args;
 
-// 3D Version https://github.com/HuguesTHOMAS/KPConv
+// Worker routine of the multi-threaded radius search.
+//
+// Runs one radius search per query point with index in
+// [targs->start, targs->end) and stores the matches of query i in slot i of
+// the shared result table. The largest match count seen by this worker is
+// folded into *targs->max_count once, after all its queries are done.
+//
+// targs: type-erased job description. kd_tree, matches and queries must
+//        point to objects instantiated for the same scalar_t as this
+//        routine; ct_m and tree_m must outlive the call.
+template<typename scalar_t>
+void thread_routine(thread_args* targs) {
+	typedef nanoflann::KDTreeSingleIndexAdaptor< nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t> > , PointCloud<scalar_t>> my_kd_tree_t;
+	typedef std::vector< std::vector<std::pair<size_t, scalar_t> > > kd_pair;
+
+	// Recover the typed views of the type-erased pointers.
+	my_kd_tree_t* index = static_cast<my_kd_tree_t*>(targs->kd_tree);
+	kd_pair* matches = static_cast<kd_pair*>(targs->matches);
+	PointCloud<scalar_t>* pcd_query = static_cast<PointCloud<scalar_t>*>(targs->queries);
+
+	// The tolerance is only added when the job was flagged as "small".
+	const double eps = targs->small ? 0.000001 : 0.0;
+	const scalar_t radius = static_cast<scalar_t>(targs->search_radius + eps);
+
+	// Largest match count seen by this worker; published once at the end so
+	// the shared counter is never touched without holding ct_m.
+	size_t local_max = 0;
+
+	for (size_t i = targs->start; i < targs->end; i++) {
+		// Borrow the stored coordinates instead of copying them into a
+		// heap buffer (the previous new[] buffer was never released).
+		const std::vector<scalar_t>& point = *(pcd_query->pts[i]);
+
+		std::vector<std::pair<size_t, scalar_t> > ret_matches;
+		size_t nMatches = 0;
+		{
+			// NOTE(review): this lock serializes every search, so workers
+			// never query the tree concurrently. Confirm against the
+			// nanoflann documentation whether it can be dropped.
+			std::lock_guard<std::mutex> tree_guard(*targs->tree_m);
+			nMatches = index->radiusSearch(point.data(), radius, ret_matches, nanoflann::SearchParams());
+		}
+
+		// Each worker owns a disjoint index range, so slot i needs no lock.
+		(*matches)[i].swap(ret_matches);
+
+		if (local_max < nMatches) {
+			local_max = nMatches;
+		}
+	}
+
+	std::lock_guard<std::mutex> count_guard(*targs->ct_m);
+	if (*targs->max_count < local_max) {
+		*targs->max_count = local_max;
+	}
+
-#include "neighbors.h"
+}
 
 template<typename scalar_t>
-size_t nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
-			vector<size_t>*& neighbors_indices, double radius, int dim, int64_t max_num){
+size_t nanoflann_neighbors(std::vector<scalar_t>& queries, std::vector<scalar_t>& supports,
+			std::vector<size_t>*& neighbors_indices, double radius, int dim, int64_t max_num, int64_t n_threads){
 
 	const scalar_t search_radius = static_cast<scalar_t>(radius*radius);
 
 	// Counting vector
-	size_t max_count = 1;
+	size_t* max_count = new size_t();
+	*max_count = 1;
 
+	size_t ssize = supports.size();
 	// CLoud variable
 	PointCloud<scalar_t> pcd;
 	pcd.set(supports, dim);
 	//Cloud query
-	PointCloud<scalar_t> pcd_query;
-	pcd_query.set(queries, dim);
+	PointCloud<scalar_t>* pcd_query = new PointCloud<scalar_t>();
+	(*pcd_query).set(queries, dim);
 
 	// Tree parameters
 	nanoflann::KDTreeSingleIndexAdaptorParams tree_params(15 /* max leaf */);
 
 	// KDTree type definition
 	typedef nanoflann::KDTreeSingleIndexAdaptor< nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t> > , PointCloud<scalar_t>> my_kd_tree_t;
+	typedef std::vector< std::vector<std::pair<size_t, scalar_t> > > kd_pair;
 
 	// Pointer to trees
 	my_kd_tree_t* index;
@@ -35,47 +100,114 @@ size_t nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports
 	// Search params
 	nanoflann::SearchParams search_params;
 	// search_params.sorted = true;
-	std::vector< std::vector<std::pair<size_t, scalar_t> > > list_matches(pcd_query.pts.size());
+	kd_pair* list_matches = new kd_pair((*pcd_query).pts.size());
+
+	// single threaded routine
+	if (n_threads == 1){
+		size_t i0 = 0;
+		double eps;
+		if (ssize < 10) {
+			eps = 0.000001;
+		}
+		else {
+			eps = 0;
+		}
 
-	double eps = 0.000001;
+		for (auto& p : (*pcd_query).pts){
+			auto p0 = *p;
+			// Find neighbors
+			scalar_t* query_pt = new scalar_t[dim];
+			std::copy(p0.begin(), p0.end(), query_pt); 
 
-	// indices
-	size_t i0 = 0;
+			(*list_matches)[i0].reserve(*max_count);
+			std::vector<std::pair<size_t, scalar_t> > ret_matches;
 
-	for (auto& p : pcd_query.pts){
-		auto p0 = *p;
-		// Find neighbors
-		scalar_t* query_pt = new scalar_t[dim];
-		std::copy(p0.begin(), p0.end(), query_pt); 
+			const size_t nMatches = index->radiusSearch(query_pt, (scalar_t)(search_radius+eps), ret_matches, search_params);
+			
+			(*list_matches)[i0] = ret_matches;
+			if(*max_count < nMatches) *max_count = nMatches;
+			i0++;
 
-		list_matches[i0].reserve(max_count);
-		std::vector<std::pair<size_t, scalar_t> > ret_matches;
+		}
+	}
+	else {// Multi-threaded routine
+		std::mutex* mtx = new std::mutex();
+		std::mutex* mtx_tree = new std::mutex();
 
-		const size_t nMatches = index->radiusSearch(query_pt, (scalar_t)(search_radius+eps), ret_matches, search_params);
-		
-		list_matches[i0] = ret_matches;
-		if(max_count < nMatches) max_count = nMatches;
-		i0++;
+		size_t n_queries = (*pcd_query).pts.size();
+		size_t actual_threads = std::min((long long)n_threads, (long long)n_queries);
+
+		std::thread* tid[actual_threads];
 
+		size_t start, end;
+		size_t length;
+		if (n_queries) {
+			length = 1;
+		}
+		else {
+			auto res = std::lldiv((long long)n_queries, (long long)n_threads);
+			length = (size_t)res.quot;
+			/*
+			if (res.rem == 0) {
+				length = res.quot;
+			}
+			else {
+				length = 
+			}
+			*/
+		}
+		for (size_t t = 0; t < actual_threads; t++) {
+			//sem->wait();
+			start = t*length;
+			if (t == actual_threads-1) {
+				end = n_queries;
+			}
+			else {
+				end = (t+1)*length;
+			}
+			thread_args* targs = new thread_args();
+			targs->kd_tree = index;
+			targs->matches = list_matches;
+			targs->max_count = max_count;
+			targs->ct_m = mtx;
+			targs->tree_m = mtx_tree;
+			targs->search_radius = search_radius;
+			targs->queries = pcd_query;
+			targs->start = start;
+			targs->end = end;
+			if (ssize < 10) {
+				targs->small = true;
+			}
+			else {
+				targs->small = false;
+			}
+			std::thread* temp = new std::thread(thread_routine<scalar_t>, targs);
+			tid[t] = temp;
+		}
+
+		for (size_t t = 0; t < actual_threads; t++){
+			tid[t]->join();
+		}
 	}
+
 	// Reserve the memory
 	if(max_num > 0) {
-		max_count = max_num;
+		*max_count = max_num;
 	}
 
 	size_t size = 0; // total number of edges
-	for (auto& inds : list_matches){
-		if(inds.size() <= max_count)
+	for (auto& inds : *list_matches){
+		if(inds.size() <= *max_count)
 			size += inds.size();
 		else
-			size += max_count;
+			size += *max_count;
 	}
 
 	neighbors_indices->resize(size*2);
 	size_t i1 = 0; // index of the query points
 	size_t u = 0; // curent index of the neighbors_indices
-	for (auto& inds : list_matches){
-		for (size_t j = 0; j < max_count; j++){
+	for (auto& inds : *list_matches){
+		for (size_t j = 0; j < *max_count; j++){
 			if(j < inds.size()){
 				(*neighbors_indices)[u] = inds[j].first;
 				(*neighbors_indices)[u + 1] = i1;
@@ -85,19 +217,19 @@ size_t nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports
 		i1++;
 	}
 
-	return max_count;
+	return *max_count;
 
 
 
 
 }
 
 template<typename scalar_t>
-size_t batch_nanoflann_neighbors (vector<scalar_t>& queries,
-                               vector<scalar_t>& supports,
-                               vector<long>& q_batches,
-                               vector<long>& s_batches,
-                               vector<size_t>*& neighbors_indices,
+size_t batch_nanoflann_neighbors (std::vector<scalar_t>& queries,
+                               std::vector<scalar_t>& supports,
+                               std::vector<long>& q_batches,
+                               std::vector<long>& s_batches,
+                               std::vector<size_t>*& neighbors_indices,
                                double radius, int dim, int64_t max_num){
 
 
@@ -117,15 +249,21 @@ size_t batch_nanoflann_neighbors (vector<scalar_t>& queries,
 	size_t sum_qb = 0;
 	size_t sum_sb = 0;
 
-	double eps = 0.000001;
+	double eps;
+	if (supports.size() < 10){
+		eps = 0.000001;
+	}
+	else {
+		eps = 0;
+	}
 	// Nanoflann related variables
 	// ***************************
 
 	// CLoud variable
 	PointCloud<scalar_t> current_cloud;
 	PointCloud<scalar_t> query_pcd;
 	query_pcd.set(queries, dim);
-	vector<vector<pair<size_t, scalar_t> > > all_inds_dists(query_pcd.pts.size());
+	std::vector<std::vector<std::pair<size_t, scalar_t> > > all_inds_dists(query_pcd.pts.size());
 
 	// Tree parameters
 	nanoflann::KDTreeSingleIndexAdaptorParams tree_params(10 /* max leaf */);
 
@@ -1,5 +1,6 @@
 #include <Python.h>
 #include <torch/script.h>
+#include <iostream>
 
 #ifdef WITH_CUDA
 #include "cuda/radius_cuda.h"
@@ -11,7 +12,7 @@ PyMODINIT_FUNC PyInit__radius(void) { return NULL; }
 #endif
 
 torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::optional<torch::Tensor> ptr_x,
-                     torch::optional<torch::Tensor> ptr_y, double r, int64_t max_num_neighbors) {
+                     torch::optional<torch::Tensor> ptr_y, double r, int64_t max_num_neighbors, int64_t n_threads) {
   if (x.device().is_cuda()) {
 #ifdef WITH_CUDA
     if (!(ptr_x.has_value()) && !(ptr_y.has_value())) {
@@ -37,7 +38,7 @@ torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::optional<torch::Te
 #endif
   } else {
     if (!(ptr_x.has_value()) && !(ptr_y.has_value())) {
-      return radius_cpu(x,y,r,max_num_neighbors);
+      return radius_cpu(x,y,r,max_num_neighbors, n_threads);
     }
     if (!(ptr_x.has_value())) {
       auto batch_x = torch::zeros({torch::size(x,0)}).to(torch::kLong);