Skip to content

Commit fec86b8

Browse files
committed
fix: set default n_threads to physical cores number
1 parent 3265464 commit fec86b8

File tree

1 file changed

+52
-2
lines changed

1 file changed

+52
-2
lines changed

main.cpp

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,63 @@
44
#include <random>
55
#include <string>
66
#include <thread>
7+
#include <unordered_set>
78

89
#include "stable-diffusion.h"
910

1011
#define STB_IMAGE_WRITE_IMPLEMENTATION
1112
#define STB_IMAGE_WRITE_STATIC
1213
#include "stb_image_write.h"
1314

15+
#if defined(__APPLE__) && defined(__MACH__)
16+
#include <sys/types.h>
17+
#include <sys/sysctl.h>
18+
#endif
19+
20+
#if !defined(_WIN32)
21+
#include <sys/ioctl.h>
22+
#include <unistd.h>
23+
#endif
24+
25+
// get_num_physical_cores is copied from
26+
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
27+
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
28+
// Returns a best-effort estimate of the number of physical CPU cores; the
// result is used as the default thread count when the user does not pass one.
//
//  - Linux: reads /sys/devices/system/cpu/cpuN/topology/thread_siblings for
//    N = 0, 1, ... and counts the distinct values found.
//  - macOS: queries sysctl "hw.perflevel0.physicalcpu" first, then
//    "hw.physicalcpu".
//  - Windows, or any platform where the probes above fail: falls back to
//    std::thread::hardware_concurrency(), halved when it is greater than 4,
//    or 4 when the value is unknown (reported as 0).
//
// The returned value is always derived from one of the sources above; the
// fallback path never returns less than 1.
int32_t get_num_physical_cores() {
#ifdef __linux__
    // Enumerate the set of thread siblings; logical CPUs that share a core
    // report the same sibling mask, so the number of distinct entries is the
    // number of cores.
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
        // Per-CPU sysfs directories are named cpu0, cpu1, ... and live
        // *inside* /sys/devices/system/cpu/, hence the "/cpu/cpu" prefix.
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + std::to_string(cpu) +
                                      "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break;  // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            siblings.insert(line);
        }
    }
    if (!siblings.empty()) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores = 0;
    size_t len = sizeof(num_physical_cores);
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, nullptr, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    // len is an in/out parameter; restore it before the second query.
    len = sizeof(num_physical_cores);
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, nullptr, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    // TODO: Implement
#endif
    // Generic fallback: hardware_concurrency() counts logical CPUs (0 means
    // "unknown"). Small counts are used as-is; larger ones are halved as a
    // heuristic for machines exposing two hardware threads per core.
    const unsigned int n_threads = std::thread::hardware_concurrency();
    if (n_threads == 0) {
        return 4;
    }
    return static_cast<int32_t>(n_threads <= 4 ? n_threads : n_threads / 2);
}
63+
1464
struct Option {
1565
int n_threads = -1;
1666
std::string model_path;
@@ -47,7 +97,7 @@ void print_usage(int argc, const char* argv[]) {
4797
printf("arguments:\n");
4898
printf(" -h, --help show this help message and exit\n");
4999
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
50-
printf(" If threads <= 0, then threads will be set to the number of CPU cores\n");
100+
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
51101
printf(" -m, --model [MODEL] path to model\n");
52102
printf(" -o, --output OUTPUT path to write result image to (default: .\\output.png)\n");
53103
printf(" -p, --prompt [PROMPT] the prompt to render\n");
@@ -145,7 +195,7 @@ void parse_args(int argc, const char* argv[], Option* opt) {
145195
}
146196

147197
if (opt->n_threads <= 0) {
148-
opt->n_threads = std::thread::hardware_concurrency();
198+
opt->n_threads = get_num_physical_cores();
149199
}
150200

151201
if (opt->prompt.length() == 0) {

0 commit comments

Comments
 (0)