|
4 | 4 | #include <random> |
5 | 5 | #include <string> |
6 | 6 | #include <thread> |
| 7 | +#include <unordered_set> |
7 | 8 |
|
8 | 9 | #include "stable-diffusion.h" |
9 | 10 |
|
10 | 11 | #define STB_IMAGE_WRITE_IMPLEMENTATION |
11 | 12 | #define STB_IMAGE_WRITE_STATIC |
12 | 13 | #include "stb_image_write.h" |
13 | 14 |
|
| 15 | +#if defined(__APPLE__) && defined(__MACH__) |
| 16 | +#include <sys/types.h> |
| 17 | +#include <sys/sysctl.h> |
| 18 | +#endif |
| 19 | + |
| 20 | +#if !defined(_WIN32) |
| 21 | +#include <sys/ioctl.h> |
| 22 | +#include <unistd.h> |
| 23 | +#endif |
| 24 | + |
| 25 | +// get_num_physical_cores is copied from |
| 26 | +// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp |
| 27 | +// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE |
// Returns the number of CPU cores to use as the default thread count.
//
// Linux:  counts the distinct contents of
//         /sys/devices/system/cpu/cpu<N>/topology/thread_siblings for
//         N = 0, 1, 2, ... until a file cannot be opened. Logical CPUs that
//         report the same sibling mask are counted once.
// macOS:  queries sysctl "hw.perflevel0.physicalcpu" first, then
//         "hw.physicalcpu".
// Other platforms, or when the probes above yield nothing: falls back to
//         std::thread::hardware_concurrency(); values above 4 are halved
//         (presumably to discount SMT siblings), and 4 is returned when the
//         value is unknown (0).
//
// The result is always > 0.
int32_t get_num_physical_cores() {
#ifdef __linux__
    // Enumerate the set of thread siblings; the number of unique entries is
    // the number of cores.
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
        // Per-CPU directories live under ".../cpu/cpu<N>/"; the second "cpu"
        // component is required, otherwise no file is ever found.
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + std::to_string(cpu) +
                                      "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break;  // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            siblings.insert(line);
        }
    }
    if (!siblings.empty()) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores = 0;
    size_t len = sizeof(num_physical_cores);
    if (sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, nullptr, 0) == 0 &&
        num_physical_cores > 0) {
        return num_physical_cores;
    }
    // sysctlbyname treats len as an in/out parameter, so restore the buffer
    // size before issuing the second query.
    num_physical_cores = 0;
    len = sizeof(num_physical_cores);
    if (sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, nullptr, 0) == 0 &&
        num_physical_cores > 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    // TODO: Implement
#endif
    const unsigned int n_threads = std::thread::hardware_concurrency();
    if (n_threads == 0) {
        return 4;  // concurrency unknown: use a conservative default
    }
    return static_cast<int32_t>(n_threads <= 4 ? n_threads : n_threads / 2);
}
| 63 | + |
14 | 64 | struct Option { |
15 | 65 | int n_threads = -1; |
16 | 66 | std::string model_path; |
@@ -47,7 +97,7 @@ void print_usage(int argc, const char* argv[]) { |
47 | 97 | printf("arguments:\n"); |
48 | 98 | printf(" -h, --help show this help message and exit\n"); |
49 | 99 | printf(" -t, --threads N number of threads to use during computation (default: -1).\n"); |
50 | | - printf(" If threads <= 0, then threads will be set to the number of CPU cores\n"); |
| 100 | + printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n"); |
51 | 101 | printf(" -m, --model [MODEL] path to model\n"); |
52 | 102 | printf(" -o, --output OUTPUT path to write result image to (default: .\\output.png)\n"); |
53 | 103 | printf(" -p, --prompt [PROMPT] the prompt to render\n"); |
@@ -145,7 +195,7 @@ void parse_args(int argc, const char* argv[], Option* opt) { |
145 | 195 | } |
146 | 196 |
|
147 | 197 | if (opt->n_threads <= 0) { |
148 | | - opt->n_threads = std::thread::hardware_concurrency(); |
| 198 | + opt->n_threads = get_num_physical_cores(); |
149 | 199 | } |
150 | 200 |
|
151 | 201 | if (opt->prompt.length() == 0) { |
|
0 commit comments