Skip to content

Commit 54bc338

Browse files
committed
test/profiler
1 parent 55e75d4 commit 54bc338

File tree

4 files changed

+175
-0
lines changed

4 files changed

+175
-0
lines changed

test/profiler/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# A Quick Introduction to C++ Performance Tuning
2+
(From: https://github.com/adah1972/cpp_summit_2020.git)
3+
4+
This repository contains the presentation file and example code for my
5+
presentation at the C++ Summit 2020 held in Shenzhen, China on 4–5 December
6+
2020.
7+
8+
The presentation content is shared under a [Creative Commons Attribution-Share
9+
Alike 2.5 Licence](http://creativecommons.org/licenses/by-sa/2.5/). The code
10+
is put in the public domain (i.e. do whatever you like with it), though an
11+
acknowledgement will be appreciated (but not required).

test/profiler/profiler.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#include "profiler.h"
2+
#include <cassert>
3+
#include <iostream>
4+
#include <vector>
5+
6+
namespace {
7+
8+
/** Accumulated measurements for one profiled entry. */
struct profiling_data {
    int number;                  ///< Index of the entry; set by the profiler constructor
    int call_count = 0;          ///< Number of samples recorded for this entry
    uint64_t call_duration = 0;  ///< Sum of all durations recorded for this entry
};
13+
14+
class profiler {
15+
public:
16+
profiler();
17+
~profiler();
18+
19+
void add_data(int number, uint64_t duration);
20+
21+
private:
22+
std::vector<profiling_data> data_;
23+
};
24+
25+
/**
 * Sizes the data table to match name_map.
 *
 * name_map is scanned up to (but not including) the first entry whose
 * name is null; that entry acts as the terminator.  Each slot is then
 * labelled with its own index, and debug builds assert that name_map
 * lists its entries in index order (entry i has number == i).
 */
profiler::profiler()
{
    // Count the entries that precede the null-name terminator.
    size_t len = 0;
    while (name_map[len].name != nullptr) {
        ++len;
    }

    data_.resize(len);

    int index = 0;
    for (auto& item : data_) {
        assert(index == name_map[index].number);
        item.number = index;
        ++index;
    }
}
42+
43+
/**
 * Prints the collected statistics to std::cout.
 *
 * Nothing is printed when NDEBUG is defined.  Entries that were never
 * hit are skipped, so the average below never divides by zero.
 */
profiler::~profiler()
{
#ifndef NDEBUG
    for (const auto& item : data_) {
        if (item.call_count == 0) {
            continue;  // no samples recorded for this entry
        }
        std::cout << item.number << " " << name_map[item.number].name
                  << ":\n";
        std::cout << " Call count: " << item.call_count << '\n';
        std::cout << " Call duration: " << item.call_duration << '\n';
        // call_count is non-zero here; `* 1.0` forces floating-point division.
        std::cout << " Average duration: "
                  << item.call_duration * 1.0 / item.call_count << '\n';
    }
#endif
}
61+
62+
void profiler::add_data(int number, uint64_t duration)
63+
{
64+
assert(number >= 0 && number < static_cast<int>(data_.size()));
65+
data_[number].call_count++;
66+
data_[number].call_duration += duration;
67+
}
68+
69+
// The profiler object used by this file (it lives in the unnamed namespace).
// Its constructor sizes the table from name_map, profiling_checker's
// destructor records samples into it, and its destructor prints the report
// unless NDEBUG is defined.
profiler profiler_instance;
70+
71+
} // unnamed namespace
72+
73+
/**
 * Ends the measurement: reads the counter again and records the ticks
 * elapsed since construction under this checker's entry number.
 */
profiling_checker::~profiling_checker()
{
    const uint64_t elapsed = rdtsc() - start_time_;
    profiler_instance.add_data(number_, elapsed);
}

test/profiler/profiler.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#ifndef PROFILER_H
2+
#define PROFILER_H
3+
4+
#include "rdtsc.h"
5+
6+
/** One row of the name table: pairs an entry number with its display name. */
struct name_mapper {
    int number;        ///< Entry number; the profiler asserts it equals the row index
    const char* name;  ///< Name printed in the report; null marks the end of the table
};
10+
11+
// Name table; only declared here, the definition is not in this header.
// As consumed by the profiler's constructor: entry i must have number == i
// (asserted), and the table ends at the first entry whose name is null.
extern name_mapper name_map[];
12+
13+
/**
 * Scope guard that times its own lifetime.
 *
 * The constructor samples rdtsc(); the destructor samples it again and
 * records the difference under the given entry number.
 *
 * NOTE(review): the class is implicitly copyable, and every copy would
 * record its own sample on destruction -- confirm whether copying should
 * be disallowed.
 */
class profiling_checker {
public:
    /** Starts timing for entry `number`. */
    profiling_checker(int number);

    /** Stops timing and records the elapsed ticks. */
    ~profiling_checker();

private:
    int number_;           // entry the sample is recorded under
    uint64_t start_time_;  // rdtsc() value captured at construction
};
22+
23+
/**
 * Remembers the entry number and captures the starting counter value.
 *
 * @param number  entry under which the elapsed time will be recorded
 */
inline profiling_checker::profiling_checker(int number)
    : number_(number)
    , start_time_(rdtsc())
{
}
28+
29+
// Two-step token pasting so that __LINE__ is expanded before it is glued
// onto the identifier prefix.
#define PROFILER_CONCAT_IMPL(a, b) a##b
#define PROFILER_CONCAT(a, b) PROFILER_CONCAT_IMPL(a, b)

// PROFILE_CHECK(func_number) times the rest of the enclosing scope and
// records it under entry `func_number`.  It expands to a no-op when NDEBUG
// is defined.  The guard variable's name is derived from the source line,
// so several checks may appear in the same scope (on different lines)
// without redefining each other.
#ifdef NDEBUG
#define PROFILE_CHECK(func_number) (void)0
#else
#define PROFILE_CHECK(func_number)                                           \
    profiling_checker PROFILER_CONCAT(profiling_checker_at_line_, __LINE__)( \
        func_number)
#endif
34+
35+
#endif // PROFILER_H

test/profiler/rdtsc.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#ifndef RDTSC_H
2+
#define RDTSC_H
3+
4+
#include <stdint.h> // uint64_t
5+
6+
#if defined(_M_X64) || defined(_M_IX86) || defined(__x86_64) || defined(__i386)
7+
# ifdef _WIN32
8+
# include <intrin.h> // __rdtsc
9+
# else
10+
# include <x86intrin.h> // __rdtsc
11+
# endif
12+
# define HAS_HW_RDTSC 1
13+
#else
14+
# include <chrono> // std::chrono clock used by the non-x86 fallback
15+
# define HAS_HW_RDTSC 0
16+
#endif
17+
18+
/**
 * Reads a tick counter suitable for measuring short intervals.
 *
 * With HAS_HW_RDTSC set, this returns the value of __rdtsc().  Otherwise
 * it returns the std::chrono::steady_clock reading in nanoseconds.  The
 * unit therefore differs between the two paths; callers are expected to
 * use only the difference between two readings.
 *
 * @return the current counter value
 */
inline uint64_t rdtsc()
{
#if HAS_HW_RDTSC
    // _mm_lfence() might be used to serialize the instruction stream,
    // and it would guarantee that RDTSC will not be reordered with
    // other instructions.  However, measurements show that the overhead
    // may be too big (easily 15 to 30 CPU cycles) for profiling
    // purposes: if reordering matters, the overhead matters too!

    // Forbid the compiler from reordering instructions
# ifdef _MSC_VER
    _ReadWriteBarrier();
# else
    __asm__ __volatile__("" : : : "memory");
# endif

    uint64_t result = __rdtsc();

    // Forbid the compiler from reordering instructions
# ifdef _MSC_VER
    _ReadWriteBarrier();
# else
    __asm__ __volatile__("" : : : "memory");
# endif

    return result;
#else
    // steady_clock never goes backwards, so the unsigned difference of two
    // readings cannot wrap.  high_resolution_clock may be an alias of the
    // adjustable system clock on some standard libraries.
    const auto now = std::chrono::steady_clock::now();
    return static_cast<uint64_t>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            now.time_since_epoch())
            .count());
#endif
}
51+
52+
#endif // RDTSC_H

0 commit comments

Comments
 (0)