File tree Expand file tree Collapse file tree 4 files changed +175
-0
lines changed
Expand file tree Collapse file tree 4 files changed +175
-0
lines changed Original file line number Diff line number Diff line change 1+ # A Quick Introduction to C++ Performance Tuning
2+ (From: https://github.com/adah1972/cpp_summit_2020.git )
3+
4+ This repository contains the presentation file and example code for my
5+ presentation at the C++ Summit 2020 held in Shenzhen, China on 4–5 December
6+ 2020.
7+
8+ The presentation content is shared under a [Creative Commons Attribution-Share
9+ Alike 2.5 Licence](http://creativecommons.org/licenses/by-sa/2.5/). The code
10+ is put in the public domain (i.e. do whatever you like with it), though an
11+ acknowledgement will be appreciated (but not required).
Original file line number Diff line number Diff line change 1+ #include " profiler.h"
2+ #include < cassert>
3+ #include < iostream>
4+ #include < vector>
5+
6+ namespace {
7+
/// Accumulated statistics for one profiled function.
///
/// Every member has a default initializer, so a default-constructed record
/// never holds an indeterminate value.
struct profiling_data {
    int number{};              ///< Index of the matching entry in name_map
    int call_count{};          ///< Number of calls recorded so far
    uint64_t call_duration{};  ///< Sum of all durations recorded so far
};
13+
14+ class profiler {
15+ public:
16+ profiler ();
17+ ~profiler ();
18+
19+ void add_data (int number, uint64_t duration);
20+
21+ private:
22+ std::vector<profiling_data> data_;
23+ };
24+
25+ profiler::profiler ()
26+ {
27+ size_t len = 0 ;
28+ for (;;) {
29+ if (name_map[len].name == NULL ) {
30+ break ;
31+ }
32+ ++len;
33+ }
34+ data_.resize (len);
35+ int i = 0 ;
36+ for (auto & item : data_) {
37+ assert (i == name_map[i].number );
38+ item.number = i;
39+ ++i;
40+ }
41+ }
42+
43+ profiler::~profiler ()
44+ {
45+ #ifndef NDEBUG
46+ for (auto & item : data_) {
47+ if (item.call_count == 0 ) {
48+ continue ;
49+ }
50+ std::cout << item.number << " " << name_map[item.number ].name
51+ << " :\n " ;
52+ std::cout << " Call count: " << item.call_count << ' \n ' ;
53+ std::cout << " Call duration: " << item.call_duration << ' \n ' ;
54+ std::cout << " Average duration: "
55+ << item.call_duration * 1.0 /
56+ (item.call_count != 0 ? item.call_count : 1 )
57+ << ' \n ' ;
58+ }
59+ #endif
60+ }
61+
62+ void profiler::add_data (int number, uint64_t duration)
63+ {
64+ assert (number >= 0 && number < static_cast <int >(data_.size ()));
65+ data_[number].call_count ++;
66+ data_[number].call_duration += duration;
67+ }
68+
69+ profiler profiler_instance;
70+
71+ } // unnamed namespace
72+
73+ profiling_checker::~profiling_checker ()
74+ {
75+ auto end_time = rdtsc ();
76+ profiler_instance.add_data (number_, end_time - start_time_);
77+ }
Original file line number Diff line number Diff line change 1+ #ifndef PROFILER_H
2+ #define PROFILER_H
3+
4+ #include " rdtsc.h"
5+
/// Pairs the number of a profiled function with its printable name.
struct name_mapper {
    int number;        // Function number; the profiler expects it to equal
                       // the entry's position in name_map
    char const* name;  // Printable name; a null name marks the end of
                       // name_map
};

/// Table of profiled functions. It is only declared here; the definition
/// must be supplied elsewhere.
extern name_mapper name_map[];
12+
/// Scope guard that measures how long it stays alive.
///
/// The constructor stores the start timestamp; the destructor reports the
/// elapsed time for the function identified by `number`.
///
/// The constructor is explicit so that an int is never silently converted
/// into a checker. Copying is disabled because each copy would report the
/// same interval again on destruction.
class profiling_checker {
public:
    /// @param number  Number of the function being measured.
    explicit profiling_checker(int number);
    ~profiling_checker();

    profiling_checker(const profiling_checker&) = delete;
    profiling_checker& operator=(const profiling_checker&) = delete;

private:
    int number_;           ///< Function number passed to the constructor
    uint64_t start_time_;  ///< Timestamp taken at construction
};
22+
23+ inline profiling_checker::profiling_checker (int number)
24+ : number_(number)
25+ {
26+ start_time_ = rdtsc ();
27+ }
28+
// PROFILE_CHECK(func_number) measures the rest of the enclosing scope for
// the function identified by func_number. It expands to nothing useful
// when NDEBUG is defined.
#ifdef NDEBUG
#define PROFILE_CHECK(func_number) (void)0
#else
// The variable name is derived from the current line number, so the macro
// can be used more than once in the same scope without a redeclaration
// error. Two levels of macro are needed so that __LINE__ is expanded
// before it is pasted.
#define PROFILE_CHECK_CONCAT_IMPL(a, b) a##b
#define PROFILE_CHECK_CONCAT(a, b) PROFILE_CHECK_CONCAT_IMPL(a, b)
#define PROFILE_CHECK(func_number)                                          \
    profiling_checker PROFILE_CHECK_CONCAT(profiling_checker_at_line_,      \
                                           __LINE__)(func_number)
#endif
34+
35+ #endif // PROFILER_H
Original file line number Diff line number Diff line change 1+ #ifndef RDTSC_H
2+ #define RDTSC_H
3+
4+ #include < stdint.h> // uint64_t
5+
6+ #if defined(_M_X64) || defined(_M_IX86) || defined(__x86_64) || defined(__i386)
7+ # ifdef _WIN32
8+ # include < intrin.h> // __rdtsc
9+ # else
10+ # include < x86intrin.h> // __rdtsc
11+ # endif
12+ # define HAS_HW_RDTSC 1
13+ #else
14+ # include < chrono> // std::chrono::high_resolution_clock
15+ # define HAS_HW_RDTSC 0
16+ #endif
17+
/// Returns a 64-bit timestamp intended for measuring elapsed time as the
/// difference of two calls.
///
/// When HAS_HW_RDTSC is non-zero the value comes from __rdtsc(); otherwise
/// it is the std::chrono::steady_clock time in nanoseconds.
inline uint64_t rdtsc()
{
#if HAS_HW_RDTSC
    // _mm_lfence() might be used to serialize the instruction stream,
    // and it would guarantee that RDTSC will not be reordered with
    // other instructions.  However, measurements show that the overhead
    // may be too big (easily 15 to 30 CPU cycles) for profiling
    // purposes: if reordering matters, the overhead matters too!

    // Forbid the compiler from reordering instructions
# ifdef _MSC_VER
    _ReadWriteBarrier();
# else
    __asm__ __volatile__("" : : : "memory");
# endif

    const uint64_t result = __rdtsc();

    // Forbid the compiler from reordering instructions
# ifdef _MSC_VER
    _ReadWriteBarrier();
# else
    __asm__ __volatile__("" : : : "memory");
# endif

    return result;
#else
    // steady_clock is monotonic, so the difference of two readings is never
    // negative.  high_resolution_clock may be an alias of system_clock,
    // which can be adjusted between the two readings.
    const auto now = std::chrono::steady_clock::now();
    const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(
        now.time_since_epoch());
    return static_cast<uint64_t>(nanos.count());
#endif
}
51+
52+ #endif // RDTSC_H
You can’t perform that action at this time.
0 commit comments