Setup
Get FlameGraph
% git clone https://github.com/brendangregg/FlameGraph fg
Run Docker image
% docker pull debian
% docker run -d -v {mnt_path}:{mnt_path} --cap-add PERFMON debian sleep infinity
Setup container
# apt update && apt install -y g++ linux-perf
# echo -1 > /proc/sys/kernel/perf_event_paranoid
Initial version
#include <cstdint>   // uint32_t, uint64_t
#include <cstdlib>   // RAND_MAX
#include <iostream>
#include <numeric>   // std::accumulate
#include <random>
#include <vector>

// Returns the integer mean (truncated towards zero) of `data`,
// or 0 when `data` is empty.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
    if (data.empty()) return 0;
    // The initial value is a uint64_t so the running sum is kept in 64 bits
    // instead of wrapping at 32 bits.
    const uint64_t sum = std::accumulate(data.begin(), data.end(), uint64_t(0));
    return static_cast<uint32_t>(sum / data.size());
}

// Builds a vector of `num` values drawn uniformly from [0, RAND_MAX] using a
// std::mt19937 engine seeded from std::random_device.
std::vector<uint32_t> createRndData(uint64_t num) {
    std::vector<uint32_t> data(num);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<uint32_t> dist(0, RAND_MAX);
    // Plain index loop kept on purpose: this is the code being profiled.
    for (uint64_t i = 0; i < num; ++i) {
        data[i] = dist(gen);
    }
    return data;
}

// Generates 100 million random values and prints their average.
int main() {
    const uint64_t num = 100'000'000;
    std::cout << "Average: " << calcAverage(createRndData(num)) << '\n';
    return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | ~/fg/stackcollapse-perf.pl | ~/fg/flamegraph.pl > calc.fg.svg
The function createRndData takes 90% of the calculation time. Let’s try to reduce it.
Updated version
#include <cstdint>   // uint32_t, uint64_t
#include <cstdlib>   // rand
#include <iostream>
#include <numeric>   // std::accumulate
#include <vector>

// Returns the integer mean (truncated towards zero) of `data`,
// or 0 when `data` is empty.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
    if (data.empty()) return 0;
    // The initial value is a uint64_t so the running sum is kept in 64 bits
    // instead of wrapping at 32 bits.
    const uint64_t sum = std::accumulate(data.begin(), data.end(), uint64_t(0));
    return static_cast<uint32_t>(sum / data.size());
}

// Builds a vector of `num` values, each taken from the C library's rand().
// NOTE: srand() is never called, so rand() yields the same sequence on every
// run of the program.
std::vector<uint32_t> createRndData(uint64_t num) {
    std::vector<uint32_t> data(num);
    // Plain index loop kept on purpose: this is the code being profiled.
    for (uint64_t i = 0; i < num; ++i) {
        data[i] = static_cast<uint32_t>(rand());
    }
    return data;
}

// Generates 100 million pseudo-random values and prints their average.
int main() {
    const uint64_t num = 100'000'000;
    std::cout << "Average: " << calcAverage(createRndData(num)) << '\n';
    return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | ~/fg/stackcollapse-perf.pl | ~/fg/flamegraph.pl > calc.fg.svg
Now, the function createRndData takes only 72% of the calculation time. Yeah.
Disclaimer: It’s built without any optimization!