FlameGraph (Docker)

Setup

Get FlameGraph

% git clone https://github.com/brendangregg/FlameGraph fg

Run Docker image

% docker pull debian
% docker run -d -v {mnt_path}:{mnt_path} --cap-add PERFMON debian sleep infinity

Setup container

# apt update && apt install -y g++ linux-perf
# echo -1 > /proc/sys/kernel/perf_event_paranoid

Initial version

#include <iostream>
#include <random>
#include <vector>

// Returns the arithmetic mean of `data`, truncated toward zero by integer
// division. An empty input yields 0. The running total is accumulated in a
// 64-bit integer, i.e. wider than the 32-bit element type.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
    const auto count = data.size();
    if (count == 0) {
        return 0;
    }

    uint64_t total = 0;
    for (const uint32_t value : data) {
        total += value;
    }
    return static_cast<uint32_t>(total / count);
}

// Builds a vector of `num` pseudo-random values, each drawn from a uniform
// integer distribution over [0, RAND_MAX]. A std::mt19937 engine is seeded
// once per call from std::random_device.
std::vector<uint32_t> createRndData(uint64_t num) {
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_int_distribution<uint32_t> pick(0, RAND_MAX);

    std::vector<uint32_t> samples(num);
    for (uint32_t& slot : samples) {
        slot = pick(engine);
    }
    return samples;
}

int main() {
    uint64_t num = 100'000'000;
    std::cout << "Average: " << calcAverage(createRndData(num)) << std::endl;
    return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | ~/fg/stackcollapse-perf.pl | ~/fg/flamegraph.pl > calc.fg.svg

The function createRndData takes 90% of the calculation time. Let’s try to reduce it.


Updated version

#include <iostream>
#include <random>
#include <vector>

// Returns the arithmetic mean of `data`, truncated toward zero by integer
// division. An empty input yields 0. The running total is accumulated in a
// 64-bit integer, i.e. wider than the 32-bit element type.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
    const auto count = data.size();
    if (count == 0) {
        return 0;
    }

    uint64_t total = 0;
    for (const uint32_t value : data) {
        total += value;
    }
    return static_cast<uint32_t>(total / count);
}

// Builds a vector of `num` values, each taken from a successive call to
// rand(). No seeding is done here, so the sequence depends on the state of
// the C random generator at the time of the call.
std::vector<uint32_t> createRndData(uint64_t num) {
    std::vector<uint32_t> samples(num);
    for (uint32_t& slot : samples) {
        slot = static_cast<uint32_t>(rand());
    }
    return samples;
}

int main() {
    uint64_t num = 100'000'000;
    std::cout << "Average: " << calcAverage(createRndData(num)) << std::endl;
    return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | ~/fg/stackcollapse-perf.pl | ~/fg/flamegraph.pl > calc.fg.svg

Now, the function createRndData takes only 72% of the calculation time. Yeah.


Disclaimer: It’s built without any optimization!