Showcase
Code ▼
#include <chrono> #include <functional> #include <iostream> // Function pointer void func_1(void (*f)()) { f(); } // std::function void func_2(const std::function<void()>& f) { f(); } // Function template void func_3(std::regular_invocable auto&& f) { f(); } // Before C++20 it had to be written like this: // template <typename T> concept Callable = requires(T&& t) { { t() }; }; // template <Callable F> void func_3(F&& f) { f(); } int main() { using namespace std::chrono; { auto start = high_resolution_clock::now(); void (*f)() = [](){}; for (auto i = 0; i < 10000000; ++i) { func_1(f); } auto stop = high_resolution_clock::now(); auto ms = duration_cast<milliseconds>(stop - start); std::cout << "Function pointer: Time=" << ms.count() << "ms" << std::endl; } { auto start = high_resolution_clock::now(); std::function<void()> f = [](){}; for (auto i = 0; i < 10000000; ++i) { func_2(f); } auto stop = high_resolution_clock::now(); auto ms = duration_cast<milliseconds>(stop - start); std::cout << "std::function: Time=" << ms.count() << "ms" << std::endl; } { auto start = high_resolution_clock::now(); auto f = [](){}; for (auto i = 0; i < 10000000; ++i) { func_3(f); } auto stop = high_resolution_clock::now(); auto ms = duration_cast<milliseconds>(stop - start); std::cout << "Function template: Time=" << ms.count() << "ms" << std::endl; } return 0; }
Compiler ▼
– Tool: godbolt.org
– Compiler: x86-64 Clang 16.0.0
– Args: -std=c++20
– Output: Intel asm syntax and Demangle identifiers
Result ▼
Function pointer: Time=30ms std::function: Time=102ms Function template: Time=17ms
Analysis
Function Pointer ▼
# Excerpt from main: the loop that calls func_1 ten million times.
    mov dword ptr [rbp - 36], 0  # loop counter i = 0
.LBB4_1:
    cmp dword ptr [rbp - 36], 10000000  # compare i with the iteration count
    jge .LBB4_4  # i >= 10000000: leave the loop
    mov rdi, qword ptr [rbp - 24]  # load the function pointer f as the argument
    call func_1(void (*)())
    # ++i, done via eax because the counter lives on the stack
    mov eax, dword ptr [rbp - 36]
    add eax, 1
    mov dword ptr [rbp - 36], eax
    jmp .LBB4_1  # back to the loop condition

# func_1: receives the function pointer in rdi and calls through it.
func_1(void (*)()):
    push rbp
    mov rbp, rsp
    sub rsp, 16
    mov qword ptr [rbp - 8], rdi  # store the parameter f in the stack frame
    call qword ptr [rbp - 8]  # indirect call: the target is read from memory
    add rsp, 16
    pop rbp
    ret
When calling f() we have to go through one indirection:
–> “call qword ptr [rbp - 8]” (Dereferencing the function pointer.)
std::function ▼
# Excerpt from main: the loop that calls func_2 ten million times.
    mov dword ptr [rbp - 116], 0  # loop counter i = 0
.LBB4_5:
    cmp dword ptr [rbp - 116], 10000000  # compare i with the iteration count
    jge .LBB4_10  # i >= 10000000: leave the loop
    lea rdi, [rbp - 104]  # pass the address of the std::function object
    call func_2(std::function<void ()> const&)
    # The next two jumps each target the label that immediately follows
    # them; they do no work of their own.
    jmp .LBB4_7
.LBB4_7:
    jmp .LBB4_8
.LBB4_8:
    # ++i, done via eax because the counter lives on the stack
    mov eax, dword ptr [rbp - 116]
    add eax, 1
    mov dword ptr [rbp - 116], eax
    jmp .LBB4_5  # back to the loop condition

# func_2: receives the address of the std::function in rdi and forwards it
# to the wrapper's call operator.
func_2(std::function<void ()> const&):
    push rbp
    mov rbp, rsp
    sub rsp, 16
    mov qword ptr [rbp - 8], rdi  # store the reference parameter f
    mov rdi, qword ptr [rbp - 8]  # reload it as the argument for the next call
    call std::function<void ()>::operator()() const  # enter std::function's call logic
    add rsp, 16
    pop rbp
    ret
When calling f() we have to go through the std::function call logic:
–> “call std::function<void ()>::operator()() const”
Function template ▼
# Excerpt from main: the loop that calls func_3 ten million times.
    mov dword ptr [rbp - 180], 0  # loop counter i = 0
.LBB4_18:
    cmp dword ptr [rbp - 180], 10000000  # compare i with the iteration count
    jge .LBB4_21  # i >= 10000000: leave the loop
    lea rdi, [rbp - 176]  # pass the address of the lambda object f
    call void func_3<main::$_1&>(main::$_1&)
    # ++i, done via eax because the counter lives on the stack
    mov eax, dword ptr [rbp - 180]
    add eax, 1
    mov dword ptr [rbp - 180], eax
    jmp .LBB4_18  # back to the loop condition

# func_3 instantiated for the lambda's closure type: receives the address of
# the lambda in rdi and calls its operator() by name.
void func_3<main::$_1&>(main::$_1&):
    push rbp
    mov rbp, rsp
    sub rsp, 16
    mov qword ptr [rbp - 8], rdi  # store the reference parameter f
    mov rdi, qword ptr [rbp - 8]  # reload it as the argument for the next call
    call main::$_1::operator()() const  # direct call: the target is a fixed symbol
    add rsp, 16
    pop rbp
    ret
When calling f() we have to go through zero indirections:
–> “call main::$_1::operator()() const”
Disclaimer: This code has been compiled without optimization!