Return value optimization (RVO)

RVO is a form of copy elision, a compiler optimization that the C++ standard explicitly permits (and, since C++17, requires in some cases).


// Minimal value type for the experiment: one int member that the
// user-provided default constructor sets to 42.
class T {
        int m_answer;  // private (class default access)

    public:
        T()
            : m_answer(42)
        {
        }
};

// Returns a T by value from a named local variable. This is the "named"
// case whose constructor calls are compared in the scenarios.
T createNamedT() {
    T t;       // default-constructed local object
    return t;  // returned by name
}

// Returns a T by value from an unnamed temporary. This is the "unnamed"
// case whose constructor calls are compared in the scenarios.
T createUnnamedT() {
    return T();  // temporary created in the return statement itself
}

int main() {
    T t2 = createNamedT();
    T t1 = createUnnamedT();
    return 0;
}

Scenario 1 (gcc 5.1 (x86-64), flags: ‘-fno-elide-constructors’) ▼
T::T() [base object constructor]:
        push    rbp
        mov     rbp, rsp
        mov     QWORD PTR [rbp-8], rdi
        mov     rax, QWORD PTR [rbp-8]
        mov     DWORD PTR [rax], 42
        nop
        pop     rbp
        ret
T::T(T const&) [base object constructor]:
        push    rbp
        mov     rbp, rsp
        mov     QWORD PTR [rbp-8], rdi
        mov     QWORD PTR [rbp-16], rsi
        mov     rax, QWORD PTR [rbp-8]
        mov     rdx, QWORD PTR [rbp-16]
        mov     edx, DWORD PTR [rdx]
        mov     DWORD PTR [rax], edx
        nop
        pop     rbp
        ret
createNamedT():
        push    rbp
        mov     rbp, rsp
        sub     rsp, 32
        lea     rax, [rbp-32]
        mov     rdi, rax
        call    T::T() [complete object constructor]
        lea     rdx, [rbp-32]
        lea     rax, [rbp-16]
        mov     rsi, rdx
        mov     rdi, rax
        call    T::T(T const&) [complete object constructor]
        mov     eax, DWORD PTR [rbp-16]
        leave
        ret
createUnnamedT():
        push    rbp
        mov     rbp, rsp
        sub     rsp, 32
        lea     rax, [rbp-16]
        mov     rdi, rax
        call    T::T() [complete object constructor]
        lea     rdx, [rbp-16]
        lea     rax, [rbp-32]
        mov     rsi, rdx
        mov     rdi, rax
        call    T::T(T const&) [complete object constructor]
        mov     eax, DWORD PTR [rbp-32]
        leave
        ret
main:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 64
        call    createNamedT()
        mov     DWORD PTR [rbp-32], eax
        lea     rdx, [rbp-32]
        lea     rax, [rbp-48]
        mov     rsi, rdx
        mov     rdi, rax
        call    T::T(T const&) [complete object constructor]
        call    createUnnamedT()
        mov     DWORD PTR [rbp-16], eax
        lea     rdx, [rbp-16]
        lea     rax, [rbp-64]
        mov     rsi, rdx
        mov     rdi, rax
        call    T::T(T const&) [complete object constructor]
        mov     eax, 0
        leave
        ret

Summary

  • Each call to createNamedT()/createUnnamedT() uses 1 default c’tor and 2 copy c’tors (one inside the function, one in main()).
Scenario 2 (gcc 12.2 (x86-64), flags: ‘-fno-elide-constructors’) ▼
T::T() [base object constructor]:
        push    rbp
        mov     rbp, rsp
        mov     QWORD PTR [rbp-8], rdi
        mov     rax, QWORD PTR [rbp-8]
        mov     DWORD PTR [rax], 42
        nop
        pop     rbp
        ret
T::T(T&&) [base object constructor]:
        push    rbp
        mov     rbp, rsp
        mov     QWORD PTR [rbp-8], rdi
        mov     QWORD PTR [rbp-16], rsi
        mov     rax, QWORD PTR [rbp-8]
        mov     rdx, QWORD PTR [rbp-16]
        mov     edx, DWORD PTR [rdx]
        mov     DWORD PTR [rax], edx
        nop
        pop     rbp
        ret
createNamedT():
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        lea     rax, [rbp-8]
        mov     rdi, rax
        call    T::T() [complete object constructor]
        lea     rdx, [rbp-8]
        lea     rax, [rbp-4]
        mov     rsi, rdx
        mov     rdi, rax
        call    T::T(T&&) [complete object constructor]
        mov     eax, DWORD PTR [rbp-4]
        leave
        ret
createUnnamedT():
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        lea     rax, [rbp-4]
        mov     rdi, rax
        call    T::T() [complete object constructor]
        mov     eax, DWORD PTR [rbp-4]
        leave
        ret
main:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        call    createNamedT()
        mov     DWORD PTR [rbp-4], eax
        call    createUnnamedT()
        mov     DWORD PTR [rbp-8], eax
        mov     eax, 0
        leave
        ret

Summary

  • To call createNamedT() 1 default c’tor and 1 move c’tor are used.
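  • To call createUnnamedT() just 1 default c’tor is used (gcc 12.2 defaults to C++17, where eliding this temporary is mandatory, so the flag cannot disable it).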
Scenario 3 (gcc 5.1 (x86-64), flags: none) ▼
T::T() [base object constructor]:
        push    rbp
        mov     rbp, rsp
        mov     QWORD PTR [rbp-8], rdi
        mov     rax, QWORD PTR [rbp-8]
        mov     DWORD PTR [rax], 42
        nop
        pop     rbp
        ret
createNamedT():
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        lea     rax, [rbp-16]
        mov     rdi, rax
        call    T::T() [complete object constructor]
        mov     eax, DWORD PTR [rbp-16]
        leave
        ret
createUnnamedT():
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        lea     rax, [rbp-16]
        mov     rdi, rax
        call    T::T() [complete object constructor]
        mov     eax, DWORD PTR [rbp-16]
        leave
        ret
main:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 32
        call    createNamedT()
        mov     DWORD PTR [rbp-16], eax
        call    createUnnamedT()
        mov     DWORD PTR [rbp-32], eax
        mov     eax, 0
        leave
        ret

Summary

  • To call createNamedT()/createUnnamedT() just 1 default c’tor is used.
Note

If you change createNamedT() as follows (trying to be smart):

// Anti-pattern, shown for illustration only: explicitly moving the local in
// the return statement (std::move comes from <utility>). The compiler
// warning quoted after this snippet reports that this prevents copy elision.
T createNamedT() {
    T t;
    return std::move(t);  // pessimizing move: prefer plain `return t;`
}

The compiler (with the ‘-Wall’ flag) then emits the following warning:

warning: moving a local object in a return statement prevents copy elision [-Wpessimizing-move]
      |     return std::move(t);
      |            ~~~~~~~~~^~~

Used tool: https://godbolt.org/