[Development] Are clang 3.5's new __builtin_operator_(new/delete) also usable for Qt container optimization?

Dennis Luehring dl.soluz at gmx.net
Thu Jul 17 10:27:39 CEST 2014


These new builtins allow clang to optimize operator new/delete
operations far better than before:

__builtin_operator_new
__builtin_operator_delete

patches in review for clang 3.5
clang: http://reviews.llvm.org/rL210137
libc++: http://reviews.llvm.org/rL210211

The libc++ patch is very small, and maybe the Qt container library
can also benefit from a clang-specific specialization.

#example 1

#include <vector>
#include <numeric>
// Example 1: sums two constant vectors into a third vector and returns the
// sum of that third vector. Every input is a constant, so the result is
// fixed: 115.
int main()
{
     const std::vector<int> a{1,2};
     const std::vector<int> b{4,5};
     const std::vector<int> ints
     {
       std::accumulate(a.begin(),a.end(),1),  // 1 + (1 + 2) = 4
       std::accumulate(b.begin(),b.end(),2),  // 2 + (4 + 5) = 11
     };
     return std::accumulate(ints.begin(),ints.end(),100);  // 100 + 4 + 11 = 115
}

clang 3.4.1

main:                                   # @main
    pushq %rbp
    pushq %r15
    pushq %r14
    pushq %rbx
    pushq %rax
    movl  $8, %edi
    callq operator new(unsigned long)
    movq  %rax, %r14
    movabsq $8589934593, %rax       # imm = 0x200000001
    movq  %rax, (%r14)
    movl  $8, %edi
    callq operator new(unsigned long)
    movq  %rax, %rbx
    movabsq $21474836484, %rax      # imm = 0x500000004
    movq  %rax, (%rbx)
    movl  (%r14), %r15d
    movl  4(%r14), %ebp
    movl  $8, %edi
    callq operator new(unsigned long)
    leal  1(%r15,%rbp), %ebp
    testq %rax, %rax
    movl  %ebp, (%rax)
    movl  $11, 4(%rax)
    je  .LBB0_5
    movq  %rax, %rdi
    callq operator delete(void*)
.LBB0_5:                                # %_ZNSt6vectorIiSaIiEED2Ev.exit25
    testq %rbx, %rbx
    je  .LBB0_7
    movq  %rbx, %rdi
    callq operator delete(void*)
.LBB0_7:                                # %_ZNSt6vectorIiSaIiEED2Ev.exit23
    addl  $111, %ebp
    testq %r14, %r14
    je  .LBB0_9
    movq  %r14, %rdi
    callq operator delete(void*)
.LBB0_9:                                # %_ZNSt6vectorIiSaIiEED2Ev.exit21
    movl  %ebp, %eax
    addq  $8, %rsp
    popq  %rbx
    popq  %r14
    popq  %r15
    popq  %rbp
    ret
    movq  %rax, %rbp
    movq  %rbp, %rdi
    callq _Unwind_Resume
    movq  %rax, %rbp
    jmp .LBB0_14
    movq  %rax, %rbp
    testq %rbx, %rbx
    je  .LBB0_14
    movq  %rbx, %rdi
    callq operator delete(void*)
.LBB0_14:                               # %_ZNSt6vectorIiSaIiEED2Ev.exit15
    testq %r14, %r14
    je  .LBB0_16
    movq  %r14, %rdi
    callq operator delete(void*)
.LBB0_16:                               # %_ZNSt6vectorIiSaIiEED2Ev.exit
    movq  %rbp, %rdi
    callq _Unwind_Resume
GCC_except_table0:
    .byte 255                     # @LPStart Encoding = omit
    .byte 3                       # @TType Encoding = udata4
    .asciz  "\266\200\200"          # @TType base offset
    .byte 3                       # Call site Encoding = udata4
    .byte 52                      # Call site table length
    .long .Lset0
    .long .Lset1
    .long .Lset2
    .byte 0                       #   On action: cleanup
    .long .Lset3
    .long .Lset4
    .long .Lset5
    .byte 0                       #   On action: cleanup
    .long .Lset6
    .long .Lset7
    .long .Lset8
    .byte 0                       #   On action: cleanup
    .long .Lset9
    .long .Lset10
    .long 0                       #     has no landing pad
    .byte 0                       #   On action: cleanup


clang/libc++ with Richard Smith's patches applied

main:                                   # @main
    movl  $115, %eax
    retq


#example 2

#include <string>
// Example 2: constructs a temporary std::string from the literal "hello" and
// returns its length, which is 5.
int main()
{
    return std::string("hello").size();  // size() result is converted to int
}

clang 3.4.1

main:                                   # @main
    pushq %rbx
    subq  $32, %rsp
    leaq  16(%rsp), %rdi
    leaq  8(%rsp), %rdx
    movl  $.L.str, %esi
    callq std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, std::allocator<char> const&)
    movq  16(%rsp), %rax
    leaq  -24(%rax), %rdi
    movl  std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_S_empty_rep_storage, %ecx
    cmpq  %rcx, %rdi
    movl  -24(%rax), %ebx
    jne .LBB0_1
.LBB0_6:                                # %_ZNSsD1Ev.exit
    movl  %ebx, %eax
    addq  $32, %rsp
    popq  %rbx
    ret
.LBB0_1:
    addq  $-8, %rax
    movl  $__pthread_key_create, %ecx
    testq %rcx, %rcx
    je  .LBB0_3
    movl  $-1, %ecx
    lock
    xaddl %ecx, (%rax)
    movl  %ecx, 28(%rsp)
    movl  28(%rsp), %ecx
    jmp .LBB0_4
.LBB0_3:
    movl  (%rax), %ecx
    leal  -1(%rcx), %edx
    movl  %edx, (%rax)
.LBB0_4:                                # %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
    testl %ecx, %ecx
    jg  .LBB0_6
    leaq  24(%rsp), %rsi
    callq std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_M_destroy(std::allocator<char> const&)
    jmp .LBB0_6

.L.str:
    .asciz  "hello"

clang/libc++ with Richard Smith's patches applied

main:                                   # @main
    movl $5, %eax
    retq

The code generated by gcc and VS2013 is still far from
what clang can achieve with these patches.




More information about the Development mailing list