[Development] are clangs 3.5 new builtin_operator_(new/delete) also useable for Qt Container optimization?
Dennis Luehring
dl.soluz at gmx.net
Thu Jul 17 10:27:39 CEST 2014
These new builtins allow clang to optimize operator new/delete
operations far better than before:
__builtin_operator_new
__builtin_operator_delete
patches in review for clang 3.5
clang: http://reviews.llvm.org/rL210137
libc++: http://reviews.llvm.org/rL210211
The libc++ patch is very small, and maybe the Qt container library
can also benefit from a clang-specific specialization.
#example 1
#include <vector>
#include <numeric>
// Example 1: builds three small std::vector<int> objects and returns a sum
// derived from their contents.
int main()
{
// Two input vectors of two ints each.
const std::vector<int> a{1,2};
const std::vector<int> b{4,5};
// ints holds the two partial sums: 1 + (1+2) = 4 and 2 + (4+5) = 11.
const std::vector<int> ints
{
std::accumulate(a.begin(),a.end(),1),
std::accumulate(b.begin(),b.end(),2),
};
// 100 + 4 + 11 = 115 is the value returned from main.
return std::accumulate(ints.begin(),ints.end(),100);
}
clang 3.4.1
main: # @main
pushq %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
movl $8, %edi
callq operator new(unsigned long)
movq %rax, %r14
movabsq $8589934593, %rax # imm = 0x200000001
movq %rax, (%r14)
movl $8, %edi
callq operator new(unsigned long)
movq %rax, %rbx
movabsq $21474836484, %rax # imm = 0x500000004
movq %rax, (%rbx)
movl (%r14), %r15d
movl 4(%r14), %ebp
movl $8, %edi
callq operator new(unsigned long)
leal 1(%r15,%rbp), %ebp
testq %rax, %rax
movl %ebp, (%rax)
movl $11, 4(%rax)
je .LBB0_5
movq %rax, %rdi
callq operator delete(void*)
.LBB0_5: # %_ZNSt6vectorIiSaIiEED2Ev.exit25
testq %rbx, %rbx
je .LBB0_7
movq %rbx, %rdi
callq operator delete(void*)
.LBB0_7: # %_ZNSt6vectorIiSaIiEED2Ev.exit23
addl $111, %ebp
testq %r14, %r14
je .LBB0_9
movq %r14, %rdi
callq operator delete(void*)
.LBB0_9: # %_ZNSt6vectorIiSaIiEED2Ev.exit21
movl %ebp, %eax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
movq %rax, %rbp
movq %rbp, %rdi
callq _Unwind_Resume
movq %rax, %rbp
jmp .LBB0_14
movq %rax, %rbp
testq %rbx, %rbx
je .LBB0_14
movq %rbx, %rdi
callq operator delete(void*)
.LBB0_14: # %_ZNSt6vectorIiSaIiEED2Ev.exit15
testq %r14, %r14
je .LBB0_16
movq %r14, %rdi
callq operator delete(void*)
.LBB0_16: # %_ZNSt6vectorIiSaIiEED2Ev.exit
movq %rbp, %rdi
callq _Unwind_Resume
GCC_except_table0:
.byte 255 # @LPStart Encoding = omit
.byte 3 # @TType Encoding = udata4
.asciz "\266\200\200" # @TType base offset
.byte 3 # Call site Encoding = udata4
.byte 52 # Call site table length
.long .Lset0
.long .Lset1
.long .Lset2
.byte 0 # On action: cleanup
.long .Lset3
.long .Lset4
.long .Lset5
.byte 0 # On action: cleanup
.long .Lset6
.long .Lset7
.long .Lset8
.byte 0 # On action: cleanup
.long .Lset9
.long .Lset10
.long 0 # has no landing pad
.byte 0 # On action: cleanup
With Ralph Smith's patched clang/libc++:
main: # @main
movl $115, %eax
retq
#example 2
#include <string>
// Example 2: returns the length of a temporary std::string.
int main()
{
// size() of "hello" is 5; the size_t result is converted to main's int return type.
return std::string("hello").size();
}
clang 3.4.1
main: # @main
pushq %rbx
subq $32, %rsp
leaq 16(%rsp), %rdi
leaq 8(%rsp), %rdx
movl $.L.str, %esi
callq std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, std::allocator<char> const&)
movq 16(%rsp), %rax
leaq -24(%rax), %rdi
movl std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_S_empty_rep_storage, %ecx
cmpq %rcx, %rdi
movl -24(%rax), %ebx
jne .LBB0_1
.LBB0_6: # %_ZNSsD1Ev.exit
movl %ebx, %eax
addq $32, %rsp
popq %rbx
ret
.LBB0_1:
addq $-8, %rax
movl $__pthread_key_create, %ecx
testq %rcx, %rcx
je .LBB0_3
movl $-1, %ecx
lock
xaddl %ecx, (%rax)
movl %ecx, 28(%rsp)
movl 28(%rsp), %ecx
jmp .LBB0_4
.LBB0_3:
movl (%rax), %ecx
leal -1(%rcx), %edx
movl %edx, (%rax)
.LBB0_4: # %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
testl %ecx, %ecx
jg .LBB0_6
leaq 24(%rsp), %rsi
callq std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_M_destroy(std::allocator<char> const&)
jmp .LBB0_6
.L.str:
.asciz "hello"
With Ralph Smith's patched clang/libc++:
main: # @main
movl $5, %eax
retq
The optimization results of gcc and VS2013 are far
from what clang can achieve with these patches.
More information about the Development
mailing list