I'm looking at the assembly generated by GCC 9.1 for the new (C++17) function std::uninitialized_default_construct. With most compilers (MSVC, Clang, even older GCC) it is what you would expect: a loop that calls (or inlines) the default constructor of type T.
For some reason, GCC from version 7 up to the current release, when run with -O3, generates assembly that I do not fully understand, especially the part at the beginning that looks like hashing.
I'm compiling with "-O3 -std=c++17 -fno-stack-protector -fno-exceptions -fomit-frame-pointer -fno-unroll-loops". Am I missing something here?
Tested the same code on various compilers. None behaves this strangely. There's no explanation I could find for the hashing thing.
Test code (run via godbolt.org):
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
// Three-component float vector used as the element type for the
// construction experiment. Every component has a default member
// initializer, so a default-constructed Vector3 has all components zero.
struct Vector3 {
    float x = 0.0f;
    float y = 0.0f;
    float z = 0.0f;
};
// Default-constructs `len` objects of type T in the storage starting at
// `mem`, using std::uninitialized_default_construct over [mem, mem + len).
//
// mem: pointer to the first slot of the storage to construct into.
// len: number of consecutive T objects to construct.
// Returns 0 unconditionally.
template <typename T>
static int init(T* mem, size_t len)
{
    T* const last = mem + len;
    std::uninitialized_default_construct(mem, last);
    return 0;
}
// Hand-written counterpart of init(): walks the slots in
// [mem, mem + len) and value-initializes a T in each one with
// placement new.
//
// mem: pointer to the first slot of the storage to construct into.
// len: number of consecutive T objects to construct.
// Returns 0 unconditionally.
template <typename T>
static int init2(T* mem, size_t len)
{
    for (T* const stop = mem + len; mem < stop; ++mem) {
        new (mem) T();
    }
    return 0;
}
// Driver for the code-generation experiment.
//
// Usage: <program> <count> [extra-arg]
//   count     - number of Vector3 elements to construct (0..100).
//   extra-arg - if any further argument is present (argc > 2) the
//               std::uninitialized_default_construct variant (init) runs;
//               otherwise the hand-written loop (init2) runs.
//
// Returns 0 on success and -1 on a missing or invalid count, or if the
// selected init function reports failure.
int main(int argc, char** argv)
{
    constexpr size_t kCapacity = 100;
    Vector3 mem[kCapacity];

    // The original read argv[1] unconditionally; with no arguments that
    // hands a null pointer to the parser.
    if (argc < 2) {
        fprintf(stderr, "usage: %s <count 0..%zu> [extra-arg]\n",
                (argc > 0 && argv[0] != nullptr) ? argv[0] : "prog", kCapacity);
        return -1;
    }

    // strtol (unlike atoi) lets us reject non-numeric and out-of-range
    // input instead of silently getting 0 or undefined behavior.
    char* parseEnd = nullptr;
    const long requested = strtol(argv[1], &parseEnd, 10);
    const bool parsedWholeArg = (parseEnd != argv[1]) && (*parseEnd == '\0');
    if (!parsedWholeArg || requested < 0 ||
        requested > static_cast<long>(kCapacity)) {
        // A negative or too-large count would write past the end of `mem`.
        fprintf(stderr, "count must be an integer in [0, %zu]\n", kCapacity);
        return -1;
    }

    // The function is chosen from a run-time value and called through a
    // pointer (presumably so both instantiations are emitted as
    // standalone functions rather than being inlined into main).
    auto funcPtr = (argc > 2) ? &init<Vector3> : &init2<Vector3>;
    if (0 != funcPtr(mem, static_cast<size_t>(requested)))
        return -1;

    printf("%f\n", mem[0].x);
    return 0;
}
Generated disassembly for function "init" (brace yourself):
int init<Vector3>(Vector3*, unsigned long):
lea rax, [rsi+rsi*2]
lea rcx, [rdi+rax*4]
cmp rcx, rdi
je .L2
movabs rsi, 3074457345618258603
mov rdx, rcx
mov rax, rdi
sub rdx, rdi
sub rdx, 12
shr rdx, 2
imul rdx, rsi
movabs rsi, 4611686018427387903
and rdx, rsi
lea rsi, [rdx+1]
cmp rdx, 2
jbe .L3
mov rdx, rsi
pxor xmm0, xmm0
shr rdx, 2
lea rdx, [rdx+rdx*2]
sal rdx, 4
add rdx, rdi
.L4:
movups XMMWORD PTR [rax], xmm0
add rax, 48
movups XMMWORD PTR [rax-32], xmm0
movups XMMWORD PTR [rax-16], xmm0
cmp rax, rdx
jne .L4
mov rax, rsi
and rax, -4
lea rdx, [rax+rax*2]
lea rdi, [rdi+rdx*4]
cmp rsi, rax
je .L2
.L3:
lea rax, [rdi+12]
mov QWORD PTR [rdi], 0
mov DWORD PTR [rdi+8], 0x00000000
cmp rcx, rax
je .L2
lea rax, [rdi+24]
mov QWORD PTR [rdi+12], 0
mov DWORD PTR [rdi+20], 0x00000000
cmp rcx, rax
je .L2
mov QWORD PTR [rdi+24], 0
mov DWORD PTR [rdi+32], 0x00000000
.L2:
xor eax, eax
ret
For comparison, this is what MSVC generates for the same code:
int init<Vector3>(Vector3 *,unsigned __int64) PROC ; init<Vector3>, COMDAT
lea rax, QWORD PTR [rdx+rdx*2]
lea rdx, QWORD PTR [rcx+rax*4]
xor eax, eax
cmp rcx, rdx
je SHORT $LN5@init
npad 1
$LL6@init:
mov QWORD PTR [rcx], rax
mov DWORD PTR [rcx+8], eax
add rcx, 12
cmp rcx, rdx
jne SHORT $LL6@init
$LN5@init:
ret 0
Is there a way to make GCC behave properly?
User contributions licensed under CC BY-SA 3.0