Why does gcc- and icc-generated assembly contain tons of "extra" pseudo-ops?

0

When I look at the assembly generated by gcc or icc there's a ton of pseudo-ops. Do they all do something?

Here's the example that raised the question. I have two simple C++ files: one calls a function and the other defines it.

call.cpp:

#include <cstdlib>
#include <iostream>

void vadd(float* __restrict__ A, float* __restrict__ B, float* __restrict__ C);

// Driver for vadd(): builds two 16-element float inputs in 16-byte-aligned
// storage, adds them into a third buffer and prints the 16 sums on one line,
// separated by single spaces.
// Returns 0 on success, 1 if any of the aligned allocations fails.
int main(int argc, char* argv[]) {

    constexpr size_t size      = 16;
    constexpr size_t alignment = 16;

    // posix_memalign reports failure only through its return value, so every
    // call is checked and a failure is turned into nullptr. Passing the
    // address of a real void* also avoids casting a float** to void**.
    auto allocate = [&]() -> float* {
        void* raw = nullptr;
        if (posix_memalign(&raw, alignment, sizeof(float)*size) != 0) {
            return nullptr;
        }
        return static_cast<float*>(raw);
    };

    float* A = allocate();
    float* B = allocate();
    float* C = allocate();

    if (A == nullptr || B == nullptr || C == nullptr) {
        std::cerr << "posix_memalign failed\n";
        // free(nullptr) is a no-op, so releasing all three is safe here.
        std::free(A);
        std::free(B);
        std::free(C);
        return 1;
    }

    // A = 0, 1, ..., 15 and B = 0, 10, ..., 150; every value is exactly
    // representable as a float.
    for (size_t i = 0; i < size; ++i) {
        A[i] = static_cast<float>(i);
        B[i] = static_cast<float>(10 * i);
    }

    vadd(A, B, C);

    // Unsigned index to match the unsigned bound; the last element is printed
    // separately so the line does not end with a trailing space.
    for (size_t i = 0; i + 1 < size; ++i) {std::cout << C[i] << " ";}
    std::cout << C[size - 1] << std::endl;

    std::free(A);
    std::free(B);
    std::free(C);
    return 0;
}

do.cpp:

void vadd(float* __restrict__ A, float* __restrict__ B, float* __restrict__ C) { // element-wise C[i] = A[i] + B[i] over 16 floats
    C[ 0] = A[ 0] + B[ 0]; // __restrict__ on all three pointers: caller promises A, B and C do not overlap
    C[ 1] = A[ 1] + B[ 1];
    C[ 2] = A[ 2] + B[ 2];
    C[ 3] = A[ 3] + B[ 3];
    C[ 4] = A[ 4] + B[ 4];
    C[ 5] = A[ 5] + B[ 5];
    C[ 6] = A[ 6] + B[ 6];
    C[ 7] = A[ 7] + B[ 7];
    C[ 8] = A[ 8] + B[ 8];
    C[ 9] = A[ 9] + B[ 9];
    C[10] = A[10] + B[10];
    C[11] = A[11] + B[11];
    C[12] = A[12] + B[12];
    C[13] = A[13] + B[13];
    C[14] = A[14] + B[14];
    C[15] = A[15] + B[15]; // indices 0..15 are written out by hand; there is no loop
}

When I compile with icc and inspect the output for do.cpp, I see tons of pseudo-ops accompanying the assembly. This example is very mild compared to other files I've looked at that contain far more pseudo-ops than opcodes, often hundreds of lines of .byte ops.

L_TXTST0:
# -- Begin  __Z4vaddPfS_S_
# mark_begin;
       .align    4
    .globl __Z4vaddPfS_S_
__Z4vaddPfS_S_:
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
L_B1.1:                         # Preds L_B1.0
L____tag_value___Z4vaddPfS_S_.1:                                #1.80
        movups    (%rdi), %xmm1                                 #2.10
        movups    16(%rdi), %xmm3                               #2.10
        movups    32(%rdi), %xmm5                               #2.10
        movups    48(%rdi), %xmm7                               #2.10
        movups    (%rsi), %xmm0                                 #2.18
        movups    16(%rsi), %xmm2                               #2.18
        movups    32(%rsi), %xmm4                               #2.18
        movups    48(%rsi), %xmm6                               #2.18
        addps     %xmm0, %xmm1                                  #2.18
        addps     %xmm2, %xmm3                                  #2.18
        addps     %xmm4, %xmm5                                  #2.18
        addps     %xmm6, %xmm7                                  #2.18
        movups    %xmm1, (%rdx)                                 #2.2
        movups    %xmm3, 16(%rdx)                               #2.2
        movups    %xmm5, 32(%rdx)                               #2.2
        movups    %xmm7, 48(%rdx)                               #2.2
        ret                                                     #18.1
        .align    4
L____tag_value___Z4vaddPfS_S_.3:                                #
                                # LOE
# mark_end;
    .section    __DATA, __data
# -- End  __Z4vaddPfS_S_
    .section    __DATA, __data
    .globl __Z4vaddPfS_S_.eh
// -- Begin SEGMENT __eh_frame
    .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
__eh_frame_seg:
L.__eh_frame_seg:
EH_frame0:
L_fde_cie_0:
    .long 0x0000001c
    .long 0x00000000
    .long 0x52507a01
    .long 0x10780100
    .short 0x9b06
    .long ___gxx_personality_v0@GOTPCREL+0x4
    .long 0x08070c10
    .long 0x01900190
    .short 0x0000
__Z4vaddPfS_S_.eh:
    .long 0x0000001c
    .long 0x00000024
    .quad L____tag_value___Z4vaddPfS_S_.1-__Z4vaddPfS_S_.eh-0x8
    .set L_Qlab1,L____tag_value___Z4vaddPfS_S_.3-L____tag_value___Z4vaddPfS_S_.1
    .quad L_Qlab1
    .long 0x00000000
    .long 0x00000000
# End
    .subsections_via_symbols

However, most of these pseudo-ops can be removed and the program seems to run just fine. Here's a stripped down version of the assembly from do.cpp that I can successfully link and run:

    # Hand-trimmed version of the do.cpp output: only the section selection,
    # alignment, symbol export and the instructions themselves are kept.
    .text
    .align    4
    .globl __Z4vaddPfS_S_           # make the symbol visible to the linker so call.cpp can reference it
__Z4vaddPfS_S_:                     # mangled name of vadd(float*, float*, float*)
    # Load the 16 floats of parameter 1 (%rdi, A) as four 16-byte vectors.
    movups    (%rdi), %xmm1
    movups    16(%rdi), %xmm3
    movups    32(%rdi), %xmm5
    movups    48(%rdi), %xmm7
    # Load the 16 floats of parameter 2 (%rsi, B) the same way.
    movups    (%rsi), %xmm0
    movups    16(%rsi), %xmm2
    movups    32(%rsi), %xmm4
    movups    48(%rsi), %xmm6
    # AT&T operand order: destination is last, so each odd register
    # accumulates A + B, four floats per addps.
    addps     %xmm0, %xmm1
    addps     %xmm2, %xmm3
    addps     %xmm4, %xmm5
    addps     %xmm6, %xmm7
    # Store the four result vectors to parameter 3 (%rdx, C).
    movups    %xmm1, (%rdx)
    movups    %xmm3, 16(%rdx)
    movups    %xmm5, 32(%rdx)
    movups    %xmm7, 48(%rdx)
    ret

gcc also generates tons of pseudo-ops, but they have a distinct flavor, with a different mix of directives. Here's a typical example:

LASFDE3:
    .long   LASFDE3-EH_frame1
    .quad   LFB1402-.
    .set L$set$8,LFE1402-LFB1402
    .quad L$set$8
    .byte   0
    .byte   0x4
    .set L$set$9,LCFI4-LFB1402
    .long L$set$9
    .byte   0xe
    .byte   0x10
    .byte   0x4
    .set L$set$10,LCFI5-LCFI4
    .long L$set$10
    .byte   0xe
    .byte   0x8
    .align 3

I realize this single question is actually hundreds of little questions, each with a very specific answer, but what sort of behind-the-scenes work are these "extra" directives doing?

c++
gcc
assembly
icc
asked on Stack Overflow Mar 27, 2014 by Praxeolitic

0 Answers

Nobody has answered this question yet.


User contributions licensed under CC BY-SA 3.0