When I look at the assembly generated by gcc or icc there's a ton of pseudo-ops. Do they all do something?
Here's the example that raised the question. I have two simple C++ files: one calls a function and the other defines it.
call.cpp:
#include <stdlib.h>

#include <iostream>
void vadd(float* __restrict__ A, float* __restrict__ B, float* __restrict__ C);

/**
 * Driver for the vadd example.
 *
 * Allocates three 16-byte-aligned buffers of 16 floats, fills A with
 * 0, 1, ..., 15 and B with 0, 10, ..., 150, calls vadd(A, B, C) and prints
 * the 16 elements of C on one line separated by single spaces.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success, 1 if any of the aligned allocations failed.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    constexpr size_t size = 16;
    constexpr size_t alignment = 16;

    // posix_memalign reports failure through its return value, not through
    // the output pointer, so translate a non-zero result into nullptr.
    // Going through a real void* also avoids writing a void* through a
    // float** as the old (void**)&A cast did.
    const auto alloc_floats = []() -> float* {
        void* raw = nullptr;
        if (posix_memalign(&raw, alignment, sizeof(float) * size) != 0) {
            return nullptr;
        }
        return static_cast<float*>(raw);
    };

    float* A = alloc_floats();
    float* B = alloc_floats();
    float* C = alloc_floats();

    int status = 0;
    if (A != nullptr && B != nullptr && C != nullptr) {
        // Same values as the original hand-written assignments:
        // A[i] = i and B[i] = 10 * i, all exactly representable as float.
        for (size_t i = 0; i < size; ++i) {
            A[i] = static_cast<float>(i);
            B[i] = static_cast<float>(10 * i);
        }

        vadd(A, B, C);

        // Space-separate all but the last element, then end the line.
        for (size_t i = 0; i + 1 < size; ++i) {
            std::cout << C[i] << " ";
        }
        std::cout << C[size - 1] << std::endl;
    } else {
        std::cerr << "posix_memalign failed" << std::endl;
        status = 1;
    }

    // free(nullptr) is a no-op, so this is safe on the failure path too.
    free(A);
    free(B);
    free(C);
    return status;
}
do.cpp:
/**
 * Element-wise sum of two 16-float arrays: C[i] = A[i] + B[i] for i in 0..15.
 *
 * The __restrict__ qualifiers promise that A, B and C do not overlap; the
 * caller is responsible for honouring that.
 *
 * @param A first input, at least 16 floats
 * @param B second input, at least 16 floats
 * @param C output, at least 16 floats
 */
void vadd(float* __restrict__ A, float* __restrict__ B, float* __restrict__ C) {
    constexpr int kCount = 16;
    for (int idx = 0; idx < kCount; ++idx) {
        C[idx] = A[idx] + B[idx];
    }
}
When I compile with icc and inspect the output for do.cpp, I see tons of pseudo-ops accompanying the assembly. This example is very mild compared to other files I've looked at that contain far more pseudo-ops than opcodes, often hundreds of lines of .byte ops.
L_TXTST0:
# -- Begin __Z4vaddPfS_S_
# mark_begin;
.align 4
.globl __Z4vaddPfS_S_
__Z4vaddPfS_S_:
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
L_B1.1: # Preds L_B1.0
L____tag_value___Z4vaddPfS_S_.1: #1.80
movups (%rdi), %xmm1 #2.10
movups 16(%rdi), %xmm3 #2.10
movups 32(%rdi), %xmm5 #2.10
movups 48(%rdi), %xmm7 #2.10
movups (%rsi), %xmm0 #2.18
movups 16(%rsi), %xmm2 #2.18
movups 32(%rsi), %xmm4 #2.18
movups 48(%rsi), %xmm6 #2.18
addps %xmm0, %xmm1 #2.18
addps %xmm2, %xmm3 #2.18
addps %xmm4, %xmm5 #2.18
addps %xmm6, %xmm7 #2.18
movups %xmm1, (%rdx) #2.2
movups %xmm3, 16(%rdx) #2.2
movups %xmm5, 32(%rdx) #2.2
movups %xmm7, 48(%rdx) #2.2
ret #18.1
.align 4
L____tag_value___Z4vaddPfS_S_.3: #
# LOE
# mark_end;
.section __DATA, __data
# -- End __Z4vaddPfS_S_
.section __DATA, __data
.globl __Z4vaddPfS_S_.eh
// -- Begin SEGMENT __eh_frame
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
__eh_frame_seg:
L.__eh_frame_seg:
EH_frame0:
L_fde_cie_0:
.long 0x0000001c
.long 0x00000000
.long 0x52507a01
.long 0x10780100
.short 0x9b06
.long ___gxx_personality_v0@GOTPCREL+0x4
.long 0x08070c10
.long 0x01900190
.short 0x0000
__Z4vaddPfS_S_.eh:
.long 0x0000001c
.long 0x00000024
.quad L____tag_value___Z4vaddPfS_S_.1-__Z4vaddPfS_S_.eh-0x8
.set L_Qlab1,L____tag_value___Z4vaddPfS_S_.3-L____tag_value___Z4vaddPfS_S_.1
.quad L_Qlab1
.long 0x00000000
.long 0x00000000
# End
.subsections_via_symbols
However, most of these pseudo-ops can be removed and the program seems to run just fine. Here's a stripped down version of the assembly from do.cpp that I can successfully link and run:
# Hand-trimmed assembly for vadd(float*, float*, float*) from do.cpp:
# computes C[i] = A[i] + B[i] for 16 floats, four floats per vector register.
# Register use, per the parameter annotations in the full compiler listing:
#   %rdi = A (parameter 1), %rsi = B (parameter 2), %rdx = C (parameter 3)
.text
.align 4
# Make the mangled symbol visible so the call in call.cpp resolves at link time.
.globl __Z4vaddPfS_S_
__Z4vaddPfS_S_:
# Load A[0..15] as four 16-byte vectors (movups tolerates unaligned addresses).
movups (%rdi), %xmm1
movups 16(%rdi), %xmm3
movups 32(%rdi), %xmm5
movups 48(%rdi), %xmm7
# Load B[0..15] the same way.
movups (%rsi), %xmm0
movups 16(%rsi), %xmm2
movups 32(%rsi), %xmm4
movups 48(%rsi), %xmm6
# Packed single-precision add. AT&T operand order: the second operand is the
# destination, so the registers holding A end up holding A + B.
addps %xmm0, %xmm1
addps %xmm2, %xmm3
addps %xmm4, %xmm5
addps %xmm6, %xmm7
# Store the four result vectors to C[0..15].
movups %xmm1, (%rdx)
movups %xmm3, 16(%rdx)
movups %xmm5, 32(%rdx)
movups %xmm7, 48(%rdx)
ret
gcc also generates tons of pseudo-ops, but they have a distinct flavor, with a different mix of directives. Here's a typical example:
LASFDE3:
.long LASFDE3-EH_frame1
.quad LFB1402-.
.set L$set$8,LFE1402-LFB1402
.quad L$set$8
.byte 0
.byte 0x4
.set L$set$9,LCFI4-LFB1402
.long L$set$9
.byte 0xe
.byte 0x10
.byte 0x4
.set L$set$10,LCFI5-LCFI4
.long L$set$10
.byte 0xe
.byte 0x8
.align 3
I realize this single question is actually hundreds of little questions, each with a very specific answer, but what sort of behind-the-scenes work are these "extra" pseudo-ops doing?
User contributions licensed under CC BY-SA 3.0