I just wrote a C program that prints its command line argument without using the standard library or a main()
function. My motivation is simply curiosity and to understand how to play with inline assembly. I am using Ubuntu 17.10 x86_64 with the 4.13.0-39-generic kernel and GCC 7.2.0.
Below is my code which I have tried to comment as much as I understood. The functions print
, print_1
, my_exit
, and _start()
are required by the system to run the executable. Actually, without _start()
the linker will emit a warning and the program will segfault.
Functions print
and print_1
are different. The first one prints out a string to the console, measuring the length of the string internally. The second function needs the string length passed as an argument. The my_exit()
function just exits the program, returning the required value, which in my case is the string length or the number of command line arguments.
print_1
requires the string length as an argument so the characters are counted with a while()
loop and the length is stored in strLength
. In this case everything works pretty well.
Strange things happen when I use the print
function, which measures the string length internally. Simply speaking, it looks like this function somehow changes the string pointer to point to environment variables which should be the next pointer and instead of the first argument the function prints "CLUTTER_IM_MODULE=xim"
, which is my first environment variable. My workaround is to assign *a
to *b
in the next line.
I couldn't find any explanation inside the counting procedure, but it looks like it's changing my string pointer.
unsigned long long print(char * str){
unsigned long long ret;
__asm__(
"pushq %%rbx \n\t"
"pushq %%rcx \n\t" //RBX and RCX to the stack for further restoration
"movq %1, %%rdi \n\t" //pointer to string (char * str) into RDI for SCASB instruction
"movq %%rdi, %%rbx \n\t" //saving RDI in RBX for final substraction
"xor %%al, %%al \n\t" //zeroing AL for SCASB comparing
"movq $0xffffffff, %%rcx \n\t" //max string length for REPNE instruction
"repne scasb \n\t" //counting "loop" see details: https://www.felixcloutier.com/x86/index.html for REPNE and SCASB instructions
"sub %%rbx, %%rdi \n\t" //final substraction
"movq %%rdi, %%rdx \n\t" //string length for write syscall
"movq %%rdi, %0 \n\t" //string length into ret to return from print
"popq %%rcx \n\t"
"popq %%rbx \n\t" //RBX and RCX restoration
"movq $1, %%rax \n\t" //write - 1 for syscall
"movq $1, %%rdi \n\t" //destination pointer for string operations $1 - stdout
"movq %1, %%rsi \n\t" //source string pointer
"syscall \n\t"
: "=g"(ret)
: "g"(str)
);
return ret; }
void print_1(char * str, int l){
int ret = 0;
__asm__("movq $1, %%rax \n\t" //write - 1 for syscall
"movq $1, %%rdi \n\t" //destination pointer for string operations
"movq %1, %%rsi \n\t" //source pointer for string operations
"movl %2, %%edx \n\t" //string length
"syscall"
: "=g"(ret)
: "g"(str), "g" (l));}
void my_exit(unsigned long long ex){
int ret = 0;
__asm__("movq $60, %%rax\n\t" //syscall 60 - exit
"movq %1, %%rdi\n\t" //return value
"syscall\n\t"
"ret"
: "=g"(ret)
: "g"(ex)
);}
void _start(){
register int ac __asm__("%rsi"); // in absence of main() argc seems to be placed in rsi register
//int acp = ac;
unsigned long long strLength;
if(ac > 1){
register unsigned long long * arg __asm__("%rsp"); //argv array
char * a = (void*)*(arg + 7); //pointer to argv[1]
char * b = a; //work around for print function
/*version with print_1 and while() loop for counting
unsigned long long strLength = 0;
while(*(a + strLength)) strLength++;
print_1(a, strLength);
print_1("\n", 1);
*/
strLength = print(b);
print("\n");
}
//my_exit(acp); //echo $? prints argc
my_exit(strLength); //echo $? prints string length}
char * a = (void*)*(arg + 7);
is completely a "happens to work" thing, if it works at all. Unless you're writing __attribute__((naked))
functions that only use inline asm, it's completely up to the compiler how it lays out stack memory. It appears that you are getting rsp
, although that's not guaranteed for this unsupported use of a register-asm local. (Using the requested register is only guaranteed when used as an operand to an inline asm statement.)
If you compile with optimization disabled, gcc will reserve stack slots for locals so char * b = a;
makes gcc adjust RSP by more on function entry, so that's why your hack happens to change gcc's code-gen to match the hard-coded +7
(times 8 bytes) offset you put in the source.
On entry to _start
, the stack contents are: argc
at (%rsp)
, argv[]
starting at 8(%rsp)
. Above the terminating NULL pointer for argv[], the envp[]
array is also in stack memory. So that's why you get CLUTTER_IM_MODULE=xim
when your hard-coded offset gets the wrong stack slot.
// in absence of main() argc seems to be placed in rsi register
That's probably left over from the dynamic linker (which runs in your process before _start
). If you compiled with gcc -static -nostdlib -fno-pie
, your _start
would be the real process entry point reached directly from the kernel, with all registers = 0 (except RSP). Note that the ABI says undefined; Linux chooses to zero them to avoid information leaks.
You can write a void _start(){}
in GNU C that works reliably with and without optimization enabled, and works for the right reasons, with no inline asm (but still dependent on the x86-64 SysV ABI's calling convention and process-entry stack layout). No hard-coding of offsets that happen to occur in gcc's code-gen is needed. How Get arguments value using inline assembly in C without Glibc?. It uses stuff like int argc = (int)__builtin_return_address(0);
because _start
isn't a function: the first thing on the stack is argc rather than a return address. It's not pretty and not recommended, but given the calling convention that's how you can get gcc to generate code that knows where things are.
Your code clobbers registers without telling the compiler about it. Everything about this code is nasty, and there's no reason to expect any of it to work consistently. And if it does, it's by chance and could will break with different surrounding code or compiler options. If you want to write whole functions, do it in stand-alone asm (or in inline asm at global scope) and declare a C prototype so the compiler can call it.
Look at gcc's asm output to see what it generated around your code. (e.g. put your code on http://godbolt.org/). You'll probably see it using registers that you clobbered in your asm. (Unless you compiled with optimization disabled, in which case it doesn't keep anything in registers between C statements to support consistent debugging. Only clobbering RSP or RBP would cause problems; other inline asm clobber bugs would go undetected.) But clobbering the red zone would still be a problem.
See also https://stackoverflow.com/tags/inline-assembly/info for links to guides and tutorials.
The right way to use inline asm (if there is a right way) is normally to let the compiler do as much as possible. So to make a write system call, you'd do everything with input / output constraints, and the only instruction inside the asm template would be "syscall"
, like this nice example my_write
function: How to invoke a system call via sysenter in inline assembly? (The actual answer has 32-bit int $0x80
and x86-64 syscall
, but not an inline asm version using 32-bit sysenter
because that's not a guaranteed-stable ABI).
See also What is the difference between 'asm', '__asm' and '__asm__'? for another example.
https://gcc.gnu.org/wiki/DontUseInlineAsm for lots of reasons why you shouldn't use it (like defeating constant-propagation and other optimizations).
Beware that a pointer input constraint for an inline asm statement does not imply that the pointed-to memory is also an input or output. Use a "memory"
clobber, or see at&t asm inline c++ problem for a dummy operand workaround.
Many thanks for every suggestion in your answer and comments, it was really helpful. Peter Cordes , thanks for this link https://stackoverflow.com/a/50261819. I use this code as a base and following your advice to write inline asm at global scope. After couple days looking around and reading some documentation, finally here is the code doing what I have been looking for (checking command line arguments and environment variables with no stdlib).
Any improvements and advice are most welcome.
It is compiled with : gcc -Wall -o getArgs getArgs.c -nostdlib -nostartfiles -fno-ident -static -s
Run : ./getArgs -args Hello everybody -envs
*** Environment variable ***
/bin/bash
*** Command line arguments ***
-args
Hello
everybody
-envs
Calling conventions: User-level applications use as integer registers for passing the sequence * %rdi, %rsi, %rdx, %rcx, %r8 and %r9. So in function call we should have ex. print( %rdi, %rsi, %rdx); * The kernel interface uses %rdi, %rsi, %rdx, %r10, %r8 and %r9.
asm(
".global _start\n\t"
"_start:\n\t"
" xorl %ebp,%ebp\n\t" // Clear the frame pointer. As ABI suggests
" movq 0(%rsp),%rdi\n\t" // argc
" lea 8(%rsp),%rsi\n\t" // argv = %rsp + 8
" lea 8(%rsp,%rdi,8), %rdx\n\t" // pointer to environment variables (8*(argc+1))(%rsp) envp[0]
" call __main\n\t" // call main function
" movq %rax,%rdi\n\t" // main return code as an argument for exit syscall
" movl $60,%eax\n\t" // 60 = exit
" syscall\n\t");
asm(
"print:\n\t" // thanks to the calling convention when we call our print we get: int fd (%rdi), const void *buf (%rsi), unsigned count (%rdx)
" movq $1,%rax\n\t" // 1 = write syscall on x86_64
" syscall\n\t"
" ret\n\t"
);
int print(int fd, const void *buf, unsigned count); //do not forget to declare function from inline assembly
unsigned strLen(const char *ch) {
const char *ptr;
for(ptr = ch; *ptr; ++ptr); //ptr points to same place as ch, then looping until *ptr is not 0. If so, after substraction we get string length.
return ptr-ch; } //"When you substract two pointers, as long as they point into the same array, the result is the number of elements separating them"
char strCmp(const char * a, const char * b){
char t = 0;
int aLength = strLen(a);
int bLength = strLen(b);
if(aLength == bLength){
for(int j = 0; j < aLength; j++){
if(a[j] == b[j])
t++;
}
if(t == aLength)
return 1;
else
return 0;
}else{
return 0;
}} //strCmp - comparing 2 strings up to the length of first string, returns 1 if equal and 0 if not
char * getEnv(char * env, char **envp){
char * val;
int valL = strLen(env);
int k = 1;
//environment variables is null terminated array of strings, last array element is 0
while(*(envp + k)){
char t = 0;
for(val = *(envp + k); *val != 0x3d; ++val); //counting up to 3d (=) //ascii hex of "=" is 0x3d
int envpL = val - *(envp + k); //counting length of envp
if(valL == envpL){
for(int j = 0; j < valL; j++){
if(*(*(envp + k) + j) == *(env + j)){
t++;
}
}
if(t == valL){
return ++val;
}
}
k++;
}
return "";} //getEnv - looping through environment variables "envp" looking for "env", using strLen()
int __main(int argc, char **argv, char **envp) {
char arg1 = 0, arg2 = 0; int length; //arg1, arg2 - flags for argv checking
//arrays to compare with command line arguments
char envs[6] = {0x2d, 0x65, 0x6e, 0x76, 0x73, 0x00}; //ascii hex of "-envs"
char args[6] = {0x2d, 0x61, 0x72, 0x67, 0x73, 0x00}; //ascii hex of "-args"
//first of all we check for control arguments
for(int i = 1; i < argc; i++) {
if(strCmp(*(argv + i), envs))
arg1 = 1;
if(strCmp(*(argv + i), args))
arg2 = 1;
}
if(arg1){
char * b = getEnv("SHELL", envp); //we are looking for "SHELL"
print(1, "*** Environment variable ***\n", 30);
print(1, b, strLen(b));
print(1, "\n", 1);
}
if(arg2){
print(1, "\n", 1);
print(1, "*** Command line arguments ***\n", 31);
for(int i = 1; i < argc; i++) {
length = strLen(*(argv + i));
print(1, *(argv + i), length);
print(1, "\n", 1);
}
}
return argc; }//number of arguments
User contributions licensed under CC BY-SA 3.0