Calling function in payload at ARMv7

1

I want to write a simple payload for my ARMv7 platform. First, I tried a simple loop sending a character via UART:

/* Payload: loop forever, storing 0x68 to USART3->DR on every iteration. */
void payload()
{
  while(1)
  {
    USART3->DR = 0x68;  /* written unconditionally; no status flag is checked first */
  }
}

08000358 <payload>:
 8000358:   b480        push    {r7}
 800035a:   af00        add r7, sp, #0
 800035c:   4b01        ldr r3, [pc, #4]    ; (8000364 <payload+0xc>)
 800035e:   2268        movs    r2, #104    ; 0x68
 8000360:   809a        strh    r2, [r3, #4]
 8000362:   e7fb        b.n 800035c <payload+0x4>
 8000364:   40004800    andmi   r4, r0, r0, lsl #16

this is my desired payload, once in C and once in assembly.

/*
 * Builds the payload as raw Thumb opcodes in a local array and jumps to it.
 * Each 32-bit word packs two 16-bit halfwords from the <payload> listing,
 * with the lower-addressed halfword in the low 16 bits.
 */
int main()
{
  uint32 buffer[4];
  buffer[0] = 0xaf00b480;  /* b480 push {r7}         ; af00 add r7, sp, #0 */
  buffer[1] = 0x22684b01;  /* 4b01 ldr r3, [pc, #4]  ; 2268 movs r2, #104  */
  buffer[2] = 0xe7fb809a;  /* 809a strh r2, [r3, #4] ; e7fb b.n            */
  buffer[3] = 0x40004800;  /* literal loaded by the ldr above              */
  memcpy(0x20004000,&buffer,4*sizeof(uint32));
  /* The jump target is the local buffer with bit 0 set, not the copy at 0x20004000. */
  goto *(void *)((uint32_t) buffer | 1);

  return 0;
}

This works great: the character is sent over UART. Now, I want to call a function in my payload:

/*
 * Payload, second version: store 0x68 to USART3->DR, then call the routine at
 * 0x08000348 through inline asm, forever.
 */
void payload()
{
  while(1)
    {
      USART3->DR = 0x68;
    /* Target is given as an absolute address; the listing resolves it to
       <function> and shows the instruction encoded as f7ff fff1. */
    asm volatile(
      "bl 0x08000348\n"
    );
  }
}

08000358 <payload>:
 8000358:   b480        push    {r7}
 800035a:   af00        add r7, sp, #0
 800035c:   4b02        ldr r3, [pc, #8]    ; (8000368 <payload+0x10>)
 800035e:   2268        movs    r2, #104    ; 0x68
 8000360:   809a        strh    r2, [r3, #4]
 8000362:   f7ff fff1   bl  8000348 <function>
 8000366:   e7f9        b.n 800035c <payload+0x4>
 8000368:   40004800    andmi   r4, r0, r0, lsl #16

again, my desired payload, once in C and once in assembly.

/* Call target for the payload's bl: the body is two nop instructions emitted via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}

/*
 * Same scheme as the first main(), with the bl-containing payload.
 * Each 32-bit word packs two 16-bit halfwords from the second <payload>
 * listing, lower-addressed halfword in the low 16 bits.
 */
int main()
{
  uint32 buffer[5];
  buffer[0] = 0xaf00b480;  /* b480 push {r7}         ; af00 add r7, sp, #0 */
  buffer[1] = 0x22684b02;  /* 4b02 ldr r3, [pc, #8]  ; 2268 movs r2, #104  */
  buffer[2] = 0xf7ff809a;  /* 809a strh r2, [r3, #4] ; f7ff first half of bl */
  buffer[3] = 0xe7f9fff1;  /* fff1 second half of bl ; e7f9 b.n            */
  buffer[4] = 0x40004800;  /* literal loaded by the ldr above              */

  memcpy(0x20004000,&buffer,5*sizeof(uint32));
  /* The jump target is the local buffer with bit 0 set, not the copy at 0x20004000. */
  goto *(void *)((uint32_t) buffer | 1);

  return 0;
}

The character is now sent only 3 times, then I get a crash (the processor's fault handler is entered). I checked the memory region of the function and my buffer; both look the same:

0x8000358 <payload>:    0xaf00b480  0x22684b02  0xf7ff809a  0xe7f9fff1
0x8000368 <payload+16>: 0x40004800  0xb082b580  0xf001af00  0x2102f863

0x20004000 <_heap+3144>:    0xaf00b480  0x22684b02  0xf7ff809a  0xe7f9fff1
0x20004010 <_heap+3160>:    0x40004800  0x00000000  0x00000000  0x00000000
assembly
arm
shellcode
cortex-m3
thumb
asked on Stack Overflow Sep 5, 2018 by float93 • edited Sep 6, 2018 by Peter Cordes

1 Answer

1

goto *(void *)((uint32_t) buffer | 1);

Strange: for someone using unnecessary inline assembly elsewhere, you could have used some here.

When I compile this I get:

  38:   2201        movs    r2, #1
  3a:   4313        orrs    r3, r2
  3c:   469f        mov pc, r3
  3e:   46c0        nop 

and that should fail right away,

From the arm documentation:

ADD (register) and MOV (register) branch without interworking.

What you can do is use some real asm

@ HOP: Thumb helper, declared in C as void HOP(uint32_t *).
@ In:  r0 = address to jump to
@ Sets bit 0 of r0 and branches with bx instead of writing pc with mov.
.thumb
.thumb_func
.globl HOP
HOP:
   orr r0,#1    @ set bit 0 of the target address
   bx r0        @ branch to the address in r0; nothing after this in HOP


/* Assembly helper defined in the snippet above: jumps to the address it is given. */
void HOP ( uint32_t *);
/* Two nops via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}

/* Same opcode words as the question's payload, but entered through HOP()
   instead of the computed goto. */
int main()
{
  uint32_t buffer[5];
  buffer[0] = 0xaf00b480;  /* b480 push {r7}         ; af00 add r7, sp, #0 */
  buffer[1] = 0x22684b02;  /* 4b02 ldr r3, [pc, #8]  ; 2268 movs r2, #104  */
  buffer[2] = 0xf7ff809a;  /* 809a strh r2, [r3, #4] ; f7ff first half of bl */
  buffer[3] = 0xe7f9fff1;  /* fff1 second half of bl ; e7f9 b.n            */
  buffer[4] = 0x40004800;  /* literal loaded by the ldr above              */

//  memcpy(0x20004000,&buffer,5*sizeof(uint32));
//  goto *(void *)((uint32_t) buffer | 1);
  HOP(buffer);  /* buffer's address is passed as HOP's only argument */
  return 0;
}

  38:   0018        movs    r0, r3
  3a:   f7ff fffe   bl  0 <HOP>

Now, if you didn't want a branch-with-link but just a plain branch, and since you are being free and loose with inline assembly, this is more your style, and it should not have the mov problem that (at least) my compiler generated. BTW, we need to see what your compiler generated to understand why you didn't crash right away.

/* Two nops via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}
/* Fills the buffer with the payload words, then jumps by writing pc directly. */
int main()
{
  uint32_t buffer[5];
  buffer[0] = 0xaf00b480;
  buffer[1] = 0x22684b02;
  buffer[2] = 0xf7ff809a;
  buffer[3] = 0xe7f9fff1;
  buffer[4] = 0x40004800;
  /* r3 = r7 + 4, then pc = r3 (bit 0 is not set here).
     NOTE(review): presumably r7+4 is where this particular build placed
     buffer; the offset is hard-coded, so confirm against the disassembly. */
  asm (
      "add r3,r7,#4\n"
      "mov pc,r3\n"
  );
  return 0;
}

  36:   1d3b        adds    r3, r7, #4
  38:   469f        mov pc, r3

bx can be used too, which makes it feel better:

/* Fills the buffer with the payload words, then jumps to it with bx. */
int main()
{
  uint32_t buffer[5];
  buffer[0] = 0xaf00b480;
  buffer[1] = 0x22684b02;
  buffer[2] = 0xf7ff809a;
  buffer[3] = 0xe7f9fff1;
  buffer[4] = 0x40004800;
  /* r3 = r7 + 5, then bx r3.
     NOTE(review): presumably 5 = (hard-coded frame offset of buffer, 4) + 1
     so that bit 0 is set for bx; confirm against the disassembly. */
  asm (
      "add r3,r7,#5\n"
      "bx r3\n"
  );
  return 0;
}

  36:   1d7b        adds    r3, r7, #5
  38:   4718        bx  r3

You can't treat a UART data register that way; once the FIFO fills, no more characters get through.

You can't copy this code and run it that way either.

 8000362:   f7ff fff1   bl  8000348 <function>

is fine as built in flash, so long as 8000348 remains where that function is. But bl is pc-relative, so if you copy this code to SRAM (I knew the answer to my own question, and will let you figure out how I know: this is a microcontroller, so you meant to ask about armv7-m, not armv7), it disassembles as:

 20004000:   b480        push    {r7}
 20004002:   af00        add r7, sp, #0
 20004004:   4b02        ldr r3, [pc, #8]    ; 20004010 
 20004006:   2268        movs    r2, #104    ; 0x68
 20004008:   809a        strh    r2, [r3, #4]
 2000400A:   f7ff fff1   bl  20003FF0
 2000400E:   e7f9        b.n 20004004
 20004010:   40004800 

And I suppose you didn't copy function() to 0x20003FF0? What data do you have at that address when running this program? What does it look like disassembled?

So you are "getting it" for the most part with assembly language but missed a few things.

Now what you could do is just write the code you wanted:

@ Position-independent version of the payload loop: the callee's address is
@ loaded from a literal word and called through a register.
@ NOTE(review): the question's payload stores 0x68 with strh; this version uses
@ decimal 68 (0x44) with str -- confirm whether that difference is intended.
.thumb
top:
ldr r3,=0x40004800      @ r3 = 0x40004800, loaded from a literal word
mov r2,#68              @ decimal 68 (0x44)
str r2,[r3,#4]          @ 32-bit store of r2 to r3 + 4
ldr r0,=function        @ r0 = address of function, loaded from a literal word
blx r0                  @ call through r0 rather than a pc-relative bl
b top                   @ loop forever

no need to link

00000000 <top>:
   0:   4b02        ldr r3, [pc, #8]    ; (c <top+0xc>)
   2:   2244        movs    r2, #68 ; 0x44
   4:   605a        str r2, [r3, #4]
   6:   4802        ldr r0, [pc, #8]    ; (10 <top+0x10>)
   8:   4780        blx r0
   a:   e7f9        b.n 0 <top>
   c:   40004800    andmi   r4, r0, r0, lsl #16
  10:   00000000    andeq   r0, r0, r0

and using your style

/* Two nops via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}
/*
 * Hand-packs the <top> listing into a buffer (two 16-bit halfwords per word,
 * lower-addressed halfword in the low 16 bits) and jumps to it with bx.
 */
int main()
{
uint32_t buffer[10];
buffer[0]=0x22444b02;  /* 4b02 ldr r3, [pc, #8] ; 2244 movs r2, #68      */
buffer[1]=0x4802605a;  /* 605a str r2, [r3, #4] ; 4802 ldr r0, [pc, #8]  */
buffer[2]=0xe7f94780;  /* 4780 blx r0           ; e7f9 b.n top           */
buffer[3]=0x40004800;  /* literal for the first ldr                      */
buffer[4]=((uint32_t)function)|1;  /* literal for the second ldr: function's address with bit 0 set */
  /* NOTE(review): r7+5 is a hard-coded frame offset plus 1 (bit 0 set for
     bx), presumably taken from this build's disassembly -- confirm. */
  asm (
      "add r3,r7,#5\n"
      "bx r3\n"
  );
  return 0;
}

gives, with my compiler:

08000000 <function>:
 8000000:   b580        push    {r7, lr}
 8000002:   af00        add r7, sp, #0
...
 8000016:   af00        add r7, sp, #0
 8000018:   003b        movs    r3, r7
 800001a:   4a0c        ldr r2, [pc, #48]   ; (800004c <main+0x3a>)
 800001c:   601a        str r2, [r3, #0]
...
 800003a:   1d7b        adds    r3, r7, #5
 800003c:   4718        bx  r3
...
 800004c:   22444b02
 8000050:   4802605a
 8000054:   e7f94780
 8000058:   40004800
 800005c:   08000001

Still a bit hacky, like your code was, but now you can relocate this code to any 4-byte-aligned address and it won't have the problem of not being able to call function().

Yes, I cheated on the link for this example; I have no bootstrap. And man, it really bothers me that they use a stack frame by default; what a waste. I wonder if I can build my toolchain without that. Without the stack frame, this hacky solution works better:

08000000 <function>:
 8000000:   46c0        nop         ; (mov r8, r8)
 8000002:   46c0        nop         ; (mov r8, r8)
 8000004:   46c0        nop         ; (mov r8, r8)
 8000006:   4770        bx  lr

08000008 <main>:
 8000008:   b08a        sub sp, #40 ; 0x28
 800000a:   466b        mov r3, sp
 800000c:   4a0a        ldr r2, [pc, #40]   ; (8000038 <main+0x30>)
 800000e:   601a        str r2, [r3, #0]
 8000010:   466b        mov r3, sp
 8000012:   4a0a        ldr r2, [pc, #40]   ; (800003c <main+0x34>)
 8000014:   605a        str r2, [r3, #4]

then can use

/* Two nops via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}
/*
 * Same hand-packed payload, entered through sp instead of a frame-pointer
 * offset; function's address is stored without an explicit |1.
 */
int main()
{
uint32_t buffer[10];
buffer[0]=0x22444b02;  /* 4b02 ldr r3, [pc, #8] ; 2244 movs r2, #68      */
buffer[1]=0x4802605a;  /* 605a str r2, [r3, #4] ; 4802 ldr r0, [pc, #8]  */
buffer[2]=0xe7f94780;  /* 4780 blx r0           ; e7f9 b.n top           */
buffer[3]=0x40004800;  /* literal for the first ldr                      */
buffer[4]=((uint32_t)function);  /* literal for the second ldr           */
  /* r3 = sp with bit 0 set, then bx r3.
     NOTE(review): relies on buffer sitting at sp in this build (the
     disassembly that follows stores the words at [sp, #0..#16]) -- confirm
     for any other compiler or flags. */
  asm (
      "mov r3,sp\n"
      "orr r3,r3,#1\n"
/*    "add r3,#1\n" */
      "bx r3\n"
  );
  return 0;
}

Yeah, duh: you don't need to orr a 1 into the address of function; the toolchain should have already taken care of that.

08000008 <main>:
 8000008:   b08a        sub sp, #40 ; 0x28
 800000a:   4b09        ldr r3, [pc, #36]   ; (8000030 <main+0x28>)
 800000c:   9300        str r3, [sp, #0]
 800000e:   4b09        ldr r3, [pc, #36]   ; (8000034 <main+0x2c>)
 8000010:   9301        str r3, [sp, #4]
 8000012:   4b09        ldr r3, [pc, #36]   ; (8000038 <main+0x30>)
 8000014:   9302        str r3, [sp, #8]
 8000016:   4b09        ldr r3, [pc, #36]   ; (800003c <main+0x34>)
 8000018:   9303        str r3, [sp, #12]
 800001a:   4b09        ldr r3, [pc, #36]   ; (8000040 <main+0x38>)
 800001c:   9304        str r3, [sp, #16]
 800001e:   466b        mov r3, sp
 8000020:   f043 0301   orr.w   r3, r3, #1
 8000024:   4718        bx  r3
 8000026:   2300        movs    r3, #0
 8000028:   4618        mov r0, r3
 800002a:   b00a        add sp, #40 ; 0x28
 800002c:   4770        bx  lr
 800002e:   bf00        nop
 8000030:   22444b02    subcs   r4, r4, #2048   ; 0x800
 8000034:   4802605a    stmdami r2, {r1, r3, r4, r6, sp, lr}
 8000038:   e7f94780    ldrb    r4, [r9, r0, lsl #15]!
 800003c:   40004800    andmi   r4, r0, r0, lsl #16
 8000040:   08000001    stmdaeq r0, {r0}

Now, there is inline asm magic you can do to load the address of buffer into r3 so that you do not have to rely on a disassembly of the code to figure this out. Note that, because you appear to be using an armv7-m (despite calling it an armv7), you can use the thumb2 instruction orr r3,r3,#1, which is conceptually correct: you want to orr the bit in, not add it. But if this is an armv6-m like a cortex-m0, or you want portability, then just do the add, ugly as it is (add r3,#1), because that is not a thumb2 instruction and is portable.

So, if you got a few characters out, I suspect your goto worked somehow; again, it sounds like you are using qemu perhaps, and maybe that's how? But you were not running the copy of the code in SRAM: you branched backward to find function() but found data there, and either that crashed you or, since that data was just before the copied code, execution ran through the copied code again a few times until whatever was back there eventually went off the rails. Or maybe, because you weren't using the UART correctly, you overflowed the buffer, which on at least one of these brands may cause the UART to stop working until you clear the buffer overflow flag (assuming you are relying on the UART output to see success or failure). You would have seen that failure when running this program without the call to function, which leads me to believe this is a simulation and not real hardware, or it is real hardware and you are just seeing a fraction of the values you are shoving into the UART.

Try this instead

/*
 * Test payload: writes the repeating byte sequence 0x30..0x37 (ASCII '0'..'7')
 * to USART3->DR forever, so any dropped characters show up as gaps.
 */
void payload()
{
  uint32_t ra; 
  for(ra=0x30;;ra++)
  {
    ra&=0x37;          /* 0x38 & 0x37 == 0x30, so the count wraps from 0x37 back to 0x30 */
    USART3->DR = ra;
  }
}

as your starting point, to see what I am talking about. If you see 0123456701234567 forever, then I suspect you are using a simulator, not hardware.

EDIT:

The orr 1 was the problem with the goto and you can just do that:

/* Two nops via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}
/* Same hand-packed payload, entered with a computed goto on the buffer's
   address as-is (no |1 applied). */
int main()
{
    uint32_t buffer[10];
    buffer[0]=0x22444b02;  /* 4b02 ldr r3, [pc, #8] ; 2244 movs r2, #68      */
    buffer[1]=0x4802605a;  /* 605a str r2, [r3, #4] ; 4802 ldr r0, [pc, #8]  */
    buffer[2]=0xe7f94780;  /* 4780 blx r0           ; e7f9 b.n top           */
    buffer[3]=0x40004800;  /* literal for the first ldr                      */
    buffer[4]=((uint32_t)function);  /* literal for the second ldr           */

    goto *(void *)((uint32_t) buffer);
    return 0;
}

EDIT2

The program is already in SRAM; you are simply copying it from SRAM to SRAM. If the copy is the "exploit" then okay, but on a microcontroller you aren't copying this over the top of some other code; most of the time all the code runs out of flash, so one SRAM location is as good as another. Anyway, that term is not the problem here.

In your comment below

In main, I branch to address 0x20004001.

Nope, that is the same bug: if you want to orr the address with one, you need to use the right instruction.

/* Two nops via inline asm. */
void function()
{
    asm volatile(
      "nop\n"
      "nop\n"
    );
}
/* Same hand-packed payload; the jump goes to the fixed address 0x20004000
   (bit 0 clear). The memcpy to that address is elided in this snippet. */
int main()
{
    uint32_t buffer[10];
    buffer[0]=0x22444b02;  /* 4b02 ldr r3, [pc, #8] ; 2244 movs r2, #68      */
    buffer[1]=0x4802605a;  /* 605a str r2, [r3, #4] ; 4802 ldr r0, [pc, #8]  */
    buffer[2]=0xe7f94780;  /* 4780 blx r0           ; e7f9 b.n top           */
    buffer[3]=0x40004800;  /* literal for the first ldr                      */
    buffer[4]=((uint32_t)function);  /* literal for the second ldr           */

    //memcpy...
    //goto *(void *)((uint32_t) buffer);
    goto *(void *)(0x20004000);
    return 0;
}

with the compiler on my computer gives

  36:   4b06        ldr r3, [pc, #24]   ; (50 <main+0x3e>)
  38:   469f        mov pc, r3
  3a:   46c0        nop         ; (mov r8, r8)
  3c:   22444b02    subcs   r4, r4, #2048   ; 0x800
  40:   4802605a    stmdami r2, {r1, r3, r4, r6, sp, lr}
  44:   e7f94780    ldrb    r4, [r9, r0, lsl #15]!
  48:   40004800    andmi   r4, r0, r0, lsl #16
  4c:   00000000    andeq   r0, r0, r0
  50:   20004000    andcs   r4, r0, r0

When linked it will still work.

But if you do this

goto *(void *)(0x20004001);

  36:   4b06        ldr r3, [pc, #24]   ; (50 <main+0x3e>)
  38:   469f        mov pc, r3
  3a:   46c0        nop         ; (mov r8, r8)
  3c:   22444b02    subcs   r4, r4, #2048   ; 0x800
  40:   4802605a    stmdami r2, {r1, r3, r4, r6, sp, lr}
  44:   e7f94780    ldrb    r4, [r9, r0, lsl #15]!
  48:   40004800    andmi   r4, r0, r0, lsl #16
  4c:   00000000    andeq   r0, r0, r0
  50:   20004001    andcs   r4, r0, r1

Same bug as the very top of this answer:

ADD (register) and MOV (register) branch without interworking.

Search for that line and/or that term in the arm documentation.

answered on Stack Overflow Sep 6, 2018 by old_timer • edited Sep 6, 2018 by old_timer

User contributions licensed under CC BY-SA 3.0