How to simplify a function referencing itself as argument ? (and what does this mean)

3

I often encounter what looks like a function being called with itself as its argument (in the pseudocode produced by IDA), such as:

result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
  1. Since the disassembly is call qword ptr [rax], can't this be simplified to result = lambda(); when I translate the pseudocode to C?
  2. Why does the function take itself as an argument in the pseudocode?
  3. What is happening when the call goes through lambda + 8i64 (i.e., call qword ptr [rax+8])?

Here a more complete context:

/*
 * IDA Hex-Rays pseudo-code (not compilable C as-is).
 *
 * Replaces the two qwords at a1[0] / a1[1] with newPtr / positionOrCounter.
 * Before doing so, if the old a1[1] is non-null, the 32-bit counter at byte
 * offset 8 of that object is atomically decremented; when the counter's
 * previous value was 1, the first function of the table pointed to by the
 * object's first qword is called, then the counter at byte offset 12 is
 * decremented and, if it was 1 too, the second table entry is called.
 *
 * Every indirect call receives the object itself (`lambda`) as its only
 * argument.
 *
 * NOTE(review): the shape (table pointer at +0, two counters at +8 / +12,
 * "call slot 0 then slot 1 when a counter drops to zero") resembles the
 * release of a reference-counted control block such as the one behind
 * std::shared_ptr -- confirm against the binary before renaming anything.
 *
 * Returns: whatever was last stored in `result` (rax). On the path where the
 * old a1[1] is null, `result` is never assigned in this pseudo-code.
 */
__int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
{
  volatile signed __int32 *lambda; // rdi
  __int64 result; // rax

  // Old second field of *a1; it is an object pointer, not a function pointer.
  lambda = (volatile signed __int32 *)a1[1];
  if ( lambda )
  {
    // lambda is an __int32 pointer, so `lambda + 2` is byte offset 8
    // (`lock xadd [rdi+8], eax` in the listing). Adding 0xFFFFFFFF is a
    // decrement by one; the value compared below is the one before the add.
    result = (unsigned int)_InterlockedExchangeAdd(lambda + 2, 0xFFFFFFFF);
    if ( (_DWORD)result == 1 )
    {
      // Two dereferences: *lambda is the table address, **lambda is the
      // first function pointer in that table (`mov rax,[rdi]` / `call [rax]`).
      // The object is passed as the argument (`mov rcx, rdi`).
      result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
      // `lambda + 3` is byte offset 12 (`lock xadd [rdi+0Ch], r14d`).
      if ( _InterlockedExchangeAdd(lambda + 3, 0xFFFFFFFF) == 1 )
        // Same table, entry at byte offset 8, i.e. the second 8-byte slot
        // (`call qword ptr [rax+8]`).
        result = (*(__int64 (__fastcall **)(volatile signed __int32 *))(*(_QWORD *)lambda + 8i64))(lambda);
      a1[1] = positionOrCounter;
      *a1 = newPtr;
    }
    else
    {
      // Counter at +8 was not 1 before the decrement: no call, just store.
      a1[1] = positionOrCounter;
      *a1 = newPtr;
    }
  }
  else
  {
    // No previous object: only the two stores happen; `result` stays
    // unassigned on this path.
    a1[1] = positionOrCounter;
    *a1 = newPtr;
  }
  return result;
}

The disassembly, also from IDA:

.text:0000000180005F70 ; __int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
.text:0000000180005F70 CR_maybeParseWithLambda proc near       ; CODE XREF: sub_180005B10+10F↑p
.text:0000000180005F70                                         ; sub_180005B10+14A↑p ...
.text:0000000180005F70
.text:0000000180005F70 arg_0           = qword ptr  8
.text:0000000180005F70 arg_8           = qword ptr  10h
.text:0000000180005F70 arg_10          = qword ptr  18h
.text:0000000180005F70 arg_18          = qword ptr  20h
.text:0000000180005F70
.text:0000000180005F70                 mov     [rsp+arg_8], rbx
.text:0000000180005F75                 mov     [rsp+arg_10], rbp
.text:0000000180005F7A                 mov     [rsp+arg_18], rsi
.text:0000000180005F7F                 push    rdi
.text:0000000180005F80                 sub     rsp, 20h
.text:0000000180005F84                 mov     rdi, [rcx+8]
.text:0000000180005F88                 mov     rsi, r8
.text:0000000180005F8B                 mov     rbp, rdx
.text:0000000180005F8E                 mov     rbx, rcx
.text:0000000180005F91                 test    rdi, rdi
.text:0000000180005F94                 jz      short loc_180005FF3
.text:0000000180005F96
.text:0000000180005F96 loc_180005F96:                          ; DATA XREF: .rdata:0000000180401E74↓o
.text:0000000180005F96                                         ; .rdata:0000000180401E84↓o ...
.text:0000000180005F96                 mov     [rsp+28h+arg_0], r14
.text:0000000180005F9B                 or      r14d, 0FFFFFFFFh
.text:0000000180005F9F                 mov     eax, r14d
.text:0000000180005FA2                 lock xadd [rdi+8], eax
.text:0000000180005FA7                 cmp     eax, 1
.text:0000000180005FAA                 jnz     short loc_180005FEA
.text:0000000180005FAC                 mov     rax, [rdi]
.text:0000000180005FAF                 mov     rcx, rdi
.text:0000000180005FB2                 call    qword ptr [rax]
.text:0000000180005FB4                 lock xadd [rdi+0Ch], r14d
.text:0000000180005FBA                 cmp     r14d, 1
.text:0000000180005FBE                 jnz     short loc_180005FC9
.text:0000000180005FC0                 mov     rax, [rdi]
.text:0000000180005FC3                 mov     rcx, rdi
.text:0000000180005FC6                 call    qword ptr [rax+8]
.text:0000000180005FC9
.text:0000000180005FC9 loc_180005FC9:                          ; CODE XREF: CR_maybeParseWithLambda+4E↑j
.text:0000000180005FC9                 mov     [rbx+8], rsi
.text:0000000180005FCD                 mov     [rbx], rbp
.text:0000000180005FD0
.text:0000000180005FD0 loc_180005FD0:                          ; CODE XREF: CR_maybeParseWithLambda+81↓j
.text:0000000180005FD0                 mov     r14, [rsp+28h+arg_0]
.text:0000000180005FD5
.text:0000000180005FD5 loc_180005FD5:                          ; CODE XREF: CR_maybeParseWithLambda+8A↓j
.text:0000000180005FD5                                         ; DATA XREF: .pdata:0000000180483888↓o ...
.text:0000000180005FD5                 mov     rbx, [rsp+28h+arg_8]
.text:0000000180005FDA                 mov     rbp, [rsp+28h+arg_10]
.text:0000000180005FDF                 mov     rsi, [rsp+28h+arg_18]
.text:0000000180005FE4                 add     rsp, 20h
.text:0000000180005FE8                 pop     rdi
.text:0000000180005FE9                 retn
assembly
x86-64
reverse-engineering
disassembly
ida
asked on Stack Overflow Jan 25, 2021 by Soleil • edited Jan 25, 2021 by Soleil

1 Answer

4
  1. Since the disassembly is call qword ptr [rax], can't this be simplified to result = lambda(); when I translate the pseudocode to C?

No. The decompiler detects that the variable being passed in is likely an argument of the called function.

For example, void f() and void f(int) functions are both called with a single call instruction, except that in the latter case, the caller moves an int value into the appropriate register before calling the function.

You can change the type of lambda to avoid that.

  2. Why does the function take itself as an argument in the pseudocode?

Read both the assembly code and the decompiled code very carefully. lambda is not a function pointer; to get a function pointer from it, it is necessary to dereference twice. So it could be something like this (pseudo-C++ code):

// Illustrative pseudo-C++ (not a complete program): models the two loads that
// precede `call qword ptr [rax]`. `lambda` points to an object whose first
// field points to a table whose first field is the function pointer.
struct A; // forward declaration so the function type can name A*
using FunctionType=int(A*); // the callee receives the object pointer itself
struct B{
    FunctionType* functionPointer;
};
struct A{
    B* b;
};
A* lambda; // the variable name is a little misleading, given this interpretation.
// First dereference: lambda -> b. Second dereference: b -> functionPointer.
auto functionPointer=(*(*lambda).b).functionPointer;
functionPointer(lambda);

Given the double dereference, it's very likely that B is actually a vftable (although on 32-bit x86 such functions are usually called with the __thiscall convention; on x64 there is a single calling convention, which IDA displays as __fastcall) -- so the code could be written like this:

// Illustrative pseudo-C++ (not a complete program): it spells out, as ordinary
// structs, the pieces a compiler generates implicitly for a class with
// virtual functions, so the decompiled double dereference can be read as a
// virtual call.
struct Base{
    virtual void someFunction(){}
    virtual void otherFunction(){}
};
// One function-pointer slot per virtual function, in declaration order.
struct Base_vftableType{ // compiler-generated
    void (*someFunction)(Base*); // explicit (this) argument shown
    void (*otherFunction)(Base*); // explicit (this) argument shown
};
struct Derived: Base{
    // Shown as an explicit member only for illustration; in real code this
    // pointer is hidden and is not declared by the programmer.
    Base_vftableType *vftable; // compiler-generated
    void someFunction(){ /* ... */ }
};
Base_vftableType derived_vftable{ /* ... */ }; // compiler-generated vftable

Derived *a;

// the function call is something like this in pseudo-C
// (and probably how it will be displayed in IDA):
// object -> table -> slot, with the object passed as the explicit argument.
a->vftable->someFunction(a);
  3. What is happening when the call goes through lambda + 8i64 (i.e., call qword ptr [rax+8])?

Similarly, there can be more than one function in the vftable, and the + offset simply selects the address of another function in the table.

Assuming 64-bit function pointers, +8 would be the second function in the table.

See also: c++ - How to organize vtables in IDA Pro? - Reverse Engineering Stack Exchange

answered on Stack Overflow Jan 25, 2021 by user202729

User contributions licensed under CC BY-SA 3.0