Can I do binary taint analysis?

Question

Can I do binary taint analysis?

I want to analyze the following binary.

4005e0:  55                      push   %rbp
4005e1:  48 89 e5                mov    %rsp,%rbp
4005e4:  48 83 ec 10             sub    $0x10,%rsp
4005e8:  c7 45 fc 09 00 00 00    movl   $0x9,-0x4(%rbp)
4005ef:  c7 45 f8 00 00 00 00    movl   $0x0,-0x8(%rbp)
4005f6:  c7 45 f4 00 00 00 00    movl   $0x0,-0xc(%rbp)
4005fd:  83 7d f4 0a             cmpl   $0xa,-0xc(%rbp)
400601:  0f 8d 2a 00 00 00       jge    400631 <func_1+0x51>
400607:  8b 45 f4                mov    -0xc(%rbp),%eax
40060a:  3b 45 fc                cmp    -0x4(%rbp),%eax
40060d:  0f 85 0b 00 00 00       jne    40061e <func_1+0x3e>
400613:  8b 45 f4                mov    -0xc(%rbp),%eax
400616:  89 45 f8                mov    %eax,-0x8(%rbp)
400619:  e9 13 00 00 00          jmpq   400631 <func_1+0x51>
40061e:  e9 00 00 00 00          jmpq   400623 <func_1+0x43>
400623:  8b 45 f4                mov    -0xc(%rbp),%eax
400626:  83 c0 01                add    $0x1,%eax
400629:  89 45 f4                mov    %eax,-0xc(%rbp)
40062c:  e9 cc ff ff ff          jmpq   4005fd <func_1+0x1d>
400631:  8b 75 f8                mov    -0x8(%rbp),%esi
400634:  48 bf a4 08 40 00 00    movabs $0x4008a4,%rdi
40063b:  00 00 00
40063e:  b0 00                   mov    $0x0,%al
400640:  e8 1b fe ff ff          callq  400460 <printf@plt>
400645:  89 45 f0                mov    %eax,-0x10(%rbp)
400648:  48 83 c4 10             add    $0x10,%rsp
40064c:  5d                      pop    %rbp
40064d:  c3                      retq

I have already tried Triton, but instructions with memory accesses, such as movl (movq), could not be disassembled and the analysis failed.

I would like to know how to produce a binary that can be analyzed by Triton or by other taint analysis tools.

Source code (I compiled using clang.)

/* Walk i over 0..9, copy i into dest when it equals src, then print dest. */
void func_0()
{
  int src = 9;
  int dest = 0;
  for (int i = 0; i < 10; ++i) {
    /* src is 9, so this only matches on the final iteration (i == 9). */
    if (i == src) dest = i;
  }
  /* Prints dest as a decimal integer followed by a newline. */
  printf("%d\n", dest);
}

The code for Triton is below.

from __future__ import print_function
from triton     import *

import sys

# Instruction table for the emulated function: maps each instruction's
# address to its raw opcode bytes. The trailing comment on each entry is the
# AT&T-syntax disassembly of those bytes.
function = {
    0x400570: b"\x55",                      # push   %rbp
    0x400571: b"\x48\x89\xe5",              # mov    %rsp,%rbp
    0x400574: b"\x48\x83\xec\x10",              # sub    $0x10,%rsp
    0x400578: b"\xc7\x45\xfc\x09\x00\x00\x00",  # movl   $0x9,-0x4(%rbp)
    0x40057f: b"\xc7\x45\xf8\x00\x00\x00\x00",  # movl   $0x0,-0x8(%rbp)
    0x400586: b"\xc7\x45\xf4\x00\x00\x00\x00",  # movl   $0x0,-0xc(%rbp)
    0x40058d: b"\x83\x7d\xf4\x0a",              # cmpl   $0xa,-0xc(%rbp)
    0x400591: b"\x0f\x8d\x25\x00\x00\x00",      # jge    4005bc <func_0+0x4c>
    0x400597: b"\x8b\x45\xf4",              # mov    -0xc(%rbp),%eax
    0x40059a: b"\x3b\x45\xfc",              # cmp    -0x4(%rbp),%eax
    0x40059d: b"\x0f\x85\x06\x00\x00\x00",      # jne    4005a9 <func_0+0x39>
    0x4005a3: b"\x8b\x45\xf4",              # mov    -0xc(%rbp),%eax
    0x4005a6: b"\x89\x45\xf8",              # mov    %eax,-0x8(%rbp)
    0x4005a9: b"\xe9\x00\x00\x00\x00",          # jmpq   4005ae <func_0+0x3e>
    0x4005ae: b"\x8b\x45\xf4",              # mov    -0xc(%rbp),%eax
    0x4005b1: b"\x83\xc0\x01",              # add    $0x1,%eax
    0x4005b4: b"\x89\x45\xf4",              # mov    %eax,-0xc(%rbp)
    0x4005b7: b"\xe9\xd1\xff\xff\xff",          # jmpq   40058d <func_0+0x1d>
    0x4005bc: b"\x8b\x75\xf8",              # mov    -0x8(%rbp),%esi
    # NOTE(review): this movabs carries a 64-bit immediate, so the full
    # instruction is 10 bytes (0x4005bf..0x4005c8; the next instruction starts
    # at 0x4005c9). The entry below holds only its first 7 bytes, and the
    # remaining 3 bytes sit in a separate entry at 0x4005c6, which is not an
    # instruction boundary. A truncated opcode like this cannot be
    # disassembled; the two entries should be merged into one 10-byte opcode.
    0x4005bf: b"\x48\xbf\xa4\x08\x40\x00\x00",  # movabs $0x4008a4,%rdi (truncated)
    0x4005c6: b"\x00\x00\x00",                  # tail bytes of the movabs above
    0x4005c9: b"\xb0\x00",                  # mov    $0x0,%al
    0x4005cb: b"\xe8\x90\xfe\xff\xff",          # callq  400460 <printf@plt>
    0x4005d0: b"\x89\x45\xf0",              # mov    %eax,-0x10(%rbp)
    0x4005d3: b"\x48\x83\xc4\x10",              # add    $0x10,%rsp
    0x4005d7: b"\x5d",                      # pop    %rbp
    0x4005d8: b"\xc3",                      # retq
}
if __name__ == '__main__':
    # Emulate the opcodes stored in `function` one instruction at a time,
    # printing each address before it is processed.

    # Triton context
    ctx = TritonContext()
    # Set the architecture
    ctx.setArchitecture(ARCH.X86_64)
    # Symbolic optimization
    ctx.enableMode(MODE.ALIGNED_MEMORY, True)
    # Define the Python syntax
    ctx.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)

    # Define entry point (address of the first entry in `function`)
    pc = 0x400570

    # Setup stack: rsp and rbp both start at the same concrete address
    ctx.setConcreteRegisterValue(ctx.registers.rsp, 0x7fffffff)
    ctx.setConcreteRegisterValue(ctx.registers.rbp, 0x7fffffff)

    # Let's emulate the function.
    # The loop ends as soon as pc is an address with no entry in `function`.
    while pc in function:
        # Build an instruction
        inst = Instruction()

        # Setup opcode (raw bytes looked up by the current address)
        inst.setOpcode(function[pc])

        # Setup Address
        inst.setAddress(pc)
        # Trace the address about to be processed
        print(hex(pc))

        # Process the instruction
        ctx.processing(inst)

        # Next instruction: follow whatever rip holds after processing, so
        # jumps taken by the emulated code are honoured
        pc = ctx.getConcreteRegisterValue(ctx.registers.rip)

    # Emulation finished; exit with status 0
    sys.exit(0)

The error is below.

TypeError: x8664Cpu::disassembly(): Failed to disassemble the given code.

security

assembly

binary

analytics

dynamic-analysis

asked on Stack Overflow Jul 15, 2019 by

usagi • edited Jul 15, 2019 by

usagi

1 Answer

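The opcode "\x48\xbf\xa4\x08\x40\x00\x00" is incomplete and therefore cannot be disassembled: this movabs takes a 64-bit immediate, so the instruction is 10 bytes long, and the three bytes that the disassembly listing wrapped onto the next line ("\x00\x00\x00") belong to it. You have to provide the full encoding "\x48\xbf\xa4\x08\x40\x00\x00\x00\x00\x00". Patch below: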

--- a.py    2019-07-17 21:28:25.828014823 +0200
+++ b.py    2019-07-17 21:27:25.988013339 +0200
@@ -29,8 +29,7 @@
     0x4005b4: b"\x89\x45\xf4",              # mov    %eax,-0xc(%rbp)
     0x4005b7: b"\xe9\xd1\xff\xff\xff",          # jmpq   40058d <func_0+0x1d>
     0x4005bc: b"\x8b\x75\xf8",              # mov    -0x8(%rbp),%esi
-    0x4005bf: b"\x48\xbf\xa4\x08\x40\x00\x00",  # movabs $0x4008a4,%rdi
-    0x4005c6: b"\x00\x00\x00",
+    0x4005bf: b"\x48\xbf\xa4\x08\x40\x00\x00\x00\x00\x00",  # movabs $0x4008a4,%rdi
     0x4005c9: b"\xb0\x00",                  # mov    $0x0,%al
     0x4005cb: b"\xe8\x90\xfe\xff\xff",          # callq  400460 <printf@plt>
     0x4005d0: b"\x89\x45\xf0",              # mov    %eax,-0x10(%rbp)

answered on Stack Overflow Jul 17, 2019 by

Jonathan Salwan

User contributions licensed under CC BY-SA 3.0