I wrote the program below to dump the symbols of a Mach-O file. (I realise this duplicates existing commands like nm, but I wanted to do it myself in order to learn. Note that I could mmap the file myself, but for various reasons I wanted to let dyld load it into memory for me.)
#include <assert.h>
#include <dlfcn.h>
#include <inttypes.h>
#include <mach-o/dyld.h>
#include <mach-o/nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Fallback definition of the Mach-O header flag MH_DYLIB_IN_CACHE, used only
 * when the included headers do not already define it. main() tests this bit
 * in the header's `flags` field to reject images it does not support.
 */
#ifndef MH_DYLIB_IN_CACHE
# define MH_DYLIB_IN_CACHE 0x80000000
#endif
/*
 * Convert a dlopen() handle to intptr_t and clear its two lowest bits
 * (`& -4`), so that two handles can be compared while ignoring those bits.
 * NOTE(review): presumably dyld stores per-handle mode flags in the low bits
 * of the value it returns from dlopen() -- confirm against the dyld sources.
 */
#define DYLD_MASKREMOVE(_handle) (((intptr_t)(_handle) & (-4)))
/*
 * Map a dlopen() handle back to the Mach-O header of the image it refers to.
 *
 * Every image currently known to dyld is re-opened by name; the image whose
 * freshly obtained handle equals `handle` (after DYLD_MASKREMOVE strips the
 * low bits from both) is the one we want. Each temporary handle is closed
 * again before moving on or returning.
 *
 * handle: a handle previously returned by dlopen().
 * Returns the header reported by _dyld_get_image_header() for the matching
 * image index. Aborts the process if no loaded image matches.
 */
static const struct mach_header* dyld2hdr(void *handle) {
    const uint32_t imageCount = _dyld_image_count();
    uint32_t i = 0;

    while (i < imageCount) {
        const char *path = _dyld_get_image_name(i);
        void *probe = dlopen(path, RTLD_LOCAL | RTLD_LAZY);
        assert(probe != NULL);

        /* Record the comparison result first so the probe handle can be
         * released in exactly one place. */
        const int isMatch = (DYLD_MASKREMOVE(handle) == DYLD_MASKREMOVE(probe));

        int closeStatus = dlclose(probe);
        assert(closeStatus == 0);
        (void)closeStatus; /* only inspected when assertions are enabled */

        if (isMatch) {
            return _dyld_get_image_header(i);
        }
        i++;
    }

    abort(); // Shouldn't be reached
}
const struct load_command* findLoadCmd(const struct mach_header_64* header64, uint32_t cmd) {
const char *ptr = ((void*)header64) + sizeof(struct mach_header_64);
for (uint32_t j = 0; j < header64->ncmds; j++) {
const struct load_command *lc = (const struct load_command *)ptr;
ptr += lc->cmdsize;
if (lc->cmd == cmd)
return lc;
}
return NULL;
}
int main(int argc, char** argv) {
if (argc != 2) {
fprintf(stderr,"ERROR: wrong arguments\n");
return 1;
}
const char* imageName = argv[1];
void *dl = dlopen(imageName, RTLD_LAZY | RTLD_LOCAL);
if (dl == NULL) {
fprintf(stderr, "%s\n", dlerror());
return 1;
}
const struct mach_header* hdr = dyld2hdr(dl);
struct mach_header_64* hdr64 = (struct mach_header_64*)hdr;
if ((hdr64->flags & MH_DYLIB_IN_CACHE) != 0) {
fprintf(stderr,"ERROR: image '%s' is in the dylib cache, which is not supported by this tool\n",imageName);
return 1;
}
const struct symtab_command* symtab = (const struct symtab_command*)findLoadCmd(hdr64, LC_SYMTAB);
assert(symtab != NULL);
struct nlist_64 *syms = ((void*)hdr64) + (int64_t)(symtab->symoff);
char *strtab = (((void*)hdr64) + (int64_t)(symtab->stroff));
int count = 0;
for (uint32_t symidx = 0; symidx < symtab->nsyms; symidx++) {
uint32_t strx = syms[symidx].n_un.n_strx;
if (strx == 0)
continue;
char * symName = &(strtab[strx]);
if (symName[0] != '_')
continue;
symName++;
void *found = dlsym(dl,symName);
if (found == NULL)
continue;
printf("SYMBOL: %s\n", symName);
count++;
}
printf("Symbols found = %d\n", count);
int closeRC = dlclose(dl);
assert(closeRC == 0);
return 0;
}
Running it on itself works:
clang -DNDEBUG -g -o symdump symdump.c
./symdump symdump
Running it on a bunch of other random executables, e.g. /bin/ls, works too. ("Works" meaning that it outputs some symbols and completes successfully without crashing; whether it outputs the right ones is outside the scope of this question.) But when I run it on /usr/bin/sqlite3, it segfaults while reading the symbol name. What is different about that executable that makes it fail while others, like /bin/ls, work?
(Acknowledgement: the dyld2hdr function, which converts a dyld handle to a Mach-O header, was inspired by this answer.)
(If relevant: I am testing on macOS 10.14.6 with the compiler Apple LLVM version 10.0.1 (clang-1001.0.46.4).)
I don't agree that this is a duplicate of my previous question. The solution to that question was to use the dyld_cache_mapping_info structure to convert file offsets to memory offsets for images in the dyld shared cache; that solution isn't relevant here, because the sqlite3 executable is not part of the dyld shared cache.
User contributions licensed under CC BY-SA 3.0