Browse Source

merge linker

Kevin Lange 4 years ago
parent
commit
8313d9906e
8 changed files with 692 additions and 0 deletions
  1. 4 0
      linker/.gitignore
  2. 65 0
      linker/Makefile
  3. 6 0
      linker/README.md
  4. 11 0
      linker/demo.c
  5. 6 0
      linker/demob.c
  6. 15 0
      linker/libdemo.c
  7. 45 0
      linker/link.ld
  8. 540 0
      linker/linker.c

+ 4 - 0
linker/.gitignore

@@ -0,0 +1,4 @@
+*.so
+demo
+demob
+.gdb_history

+ 65 - 0
linker/Makefile

@@ -0,0 +1,65 @@
+CC=i686-pc-toaru-gcc
+AR=i686-pc-toaru-ar
+
+# None of these targets produce a file of the same name.
+.PHONY: all go cd
+all: ld.so libdemo.so demo
+
+# The loader itself: statically linked, laid out by link.ld.
+ld.so: linker.c link.ld
+	${CC} -static -Wl,-static -std=c99 -g -U__STRICT_ANSI__ -o ld.so -Os -T link.ld linker.c
+
+# demo links with -ldemo, so libdemo.so has to exist before the link step.
+demo: demo.c libdemo.so
+	${CC} -o demo -g demo.c -L. -ldemo
+
+demob: demob.c
+	${CC} -o demob demob.c -L.
+
+libdemo.so: libdemo.c
+	${CC} -shared -fPIC -Wl,-soname,libdemo.so -o libdemo.so libdemo.c
+
+# Builds a shared libc from the static archive found under ${TOARU_SYSROOT}.
+libc.so:
+	cp ${TOARU_SYSROOT}/usr/lib/libc.a libc.a
+	# init and fini don't belong in our shared object
+	${AR} d libc.a lib_a-init.o
+	${AR} d libc.a lib_a-fini.o
+	# Remove references to newlib's reentrant malloc
+	${AR} d libc.a lib_a-calloc.o
+	${AR} d libc.a lib_a-callocr.o
+	${AR} d libc.a lib_a-cfreer.o
+	${AR} d libc.a lib_a-freer.o
+	${AR} d libc.a lib_a-malignr.o
+	${AR} d libc.a lib_a-mallinfor.o
+	${AR} d libc.a lib_a-mallocr.o
+	${AR} d libc.a lib_a-malloptr.o
+	${AR} d libc.a lib_a-mallstatsr.o
+	${AR} d libc.a lib_a-msizer.o
+	${AR} d libc.a lib_a-pvallocr.o
+	${AR} d libc.a lib_a-realloc.o
+	${AR} d libc.a lib_a-reallocr.o
+	${AR} d libc.a lib_a-vallocr.o
+	${CC} -shared -o libc.so -Wl,--whole-archive libc.a -Wl,--no-whole-archive
+	rm libc.a
+
+# Installs into ../hdd. demob and libc.so are copied below but are not part
+# of `all`, so they are listed as prerequisites here.
+go: all demob libc.so
+	cp demo ../hdd/bin/ld-demo
+	cp demob ../hdd/bin/ld-demob
+	mkdir -p ../hdd/usr/lib
+	cp libdemo.so ../hdd/usr/lib/libdemo.so
+	cp libc.so ../hdd/usr/lib/libc.so
+	mkdir -p ../hdd/lib
+	cp ld.so ../hdd/lib/ld.so
+
+# Rebuilds the CD image and drives the "ToaruOS Live CD" VirtualBox VM with
+# scripted key presses. Lines prefixed with '-' are allowed to fail.
+cd: go
+	cd ..; make cdrom
+	-VBoxManage controlvm "ToaruOS Live CD" poweroff
+	sleep 0.2
+	-VBoxManage startvm "ToaruOS Live CD"
+	sleep 3
+	-VBoxManage controlvm "ToaruOS Live CD" keyboardputscancode 1c 9c
+	sleep 2
+	-VBoxManage controlvm "ToaruOS Live CD" keyboardputscancode 38 3e be b8 1d 38 14 94 b8 9d
+	sleep 0.5
+	-VBoxManage controlvm "ToaruOS Live CD" keyboardputscancode 38 0f 8f b8
+	sleep 0.2
+	-VBoxManage controlvm "ToaruOS Live CD" keyboardputscancode 38 44 c4 b8
+
+

+ 6 - 0
linker/README.md

@@ -0,0 +1,6 @@
+# ToaruOS ld.so
+
+This is a (work-in-progress) dynamic ELF linker/loader for ToaruOS.
+
+This is a mostly-working loader capable of at least loading basic binaries that depend on a shared libc. There are still some bugs to work out, and it is not capable of loading some more complicated binaries.
+

+ 11 - 0
linker/demo.c

@@ -0,0 +1,11 @@
+#include <stdio.h>
+
+extern int return_42(void);
+
+/* Writes a greeting to stderr, then prints the value returned by the
+ * externally defined return_42(). Always exits with status 0. */
+int main(int argc, char * argv[]) {
+	int answer;
+
+	fputs("Hello world!\n", stderr);
+
+	/* Call into the library only after the first message has been written. */
+	answer = return_42();
+	fprintf(stderr, "Hello, dynamic world: %d\n", answer);
+
+	return 0;
+}

+ 6 - 0
linker/demob.c

@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+/* Writes a single greeting line to stdout and exits with status 0. */
+int main(int argc, char * argv[]) {
+	fputs("Hello, world!\n", stdout);
+	return 0;
+}

+ 15 - 0
linker/libdemo.c

@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h> /* getpid() */
+
+
+/* Runs when the shared object's constructors are executed; announces itself on stderr. */
+__attribute__((constructor))
+static void butts(void) {
+	fprintf(stderr, "I'm a constructor!\n");
+}
+
+extern char * _username;
+
+/* Prints a message containing the caller's pid to stderr and returns 42. */
+int return_42(void) {
+	fprintf(stderr, "I am a dynamically loaded shared object. pid = %d\n", getpid());
+	return 42;
+}

+ 45 - 0
linker/link.ld

@@ -0,0 +1,45 @@
+/* vim: tabstop=4 shiftwidth=4 noexpandtab
+ * 
+ * Linker script for ld.so (the Makefile passes it with -T link.ld).
+ * The image starts at 0x3F000000; .text, .rodata, .data, .bss and .eh_frame
+ * follow in that order, each aligned to 4K. The symbols phys, code and end
+ * are defined at the points marked below.
+ */
+ENTRY(_start)
+
+SECTIONS
+{
+	. = 0x3F000000; /* location counter: first address of the image */
+	phys = .; /* symbol at the start of the image */
+
+	.text BLOCK(4K) : ALIGN(4K)
+	{
+		code = .; /* symbol at the start of the code */
+		*(.text)
+	}
+
+	.rodata BLOCK(4K) : ALIGN(4K)
+	{
+		*(.rodata)
+	}
+
+	.data BLOCK(4K) : ALIGN(4K)
+	{
+		*(.data)
+	}
+
+	.bss BLOCK(4K) : ALIGN(4K)
+	{
+		*(COMMON) /* common symbols are placed ahead of the .bss input sections */
+		*(.bss)
+	}
+
+	.eh_frame BLOCK(4K) : ALIGN(4K)
+	{
+		*(.eh_frame)
+	}
+
+	end = .; /* symbol just past the last section; linker.c declares it as `extern char end[]` */
+
+	/DISCARD/ : /* input sections listed here are dropped from the output */
+	{
+		*(.comment)
+		*(.note.gnu.build-id)
+	}
+}

+ 540 - 0
linker/linker.c

@@ -0,0 +1,540 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/stat.h>
+/* Defined before trace.h is included; presumably the tag TRACE prints - TODO confirm in trace.h. */
+#define TRACE_APP_NAME "ld.so"
+/* TRACE_LD forwards its arguments to TRACE, but only while __trace_ld is non-zero. */
+#define TRACE_LD(...) do { if (__trace_ld) { TRACE(__VA_ARGS__); } } while (0)
+/* Tracing switch; main() sets it to 1 when LD_DEBUG is "1" or "yes". */
+static int __trace_ld = 0;
+
+#include "../kernel/include/elf.h"
+#include "../userspace/lib/trace.h"
+/* The list and hashmap implementations are pulled in as source files rather than linked separately. */
+#include "../userspace/lib/list.c"
+#include "../userspace/lib/hashmap.c"
+/* Signature through which main() calls the loaded program's entry: (argc, argv, environ). */
+typedef int (*entry_point_t)(int, char *[], char**);
+/* link.ld defines a symbol named `end` after the last output section; not referenced by the functions in this file. */
+extern char end[];
+/* dumb_symbol_table: symbol name -> address, filled by object_relocate().
+ * glob_dat: symbol name -> r_offset of a COPY relocation, filled by object_find_copy_relocations(). */
+static hashmap_t * dumb_symbol_table;
+static hashmap_t * glob_dat;
+/* State kept for one ELF object (the main program or a library) while it is being loaded. */
+typedef struct elf_object {
+	FILE * file; /* stream opened by open_object(); headers and segments are read from it */
+
+	/* Full copy of the header. */
+	Elf32_Header header;
+
+	/* Pointers to loaded stuff */
+	char * string_table; /* section-name string table, heap copy made by object_postload() */
+
+	char * dyn_string_table; /* from dynamic tag 5, offset by base */
+	size_t dyn_string_table_size; /* from dynamic tag 10 */
+
+	Elf32_Sym * dyn_symbol_table; /* from dynamic tag 6, offset by base */
+	size_t dyn_symbol_table_size; /* number of symbols, taken from dyn_hash[1] */
+
+	Elf32_Dyn * dynamic; /* in-memory address of the PT_DYNAMIC segment, or NULL */
+	Elf32_Word * dyn_hash; /* from dynamic tag 4, offset by base */
+
+	void (*init)(void); /* from dynamic tag 12, offset by base; called by main() */
+	void (**ctors)(void); /* in-memory address of the ".ctors" section, or NULL */
+	size_t ctors_size; /* sh_size of ".ctors", i.e. a byte count */
+
+	uintptr_t base; /* load base handed to object_load() */
+
+	list_t * dependencies; /* names from dynamic tag 1 entries; the strings live in dyn_string_table */
+
+} elf_t;
+
+/*
+ * Turn a library name into a path that exists on disk.
+ *
+ * A name containing '/' is returned as-is (duplicated) without any lookup.
+ * Otherwise each ':'-separated directory of LD_LIBRARY_PATH (or
+ * "/usr/lib:/lib" when the variable is unset) is tried in order, and the
+ * first "<dir>/<file>" for which stat() succeeds is returned.
+ *
+ * Returns a heap-allocated string the caller must free(), or NULL when
+ * nothing was found or memory ran out.
+ */
+static char * find_lib(const char * file) {
+
+	if (strchr(file, '/')) return strdup(file);
+
+	const char * path = getenv("LD_LIBRARY_PATH");
+	if (!path) {
+		path = "/usr/lib:/lib";
+	}
+
+	/* strtok_r writes into its input, so walk a private copy. */
+	char * xpath = strdup(path);
+	if (!xpath) return NULL;
+
+	char * result = NULL;
+	char * last = NULL;
+	for (char * p = strtok_r(xpath, ":", &last); p; p = strtok_r(NULL, ":", &last)) {
+		struct stat stat_buf;
+		/* +2: one byte for the '/' separator, one for the terminator. */
+		char * exe = malloc(strlen(p) + strlen(file) + 2);
+		if (!exe) break;
+		strcpy(exe, p);
+		strcat(exe, "/");
+		strcat(exe, file);
+
+		if (stat(exe, &stat_buf) == 0) {
+			result = exe;
+			break;
+		}
+		free(exe);
+	}
+
+	/* Released on every path, including the successful one. */
+	free(xpath);
+
+	return result;
+}
+
+/*
+ * Open an ELF file and read its header.
+ *
+ * `path` is resolved with find_lib(). On success a zero-initialised elf_t
+ * is returned with `file`, `header` and an empty `dependencies` list set.
+ * Returns NULL when the file cannot be found or opened, when memory runs
+ * out, when the header cannot be read in full, or when the first four
+ * identification bytes are not the ELF magic. Nothing stays open or
+ * allocated on failure.
+ */
+static elf_t * open_object(const char * path) {
+
+	char * file = find_lib(path);
+	if (!file) return NULL;
+
+	FILE * f = fopen(file, "r");
+
+	free(file);
+
+	if (!f) {
+		return NULL;
+	}
+
+	elf_t * object = calloc(1, sizeof(elf_t));
+
+	if (!object) {
+		fclose(f);
+		return NULL;
+	}
+
+	object->file = f;
+
+	/* One complete header or nothing. */
+	if (fread(&object->header, sizeof(Elf32_Header), 1, f) != 1) {
+		goto fail;
+	}
+
+	if (object->header.e_ident[0] != ELFMAG0 ||
+	    object->header.e_ident[1] != ELFMAG1 ||
+	    object->header.e_ident[2] != ELFMAG2 ||
+	    object->header.e_ident[3] != ELFMAG3) {
+		goto fail;
+	}
+
+	object->dependencies = list_create();
+
+	return object;
+
+fail:
+	/* The stream was opened above and must not outlive the failed object. */
+	fclose(f);
+	free(object);
+	return NULL;
+}
+
+/*
+ * Compute the span covered by the object's PT_LOAD program headers:
+ * (highest p_vaddr + p_memsz) minus (lowest p_vaddr).
+ * Returns 0 when the object has no readable PT_LOAD header.
+ */
+static size_t object_calculate_size(elf_t * object) {
+
+	uintptr_t base_addr = 0xFFFFFFFF;
+	uintptr_t end_addr  = 0x0;
+
+	/* e_phentsize comes from the file; never read more than one Elf32_Phdr. */
+	size_t phdr_bytes = object->header.e_phentsize;
+	if (phdr_bytes > sizeof(Elf32_Phdr)) {
+		phdr_bytes = sizeof(Elf32_Phdr);
+	}
+
+	for (size_t headers = 0; headers < object->header.e_phnum; headers++) {
+		Elf32_Phdr phdr;
+		memset(&phdr, 0, sizeof(phdr));
+
+		fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET);
+		if (fread(&phdr, phdr_bytes, 1, object->file) != 1) {
+			/* Unreadable entry: skip it rather than use garbage. */
+			continue;
+		}
+
+		if (phdr.p_type != PT_LOAD) {
+			continue;
+		}
+
+		if (phdr.p_vaddr < base_addr) {
+			base_addr = phdr.p_vaddr;
+		}
+		if (phdr.p_memsz + phdr.p_vaddr > end_addr) {
+			end_addr = phdr.p_memsz + phdr.p_vaddr;
+		}
+	}
+
+	/* base_addr is untouched when no PT_LOAD header was seen. */
+	if (base_addr == 0xFFFFFFFF) return 0;
+	return end_addr - base_addr;
+}
+
+/*
+ * Load the object's segments at `base` and record `base` in the object.
+ *
+ * For each PT_LOAD header, system function 10 is invoked with the target
+ * address and p_memsz (presumably to make that range usable - TODO confirm
+ * against the kernel), p_filesz bytes are read from the file to
+ * base + p_vaddr, and the remainder up to p_memsz is zeroed.
+ * For PT_DYNAMIC, the in-memory address of the segment is stored in
+ * object->dynamic.
+ *
+ * Returns the highest address (base + p_vaddr + p_memsz) of any loaded
+ * segment, or 0 when nothing was loaded.
+ */
+static uintptr_t object_load(elf_t * object, uintptr_t base) {
+
+	uintptr_t end_addr = 0x0;
+
+	object->base = base;
+
+	/* e_phentsize comes from the file; never read more than one Elf32_Phdr. */
+	size_t phdr_bytes = object->header.e_phentsize;
+	if (phdr_bytes > sizeof(Elf32_Phdr)) {
+		phdr_bytes = sizeof(Elf32_Phdr);
+	}
+
+	/* Load object */
+	for (size_t headers = 0; headers < object->header.e_phnum; headers++) {
+		Elf32_Phdr phdr;
+		memset(&phdr, 0, sizeof(phdr));
+
+		fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET);
+		if (fread(&phdr, phdr_bytes, 1, object->file) != 1) {
+			/* Unreadable entry: skip it rather than act on garbage. */
+			continue;
+		}
+
+		switch (phdr.p_type) {
+			case PT_LOAD:
+				{
+					char * dest = (char *)(base + phdr.p_vaddr);
+					char * args[] = {dest, (char *)phdr.p_memsz};
+					syscall_system_function(10, args);
+					fseek(object->file, phdr.p_offset, SEEK_SET);
+					fread(dest, phdr.p_filesz, 1, object->file);
+
+					/* Bytes beyond the file-backed part must read as zero. */
+					if (phdr.p_memsz > phdr.p_filesz) {
+						memset(dest + phdr.p_filesz, 0, phdr.p_memsz - phdr.p_filesz);
+					}
+
+					if (end_addr < phdr.p_vaddr + base + phdr.p_memsz) {
+						end_addr = phdr.p_vaddr + base + phdr.p_memsz;
+					}
+				}
+				break;
+			case PT_DYNAMIC:
+				object->dynamic = (Elf32_Dyn *)(base + phdr.p_vaddr);
+				break;
+			default:
+				break;
+		}
+	}
+
+	return end_addr;
+}
+
+/*
+ * Gather metadata from an object that object_load() has already placed in
+ * memory:
+ *   - a heap copy of the section-name string table (object->string_table);
+ *   - from the dynamic section: hash table, string table and its size,
+ *     symbol table, init function, and the list of needed libraries;
+ *   - the address and byte size of the ".ctors" section, if present.
+ *
+ * When the section-name string table cannot be read, the ".ctors" lookup
+ * is skipped. Always returns 0.
+ */
+static int object_postload(elf_t * object) {
+
+	/* e_shentsize comes from the file; never read more than one Elf32_Shdr. */
+	size_t shdr_bytes = object->header.e_shentsize;
+	if (shdr_bytes > sizeof(Elf32_Shdr)) {
+		shdr_bytes = sizeof(Elf32_Shdr);
+	}
+
+	/* Number of valid bytes in object->string_table; 0 while unavailable. */
+	size_t string_table_size = 0;
+
+	/* Load section string table */
+	{
+		Elf32_Shdr shdr;
+		memset(&shdr, 0, sizeof(shdr));
+		fseek(object->file, object->header.e_shoff + object->header.e_shentsize * object->header.e_shstrndx, SEEK_SET);
+
+		size_t wanted = 0;
+		if (fread(&shdr, shdr_bytes, 1, object->file) == 1) {
+			wanted = shdr.sh_size;
+		}
+		if (wanted != 0 && wanted != (size_t)-1) {
+			/* One extra zero byte keeps every name terminated inside the buffer. */
+			object->string_table = calloc(1, wanted + 1);
+		}
+		if (object->string_table) {
+			fseek(object->file, shdr.sh_offset, SEEK_SET);
+			if (fread(object->string_table, wanted, 1, object->file) == 1) {
+				string_table_size = wanted;
+			}
+		}
+	}
+
+	if (object->dynamic) {
+		Elf32_Dyn * table;
+
+		/* First pass: locate the tables. Tag numbers follow the ELF specification. */
+		for (table = object->dynamic; table->d_tag; table++) {
+			switch (table->d_tag) {
+				case 4: /* DT_HASH; word 1 of the hash table is the symbol count */
+					object->dyn_hash = (Elf32_Word *)(object->base + table->d_un.d_ptr);
+					object->dyn_symbol_table_size = object->dyn_hash[1];
+					break;
+				case 5: /* Dynamic String Table */
+					object->dyn_string_table = (char *)(object->base + table->d_un.d_ptr);
+					break;
+				case 6: /* Dynamic Symbol Table */
+					object->dyn_symbol_table = (Elf32_Sym *)(object->base + table->d_un.d_ptr);
+					break;
+				case 10: /* Size of string table */
+					object->dyn_string_table_size = table->d_un.d_val;
+					break;
+				case 12: /* DT_INIT */
+					object->init = (void (*)(void))(table->d_un.d_ptr + object->base);
+					break;
+				default:
+					break;
+			}
+		}
+
+		/* Second pass: DT_NEEDED values are offsets into the string table,
+		 * which may appear after them in the dynamic section. */
+		if (object->dyn_string_table) {
+			for (table = object->dynamic; table->d_tag; table++) {
+				if (table->d_tag == 1) { /* DT_NEEDED */
+					list_insert(object->dependencies, object->dyn_string_table + table->d_un.d_val);
+				}
+			}
+		}
+	}
+
+	/* Look for the constructor section by name. */
+	for (size_t idx = 0; idx < object->header.e_shnum; idx++) {
+		Elf32_Shdr shdr;
+		memset(&shdr, 0, sizeof(shdr));
+		fseek(object->file, object->header.e_shoff + object->header.e_shentsize * idx, SEEK_SET);
+		if (fread(&shdr, shdr_bytes, 1, object->file) != 1) {
+			continue;
+		}
+
+		/* A name offset outside the string table cannot be compared safely. */
+		if (shdr.sh_name >= string_table_size) {
+			continue;
+		}
+
+		if (!strcmp(object->string_table + shdr.sh_name, ".ctors")) {
+			object->ctors = (void *)(shdr.sh_addr + object->base);
+			object->ctors_size = shdr.sh_size;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Returns 1 for the relocation types that object_relocate() resolves by
+ * symbol name - 1 (32), 2 (PC32), 5 (COPY), 6 (GLOB_DAT), 7 (JUMP_SLOT) -
+ * and 0 for every other type.
+ */
+static int need_symbol_for_type(unsigned char type) {
+	if (type == 1 || type == 2) {
+		return 1;
+	}
+	if (type >= 5 && type <= 7) {
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * Publish the object's defined dynamic symbols and apply its relocations.
+ *
+ * Step 1: every dynamic symbol with a non-zero section index is added to
+ * dumb_symbol_table under its name unless that name is already present
+ * (first definition wins).
+ *
+ * Step 2: every section of type 9 (SHT_REL) is walked and each entry is
+ * applied at base + r_offset. Handled types: 1 (32), 2 (PC32), 5 (COPY),
+ * 6 (GLOB_DAT), 7 (JUMP_SLOT), 8 (RELATIVE); anything else is only traced.
+ * A symbol that cannot be found is reported on stderr and resolves to 0;
+ * a COPY relocation for such a symbol is skipped.
+ *
+ * Always returns 0.
+ */
+static int object_relocate(elf_t * object) {
+	if (object->dyn_symbol_table) {
+		Elf32_Sym * entry = object->dyn_symbol_table;
+		for (size_t i = 0; i < object->dyn_symbol_table_size; i++, entry++) {
+			char * symname = (char *)((uintptr_t)object->dyn_string_table + entry->st_name);
+			if (entry->st_shndx && !hashmap_has(dumb_symbol_table, symname)) {
+				hashmap_set(dumb_symbol_table, symname, (void*)(entry->st_value + object->base));
+			}
+		}
+	}
+
+	/* e_shentsize comes from the file; never read more than one Elf32_Shdr. */
+	size_t shdr_bytes = object->header.e_shentsize;
+	if (shdr_bytes > sizeof(Elf32_Shdr)) {
+		shdr_bytes = sizeof(Elf32_Shdr);
+	}
+
+	for (size_t idx = 0; idx < object->header.e_shnum; idx++) {
+		Elf32_Shdr shdr;
+		memset(&shdr, 0, sizeof(shdr));
+		fseek(object->file, object->header.e_shoff + object->header.e_shentsize * idx, SEEK_SET);
+		if (fread(&shdr, shdr_bytes, 1, object->file) != 1) {
+			continue;
+		}
+
+		if (shdr.sh_type != 9) { /* only SHT_REL sections carry these entries */
+			continue;
+		}
+
+		Elf32_Rel * table = (Elf32_Rel *)(shdr.sh_addr + object->base);
+		while ((uintptr_t)table - ((uintptr_t)shdr.sh_addr + object->base) < shdr.sh_size) {
+			unsigned int  symbol = ELF32_R_SYM(table->r_info);
+			unsigned char type = ELF32_R_TYPE(table->r_info);
+			Elf32_Sym * sym = &object->dyn_symbol_table[symbol];
+			void * target = (void *)(table->r_offset + object->base);
+
+			char * symname = NULL;
+			/* Default: the symbol as defined by this object. */
+			uintptr_t value = sym->st_value + object->base;
+			if (need_symbol_for_type(type)) {
+				symname = (char *)((uintptr_t)object->dyn_string_table + sym->st_name);
+			}
+
+			/* Undefined symbols, and every COPY relocation, are looked up
+			 * in the global table instead. */
+			if (((sym->st_shndx == 0) && need_symbol_for_type(type)) || (type == 5)) {
+				if (hashmap_has(dumb_symbol_table, symname)) {
+					value = (uintptr_t)hashmap_get(dumb_symbol_table, symname);
+				} else {
+					fprintf(stderr, "Symbol not found: %s\n", symname);
+					value = 0x0;
+				}
+			}
+
+			/* Implicit addend stored at the target; only some types use it. */
+			uintptr_t addend = 0;
+
+			/* Relocations, symbol lookups, etc. */
+			switch (type) {
+				case 6: /* GLOB_DAT */
+					/* Prefer the location recorded by object_find_copy_relocations(). */
+					if (hashmap_has(glob_dat, symname)) {
+						value = (uintptr_t)hashmap_get(glob_dat, symname);
+					}
+					/* fallthrough */
+				case 7: /* JUMP_SLOT */
+					memcpy(target, &value, sizeof(uintptr_t));
+					break;
+				case 1: /* 32 */
+					memcpy(&addend, target, sizeof(uintptr_t));
+					value += addend;
+					memcpy(target, &value, sizeof(uintptr_t));
+					break;
+				case 2: /* PC32 */
+					memcpy(&addend, target, sizeof(uintptr_t));
+					value += addend;
+					value -= (table->r_offset + object->base);
+					memcpy(target, &value, sizeof(uintptr_t));
+					break;
+				case 8: /* RELATIVE */
+					memcpy(&addend, target, sizeof(uintptr_t));
+					value = object->base + addend;
+					memcpy(target, &value, sizeof(uintptr_t));
+					break;
+				case 5: /* COPY */
+					/* Nothing to copy from when the symbol was not found. */
+					if (value != 0) {
+						memcpy(target, (void *)value, sym->st_size);
+					}
+					break;
+				default:
+					TRACE_LD("Unknown relocation type: %d", type);
+					break;
+			}
+
+			table++;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Record every COPY relocation (type 5) of the object in glob_dat, keyed
+ * by symbol name, with the relocation's r_offset as the value.
+ * object_relocate() consults this table for GLOB_DAT entries.
+ */
+static void object_find_copy_relocations(elf_t * object) {
+
+	/* e_shentsize comes from the file; never read more than one Elf32_Shdr. */
+	size_t shdr_bytes = object->header.e_shentsize;
+	if (shdr_bytes > sizeof(Elf32_Shdr)) {
+		shdr_bytes = sizeof(Elf32_Shdr);
+	}
+
+	for (size_t idx = 0; idx < object->header.e_shnum; idx++) {
+		Elf32_Shdr shdr;
+		memset(&shdr, 0, sizeof(shdr));
+		fseek(object->file, object->header.e_shoff + object->header.e_shentsize * idx, SEEK_SET);
+		if (fread(&shdr, shdr_bytes, 1, object->file) != 1) {
+			/* Unreadable entry: skip it rather than act on garbage. */
+			continue;
+		}
+
+		if (shdr.sh_type != 9) { /* only SHT_REL sections are scanned */
+			continue;
+		}
+
+		Elf32_Rel * table = (Elf32_Rel *)(shdr.sh_addr + object->base);
+		while ((uintptr_t)table - ((uintptr_t)shdr.sh_addr + object->base) < shdr.sh_size) {
+			unsigned char type = ELF32_R_TYPE(table->r_info);
+			if (type == 5) {
+				unsigned int  symbol = ELF32_R_SYM(table->r_info);
+				Elf32_Sym * sym = &object->dyn_symbol_table[symbol];
+				char * symname = (char *)((uintptr_t)object->dyn_string_table + sym->st_name);
+				hashmap_set(glob_dat, symname, (void *)table->r_offset);
+			}
+			table++;
+		}
+	}
+
+}
+
+/*
+ * Linear search of the object's dynamic symbol table for `symbol_name`.
+ * Returns st_value + base of the first entry whose name matches, or NULL
+ * when the object has no dynamic symbol table or no entry matches.
+ */
+static void * object_find_symbol(elf_t * object, const char * symbol_name) {
+	Elf32_Sym * entry = object->dyn_symbol_table;
+
+	if (entry == NULL) {
+		return NULL;
+	}
+
+	for (size_t remaining = object->dyn_symbol_table_size; remaining > 0; remaining--, entry++) {
+		const char * candidate = (char *)((uintptr_t)object->dyn_string_table + entry->st_name);
+		if (strcmp(symbol_name, candidate) == 0) {
+			return (void *)(entry->st_value + object->base);
+		}
+	}
+
+	return NULL;
+}
+
+/* Table pairing names with addresses inside the loader; the last entry is {NULL, NULL}.
+ * Nothing in the visible part of this file reads it. */
+static struct {
+	const char * name; /* exported name */
+	void * symbol; /* address associated with that name */
+} ld_builtin_exports[] = {
+	{"_dl_open_object", open_object},
+	{NULL, NULL}
+};
+
+/*
+ * Loader entry point.
+ *
+ * The object to run is argv[1], or argv[2] when argv[1] is "-e"; the
+ * program receives argv starting at index 1, or index 3 in the "-e" form.
+ *
+ * Sequence: load the main object at base 0, record its COPY relocations,
+ * load and relocate each needed library at the next page boundary
+ * ("libg.so" is skipped), relocate the main object, run library
+ * constructors (unless LD_DISABLE_CTORS is "1" or "yes") and init
+ * functions, pass the page-aligned end address to system function 9, then
+ * call the entry point from the ELF header.
+ *
+ * Returns 1 when arguments are missing or an object cannot be opened,
+ * otherwise 0 once the entry point returns.
+ */
+int main(int argc, char * argv[]) {
+
+	const char * self = (argc > 0 && argv[0]) ? argv[0] : TRACE_APP_NAME;
+
+	if (argc < 2) {
+		fprintf(stderr, "%s: error: no object specified.\n", self);
+		return 1;
+	}
+
+	char * file = argv[1];
+	size_t arg_offset = 1;
+
+	if (!strcmp(argv[1], "-e")) {
+		if (argc < 3) {
+			fprintf(stderr, "%s: error: -e requires an object to load.\n", self);
+			return 1;
+		}
+		arg_offset = 3;
+		file = argv[2];
+	}
+
+	char * trace_ld_env = getenv("LD_DEBUG");
+	if (trace_ld_env && (!strcmp(trace_ld_env,"1") || !strcmp(trace_ld_env,"yes"))) {
+		__trace_ld = 1;
+	}
+
+	dumb_symbol_table = hashmap_create(10);
+	glob_dat = hashmap_create(10);
+
+	elf_t * main_obj = open_object(file);
+
+	if (!main_obj) {
+		fprintf(stderr, "%s: error: failed to open object '%s'.\n", argv[0], file);
+		return 1;
+	}
+
+	/* The size is computed but not used by anything below. */
+	size_t main_size = object_calculate_size(main_obj);
+	(void)main_size;
+	uintptr_t end_addr = object_load(main_obj, 0x0);
+	object_postload(main_obj);
+
+	object_find_copy_relocations(main_obj);
+
+	hashmap_t * libs = hashmap_create(10);
+
+	list_t * ctor_libs = list_create();
+	list_t * init_libs = list_create();
+
+	TRACE_LD("Loading dependencies.");
+	node_t * item;
+	while ((item = list_pop(main_obj->dependencies))) {
+		/* Each library starts on the next 4K boundary. */
+		end_addr = (end_addr + 0xFFF) & ~(uintptr_t)0xFFF;
+
+		char * lib_name = item->value;
+		if (!strcmp(lib_name, "libg.so")) {
+			free(item);
+			continue;
+		}
+
+		elf_t * lib = open_object(lib_name);
+		if (!lib) {
+			fprintf(stderr, "Failed to load dependency '%s'.\n", lib_name);
+			return 1;
+		}
+		hashmap_set(libs, lib_name, lib);
+
+		TRACE_LD("Loading %s at 0x%x", lib_name, end_addr);
+		end_addr = object_load(lib, end_addr);
+		object_postload(lib);
+		TRACE_LD("Relocating %s", lib_name);
+		object_relocate(lib);
+
+		fclose(lib->file);
+
+		/* Constructors and init functions run later, after the main object is relocated. */
+		if (lib->ctors) {
+			list_insert(ctor_libs, lib);
+		}
+		if (lib->init) {
+			list_insert(init_libs, lib);
+		}
+
+		free(item);
+	}
+
+	TRACE_LD("Relocating main object");
+	object_relocate(main_obj);
+	TRACE_LD("Placing heap at end");
+	end_addr = (end_addr + 0xFFF) & ~(uintptr_t)0xFFF;
+
+	char * ld_no_ctors = getenv("LD_DISABLE_CTORS");
+	if (ld_no_ctors && (!strcmp(ld_no_ctors,"1") || !strcmp(ld_no_ctors,"yes"))) {
+		TRACE_LD("skipping ctors because LD_DISABLE_CTORS was set");
+	} else {
+		foreach(node, ctor_libs) {
+			elf_t * lib = node->value;
+			if (lib->ctors) {
+				TRACE_LD("Executing ctors...");
+				/* ctors_size is a byte count; ctors is an array of pointers. */
+				size_t count = lib->ctors_size / sizeof(uintptr_t);
+				for (size_t i = 0; i < count; i++) {
+					void (*ctor)(void) = lib->ctors[i];
+					uintptr_t addr = (uintptr_t)ctor;
+					/* 0 and -1 are list markers some toolchains place in .ctors, not functions. */
+					if (addr == 0 || addr == (uintptr_t)-1) {
+						continue;
+					}
+					TRACE_LD(" 0x%x()", (unsigned int)addr);
+					ctor();
+				}
+			}
+		}
+	}
+
+	foreach(node, init_libs) {
+		elf_t * lib = node->value;
+		lib->init();
+	}
+
+
+	{
+		/* Presumably tells the kernel where the heap starts (see the trace message above) - TODO confirm. */
+		char * args[] = {(char*)end_addr};
+		syscall_system_function(9, args);
+	}
+	TRACE_LD("Jumping to entry point");
+
+	entry_point_t entry = (entry_point_t)main_obj->header.e_entry;
+	entry(argc-arg_offset,argv+arg_offset,environ);
+
+	return 0;
+}