ref: 6a89c47b1ce152b0ec15a3fb9df225ae4ea6287a
parent: 868b16b5459193afd2044121c09884ccf5560df2
author: halfwit <michaelmisch1985@gmail.com>
date: Sat Sep 14 06:54:56 PDT 2024
Update patching, still not jumping to entrypoint correctly
--- a/include/lib.h
+++ b/include/lib.h
@@ -314,7 +314,8 @@
extern int getfields(char*, char**, int, int, char*);
extern char* utfecpy(char*, char*, char*);
extern int tas(int*);
-extern int trampoline(void*);
+extern void trampoline(void*);
+extern void start(uintptr_t entry, Tos *, int, char *[]);
extern long sysintercept(void*, void*, void*, void*, void*, void*, void*);
extern int patch(void*, int);
extern void quotefmtinstall(void);
--- a/kern/dat.h
+++ b/kern/dat.h
@@ -126,10 +126,10 @@
uintptr entry;
uintptr text;
uintptr data;
- uintptr bssz;
+ uintptr bss;
+ uintptr bs;
uintptr ts;
uintptr ds;
- uintptr bss;
};
struct Block
--- a/kern/posix.c
+++ b/kern/posix.c
@@ -35,9 +35,8 @@
};
static pthread_key_t prdakey;
-typedef void (*startfn)(uintptr, Tos*, int, char**);
+//typedef void (*startfn)(int, char**);
-
Proc*
_getproc(void)
{
@@ -142,7 +141,6 @@
trex(void *vp)
{
Proc *p;
- startfn start;
Tos tos;
int argc;
@@ -151,9 +149,9 @@
argc = nelem((char**)p->arg);
if(pthread_setspecific(prdakey, p))
panic("cannot setspecific");
- start = (startfn)up->bin->entry;
- start(up->bin->text, &tos, argc, p->arg);
- print("Done\n");
+ print("Greetings from TREX %lx\n", up->bin->entry+TRAMPSIZE);
+ /* Our entry point moves forward to accommodate the trampoline code */
+ start(up->bin->entry+TRAMPSIZE, &tos, argc, p->arg);
pexit("", 0);
return 0;
}
@@ -164,7 +162,6 @@
pthread_t pid;
pthread_attr_t attr;
pthread_attr_init(&attr);
- pthread_attr_setstacksize(&attr, 1024*1024);
if(pthread_create(&pid, &attr, trex, p)){
oserrstr();
panic("osexec: %r");
@@ -182,7 +179,7 @@
if(up->bin->data)
munmap((void*)up->bin->data, up->bin->ds);
if(up->bin->bss)
- munmap((void*)up->bin->bss, up->bin->bssz);
+ munmap((void*)up->bin->bss, up->bin->bs);
}
void
@@ -202,29 +199,24 @@
void
ospatchtext(void)
{
- int n;
void *text, *final;
/* Set up trampoline. Mach dependent */
text = mallocz(TRAMPSIZE, 1);
- n = trampoline(text);
- if(n != TRAMPSIZE)
- error("building trampoline failed");
+ trampoline(text);
- final = mmap(nil, up->bin->ts+n, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- memmove(final, text, n);
- memmove(final+n, (void*)up->bin->text, up->bin->ts);
+ final = mmap(nil, up->bin->ts+TRAMPSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ memmove(final, text, TRAMPSIZE);
+ memmove(final+TRAMPSIZE, (void*)up->bin->text, up->bin->ts);
if(!final)
error("unable to set up text segment with trampoline");
/* Patch. Mach dependent */
- if(patch(final+n, up->bin->ts) < 0)
+ if(patch(final+TRAMPSIZE, up->bin->ts) < 0)
error("unable to patch syscalls");
-
- if(mprotect(final, up->bin->ts+n, PROT_READ|PROT_EXEC) != 0)
+ up->bin->text = (uintptr)final;
+ if(mprotect((void*)up->bin->text, up->bin->ts+TRAMPSIZE, PROT_READ|PROT_EXEC) != 0)
error("Unable to mprotect: %r");
-
- up->bin->text = (uintptr)final+n;
}
void
@@ -245,7 +237,7 @@
{
void *bss;
// BSS - set it up in READ/WRITE
- bss = mmap((void*)up->bin->bss, up->bin->bssz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ bss = mmap((void*)up->bin->bss, up->bin->bs, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if(!bss)
error("unable to set up bss segment");
up->bin->bss = (uintptr)bss;
--- a/kern/sysproc.c
+++ b/kern/sysproc.c
@@ -244,6 +244,7 @@
}
if(!indir)
kstrdup(&elem, up->genbuf);
+ // TODO: Triple check this for accurate text/entry. Ensure they are right, our ts should not be the same each time, for example
n = devtab[tc->type]->read(tc, u.buf, sizeof(u.buf), 0);
if (n >= sizeof(Exec)){
magic = beswal(u.ehdr.ex.magic);
@@ -252,16 +253,16 @@
if(n < sizeof(u.ehdr))
error(Ebadexec);
up->bin->entry = beswav(u.ehdr.hdr[0]);
- up->bin->text = UTZERO+sizeof(u.ehdr);
+ up->bin->ts = UTZERO+sizeof(u.ehdr);
} else {
up->bin->entry = beswal(u.ehdr.ex.entry);
- up->bin->text = UTZERO+sizeof(Exec);
+ up->bin->ts = UTZERO+sizeof(Exec);
}
- if(up->bin->entry < up->bin->text)
+ if(up->bin->entry < up->bin->ts)
error(Ebadexec);
- up->bin->text += beswal(u.ehdr.ex.text);
- // BUG: This errors, errors seem broken currently
- if(up->bin->text <= up->bin->entry || up->bin->text >= (uintptr)(USTKTOP-USTKSIZE))
+
+ up->bin->ts += beswal(u.ehdr.ex.text);
+ if(up->bin->ts <= up->bin->entry || up->bin->ts >= (uintptr)(USTKTOP-USTKSIZE))
error(Ebadexec);
switch(magic){
case S_MAGIC: /* 2MB segment alignment for amd64 */
@@ -300,16 +301,16 @@
// Check the actual sysproc, there's some missing bits here still
up->arg = progarg;
}
- up->bin->ts = ((up->bin->text+align) & ~align) / BY2WD;
- up->bin->data = beswal(u.ehdr.ex.data);
- up->bin->bss = beswal(u.ehdr.ex.bss);
- up->bin->text -= UTZERO;
+
+ up->bin->ds = beswal(u.ehdr.ex.data);
+ up->bin->bs = beswal(u.ehdr.ex.bss);
+ up->bin->text = align;
+ up->bin->ts -= UTZERO;
align = BY2PG-1;
- up->bin->ds = ((up->bin->ts + up->bin->data + align) & ~align) / BY2WD;
- bssend = up->bin->ts + up->bin->data + up->bin->bss;
- up->bin->bssz = ((bssend + align) & ~align) / BY2WD;
- if(up->bin->ts >= (ulong)(USTKTOP-USTKSIZE) || up->bin->ds >= (ulong)(USTKTOP-USTKSIZE) || up->bin->bss >= (ulong)(USTKTOP-USTKSIZE))
- error(Ebadexec);
+ up->bin->data = ((up->bin->ts+align) & ~align) / BY2WD;
+ bssend = up->bin->ts + up->bin->ds + up->bin->bs;
+ up->bin->bss = ((bssend + align) & ~align) / BY2WD;
+ up->bin->bss -= up->bin->bs;
/* Set up text, data, and bss. Patch syscalls. OS dependent. */
osbuildtext(tc);
--- a/main.c
+++ b/main.c
@@ -44,7 +44,7 @@
static void
usage(void)
{
- fprintf(stderr, "usage: drawcpu [-D]\n");
+ fprintf(stderr, "usage: drawcpu [-D] [-p 9bin]\n");
exit(1);
}
--- a/posix-amd64/trampoline.c
+++ b/posix-amd64/trampoline.c
@@ -1,8 +1,7 @@
#include <u.h>
#include <libc.h>
#include "../kern/fns.h"
-
-#define _NSYS 53
+#include "mem.h"
/* TODO: This is not arm64 code, this is amd64 code */
extern void asm_syscall_hook(void);
--- a/posix-arm64/Makefile
+++ b/posix-arm64/Makefile
@@ -1,8 +1,9 @@
ROOT=..
include ../Make.config
LIB=../libmachdep.a
-CFLAGS+=-fpie
-LDFLAGS+=-fpie
+# Cannot use PIE because of trampoline
+#CFLAGS+=-fpie
+#LDFLAGS+=-fpie
OFILES=\
getcallerpc.$O\
@@ -9,6 +10,7 @@
tas.$O\
trampoline.$O\
patch.$O\
+ start.$O\
default: $(LIB)
$(LIB): $(OFILES)
--- a/posix-arm64/mem.h
+++ b/posix-arm64/mem.h
@@ -83,8 +83,9 @@
#define USTKTOP ((EVAMASK>>1)-0xFFFF) /* user segment end +1 */
#define USTKSIZE (16*1024*1024) /* user stack size */
-#define BLOCKALIGN 64 /* only used in allocb.c */
-#define TRAMPSIZE 72 /* Size of the trampoline */
+#define _NSYS 53
+#define BLOCKALIGN 64 /* only used in allocb.c */
+#define TRAMPSIZE 231 /* Size of the trampoline */
/*
* Sizes
*/
--- a/posix-arm64/patch.c
+++ b/posix-arm64/patch.c
@@ -9,13 +9,13 @@
int ret = -1;
for(i = 0; i < size - BY2WD; i++){
// MOV X0, #immediate (could be D2800000 | syscall_number)
- // BL X0 (could be D4000000 | offset)
+ // BL X0 (could be D4000010 | offset)
// 0xD63F0000 is our BLR X0
// This sets up our jmp to the trampoline code
if ((*(ulong*)&text[i] & 0xFFFF0000) == 0xD2800000 && (*(ulong*)&text[i+BY2SE] & 0xFFFF0000) == 0xD4000000) {
- ulong *ptr = (ulong*)&text[i];
- ptr[0] = 0xD6;
- ptr[1] = 0x3F;
+ ulong *ptr = (ulong*)&text[i+BY2SE];
+ *ptr &= 0xFFFF0000;
+ *ptr |= 0xD63F0000;
ret++;
}
}
--- /dev/null
+++ b/posix-arm64/start.c
@@ -1,0 +1,42 @@
+#include "u.h"
+#include "libc.h"
+void start(uintptr_t entry, Tos *_tos, int argc, char *argv[])
+{ /* Binds the four arguments to x0, x1, w2 and x3, builds an argc/argv frame below sp, then branches to entry. */
+ register uintptr_t r0 asm("x0") = entry;
+ register Tos *r1 asm("x1") = _tos;
+ register int r2 asm("w2") = argc;
+ register char **r3 asm("x3") = argv;
+ /* NOTE(review): the clobber list names only x4-x7, although the asm also moves sp, stores to memory and sets flags (subs) -- confirm that is acceptable. */
+ __asm__ __volatile__ (
+ // Copy the operands into x0-x3 (normally no-ops, given the register bindings above)
+ "mov x0, %0\n\t"
+ "mov x1, %1\n\t"
+ "mov w2, %w2\n\t"
+ "mov x3, %3\n\t"
+
+ // Reserve (argc + 1) * 8 bytes below sp for the argv pointers
+ "mov x4, x2\n\t" // x4 = argc
+ "add x4, x4, #1\n\t" // one extra slot (presumably the terminating nil entry -- confirm)
+ "lsl x4, x4, #3\n\t" // times 8: x4 is now a byte count
+ "sub sp, sp, x4\n\t"
+ "mov x5, sp\n\t" // x5 = destination cursor
+ "mov x6, x3\n\t" // x6 = source cursor (argv)
+ /* NOTE(review): x4 holds a byte count but is decremented by 1 per 8-byte copy below, so the loop runs (argc + 1) * 8 times -- it likely should count entries instead. */
+ // Copy argv one pointer at a time
+ "copy_argv_loop:\n\t" // NOTE(review): named (non-local) asm label; it would clash if this asm were ever emitted twice
+ "ldr x7, [x6], #8\n\t"
+ "str x7, [x5], #8\n\t"
+ "subs x4, x4, #1\n\t"
+ "bne copy_argv_loop\n\t"
+ /* NOTE(review): the 4-byte push below leaves sp not 16-byte aligned; AArch64 can fault on sp-based accesses with a misaligned sp -- confirm. */
+ // Push argc (32-bit) just below the argv copy
+ "sub sp, sp, #4\n\t"
+ "str w2, [sp]\n\t"
+ /* NOTE(review): at the branch x0 still holds entry and x1 holds _tos -- confirm this is what the target's startup code expects. */
+ // Branch to the entry point; br does not link, so the ret after it is not reached
+ "br x0\n\t"
+ "ret\n\t"
+ : // no outputs
+ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)
+ : "x4", "x5", "x6", "x7");
+}
\ No newline at end of file
--- a/posix-arm64/trampoline.c
+++ b/posix-arm64/trampoline.c
@@ -1,66 +1,75 @@
#include <u.h>
#include <libc.h>
+#include "mem.h"
-#define _NSYS 53
-
void asm_syscall_hook(void)
{
+ print("In hook somehow\n");
__asm__ __volatile__ (
- ".global asm_syscall_hook \n\t"
- "asm_syscall_hook: \n\t"
- "stp x29, x30, [sp, #-16]! \n\t" // Save frame pointer and link register
- "mov x29, sp \n\t" // Set up frame pointer
- "sub sp, sp, #32 \n\t" // Allocate 32 bytes on stack
- "str x8, [sp, #24] \n\t" // Save x8 (syscall number)
- "stp x0, x1, [sp, #8] \n\t" // Save x0 and x1
- "str x2, [sp] \n\t" // Save x2
- "mov x0, x8 \n\t" // Move syscall number to x0
- "mov x1, x0 \n\t" // Shift arguments: x0 -> x1
- "mov x2, x1 \n\t" // x1 -> x2
- "mov x3, x2 \n\t" // x2 -> x3
- "mov x4, x3 \n\t" // x3 -> x4
- "mov x5, x4 \n\t" // x4 -> x5
- "mov x6, x5 \n\t" // x5 -> x6
- "ldr x7, [sp, #24] \n\t" // Load original x8 into x7
- "bl _sysintercept \n\t" // Call syscall function
- "mov sp, x29 \n\t" // Restore stack pointer
- "ldp x29, x30, [sp], #16 \n\t" // Restore frame pointer and link register
- "ret \n\t" // Return
+ "asm_syscall_hook:\r\n"
+ "stp x29, x30, [sp, #-16]!\r\n" // Save frame pointer and link register
+ "mov x29, sp\r\n" // Set up frame pointer
+ "sub sp, sp, #32\r\n" // Allocate 32 bytes on stack
+ "stp x8, x0, [sp, #16]\r\n" // Save x8 (syscall number) and x0
+ "stp x1, x2, [sp]\r\n" // Save x1 and x2
+ "mov x1, x0\r\n" // Shift arguments: x0 -> x1
+ "mov x2, x1\r\n" // x1 -> x2
+ "mov x3, x2\r\n" // x2 -> x3
+ "mov x4, x3\r\n" // x3 -> x4
+ "mov x5, x4\r\n" // x4 -> x5
+ "mov x6, x5\r\n" // x5 -> x6
+ "mov x0, x8\r\n" // Move syscall number to x0
+ "bl _sysintercept\r\n" // Call syscall function
+ "ldp x1, x2, [sp]\r\n" // Restore original x1 and x2
+ "ldp x8, x3, [sp, #16]\r\n" // Restore original x8 and load original x0 into x3
+ "mov x1, x3\r\n" // Restore original x0 to x1 (new x0 is return value)
+ "ldp x29, x30, [sp], #48\r\n" // Restore frame pointer and link register, and deallocate stack
+ "ret\r\n"
);
}
-int
+/**
+ * Based on https://github.com/yasukata/zpoline/blob/master/main.c
+ * Writes a sled of 53 NOP instructions, followed by a short stub, into text.
+ * A patched syscall site is meant to branch into this sled instead of trapping;
+ * execution then runs through the NOPs into the stub, which is meant to reach asm_syscall_hook, which in turn calls sysintercept.
+ */
+void
trampoline(void *text)
{
- int i;
+ void *hook_address = (void*)&asm_syscall_hook;
+ uint8_t code[] = {
+ // 53 nop instructions (0xd503201f each, stored little-endian): the landing sled for patched Plan 9 syscalls
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+ 0x1f, 0x20, 0x03, 0xd5,
+ // Main code
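+ 0xff, 0x43, 0x00, 0xd1, // 0xd10043ff: sub sp, sp, #0x10 (NOTE(review): #0x80 would be 0xd10203ff)
+ 0x00, 0x00, 0x00, 0x90, // 0x90000000: adrp x0, 0 (placeholder; NOTE(review): Rd is x0 here, not x11)
+ 0x0b, 0x00, 0x00, 0x91, // 0x9100000b: add x11, x0, #0 (placeholder for the low 12 bits)
+ 0x60, 0x01, 0x3f, 0xd6 // 0xd63f0160: blr x11 (NOTE(review): a plain br x11 would be 0xd61f0160)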
+ };
- for(i = 0; i < _NSYS; i++)
- ((uint8_t *)text)[i] = 0x90;
+ // Calculate the relative address for adrp
+ uintptr_t adrp_offset = ((uintptr_t)hook_address >> 12) & 0x7FFFF;
+ code[216] |= (adrp_offset & 0xFF);
+ code[217] |= ((adrp_offset >> 8) & 0x7F) << 1;
- /* Preserve redzone */
- ((uint8_t *) text)[_NSYS + 0x00] = 0x48;
- ((uint8_t *) text)[_NSYS + 0x01] = 0x81;
- ((uint8_t *) text)[_NSYS + 0x02] = 0xec;
- ((uint8_t *) text)[_NSYS + 0x03] = 0x80;
- ((uint8_t *) text)[_NSYS + 0x04] = 0x00;
- ((uint8_t *) text)[_NSYS + 0x05] = 0x00;
- ((uint8_t *) text)[_NSYS + 0x06] = 0x00;
+ // Set the low 12 bits for add
+ uintptr_t add_offset = (uintptr_t)hook_address & 0xFFF;
+ code[220] |= (add_offset & 0xFF);
+ code[221] |= (add_offset >> 8) << 2;
- /* 49 bb [64-bit addr (8-byte)] movabs [64-bit addr (8-byte)],%r11 */
- ((uint8_t *) text)[_NSYS + 0x07] = 0x49;
- ((uint8_t *) text)[_NSYS + 0x08] = 0xbb;
- ((uint8_t *) text)[_NSYS + 0x09] = ((uint64_t) asm_syscall_hook >> (8 * 0)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x0a] = ((uint64_t) asm_syscall_hook >> (8 * 1)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x0b] = ((uint64_t) asm_syscall_hook >> (8 * 2)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x0c] = ((uint64_t) asm_syscall_hook >> (8 * 3)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x0d] = ((uint64_t) asm_syscall_hook >> (8 * 4)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x0e] = ((uint64_t) asm_syscall_hook >> (8 * 5)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x0f] = ((uint64_t) asm_syscall_hook >> (8 * 6)) & 0xff;
- ((uint8_t *) text)[_NSYS + 0x10] = ((uint64_t) asm_syscall_hook >> (8 * 7)) & 0xff;
-
- // 41 ff e3 jmp *%r11
- ((uint8_t *) text)[_NSYS + 0x11] = 0x41;
- ((uint8_t *) text)[_NSYS + 0x12] = 0xff;
- ((uint8_t *) text)[_NSYS + 0x13] = 0xe3;
- return i + 0x13;
+ memcpy(text, code, sizeof(code));
}