hlfw.ca

drawcpu

Download patch

ref: 6d3e368f2d0796e366d2df8ed56e3b6fe0c42182
parent: 4f64a0294ee182f54c5ec341ab13163f43038af0
author: halfwit <michaelmisch1985@gmail.com>
date: Sun Aug 18 15:18:21 PDT 2024

Many, many changes to get binaries loaded and patched for syscall intercepts

--- a/include/a.out.h
+++ b/include/a.out.h
@@ -1,5 +1,5 @@
 typedef	struct	Exec	Exec;
-struct	Exec
+struct Exec
 {
 	long	magic;		/* magic number */
 	long	text;	 	/* size of text segment */
--- a/include/lib.h
+++ b/include/lib.h
@@ -314,6 +314,10 @@
 extern	int	getfields(char*, char**, int, int, char*);
 extern	char*	utfecpy(char*, char*, char*);
 extern	int	tas(int*);
+extern  int trampoline(void*);
+extern  void run(uintptr_t, Tos *, int, char **);
+extern  long sysintercept(void*, void*, void*, void*, void*, void*, void*);
+extern  int patch(void*, int);
 extern	void	quotefmtinstall(void);
 extern	int	dec64(uchar*, int, char*, int);
 extern	int	enc64(char*, int, uchar*, int);
--- a/include/user.h
+++ b/include/user.h
@@ -68,7 +68,6 @@
 extern	long	dirread(int, Dir**);
 extern  long    dirreadall(int, Dir**);
 extern	ulong	iounit(int);
-
 extern	int	lfdfd(int);
 
 /*
--- a/kern/Makefile
+++ b/kern/Makefile
@@ -32,6 +32,7 @@
 	rwlock.$O\
 	sleep.$O\
 	stub.$O\
+	syscall.$O\
 	sysfile.$O\
 	sysproc.$O\
 	time.$O\
--- a/kern/dat.h
+++ b/kern/dat.h
@@ -1,7 +1,7 @@
 #define	KNAMELEN		28	/* max length of name held in kernel */
 
 //#define	BLOCKALIGN		8
-
+typedef struct Binary	Binary;
 typedef struct Block	Block;
 typedef struct Chan	Chan;
 typedef struct Cmdbuf	Cmdbuf;
@@ -30,6 +30,7 @@
 typedef struct Rendez	Rendez;
 typedef struct Rgrp	Rgrp;
 typedef struct RWlock	RWlock;
+typedef struct Segment  Segment;
 typedef struct Waitq	Waitq;
 typedef struct Walkqid	Walkqid;
 typedef struct Kmesg	Kmesg;
@@ -119,6 +120,18 @@
 	Bpktck	=	(1<<5),		/* packet checksum */
 };
 
+/* Not using segments, so this is our holder: per-exec image layout filled in by sysexec and the os* loaders */
+struct Binary
+{
+	uintptr entry;	/* entry point read from the exec header in sysexec */
+	uintptr text;	/* sysexec: header size + text size; osbuildtext/ospatchtext overwrite it with the host address of the loaded text */
+	uintptr data;	/* data size field from the exec header; osbuilddata passes it to mmap as the address hint */
+	uintptr bssz;	/* sysexec: page-rounded end of text+data+bss, divided by BY2WD; mapping length in osbuildbss */
+	uintptr ts;	/* sysexec: page-rounded text end, divided by BY2WD; byte count used when reading/copying text */
+	uintptr ds;	/* sysexec: page-rounded ts+data, divided by BY2WD; mapping and read length in osbuilddata */
+	uintptr bss;	/* bss size field from the exec header; osbuildbss passes it to mmap as the address hint */
+};
+
 struct Block
 {
 	Block*	next;
@@ -224,6 +237,7 @@
 	NSMAX	=	1000,
 	NSLOG	=	7,
 	NSCACHE	=	(1<<NSLOG),
+	NNOTE   =   5,
 };
 
 struct Mntwalk				/* state for /proc/#/ns */
@@ -340,6 +354,15 @@
 
 typedef uvlong	Ticks;
 
+enum {	/* NOTE(review): presumably slot indices for Proc.S[]; only SEGNUM is referenced in the visible patch */
+	SEGTEXT,
+	SEGDATA,
+	SEGBSS,
+	SEGSTACK,
+	SEGNUM = 8,	/* number of entries in Proc.S[] */
+};
+
+
 enum
 {
 	Running,
@@ -378,26 +401,40 @@
 	char	genbuf[128];	/* buffer used e.g. for last name element from namec */
 	char	text[KNAMELEN];
 
+	u32int lladdr;		/* LL/SC emulation */
+	u32int llval;
+
+
+	/* Notes */
+	u32int notehandler;
+	int innote;
+	jmp_buf notejmp;
+	char notes[ERRMAX][NNOTE];
+	long notein, noteout;
 	Chan	*slash;
 	Chan	*dot;
 
 	Proc	*qnext;
+	Binary  *bin;
 
-	long	stext;
-	long	sdata;
-	long	sbss;
-	long	sentry;
-	
-	uintptr t;
-	uintptr d;
-	uintptr e;
-	uintptr b;
-	
 	void	(*fn)(void*);
 	void	*arg;
 
 	char oproc[1024];	/* reserved for os */
+	u32int CPSR;		/* status register for step */
+	/* TODO: Multiarch */
+	u32int R[16];		/* general purpose registers / PC (R15) */
+	Segment *S[SEGNUM];
+};
 
+
+struct Segment {	/* element type of Proc.S[]; NOTE(review): nothing visible in this patch allocates or reads one yet */
+	Ref ref;
+	int flags;
+	RWlock rw; /* lock for SEGFLLOCK segments */
+	u32int start, size;	/* NOTE(review): presumably base address and length in bytes -- confirm */
+	void *data;
+	Ref *dref;
 };
 
 enum
--- a/kern/fns.h
+++ b/kern/fns.h
@@ -222,8 +222,12 @@
 ulong	ticks(void);
 void	osproc(Proc*);
 void    osexec(Proc*);
-void    osbuildexec(Chan *);
+void    osbuildtext(Chan *);
+void    osbuilddata(Chan *);
+void    osbuildbss(Chan *);
+void    ospatchtext(void);
 void	osnewproc(Proc*);
+void    osclrmem(void);
 void	procsleep(void);
 void	procwakeup(Proc*);
 void	osinit(void);
--- a/kern/posix.c
+++ b/kern/posix.c
@@ -23,6 +23,7 @@
 #include "dat.h"
 #include "fns.h"
 #include "mem.h"
+#include <a.out.h>
 
 typedef struct Oproc Oproc;
 struct Oproc
@@ -138,19 +139,28 @@
 trex(void *vp)
 {
 	Proc *p;
-	void *run;
+	Tos tos;
+	char **argv;
+	int argc;
 
 	p = vp;
-	memcpy(run, &p->e, p->sentry);
-	// This will be start() instead
-	__asm__ volatile (
-		"mov x0, %0 \n\t"
-		"mov x1, %1 \n\t"
-		"blr %2"
-		:
-		: "r" (sizeof(p->arg)), "r" (p->arg), "r" (run)
-		: "x0", "x1", "x30", "memory"
-	);
+	/* run() hands &tos to the loaded program; clear it so unset fields are not stack garbage */
+	memset(&tos, 0, sizeof tos);
+	tos.pid = p->pid;
+	/*
+	 * p->arg is a nil-terminated vector, not an array: nelem() on a
+	 * pointer always yields 1, so count the entries by hand.
+	 */
+	argv = p->arg;
+	argc = 0;
+	if(argv != nil)
+		while(argv[argc] != nil)
+			argc++;
+	/* NOTE(review): up presumably resolves through prdakey, so this must stay ahead of the up->bin read below */
+	if(pthread_setspecific(prdakey, p))
+		panic("cannot setspecific");
+	run(up->bin->text, &tos, argc, argv);
+	pexit("", 0);
 	return 0;
 }
 
@@ -158,9 +156,7 @@
 osexec(Proc *p)
 {
 	pthread_t pid;
-	pthread_attr_t attr;
-	print("Building and running and all that\n");
-	if(pthread_create(&pid, &attr, trex, p)){
+	if(pthread_create(&pid, nil, trex, p)){
 		oserrstr();
 		panic("osexec: %r");
 	}
@@ -167,8 +163,136 @@
 	pthread_join(pid, nil);
 }
 
+/*
+ * Unmap the text, data and bss regions recorded in up->bin.
+ * NOTE(review): up->bin->text is a mallocz buffer until ospatchtext
+ * succeeds and afterwards points past the trampoline, not at the
+ * start of its mapping; data and bss hold the hints given to mmap,
+ * not the addresses it returned.  Confirm these calls release what
+ * was actually mapped.
+ */
+void
+osclrmem(void)
+{
+	/* Clean up text, data, bss */
+	if(up->bin->text)
+		munmap((void*)up->bin->text, up->bin->ts);
+	if(up->bin->data)
+		munmap((void*)up->bin->data, up->bin->ds);
+	if(up->bin->bss)
+		munmap((void*)up->bin->bss, up->bin->bssz);
+}
+
+/*
+ * Read up->bin->ts bytes from offset 0 of tc into a zeroed heap
+ * buffer and record that buffer in up->bin->text.  Raises an error
+ * when the buffer cannot be allocated or nothing could be read.
+ */
 void
+osbuildtext(Chan *tc)
+{
+	long n;
+	void *text;
+
+	text = mallocz(up->bin->ts, 1);
+	if(text == nil)
+		error("unable to set up text segment");
+	n = devtab[tc->type]->read(tc, text, up->bin->ts, 0);
+	if(n <= 0){
+		free(text);
+		error("unable to set up text segment");
+	}
+	/* Stash this in text for now, we will be moving and rewriting in the patch */
+	up->bin->text = (uintptr)text;
+}
+
+/*
+ * Build the final text image: the trampoline followed by the text
+ * that osbuildtext loaded, with its system calls rewritten by patch().
+ * On success the heap copy is released and up->bin->text points at
+ * the relocated text.  Raises an error on any failure.
+ */
+void
+ospatchtext(void)
+{
+	int n;
+	uchar *tramp, *final;
+	uintptr total;
+
+	/*
+	 * Set up trampoline. Mach dependent.
+	 * trampoline() stores through index n inclusive and returns n, so
+	 * the stub occupies n+1 bytes; size the buffers accordingly.
+	 */
+	tramp = mallocz(TRAMPSIZE+1, 1);
+	if(tramp == nil)
+		error("building trampoline failed");
+	n = trampoline(tramp);
+	if(n != TRAMPSIZE){
+		free(tramp);
+		error("building trampoline failed");
+	}
+
+	total = up->bin->ts + n + 1;
+	final = mmap(0, total, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if(final == MAP_FAILED){	/* mmap reports failure with MAP_FAILED, never nil */
+		free(tramp);
+		error("unable to set up text segment with trampoline");
+	}
+	memmove(final, tramp, n+1);
+	free(tramp);
+	memmove(final+n+1, (void*)up->bin->text, up->bin->ts);
+
+	/* Patch. Mach dependent */
+	if(patch(final+n+1, up->bin->ts) < 0){
+		munmap(final, total);
+		error("unable to patch syscalls");
+	}
+	/* NOTE(review): only the first n bytes are requested executable; the text after them keeps PROT_READ|PROT_WRITE -- confirm */
+	mprotect(final, n, PROT_EXEC);
+	free((void*)up->bin->text);	/* heap copy from osbuildtext is no longer needed */
+	up->bin->text = (uintptr)final+n+1;
+	/* NOTE(review): no waserror() in this function pairs with this; it appears to pop a label pushed by the caller -- confirm */
+	poperror();
+}
+
+/*
+ * Map an anonymous read/write region of up->bin->ds bytes, using
+ * up->bin->data as the placement hint, and fill it from tc starting
+ * at file offset up->bin->ts.
+ * NOTE(review): without MAP_FIXED the kernel may pick another address,
+ * and the address returned is not recorded in up->bin -- confirm.
+ */
+void
+osbuilddata(Chan *tc)
+{
+	long n;
+	void *data;
+
+	data = mmap((void*)up->bin->data, up->bin->ds, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if(data == MAP_FAILED)
+		error("unable to set up data segment");
+	n = devtab[tc->type]->read(tc, data, up->bin->ds, up->bin->ts);
+	if(n <= 0)
+		error("unable to set up data segment");
+}
+
+/*
+ * Map an anonymous read/write region of up->bin->bssz bytes, using
+ * up->bin->bss as the placement hint.  tc is not used.
+ */
+void
+osbuildbss(Chan *tc)
+{
+	void *bss;
+
+	// BSS - set it up in READ/WRITE
+	bss = mmap((void*)up->bin->bss, up->bin->bssz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if(bss == MAP_FAILED)
+		error("unable to set up bss segment");
+}
+
+void
 osexit(void)
 {
 	pthread_setspecific(prdakey, 0);
@@ -358,38 +430,4 @@
 	else
 		t.c_lflag |= (ECHO|ICANON);
 	tcsetattr(0, TCSAFLUSH, &t);
-}
-
-void
-osbuildexec(Chan *tc)
-{
-	int fd;
-	void *exec, *text, *data;
-	int flag;
-
-	flag = MAP_PRIVATE | MAP_ANONYMOUS;
-#ifdef __APPLE__ // mmap runs R W|X on newer Apple silicon
-	flag |= MAP_JIT;
-#endif
-	fd = newfd(tc);
-	exec = mmap(nil, up->stext + up->sdata, PROT_READ, MAP_PRIVATE, fd, 0);
-	if (exec == MAP_FAILED) {
-		print("Yeah we failed\n");
-		error("mmap");
-		return;
-	}
-	text = mmap((void*)up->t, up->stext, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	if(text == MAP_FAILED) {
-		error("mmap");
-		return;
-	}
-	data = mmap((void*)up->d, up->sdata + up->sbss, PROT_READ | PROT_WRITE, flag, -1, 0);
-	if(data == MAP_FAILED) {
-		error("mmap");
-		return;
-	}
-	up->e = (uintptr)exec;
-	up->sentry = up->stext + up->sdata + up->sbss;
-	print("Bottom: 0, top: %d\n", up->sentry);
-	fdclose(fd, 0);
 }
--- a/kern/stub.c
+++ b/kern/stub.c
@@ -60,6 +60,7 @@
 	USED(tag);
 }
 
+/* TODO: We may want notes */
 int
 postnote(Proc *p, int x, char *msg, int flag)
 {
--- /dev/null
+++ b/kern/syscall.c
@@ -1,0 +1,109 @@
+#include "u.h"
+#include "lib.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "user.h"
+
+/*
+ * Entry point reached from the patched system-call sites.
+ * r0 carries the system-call number; r1..r6 carry its arguments as raw
+ * register values.  Returns the result of the dispatched call, or -1
+ * for calls that are not implemented yet or not recognised.
+ */
+long
+sysintercept(void* r0, void* r1, void* r2, void* r3, void* r4, void* r5, void* r6)
+{
+    long ret;
+    int sysno;
+
+    /* Narrow through uintptr: a pointer must not be handed to %d or cast straight to int on 64-bit hosts. */
+    sysno = (int)(uintptr)r0;
+    print("Interception %d\n", sysno);
+    ret = -1;
+    switch(sysno){
+        case 0: /* SYSR1 */
+            ret = 0;
+            break;
+        case 1: /* _ERRSTR */
+            ret = errstr((char*)r1, 64);
+            break;
+        case 2: /* BIND */
+            ret = bind((char*)r1, (char*)r2, (int)(uintptr)r3);
+            break;
+        case 3: /* CHDIR */
+            ret = chdir((char*)r1);
+            break;
+        case 4: /* CLOSE */
+            ret = close((int)(uintptr)r1);
+            break;
+        case 5: /* DUP */
+            ret = dup((int)(uintptr)r1, (int)(uintptr)r2);
+            break;
+        case 6: /* ALARM */
+            ret = alarm((int)(uintptr)r1);
+            break;
+        case 7: /* EXEC */
+            ret = exec((int)(uintptr)r1, (char**)r2);
+            break;
+        case 8: /* EXITS */
+            exits((char*)r1);
+            ret = 0;
+            break;
+        case 9: /* _FSESSION */
+            //ret = fsession();
+            break;
+        case 10: /* FAUTH */
+            // ret = fauth();
+            break;
+        case 11: /* _FSTAT */
+            ret = fstat((int)(uintptr)r1, (char*)r2, (int)(uintptr)r3);
+            break;
+        case 12: /* SEGBRK */
+        case 13: /* _MOUNT */
+        case 14: /* OPEN */
+        case 15: /* _READ */
+        case 16: /* OSEEK */
+        case 17: /* SLEEP */
+        case 18: /* _STAT */
+        case 19: /* RFORK */
+        case 20: /* _WRITE */
+        case 21: /* PIPE */
+        case 22: /* CREATE */
+        case 23: /* FD2PATH */
+        case 24: /* BRK_ */
+        case 25: /* REMOVE */
+        case 26: /* _WSTAT */
+        case 27: /* _FWSTAT */
+        case 28: /* NOTIFY */
+        case 29: /* NOTED */
+        case 30: /* SEGATTACH */
+        case 31: /* SEGDETACH */
+        case 32: /* SEGFREE */
+        case 33: /* SEGFLUSH */
+        case 34: /* RENDEZVOUS */
+        case 35: /* UNMOUNT */
+        case 36: /* _WAIT */
+        case 37: /* SEMACQUIRE */
+        case 38: /* SEMRELEASE */
+        case 39: /* SEEK */
+        case 40: /* FVERSION */
+        case 41: /* ERRSTR */
+        case 42: /* STAT */
+        case 43: /* FSTAT */
+        case 44: /* WSTAT */
+        case 45: /* FWSTAT */
+        case 46: /* MOUNT */
+        case 47: /* AWAIT */
+        case 50: /* PREAD */
+        case 51: /* PWRITE */
+        case 52: /* TSEMACQUIRE */
+        case 53: /* _NSEC */
+            /* Recognised but not implemented yet: ret stays -1. */
+            break;
+        default:
+            break;
+    }
+
+    return ret;
+}
--- a/kern/sysproc.c
+++ b/kern/sysproc.c
@@ -227,12 +227,15 @@
 	char *file, *elem;
 	Chan *tc;
 	ulong magic;
-	uintptr t, d, b, entry, text, align, data, bss, bssend;
+	uintptr align, bssend;
 	int n, indir;
+
 	elem = nil;
 	align = BY2PG-1;
 	indir = 0;
 	file = argv[0];
+	up->bin = mallocz(sizeof(Binary), 1);	/* size of the struct itself, not of a pointer to it */
+
 	for(;;){
 		tc = namec(file, Aopen, OEXEC, 0);
 		if(waserror()){
@@ -248,15 +251,15 @@
 				if(magic & HDR_MAGIC){
 					if(n < sizeof(u.ehdr))
 						error(Ebadexec);
-					entry = beswav(u.ehdr.hdr[0]);
-					text = UTZERO+sizeof(u.ehdr);
+					up->bin->entry = beswav(u.ehdr.hdr[0]);
+					up->bin->text = UTZERO+sizeof(u.ehdr);
 				} else {
-					entry = beswal(u.ehdr.ex.entry);
-					text = UTZERO+sizeof(Exec);
+					up->bin->entry = beswal(u.ehdr.ex.entry);
+					up->bin->text = UTZERO+sizeof(Exec);
 				}
-				if(entry < text)
+				if(up->bin->entry < up->bin->text)
 					error(Ebadexec);
-				text += beswal(u.ehdr.ex.text);
+				up->bin->text += beswal(u.ehdr.ex.text);
 				// BUG: This errors, errors seem broken currently
 //				if(text <= entry || text >= (uintptr)(USTKTOP-USTKSIZE))
 //					error(Ebadexec);
@@ -285,50 +288,66 @@
 		if(n < 1)
 			error(Ebadexec);
 
-		/* First arg becomes complete file name */
 		progarg[n++] = file;
 		progarg[n] = nil;
 		argv++;
 		file = progarg[0];
 		progarg[0] = elem;
-		up->arg = progarg;
-		strcpy(up->arg, file);
+		strcpy(up->text, elem);
 		poperror();
 		cclose(tc);
+		// Check the actual sysproc, there's some missing bits here still
+		up->arg = progarg;
 	}
 
-	t = (text+align) & ~align;
-	text -= UTZERO;
-	data = beswal(u.ehdr.ex.data);
-	bss = beswal(u.ehdr.ex.bss);
+	up->bin->ts = ((up->bin->text+align) & ~align) / BY2WD;
+	up->bin->text -= UTZERO;
+	up->bin->data = beswal(u.ehdr.ex.data);
+	up->bin->bss = beswal(u.ehdr.ex.bss);
 	align = BY2PG-1;
-	d = (t + data + align) & ~align;
-	bssend = t + data + bss;
-	b = (bssend + align) & ~align;
+	up->bin->ds = ((up->bin->ts + up->bin->data + align) & ~align) / BY2WD;
+	bssend = up->bin->ts + up->bin->data + up->bin->bss;
+	up->bin->bssz = ((bssend + align) & ~align) / BY2WD;
 
-	up->t = t;
-	up->d = data;
-	up->b = bss;
-
-	up->sentry = entry;
-	up->sdata = data;
-	up->sbss = bss;
-
-	if(t >= (ulong)(USTKTOP-USTKSIZE) || d >= (ulong)(USTKTOP-USTKSIZE) || b >= (ulong)(USTKTOP-USTKSIZE))
+	if(up->bin->ts >= (ulong)(USTKTOP-USTKSIZE) || up->bin->ds >= (ulong)(USTKTOP-USTKSIZE) || up->bin->bss >= (ulong)(USTKTOP-USTKSIZE))
 		error(Ebadexec);
-	/* Load in to memory. OS dependent. */
-	osbuildexec(tc);
+
+	/* Set up text, data, and bss. Patch syscalls. OS dependent. */
+	osbuildtext(tc);
 	if(waserror()){
-		cclose(tc);
+		osclrmem();
 		nexterror();
 	}
+	ospatchtext();
+	if(waserror()){
+		osclrmem();
+		nexterror();
+	}
+	osbuilddata(tc);
+	if(waserror()){
+		osclrmem();
+		nexterror();
+	}
+	osbuildbss(tc);
+	if(waserror()){
+		osclrmem();
+		nexterror();
+	}
 
+	/* Run binary. OS dependent. */
 	osexec(up);
 
-	// Clean up the proc after
+	print("Exiting sysproc\n");
+	/* Clean up */
+	osclrmem();
 	up->arg = nil;
+	free(up->bin);
+
+	poperror(); /* osbuildtext */
+	poperror(); /* ospatchtext */
+	poperror(); /* osbuilddata */
+	poperror(); /* osbuildbss */
 	cclose(tc);
-	poperror();
-	poperror();
+	
 	return 0;
 }
\ No newline at end of file
--- a/librc/drawcpu.c
+++ b/librc/drawcpu.c
@@ -211,7 +211,12 @@
 void
 Exec(char **argv)
 {
-	exec(nelem(argv), argv);
+	int argc;
+
+	/* argv is nil-terminated; its length is the index of the first nil entry */
+	for(argc = 0; argv[argc]; argc++)
+		;
+	exec(argc, argv);
 }
 
 int
--- a/posix-amd64/Makefile
+++ b/posix-amd64/Makefile
@@ -7,7 +7,8 @@
 OFILES=\
 	getcallerpc.$O\
 	tas.$O\
-	start.$O\
+	run.$O\
+	trampoline.$O\
 
 default: $(LIB)
 $(LIB): $(OFILES)
--- /dev/null
+++ b/posix-amd64/run.c
@@ -1,0 +1,38 @@
+#include "u.h"
+#include "libc.h"
+
+void run(uintptr_t entry, Tos *_tos, int argc, char *argv[]) {
+    // Pin each argument to a named register: entry -> rdi, _tos -> rsi, argc -> edx, argv -> rcx.
+    register uintptr_t rdi asm("rdi") = entry;
+    register Tos *rsi asm("rsi") = _tos;
+    register int edx asm("edx") = argc;
+    register char **rcx asm("rcx") = argv;
+
+    __asm__ __volatile__ (
+        // Load values into registers. NOTE(review): the operands are already pinned to these registers above, so these moves look redundant -- confirm
+        "mov rdi, %0\n\t"
+        "mov rsi, %1\n\t"
+        "mov edx, %2\n\t"
+        "mov rcx, %3\n\t"
+
+        // push argv onto stack: r8 = (argc + 1) * 8 bytes are reserved below rsp
+        "mov r8, rdx\n\t"
+        "add r8, r8, 1\n\t"  // NOTE(review): three-operand form -- confirm this assembles for x86-64
+        "shl r8, r8, 3\n\t"  // NOTE(review): three-operand form -- confirm this assembles for x86-64
+        "sub rsp, r8\n\t"
+        "mov r9, rsp\n\t"
+        "rep movsb\n\t"  // NOTE(review): rdi/rsi/rcx hold entry/_tos/argv here, not a destination/source/count set up for the copy -- confirm
+
+        // push argc onto stack. NOTE(review): argc is stored at [rsp] before rsp is lowered by 8 -- confirm the intended layout
+        "mov [rsp], edx\n\t"
+        "sub rsp, 8\n\t"
+
+        // jump (not call) to entry point, so no return address is pushed
+        "jmp rdi\n\t"
+        "nop\n\t"
+
+        :
+        : "r" (rdi), "r" (rsi), "r" (edx), "r" (rcx)
+        : "r8", "r9"  // NOTE(review): rsp and memory are modified above but not listed -- confirm
+    );
+}
\ No newline at end of file
--- a/posix-amd64/start.c
+++ /dev/null
@@ -1,38 +1,0 @@
-#include "u.h"
-#include "libc.h"
-
-void start(uintptr_t entry, Tos *_tos, int argc, char *argv[]) {
-    // entry point
-    register uintptr_t rdi asm("rdi") = entry;
-    register Tos *rsi asm("rsi") = _tos;
-    register int edx asm("edx") = argc;
-    register char **rcx asm("rcx") = argv;
-
-    __asm__ (
-        // Load values into registers
-        "mov rdi, %0\n\t"
-        "mov rsi, %1\n\t"
-        "mov edx, %2\n\t"
-        "mov rcx, %3\n\t"
-
-        // push argv onto stack
-        "mov r8, rdx\n\t"
-        "add r8, r8, 1\n\t"
-        "shl r8, r8, 3\n\t"
-        "sub rsp, r8\n\t"
-        "mov r9, rsp\n\t"
-        "rep movsb\n\t"
-
-        // push argc onto stack
-        "mov [rsp], edx\n\t"
-        "sub rsp, 8\n\t"
-
-        // jump to entry point
-        "jmp rdi\n\t"
-        "nop\n\t"
-
-        :
-        : "r" (rdi), "r" (rsi), "r" (edx), "r" (rcx)
-        : "r8", "r9"
-    );
-}
\ No newline at end of file
--- /dev/null
+++ b/posix-amd64/trampoline.c
@@ -1,0 +1,80 @@
+#include <u.h>
+#include <libc.h>
+#include "../kern/fns.h"
+
+#define _NSYS		53
+
+/* TODO: This is not arm64 code, this is amd64 code */
+extern void asm_syscall_hook(void);
+void ____asm_syscall_hook(void)
+{
+	/*
+	 * asm_syscall_hook is the address where the
+	 * trampoline code first jumps to.
+	 *
+	 * The code below calls the C function sysintercept
+	 * (kern/syscall.c), not the host libc syscall().
+	 *
+	 * On entry the registers follow the calling convention
+	 * of the system calls.  The moves below shift them into
+	 * the C calling convention: rax (call number) becomes the
+	 * first argument; rdi, rsi, rdx, r10 and r8 follow; r9 and
+	 * the return address are passed on the stack.
+	 *
+	 * This exists only so the hook itself can be written in C.
+	 */
+	asm volatile (
+	".globl asm_syscall_hook \n\t"
+	"asm_syscall_hook: \n\t"
+	"movq (%rsp), %rcx \n\t"	/* return address */
+	"pushq %rbp \n\t"
+	"movq %rsp, %rbp \n\t"
+	"subq $16,%rsp \n\t"
+	"movq %rcx,8(%rsp) \n\t"	/* 8th argument: return address */
+	"movq %r9,(%rsp) \n\t"	/* 7th argument */
+	"movq %r8, %r9 \n\t"
+	"movq %r10, %r8 \n\t"
+	"movq %rdx, %rcx \n\t"
+	"movq %rsi, %rdx \n\t"
+	"movq %rdi, %rsi \n\t"
+	"movq %rax, %rdi \n\t"	/* 1st argument: call number */
+	"call sysintercept \n\t"
+	"leaveq \n\t"
+	"retq \n\t"
+	);
+}
+
+int
+trampoline(void *text)
+{
+	uint8_t *code = text;
+	uint64_t hook = (uint64_t)asm_syscall_hook;
+	int pos, shift;
+
+	/* Emit the stub into text: _NSYS filler bytes of 0x90 first. */
+	for(pos = 0; pos < _NSYS; pos++)
+		code[pos] = 0x90;
+
+	/* Preserve redzone */
+	code[pos++] = 0x48;
+	code[pos++] = 0x81;
+	code[pos++] = 0xec;
+	code[pos++] = 0x80;
+	code[pos++] = 0x00;
+	code[pos++] = 0x00;
+	code[pos++] = 0x00;
+
+	/* 49 bb [64-bit addr (8-byte)] movabs [64-bit addr (8-byte)],%r11 */
+	code[pos++] = 0x49;
+	code[pos++] = 0xbb;
+	for(shift = 0; shift < 64; shift += 8)
+		code[pos++] = (hook >> shift) & 0xff;	/* least significant byte first */
+
+	/* 41 ff e3                jmp    *%r11 */
+	code[pos++] = 0x41;
+	code[pos++] = 0xff;
+	code[pos] = 0xe3;
+
+	/* pos is _NSYS + 0x13 here: the offset of the final byte stored */
+	return pos;
+}
--- a/posix-arm64/Makefile
+++ b/posix-arm64/Makefile
@@ -7,7 +7,9 @@
 OFILES=\
 	getcallerpc.$O\
 	tas.$O\
-	start.$O\
+	run.$O\
+	trampoline.$O\
+	patch.$O\
 
 default: $(LIB)
 $(LIB): $(OFILES)
--- a/posix-arm64/mem.h
+++ b/posix-arm64/mem.h
@@ -84,7 +84,7 @@
 #define	USTKSIZE	(16*1024*1024)		/* user stack size */
 
 #define BLOCKALIGN	64			/* only used in allocb.c */
-
+#define TRAMPSIZE   72          /* Size of the trampoline */
 /*
  * Sizes
  */
--- /dev/null
+++ b/posix-arm64/patch.c
@@ -1,0 +1,42 @@
+#include <u.h>
+#include <libc.h>
+#include "mem.h"
+
+#define A64INSN		4		/* bytes per A64 instruction */
+#define A64FIELDS	0xFFE0001F	/* opcode bits + low register/LL field; clears imm16 */
+#define A64MOVX0	0xD2800000	/* MOVZ X0, #imm16 */
+#define A64SVC		0xD4000001	/* SVC #imm16 */
+#define A64BLRX0	0xD63F0000	/* BLR X0 */
+
+/*
+ * Scan size bytes of A64 code at text for a MOVZ X0,#imm16 that is
+ * immediately followed by SVC, and overwrite the SVC word with BLR X0.
+ * The MOVZ is left alone so X0 still holds its immediate at the call.
+ * Words are read and written in host byte order.
+ * Returns -1 when no pair was found, otherwise one less than the
+ * number of pairs rewritten (callers treat a negative value as failure).
+ */
+int
+patch(void* text, int size)
+{
+	uint8_t *base;
+	uint32_t mov, svc, blr;
+	int off, ret;
+
+	if(text == nil)
+		return -1;
+	base = text;
+	blr = A64BLRX0;
+	ret = -1;
+	for(off = 0; off + 2*A64INSN <= size; off += A64INSN){
+		/* memmove rather than a pointer cast: text carries no alignment or aliasing guarantee */
+		memmove(&mov, base+off, A64INSN);
+		memmove(&svc, base+off+A64INSN, A64INSN);
+		if((mov & A64FIELDS) != A64MOVX0 || (svc & A64FIELDS) != A64SVC)
+			continue;
+		memmove(base+off+A64INSN, &blr, A64INSN);
+		ret++;
+	}
+	return ret;
+}
--- /dev/null
+++ b/posix-arm64/run.s
@@ -1,0 +1,24 @@
+/* SPDX-License-Identifier: Unlicense */
+.text
+.global _run
+_run:
+    mov x29, x0         // entry (x0 is the first argument, equivalent to rdi); kept in x29 for the final branch
+    mov x9, x1          // _tos (x1 is the second argument, equivalent to rsi); NOTE(review): x9 is never read again below
+    mov x19, x2         // argc (x2 is the third argument, equivalent to rdx)
+    mov x20, x3         // argv (x3 is the fourth argument, equivalent to rcx)
+
+    // Push argv onto stack: argc + 1 pointer-sized slots are copied
+    mov x10, x19        // x10 = argc
+    add x10, x10, #1    // x10 = argc + 1
+    lsl x10, x10, #3    // x10 = (argc + 1) * 8 (shifting left by 3 is multiplying by 8)
+    sub sp, sp, x10     // Allocate space on stack; NOTE(review): not a multiple of 16 when argc is even -- confirm sp alignment
+    mov x0, sp          // x0 = new stack pointer (destination for memcpy)
+    mov x1, x20         // x1 = argv (source for memcpy)
+    mov x2, x10         // x2 = number of bytes to copy
+    bl _memcpy           // Call memcpy to copy argv to stack
+
+    // Push argc onto stack
+    str x19, [sp, #-16]!  // Push argc onto stack and update stack pointer (sp drops by 16, argc stored at the new sp)
+
+    // Jump to entry point (br, so the link register is not updated)
+    br x29
\ No newline at end of file
--- a/posix-arm64/start.c
+++ /dev/null
@@ -1,44 +1,0 @@
-#include "u.h"
-#include "libc.h"
-
-void start(uintptr_t entry, Tos *_tos, int argc, char *argv[]) {
-    // entry point
-    register uintptr_t r0 asm("x0") = entry;
-    register Tos *r1 asm("x1") = _tos;
-    register int r2 asm("w2") = argc;
-    register char **r3 asm("x3") = argv;
-
-    __asm__ (
-        // Load values into registers
-        "mov x0, %0\n\t"
-        "mov x1, %1\n\t"
-        "mov w2, %w2\n\t"
-        "mov x3, %3\n\t"
-
-        // push argv onto stack
-        "mov x4, x2\n\t"
-        "add x4, x4, #1\n\t"
-        "lsl x4, x4, #3\n\t"
-        "sub sp, sp, x4\n\t"
-        "mov x5, sp\n\t"
-        "mov x6, x3\n\t"
-
-    "copy_argv_loop:\n\t"
-        "ldr x7, [x6], #8\n\t"
-        "str x7, [x5], #8\n\t"  
-        "subs x4, x4, #1\n\t"
-        "bne copy_argv_loop\n\t"
-
-        // push argc onto stack
-        "sub sp, sp, #16\n\t"
-        "str w2, [sp, #8]\n\t"
-
-        // jump to entry point
-        "br x0\n\t"
-        "nop\n\t"
-
-        :
-        : "r" (r0), "r" (r1), "r" (r2), "r" (r3)
-        : "x4", "x5", "x6", "x7"
-    );
-}
\ No newline at end of file
--- /dev/null
+++ b/posix-arm64/trampoline.c
@@ -1,0 +1,66 @@
+#include <u.h>
+#include <libc.h>
+
+#define _NSYS		53
+
+void asm_syscall_hook(void)
+{
+	__asm__ __volatile__ (
+    	".global asm_syscall_hook \n\t"
+    	"asm_syscall_hook: \n\t"
+    	"stp x29, x30, [sp, #-16]! \n\t"  // Save frame pointer and link register
+    	"mov x29, sp \n\t"                // Set up frame pointer
+    	"sub sp, sp, #32 \n\t"            // Allocate 32 bytes on stack
+    	"str x8, [sp, #24] \n\t"          // Save x8 (syscall number)
+    	"stp x0, x1, [sp, #8] \n\t"       // Save x0 and x1
+    	"str x2, [sp] \n\t"               // Save x2
+    	"mov x6, x5 \n\t"                 // Shift arguments up one register, highest first,
+    	"mov x5, x4 \n\t"                 // so each source is read before it is overwritten
+    	"mov x4, x3 \n\t"                 // x3 -> x4
+    	"mov x3, x2 \n\t"                 // x2 -> x3
+    	"mov x2, x1 \n\t"                 // x1 -> x2
+    	"mov x1, x0 \n\t"                 // x0 -> x1
+    	"mov x0, x8 \n\t"                 // Syscall number becomes the first argument
+    	"ldr x7, [sp, #24] \n\t"          // Load original x8 into x7
+    	"bl _sysintercept \n\t"            // Call sysintercept (kern/syscall.c)
+    	"mov sp, x29 \n\t"                // Restore stack pointer
+    	"ldp x29, x30, [sp], #16 \n\t"    // Restore frame pointer and link register
+    	"ret \n\t"                        // Return
+	);
+}
+
+int
+trampoline(void *text)
+{
+    int i;
+    /* Fill text with the stub: _NSYS filler bytes, a stack adjustment, then an indirect jump to asm_syscall_hook. */
+    for(i = 0; i < _NSYS; i++)
+        ((uint8_t *)text)[i] = 0x90;
+    /* NOTE(review): the byte values below are the x86-64 encodings named in the comments, yet this file is built for arm64 -- confirm */
+    /* Preserve redzone: bytes 48 81 ec 80 00 00 00 (x86-64 sub $0x80,%rsp) */
+    ((uint8_t*)text)[_NSYS + 0x00] = 0x48;
+    ((uint8_t *) text)[_NSYS + 0x01] = 0x81;
+	((uint8_t *) text)[_NSYS + 0x02] = 0xec;
+	((uint8_t *) text)[_NSYS + 0x03] = 0x80;
+	((uint8_t *) text)[_NSYS + 0x04] = 0x00;
+	((uint8_t *) text)[_NSYS + 0x05] = 0x00;
+	((uint8_t *) text)[_NSYS + 0x06] = 0x00;
+
+    /* 49 bb [64-bit addr (8-byte)] movabs [64-bit addr (8-byte)],%r11 -- address of asm_syscall_hook, least significant byte first */
+	((uint8_t *) text)[_NSYS + 0x07] = 0x49;
+	((uint8_t *) text)[_NSYS + 0x08] = 0xbb;
+	((uint8_t *) text)[_NSYS + 0x09] = ((uint64_t) asm_syscall_hook >> (8 * 0)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x0a] = ((uint64_t) asm_syscall_hook >> (8 * 1)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x0b] = ((uint64_t) asm_syscall_hook >> (8 * 2)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x0c] = ((uint64_t) asm_syscall_hook >> (8 * 3)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x0d] = ((uint64_t) asm_syscall_hook >> (8 * 4)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x0e] = ((uint64_t) asm_syscall_hook >> (8 * 5)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x0f] = ((uint64_t) asm_syscall_hook >> (8 * 6)) & 0xff;
+	((uint8_t *) text)[_NSYS + 0x10] = ((uint64_t) asm_syscall_hook >> (8 * 7)) & 0xff;
+
+	// 41 ff e3                jmp    *%r11
+	((uint8_t *) text)[_NSYS + 0x11] = 0x41;
+	((uint8_t *) text)[_NSYS + 0x12] = 0xff;
+	((uint8_t *) text)[_NSYS + 0x13] = 0xe3;
+	return i + 0x13;	/* == _NSYS + 0x13, the index of the last byte stored above, so _NSYS + 0x14 bytes are written */
+}