hlfw.ca

drawcpu

Download patch

ref: 6a89c47b1ce152b0ec15a3fb9df225ae4ea6287a
parent: 868b16b5459193afd2044121c09884ccf5560df2
author: halfwit <michaelmisch1985@gmail.com>
date: Sat Sep 14 06:54:56 PDT 2024

Update patching, still not jumping to entrypoint correctly

--- a/include/lib.h
+++ b/include/lib.h
@@ -314,7 +314,8 @@
 extern	int	getfields(char*, char**, int, int, char*);
 extern	char*	utfecpy(char*, char*, char*);
 extern	int	tas(int*);
-extern  int trampoline(void*);
+extern  void trampoline(void*);
+extern void start(uintptr_t entry, Tos *, int, char *[]);
 extern  long sysintercept(void*, void*, void*, void*, void*, void*, void*);
 extern  int patch(void*, int);
 extern	void	quotefmtinstall(void);
--- a/kern/dat.h
+++ b/kern/dat.h
@@ -126,10 +126,10 @@
 	uintptr entry;
 	uintptr text;
 	uintptr data;
-	uintptr bssz;
+	uintptr bss;
+	uintptr bs;
 	uintptr ts;
 	uintptr ds;
-	uintptr bss;
 };
 
 struct Block
--- a/kern/posix.c
+++ b/kern/posix.c
@@ -35,9 +35,8 @@
 };
 
 static pthread_key_t prdakey;
-typedef void (*startfn)(uintptr, Tos*, int, char**);
+//typedef void (*startfn)(int, char**);
 
-
 Proc*
 _getproc(void)
 {
@@ -142,7 +141,6 @@
 trex(void *vp)
 {
 	Proc *p;
-	startfn start;
 	Tos tos;
 	int argc;
 
@@ -151,9 +149,9 @@
 	argc = nelem((char**)p->arg);
 	if(pthread_setspecific(prdakey, p))
 		panic("cannot setspecific");
-	start = (startfn)up->bin->entry;
-	start(up->bin->text, &tos, argc, p->arg);
-	print("Done\n");
+	print("Greetings from TREX %lx\n", up->bin->entry+TRAMPSIZE);
+	/* Our entrypoint moves forward to accommodate the trampoline code */
+	start(up->bin->entry+TRAMPSIZE, &tos, argc, p->arg);
 	pexit("", 0);
 	return 0;
 }
@@ -164,7 +162,6 @@
 	pthread_t pid;
 	pthread_attr_t attr;
 	pthread_attr_init(&attr);
-	pthread_attr_setstacksize(&attr, 1024*1024);
 	if(pthread_create(&pid, &attr, trex, p)){
 		oserrstr();
 		panic("osexec: %r");
@@ -182,7 +179,7 @@
 	if(up->bin->data)
 		munmap((void*)up->bin->data, up->bin->ds);
 	if(up->bin->bss)
-		munmap((void*)up->bin->bss, up->bin->bssz);
+		munmap((void*)up->bin->bss, up->bin->bs);
 }
 
 void
@@ -202,29 +199,24 @@
 void
 ospatchtext(void)
 {
-	int n;
 	void *text, *final;
 
 	/* Set up trampoline. Mach dependent */
 	text = mallocz(TRAMPSIZE, 1);
-	n = trampoline(text);
-	if(n != TRAMPSIZE)
-		error("building trampoline failed");
+	trampoline(text);
 
-	final = mmap(nil, up->bin->ts+n, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-	memmove(final, text, n);
-	memmove(final+n, (void*)up->bin->text, up->bin->ts);
+	final = mmap(nil, up->bin->ts+TRAMPSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	memmove(final, text, TRAMPSIZE);
+	memmove(final+TRAMPSIZE, (void*)up->bin->text, up->bin->ts);
 	if(!final)
 		error("unable to set up text segment with trampoline");
 
 	/* Patch. Mach dependent */
-	if(patch(final+n, up->bin->ts) < 0)
+	if(patch(final+TRAMPSIZE, up->bin->ts) < 0)
 		error("unable to patch syscalls");
-
-	if(mprotect(final, up->bin->ts+n, PROT_READ|PROT_EXEC) != 0)
+	up->bin->text = (uintptr)final;
+	if(mprotect((void*)up->bin->text, up->bin->ts+TRAMPSIZE, PROT_READ|PROT_EXEC) != 0)
 		error("Unable to mprotect: %r");
-
-	up->bin->text = (uintptr)final+n;
 }
 
 void
@@ -245,7 +237,7 @@
 {
 	void *bss;
 	// BSS - set it up in READ/WRITE
-	bss = mmap((void*)up->bin->bss, up->bin->bssz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	bss = mmap((void*)up->bin->bss, up->bin->bs, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 	if(!bss)
 		error("unable to set up bss segment");
 	up->bin->bss = (uintptr)bss;
--- a/kern/sysproc.c
+++ b/kern/sysproc.c
@@ -244,6 +244,7 @@
 		}
 		if(!indir)
 			kstrdup(&elem, up->genbuf);
+		// TODO: Triple check this for accurate text/entry. Ensure they are right, our ts should not be the same each time, for example
 		n = devtab[tc->type]->read(tc, u.buf, sizeof(u.buf), 0);
 		if (n >= sizeof(Exec)){
 			magic = beswal(u.ehdr.ex.magic);
@@ -252,16 +253,16 @@
 					if(n < sizeof(u.ehdr))
 						error(Ebadexec);
 					up->bin->entry = beswav(u.ehdr.hdr[0]);
-					up->bin->text = UTZERO+sizeof(u.ehdr);
+					up->bin->ts = UTZERO+sizeof(u.ehdr);
 				} else {
 					up->bin->entry = beswal(u.ehdr.ex.entry);
-					up->bin->text = UTZERO+sizeof(Exec);
+					up->bin->ts = UTZERO+sizeof(Exec);
 				}
-				if(up->bin->entry < up->bin->text)
+				if(up->bin->entry < up->bin->ts)
 					error(Ebadexec);
-				up->bin->text += beswal(u.ehdr.ex.text);
-				// BUG: This errors, errors seem broken currently
-				if(up->bin->text <= up->bin->entry || up->bin->text >= (uintptr)(USTKTOP-USTKSIZE))
+
+				up->bin->ts += beswal(u.ehdr.ex.text);
+				if(up->bin->ts <= up->bin->entry || up->bin->ts >= (uintptr)(USTKTOP-USTKSIZE))
 					error(Ebadexec);
 				switch(magic){
 				case S_MAGIC:	/* 2MB segment alignment for amd64 */
@@ -300,16 +301,16 @@
 		// Check the actual sysproc, there's some missing bits here still
 		up->arg = progarg;
 	}
-	up->bin->ts = ((up->bin->text+align) & ~align) / BY2WD;
-	up->bin->data = beswal(u.ehdr.ex.data);
-	up->bin->bss = beswal(u.ehdr.ex.bss);
-	up->bin->text -= UTZERO;
+
+	up->bin->ds = beswal(u.ehdr.ex.data);
+	up->bin->bs = beswal(u.ehdr.ex.bss);
+	up->bin->text = align;
+	up->bin->ts -= UTZERO;
 	align = BY2PG-1;
-	up->bin->ds = ((up->bin->ts + up->bin->data + align) & ~align) / BY2WD;
-	bssend = up->bin->ts + up->bin->data + up->bin->bss;
-	up->bin->bssz = ((bssend + align) & ~align) / BY2WD;
-	if(up->bin->ts >= (ulong)(USTKTOP-USTKSIZE) || up->bin->ds >= (ulong)(USTKTOP-USTKSIZE) || up->bin->bss >= (ulong)(USTKTOP-USTKSIZE))
-		error(Ebadexec);
+	up->bin->data = ((up->bin->ts+align) & ~align) / BY2WD;
+	bssend = up->bin->ts + up->bin->ds + up->bin->bs;
+	up->bin->bss = ((bssend + align) & ~align) / BY2WD;
+	up->bin->bss -= up->bin->bs;
 
 	/* Set up text, data, and bss. Patch syscalls. OS dependent. */
 	osbuildtext(tc);
--- a/main.c
+++ b/main.c
@@ -44,7 +44,7 @@
 static void
 usage(void)
 {
-	fprintf(stderr, "usage: drawcpu [-D]\n");
+	fprintf(stderr, "usage: drawcpu [-D] [-p 9bin]\n");
 	exit(1);
 }
 
--- a/posix-amd64/trampoline.c
+++ b/posix-amd64/trampoline.c
@@ -1,8 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include "../kern/fns.h"
-
-#define _NSYS		53
+#include "mem.h"
 
 /* TODO: This is not arm64 code, this is amd64 code */
 extern void asm_syscall_hook(void);
--- a/posix-arm64/Makefile
+++ b/posix-arm64/Makefile
@@ -1,8 +1,9 @@
 ROOT=..
 include ../Make.config
 LIB=../libmachdep.a
-CFLAGS+=-fpie
-LDFLAGS+=-fpie
+# Cannot use PIE because of trampoline
+#CFLAGS+=-fpie
+#LDFLAGS+=-fpie
 
 OFILES=\
 	getcallerpc.$O\
@@ -9,6 +10,7 @@
 	tas.$O\
 	trampoline.$O\
 	patch.$O\
+	start.$O\
 
 default: $(LIB)
 $(LIB): $(OFILES)
--- a/posix-arm64/mem.h
+++ b/posix-arm64/mem.h
@@ -83,8 +83,9 @@
 #define	USTKTOP		((EVAMASK>>1)-0xFFFF)	/* user segment end +1 */
 #define	USTKSIZE	(16*1024*1024)		/* user stack size */
 
-#define BLOCKALIGN	64			/* only used in allocb.c */
-#define TRAMPSIZE   72          /* Size of the trampoline */
+#define _NSYS		53
+#define BLOCKALIGN	64			       /* only used in allocb.c */
+#define TRAMPSIZE   231                /* Size of the trampoline */
 /*
  * Sizes
  */
--- a/posix-arm64/patch.c
+++ b/posix-arm64/patch.c
@@ -9,13 +9,13 @@
     int ret = -1;
     for(i = 0; i < size - BY2WD; i++){
         // MOV X0, #immediate (could be D2800000 | syscall_number)
-        // BL X0 (could be D4000000 | offset)
+        // BL X0 (could be D4000010 | offset)
         // 0xD63F0000 is our BLR X0
         // This sets up our jmp to the trampoline code
         if ((*(ulong*)&text[i] & 0xFFFF0000) == 0xD2800000 && (*(ulong*)&text[i+BY2SE] & 0xFFFF0000) == 0xD4000000) {
-            ulong *ptr = (ulong*)&text[i];
-            ptr[0] = 0xD6;
-            ptr[1] = 0x3F;
+            ulong *ptr = (ulong*)&text[i+BY2SE];
+            *ptr &= 0xFFFF0000;
+            *ptr |= 0xD63F0000;
             ret++;
         }
     }
--- /dev/null
+++ b/posix-arm64/start.c
@@ -1,0 +1,42 @@
+#include "u.h"
+#include "libc.h"
+void start(uintptr_t entry, Tos *_tos, int argc, char *argv[])
+{
+    register uintptr_t r0 asm("x0") = entry;
+    register Tos *r1 asm("x1") = _tos;
+    register int r2 asm("w2") = argc;
+    register char **r3 asm("x3") = argv;
+
+    __asm__ __volatile__ (
+                // Load values into registers
+                "mov x0, %0\n\t"
+                "mov x1, %1\n\t"
+                "mov w2, %w2\n\t"
+                "mov x3, %3\n\t"
+
+                // push argv onto stack
+                "mov x4, x2\n\t"
+                "add x4, x4, #1\n\t"
+                "lsl x4, x4, #3\n\t"
+                "sub sp, sp, x4\n\t"
+                "mov x5, sp\n\t"
+                "mov x6, x3\n\t"
+
+                // copy argv one 8-byte entry at a time
+                "copy_argv_loop:\n\t"
+                "ldr x7, [x6], #8\n\t"
+                "str x7, [x5], #8\n\t"
+                "subs x4, x4, #1\n\t"
+                "bne copy_argv_loop\n\t"
+        
+                // push argc onto stack
+                "sub sp, sp, #4\n\t"
+                "str w2, [sp]\n\t"
+
+                // jump to entry point
+                "br x0\n\t"
+                "ret\n\t"
+                :
+                : "r"(r0), "r"(r1), "r"(r2), "r"(r3)
+                : "x4", "x5", "x6", "x7");
+}
\ No newline at end of file
--- a/posix-arm64/trampoline.c
+++ b/posix-arm64/trampoline.c
@@ -1,66 +1,75 @@
 #include <u.h>
 #include <libc.h>
+#include "mem.h"
 
-#define _NSYS		53
-
 void asm_syscall_hook(void)
 {
+	print("In hook somehow\n");
 	__asm__ __volatile__ (
-    	".global asm_syscall_hook \n\t"
-    	"asm_syscall_hook: \n\t"
-    	"stp x29, x30, [sp, #-16]! \n\t"  // Save frame pointer and link register
-    	"mov x29, sp \n\t"                // Set up frame pointer
-    	"sub sp, sp, #32 \n\t"            // Allocate 32 bytes on stack
-    	"str x8, [sp, #24] \n\t"          // Save x8 (syscall number)
-    	"stp x0, x1, [sp, #8] \n\t"       // Save x0 and x1
-    	"str x2, [sp] \n\t"               // Save x2
-    	"mov x0, x8 \n\t"                 // Move syscall number to x0
-    	"mov x1, x0 \n\t"                 // Shift arguments: x0 -> x1
-    	"mov x2, x1 \n\t"                 // x1 -> x2
-    	"mov x3, x2 \n\t"                 // x2 -> x3
-    	"mov x4, x3 \n\t"                 // x3 -> x4
-    	"mov x5, x4 \n\t"                 // x4 -> x5
-    	"mov x6, x5 \n\t"                 // x5 -> x6
-    	"ldr x7, [sp, #24] \n\t"          // Load original x8 into x7
-    	"bl _sysintercept \n\t"           // Call syscall function
-    	"mov sp, x29 \n\t"                // Restore stack pointer
-    	"ldp x29, x30, [sp], #16 \n\t"    // Restore frame pointer and link register
-    	"ret \n\t"                        // Return
+		"asm_syscall_hook:\r\n"
+    	"stp x29, x30, [sp, #-16]!\r\n"  // Save frame pointer and link register
+    	"mov x29, sp\r\n"                // Set up frame pointer
+    	"sub sp, sp, #32\r\n"            // Allocate 32 bytes on stack
+    	"stp x8, x0, [sp, #16]\r\n"      // Save x8 (syscall number) and x0
+    	"stp x1, x2, [sp]\r\n"           // Save x1 and x2
+    	"mov x1, x0\r\n"                 // Shift arguments: x0 -> x1
+    	"mov x2, x1\r\n"                 // x1 -> x2
+    	"mov x3, x2\r\n"                 // x2 -> x3
+    	"mov x4, x3\r\n"                 // x3 -> x4
+    	"mov x5, x4\r\n"                 // x4 -> x5
+    	"mov x6, x5\r\n"                 // x5 -> x6
+    	"mov x0, x8\r\n"                 // Move syscall number to x0
+    	"bl _sysintercept\r\n"           // Call syscall function
+    	"ldp x1, x2, [sp]\r\n"           // Restore original x1 and x2
+    	"ldp x8, x3, [sp, #16]\r\n"      // Restore original x8 and load original x0 into x3
+    	"mov x1, x3\r\n"                 // Restore original x0 to x1 (new x0 is return value)
+    	"ldp x29, x30, [sp], #48\r\n"    // Restore frame pointer and link register, and deallocate stack
+    	"ret\r\n"
 	);
 }
 
-int
+/**
+ * based on https://github.com/yasukata/zpoline/blob/master/main.c
+ * This fills the first 53 instruction slots of text with NOPs
+ * When a syscall is patched, it will instead jump here
+ * From the NOP slide, it then calls into asm_syscall_hook which in turn calls sysintercept
+ */
+void
 trampoline(void *text)
 {
-    int i;
+	void *hook_address = (void*)&asm_syscall_hook;
+    uint8_t code[] = {
+        // 53 nop instructions to catch the Plan 9 syscalls (0xd503201f each)
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5, 0x1f, 0x20, 0x03, 0xd5,
+        0x1f, 0x20, 0x03, 0xd5,
+        // Main code
+        0xff, 0x43, 0x00, 0xd1,  // sub sp, sp, #0x10
+        0x00, 0x00, 0x00, 0x90,  // adrp x0, 0 (placeholder)
+        0x0b, 0x00, 0x00, 0x91,  // add x11, x0, #0 (placeholder)
+        0x60, 0x01, 0x3f, 0xd6   // blr x11
+    };
 
-    for(i = 0; i < _NSYS; i++)
-        ((uint8_t *)text)[i] = 0x90;
+    // Calculate the relative address for adrp
+    uintptr_t adrp_offset = ((uintptr_t)hook_address >> 12) & 0x7FFFF;
+    code[216] |= (adrp_offset & 0xFF);
+    code[217] |= ((adrp_offset >> 8) & 0x7F) << 1;
 
-    /* Preserve redzone */
-    ((uint8_t *) text)[_NSYS + 0x00] = 0x48;
-    ((uint8_t *) text)[_NSYS + 0x01] = 0x81;
-	((uint8_t *) text)[_NSYS + 0x02] = 0xec;
-	((uint8_t *) text)[_NSYS + 0x03] = 0x80;
-	((uint8_t *) text)[_NSYS + 0x04] = 0x00;
-	((uint8_t *) text)[_NSYS + 0x05] = 0x00;
-	((uint8_t *) text)[_NSYS + 0x06] = 0x00;
+    // Set the low 12 bits for add
+    uintptr_t add_offset = (uintptr_t)hook_address & 0xFFF;
+    code[220] |= (add_offset & 0xFF);
+    code[221] |= (add_offset >> 8) << 2;
 
-    /* 49 bb [64-bit addr (8-byte)] movabs [64-bit addr (8-byte)],%r11 */
-	((uint8_t *) text)[_NSYS + 0x07] = 0x49;
-	((uint8_t *) text)[_NSYS + 0x08] = 0xbb;
-	((uint8_t *) text)[_NSYS + 0x09] = ((uint64_t) asm_syscall_hook >> (8 * 0)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x0a] = ((uint64_t) asm_syscall_hook >> (8 * 1)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x0b] = ((uint64_t) asm_syscall_hook >> (8 * 2)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x0c] = ((uint64_t) asm_syscall_hook >> (8 * 3)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x0d] = ((uint64_t) asm_syscall_hook >> (8 * 4)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x0e] = ((uint64_t) asm_syscall_hook >> (8 * 5)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x0f] = ((uint64_t) asm_syscall_hook >> (8 * 6)) & 0xff;
-	((uint8_t *) text)[_NSYS + 0x10] = ((uint64_t) asm_syscall_hook >> (8 * 7)) & 0xff;
-
-	// 41 ff e3                jmp    *%r11
-	((uint8_t *) text)[_NSYS + 0x11] = 0x41;
-	((uint8_t *) text)[_NSYS + 0x12] = 0xff;
-	((uint8_t *) text)[_NSYS + 0x13] = 0xe3;
-	return i + 0x13;
+	memcpy(text, code, sizeof(code));
 }