diff --git a/Makefile b/Makefile index 55146a0..1f05858 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ ifneq ($(arch), x86_64) endif biscuit: -ifeq ($(arch), x86_64) +ifeq ($(arch), $(filter $(arch), x86_64 aarch64)) @echo Building biscuit programs @mkdir -p biscuit/build @cd biscuit/build && cmake $(cmake_build_args) .. && make diff --git a/biscuit/CMakeLists.txt b/biscuit/CMakeLists.txt index 238e1be..b94f1c0 100644 --- a/biscuit/CMakeLists.txt +++ b/biscuit/CMakeLists.txt @@ -14,12 +14,12 @@ if (${ARCH} STREQUAL i386) if(APPLE) set(PREFIX i386-elf-) endif () - set(CMAKE_C_FLAGS "-m32") + set(CMAKE_C_FLAGS "-m32 -mno-red-zone") elseif (${ARCH} STREQUAL x86_64) if(APPLE) set(PREFIX x86_64-elf-) endif () - set(CMAKE_C_FLAGS "-m64") + set(CMAKE_C_FLAGS "-m64 -mno-red-zone") elseif (${ARCH} STREQUAL riscv32) set(PREFIX riscv64-unknown-elf-) set(CMAKE_C_FLAGS "-march=rv32imac -mabi=ilp32 -mcmodel=medany") @@ -28,7 +28,6 @@ elseif (${ARCH} STREQUAL riscv64) set(CMAKE_C_FLAGS "-march=rv64imac -mabi=lp64 -mcmodel=medany") elseif (${ARCH} STREQUAL aarch64) set(PREFIX aarch64-none-elf-) - set(CMAKE_C_FLAGS "-mgeneral-regs-only") set(LINK_FLAGS "-Ttext 0xffff000000000000") else() message("Unsupported arch: ${ARCH}") @@ -36,7 +35,7 @@ endif () set(CMAKE_ASM_COMPILER ${PREFIX}gcc) set(CMAKE_C_COMPILER ${PREFIX}gcc) set(CMAKE_RANLIB ${PREFIX}ranlib) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -MMD -MP -O -g -ffreestanding -nostdlib -nostdinc -fno-builtin -mno-red-zone -fno-stack-protector -fPIC -std=gnu11") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -MMD -MP -O -g -ffreestanding -nostdlib -nostdinc -fno-builtin -fno-stack-protector -fPIC -std=gnu11") set(CMAKE_ASM_FLAGS ${CMAKE_C_FLAGS}) set(CMAKE_C_LINK_FLAGS "${LINK_FLAGS} -nostdlib") # override default value to get rid of '-Wl,-search_paths_first -Wl,-headerpad_max_install_names' set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) # override default value to get rid of '-rdynamic' on Linux @@ -48,5 +47,5 @@ add_library(ulib ${LIBS}) foreach (PATH ${SRCS}) get_filename_component(NAME ${PATH} NAME_WE) add_executable(${NAME} ${PATH}) - target_link_libraries(${NAME} ulib) + target_link_libraries(${NAME} ulib gcc) endforeach () diff --git a/biscuit/c/bmsmallfile.c b/biscuit/c/bmsmallfile.c index a0813c6..b466f5d 100644 --- a/biscuit/c/bmsmallfile.c +++ b/biscuit/c/bmsmallfile.c @@ -22,6 +22,7 @@ struct timeval before, end; void rgettimeofday(struct timeval *h, void *p) { +#if defined(__x86_64__) time_t hi, lo, v; asm( "rdtsc\n" @@ -31,6 +32,9 @@ void rgettimeofday(struct timeval *h, void *p) v = hi << 32 | lo; if (h) h->tv_usec = v; +#else + // TODO: aarch64 +#endif } void start() @@ -117,7 +121,7 @@ write_test(char *name, int n, int size) exit(1); } } - + if ((r = close(fd)) < 0) { printf("%s: close failed %d %d\n", prog_name, r, errno); } @@ -164,13 +168,13 @@ read_test(int n, int size) printf("%s: read took %ld usec\n", prog_name, time); } -void +void delete_test(int n) -{ +{ int i; int r; int j; - + start(); for (i = 0, j = 0; i < n; i ++) { @@ -208,11 +212,11 @@ int main(int argc, char *argv[]) printf("%s %d %d %s\n", prog_name, n, size, topdir); creat_dir(); - + //printstats(topdir, 1); - + creat_test(n, size); - + //printstats(topdir, 0); read_test(n, size); diff --git a/biscuit/c/libs/litc.c b/biscuit/c/libs/litc.c index 24d9d5e..eeb60a8 100644 --- a/biscuit/c/libs/litc.c +++ b/biscuit/c/libs/litc.c @@ -86,13 +86,20 @@ static struct kinfo_t *kinfo; // stack is not used after munmapping it, but before calling exit(2). we use // this macro to make sure the clobbers are coherent for these three pieces of // code using syscalls. +#if defined(__x86_64__) #define SYSCALL_CLOBBERS "cc", "memory", "r9", "r10", "r11", "r12", "r13", \ "r14", "r15" +#elif defined(__aarch64__) +#define SYSCALL_CLOBBERS "cc", "memory" +#endif + long syscall(long a1, long a2, long a3, long a4, long a5, long trap) { long ret; + +#if defined(__x86_64__) register long r8 asm("r8") = a5; // we may want to follow the sys5 abi and have the kernel restore @@ -104,6 +111,20 @@ syscall(long a1, long a2, long a3, long a4, : "=a"(ret) : "0"(trap), "D"(a1), "S"(a2), "d"(a3), "c"(a4), "r"(r8) : SYSCALL_CLOBBERS); +#elif defined(__aarch64__) + register long x8 asm("x8") = trap; + register long x0 asm("x0") = a1; + register long x1 asm("x1") = a2; + register long x2 asm("x2") = a3; + register long x3 asm("x3") = a4; + register long x4 asm("x4") = a5; + + asm volatile( + "svc 0" + : "=r"(ret) + : "r"(x8), "0"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4) + : SYSCALL_CLOBBERS); +#endif return ret; } @@ -837,6 +858,7 @@ tfork_thread(struct tfork_t *args, long (*fn)(void *), void *fnarg) int tid; long flags = FORK_THREAD; +#if defined(__x86_64__) // rbx and rbp are preserved across syscalls. i don't know how to // specify rbp as a register contraint. register ulong rbp asm("rbp") = (ulong)fn; @@ -859,6 +881,30 @@ tfork_thread(struct tfork_t *args, long (*fn)(void *), void *fnarg) : "=a"(tid) : "D"(args), "S"(flags), "0"(SYS_FORK), "r"(rbp), "r"(rbx) : SYSCALL_CLOBBERS); +#elif defined(__aarch64__) + // all registers are preserved across syscalls for aarch64. + register ulong x8 asm("x8") = SYS_FORK; + register ulong x0 asm("x0") = (ulong)args; + register ulong x1 asm("x1") = flags; + + asm volatile( + "svc 0\n" + "cmp x0, #0\n" + // parent or error + "b.ne 1f\n" + // child + "ldr x0, %5\n" + "ldr x9, %4\n" + "blr x9\n" + "bl tfork_done\n" + "mov x0, #0\n" + "str xzr, [x0]\n" + "1:\n" + : "=r"(tid) + : "r"(x8), "0"(x0), "r"(x1), "m"(fn), "m"(fnarg) + : SYSCALL_CLOBBERS); +#endif + return tid; } @@ -907,6 +953,7 @@ _pcreate(void *vpcarg) status = (long)(pcargs.fn(pcargs.arg)); free(pcargs.tls); +#if defined(__x86_64__) // rbx and rbp are preserved across syscalls. i don't know how to // specify rbp as a register contraint. register ulong rbp asm("rbp") = SYS_THREXIT; @@ -929,6 +976,29 @@ _pcreate(void *vpcarg) : "a"(SYS_MUNMAP), "D"(pcargs.stack), "S"(pcargs.stksz), "r"(rbp), "r"(rbx) : SYSCALL_CLOBBERS); +#elif defined(__aarch64__) + register ulong x8 asm("x8") = SYS_MUNMAP; + register ulong x0 asm("x0") = (ulong)pcargs.stack; + register ulong x1 asm("x1") = (ulong)pcargs.stksz; + + asm volatile( + "svc 0\n" + "cmp x0, #0\n" + "b.eq 1f\n" + "mov x0, #0\n" + "str xzr, [x0]\n" + "1:\n" + "mov x8, %3\n" + "ldr x0, %4\n" + "svc 0\n" + "mov x0, #1\n" + "str xzr, [x0]\n" + : + : "r"(x8), "r"(x0), "r"(x1), + "X"(SYS_THREXIT), "m"(status) + : SYSCALL_CLOBBERS); +#endif + // not reached return 0; } @@ -1107,7 +1177,11 @@ pthread_barrier_wait(pthread_barrier_t *b) uint o = b->current; uint n = o + 1; if ((o & m) != 0) { +#if defined(__x86_64__) asm volatile("pause":::"memory"); +#elif defined(__aarch64__) + asm volatile("yield":::"memory"); +#endif continue; } c = n; @@ -1126,7 +1200,11 @@ pthread_barrier_wait(pthread_barrier_t *b) } while ((b->current & m) == 0) +#if defined(__x86_64__) asm volatile("pause":::"memory"); +#elif defined(__aarch64__) + asm volatile("yield":::"memory"); +#endif c = __sync_add_and_fetch(&b->current, -1); if (c == m) @@ -2159,7 +2237,7 @@ DIR * fdopendir(int fd) { #define BSIZE 4096 - + struct stat st; if (fstat(fd, &st) == -1) return NULL; @@ -2785,6 +2863,7 @@ sscanf(const char *src, const char *fmt, ...) ulong rdtsc(void) { +#if defined(__x86_64__) ulong low, hi; asm volatile( "rdtsc\n" @@ -2792,6 +2871,10 @@ rdtsc(void) : :); return hi << 32 | low; +#else + // TODO: aarch64 + return 0; +#endif } static char readlineb[256]; @@ -3534,17 +3617,25 @@ __start(int argc, char **argv, struct kinfo_t *k) void _start(void) { +#if defined(__x86_64__) // make sure that the stack is 16-byte aligned, as gcc assumes, after // _start's function prologue. gcc emits SSE instructions that require // 16-byte alignment (misalignment generates #GP). asm( "movq (%%rsp), %%rdi\n" // argc "leaq 8(%%rsp), %%rsi\n" // argv - "andq $0xfffffffffffffff0, %%rsp\n" - "subq $8, %%rsp\n" - "movabs $__start, %%rax\n" - "jmpq *%%rax\n" - ::: "memory", "cc"); + "andq $0xfffffffffffffff0, %%rsp\n" + "subq $8, %%rsp\n" + "movabs $__start, %%rax\n" + "jmpq *%%rax\n" + ::: "memory", "cc"); +#elif defined(__aarch64__) + asm( + "ldr x0, [sp]\n" // argc + "add x1, sp, #8\n" // argv + "bl __start\n" + ::: "memory", "cc"); +#endif } /* NGINX STUFF */ diff --git a/biscuit/c/pthtests.c b/biscuit/c/pthtests.c index 252c29e..271c5db 100644 --- a/biscuit/c/pthtests.c +++ b/biscuit/c/pthtests.c @@ -48,7 +48,11 @@ void *groupfault(void *a) int b = (int)(long)a; while (!go) - asm volatile("pause"); +#if defined(__x86_64__) + asm volatile("pause":::"memory"); +#elif defined(__aarch64__) + asm volatile("yield":::"memory"); +#endif blah = b; diff --git a/biscuit/c/sfork.c b/biscuit/c/sfork.c index 9535682..e2a8a31 100644 --- a/biscuit/c/sfork.c +++ b/biscuit/c/sfork.c @@ -118,6 +118,7 @@ pid_t _getppid(void) { pid_t ret; +#if defined(__x86_64__) asm volatile( "movq %%rsp, %%r10\n" "leaq 2(%%rip), %%r11\n" @@ -125,6 +126,14 @@ _getppid(void) : "=a"(ret) : "0"(40ul) : "cc", "memory", "r9", "r10", "r11", "edi", "esi", "edx", "ecx", "r8"); +#elif defined(__aarch64__) + register long x8 asm("x8") = 40ul; + asm volatile( + "svc 0" + : "=r"(ret) + : "r"(x8) + : "cc", "memory"); +#endif return ret; } @@ -135,6 +144,7 @@ void *igetpids(void *idp) long total = 0; while (!cease) { +#if defined(__x86_64__) asm volatile( "movl $40, %%eax\n" "movq %%rsp, %%r10\n" @@ -143,6 +153,14 @@ void *igetpids(void *idp) : : : SYSCALL_CLOBBERS, "eax", "edi", "esi", "edx", "ecx", "r8"); +#elif defined(__aarch64__) + asm volatile( + "mov x8, #40\n" + "svc 0" + : + : + : "cc", "memory"); +#endif total++; } return (void *)total; @@ -692,10 +710,14 @@ void *locks(void *_arg) pthread_barrier_wait(&bar); while (!cease) { +#if defined(__x86_64__) asm("lock incq %0\n" : : "m"(tot) : "cc", "memory"); +#else + // TODO: aarch64 +#endif } return (void *)tot; } diff --git a/biscuit/c/usertests.c b/biscuit/c/usertests.c index ae01e23..bb47c67 100644 --- a/biscuit/c/usertests.c +++ b/biscuit/c/usertests.c @@ -568,7 +568,7 @@ fourfiles(void) for(pi = 0; pi < 4; pi++){ fname = names[pi]; unlink(fname); - + pid = fork(); if(pid < 0){ printf("fork failed\n"); @@ -581,7 +581,7 @@ fourfiles(void) printf("create failed\n"); exit(0); } - + memset(buf, '0'+pi, BSIZE); for(i = 0; i < 12; i++){ if((n = write(fd, buf, BSIZE)) != BSIZE){ @@ -938,7 +938,7 @@ linkunlink() if(pid) wait(NULL); - else + else exit(0); printf("linkunlink ok\n"); @@ -1007,7 +1007,7 @@ subdir(void) } write(fd, "ff", 2); close(fd); - + if(rmdir("dd") >= 0){ printf("unlink dd (non-empty dir) succeeded!\n"); exit(0); @@ -1231,7 +1231,7 @@ bigfile(void) fsync(fd); close(fd); - + fd = open("bigfile", 0); if(fd < 0){ printf("cannot open bigfile\n"); @@ -1466,7 +1466,7 @@ forktest(void) if(pid == 0) exit(0); } - + if(n == ulim){ printf("fork claimed to work %d times!\n", ulim); exit(0); @@ -1489,7 +1489,7 @@ forktest(void) if (pthread_create(&t[n], NULL, _ptexit, NULL)) break; } - + if(n == ulim) errx(-1, "pthread_create claimed to work %d times!\n", ulim); @@ -1517,7 +1517,7 @@ forktest(void) // // can one sbrk() less than a page? // a = sbrk(0); // int i; -// for(i = 0; i < 5000; i++){ +// for(i = 0; i < 5000; i++){ // b = sbrk(1); // if(b != a){ // printf("sbrk test failed %d %x %x\n", i, a, b); @@ -1546,7 +1546,7 @@ forktest(void) // a = sbrk(0); // amt = (BIG) - (uint)a; // p = sbrk(amt); -// if (p != a) { +// if (p != a) { // printf("sbrk test failed to grow big address space; enough phys mem?\n"); // exit(0); // } @@ -1585,7 +1585,7 @@ forktest(void) // printf("sbrk downsize failed, a %x c %x\n", a, c); // exit(0); // } -// +// // // can we read the kernel's memory? // for(a = (char*)(KERNBASE); a < (char*) (KERNBASE+2000000); a += 50000){ // ppid = getpid(); @@ -1643,14 +1643,26 @@ void validateint(int *p) { ulong ret; + +#define SYS_PIPE2 293 +#if defined(__x86_64__) asm volatile( "movq %%rsp, %%r10\n" "leaq 2(%%rip), %%r11\n" "sysenter\n" : "=a"(ret) -#define SYS_PIPE2 293 : "0"(SYS_PIPE2), "D"(p) : "cc", "memory"); +#elif defined(__aarch64__) + register ulong x8 asm("x8") = SYS_PIPE2; + register ulong x0 asm("x0") = (ulong)p; + asm volatile( + "svc 0\n" + : "=r"(ret) + : "r"(x8), "0"(x0) + : "cc", "memory"); +#endif + if (ret == 0) errx(-1, "bad int passed?"); } @@ -2789,7 +2801,11 @@ static volatile int go; static void *_locker(void *v) { while (go != 1) +#if defined(__x86_64__) asm volatile("pause\n":::"memory"); +#elif defined(__aarch64__) + asm volatile("yield\n":::"memory"); +#endif pthread_mutex_t *m = (pthread_mutex_t *)v; int i; for (i = 0; i < ltimes; i++) { @@ -2904,7 +2920,11 @@ static void _condtest(const int nt) if (pthread_create(&t[i], NULL, _condsleep, &args[i])) errx(-1, "pthread_ create"); while (go == 0) +#if defined(__x86_64__) asm volatile("pause\n":::"memory"); +#elif defined(__aarch64__) + asm volatile("yield\n":::"memory"); +#endif } for (i = 0; i < nt; i++) @@ -2953,7 +2973,11 @@ static void _condbctest(const int nt) for (i = 0; i < bctimes; i++) { volatile int *p = &lcounter; while (*p < enext) +#if defined(__x86_64__) asm volatile("pause\n":::"memory"); +#elif defined(__aarch64__) + asm volatile("yield\n":::"memory"); +#endif if (pthread_mutex_lock(&m)) err(-1, "lock"); if (i == bctimes - 1) @@ -2985,7 +3009,7 @@ int pthreadsharedfd; void *threadfd(void *arg) { long n = (long) arg; - + for (int i = 0; i < 1000; i++) { if (n == 0) { pthreadsharedfd = open("sharedfdf", O_CREATE|O_RDWR); @@ -3007,13 +3031,13 @@ void _pthreadfd(void) { const int nthreads = 2; printf("pthread shared fd\n"); - + int i; pthread_t t[nthreads]; for (int i = 0; i < nthreads; i++) { if (pthread_create(&t[i], NULL, threadfd, (void *) (long) i)) errx(-1, "pthread create"); - } + } for (i = 0; i < nthreads; i++) { if (pthread_join(t[i], NULL)) errx(-1, "pthread join"); @@ -3620,7 +3644,7 @@ logtest() } free(buf); - + printf("log test OK\n"); } @@ -3835,9 +3859,9 @@ main(int argc, char *argv[]) posixtest(); barriertest(); threadwait(); - + pthreadfd(); - + fnonblock(); preadwrite(); stdiotest(); diff --git a/biscuit/cxx/libutil.cc b/biscuit/cxx/libutil.cc index 48c151d..b5b1d3c 100644 --- a/biscuit/cxx/libutil.cc +++ b/biscuit/cxx/libutil.cc @@ -101,6 +101,7 @@ setaffinity(int c) ulong rdtsc(void) { +#if defined(__x86_64__) ulong low, hi; asm volatile( "rdtsc\n" @@ -108,5 +109,9 @@ rdtsc(void) : :); return hi << 32 | low; +#else + // TODO: aarch64 + return 0; +#endif } #endif diff --git a/biscuit/cxx/spinbarrier.hh b/biscuit/cxx/spinbarrier.hh index 3447589..1c54271 100644 --- a/biscuit/cxx/spinbarrier.hh +++ b/biscuit/cxx/spinbarrier.hh @@ -39,7 +39,11 @@ public: { // Wait if the barrier is in the exit phase while (entered_ & phase_mask) +#if defined(__x86_64__) asm volatile("pause":::); +#elif defined(__aarch64__) + asm volatile("yield":::); +#endif // Enter the barrier auto v = ++entered_; @@ -52,7 +56,11 @@ public: // Wait until the barrier switches to the exit phase while (!(entered_.load(std::memory_order_relaxed) & phase_mask)) +#if defined(__x86_64__) asm volatile("pause":::); +#elif defined(__aarch64__) + asm volatile("yield":::); +#endif // Exit the batter if ((v = --entered_) == phase_mask)