Merge "bionic: add clean kernel header ucontext.h"
diff --git a/MAINTAINERS b/MAINTAINERS
deleted file mode 100644
index a76dc24..0000000
--- a/MAINTAINERS
+++ /dev/null
@@ -1,6 +0,0 @@
-
-Bionic support for SuperH
--------------------------
-Bionic support for SuperH architecture is written by
-Shin-ichiro KAWASAKI <shinichiro.kawasaki.mg@hitachi.com>
-and Contributed to Android by Hitachi, Ltd. and Renesas Solutions Corp.
diff --git a/libc/Android.mk b/libc/Android.mk
index f7e0e8f..8e86d26 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -459,6 +459,13 @@
   libc_common_cflags += -DDEBUG
 endif
 
+# To customize dlmalloc's alignment, set BOARD_MALLOC_ALIGNMENT in
+# the appropriate BoardConfig.mk file.
+#
+ifneq ($(BOARD_MALLOC_ALIGNMENT),)
+  libc_common_cflags += -DMALLOC_ALIGNMENT=$(BOARD_MALLOC_ALIGNMENT)
+endif
+
 ifeq ($(TARGET_ARCH),arm)
   libc_common_cflags += -fstrict-aliasing
   libc_crt_target_cflags := -mthumb-interwork
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 765d2fa..e1934fa 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -22,18 +22,18 @@
 #        assembler template for the syscall; it's up to the bionic implementation to provide
 #        a relevant C stub
 #
-#      - additionally, if the syscall number is different amoung ARM, and x86, use:
-#        return_type funcname[:syscall_name](parameters) arm_number,x86_number
+#      - additionally, if the syscall number differs among ARM, x86, and MIPS, use:
+#        return_type funcname[:syscall_name](parameters) arm_number,x86_number,mips_number
 #
 # the file is processed by a python script named gensyscalls.py
 #
 
 # process management
-void    _exit:exit_group (int)      248,252
+void    _exit:exit_group (int)      248,252,246
 void    _exit_thread:exit (int)     1
 pid_t   __fork:fork (void)           2
-pid_t   _waitpid:waitpid (pid_t, int*, int, struct rusage*)   -1,7
-int     __waitid:waitid(int, pid_t, struct siginfo_t*, int,void*)          280,284
+pid_t   _waitpid:waitpid (pid_t, int*, int, struct rusage*)   -1,7,7
+int     __waitid:waitid(int, pid_t, struct siginfo_t*, int,void*)          280,284,278
 
 # NOTE: this system call is never called directly, but we list it there
 #       to have __NR_clone properly defined.
@@ -42,71 +42,86 @@
 
 int     execve (const char*, char* const*, char* const*)  11
 
-int     __setuid:setuid32 (uid_t)    213
-uid_t   getuid:getuid32 ()         199
-gid_t   getgid:getgid32 ()         200
-uid_t   geteuid:geteuid32 ()       201
-gid_t   getegid:getegid32 ()       202
-uid_t   getresuid:getresuid32 (uid_t *ruid, uid_t *euid, uid_t *suid)   209
-gid_t   getresgid:getresgid32 (gid_t *rgid, gid_t *egid, gid_t *sgid)   211
-pid_t   gettid()                   224
-ssize_t readahead(int, off64_t, size_t)     225
-int     getgroups:getgroups32(int, gid_t *)    205
+int     __setuid:setuid32 (uid_t)    213,213,-1
+int     __setuid:setuid (uid_t)   -1,-1,23
+uid_t   getuid:getuid32 ()         199,199,-1
+uid_t   getuid:getuid ()           -1,-1,24
+gid_t   getgid:getgid32 ()         200,200,-1
+gid_t   getgid:getgid ()           -1,-1,47
+uid_t   geteuid:geteuid32 ()       201,201,-1
+uid_t   geteuid:geteuid ()         -1,-1,49
+gid_t   getegid:getegid32 ()       202,202,-1
+gid_t   getegid:getegid ()         -1,-1,50
+uid_t   getresuid:getresuid32 (uid_t *ruid, uid_t *euid, uid_t *suid)   209,209,-1
+uid_t   getresuid:getresuid (uid_t *ruid, uid_t *euid, uid_t *suid)     -1,-1,186
+gid_t   getresgid:getresgid32 (gid_t *rgid, gid_t *egid, gid_t *sgid)   211,211,-1
+gid_t   getresgid:getresgid (gid_t *rgid, gid_t *egid, gid_t *sgid)     -1,-1,191
+pid_t   gettid()                   224,224,222
+ssize_t readahead(int, off64_t, size_t)     225,225,223
+int     getgroups:getgroups32(int, gid_t *)    205,205,-1
+int     getgroups:getgroups(int, gid_t *)      -1,-1,80
 pid_t   getpgid(pid_t)             132
 pid_t   getppid()                  64
 pid_t   setsid()                   66
-int     setgid:setgid32(gid_t)     214
+int     setgid:setgid32(gid_t)     214,214,-1
+int     setgid:setgid(gid_t)       -1,-1,46
 int     seteuid:seteuid32(uid_t)   stub
-int     __setreuid:setreuid32(uid_t, uid_t)   203
-int     __setresuid:setresuid32(uid_t, uid_t, uid_t)   208
-int     setresgid:setresgid32(gid_t, gid_t, gid_t)   210
+int     __setreuid:setreuid32(uid_t, uid_t)   203,203,-1
+int     __setreuid:setreuid(uid_t, uid_t)     -1,-1,70
+int     __setresuid:setresuid32(uid_t, uid_t, uid_t)   208,208,-1
+int     __setresuid:setresuid(uid_t, uid_t, uid_t)     -1,-1,185
+int     setresgid:setresgid32(gid_t, gid_t, gid_t)   210,210,-1
+int     setresgid:setresgid(gid_t, gid_t, gid_t)     -1,-1,190
 void*   __brk:brk(void*)           45
 # see comments in arch-arm/bionic/kill.S to understand why we don't generate an ARM stub for kill/tkill
-int     kill(pid_t, int)           -1,37
-int     tkill(pid_t tid, int sig)  -1,238
+int     kill(pid_t, int)           -1,37,37
+int     tkill(pid_t tid, int sig)  -1,238,236
 int     __ptrace:ptrace(int request, int pid, void* addr, void* data)  26
-int     __set_thread_area:set_thread_area(void*  user_desc)  -1,243
+int     __set_thread_area:set_thread_area(void*  user_desc)  -1,243,283
 int     __getpriority:getpriority(int, int)  96
 int     setpriority(int, int, int)   97
 int     setrlimit(int resource, const struct rlimit *rlp)  75
-int     getrlimit:ugetrlimit(int resource, struct rlimit *rlp)  191
+int     getrlimit:ugetrlimit(int resource, struct rlimit *rlp)  191,191,-1
+int     getrlimit:getrlimit(int resource, struct rlimit *rlp)  -1,-1,76
 int     getrusage(int who, struct rusage*  r_usage)  77
-int     setgroups:setgroups32(int, const gid_t *)   206
+int     setgroups:setgroups32(int, const gid_t *)   206,206,-1
+int     setgroups:setgroups(int, const gid_t *)     -1,-1,81
 pid_t   getpgrp(void)  stub
 int     setpgid(pid_t, pid_t)  57
-pid_t   vfork(void)  190,-1,190
-int     setregid:setregid32(gid_t, gid_t)  204
+pid_t   vfork(void)  190,-1,-1
+int     setregid:setregid32(gid_t, gid_t)  204,204,-1
+int     setregid:setregid(gid_t, gid_t)    -1,-1,71
 int     chroot(const char *)  61
 # IMPORTANT: Even though <sys/prctl.h> declares prctl(int,...), the syscall stub must take 6 arguments
 #            to match the kernel implementation.
-int     prctl(int option, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5)  172
-int     capget(cap_user_header_t header, cap_user_data_t data) 184
-int     capset(cap_user_header_t header, const cap_user_data_t data) 185
-int     sigaltstack(const stack_t*, stack_t*) 186
+int     prctl(int option, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5)  172,172,192
+int     capget(cap_user_header_t header, cap_user_data_t data) 184,184,204
+int     capset(cap_user_header_t header, const cap_user_data_t data) 185,185,205
+int     sigaltstack(const stack_t*, stack_t*) 186,186,206
 int     acct(const char*  filepath)  51
 
 # file descriptors
 ssize_t     read (int, void*, size_t)        3
 ssize_t     write (int, const void*, size_t)       4
-ssize_t     pread64 (int, void *, size_t, off64_t) 180
-ssize_t     pwrite64 (int, void *, size_t, off64_t) 181
+ssize_t     pread64 (int, void *, size_t, off64_t) 180,180,200
+ssize_t     pwrite64 (int, void *, size_t, off64_t) 181,181,201
 int         __open:open (const char*, int, mode_t)  5
-int         __openat:openat (int, const char*, int, mode_t)  322,295
+int         __openat:openat (int, const char*, int, mode_t)  322,295,288
 int         close (int)                      6
 int         creat(const char*, mode_t)       stub
 off_t       lseek(int, off_t, int)           19
 int         __llseek:_llseek (int, unsigned long, unsigned long, loff_t*, int)  140
 pid_t       getpid ()    20
 void *      mmap(void *, size_t, int, int, int, long)  stub
-void *      __mmap2:mmap2(void*, size_t, int, int, int, long)   192
+void *      __mmap2:mmap2(void*, size_t, int, int, int, long)   192,192,210
 int         munmap(void *, size_t)  91
-void *      mremap(void *, size_t, size_t, unsigned long)  163
+void *      mremap(void *, size_t, size_t, unsigned long)  163,163,167
 int         msync(const void *, size_t, int)    144
 int         mprotect(const void *, size_t, int)  125
-int         madvise(const void *, size_t, int)  220,219
-int         mlock(const void *addr, size_t len)    150
-int         munlock(const void *addr, size_t len)   151
-int         mincore(void*  start, size_t  length, unsigned char*  vec)   219,218
+int         madvise(const void *, size_t, int)  220,219,218
+int         mlock(const void *addr, size_t len)    150,150,154
+int         munlock(const void *addr, size_t len)   151,151,155
+int         mincore(void*  start, size_t  length, unsigned char*  vec)   219,218,217
 int         __ioctl:ioctl(int, int, void *)  54
 int         readv(int, const struct iovec *, int)   145
 int         writev(int, const struct iovec *, int)  146
@@ -114,143 +129,146 @@
 int         flock(int, int)   143
 int         fchmod(int, mode_t)  94
 int         dup(int)  41
-int         pipe(int *)  42,42
-int         pipe2(int *, int) 359,331
+int         pipe(int *)  42,42,-1
+int         pipe2(int *, int) 359,331,328
 int         dup2(int, int)   63
 int         select:_newselect(int, struct fd_set *, struct fd_set *, struct fd_set *, struct timeval *)  142
 int         ftruncate(int, off_t)  93
-int         ftruncate64(int, off64_t) 194
-int         getdents:getdents64(unsigned int, struct dirent *, unsigned int)   217,220
+int         ftruncate64(int, off64_t) 194,194,212
+int         getdents:getdents64(unsigned int, struct dirent *, unsigned int)   217,220,219
 int         fsync(int)  118
-int         fdatasync(int) 148
-int         fchown:fchown32(int, uid_t, gid_t)  207
+int         fdatasync(int) 148,148,152
+int         fchown:fchown32(int, uid_t, gid_t)  207,207,-1
+int         fchown:fchown(int, uid_t, gid_t)    -1,-1,95
 void        sync(void)  36
-int         __fcntl64:fcntl64(int, int, void *)  221
-int         __fstatfs64:fstatfs64(int, size_t, struct statfs *)  267,269
-ssize_t     sendfile(int out_fd, int in_fd, off_t *offset, size_t count)  187
-int         fstatat:fstatat64(int dirfd, const char *path, struct stat *buf, int flags)   327,300
-int         mkdirat(int dirfd, const char *pathname, mode_t mode)  323,296
-int         fchownat(int dirfd, const char *path, uid_t owner, gid_t group, int flags)  325,298
-int         fchmodat(int dirfd, const char *path, mode_t mode, int flags)  333,306
-int         renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath)  329,302
-int         fsetxattr(int, const char *, const void *, size_t, int) 228
-ssize_t     fgetxattr(int, const char *, void *, size_t) 231
-ssize_t     flistxattr(int, char *, size_t) 234
-int         fremovexattr(int, const char *) 237
+int         __fcntl64:fcntl64(int, int, void *)  221,221,220
+int         __fstatfs64:fstatfs64(int, size_t, struct statfs *)  267,269,256
+ssize_t     sendfile(int out_fd, int in_fd, off_t *offset, size_t count)  187,187,207
+int         fstatat:fstatat64(int dirfd, const char *path, struct stat *buf, int flags)   327,300,293
+int         mkdirat(int dirfd, const char *pathname, mode_t mode)  323,296,289
+int         fchownat(int dirfd, const char *path, uid_t owner, gid_t group, int flags)  325,298,291
+int         fchmodat(int dirfd, const char *path, mode_t mode, int flags)  333,306,299
+int         renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath)  329,302,295
+int         fsetxattr(int, const char *, const void *, size_t, int) 228,228,226
+ssize_t     fgetxattr(int, const char *, void *, size_t) 231,231,229
+ssize_t     flistxattr(int, char *, size_t) 234,234,232
+int         fremovexattr(int, const char *) 237,237,235
 
 # file system
 int     link (const char*, const char*)  9
 int     unlink (const char*)             10
-int     unlinkat (int, const char *, int)   328,301
+int     unlinkat (int, const char *, int)   328,301,294
 int     chdir (const char*)              12
 int     mknod (const char*, mode_t, dev_t)  14
 int     chmod (const char*,mode_t)          15
-int     chown:chown32(const char *, uid_t, gid_t)  212
-int     lchown:lchown32 (const char*, uid_t, gid_t)  198
+int     chown:chown32(const char *, uid_t, gid_t)  212,212,-1
+int     chown:chown(const char *, uid_t, gid_t)    -1,-1,202
+int     lchown:lchown32 (const char*, uid_t, gid_t)  198,198,-1
+int     lchown:lchown (const char*, uid_t, gid_t)  -1,-1,16
 int     mount (const char*, const char*, const char*, unsigned long, const void*)  21
 int     umount(const char*)  stub
 int     umount2 (const char*, int)  52
-int     fstat:fstat64(int, struct stat*)    197
-int     stat:stat64(const char *, struct stat *)  195
-int     lstat:lstat64(const char *, struct stat *)  196
+int     fstat:fstat64(int, struct stat*)    197,197,215
+int     stat:stat64(const char *, struct stat *)  195,195,213
+int     lstat:lstat64(const char *, struct stat *)  196,196,214
 int     mkdir(const char *, mode_t) 39
 int     readlink(const char *, char *, size_t)  85
 int     rmdir(const char *)  40
 int     rename(const char *, const char *)  38
-int     __getcwd:getcwd(char * buf, size_t size)  183
+int     __getcwd:getcwd(char * buf, size_t size)  183,183,203
 int     access(const char *, int)  33
 int     symlink(const char *, const char *)  83
 int     fchdir(int)    133
 int     truncate(const char*, off_t)    92
-int     setxattr(const char *, const char *, const void *, size_t, int) 226
-int     lsetxattr(const char *, const char *, const void *, size_t, int) 227
-ssize_t getxattr(const char *, const char *, void *, size_t) 229
-ssize_t lgetxattr(const char *, const char *, void *, size_t) 230
-ssize_t listxattr(const char *, char *, size_t) 232
-ssize_t llistxattr(const char *, char *, size_t) 233
-int     removexattr(const char *, const char *) 235
-int     lremovexattr(const char *, const char *) 236
-int     __statfs64:statfs64(const char *, size_t, struct statfs *)  266,268
+int     setxattr(const char *, const char *, const void *, size_t, int) 226,226,224
+int     lsetxattr(const char *, const char *, const void *, size_t, int) 227,227,225
+ssize_t getxattr(const char *, const char *, void *, size_t) 229,229,227
+ssize_t lgetxattr(const char *, const char *, void *, size_t) 230,230,228
+ssize_t listxattr(const char *, char *, size_t) 232,232,230
+ssize_t llistxattr(const char *, char *, size_t) 233,233,231
+int     removexattr(const char *, const char *) 235,235,233
+int     lremovexattr(const char *, const char *) 236,236,234
+int     __statfs64:statfs64(const char *, size_t, struct statfs *)  266,268,255
 
 # time
 int           pause ()                       29
 int           gettimeofday(struct timeval*, struct timezone*)       78
 int           settimeofday(const struct timeval*, const struct timezone*)   79
 clock_t       times(struct tms *)       43
-int           nanosleep(const struct timespec *, struct timespec *)   162
-int           clock_gettime(clockid_t clk_id, struct timespec *tp)    263,265
-int           clock_settime(clockid_t clk_id, const struct timespec *tp)  262,264
-int           clock_getres(clockid_t clk_id, struct timespec *res)   264,266
-int           clock_nanosleep(clockid_t clock_id, int flags, const struct timespec *req, struct timespec *rem)  265,267
+int           nanosleep(const struct timespec *, struct timespec *)   162,162,166
+int           clock_gettime(clockid_t clk_id, struct timespec *tp)    263,265,263
+int           clock_settime(clockid_t clk_id, const struct timespec *tp)  262,264,262
+int           clock_getres(clockid_t clk_id, struct timespec *res)   264,266,264
+int           clock_nanosleep(clockid_t clock_id, int flags, const struct timespec *req, struct timespec *rem)  265,267,265
 int           getitimer(int, const struct itimerval *)   105
 int           setitimer(int, const struct itimerval *, struct itimerval *)  104
-int           __timer_create:timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)    257,259
-int           __timer_settime:timer_settime(timer_t, int, const struct itimerspec*, struct itimerspec*) 258,260
-int           __timer_gettime:timer_gettime(timer_t, struct itimerspec*)                                259,261
-int           __timer_getoverrun:timer_getoverrun(timer_t)                                              260,262
-int           __timer_delete:timer_delete(timer_t)                                                      261,263
-int           utimes(const char*, const struct timeval tvp[2])                          269, 271
-int           utimensat(int, const char *, const struct timespec times[2], int)         348, 320
+int           __timer_create:timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)    257,259,257
+int           __timer_settime:timer_settime(timer_t, int, const struct itimerspec*, struct itimerspec*) 258,260,258
+int           __timer_gettime:timer_gettime(timer_t, struct itimerspec*)                                259,261,259
+int           __timer_getoverrun:timer_getoverrun(timer_t)                                              260,262,260
+int           __timer_delete:timer_delete(timer_t)                                                      261,263,261
+int           utimes(const char*, const struct timeval tvp[2])                          269,271,267
+int           utimensat(int, const char *, const struct timespec times[2], int)         348,320,316
 
 # signals
 int     sigaction(int, const struct sigaction *, struct sigaction *)  67
 int     sigprocmask(int, const sigset_t *, sigset_t *)  126
 int     __sigsuspend:sigsuspend(int unused1, int unused2, unsigned mask)  72
-int     __rt_sigaction:rt_sigaction (int sig, const struct sigaction *act, struct sigaction *oact, size_t sigsetsize)  174
-int     __rt_sigprocmask:rt_sigprocmask (int  how, const sigset_t *set, sigset_t *oset, size_t sigsetsize)  175
-int     __rt_sigtimedwait:rt_sigtimedwait(const sigset_t *set, struct siginfo_t  *info, struct timespec_t  *timeout, size_t  sigset_size)  177
+int     __rt_sigaction:rt_sigaction (int sig, const struct sigaction *act, struct sigaction *oact, size_t sigsetsize)  174,174,194
+int     __rt_sigprocmask:rt_sigprocmask (int  how, const sigset_t *set, sigset_t *oset, size_t sigsetsize)  175,175,195
+int     __rt_sigtimedwait:rt_sigtimedwait(const sigset_t *set, struct siginfo_t  *info, struct timespec_t  *timeout, size_t  sigset_size)  177,177,197
 int     sigpending(sigset_t *)  73
 
 # sockets
-int           socket(int, int, int)              281,-1
-int           socketpair(int, int, int, int*)    288,-1
-int           bind(int, struct sockaddr *, int)  282,-1
-int           connect(int, struct sockaddr *, socklen_t)   283,-1
-int           listen(int, int)                   284,-1
-int           accept(int, struct sockaddr *, socklen_t *)  285,-1
-int           getsockname(int, struct sockaddr *, socklen_t *)  286,-1
-int           getpeername(int, struct sockaddr *, socklen_t *)  287,-1
-int           sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t)  290,-1
-int           recvfrom(int, void *, size_t, unsigned int, struct sockaddr *, socklen_t *)  292,-1
-int           shutdown(int, int)  293,-1
-int           setsockopt(int, int, int, const void *, socklen_t)  294,-1
-int           getsockopt(int, int, int, void *, socklen_t *)    295,-1
-int           sendmsg(int, const struct msghdr *, unsigned int)  296,-1
-int           recvmsg(int, struct msghdr *, unsigned int)   297,-1
+int           socket(int, int, int)              281,-1,183
+int           socketpair(int, int, int, int*)    288,-1,184
+int           bind(int, struct sockaddr *, int)  282,-1,169
+int           connect(int, struct sockaddr *, socklen_t)   283,-1,170
+int           listen(int, int)                   284,-1,174
+int           accept(int, struct sockaddr *, socklen_t *)  285,-1,168
+int           getsockname(int, struct sockaddr *, socklen_t *)  286,-1,172
+int           getpeername(int, struct sockaddr *, socklen_t *)  287,-1,171
+int           sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t)  290,-1,180
+int           recvfrom(int, void *, size_t, unsigned int, struct sockaddr *, socklen_t *)  292,-1,176
+int           shutdown(int, int)  293,-1,182
+int           setsockopt(int, int, int, const void *, socklen_t)  294,-1,181
+int           getsockopt(int, int, int, void *, socklen_t *)    295,-1,173
+int           sendmsg(int, const struct msghdr *, unsigned int)  296,-1,179
+int           recvmsg(int, struct msghdr *, unsigned int)   297,-1,177
 
 # sockets for x86. These are done as an "indexed" call to socketcall syscall.
-int           socket:socketcall:1 (int, int, int) -1,102
-int           bind:socketcall:2 (int, struct sockaddr *, int)  -1,102
-int           connect:socketcall:3(int, struct sockaddr *, socklen_t)   -1,102
-int           listen:socketcall:4(int, int)                   -1,102
-int           accept:socketcall:5(int, struct sockaddr *, socklen_t *)  -1,102
-int           getsockname:socketcall:6(int, struct sockaddr *, socklen_t *)  -1,102
-int           getpeername:socketcall:7(int, struct sockaddr *, socklen_t *)  -1,102
-int           socketpair:socketcall:8(int, int, int, int*)    -1,102
-int           sendto:socketcall:11(int, const void *, size_t, int, const struct sockaddr *, socklen_t)  -1,102
-int           recvfrom:socketcall:12(int, void *, size_t, unsigned int, struct sockaddr *, socklen_t *)  -1,102
-int           shutdown:socketcall:13(int, int)  -1,102
-int           setsockopt:socketcall:14(int, int, int, const void *, socklen_t)  -1,102
-int           getsockopt:socketcall:15(int, int, int, void *, socklen_t *)    -1,102
-int           sendmsg:socketcall:16(int, const struct msghdr *, unsigned int)  -1,102
-int           recvmsg:socketcall:17(int, struct msghdr *, unsigned int)   -1,102
+int           socket:socketcall:1 (int, int, int) -1,102,-1
+int           bind:socketcall:2 (int, struct sockaddr *, int)  -1,102,-1
+int           connect:socketcall:3(int, struct sockaddr *, socklen_t)   -1,102,-1
+int           listen:socketcall:4(int, int)                   -1,102,-1
+int           accept:socketcall:5(int, struct sockaddr *, socklen_t *)  -1,102,-1
+int           getsockname:socketcall:6(int, struct sockaddr *, socklen_t *)  -1,102,-1
+int           getpeername:socketcall:7(int, struct sockaddr *, socklen_t *)  -1,102,-1
+int           socketpair:socketcall:8(int, int, int, int*)    -1,102,-1
+int           sendto:socketcall:11(int, const void *, size_t, int, const struct sockaddr *, socklen_t)  -1,102,-1
+int           recvfrom:socketcall:12(int, void *, size_t, unsigned int, struct sockaddr *, socklen_t *)  -1,102,-1
+int           shutdown:socketcall:13(int, int)  -1,102,-1
+int           setsockopt:socketcall:14(int, int, int, const void *, socklen_t)  -1,102,-1
+int           getsockopt:socketcall:15(int, int, int, void *, socklen_t *)    -1,102,-1
+int           sendmsg:socketcall:16(int, const struct msghdr *, unsigned int)  -1,102,-1
+int           recvmsg:socketcall:17(int, struct msghdr *, unsigned int)   -1,102,-1
 
 # scheduler & real-time
-int sched_setscheduler(pid_t pid, int policy, const struct sched_param *param)  156
-int sched_getscheduler(pid_t pid)  157
-int sched_yield(void)  158
-int sched_setparam(pid_t pid, const struct sched_param *param)  154
-int sched_getparam(pid_t pid, struct sched_param *param)  155
-int sched_get_priority_max(int policy)  159
-int sched_get_priority_min(int policy)  160
-int sched_rr_get_interval(pid_t pid, struct timespec *interval)  161
-int sched_setaffinity(pid_t pid, size_t setsize, const cpu_set_t* set) 241
-int __sched_getaffinity:sched_getaffinity(pid_t pid, size_t setsize, cpu_set_t* set)  242
-int __getcpu:getcpu(unsigned *cpu, unsigned *node, void *unused) 345,318
+int sched_setscheduler(pid_t pid, int policy, const struct sched_param *param)  156,156,160
+int sched_getscheduler(pid_t pid)  157,157,161
+int sched_yield(void)  158,158,162
+int sched_setparam(pid_t pid, const struct sched_param *param)  154,154,158
+int sched_getparam(pid_t pid, struct sched_param *param)  155,155,159
+int sched_get_priority_max(int policy)  159,159,163
+int sched_get_priority_min(int policy)  160,160,164
+int sched_rr_get_interval(pid_t pid, struct timespec *interval)  161,161,165
+int sched_setaffinity(pid_t pid, size_t setsize, const cpu_set_t* set) 241,241,239
+int __sched_getaffinity:sched_getaffinity(pid_t pid, size_t setsize, cpu_set_t* set)  242,242,240
+int __getcpu:getcpu(unsigned *cpu, unsigned *node, void *unused) 345,318,312
 
 # io priorities
-int ioprio_set(int which, int who, int ioprio) 314,289
-int ioprio_get(int which, int who) 315,290
+int ioprio_set(int which, int who, int ioprio) 314,289,314
+int ioprio_get(int which, int who) 315,290,315
 
 # other
 int     uname(struct utsname *)  122
@@ -265,21 +283,25 @@
 int     personality(unsigned long)  136
 
 # futex
-int	futex(void *, int, int, void *, void *, int) 240
+int	futex(void *, int, int, void *, void *, int) 240,240,238
 
 # epoll
-int     epoll_create(int size)     250,254
-int     epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)    251,255
-int     epoll_wait(int epfd, struct epoll_event *events, int max, int timeout)   252,256
+int     epoll_create(int size)     250,254,248
+int     epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)    251,255,249
+int     epoll_wait(int epfd, struct epoll_event *events, int max, int timeout)   252,256,250
 
-int     inotify_init(void)      316,291,290
-int     inotify_add_watch(int, const char *, unsigned int)  317,292
-int     inotify_rm_watch(int, unsigned int)  318,293
+int     inotify_init(void)      316,291,284
+int     inotify_add_watch(int, const char *, unsigned int)  317,292,285
+int     inotify_rm_watch(int, unsigned int)  318,293,286
 
-int     poll(struct pollfd *, unsigned int, long)  168
+int     poll(struct pollfd *, unsigned int, long)  168,168,188
 
-int     eventfd:eventfd2(unsigned int, int)  356,328
+int     eventfd:eventfd2(unsigned int, int)  356,328,325
 
 # ARM-specific ARM_NR_BASE == 0x0f0000 == 983040
-int     __set_tls:ARM_set_tls(void*)                                 983045,-1
-int     cacheflush:ARM_cacheflush(long start, long end, long flags)  983042,-1
+int     __set_tls:ARM_set_tls(void*)                                 983045,-1,-1
+int     cacheflush:ARM_cacheflush(long start, long end, long flags)  983042,-1,-1
+
+# MIPS-specific
+int	_flush_cache:cacheflush(char *addr, const int nbytes, const int op)	-1,-1,147
+int	syscall(int number,...) -1,-1,0
diff --git a/libc/arch-arm/bionic/crtbegin_dynamic.S b/libc/arch-arm/bionic/crtbegin_dynamic.S
index 0999084..6ca0845 100644
--- a/libc/arch-arm/bionic/crtbegin_dynamic.S
+++ b/libc/arch-arm/bionic/crtbegin_dynamic.S
@@ -43,21 +43,17 @@
 #    - address of an "onexit" function, not used on any
 #      platform supported by Bionic
 #
-#    - address of the "main" function of the program. We
-#      can't hard-code it in the adr pseudo instruction
-#      so we use a tiny trampoline that will get relocated
-#      by the dynamic linker before this code runs
+#    - address of the "main" function of the program.
 #
 #    - address of the constructor list
 #
 _start:	
 	mov	r0, sp
 	mov	r1, #0
-	adr r2, 0f
-	adr r3, 1f
-	b	__libc_init
-
-0:  b   main
+	ldr	r2, =main
+	adr	r3, 1f
+	ldr	r4, =__libc_init
+	bx	r4
 
 1:  .long   __PREINIT_ARRAY__
     .long   __INIT_ARRAY__
diff --git a/libc/arch-arm/bionic/crtbegin_static.S b/libc/arch-arm/bionic/crtbegin_static.S
index 13b05b2..e62ac1d 100644
--- a/libc/arch-arm/bionic/crtbegin_static.S
+++ b/libc/arch-arm/bionic/crtbegin_static.S
@@ -43,21 +43,17 @@
 #    - address of an "onexit" function, not used on any
 #      platform supported by Bionic
 #
-#    - address of the "main" function of the program. We
-#      can't hard-code it in the adr pseudo instruction
-#      so we use a tiny trampoline that will get relocated
-#      by the dynamic linker before this code runs
+#    - address of the "main" function of the program.
 #
 #    - address of the constructor list
 #
 _start:	
 	mov	r0, sp
 	mov	r1, #0
-	adr r2, 0f
-	adr r3, 1f
-	b	__libc_init
-
-0:  b   main
+	ldr	r2, =main
+	adr	r3, 1f
+	ldr	r4, =__libc_init
+	bx	r4
 
 1:  .long   __PREINIT_ARRAY__
     .long   __INIT_ARRAY__
diff --git a/libc/arch-mips/syscalls.mk b/libc/arch-mips/syscalls.mk
new file mode 100644
index 0000000..d4b70a4
--- /dev/null
+++ b/libc/arch-mips/syscalls.mk
@@ -0,0 +1,198 @@
+# auto-generated by gensyscalls.py, do not touch
+syscall_src := 
+syscall_src += arch-mips/syscalls/_exit.S
+syscall_src += arch-mips/syscalls/_exit_thread.S
+syscall_src += arch-mips/syscalls/__fork.S
+syscall_src += arch-mips/syscalls/_waitpid.S
+syscall_src += arch-mips/syscalls/__waitid.S
+syscall_src += arch-mips/syscalls/__sys_clone.S
+syscall_src += arch-mips/syscalls/execve.S
+syscall_src += arch-mips/syscalls/__setuid.S
+syscall_src += arch-mips/syscalls/getuid.S
+syscall_src += arch-mips/syscalls/getgid.S
+syscall_src += arch-mips/syscalls/geteuid.S
+syscall_src += arch-mips/syscalls/getegid.S
+syscall_src += arch-mips/syscalls/getresuid.S
+syscall_src += arch-mips/syscalls/getresgid.S
+syscall_src += arch-mips/syscalls/gettid.S
+syscall_src += arch-mips/syscalls/readahead.S
+syscall_src += arch-mips/syscalls/getgroups.S
+syscall_src += arch-mips/syscalls/getpgid.S
+syscall_src += arch-mips/syscalls/getppid.S
+syscall_src += arch-mips/syscalls/setsid.S
+syscall_src += arch-mips/syscalls/setgid.S
+syscall_src += arch-mips/syscalls/__setreuid.S
+syscall_src += arch-mips/syscalls/__setresuid.S
+syscall_src += arch-mips/syscalls/setresgid.S
+syscall_src += arch-mips/syscalls/__brk.S
+syscall_src += arch-mips/syscalls/kill.S
+syscall_src += arch-mips/syscalls/tkill.S
+syscall_src += arch-mips/syscalls/__ptrace.S
+syscall_src += arch-mips/syscalls/__set_thread_area.S
+syscall_src += arch-mips/syscalls/__getpriority.S
+syscall_src += arch-mips/syscalls/setpriority.S
+syscall_src += arch-mips/syscalls/setrlimit.S
+syscall_src += arch-mips/syscalls/getrlimit.S
+syscall_src += arch-mips/syscalls/getrusage.S
+syscall_src += arch-mips/syscalls/setgroups.S
+syscall_src += arch-mips/syscalls/setpgid.S
+syscall_src += arch-mips/syscalls/setregid.S
+syscall_src += arch-mips/syscalls/chroot.S
+syscall_src += arch-mips/syscalls/prctl.S
+syscall_src += arch-mips/syscalls/capget.S
+syscall_src += arch-mips/syscalls/capset.S
+syscall_src += arch-mips/syscalls/sigaltstack.S
+syscall_src += arch-mips/syscalls/acct.S
+syscall_src += arch-mips/syscalls/read.S
+syscall_src += arch-mips/syscalls/write.S
+syscall_src += arch-mips/syscalls/pread64.S
+syscall_src += arch-mips/syscalls/pwrite64.S
+syscall_src += arch-mips/syscalls/__open.S
+syscall_src += arch-mips/syscalls/__openat.S
+syscall_src += arch-mips/syscalls/close.S
+syscall_src += arch-mips/syscalls/lseek.S
+syscall_src += arch-mips/syscalls/__llseek.S
+syscall_src += arch-mips/syscalls/getpid.S
+syscall_src += arch-mips/syscalls/__mmap2.S
+syscall_src += arch-mips/syscalls/munmap.S
+syscall_src += arch-mips/syscalls/mremap.S
+syscall_src += arch-mips/syscalls/msync.S
+syscall_src += arch-mips/syscalls/mprotect.S
+syscall_src += arch-mips/syscalls/madvise.S
+syscall_src += arch-mips/syscalls/mlock.S
+syscall_src += arch-mips/syscalls/munlock.S
+syscall_src += arch-mips/syscalls/mincore.S
+syscall_src += arch-mips/syscalls/__ioctl.S
+syscall_src += arch-mips/syscalls/readv.S
+syscall_src += arch-mips/syscalls/writev.S
+syscall_src += arch-mips/syscalls/__fcntl.S
+syscall_src += arch-mips/syscalls/flock.S
+syscall_src += arch-mips/syscalls/fchmod.S
+syscall_src += arch-mips/syscalls/dup.S
+syscall_src += arch-mips/syscalls/pipe2.S
+syscall_src += arch-mips/syscalls/dup2.S
+syscall_src += arch-mips/syscalls/select.S
+syscall_src += arch-mips/syscalls/ftruncate.S
+syscall_src += arch-mips/syscalls/ftruncate64.S
+syscall_src += arch-mips/syscalls/getdents.S
+syscall_src += arch-mips/syscalls/fsync.S
+syscall_src += arch-mips/syscalls/fdatasync.S
+syscall_src += arch-mips/syscalls/fchown.S
+syscall_src += arch-mips/syscalls/sync.S
+syscall_src += arch-mips/syscalls/__fcntl64.S
+syscall_src += arch-mips/syscalls/__fstatfs64.S
+syscall_src += arch-mips/syscalls/sendfile.S
+syscall_src += arch-mips/syscalls/fstatat.S
+syscall_src += arch-mips/syscalls/mkdirat.S
+syscall_src += arch-mips/syscalls/fchownat.S
+syscall_src += arch-mips/syscalls/fchmodat.S
+syscall_src += arch-mips/syscalls/renameat.S
+syscall_src += arch-mips/syscalls/fsetxattr.S
+syscall_src += arch-mips/syscalls/fgetxattr.S
+syscall_src += arch-mips/syscalls/flistxattr.S
+syscall_src += arch-mips/syscalls/fremovexattr.S
+syscall_src += arch-mips/syscalls/link.S
+syscall_src += arch-mips/syscalls/unlink.S
+syscall_src += arch-mips/syscalls/unlinkat.S
+syscall_src += arch-mips/syscalls/chdir.S
+syscall_src += arch-mips/syscalls/mknod.S
+syscall_src += arch-mips/syscalls/chmod.S
+syscall_src += arch-mips/syscalls/chown.S
+syscall_src += arch-mips/syscalls/lchown.S
+syscall_src += arch-mips/syscalls/mount.S
+syscall_src += arch-mips/syscalls/umount2.S
+syscall_src += arch-mips/syscalls/fstat.S
+syscall_src += arch-mips/syscalls/stat.S
+syscall_src += arch-mips/syscalls/lstat.S
+syscall_src += arch-mips/syscalls/mkdir.S
+syscall_src += arch-mips/syscalls/readlink.S
+syscall_src += arch-mips/syscalls/rmdir.S
+syscall_src += arch-mips/syscalls/rename.S
+syscall_src += arch-mips/syscalls/__getcwd.S
+syscall_src += arch-mips/syscalls/access.S
+syscall_src += arch-mips/syscalls/symlink.S
+syscall_src += arch-mips/syscalls/fchdir.S
+syscall_src += arch-mips/syscalls/truncate.S
+syscall_src += arch-mips/syscalls/setxattr.S
+syscall_src += arch-mips/syscalls/lsetxattr.S
+syscall_src += arch-mips/syscalls/getxattr.S
+syscall_src += arch-mips/syscalls/lgetxattr.S
+syscall_src += arch-mips/syscalls/listxattr.S
+syscall_src += arch-mips/syscalls/llistxattr.S
+syscall_src += arch-mips/syscalls/removexattr.S
+syscall_src += arch-mips/syscalls/lremovexattr.S
+syscall_src += arch-mips/syscalls/__statfs64.S
+syscall_src += arch-mips/syscalls/pause.S
+syscall_src += arch-mips/syscalls/gettimeofday.S
+syscall_src += arch-mips/syscalls/settimeofday.S
+syscall_src += arch-mips/syscalls/times.S
+syscall_src += arch-mips/syscalls/nanosleep.S
+syscall_src += arch-mips/syscalls/clock_gettime.S
+syscall_src += arch-mips/syscalls/clock_settime.S
+syscall_src += arch-mips/syscalls/clock_getres.S
+syscall_src += arch-mips/syscalls/clock_nanosleep.S
+syscall_src += arch-mips/syscalls/getitimer.S
+syscall_src += arch-mips/syscalls/setitimer.S
+syscall_src += arch-mips/syscalls/__timer_create.S
+syscall_src += arch-mips/syscalls/__timer_settime.S
+syscall_src += arch-mips/syscalls/__timer_gettime.S
+syscall_src += arch-mips/syscalls/__timer_getoverrun.S
+syscall_src += arch-mips/syscalls/__timer_delete.S
+syscall_src += arch-mips/syscalls/utimes.S
+syscall_src += arch-mips/syscalls/utimensat.S
+syscall_src += arch-mips/syscalls/sigaction.S
+syscall_src += arch-mips/syscalls/sigprocmask.S
+syscall_src += arch-mips/syscalls/__sigsuspend.S
+syscall_src += arch-mips/syscalls/__rt_sigaction.S
+syscall_src += arch-mips/syscalls/__rt_sigprocmask.S
+syscall_src += arch-mips/syscalls/__rt_sigtimedwait.S
+syscall_src += arch-mips/syscalls/sigpending.S
+syscall_src += arch-mips/syscalls/socket.S
+syscall_src += arch-mips/syscalls/socketpair.S
+syscall_src += arch-mips/syscalls/bind.S
+syscall_src += arch-mips/syscalls/connect.S
+syscall_src += arch-mips/syscalls/listen.S
+syscall_src += arch-mips/syscalls/accept.S
+syscall_src += arch-mips/syscalls/getsockname.S
+syscall_src += arch-mips/syscalls/getpeername.S
+syscall_src += arch-mips/syscalls/sendto.S
+syscall_src += arch-mips/syscalls/recvfrom.S
+syscall_src += arch-mips/syscalls/shutdown.S
+syscall_src += arch-mips/syscalls/setsockopt.S
+syscall_src += arch-mips/syscalls/getsockopt.S
+syscall_src += arch-mips/syscalls/sendmsg.S
+syscall_src += arch-mips/syscalls/recvmsg.S
+syscall_src += arch-mips/syscalls/sched_setscheduler.S
+syscall_src += arch-mips/syscalls/sched_getscheduler.S
+syscall_src += arch-mips/syscalls/sched_yield.S
+syscall_src += arch-mips/syscalls/sched_setparam.S
+syscall_src += arch-mips/syscalls/sched_getparam.S
+syscall_src += arch-mips/syscalls/sched_get_priority_max.S
+syscall_src += arch-mips/syscalls/sched_get_priority_min.S
+syscall_src += arch-mips/syscalls/sched_rr_get_interval.S
+syscall_src += arch-mips/syscalls/sched_setaffinity.S
+syscall_src += arch-mips/syscalls/__sched_getaffinity.S
+syscall_src += arch-mips/syscalls/__getcpu.S
+syscall_src += arch-mips/syscalls/ioprio_set.S
+syscall_src += arch-mips/syscalls/ioprio_get.S
+syscall_src += arch-mips/syscalls/uname.S
+syscall_src += arch-mips/syscalls/__wait4.S
+syscall_src += arch-mips/syscalls/umask.S
+syscall_src += arch-mips/syscalls/__reboot.S
+syscall_src += arch-mips/syscalls/__syslog.S
+syscall_src += arch-mips/syscalls/init_module.S
+syscall_src += arch-mips/syscalls/delete_module.S
+syscall_src += arch-mips/syscalls/klogctl.S
+syscall_src += arch-mips/syscalls/sysinfo.S
+syscall_src += arch-mips/syscalls/personality.S
+syscall_src += arch-mips/syscalls/futex.S
+syscall_src += arch-mips/syscalls/epoll_create.S
+syscall_src += arch-mips/syscalls/epoll_ctl.S
+syscall_src += arch-mips/syscalls/epoll_wait.S
+syscall_src += arch-mips/syscalls/inotify_init.S
+syscall_src += arch-mips/syscalls/inotify_add_watch.S
+syscall_src += arch-mips/syscalls/inotify_rm_watch.S
+syscall_src += arch-mips/syscalls/poll.S
+syscall_src += arch-mips/syscalls/eventfd.S
+syscall_src += arch-mips/syscalls/_flush_cache.S
+syscall_src += arch-mips/syscalls/syscall.S
diff --git a/libc/arch-mips/syscalls/__brk.S b/libc/arch-mips/syscalls/__brk.S
new file mode 100644
index 0000000..1f09772
--- /dev/null
+++ b/libc/arch-mips/syscalls/__brk.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __brk
+    .align 4
+    .ent __brk
+
+__brk:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_brk /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __brk
diff --git a/libc/arch-mips/syscalls/__fcntl.S b/libc/arch-mips/syscalls/__fcntl.S
new file mode 100644
index 0000000..6dd76e3
--- /dev/null
+++ b/libc/arch-mips/syscalls/__fcntl.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __fcntl
+    .align 4
+    .ent __fcntl
+
+__fcntl:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_fcntl /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __fcntl
diff --git a/libc/arch-mips/syscalls/__fcntl64.S b/libc/arch-mips/syscalls/__fcntl64.S
new file mode 100644
index 0000000..e82e382
--- /dev/null
+++ b/libc/arch-mips/syscalls/__fcntl64.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __fcntl64
+    .align 4
+    .ent __fcntl64
+
+__fcntl64:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_fcntl64 /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __fcntl64
diff --git a/libc/arch-mips/syscalls/__fork.S b/libc/arch-mips/syscalls/__fork.S
new file mode 100644
index 0000000..db30472
--- /dev/null
+++ b/libc/arch-mips/syscalls/__fork.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __fork
+    .align 4
+    .ent __fork
+
+__fork:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_fork /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __fork
diff --git a/libc/arch-mips/syscalls/__fstatfs64.S b/libc/arch-mips/syscalls/__fstatfs64.S
new file mode 100644
index 0000000..6485d10
--- /dev/null
+++ b/libc/arch-mips/syscalls/__fstatfs64.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __fstatfs64
+    .align 4
+    .ent __fstatfs64
+
+__fstatfs64:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_fstatfs64 /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __fstatfs64
diff --git a/libc/arch-mips/syscalls/__getcpu.S b/libc/arch-mips/syscalls/__getcpu.S
new file mode 100644
index 0000000..90d59f4
--- /dev/null
+++ b/libc/arch-mips/syscalls/__getcpu.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __getcpu
+    .align 4
+    .ent __getcpu
+
+__getcpu:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_getcpu /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __getcpu
diff --git a/libc/arch-mips/syscalls/__getcwd.S b/libc/arch-mips/syscalls/__getcwd.S
new file mode 100644
index 0000000..e8fa340
--- /dev/null
+++ b/libc/arch-mips/syscalls/__getcwd.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __getcwd
+    .align 4
+    .ent __getcwd
+
+__getcwd:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_getcwd /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __getcwd
diff --git a/libc/arch-mips/syscalls/__getpriority.S b/libc/arch-mips/syscalls/__getpriority.S
new file mode 100644
index 0000000..7cabd31
--- /dev/null
+++ b/libc/arch-mips/syscalls/__getpriority.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __getpriority
+    .align 4
+    .ent __getpriority
+
+__getpriority:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_getpriority /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __getpriority
diff --git a/libc/arch-mips/syscalls/__ioctl.S b/libc/arch-mips/syscalls/__ioctl.S
new file mode 100644
index 0000000..2524e02
--- /dev/null
+++ b/libc/arch-mips/syscalls/__ioctl.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __ioctl
+    .align 4
+    .ent __ioctl
+
+__ioctl:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_ioctl /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __ioctl
diff --git a/libc/arch-mips/syscalls/__llseek.S b/libc/arch-mips/syscalls/__llseek.S
new file mode 100644
index 0000000..fbfd583
--- /dev/null
+++ b/libc/arch-mips/syscalls/__llseek.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __llseek
+    .align 4
+    .ent __llseek
+
+__llseek:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR__llseek /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __llseek
diff --git a/libc/arch-mips/syscalls/__mmap2.S b/libc/arch-mips/syscalls/__mmap2.S
new file mode 100644
index 0000000..98f97eb
--- /dev/null
+++ b/libc/arch-mips/syscalls/__mmap2.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __mmap2
+    .align 4
+    .ent __mmap2
+
+__mmap2:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_mmap2 /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __mmap2
diff --git a/libc/arch-mips/syscalls/__open.S b/libc/arch-mips/syscalls/__open.S
new file mode 100644
index 0000000..0ccb286
--- /dev/null
+++ b/libc/arch-mips/syscalls/__open.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __open
+    .align 4
+    .ent __open
+
+__open:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_open /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __open
diff --git a/libc/arch-mips/syscalls/__openat.S b/libc/arch-mips/syscalls/__openat.S
new file mode 100644
index 0000000..04399b4
--- /dev/null
+++ b/libc/arch-mips/syscalls/__openat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __openat
+    .align 4
+    .ent __openat
+
+__openat:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_openat /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __openat
diff --git a/libc/arch-mips/syscalls/__ptrace.S b/libc/arch-mips/syscalls/__ptrace.S
new file mode 100644
index 0000000..0bcba9f
--- /dev/null
+++ b/libc/arch-mips/syscalls/__ptrace.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __ptrace
+    .align 4
+    .ent __ptrace
+
+__ptrace:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_ptrace /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __ptrace
diff --git a/libc/arch-mips/syscalls/__reboot.S b/libc/arch-mips/syscalls/__reboot.S
new file mode 100644
index 0000000..5e8e57a
--- /dev/null
+++ b/libc/arch-mips/syscalls/__reboot.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __reboot
+    .align 4
+    .ent __reboot
+
+__reboot:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_reboot /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __reboot
diff --git a/libc/arch-mips/syscalls/__rt_sigaction.S b/libc/arch-mips/syscalls/__rt_sigaction.S
new file mode 100644
index 0000000..43a571a
--- /dev/null
+++ b/libc/arch-mips/syscalls/__rt_sigaction.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __rt_sigaction
+    .align 4
+    .ent __rt_sigaction
+
+__rt_sigaction:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_rt_sigaction /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __rt_sigaction
diff --git a/libc/arch-mips/syscalls/__rt_sigprocmask.S b/libc/arch-mips/syscalls/__rt_sigprocmask.S
new file mode 100644
index 0000000..59a8894
--- /dev/null
+++ b/libc/arch-mips/syscalls/__rt_sigprocmask.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __rt_sigprocmask
+    .align 4
+    .ent __rt_sigprocmask
+
+__rt_sigprocmask:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_rt_sigprocmask /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __rt_sigprocmask
diff --git a/libc/arch-mips/syscalls/__rt_sigtimedwait.S b/libc/arch-mips/syscalls/__rt_sigtimedwait.S
new file mode 100644
index 0000000..dae872e
--- /dev/null
+++ b/libc/arch-mips/syscalls/__rt_sigtimedwait.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __rt_sigtimedwait
+    .align 4
+    .ent __rt_sigtimedwait
+
+__rt_sigtimedwait:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_rt_sigtimedwait /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __rt_sigtimedwait
diff --git a/libc/arch-mips/syscalls/__sched_getaffinity.S b/libc/arch-mips/syscalls/__sched_getaffinity.S
new file mode 100644
index 0000000..cc01ff2
--- /dev/null
+++ b/libc/arch-mips/syscalls/__sched_getaffinity.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __sched_getaffinity
+    .align 4
+    .ent __sched_getaffinity
+
+__sched_getaffinity:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_sched_getaffinity /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __sched_getaffinity
diff --git a/libc/arch-mips/syscalls/__set_thread_area.S b/libc/arch-mips/syscalls/__set_thread_area.S
new file mode 100644
index 0000000..2aac901
--- /dev/null
+++ b/libc/arch-mips/syscalls/__set_thread_area.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __set_thread_area
+    .align 4
+    .ent __set_thread_area
+
+__set_thread_area:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_set_thread_area /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __set_thread_area
diff --git a/libc/arch-mips/syscalls/__setresuid.S b/libc/arch-mips/syscalls/__setresuid.S
new file mode 100644
index 0000000..e00d2e1
--- /dev/null
+++ b/libc/arch-mips/syscalls/__setresuid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __setresuid
+    .align 4
+    .ent __setresuid
+
+__setresuid:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_setresuid /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __setresuid
diff --git a/libc/arch-mips/syscalls/__setreuid.S b/libc/arch-mips/syscalls/__setreuid.S
new file mode 100644
index 0000000..b45f3fc
--- /dev/null
+++ b/libc/arch-mips/syscalls/__setreuid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __setreuid
+    .align 4
+    .ent __setreuid
+
+__setreuid:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_setreuid /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __setreuid
diff --git a/libc/arch-mips/syscalls/__setuid.S b/libc/arch-mips/syscalls/__setuid.S
new file mode 100644
index 0000000..c221526
--- /dev/null
+++ b/libc/arch-mips/syscalls/__setuid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __setuid
+    .align 4
+    .ent __setuid
+
+__setuid:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_setuid /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __setuid
diff --git a/libc/arch-mips/syscalls/__sigsuspend.S b/libc/arch-mips/syscalls/__sigsuspend.S
new file mode 100644
index 0000000..b622efe
--- /dev/null
+++ b/libc/arch-mips/syscalls/__sigsuspend.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __sigsuspend
+    .align 4
+    .ent __sigsuspend
+
+__sigsuspend:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_sigsuspend /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __sigsuspend
diff --git a/libc/arch-mips/syscalls/__statfs64.S b/libc/arch-mips/syscalls/__statfs64.S
new file mode 100644
index 0000000..4d1b17f
--- /dev/null
+++ b/libc/arch-mips/syscalls/__statfs64.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __statfs64
+    .align 4
+    .ent __statfs64
+
+__statfs64:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_statfs64 /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __statfs64
diff --git a/libc/arch-mips/syscalls/__sys_clone.S b/libc/arch-mips/syscalls/__sys_clone.S
new file mode 100644
index 0000000..3451e02
--- /dev/null
+++ b/libc/arch-mips/syscalls/__sys_clone.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __sys_clone
+    .align 4
+    .ent __sys_clone
+
+__sys_clone:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_clone /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __sys_clone
diff --git a/libc/arch-mips/syscalls/__syslog.S b/libc/arch-mips/syscalls/__syslog.S
new file mode 100644
index 0000000..bdc194d
--- /dev/null
+++ b/libc/arch-mips/syscalls/__syslog.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __syslog
+    .align 4
+    .ent __syslog
+
+__syslog:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_syslog /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __syslog
diff --git a/libc/arch-mips/syscalls/__timer_create.S b/libc/arch-mips/syscalls/__timer_create.S
new file mode 100644
index 0000000..140d0d1
--- /dev/null
+++ b/libc/arch-mips/syscalls/__timer_create.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __timer_create
+    .align 4
+    .ent __timer_create
+
+__timer_create:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_timer_create /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __timer_create
diff --git a/libc/arch-mips/syscalls/__timer_delete.S b/libc/arch-mips/syscalls/__timer_delete.S
new file mode 100644
index 0000000..4f362e6
--- /dev/null
+++ b/libc/arch-mips/syscalls/__timer_delete.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __timer_delete
+    .align 4
+    .ent __timer_delete
+
+__timer_delete:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_timer_delete /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __timer_delete
diff --git a/libc/arch-mips/syscalls/__timer_getoverrun.S b/libc/arch-mips/syscalls/__timer_getoverrun.S
new file mode 100644
index 0000000..68afa1c
--- /dev/null
+++ b/libc/arch-mips/syscalls/__timer_getoverrun.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __timer_getoverrun
+    .align 4
+    .ent __timer_getoverrun
+
+__timer_getoverrun:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_timer_getoverrun /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __timer_getoverrun
diff --git a/libc/arch-mips/syscalls/__timer_gettime.S b/libc/arch-mips/syscalls/__timer_gettime.S
new file mode 100644
index 0000000..4a70da6
--- /dev/null
+++ b/libc/arch-mips/syscalls/__timer_gettime.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __timer_gettime
+    .align 4
+    .ent __timer_gettime
+
+__timer_gettime:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_timer_gettime /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __timer_gettime
diff --git a/libc/arch-mips/syscalls/__timer_settime.S b/libc/arch-mips/syscalls/__timer_settime.S
new file mode 100644
index 0000000..ad84606
--- /dev/null
+++ b/libc/arch-mips/syscalls/__timer_settime.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __timer_settime
+    .align 4
+    .ent __timer_settime
+
+__timer_settime:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_timer_settime /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __timer_settime
diff --git a/libc/arch-mips/syscalls/__wait4.S b/libc/arch-mips/syscalls/__wait4.S
new file mode 100644
index 0000000..713b7cb
--- /dev/null
+++ b/libc/arch-mips/syscalls/__wait4.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __wait4
+    .align 4
+    .ent __wait4
+
+__wait4:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_wait4 /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __wait4
diff --git a/libc/arch-mips/syscalls/__waitid.S b/libc/arch-mips/syscalls/__waitid.S
new file mode 100644
index 0000000..83e5aa3
--- /dev/null
+++ b/libc/arch-mips/syscalls/__waitid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl __waitid
+    .align 4
+    .ent __waitid
+
+__waitid:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_waitid /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end __waitid
diff --git a/libc/arch-mips/syscalls/_exit.S b/libc/arch-mips/syscalls/_exit.S
new file mode 100644
index 0000000..b7ec876
--- /dev/null
+++ b/libc/arch-mips/syscalls/_exit.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl _exit
+    .align 4
+    .ent _exit
+
+_exit:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_exit_group /* syscall number goes in $v0; note this stub issues exit_group */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* reached only if the syscall comes back with $a3 == 0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end _exit
diff --git a/libc/arch-mips/syscalls/_exit_thread.S b/libc/arch-mips/syscalls/_exit_thread.S
new file mode 100644
index 0000000..0af9d3d
--- /dev/null
+++ b/libc/arch-mips/syscalls/_exit_thread.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl _exit_thread
+    .align 4
+    .ent _exit_thread
+
+_exit_thread:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_exit /* syscall number goes in $v0; note this stub issues exit */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* reached only if the syscall comes back with $a3 == 0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end _exit_thread
diff --git a/libc/arch-mips/syscalls/_flush_cache.S b/libc/arch-mips/syscalls/_flush_cache.S
new file mode 100644
index 0000000..07dee66
--- /dev/null
+++ b/libc/arch-mips/syscalls/_flush_cache.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl _flush_cache
+    .align 4
+    .ent _flush_cache
+
+_flush_cache:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_cacheflush /* syscall number goes in $v0; note this stub issues cacheflush */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end _flush_cache
diff --git a/libc/arch-mips/syscalls/_waitpid.S b/libc/arch-mips/syscalls/_waitpid.S
new file mode 100644
index 0000000..20db3a8
--- /dev/null
+++ b/libc/arch-mips/syscalls/_waitpid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl _waitpid
+    .align 4
+    .ent _waitpid
+
+_waitpid:
+    .set noreorder /* assemble the instructions below in exactly this order */
+    .cpload $t9 /* PIC prologue: compute $gp from $t9 */
+    li $v0, __NR_waitpid /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f /* non-zero $a3 after the syscall: take the __set_errno path */
+    move $a0, $v0 /* branch delay slot, runs on both paths: $a0 = $v0 for __set_errno */
+    j $ra /* $a3 == 0: return with the result left in $v0 */
+    nop /* delay slot */
+1:
+    la $t9,__set_errno
+    j $t9 /* tail-jump: $ra is untouched, so __set_errno returns to our caller */
+    nop /* delay slot */
+    .set reorder
+    .end _waitpid
diff --git a/libc/arch-mips/syscalls/accept.S b/libc/arch-mips/syscalls/accept.S
new file mode 100644
index 0000000..c19216e
--- /dev/null
+++ b/libc/arch-mips/syscalls/accept.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl accept
+    .align 4
+    .ent accept
+
+accept:  /* stub: issues __NR_accept; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_accept  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end accept
diff --git a/libc/arch-mips/syscalls/access.S b/libc/arch-mips/syscalls/access.S
new file mode 100644
index 0000000..a22ed92
--- /dev/null
+++ b/libc/arch-mips/syscalls/access.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl access
+    .align 4
+    .ent access
+
+access:  /* stub: issues __NR_access; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_access  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end access
diff --git a/libc/arch-mips/syscalls/acct.S b/libc/arch-mips/syscalls/acct.S
new file mode 100644
index 0000000..5235ede
--- /dev/null
+++ b/libc/arch-mips/syscalls/acct.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl acct
+    .align 4
+    .ent acct
+
+acct:  /* stub: issues __NR_acct; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_acct  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end acct
diff --git a/libc/arch-mips/syscalls/bind.S b/libc/arch-mips/syscalls/bind.S
new file mode 100644
index 0000000..4f00922
--- /dev/null
+++ b/libc/arch-mips/syscalls/bind.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl bind
+    .align 4
+    .ent bind
+
+bind:  /* stub: issues __NR_bind; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_bind  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end bind
diff --git a/libc/arch-mips/syscalls/capget.S b/libc/arch-mips/syscalls/capget.S
new file mode 100644
index 0000000..80e55ee
--- /dev/null
+++ b/libc/arch-mips/syscalls/capget.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl capget
+    .align 4
+    .ent capget
+
+capget:  /* stub: issues __NR_capget; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_capget  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end capget
diff --git a/libc/arch-mips/syscalls/capset.S b/libc/arch-mips/syscalls/capset.S
new file mode 100644
index 0000000..0e4a3be
--- /dev/null
+++ b/libc/arch-mips/syscalls/capset.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl capset
+    .align 4
+    .ent capset
+
+capset:  /* stub: issues __NR_capset; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_capset  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end capset
diff --git a/libc/arch-mips/syscalls/chdir.S b/libc/arch-mips/syscalls/chdir.S
new file mode 100644
index 0000000..42eb37c
--- /dev/null
+++ b/libc/arch-mips/syscalls/chdir.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl chdir
+    .align 4
+    .ent chdir
+
+chdir:  /* stub: issues __NR_chdir; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_chdir  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end chdir
diff --git a/libc/arch-mips/syscalls/chmod.S b/libc/arch-mips/syscalls/chmod.S
new file mode 100644
index 0000000..7ea3ede
--- /dev/null
+++ b/libc/arch-mips/syscalls/chmod.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl chmod
+    .align 4
+    .ent chmod
+
+chmod:  /* stub: issues __NR_chmod; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_chmod  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end chmod
diff --git a/libc/arch-mips/syscalls/chown.S b/libc/arch-mips/syscalls/chown.S
new file mode 100644
index 0000000..5fb1e32
--- /dev/null
+++ b/libc/arch-mips/syscalls/chown.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl chown
+    .align 4
+    .ent chown
+
+chown:  /* stub: issues __NR_chown; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_chown  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end chown
diff --git a/libc/arch-mips/syscalls/chroot.S b/libc/arch-mips/syscalls/chroot.S
new file mode 100644
index 0000000..fafb6a2
--- /dev/null
+++ b/libc/arch-mips/syscalls/chroot.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl chroot
+    .align 4
+    .ent chroot
+
+chroot:  /* stub: issues __NR_chroot; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_chroot  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end chroot
diff --git a/libc/arch-mips/syscalls/clock_getres.S b/libc/arch-mips/syscalls/clock_getres.S
new file mode 100644
index 0000000..4021b14
--- /dev/null
+++ b/libc/arch-mips/syscalls/clock_getres.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl clock_getres
+    .align 4
+    .ent clock_getres
+
+clock_getres:  /* stub: issues __NR_clock_getres; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_clock_getres  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end clock_getres
diff --git a/libc/arch-mips/syscalls/clock_gettime.S b/libc/arch-mips/syscalls/clock_gettime.S
new file mode 100644
index 0000000..ba7afb9
--- /dev/null
+++ b/libc/arch-mips/syscalls/clock_gettime.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl clock_gettime
+    .align 4
+    .ent clock_gettime
+
+clock_gettime:  /* stub: issues __NR_clock_gettime; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_clock_gettime  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end clock_gettime
diff --git a/libc/arch-mips/syscalls/clock_nanosleep.S b/libc/arch-mips/syscalls/clock_nanosleep.S
new file mode 100644
index 0000000..1a9aef5
--- /dev/null
+++ b/libc/arch-mips/syscalls/clock_nanosleep.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl clock_nanosleep
+    .align 4
+    .ent clock_nanosleep
+
+clock_nanosleep:  /* stub: issues __NR_clock_nanosleep; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_clock_nanosleep  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end clock_nanosleep
diff --git a/libc/arch-mips/syscalls/clock_settime.S b/libc/arch-mips/syscalls/clock_settime.S
new file mode 100644
index 0000000..65d4388
--- /dev/null
+++ b/libc/arch-mips/syscalls/clock_settime.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl clock_settime
+    .align 4
+    .ent clock_settime
+
+clock_settime:  /* stub: issues __NR_clock_settime; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_clock_settime  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end clock_settime
diff --git a/libc/arch-mips/syscalls/close.S b/libc/arch-mips/syscalls/close.S
new file mode 100644
index 0000000..07ff4e3
--- /dev/null
+++ b/libc/arch-mips/syscalls/close.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl close
+    .align 4
+    .ent close
+
+close:  /* stub: issues __NR_close; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_close  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end close
diff --git a/libc/arch-mips/syscalls/connect.S b/libc/arch-mips/syscalls/connect.S
new file mode 100644
index 0000000..d1fc280
--- /dev/null
+++ b/libc/arch-mips/syscalls/connect.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl connect
+    .align 4
+    .ent connect
+
+connect:  /* stub: issues __NR_connect; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_connect  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end connect
diff --git a/libc/arch-mips/syscalls/delete_module.S b/libc/arch-mips/syscalls/delete_module.S
new file mode 100644
index 0000000..db80ba7
--- /dev/null
+++ b/libc/arch-mips/syscalls/delete_module.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl delete_module
+    .align 4
+    .ent delete_module
+
+delete_module:  /* stub: issues __NR_delete_module; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_delete_module  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end delete_module
diff --git a/libc/arch-mips/syscalls/dup.S b/libc/arch-mips/syscalls/dup.S
new file mode 100644
index 0000000..e815749
--- /dev/null
+++ b/libc/arch-mips/syscalls/dup.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl dup
+    .align 4
+    .ent dup
+
+dup:  /* stub: issues __NR_dup; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_dup  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end dup
diff --git a/libc/arch-mips/syscalls/dup2.S b/libc/arch-mips/syscalls/dup2.S
new file mode 100644
index 0000000..6fb151f
--- /dev/null
+++ b/libc/arch-mips/syscalls/dup2.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl dup2
+    .align 4
+    .ent dup2
+
+dup2:  /* stub: issues __NR_dup2; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_dup2  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end dup2
diff --git a/libc/arch-mips/syscalls/epoll_create.S b/libc/arch-mips/syscalls/epoll_create.S
new file mode 100644
index 0000000..e5e420b
--- /dev/null
+++ b/libc/arch-mips/syscalls/epoll_create.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl epoll_create
+    .align 4
+    .ent epoll_create
+
+epoll_create:  /* stub: issues __NR_epoll_create; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_epoll_create  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end epoll_create
diff --git a/libc/arch-mips/syscalls/epoll_ctl.S b/libc/arch-mips/syscalls/epoll_ctl.S
new file mode 100644
index 0000000..f4f9f05
--- /dev/null
+++ b/libc/arch-mips/syscalls/epoll_ctl.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl epoll_ctl
+    .align 4
+    .ent epoll_ctl
+
+epoll_ctl:  /* stub: issues __NR_epoll_ctl; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_epoll_ctl  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end epoll_ctl
diff --git a/libc/arch-mips/syscalls/epoll_wait.S b/libc/arch-mips/syscalls/epoll_wait.S
new file mode 100644
index 0000000..64d33fb
--- /dev/null
+++ b/libc/arch-mips/syscalls/epoll_wait.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl epoll_wait
+    .align 4
+    .ent epoll_wait
+
+epoll_wait:  /* stub: issues __NR_epoll_wait; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_epoll_wait  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end epoll_wait
diff --git a/libc/arch-mips/syscalls/eventfd.S b/libc/arch-mips/syscalls/eventfd.S
new file mode 100644
index 0000000..5c4e22b
--- /dev/null
+++ b/libc/arch-mips/syscalls/eventfd.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl eventfd
+    .align 4
+    .ent eventfd
+
+eventfd:  /* stub: issues __NR_eventfd2 (note: not __NR_eventfd); if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_eventfd2  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end eventfd
diff --git a/libc/arch-mips/syscalls/execve.S b/libc/arch-mips/syscalls/execve.S
new file mode 100644
index 0000000..31b9276
--- /dev/null
+++ b/libc/arch-mips/syscalls/execve.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl execve
+    .align 4
+    .ent execve
+
+execve:  /* stub: issues __NR_execve; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_execve  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end execve
diff --git a/libc/arch-mips/syscalls/fchdir.S b/libc/arch-mips/syscalls/fchdir.S
new file mode 100644
index 0000000..93298bd
--- /dev/null
+++ b/libc/arch-mips/syscalls/fchdir.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fchdir
+    .align 4
+    .ent fchdir
+
+fchdir:  /* stub: issues __NR_fchdir; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fchdir  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fchdir
diff --git a/libc/arch-mips/syscalls/fchmod.S b/libc/arch-mips/syscalls/fchmod.S
new file mode 100644
index 0000000..f483567
--- /dev/null
+++ b/libc/arch-mips/syscalls/fchmod.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fchmod
+    .align 4
+    .ent fchmod
+
+fchmod:  /* stub: issues __NR_fchmod; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fchmod  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fchmod
diff --git a/libc/arch-mips/syscalls/fchmodat.S b/libc/arch-mips/syscalls/fchmodat.S
new file mode 100644
index 0000000..8ba1bbe
--- /dev/null
+++ b/libc/arch-mips/syscalls/fchmodat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fchmodat
+    .align 4
+    .ent fchmodat
+
+fchmodat:  /* stub: issues __NR_fchmodat; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fchmodat  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fchmodat
diff --git a/libc/arch-mips/syscalls/fchown.S b/libc/arch-mips/syscalls/fchown.S
new file mode 100644
index 0000000..24fc08f
--- /dev/null
+++ b/libc/arch-mips/syscalls/fchown.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fchown
+    .align 4
+    .ent fchown
+
+fchown:  /* stub: issues __NR_fchown; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fchown  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fchown
diff --git a/libc/arch-mips/syscalls/fchownat.S b/libc/arch-mips/syscalls/fchownat.S
new file mode 100644
index 0000000..983ed56
--- /dev/null
+++ b/libc/arch-mips/syscalls/fchownat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fchownat
+    .align 4
+    .ent fchownat
+
+fchownat:  /* stub: issues __NR_fchownat; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fchownat  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fchownat
diff --git a/libc/arch-mips/syscalls/fdatasync.S b/libc/arch-mips/syscalls/fdatasync.S
new file mode 100644
index 0000000..312439d
--- /dev/null
+++ b/libc/arch-mips/syscalls/fdatasync.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fdatasync
+    .align 4
+    .ent fdatasync
+
+fdatasync:  /* stub: issues __NR_fdatasync; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fdatasync  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fdatasync
diff --git a/libc/arch-mips/syscalls/fgetxattr.S b/libc/arch-mips/syscalls/fgetxattr.S
new file mode 100644
index 0000000..7e77e61
--- /dev/null
+++ b/libc/arch-mips/syscalls/fgetxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fgetxattr
+    .align 4
+    .ent fgetxattr
+
+fgetxattr:  /* stub: issues __NR_fgetxattr; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fgetxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fgetxattr
diff --git a/libc/arch-mips/syscalls/flistxattr.S b/libc/arch-mips/syscalls/flistxattr.S
new file mode 100644
index 0000000..26e26e1
--- /dev/null
+++ b/libc/arch-mips/syscalls/flistxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl flistxattr
+    .align 4
+    .ent flistxattr
+
+flistxattr:  /* stub: issues __NR_flistxattr; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_flistxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end flistxattr
diff --git a/libc/arch-mips/syscalls/flock.S b/libc/arch-mips/syscalls/flock.S
new file mode 100644
index 0000000..f9a3142
--- /dev/null
+++ b/libc/arch-mips/syscalls/flock.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl flock
+    .align 4
+    .ent flock
+
+flock:  /* stub: issues __NR_flock; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_flock  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end flock
diff --git a/libc/arch-mips/syscalls/fremovexattr.S b/libc/arch-mips/syscalls/fremovexattr.S
new file mode 100644
index 0000000..db3eb6c
--- /dev/null
+++ b/libc/arch-mips/syscalls/fremovexattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fremovexattr
+    .align 4
+    .ent fremovexattr
+
+fremovexattr:  /* stub: issues __NR_fremovexattr; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fremovexattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fremovexattr
diff --git a/libc/arch-mips/syscalls/fsetxattr.S b/libc/arch-mips/syscalls/fsetxattr.S
new file mode 100644
index 0000000..9e666f2
--- /dev/null
+++ b/libc/arch-mips/syscalls/fsetxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fsetxattr
+    .align 4
+    .ent fsetxattr
+
+fsetxattr:  /* stub: issues __NR_fsetxattr; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fsetxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fsetxattr
diff --git a/libc/arch-mips/syscalls/fstat.S b/libc/arch-mips/syscalls/fstat.S
new file mode 100644
index 0000000..73d7235
--- /dev/null
+++ b/libc/arch-mips/syscalls/fstat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fstat
+    .align 4
+    .ent fstat
+
+fstat:  /* stub: issues __NR_fstat64 (note: not __NR_fstat); if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fstat64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fstat
diff --git a/libc/arch-mips/syscalls/fstatat.S b/libc/arch-mips/syscalls/fstatat.S
new file mode 100644
index 0000000..3ae229e
--- /dev/null
+++ b/libc/arch-mips/syscalls/fstatat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fstatat
+    .align 4
+    .ent fstatat
+
+fstatat:  /* stub: issues __NR_fstatat64 (note: not __NR_fstatat); if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fstatat64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fstatat
diff --git a/libc/arch-mips/syscalls/fsync.S b/libc/arch-mips/syscalls/fsync.S
new file mode 100644
index 0000000..e1cd316
--- /dev/null
+++ b/libc/arch-mips/syscalls/fsync.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl fsync
+    .align 4
+    .ent fsync
+
+fsync:  /* stub: issues __NR_fsync; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_fsync  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end fsync
diff --git a/libc/arch-mips/syscalls/ftruncate.S b/libc/arch-mips/syscalls/ftruncate.S
new file mode 100644
index 0000000..5d72b43
--- /dev/null
+++ b/libc/arch-mips/syscalls/ftruncate.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl ftruncate
+    .align 4
+    .ent ftruncate
+
+ftruncate:  /* stub: issues __NR_ftruncate; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_ftruncate  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end ftruncate
diff --git a/libc/arch-mips/syscalls/ftruncate64.S b/libc/arch-mips/syscalls/ftruncate64.S
new file mode 100644
index 0000000..216ae38
--- /dev/null
+++ b/libc/arch-mips/syscalls/ftruncate64.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl ftruncate64
+    .align 4
+    .ent ftruncate64
+
+ftruncate64:  /* stub: issues __NR_ftruncate64; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_ftruncate64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end ftruncate64
diff --git a/libc/arch-mips/syscalls/futex.S b/libc/arch-mips/syscalls/futex.S
new file mode 100644
index 0000000..65e537a
--- /dev/null
+++ b/libc/arch-mips/syscalls/futex.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl futex
+    .align 4
+    .ent futex
+
+futex:  /* stub: issues __NR_futex; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_futex  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end futex
diff --git a/libc/arch-mips/syscalls/getdents.S b/libc/arch-mips/syscalls/getdents.S
new file mode 100644
index 0000000..8ff0f26
--- /dev/null
+++ b/libc/arch-mips/syscalls/getdents.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getdents
+    .align 4
+    .ent getdents
+
+getdents:  /* stub: issues __NR_getdents64 (note: not __NR_getdents); if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getdents64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getdents
diff --git a/libc/arch-mips/syscalls/getegid.S b/libc/arch-mips/syscalls/getegid.S
new file mode 100644
index 0000000..dc2a04b
--- /dev/null
+++ b/libc/arch-mips/syscalls/getegid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getegid
+    .align 4
+    .ent getegid
+
+getegid:  /* stub: issues __NR_getegid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getegid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getegid
diff --git a/libc/arch-mips/syscalls/geteuid.S b/libc/arch-mips/syscalls/geteuid.S
new file mode 100644
index 0000000..8beca88
--- /dev/null
+++ b/libc/arch-mips/syscalls/geteuid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl geteuid
+    .align 4
+    .ent geteuid
+
+geteuid:  /* stub: issues __NR_geteuid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_geteuid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end geteuid
diff --git a/libc/arch-mips/syscalls/getgid.S b/libc/arch-mips/syscalls/getgid.S
new file mode 100644
index 0000000..6a615ee
--- /dev/null
+++ b/libc/arch-mips/syscalls/getgid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getgid
+    .align 4
+    .ent getgid
+
+getgid:  /* stub: issues __NR_getgid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getgid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getgid
diff --git a/libc/arch-mips/syscalls/getgroups.S b/libc/arch-mips/syscalls/getgroups.S
new file mode 100644
index 0000000..8be7308
--- /dev/null
+++ b/libc/arch-mips/syscalls/getgroups.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getgroups
+    .align 4
+    .ent getgroups
+
+getgroups:  /* stub: issues __NR_getgroups; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getgroups  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getgroups
diff --git a/libc/arch-mips/syscalls/getitimer.S b/libc/arch-mips/syscalls/getitimer.S
new file mode 100644
index 0000000..3df1d11
--- /dev/null
+++ b/libc/arch-mips/syscalls/getitimer.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getitimer
+    .align 4
+    .ent getitimer
+
+getitimer:  /* stub: issues __NR_getitimer; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getitimer  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getitimer
diff --git a/libc/arch-mips/syscalls/getpeername.S b/libc/arch-mips/syscalls/getpeername.S
new file mode 100644
index 0000000..4a3beda
--- /dev/null
+++ b/libc/arch-mips/syscalls/getpeername.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getpeername
+    .align 4
+    .ent getpeername
+
+getpeername:  /* stub: issues __NR_getpeername; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getpeername  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getpeername
diff --git a/libc/arch-mips/syscalls/getpgid.S b/libc/arch-mips/syscalls/getpgid.S
new file mode 100644
index 0000000..fd79351
--- /dev/null
+++ b/libc/arch-mips/syscalls/getpgid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getpgid
+    .align 4
+    .ent getpgid
+
+getpgid:  /* stub: issues __NR_getpgid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getpgid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getpgid
diff --git a/libc/arch-mips/syscalls/getpid.S b/libc/arch-mips/syscalls/getpid.S
new file mode 100644
index 0000000..f52e31b
--- /dev/null
+++ b/libc/arch-mips/syscalls/getpid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getpid
+    .align 4
+    .ent getpid
+
+getpid:  /* stub: issues __NR_getpid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getpid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getpid
diff --git a/libc/arch-mips/syscalls/getppid.S b/libc/arch-mips/syscalls/getppid.S
new file mode 100644
index 0000000..5e3fe19
--- /dev/null
+++ b/libc/arch-mips/syscalls/getppid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getppid
+    .align 4
+    .ent getppid
+
+getppid:  /* stub: issues __NR_getppid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getppid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getppid
diff --git a/libc/arch-mips/syscalls/getresgid.S b/libc/arch-mips/syscalls/getresgid.S
new file mode 100644
index 0000000..8e9bf39
--- /dev/null
+++ b/libc/arch-mips/syscalls/getresgid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getresgid
+    .align 4
+    .ent getresgid
+
+getresgid:  /* stub: issues __NR_getresgid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getresgid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getresgid
diff --git a/libc/arch-mips/syscalls/getresuid.S b/libc/arch-mips/syscalls/getresuid.S
new file mode 100644
index 0000000..ff27cde
--- /dev/null
+++ b/libc/arch-mips/syscalls/getresuid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getresuid
+    .align 4
+    .ent getresuid
+
+getresuid:  /* stub: issues __NR_getresuid; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getresuid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getresuid
diff --git a/libc/arch-mips/syscalls/getrlimit.S b/libc/arch-mips/syscalls/getrlimit.S
new file mode 100644
index 0000000..72ed7d8
--- /dev/null
+++ b/libc/arch-mips/syscalls/getrlimit.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getrlimit
+    .align 4
+    .ent getrlimit
+
+getrlimit:  /* stub: issues __NR_getrlimit; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getrlimit  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getrlimit
diff --git a/libc/arch-mips/syscalls/getrusage.S b/libc/arch-mips/syscalls/getrusage.S
new file mode 100644
index 0000000..4c0f7d0
--- /dev/null
+++ b/libc/arch-mips/syscalls/getrusage.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getrusage
+    .align 4
+    .ent getrusage
+
+getrusage:  /* stub: issues __NR_getrusage; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getrusage  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getrusage
diff --git a/libc/arch-mips/syscalls/getsockname.S b/libc/arch-mips/syscalls/getsockname.S
new file mode 100644
index 0000000..e3d41f9
--- /dev/null
+++ b/libc/arch-mips/syscalls/getsockname.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getsockname
+    .align 4
+    .ent getsockname
+
+getsockname:  /* stub: issues __NR_getsockname; if $a3 comes back nonzero, hands $v0 to __set_errno */
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: derive $gp from $t9 */
+    li $v0, __NR_getsockname  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the syscall: take the error path */
+    move $a0, $v0  /* delay slot (runs on both paths): $a0 = $v0 for __set_errno */
+    j $ra  /* $a3 == 0: return with $v0 untouched */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* tail-jump; $ra is untouched, so __set_errno returns to our caller */
+    nop
+    .set reorder
+    .end getsockname
diff --git a/libc/arch-mips/syscalls/getsockopt.S b/libc/arch-mips/syscalls/getsockopt.S
new file mode 100644
index 0000000..d55ae6a
--- /dev/null
+++ b/libc/arch-mips/syscalls/getsockopt.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getsockopt
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent getsockopt
+
+getsockopt:  /* stub: issue __NR_getsockopt; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_getsockopt  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end getsockopt
diff --git a/libc/arch-mips/syscalls/gettid.S b/libc/arch-mips/syscalls/gettid.S
new file mode 100644
index 0000000..de37df5
--- /dev/null
+++ b/libc/arch-mips/syscalls/gettid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl gettid
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent gettid
+
+gettid:  /* stub: issue __NR_gettid; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_gettid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end gettid
diff --git a/libc/arch-mips/syscalls/gettimeofday.S b/libc/arch-mips/syscalls/gettimeofday.S
new file mode 100644
index 0000000..a9a93a5
--- /dev/null
+++ b/libc/arch-mips/syscalls/gettimeofday.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl gettimeofday
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent gettimeofday
+
+gettimeofday:  /* stub: issue __NR_gettimeofday; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_gettimeofday  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end gettimeofday
diff --git a/libc/arch-mips/syscalls/getuid.S b/libc/arch-mips/syscalls/getuid.S
new file mode 100644
index 0000000..e5ba698
--- /dev/null
+++ b/libc/arch-mips/syscalls/getuid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getuid
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent getuid
+
+getuid:  /* stub: issue __NR_getuid; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_getuid  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end getuid
diff --git a/libc/arch-mips/syscalls/getxattr.S b/libc/arch-mips/syscalls/getxattr.S
new file mode 100644
index 0000000..6c70579
--- /dev/null
+++ b/libc/arch-mips/syscalls/getxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl getxattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent getxattr
+
+getxattr:  /* stub: issue __NR_getxattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_getxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end getxattr
diff --git a/libc/arch-mips/syscalls/init_module.S b/libc/arch-mips/syscalls/init_module.S
new file mode 100644
index 0000000..5cfbd2f
--- /dev/null
+++ b/libc/arch-mips/syscalls/init_module.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl init_module
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent init_module
+
+init_module:  /* stub: issue __NR_init_module; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_init_module  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end init_module
diff --git a/libc/arch-mips/syscalls/inotify_add_watch.S b/libc/arch-mips/syscalls/inotify_add_watch.S
new file mode 100644
index 0000000..8dde667
--- /dev/null
+++ b/libc/arch-mips/syscalls/inotify_add_watch.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl inotify_add_watch
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent inotify_add_watch
+
+inotify_add_watch:  /* stub: issue __NR_inotify_add_watch; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_inotify_add_watch  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end inotify_add_watch
diff --git a/libc/arch-mips/syscalls/inotify_init.S b/libc/arch-mips/syscalls/inotify_init.S
new file mode 100644
index 0000000..cde4667
--- /dev/null
+++ b/libc/arch-mips/syscalls/inotify_init.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl inotify_init
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent inotify_init
+
+inotify_init:  /* stub: issue __NR_inotify_init; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_inotify_init  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end inotify_init
diff --git a/libc/arch-mips/syscalls/inotify_rm_watch.S b/libc/arch-mips/syscalls/inotify_rm_watch.S
new file mode 100644
index 0000000..1b2964e
--- /dev/null
+++ b/libc/arch-mips/syscalls/inotify_rm_watch.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl inotify_rm_watch
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent inotify_rm_watch
+
+inotify_rm_watch:  /* stub: issue __NR_inotify_rm_watch; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_inotify_rm_watch  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end inotify_rm_watch
diff --git a/libc/arch-mips/syscalls/ioprio_get.S b/libc/arch-mips/syscalls/ioprio_get.S
new file mode 100644
index 0000000..f7f2441
--- /dev/null
+++ b/libc/arch-mips/syscalls/ioprio_get.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl ioprio_get
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent ioprio_get
+
+ioprio_get:  /* stub: issue __NR_ioprio_get; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_ioprio_get  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end ioprio_get
diff --git a/libc/arch-mips/syscalls/ioprio_set.S b/libc/arch-mips/syscalls/ioprio_set.S
new file mode 100644
index 0000000..33a30e7
--- /dev/null
+++ b/libc/arch-mips/syscalls/ioprio_set.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl ioprio_set
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent ioprio_set
+
+ioprio_set:  /* stub: issue __NR_ioprio_set; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_ioprio_set  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end ioprio_set
diff --git a/libc/arch-mips/syscalls/kill.S b/libc/arch-mips/syscalls/kill.S
new file mode 100644
index 0000000..a6640ff
--- /dev/null
+++ b/libc/arch-mips/syscalls/kill.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl kill
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent kill
+
+kill:  /* stub: issue __NR_kill; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_kill  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end kill
diff --git a/libc/arch-mips/syscalls/klogctl.S b/libc/arch-mips/syscalls/klogctl.S
new file mode 100644
index 0000000..8c41dcf
--- /dev/null
+++ b/libc/arch-mips/syscalls/klogctl.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl klogctl
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent klogctl
+
+klogctl:  /* stub: issue __NR_syslog (symbol name differs from the syscall name); failures go to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_syslog  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end klogctl
diff --git a/libc/arch-mips/syscalls/lchown.S b/libc/arch-mips/syscalls/lchown.S
new file mode 100644
index 0000000..9fb4660
--- /dev/null
+++ b/libc/arch-mips/syscalls/lchown.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl lchown
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent lchown
+
+lchown:  /* stub: issue __NR_lchown; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_lchown  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end lchown
diff --git a/libc/arch-mips/syscalls/lgetxattr.S b/libc/arch-mips/syscalls/lgetxattr.S
new file mode 100644
index 0000000..d546d28
--- /dev/null
+++ b/libc/arch-mips/syscalls/lgetxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl lgetxattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent lgetxattr
+
+lgetxattr:  /* stub: issue __NR_lgetxattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_lgetxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end lgetxattr
diff --git a/libc/arch-mips/syscalls/link.S b/libc/arch-mips/syscalls/link.S
new file mode 100644
index 0000000..4096b44
--- /dev/null
+++ b/libc/arch-mips/syscalls/link.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl link
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent link
+
+link:  /* stub: issue __NR_link; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_link  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end link
diff --git a/libc/arch-mips/syscalls/listen.S b/libc/arch-mips/syscalls/listen.S
new file mode 100644
index 0000000..f365d1d
--- /dev/null
+++ b/libc/arch-mips/syscalls/listen.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl listen
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent listen
+
+listen:  /* stub: issue __NR_listen; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_listen  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end listen
diff --git a/libc/arch-mips/syscalls/listxattr.S b/libc/arch-mips/syscalls/listxattr.S
new file mode 100644
index 0000000..90b4544
--- /dev/null
+++ b/libc/arch-mips/syscalls/listxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl listxattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent listxattr
+
+listxattr:  /* stub: issue __NR_listxattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_listxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end listxattr
diff --git a/libc/arch-mips/syscalls/llistxattr.S b/libc/arch-mips/syscalls/llistxattr.S
new file mode 100644
index 0000000..e74bf85
--- /dev/null
+++ b/libc/arch-mips/syscalls/llistxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl llistxattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent llistxattr
+
+llistxattr:  /* stub: issue __NR_llistxattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_llistxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end llistxattr
diff --git a/libc/arch-mips/syscalls/lremovexattr.S b/libc/arch-mips/syscalls/lremovexattr.S
new file mode 100644
index 0000000..a8d8df9
--- /dev/null
+++ b/libc/arch-mips/syscalls/lremovexattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl lremovexattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent lremovexattr
+
+lremovexattr:  /* stub: issue __NR_lremovexattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_lremovexattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end lremovexattr
diff --git a/libc/arch-mips/syscalls/lseek.S b/libc/arch-mips/syscalls/lseek.S
new file mode 100644
index 0000000..bf486ac
--- /dev/null
+++ b/libc/arch-mips/syscalls/lseek.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl lseek
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent lseek
+
+lseek:  /* stub: issue __NR_lseek; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_lseek  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end lseek
diff --git a/libc/arch-mips/syscalls/lsetxattr.S b/libc/arch-mips/syscalls/lsetxattr.S
new file mode 100644
index 0000000..19a0fdf
--- /dev/null
+++ b/libc/arch-mips/syscalls/lsetxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl lsetxattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent lsetxattr
+
+lsetxattr:  /* stub: issue __NR_lsetxattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_lsetxattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end lsetxattr
diff --git a/libc/arch-mips/syscalls/lstat.S b/libc/arch-mips/syscalls/lstat.S
new file mode 100644
index 0000000..355d1da
--- /dev/null
+++ b/libc/arch-mips/syscalls/lstat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl lstat
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent lstat
+
+lstat:  /* stub: issue __NR_lstat64 (note: the 64 variant); failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_lstat64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end lstat
diff --git a/libc/arch-mips/syscalls/madvise.S b/libc/arch-mips/syscalls/madvise.S
new file mode 100644
index 0000000..4df7325
--- /dev/null
+++ b/libc/arch-mips/syscalls/madvise.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl madvise
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent madvise
+
+madvise:  /* stub: issue __NR_madvise; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_madvise  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end madvise
diff --git a/libc/arch-mips/syscalls/mincore.S b/libc/arch-mips/syscalls/mincore.S
new file mode 100644
index 0000000..8f467cd
--- /dev/null
+++ b/libc/arch-mips/syscalls/mincore.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mincore
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mincore
+
+mincore:  /* stub: issue __NR_mincore; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mincore  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mincore
diff --git a/libc/arch-mips/syscalls/mkdir.S b/libc/arch-mips/syscalls/mkdir.S
new file mode 100644
index 0000000..abf7db8
--- /dev/null
+++ b/libc/arch-mips/syscalls/mkdir.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mkdir
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mkdir
+
+mkdir:  /* stub: issue __NR_mkdir; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mkdir  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mkdir
diff --git a/libc/arch-mips/syscalls/mkdirat.S b/libc/arch-mips/syscalls/mkdirat.S
new file mode 100644
index 0000000..4ccbe3b
--- /dev/null
+++ b/libc/arch-mips/syscalls/mkdirat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mkdirat
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mkdirat
+
+mkdirat:  /* stub: issue __NR_mkdirat; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mkdirat  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mkdirat
diff --git a/libc/arch-mips/syscalls/mknod.S b/libc/arch-mips/syscalls/mknod.S
new file mode 100644
index 0000000..5ebb0ad
--- /dev/null
+++ b/libc/arch-mips/syscalls/mknod.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mknod
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mknod
+
+mknod:  /* stub: issue __NR_mknod; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mknod  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mknod
diff --git a/libc/arch-mips/syscalls/mlock.S b/libc/arch-mips/syscalls/mlock.S
new file mode 100644
index 0000000..99aca6e
--- /dev/null
+++ b/libc/arch-mips/syscalls/mlock.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mlock
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mlock
+
+mlock:  /* stub: issue __NR_mlock; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mlock  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mlock
diff --git a/libc/arch-mips/syscalls/mount.S b/libc/arch-mips/syscalls/mount.S
new file mode 100644
index 0000000..1b3385a
--- /dev/null
+++ b/libc/arch-mips/syscalls/mount.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mount
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mount
+
+mount:  /* stub: issue __NR_mount; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mount  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mount
diff --git a/libc/arch-mips/syscalls/mprotect.S b/libc/arch-mips/syscalls/mprotect.S
new file mode 100644
index 0000000..4dc3bea
--- /dev/null
+++ b/libc/arch-mips/syscalls/mprotect.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mprotect
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mprotect
+
+mprotect:  /* stub: issue __NR_mprotect; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mprotect  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mprotect
diff --git a/libc/arch-mips/syscalls/mremap.S b/libc/arch-mips/syscalls/mremap.S
new file mode 100644
index 0000000..721ccd9
--- /dev/null
+++ b/libc/arch-mips/syscalls/mremap.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl mremap
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent mremap
+
+mremap:  /* stub: issue __NR_mremap; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_mremap  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end mremap
diff --git a/libc/arch-mips/syscalls/msync.S b/libc/arch-mips/syscalls/msync.S
new file mode 100644
index 0000000..37e1c9a
--- /dev/null
+++ b/libc/arch-mips/syscalls/msync.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl msync
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent msync
+
+msync:  /* stub: issue __NR_msync; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_msync  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end msync
diff --git a/libc/arch-mips/syscalls/munlock.S b/libc/arch-mips/syscalls/munlock.S
new file mode 100644
index 0000000..befc282
--- /dev/null
+++ b/libc/arch-mips/syscalls/munlock.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl munlock
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent munlock
+
+munlock:  /* stub: issue __NR_munlock; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_munlock  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end munlock
diff --git a/libc/arch-mips/syscalls/munmap.S b/libc/arch-mips/syscalls/munmap.S
new file mode 100644
index 0000000..5b717a9
--- /dev/null
+++ b/libc/arch-mips/syscalls/munmap.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl munmap
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent munmap
+
+munmap:  /* stub: issue __NR_munmap; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_munmap  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end munmap
diff --git a/libc/arch-mips/syscalls/nanosleep.S b/libc/arch-mips/syscalls/nanosleep.S
new file mode 100644
index 0000000..58ace12
--- /dev/null
+++ b/libc/arch-mips/syscalls/nanosleep.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl nanosleep
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent nanosleep
+
+nanosleep:  /* stub: issue __NR_nanosleep; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_nanosleep  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end nanosleep
diff --git a/libc/arch-mips/syscalls/pause.S b/libc/arch-mips/syscalls/pause.S
new file mode 100644
index 0000000..70346b4
--- /dev/null
+++ b/libc/arch-mips/syscalls/pause.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl pause
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent pause
+
+pause:  /* stub: issue __NR_pause; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_pause  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end pause
diff --git a/libc/arch-mips/syscalls/personality.S b/libc/arch-mips/syscalls/personality.S
new file mode 100644
index 0000000..0020469
--- /dev/null
+++ b/libc/arch-mips/syscalls/personality.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl personality
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent personality
+
+personality:  /* stub: issue __NR_personality; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_personality  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end personality
diff --git a/libc/arch-mips/syscalls/pipe2.S b/libc/arch-mips/syscalls/pipe2.S
new file mode 100644
index 0000000..9dfde29
--- /dev/null
+++ b/libc/arch-mips/syscalls/pipe2.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl pipe2
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent pipe2
+
+pipe2:  /* stub: issue __NR_pipe2; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_pipe2  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end pipe2
diff --git a/libc/arch-mips/syscalls/poll.S b/libc/arch-mips/syscalls/poll.S
new file mode 100644
index 0000000..c1fec97
--- /dev/null
+++ b/libc/arch-mips/syscalls/poll.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl poll
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent poll
+
+poll:  /* stub: issue __NR_poll; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_poll  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end poll
diff --git a/libc/arch-mips/syscalls/prctl.S b/libc/arch-mips/syscalls/prctl.S
new file mode 100644
index 0000000..aaf1344
--- /dev/null
+++ b/libc/arch-mips/syscalls/prctl.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl prctl
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent prctl
+
+prctl:  /* stub: issue __NR_prctl; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_prctl  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end prctl
diff --git a/libc/arch-mips/syscalls/pread64.S b/libc/arch-mips/syscalls/pread64.S
new file mode 100644
index 0000000..86e817c
--- /dev/null
+++ b/libc/arch-mips/syscalls/pread64.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl pread64
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent pread64
+
+pread64:  /* stub: issue __NR_pread64; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_pread64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end pread64
diff --git a/libc/arch-mips/syscalls/pwrite64.S b/libc/arch-mips/syscalls/pwrite64.S
new file mode 100644
index 0000000..a8dcd98
--- /dev/null
+++ b/libc/arch-mips/syscalls/pwrite64.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl pwrite64
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent pwrite64
+
+pwrite64:  /* stub: issue __NR_pwrite64; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_pwrite64  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end pwrite64
diff --git a/libc/arch-mips/syscalls/read.S b/libc/arch-mips/syscalls/read.S
new file mode 100644
index 0000000..75816cd
--- /dev/null
+++ b/libc/arch-mips/syscalls/read.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl read
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent read
+
+read:  /* stub: issue __NR_read; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_read  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end read
diff --git a/libc/arch-mips/syscalls/readahead.S b/libc/arch-mips/syscalls/readahead.S
new file mode 100644
index 0000000..5ec600d
--- /dev/null
+++ b/libc/arch-mips/syscalls/readahead.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl readahead
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent readahead
+
+readahead:  /* stub: issue __NR_readahead; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_readahead  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end readahead
diff --git a/libc/arch-mips/syscalls/readlink.S b/libc/arch-mips/syscalls/readlink.S
new file mode 100644
index 0000000..ccdd596
--- /dev/null
+++ b/libc/arch-mips/syscalls/readlink.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl readlink
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent readlink
+
+readlink:  /* stub: issue __NR_readlink; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_readlink  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end readlink
diff --git a/libc/arch-mips/syscalls/readv.S b/libc/arch-mips/syscalls/readv.S
new file mode 100644
index 0000000..0561c18
--- /dev/null
+++ b/libc/arch-mips/syscalls/readv.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl readv
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent readv
+
+readv:  /* stub: issue __NR_readv; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_readv  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end readv
diff --git a/libc/arch-mips/syscalls/recvfrom.S b/libc/arch-mips/syscalls/recvfrom.S
new file mode 100644
index 0000000..cf3441d
--- /dev/null
+++ b/libc/arch-mips/syscalls/recvfrom.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl recvfrom
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent recvfrom
+
+recvfrom:  /* stub: issue __NR_recvfrom; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_recvfrom  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end recvfrom
diff --git a/libc/arch-mips/syscalls/recvmsg.S b/libc/arch-mips/syscalls/recvmsg.S
new file mode 100644
index 0000000..5f22c49
--- /dev/null
+++ b/libc/arch-mips/syscalls/recvmsg.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl recvmsg
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent recvmsg
+
+recvmsg:  /* stub: issue __NR_recvmsg; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_recvmsg  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end recvmsg
diff --git a/libc/arch-mips/syscalls/removexattr.S b/libc/arch-mips/syscalls/removexattr.S
new file mode 100644
index 0000000..7d12b96
--- /dev/null
+++ b/libc/arch-mips/syscalls/removexattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl removexattr
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent removexattr
+
+removexattr:  /* stub: issue __NR_removexattr; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_removexattr  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end removexattr
diff --git a/libc/arch-mips/syscalls/rename.S b/libc/arch-mips/syscalls/rename.S
new file mode 100644
index 0000000..950d579
--- /dev/null
+++ b/libc/arch-mips/syscalls/rename.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl rename
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent rename
+
+rename:  /* stub: issue __NR_rename; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_rename  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end rename
diff --git a/libc/arch-mips/syscalls/renameat.S b/libc/arch-mips/syscalls/renameat.S
new file mode 100644
index 0000000..a2f3c4f
--- /dev/null
+++ b/libc/arch-mips/syscalls/renameat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl renameat
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent renameat
+
+renameat:  /* stub: issue __NR_renameat; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_renameat  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end renameat
diff --git a/libc/arch-mips/syscalls/rmdir.S b/libc/arch-mips/syscalls/rmdir.S
new file mode 100644
index 0000000..2f77d0d
--- /dev/null
+++ b/libc/arch-mips/syscalls/rmdir.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl rmdir
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent rmdir
+
+rmdir:  /* stub: issue __NR_rmdir; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_rmdir  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end rmdir
diff --git a/libc/arch-mips/syscalls/sched_get_priority_max.S b/libc/arch-mips/syscalls/sched_get_priority_max.S
new file mode 100644
index 0000000..158b0fe
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_get_priority_max.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_get_priority_max
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent sched_get_priority_max
+
+sched_get_priority_max:  /* stub: issue __NR_sched_get_priority_max; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_sched_get_priority_max  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end sched_get_priority_max
diff --git a/libc/arch-mips/syscalls/sched_get_priority_min.S b/libc/arch-mips/syscalls/sched_get_priority_min.S
new file mode 100644
index 0000000..8c07d72
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_get_priority_min.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_get_priority_min
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent sched_get_priority_min
+
+sched_get_priority_min:  /* stub: issue __NR_sched_get_priority_min; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_sched_get_priority_min  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end sched_get_priority_min
diff --git a/libc/arch-mips/syscalls/sched_getparam.S b/libc/arch-mips/syscalls/sched_getparam.S
new file mode 100644
index 0000000..6aba7cf
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_getparam.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_getparam
+    .align 4  /* exponent on MIPS gas: 2^4 = 16-byte alignment */
+    .ent sched_getparam
+
+sched_getparam:  /* stub: issue __NR_sched_getparam; failures are routed to __set_errno */
+    .set noreorder  /* delay slots below are filled by hand */
+    .cpload $t9  /* PIC prologue: build $gp from $t9 (assumed to hold our address) */
+    li $v0, __NR_sched_getparam  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 after the trap: take the error path */
+    move $a0, $v0  /* delay slot, runs on both paths: $a0 = $v0 */
+    j $ra  /* success: return, $v0 untouched */
+    nop  /* delay slot */
+1:
+    la $t9,__set_errno  /* error: $a0 already carries $v0 */
+    j $t9  /* tail-jump; $ra unchanged, so __set_errno returns to our caller */
+    nop  /* delay slot */
+    .set reorder
+    .end sched_getparam
diff --git a/libc/arch-mips/syscalls/sched_getscheduler.S b/libc/arch-mips/syscalls/sched_getscheduler.S
new file mode 100644
index 0000000..33d7f49
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_getscheduler.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_getscheduler
+    .align 4
+    .ent sched_getscheduler
+
+sched_getscheduler:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sched_getscheduler
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sched_getscheduler
diff --git a/libc/arch-mips/syscalls/sched_rr_get_interval.S b/libc/arch-mips/syscalls/sched_rr_get_interval.S
new file mode 100644
index 0000000..e634ed0
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_rr_get_interval.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_rr_get_interval
+    .align 4
+    .ent sched_rr_get_interval
+
+sched_rr_get_interval:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sched_rr_get_interval
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sched_rr_get_interval
diff --git a/libc/arch-mips/syscalls/sched_setaffinity.S b/libc/arch-mips/syscalls/sched_setaffinity.S
new file mode 100644
index 0000000..aefc240
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_setaffinity.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_setaffinity
+    .align 4
+    .ent sched_setaffinity
+
+sched_setaffinity:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sched_setaffinity
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sched_setaffinity
diff --git a/libc/arch-mips/syscalls/sched_setparam.S b/libc/arch-mips/syscalls/sched_setparam.S
new file mode 100644
index 0000000..4d646bd
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_setparam.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_setparam
+    .align 4
+    .ent sched_setparam
+
+sched_setparam:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sched_setparam
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sched_setparam
diff --git a/libc/arch-mips/syscalls/sched_setscheduler.S b/libc/arch-mips/syscalls/sched_setscheduler.S
new file mode 100644
index 0000000..a79e39c
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_setscheduler.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_setscheduler
+    .align 4
+    .ent sched_setscheduler
+
+sched_setscheduler:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sched_setscheduler
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sched_setscheduler
diff --git a/libc/arch-mips/syscalls/sched_yield.S b/libc/arch-mips/syscalls/sched_yield.S
new file mode 100644
index 0000000..c7c99dc
--- /dev/null
+++ b/libc/arch-mips/syscalls/sched_yield.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sched_yield
+    .align 4
+    .ent sched_yield
+
+sched_yield:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sched_yield
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sched_yield
diff --git a/libc/arch-mips/syscalls/select.S b/libc/arch-mips/syscalls/select.S
new file mode 100644
index 0000000..ce4112f
--- /dev/null
+++ b/libc/arch-mips/syscalls/select.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl select
+    .align 4
+    .ent select
+
+select:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR__newselect
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end select
diff --git a/libc/arch-mips/syscalls/sendfile.S b/libc/arch-mips/syscalls/sendfile.S
new file mode 100644
index 0000000..c4c176e
--- /dev/null
+++ b/libc/arch-mips/syscalls/sendfile.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sendfile
+    .align 4
+    .ent sendfile
+
+sendfile:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sendfile
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sendfile
diff --git a/libc/arch-mips/syscalls/sendmsg.S b/libc/arch-mips/syscalls/sendmsg.S
new file mode 100644
index 0000000..1913f13
--- /dev/null
+++ b/libc/arch-mips/syscalls/sendmsg.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sendmsg
+    .align 4
+    .ent sendmsg
+
+sendmsg:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sendmsg
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sendmsg
diff --git a/libc/arch-mips/syscalls/sendto.S b/libc/arch-mips/syscalls/sendto.S
new file mode 100644
index 0000000..f17743d
--- /dev/null
+++ b/libc/arch-mips/syscalls/sendto.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sendto
+    .align 4
+    .ent sendto
+
+sendto:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sendto
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sendto
diff --git a/libc/arch-mips/syscalls/setgid.S b/libc/arch-mips/syscalls/setgid.S
new file mode 100644
index 0000000..83dbfe5
--- /dev/null
+++ b/libc/arch-mips/syscalls/setgid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setgid
+    .align 4
+    .ent setgid
+
+setgid:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setgid
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setgid
diff --git a/libc/arch-mips/syscalls/setgroups.S b/libc/arch-mips/syscalls/setgroups.S
new file mode 100644
index 0000000..ef9327d
--- /dev/null
+++ b/libc/arch-mips/syscalls/setgroups.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setgroups
+    .align 4
+    .ent setgroups
+
+setgroups:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setgroups
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setgroups
diff --git a/libc/arch-mips/syscalls/setitimer.S b/libc/arch-mips/syscalls/setitimer.S
new file mode 100644
index 0000000..b042e07
--- /dev/null
+++ b/libc/arch-mips/syscalls/setitimer.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setitimer
+    .align 4
+    .ent setitimer
+
+setitimer:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setitimer
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setitimer
diff --git a/libc/arch-mips/syscalls/setpgid.S b/libc/arch-mips/syscalls/setpgid.S
new file mode 100644
index 0000000..2ddc828
--- /dev/null
+++ b/libc/arch-mips/syscalls/setpgid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setpgid
+    .align 4
+    .ent setpgid
+
+setpgid:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setpgid
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setpgid
diff --git a/libc/arch-mips/syscalls/setpriority.S b/libc/arch-mips/syscalls/setpriority.S
new file mode 100644
index 0000000..02550e9
--- /dev/null
+++ b/libc/arch-mips/syscalls/setpriority.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setpriority
+    .align 4
+    .ent setpriority
+
+setpriority:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setpriority
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setpriority
diff --git a/libc/arch-mips/syscalls/setregid.S b/libc/arch-mips/syscalls/setregid.S
new file mode 100644
index 0000000..619b3a1
--- /dev/null
+++ b/libc/arch-mips/syscalls/setregid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setregid
+    .align 4
+    .ent setregid
+
+setregid:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setregid
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setregid
diff --git a/libc/arch-mips/syscalls/setresgid.S b/libc/arch-mips/syscalls/setresgid.S
new file mode 100644
index 0000000..3ff94c7
--- /dev/null
+++ b/libc/arch-mips/syscalls/setresgid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setresgid
+    .align 4
+    .ent setresgid
+
+setresgid:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setresgid
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setresgid
diff --git a/libc/arch-mips/syscalls/setrlimit.S b/libc/arch-mips/syscalls/setrlimit.S
new file mode 100644
index 0000000..3ec37c5
--- /dev/null
+++ b/libc/arch-mips/syscalls/setrlimit.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setrlimit
+    .align 4
+    .ent setrlimit
+
+setrlimit:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setrlimit
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setrlimit
diff --git a/libc/arch-mips/syscalls/setsid.S b/libc/arch-mips/syscalls/setsid.S
new file mode 100644
index 0000000..2a09663
--- /dev/null
+++ b/libc/arch-mips/syscalls/setsid.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setsid
+    .align 4
+    .ent setsid
+
+setsid:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setsid
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setsid
diff --git a/libc/arch-mips/syscalls/setsockopt.S b/libc/arch-mips/syscalls/setsockopt.S
new file mode 100644
index 0000000..dc5e62f
--- /dev/null
+++ b/libc/arch-mips/syscalls/setsockopt.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setsockopt
+    .align 4
+    .ent setsockopt
+
+setsockopt:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setsockopt
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setsockopt
diff --git a/libc/arch-mips/syscalls/settimeofday.S b/libc/arch-mips/syscalls/settimeofday.S
new file mode 100644
index 0000000..f7c03ea
--- /dev/null
+++ b/libc/arch-mips/syscalls/settimeofday.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl settimeofday
+    .align 4
+    .ent settimeofday
+
+settimeofday:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_settimeofday
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end settimeofday
diff --git a/libc/arch-mips/syscalls/setxattr.S b/libc/arch-mips/syscalls/setxattr.S
new file mode 100644
index 0000000..baa78a4
--- /dev/null
+++ b/libc/arch-mips/syscalls/setxattr.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl setxattr
+    .align 4
+    .ent setxattr
+
+setxattr:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_setxattr
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end setxattr
diff --git a/libc/arch-mips/syscalls/shutdown.S b/libc/arch-mips/syscalls/shutdown.S
new file mode 100644
index 0000000..b80d99b
--- /dev/null
+++ b/libc/arch-mips/syscalls/shutdown.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl shutdown
+    .align 4
+    .ent shutdown
+
+shutdown:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_shutdown
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end shutdown
diff --git a/libc/arch-mips/syscalls/sigaction.S b/libc/arch-mips/syscalls/sigaction.S
new file mode 100644
index 0000000..700da54
--- /dev/null
+++ b/libc/arch-mips/syscalls/sigaction.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sigaction
+    .align 4
+    .ent sigaction
+
+sigaction:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sigaction
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sigaction
diff --git a/libc/arch-mips/syscalls/sigaltstack.S b/libc/arch-mips/syscalls/sigaltstack.S
new file mode 100644
index 0000000..d2a4a9a
--- /dev/null
+++ b/libc/arch-mips/syscalls/sigaltstack.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sigaltstack
+    .align 4
+    .ent sigaltstack
+
+sigaltstack:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sigaltstack
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sigaltstack
diff --git a/libc/arch-mips/syscalls/sigpending.S b/libc/arch-mips/syscalls/sigpending.S
new file mode 100644
index 0000000..68df4db
--- /dev/null
+++ b/libc/arch-mips/syscalls/sigpending.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sigpending
+    .align 4
+    .ent sigpending
+
+sigpending:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sigpending
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sigpending
diff --git a/libc/arch-mips/syscalls/sigprocmask.S b/libc/arch-mips/syscalls/sigprocmask.S
new file mode 100644
index 0000000..17c01cb
--- /dev/null
+++ b/libc/arch-mips/syscalls/sigprocmask.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sigprocmask
+    .align 4
+    .ent sigprocmask
+
+sigprocmask:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sigprocmask
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sigprocmask
diff --git a/libc/arch-mips/syscalls/socket.S b/libc/arch-mips/syscalls/socket.S
new file mode 100644
index 0000000..e7d43f9
--- /dev/null
+++ b/libc/arch-mips/syscalls/socket.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl socket
+    .align 4
+    .ent socket
+
+socket:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_socket
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end socket
diff --git a/libc/arch-mips/syscalls/socketpair.S b/libc/arch-mips/syscalls/socketpair.S
new file mode 100644
index 0000000..97c67af
--- /dev/null
+++ b/libc/arch-mips/syscalls/socketpair.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl socketpair
+    .align 4
+    .ent socketpair
+
+socketpair:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_socketpair
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end socketpair
diff --git a/libc/arch-mips/syscalls/stat.S b/libc/arch-mips/syscalls/stat.S
new file mode 100644
index 0000000..aec910d
--- /dev/null
+++ b/libc/arch-mips/syscalls/stat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl stat
+    .align 4
+    .ent stat
+
+stat:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_stat64
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end stat
diff --git a/libc/arch-mips/syscalls/symlink.S b/libc/arch-mips/syscalls/symlink.S
new file mode 100644
index 0000000..f81c6dd
--- /dev/null
+++ b/libc/arch-mips/syscalls/symlink.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl symlink
+    .align 4
+    .ent symlink
+
+symlink:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_symlink
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end symlink
diff --git a/libc/arch-mips/syscalls/sync.S b/libc/arch-mips/syscalls/sync.S
new file mode 100644
index 0000000..a122873
--- /dev/null
+++ b/libc/arch-mips/syscalls/sync.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sync
+    .align 4
+    .ent sync
+
+sync:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sync
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sync
diff --git a/libc/arch-mips/syscalls/syscall.S b/libc/arch-mips/syscalls/syscall.S
new file mode 100644
index 0000000..2192bd5
--- /dev/null
+++ b/libc/arch-mips/syscalls/syscall.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl syscall
+    .align 4
+    .ent syscall
+
+syscall:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_syscall
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end syscall
diff --git a/libc/arch-mips/syscalls/sysinfo.S b/libc/arch-mips/syscalls/sysinfo.S
new file mode 100644
index 0000000..4577af9
--- /dev/null
+++ b/libc/arch-mips/syscalls/sysinfo.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl sysinfo
+    .align 4
+    .ent sysinfo
+
+sysinfo:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_sysinfo
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end sysinfo
diff --git a/libc/arch-mips/syscalls/times.S b/libc/arch-mips/syscalls/times.S
new file mode 100644
index 0000000..3e8950d
--- /dev/null
+++ b/libc/arch-mips/syscalls/times.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl times
+    .align 4
+    .ent times
+
+times:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_times
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end times
diff --git a/libc/arch-mips/syscalls/tkill.S b/libc/arch-mips/syscalls/tkill.S
new file mode 100644
index 0000000..3037baa
--- /dev/null
+++ b/libc/arch-mips/syscalls/tkill.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl tkill
+    .align 4
+    .ent tkill
+
+tkill:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_tkill
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end tkill
diff --git a/libc/arch-mips/syscalls/truncate.S b/libc/arch-mips/syscalls/truncate.S
new file mode 100644
index 0000000..4a8f12c
--- /dev/null
+++ b/libc/arch-mips/syscalls/truncate.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl truncate
+    .align 4
+    .ent truncate
+
+truncate:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_truncate
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end truncate
diff --git a/libc/arch-mips/syscalls/umask.S b/libc/arch-mips/syscalls/umask.S
new file mode 100644
index 0000000..eee911e
--- /dev/null
+++ b/libc/arch-mips/syscalls/umask.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl umask
+    .align 4
+    .ent umask
+
+umask:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_umask
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end umask
diff --git a/libc/arch-mips/syscalls/umount2.S b/libc/arch-mips/syscalls/umount2.S
new file mode 100644
index 0000000..c6d9df6
--- /dev/null
+++ b/libc/arch-mips/syscalls/umount2.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl umount2
+    .align 4
+    .ent umount2
+
+umount2:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_umount2
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end umount2
diff --git a/libc/arch-mips/syscalls/uname.S b/libc/arch-mips/syscalls/uname.S
new file mode 100644
index 0000000..7e4f780
--- /dev/null
+++ b/libc/arch-mips/syscalls/uname.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl uname
+    .align 4
+    .ent uname
+
+uname:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_uname
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end uname
diff --git a/libc/arch-mips/syscalls/unlink.S b/libc/arch-mips/syscalls/unlink.S
new file mode 100644
index 0000000..fd8dad9
--- /dev/null
+++ b/libc/arch-mips/syscalls/unlink.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl unlink
+    .align 4
+    .ent unlink
+
+unlink:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_unlink
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end unlink
diff --git a/libc/arch-mips/syscalls/unlinkat.S b/libc/arch-mips/syscalls/unlinkat.S
new file mode 100644
index 0000000..793ab5e
--- /dev/null
+++ b/libc/arch-mips/syscalls/unlinkat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl unlinkat
+    .align 4
+    .ent unlinkat
+
+unlinkat:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_unlinkat
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end unlinkat
diff --git a/libc/arch-mips/syscalls/utimensat.S b/libc/arch-mips/syscalls/utimensat.S
new file mode 100644
index 0000000..435c49e
--- /dev/null
+++ b/libc/arch-mips/syscalls/utimensat.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl utimensat
+    .align 4
+    .ent utimensat
+
+utimensat:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_utimensat
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end utimensat
diff --git a/libc/arch-mips/syscalls/utimes.S b/libc/arch-mips/syscalls/utimes.S
new file mode 100644
index 0000000..8ce1e44
--- /dev/null
+++ b/libc/arch-mips/syscalls/utimes.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl utimes
+    .align 4
+    .ent utimes
+
+utimes:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_utimes
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end utimes
diff --git a/libc/arch-mips/syscalls/write.S b/libc/arch-mips/syscalls/write.S
new file mode 100644
index 0000000..a772ace
--- /dev/null
+++ b/libc/arch-mips/syscalls/write.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl write
+    .align 4
+    .ent write
+
+write:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_write
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end write
diff --git a/libc/arch-mips/syscalls/writev.S b/libc/arch-mips/syscalls/writev.S
new file mode 100644
index 0000000..bdacb42
--- /dev/null
+++ b/libc/arch-mips/syscalls/writev.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl writev
+    .align 4
+    .ent writev
+
+writev:
+    .set noreorder
+    .cpload $t9
+    li $v0, __NR_writev
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end writev
diff --git a/libc/arch-sh/syscalls/personality.S b/libc/arch-sh/syscalls/personality.S
deleted file mode 100644
index efc2ea3..0000000
--- a/libc/arch-sh/syscalls/personality.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/* autogenerated by gensyscalls.py */
-#include <sys/linux-syscalls.h>
-
-    .text
-    .type personality, @function
-    .globl personality
-    .align 4
-
-personality:
-
-    /* invoke trap */
-    mov.l   0f, r3  /* trap num */
-    trapa   #(1 + 0x10)
-
-    /* check return value */
-    cmp/pz  r0
-    bt      __NR_personality_end
-
-    /* keep error number */
-    sts.l   pr, @-r15
-    mov.l   1f, r1
-    jsr     @r1
-    mov     r0, r4
-    lds.l   @r15+, pr
-
-__NR_personality_end:
-    rts
-    nop
-
-    .align  2
-0:  .long   __NR_personality
-1:  .long   __set_syscall_errno
diff --git a/libc/arch-x86/string/ssse3-memcpy5.S b/libc/arch-x86/string/ssse3-memcpy5.S
index 1bf6d22..b0612a6 100644
--- a/libc/arch-x86/string/ssse3-memcpy5.S
+++ b/libc/arch-x86/string/ssse3-memcpy5.S
@@ -29,23 +29,19 @@
 */
 
 #ifndef MEMCPY
-# define MEMCPY         ssse3_memcpy5
+# define MEMCPY	ssse3_memcpy5
 #endif
 
 #ifndef L
 # define L(label)	.L##label
 #endif
 
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
 #ifndef cfi_startproc
-# define cfi_startproc			.cfi_startproc
+# define cfi_startproc	.cfi_startproc
 #endif
 
 #ifndef cfi_endproc
-# define cfi_endproc			.cfi_endproc
+# define cfi_endproc	.cfi_endproc
 #endif
 
 #ifndef cfi_rel_offset
@@ -53,33 +49,25 @@
 #endif
 
 #ifndef cfi_restore
-# define cfi_restore(reg)		.cfi_restore reg
+# define cfi_restore(reg)	.cfi_restore reg
 #endif
 
 #ifndef cfi_adjust_cfa_offset
 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
 #endif
 
-#ifndef cfi_remember_state
-# define cfi_remember_state		.cfi_remember_state
-#endif
-
-#ifndef cfi_restore_state
-# define cfi_restore_state		.cfi_restore_state
-#endif
-
 #ifndef ENTRY
-# define ENTRY(name)			\
-	.type name,  @function; 	\
-	.globl name;			\
-	.p2align 4;			\
-name:					\
+# define ENTRY(name)		\
+	.type name,  @function;		\
+	.globl name;		\
+	.p2align 4;		\
+name:		\
 	cfi_startproc
 #endif
 
 #ifndef END
-# define END(name)			\
-	cfi_endproc;			\
+# define END(name)		\
+	cfi_endproc;		\
 	.size name, .-name
 #endif
 
@@ -93,12 +81,12 @@
 # define LEN		SRC+4
 #endif
 
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
+#define CFI_PUSH(REG)		\
+  cfi_adjust_cfa_offset (4);		\
   cfi_rel_offset (REG, 0)
 
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
+#define CFI_POP(REG)		\
+  cfi_adjust_cfa_offset (-4);		\
   cfi_restore (REG)
 
 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
@@ -110,38 +98,26 @@
 # define RETURN_END	POP (%ebx); ret
 # define RETURN		RETURN_END; CFI_PUSH (%ebx)
 # define JMPTBL(I, B)	I - B
+# undef __i686
+
+# define SETUP_PIC_REG(x)	call	__i686.get_pc_thunk.x
 
 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-   jump table with relative offsets.  INDEX is a register contains the
-   index into the jump table.   SCALE is the scale of INDEX. */
+	jump table with relative offsets.  INDEX is a register that contains
+	the index into the jump table.  SCALE is the scale of INDEX. */
+
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-    /* We first load PC into EBX.  */				\
-    call	__i686.get_pc_thunk.bx;				\
-    /* Get the address of the jump table.  */			\
-    addl	$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    addl	(%ebx,INDEX,SCALE), %ebx;			\
-    /* We loaded the jump table.  Go.  */			\
-    jmp		*%ebx
-
-# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)			\
-    addl	$(TABLE - .), %ebx
-
-# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE)	\
-    addl	(%ebx,INDEX,SCALE), %ebx;			\
-    /* We loaded the jump table.  Go.  */			\
-    jmp		*%ebx
-
-	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
-	.globl	__i686.get_pc_thunk.bx
-	.hidden	__i686.get_pc_thunk.bx
-	ALIGN (4)
-	.type	__i686.get_pc_thunk.bx,@function
-__i686.get_pc_thunk.bx:
-	movl	(%esp), %ebx
-	ret
+    /* We first load PC into EBX.  */		\
+	SETUP_PIC_REG(bx);		\
+    /* Get the address of the jump table.  */		\
+	addl	$(TABLE - .), %ebx;		\
+    /* Get the entry and convert the relative offset to the		\
+	absolute address.  */		\
+	addl	(%ebx, INDEX, SCALE), %ebx;		\
+    /* We loaded the jump table.  Go.  */		\
+	jmp	*%ebx
 #else
+
 # define PARMS		4
 # define ENTRANCE
 # define RETURN_END	ret
@@ -149,15 +125,11 @@
 # define JMPTBL(I, B)	I
 
 /* Branch to an entry in a jump table.  TABLE is a jump table with
-   absolute offsets.  INDEX is a register contains the index into the
-   jump table.  SCALE is the scale of INDEX. */
+	absolute offsets.  INDEX is a register that contains the index into
+	the jump table.  SCALE is the scale of INDEX. */
+
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-    jmp		*TABLE(,INDEX,SCALE)
-
-# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
-
-# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE)		\
-    jmp		*TABLE(,INDEX,SCALE)
+	jmp	*TABLE(, INDEX, SCALE)
 #endif
 
 	.section .text.ssse3,"ax",@progbits
@@ -174,6 +146,8 @@
 	cmp	$32, %ecx
 	jae	L(memmove_bwd)
 	jmp	L(bk_write_less32bytes_2)
+
+	.p2align 4
 L(memmove_bwd):
 	add	%ecx, %eax
 	cmp	%eax, %edx
@@ -194,21 +168,25 @@
 	add	%ecx, %eax
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 #ifndef USE_AS_MEMMOVE
+	.p2align 4
 L(bk_write):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
 #endif
 
-	ALIGN (4)
-/* ECX > 32 and EDX is 4 byte aligned.  */
+	.p2align 4
 L(48bytesormore):
+#ifndef USE_AS_MEMMOVE
+	movlpd	(%eax), %xmm0
+	movlpd	8(%eax), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+#else
 	movdqu	(%eax), %xmm0
+#endif
 	PUSH (%edi)
 	movl	%edx, %edi
 	and	$-16, %edx
-	PUSH (%esi)
-	cfi_remember_state
 	add	$16, %edx
-	movl	%edi, %esi
 	sub	%edx, %edi
 	add	%edi, %ecx
 	sub	%edi, %eax
@@ -217,7 +195,7 @@
 	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
 #else
 # if (defined SHARED || defined __PIC__)
-	call	__i686.get_pc_thunk.bx
+	SETUP_PIC_REG(bx)
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
 # else
@@ -229,19 +207,20 @@
 	jae	L(large_page)
 	and	$0xf, %edi
 	jz	L(shl_0)
-
 	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shl_0):
-	movdqu	%xmm0, (%esi)
+#ifdef USE_AS_MEMMOVE
+	movl	DEST+4(%esp), %edi
+	movdqu	%xmm0, (%edi)
+#endif
 	xor	%edi, %edi
-	POP (%esi)
 	cmp	$127, %ecx
 	ja	L(shl_0_gobble)
 	lea	-32(%ecx), %ecx
+
+	.p2align 4
 L(shl_0_loop):
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -273,32 +252,35 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
+
 L(shl_0_end):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	add	%edi, %eax
 	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
 
 	CFI_PUSH (%edi)
-L(shl_0_gobble):
 
+	.p2align 4
+L(shl_0_gobble):
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %ecx
 #else
 # if (defined SHARED || defined __PIC__)
-	call	__i686.get_pc_thunk.bx
+	SETUP_PIC_REG(bx)
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
 # else
 	cmp	__x86_data_cache_size_half, %ecx
 # endif
 #endif
-
-	POP (%edi)
+	POP	(%edi)
 	lea	-128(%ecx), %ecx
 	jae	L(shl_0_gobble_mem_loop)
+
+	.p2align 4
 L(shl_0_gobble_cache_loop):
 	movdqa	(%eax), %xmm0
 	movdqa	0x10(%eax), %xmm1
@@ -328,17 +310,15 @@
 	movdqa	(%eax), %xmm0
 	sub	$0x40, %ecx
 	movdqa	0x10(%eax), %xmm1
-
 	movdqa	%xmm0, (%edx)
 	movdqa	%xmm1, 0x10(%edx)
-
 	movdqa	0x20(%eax), %xmm0
 	movdqa	0x30(%eax), %xmm1
 	add	$0x40, %eax
-
 	movdqa	%xmm0, 0x20(%edx)
 	movdqa	%xmm1, 0x30(%edx)
 	add	$0x40, %edx
+
 L(shl_0_cache_less_64bytes):
 	cmp	$0x20, %ecx
 	jb	L(shl_0_cache_less_32bytes)
@@ -349,6 +329,7 @@
 	movdqa	%xmm0, (%edx)
 	movdqa	%xmm1, 0x10(%edx)
 	add	$0x20, %edx
+
 L(shl_0_cache_less_32bytes):
 	cmp	$0x10, %ecx
 	jb	L(shl_0_cache_less_16bytes)
@@ -357,13 +338,13 @@
 	add	$0x10, %eax
 	movdqa	%xmm0, (%edx)
 	add	$0x10, %edx
+
 L(shl_0_cache_less_16bytes):
 	add	%ecx, %edx
 	add	%ecx, %eax
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
-
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_mem_loop):
 	prefetcht0 0x1c0(%eax)
 	prefetcht0 0x280(%eax)
@@ -408,6 +389,7 @@
 	movdqa	%xmm0, 0x20(%edx)
 	movdqa	%xmm1, 0x30(%edx)
 	add	$0x40, %edx
+
 L(shl_0_mem_less_64bytes):
 	cmp	$0x20, %ecx
 	jb	L(shl_0_mem_less_32bytes)
@@ -418,6 +400,7 @@
 	movdqa	%xmm0, (%edx)
 	movdqa	%xmm1, 0x10(%edx)
 	add	$0x20, %edx
+
 L(shl_0_mem_less_32bytes):
 	cmp	$0x10, %ecx
 	jb	L(shl_0_mem_less_16bytes)
@@ -426,24 +409,84 @@
 	add	$0x10, %eax
 	movdqa	%xmm0, (%edx)
 	add	$0x10, %edx
+
 L(shl_0_mem_less_16bytes):
 	add	%ecx, %edx
 	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shl_1):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-1(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_1_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-1(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-1; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-1(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_1_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $1 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl1LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	15(%eax), %xmm2
+	movaps	31(%eax), %xmm3
+	movaps	47(%eax), %xmm4
+	movaps	63(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$1, %xmm4, %xmm5	/* %xmm5 = bytes 1..16 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$1, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$1, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$1, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl1LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl1LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	15(%eax), %xmm2
+	movaps	31(%eax), %xmm3
+	palignr	$1, %xmm2, %xmm3
+	palignr	$1, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_1_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-1(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_1_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -453,8 +496,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_1_end)
+	jb	L(sh_1_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -465,30 +507,90 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_1_no_prefetch_loop)
 
-	jae	L(shl_1_loop)
-
-L(shl_1_end):
+L(sh_1_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	1(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_2):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-2(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_2_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-2(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-2; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-2(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_2_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $2 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl2LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	14(%eax), %xmm2
+	movaps	30(%eax), %xmm3
+	movaps	46(%eax), %xmm4
+	movaps	62(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$2, %xmm4, %xmm5	/* %xmm5 = bytes 2..17 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$2, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$2, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$2, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl2LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl2LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	14(%eax), %xmm2
+	movaps	30(%eax), %xmm3
+	palignr	$2, %xmm2, %xmm3
+	palignr	$2, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_2_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-2(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_2_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -498,8 +600,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_2_end)
+	jb	L(sh_2_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -510,30 +611,90 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_2_no_prefetch_loop)
 
-	jae	L(shl_2_loop)
-
-L(shl_2_end):
+L(sh_2_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	2(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_3):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-3(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_3_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-3(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-3; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-3(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_3_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $3 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl3LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	13(%eax), %xmm2
+	movaps	29(%eax), %xmm3
+	movaps	45(%eax), %xmm4
+	movaps	61(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$3, %xmm4, %xmm5	/* %xmm5 = bytes 3..18 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$3, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$3, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$3, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl3LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl3LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	13(%eax), %xmm2
+	movaps	29(%eax), %xmm3
+	palignr	$3, %xmm2, %xmm3
+	palignr	$3, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_3_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-3(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_3_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -544,7 +705,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jb	L(shl_3_end)
+	jb	L(sh_3_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -556,29 +717,90 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jae	L(shl_3_loop)
+	jae	L(sh_3_no_prefetch_loop)
 
-L(shl_3_end):
+L(sh_3_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	3(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_4):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-4(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_4_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-4(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-4; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-4(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_4_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $4 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl4LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	12(%eax), %xmm2
+	movaps	28(%eax), %xmm3
+	movaps	44(%eax), %xmm4
+	movaps	60(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$4, %xmm4, %xmm5	/* %xmm5 = bytes 4..19 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$4, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$4, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$4, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl4LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl4LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	12(%eax), %xmm2
+	movaps	28(%eax), %xmm3
+	palignr	$4, %xmm2, %xmm3
+	palignr	$4, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_4_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-4(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_4_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -589,7 +811,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jb	L(shl_4_end)
+	jb	L(sh_4_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -601,29 +823,90 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jae	L(shl_4_loop)
+	jae	L(sh_4_no_prefetch_loop)
 
-L(shl_4_end):
+L(sh_4_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	4(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_5):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-5(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_5_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-5(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-5; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-5(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_5_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $5 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl5LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	11(%eax), %xmm2
+	movaps	27(%eax), %xmm3
+	movaps	43(%eax), %xmm4
+	movaps	59(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$5, %xmm4, %xmm5	/* %xmm5 = bytes 5..20 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$5, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$5, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$5, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl5LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl5LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	11(%eax), %xmm2
+	movaps	27(%eax), %xmm3
+	palignr	$5, %xmm2, %xmm3
+	palignr	$5, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_5_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-5(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_5_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -634,7 +917,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jb	L(shl_5_end)
+	jb	L(sh_5_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -646,29 +929,90 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jae	L(shl_5_loop)
+	jae	L(sh_5_no_prefetch_loop)
 
-L(shl_5_end):
+L(sh_5_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	5(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_6):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-6(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_6_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-6(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-6; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-6(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_6_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $6 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl6LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	10(%eax), %xmm2
+	movaps	26(%eax), %xmm3
+	movaps	42(%eax), %xmm4
+	movaps	58(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$6, %xmm4, %xmm5	/* %xmm5 = bytes 6..21 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$6, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$6, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$6, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl6LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl6LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	10(%eax), %xmm2
+	movaps	26(%eax), %xmm3
+	palignr	$6, %xmm2, %xmm3
+	palignr	$6, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_6_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-6(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_6_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -679,7 +1023,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jb	L(shl_6_end)
+	jb	L(sh_6_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -691,29 +1035,90 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jae	L(shl_6_loop)
+	jae	L(sh_6_no_prefetch_loop)
 
-L(shl_6_end):
+L(sh_6_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	6(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_7):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-7(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_7_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-7(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-7; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-7(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_7_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $7 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl7LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	9(%eax), %xmm2
+	movaps	25(%eax), %xmm3
+	movaps	41(%eax), %xmm4
+	movaps	57(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$7, %xmm4, %xmm5	/* %xmm5 = bytes 7..22 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$7, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$7, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$7, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl7LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl7LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	9(%eax), %xmm2
+	movaps	25(%eax), %xmm3
+	palignr	$7, %xmm2, %xmm3
+	palignr	$7, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_7_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-7(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_7_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -723,8 +1128,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_7_end)
+	jb	L(sh_7_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -735,30 +1139,90 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_7_no_prefetch_loop)
 
-	jae	L(shl_7_loop)
-
-L(shl_7_end):
+L(sh_7_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	7(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_8):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-8(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_8_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-8(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-8; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-8(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_8_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $8 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl8LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	8(%eax), %xmm2
+	movaps	24(%eax), %xmm3
+	movaps	40(%eax), %xmm4
+	movaps	56(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$8, %xmm4, %xmm5	/* %xmm5 = bytes 8..23 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$8, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$8, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$8, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl8LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(LoopLeave8):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	8(%eax), %xmm2
+	movaps	24(%eax), %xmm3
+	palignr	$8, %xmm2, %xmm3
+	palignr	$8, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_8_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-8(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_8_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -768,8 +1232,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_8_end)
+	jb	L(sh_8_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -780,30 +1243,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_8_no_prefetch_loop)
 
-	jae	L(shl_8_loop)
-
-L(shl_8_end):
+L(sh_8_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	8(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_9):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-9(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_9_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-9(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-9; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-9(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_9_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $9 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl9LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	7(%eax), %xmm2
+	movaps	23(%eax), %xmm3
+	movaps	39(%eax), %xmm4
+	movaps	55(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$9, %xmm4, %xmm5	/* %xmm5 = bytes 9..24 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$9, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$9, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$9, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl9LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl9LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	7(%eax), %xmm2
+	movaps	23(%eax), %xmm3
+	palignr	$9, %xmm2, %xmm3
+	palignr	$9, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_9_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-9(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_9_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -813,8 +1337,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_9_end)
+	jb	L(sh_9_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -825,30 +1348,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_9_no_prefetch_loop)
 
-	jae	L(shl_9_loop)
-
-L(shl_9_end):
+L(sh_9_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	9(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_10):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-10(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_10_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-10(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-10; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-10(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_10_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $10 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl10LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	6(%eax), %xmm2
+	movaps	22(%eax), %xmm3
+	movaps	38(%eax), %xmm4
+	movaps	54(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$10, %xmm4, %xmm5	/* %xmm5 = bytes 10..25 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$10, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$10, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$10, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl10LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl10LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	6(%eax), %xmm2
+	movaps	22(%eax), %xmm3
+	palignr	$10, %xmm2, %xmm3
+	palignr	$10, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_10_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-10(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_10_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -858,8 +1442,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_10_end)
+	jb	L(sh_10_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -870,30 +1453,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_10_no_prefetch_loop)
 
-	jae	L(shl_10_loop)
-
-L(shl_10_end):
+L(sh_10_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	10(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_11):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-11(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_11_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-11(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-11; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-11(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_11_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $11 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl11LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	5(%eax), %xmm2
+	movaps	21(%eax), %xmm3
+	movaps	37(%eax), %xmm4
+	movaps	53(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$11, %xmm4, %xmm5	/* %xmm5 = bytes 11..26 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$11, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$11, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$11, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl11LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl11LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	5(%eax), %xmm2
+	movaps	21(%eax), %xmm3
+	palignr	$11, %xmm2, %xmm3
+	palignr	$11, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_11_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-11(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_11_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -903,8 +1547,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_11_end)
+	jb	L(sh_11_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -915,30 +1558,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_11_no_prefetch_loop)
 
-	jae	L(shl_11_loop)
-
-L(shl_11_end):
+L(sh_11_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	11(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_12):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-12(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_12_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-12(%eax), %xmm1	/* seed %xmm1 with the 16 bytes at %eax-12; movaps requires a 16-byte aligned address */
+#else
+	movl	DEST+4(%esp), %edi	/* NOTE(review): presumably the original dst argument; DEST is defined outside this chunk */
+	movaps	-12(%eax), %xmm1	/* same seed load as the memcpy build */
+	movdqu	%xmm0, (%edi)	/* unaligned 16-byte store of %xmm0 through %edi */
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* threshold fixed at build time */
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)	/* PIC build: get PC into %ebx for the GOTOFF access below */
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx	/* threshold read from a variable */
+# endif
+#endif
+	jb L(sh_12_no_prefetch)	/* %ecx below the threshold (unsigned): take the no_prefetch variant */
 
+	lea	-64(%ecx), %ecx	/* pre-subtract 64 from the count in %ecx before the loop */
+	/* Each pass: four aligned 16-byte loads, palignr $12 merges, four aligned 16-byte stores.  */
+	.p2align 4
+L(Shl12LoopStart):
+	prefetcht0 0x1c0(%eax)	/* prefetch 0x1c0 bytes past %eax */
+	prefetcht0 0x1c0(%edx)	/* and 0x1c0 bytes past %edx */
+	movaps	4(%eax), %xmm2
+	movaps	20(%eax), %xmm3
+	movaps	36(%eax), %xmm4
+	movaps	52(%eax), %xmm5
+	movaps	%xmm5, %xmm7	/* save the unshifted last load; it becomes %xmm1 for the next pass */
+	palignr	$12, %xmm4, %xmm5	/* %xmm5 = bytes 12..27 of the 32-byte value %xmm5:%xmm4 */
+	palignr	$12, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$12, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$12, %xmm1, %xmm2	/* %xmm1 is the 16-byte chunk just before the one in %xmm2 */
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1	/* carry the last load into the next pass */
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl12LoopStart)	/* repeat while the sub did not borrow and the result is non-zero */
+	/* Loop exit: one optional 32-byte step, then dispatch through the jump table.  */
+L(Shl12LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)	/* signed result <= 0: skip the 32-byte step (target is outside this chunk) */
+
+	movaps	4(%eax), %xmm2
+	movaps	20(%eax), %xmm3
+	palignr	$12, %xmm2, %xmm3
+	palignr	$12, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx	/* %edx += %ecx + 32 */
+	lea	32(%eax, %ecx), %eax	/* %eax += %ecx + 32 */
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* TABLE = table_48bytes_fwd, INDEX = %ecx, SCALE = 4 */
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_12_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-12(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_12_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -948,8 +1652,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_12_end)
+	jb	L(sh_12_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -960,30 +1663,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_12_no_prefetch_loop)
 
-	jae	L(shl_12_loop)
-
-L(shl_12_end):
+L(sh_12_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	12(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_13):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-13(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_13_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-13(%eax), %xmm1
+#else
+	movl	DEST+4(%esp), %edi
+	movaps	-13(%eax), %xmm1
+	movdqu	%xmm0, (%edi)
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx
+# endif
+#endif
+	jb L(sh_13_no_prefetch)
 
+	lea	-64(%ecx), %ecx
+
+	.p2align 4
+L(Shl13LoopStart):
+	prefetcht0 0x1c0(%eax)
+	prefetcht0 0x1c0(%edx)
+	movaps	3(%eax), %xmm2
+	movaps	19(%eax), %xmm3
+	movaps	35(%eax), %xmm4
+	movaps	51(%eax), %xmm5
+	movaps	%xmm5, %xmm7
+	palignr	$13, %xmm4, %xmm5
+	palignr	$13, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$13, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$13, %xmm1, %xmm2
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl13LoopStart)
+
+L(Shl13LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)
+
+	movaps	3(%eax), %xmm2
+	movaps	19(%eax), %xmm3
+	palignr	$13, %xmm2, %xmm3
+	palignr	$13, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx
+	lea	32(%eax, %ecx), %eax
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_13_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-13(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_13_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -993,8 +1757,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_13_end)
+	jb	L(sh_13_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -1005,30 +1768,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_13_no_prefetch_loop)
 
-	jae	L(shl_13_loop)
-
-L(shl_13_end):
+L(sh_13_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	13(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_14):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-14(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_14_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-14(%eax), %xmm1
+#else
+	movl	DEST+4(%esp), %edi
+	movaps	-14(%eax), %xmm1
+	movdqu	%xmm0, (%edi)
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx
+# endif
+#endif
+	jb L(sh_14_no_prefetch)
 
+	lea	-64(%ecx), %ecx
+
+	.p2align 4
+L(Shl14LoopStart):
+	prefetcht0 0x1c0(%eax)
+	prefetcht0 0x1c0(%edx)
+	movaps	2(%eax), %xmm2
+	movaps	18(%eax), %xmm3
+	movaps	34(%eax), %xmm4
+	movaps	50(%eax), %xmm5
+	movaps	%xmm5, %xmm7
+	palignr	$14, %xmm4, %xmm5
+	palignr	$14, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$14, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$14, %xmm1, %xmm2
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl14LoopStart)
+
+L(Shl14LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)
+
+	movaps	2(%eax), %xmm2
+	movaps	18(%eax), %xmm3
+	palignr	$14, %xmm2, %xmm3
+	palignr	$14, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx
+	lea	32(%eax, %ecx), %eax
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_14_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-14(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_14_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -1038,8 +1862,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_14_end)
+	jb	L(sh_14_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -1050,30 +1873,91 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_14_no_prefetch_loop)
 
-	jae	L(shl_14_loop)
-
-L(shl_14_end):
+L(sh_14_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	14(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(shl_15):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-15(%eax), %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_15_loop):
+#ifndef USE_AS_MEMMOVE
+	movaps	-15(%eax), %xmm1
+#else
+	movl	DEST+4(%esp), %edi
+	movaps	-15(%eax), %xmm1
+	movdqu	%xmm0, (%edi)
+#endif
+#ifdef DATA_CACHE_SIZE_HALF
+	cmp	$DATA_CACHE_SIZE_HALF, %ecx
+#else
+# if (defined SHARED || defined __PIC__)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+	cmp	__x86_data_cache_size_half, %ecx
+# endif
+#endif
+	jb L(sh_15_no_prefetch)
 
+	lea	-64(%ecx), %ecx
+
+	.p2align 4
+L(Shl15LoopStart):
+	prefetcht0 0x1c0(%eax)
+	prefetcht0 0x1c0(%edx)
+	movaps	1(%eax), %xmm2
+	movaps	17(%eax), %xmm3
+	movaps	33(%eax), %xmm4
+	movaps	49(%eax), %xmm5
+	movaps	%xmm5, %xmm7
+	palignr	$15, %xmm4, %xmm5
+	palignr	$15, %xmm3, %xmm4
+	movaps	%xmm5, 48(%edx)
+	palignr	$15, %xmm2, %xmm3
+	lea	64(%eax), %eax
+	palignr	$15, %xmm1, %xmm2
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm7, %xmm1
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	sub	$64, %ecx
+	ja	L(Shl15LoopStart)
+
+L(Shl15LoopLeave):
+	add	$32, %ecx
+	jle	L(shl_end_0)
+
+	movaps	1(%eax), %xmm2
+	movaps	17(%eax), %xmm3
+	palignr	$15, %xmm2, %xmm3
+	palignr	$15, %xmm1, %xmm2
+
+	movaps	%xmm2, (%edx)
+	movaps	%xmm3, 16(%edx)
+	lea	32(%edx, %ecx), %edx
+	lea	32(%eax, %ecx), %eax
+	POP (%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(sh_15_no_prefetch):
+	lea	-32(%ecx), %ecx
+	lea	-15(%eax), %eax
+	xor	%edi, %edi
+
+	.p2align 4
+L(sh_15_no_prefetch_loop):
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
 	movdqa	32(%eax, %edi), %xmm3
@@ -1083,8 +1967,7 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_15_end)
+	jb	L(sh_15_end_no_prefetch_loop)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -1095,52 +1978,70 @@
 	lea	32(%edi), %edi
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
+	jae	L(sh_15_no_prefetch_loop)
 
-	jae	L(shl_15_loop)
-
-L(shl_15_end):
+L(sh_15_end_no_prefetch_loop):
 	lea	32(%ecx), %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	15(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
 
+	CFI_PUSH (%edi)
 
-	ALIGN (4)
+	.p2align 4
+L(shl_end_0):
+	lea	32(%ecx), %ecx
+	lea	(%edx, %ecx), %edx
+	lea	(%eax, %ecx), %eax
+	POP	(%edi)
+	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+	.p2align 4
 L(fwd_write_44bytes):
-	movl	-44(%eax), %ecx
-	movl	%ecx, -44(%edx)
-L(fwd_write_40bytes):
-	movl	-40(%eax), %ecx
-	movl	%ecx, -40(%edx)
+	movq	-44(%eax), %xmm0
+	movq	%xmm0, -44(%edx)
 L(fwd_write_36bytes):
-	movl	-36(%eax), %ecx
-	movl	%ecx, -36(%edx)
-L(fwd_write_32bytes):
-	movl	-32(%eax), %ecx
-	movl	%ecx, -32(%edx)
+	movq	-36(%eax), %xmm0
+	movq	%xmm0, -36(%edx)
 L(fwd_write_28bytes):
-	movl	-28(%eax), %ecx
-	movl	%ecx, -28(%edx)
-L(fwd_write_24bytes):
-	movl	-24(%eax), %ecx
-	movl	%ecx, -24(%edx)
+	movq	-28(%eax), %xmm0
+	movq	%xmm0, -28(%edx)
 L(fwd_write_20bytes):
-	movl	-20(%eax), %ecx
-	movl	%ecx, -20(%edx)
-L(fwd_write_16bytes):
-	movl	-16(%eax), %ecx
-	movl	%ecx, -16(%edx)
+	movq	-20(%eax), %xmm0
+	movq	%xmm0, -20(%edx)
 L(fwd_write_12bytes):
-	movl	-12(%eax), %ecx
-	movl	%ecx, -12(%edx)
-L(fwd_write_8bytes):
-	movl	-8(%eax), %ecx
-	movl	%ecx, -8(%edx)
+	movq	-12(%eax), %xmm0
+	movq	%xmm0, -12(%edx)
 L(fwd_write_4bytes):
 	movl	-4(%eax), %ecx
 	movl	%ecx, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_40bytes):
+	movq	-40(%eax), %xmm0
+	movq	%xmm0, -40(%edx)
+L(fwd_write_32bytes):
+	movq	-32(%eax), %xmm0
+	movq	%xmm0, -32(%edx)
+L(fwd_write_24bytes):
+	movq	-24(%eax), %xmm0
+	movq	%xmm0, -24(%edx)
+L(fwd_write_16bytes):
+	movq	-16(%eax), %xmm0
+	movq	%xmm0, -16(%edx)
+L(fwd_write_8bytes):
+	movq	-8(%eax), %xmm0
+	movq	%xmm0, -8(%edx)
 L(fwd_write_0bytes):
 #ifndef USE_AS_BCOPY
 # ifdef USE_AS_MEMPCPY
@@ -1151,7 +2052,7 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(fwd_write_5bytes):
 	movl	-5(%eax), %ecx
 	movl	-4(%eax), %eax
@@ -1166,39 +2067,51 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(fwd_write_45bytes):
-	movl	-45(%eax), %ecx
-	movl	%ecx, -45(%edx)
-L(fwd_write_41bytes):
-	movl	-41(%eax), %ecx
-	movl	%ecx, -41(%edx)
+	movq	-45(%eax), %xmm0
+	movq	%xmm0, -45(%edx)
 L(fwd_write_37bytes):
-	movl	-37(%eax), %ecx
-	movl	%ecx, -37(%edx)
-L(fwd_write_33bytes):
-	movl	-33(%eax), %ecx
-	movl	%ecx, -33(%edx)
+	movq	-37(%eax), %xmm0
+	movq	%xmm0, -37(%edx)
 L(fwd_write_29bytes):
-	movl	-29(%eax), %ecx
-	movl	%ecx, -29(%edx)
-L(fwd_write_25bytes):
-	movl	-25(%eax), %ecx
-	movl	%ecx, -25(%edx)
+	movq	-29(%eax), %xmm0
+	movq	%xmm0, -29(%edx)
 L(fwd_write_21bytes):
-	movl	-21(%eax), %ecx
-	movl	%ecx, -21(%edx)
-L(fwd_write_17bytes):
-	movl	-17(%eax), %ecx
-	movl	%ecx, -17(%edx)
+	movq	-21(%eax), %xmm0
+	movq	%xmm0, -21(%edx)
 L(fwd_write_13bytes):
-	movl	-13(%eax), %ecx
-	movl	%ecx, -13(%edx)
-L(fwd_write_9bytes):
-	movl	-9(%eax), %ecx
-	movl	%ecx, -9(%edx)
+	movq	-13(%eax), %xmm0
+	movq	%xmm0, -13(%edx)
 	movl	-5(%eax), %ecx
 	movl	%ecx, -5(%edx)
+	movzbl	-1(%eax), %ecx
+	movb	%cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_41bytes):
+	movq	-41(%eax), %xmm0
+	movq	%xmm0, -41(%edx)
+L(fwd_write_33bytes):
+	movq	-33(%eax), %xmm0
+	movq	%xmm0, -33(%edx)
+L(fwd_write_25bytes):
+	movq	-25(%eax), %xmm0
+	movq	%xmm0, -25(%edx)
+L(fwd_write_17bytes):
+	movq	-17(%eax), %xmm0
+	movq	%xmm0, -17(%edx)
+L(fwd_write_9bytes):
+	movq	-9(%eax), %xmm0
+	movq	%xmm0, -9(%edx)
 L(fwd_write_1bytes):
 	movzbl	-1(%eax), %ecx
 	movb	%cl, -1(%edx)
@@ -1211,40 +2124,52 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(fwd_write_46bytes):
-	movl	-46(%eax), %ecx
-	movl	%ecx, -46(%edx)
-L(fwd_write_42bytes):
-	movl	-42(%eax), %ecx
-	movl	%ecx, -42(%edx)
+	movq	-46(%eax), %xmm0
+	movq	%xmm0, -46(%edx)
 L(fwd_write_38bytes):
-	movl	-38(%eax), %ecx
-	movl	%ecx, -38(%edx)
-L(fwd_write_34bytes):
-	movl	-34(%eax), %ecx
-	movl	%ecx, -34(%edx)
+	movq	-38(%eax), %xmm0
+	movq	%xmm0, -38(%edx)
 L(fwd_write_30bytes):
-	movl	-30(%eax), %ecx
-	movl	%ecx, -30(%edx)
-L(fwd_write_26bytes):
-	movl	-26(%eax), %ecx
-	movl	%ecx, -26(%edx)
+	movq	-30(%eax), %xmm0
+	movq	%xmm0, -30(%edx)
 L(fwd_write_22bytes):
-	movl	-22(%eax), %ecx
-	movl	%ecx, -22(%edx)
-L(fwd_write_18bytes):
-	movl	-18(%eax), %ecx
-	movl	%ecx, -18(%edx)
+	movq	-22(%eax), %xmm0
+	movq	%xmm0, -22(%edx)
 L(fwd_write_14bytes):
-	movl	-14(%eax), %ecx
-	movl	%ecx, -14(%edx)
-L(fwd_write_10bytes):
-	movl	-10(%eax), %ecx
-	movl	%ecx, -10(%edx)
+	movq	-14(%eax), %xmm0
+	movq	%xmm0, -14(%edx)
 L(fwd_write_6bytes):
 	movl	-6(%eax), %ecx
 	movl	%ecx, -6(%edx)
+	movzwl	-2(%eax), %ecx
+	movw	%cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_42bytes):
+	movq	-42(%eax), %xmm0
+	movq	%xmm0, -42(%edx)
+L(fwd_write_34bytes):
+	movq	-34(%eax), %xmm0
+	movq	%xmm0, -34(%edx)
+L(fwd_write_26bytes):
+	movq	-26(%eax), %xmm0
+	movq	%xmm0, -26(%edx)
+L(fwd_write_18bytes):
+	movq	-18(%eax), %xmm0
+	movq	%xmm0, -18(%edx)
+L(fwd_write_10bytes):
+	movq	-10(%eax), %xmm0
+	movq	%xmm0, -10(%edx)
 L(fwd_write_2bytes):
 	movzwl	-2(%eax), %ecx
 	movw	%cx, -2(%edx)
@@ -1257,40 +2182,54 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(fwd_write_47bytes):
-	movl	-47(%eax), %ecx
-	movl	%ecx, -47(%edx)
-L(fwd_write_43bytes):
-	movl	-43(%eax), %ecx
-	movl	%ecx, -43(%edx)
+	movq	-47(%eax), %xmm0
+	movq	%xmm0, -47(%edx)
 L(fwd_write_39bytes):
-	movl	-39(%eax), %ecx
-	movl	%ecx, -39(%edx)
-L(fwd_write_35bytes):
-	movl	-35(%eax), %ecx
-	movl	%ecx, -35(%edx)
+	movq	-39(%eax), %xmm0
+	movq	%xmm0, -39(%edx)
 L(fwd_write_31bytes):
-	movl	-31(%eax), %ecx
-	movl	%ecx, -31(%edx)
-L(fwd_write_27bytes):
-	movl	-27(%eax), %ecx
-	movl	%ecx, -27(%edx)
+	movq	-31(%eax), %xmm0
+	movq	%xmm0, -31(%edx)
 L(fwd_write_23bytes):
-	movl	-23(%eax), %ecx
-	movl	%ecx, -23(%edx)
-L(fwd_write_19bytes):
-	movl	-19(%eax), %ecx
-	movl	%ecx, -19(%edx)
+	movq	-23(%eax), %xmm0
+	movq	%xmm0, -23(%edx)
 L(fwd_write_15bytes):
-	movl	-15(%eax), %ecx
-	movl	%ecx, -15(%edx)
-L(fwd_write_11bytes):
-	movl	-11(%eax), %ecx
-	movl	%ecx, -11(%edx)
+	movq	-15(%eax), %xmm0
+	movq	%xmm0, -15(%edx)
 L(fwd_write_7bytes):
 	movl	-7(%eax), %ecx
 	movl	%ecx, -7(%edx)
+	movzwl	-3(%eax), %ecx
+	movzbl	-1(%eax), %eax
+	movw	%cx, -3(%edx)
+	movb	%al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_43bytes):
+	movq	-43(%eax), %xmm0
+	movq	%xmm0, -43(%edx)
+L(fwd_write_35bytes):
+	movq	-35(%eax), %xmm0
+	movq	%xmm0, -35(%edx)
+L(fwd_write_27bytes):
+	movq	-27(%eax), %xmm0
+	movq	%xmm0, -27(%edx)
+L(fwd_write_19bytes):
+	movq	-19(%eax), %xmm0
+	movq	%xmm0, -19(%edx)
+L(fwd_write_11bytes):
+	movq	-11(%eax), %xmm0
+	movq	%xmm0, -11(%edx)
 L(fwd_write_3bytes):
 	movzwl	-3(%eax), %ecx
 	movzbl	-1(%eax), %eax
@@ -1303,20 +2242,374 @@
 	movl	DEST(%esp), %eax
 # endif
 #endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_40bytes_align):
+	movdqa	-40(%eax), %xmm0
+	movdqa	%xmm0, -40(%edx)
+L(fwd_write_24bytes_align):
+	movdqa	-24(%eax), %xmm0
+	movdqa	%xmm0, -24(%edx)
+L(fwd_write_8bytes_align):
+	movq	-8(%eax), %xmm0
+	movq	%xmm0, -8(%edx)
+L(fwd_write_0bytes_align):
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_32bytes_align):
+	movdqa	-32(%eax), %xmm0
+	movdqa	%xmm0, -32(%edx)
+L(fwd_write_16bytes_align):
+	movdqa	-16(%eax), %xmm0
+	movdqa	%xmm0, -16(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_5bytes_align):
+	movl	-5(%eax), %ecx
+	movl	-4(%eax), %eax
+	movl	%ecx, -5(%edx)
+	movl	%eax, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_45bytes_align):
+	movdqa	-45(%eax), %xmm0
+	movdqa	%xmm0, -45(%edx)
+L(fwd_write_29bytes_align):
+	movdqa	-29(%eax), %xmm0
+	movdqa	%xmm0, -29(%edx)
+L(fwd_write_13bytes_align):
+	movq	-13(%eax), %xmm0
+	movq	%xmm0, -13(%edx)
+	movl	-5(%eax), %ecx
+	movl	%ecx, -5(%edx)
+	movzbl	-1(%eax), %ecx
+	movb	%cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_37bytes_align):
+	movdqa	-37(%eax), %xmm0
+	movdqa	%xmm0, -37(%edx)
+L(fwd_write_21bytes_align):
+	movdqa	-21(%eax), %xmm0
+	movdqa	%xmm0, -21(%edx)
+	movl	-5(%eax), %ecx
+	movl	%ecx, -5(%edx)
+	movzbl	-1(%eax), %ecx
+	movb	%cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_41bytes_align):
+	movdqa	-41(%eax), %xmm0
+	movdqa	%xmm0, -41(%edx)
+L(fwd_write_25bytes_align):
+	movdqa	-25(%eax), %xmm0
+	movdqa	%xmm0, -25(%edx)
+L(fwd_write_9bytes_align):
+	movq	-9(%eax), %xmm0
+	movq	%xmm0, -9(%edx)
+L(fwd_write_1bytes_align):
+	movzbl	-1(%eax), %ecx
+	movb	%cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_33bytes_align):
+	movdqa	-33(%eax), %xmm0
+	movdqa	%xmm0, -33(%edx)
+L(fwd_write_17bytes_align):
+	movdqa	-17(%eax), %xmm0
+	movdqa	%xmm0, -17(%edx)
+	movzbl	-1(%eax), %ecx
+	movb	%cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_46bytes_align):
+	movdqa	-46(%eax), %xmm0
+	movdqa	%xmm0, -46(%edx)
+L(fwd_write_30bytes_align):
+	movdqa	-30(%eax), %xmm0
+	movdqa	%xmm0, -30(%edx)
+L(fwd_write_14bytes_align):
+	movq	-14(%eax), %xmm0
+	movq	%xmm0, -14(%edx)
+L(fwd_write_6bytes_align):
+	movl	-6(%eax), %ecx
+	movl	%ecx, -6(%edx)
+	movzwl	-2(%eax), %ecx
+	movw	%cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_38bytes_align):
+	movdqa	-38(%eax), %xmm0
+	movdqa	%xmm0, -38(%edx)
+L(fwd_write_22bytes_align):
+	movdqa	-22(%eax), %xmm0
+	movdqa	%xmm0, -22(%edx)
+	movl	-6(%eax), %ecx
+	movl	%ecx, -6(%edx)
+	movzwl	-2(%eax), %ecx
+	movw	%cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_42bytes_align):
+	movdqa	-42(%eax), %xmm0
+	movdqa	%xmm0, -42(%edx)
+L(fwd_write_26bytes_align):
+	movdqa	-26(%eax), %xmm0
+	movdqa	%xmm0, -26(%edx)
+L(fwd_write_10bytes_align):
+	movq	-10(%eax), %xmm0
+	movq	%xmm0, -10(%edx)
+L(fwd_write_2bytes_align):
+	movzwl	-2(%eax), %ecx
+	movw	%cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_34bytes_align):
+	movdqa	-34(%eax), %xmm0
+	movdqa	%xmm0, -34(%edx)
+L(fwd_write_18bytes_align):
+	movdqa	-18(%eax), %xmm0
+	movdqa	%xmm0, -18(%edx)
+	movzwl	-2(%eax), %ecx
+	movw	%cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_47bytes_align):
+	movdqa	-47(%eax), %xmm0
+	movdqa	%xmm0, -47(%edx)
+L(fwd_write_31bytes_align):
+	movdqa	-31(%eax), %xmm0
+	movdqa	%xmm0, -31(%edx)
+L(fwd_write_15bytes_align):
+	movq	-15(%eax), %xmm0
+	movq	%xmm0, -15(%edx)
+L(fwd_write_7bytes_align):
+	movl	-7(%eax), %ecx
+	movl	%ecx, -7(%edx)
+	movzwl	-3(%eax), %ecx
+	movzbl	-1(%eax), %eax
+	movw	%cx, -3(%edx)
+	movb	%al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_39bytes_align):
+	movdqa	-39(%eax), %xmm0
+	movdqa	%xmm0, -39(%edx)
+L(fwd_write_23bytes_align):
+	movdqa	-23(%eax), %xmm0
+	movdqa	%xmm0, -23(%edx)
+	movl	-7(%eax), %ecx
+	movl	%ecx, -7(%edx)
+	movzwl	-3(%eax), %ecx
+	movzbl	-1(%eax), %eax
+	movw	%cx, -3(%edx)
+	movb	%al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_43bytes_align):
+	movdqa	-43(%eax), %xmm0
+	movdqa	%xmm0, -43(%edx)
+L(fwd_write_27bytes_align):
+	movdqa	-27(%eax), %xmm0
+	movdqa	%xmm0, -27(%edx)
+L(fwd_write_11bytes_align):
+	movq	-11(%eax), %xmm0
+	movq	%xmm0, -11(%edx)
+L(fwd_write_3bytes_align):
+	movzwl	-3(%eax), %ecx
+	movzbl	-1(%eax), %eax
+	movw	%cx, -3(%edx)
+	movb	%al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_35bytes_align):
+	movdqa	-35(%eax), %xmm0
+	movdqa	%xmm0, -35(%edx)
+L(fwd_write_19bytes_align):
+	movdqa	-19(%eax), %xmm0
+	movdqa	%xmm0, -19(%edx)
+	movzwl	-3(%eax), %ecx
+	movzbl	-1(%eax), %eax
+	movw	%cx, -3(%edx)
+	movb	%al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_44bytes_align):
+	movdqa	-44(%eax), %xmm0
+	movdqa	%xmm0, -44(%edx)
+L(fwd_write_28bytes_align):
+	movdqa	-28(%eax), %xmm0
+	movdqa	%xmm0, -28(%edx)
+L(fwd_write_12bytes_align):
+	movq	-12(%eax), %xmm0
+	movq	%xmm0, -12(%edx)
+L(fwd_write_4bytes_align):
+	movl	-4(%eax), %ecx
+	movl	%ecx, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(fwd_write_36bytes_align):
+	movdqa	-36(%eax), %xmm0
+	movdqa	%xmm0, -36(%edx)
+L(fwd_write_20bytes_align):
+	movdqa	-20(%eax), %xmm0
+	movdqa	%xmm0, -20(%edx)
+	movl	-4(%eax), %ecx
+	movl	%ecx, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+	movl	%edx, %eax
+# else
+	movl	DEST(%esp), %eax
+# endif
+#endif
 	RETURN_END
 
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(large_page):
 	movdqu	(%eax), %xmm1
+#ifdef USE_AS_MEMMOVE
+	movl	DEST+4(%esp), %edi
+	movdqu	%xmm0, (%edi)
+#endif
 	lea	16(%eax), %eax
-	movdqu	%xmm0, (%esi)
 	movntdq	%xmm1, (%edx)
 	lea	16(%edx), %edx
-	POP (%esi)
 	lea	-0x90(%ecx), %ecx
 	POP (%edi)
+
+	.p2align 4
 L(large_page_loop):
 	movdqu	(%eax), %xmm0
 	movdqu	0x10(%eax), %xmm1
@@ -1371,38 +2664,22 @@
 	sfence
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
-
-	ALIGN (4)
+	.p2align 4
 L(bk_write_44bytes):
-	movl	40(%eax), %ecx
-	movl	%ecx, 40(%edx)
-L(bk_write_40bytes):
-	movl	36(%eax), %ecx
-	movl	%ecx, 36(%edx)
+	movq	36(%eax), %xmm0
+	movq	%xmm0, 36(%edx)
 L(bk_write_36bytes):
-	movl	32(%eax), %ecx
-	movl	%ecx, 32(%edx)
-L(bk_write_32bytes):
-	movl	28(%eax), %ecx
-	movl	%ecx, 28(%edx)
+	movq	28(%eax), %xmm0
+	movq	%xmm0, 28(%edx)
 L(bk_write_28bytes):
-	movl	24(%eax), %ecx
-	movl	%ecx, 24(%edx)
-L(bk_write_24bytes):
-	movl	20(%eax), %ecx
-	movl	%ecx, 20(%edx)
+	movq	20(%eax), %xmm0
+	movq	%xmm0, 20(%edx)
 L(bk_write_20bytes):
-	movl	16(%eax), %ecx
-	movl	%ecx, 16(%edx)
-L(bk_write_16bytes):
-	movl	12(%eax), %ecx
-	movl	%ecx, 12(%edx)
+	movq	12(%eax), %xmm0
+	movq	%xmm0, 12(%edx)
 L(bk_write_12bytes):
-	movl	8(%eax), %ecx
-	movl	%ecx, 8(%edx)
-L(bk_write_8bytes):
-	movl	4(%eax), %ecx
-	movl	%ecx, 4(%edx)
+	movq	4(%eax), %xmm0
+	movq	%xmm0, 4(%edx)
 L(bk_write_4bytes):
 	movl	(%eax), %ecx
 	movl	%ecx, (%edx)
@@ -1416,37 +2693,47 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
+L(bk_write_40bytes):
+	movq	32(%eax), %xmm0
+	movq	%xmm0, 32(%edx)
+L(bk_write_32bytes):
+	movq	24(%eax), %xmm0
+	movq	%xmm0, 24(%edx)
+L(bk_write_24bytes):
+	movq	16(%eax), %xmm0
+	movq	%xmm0, 16(%edx)
+L(bk_write_16bytes):
+	movq	8(%eax), %xmm0
+	movq	%xmm0, 8(%edx)
+L(bk_write_8bytes):
+	movq	(%eax), %xmm0
+	movq	%xmm0, (%edx)
+#ifndef USE_AS_BCOPY
+	movl	DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+	movl	LEN(%esp), %ecx
+	add	%ecx, %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
 L(bk_write_45bytes):
-	movl	41(%eax), %ecx
-	movl	%ecx, 41(%edx)
-L(bk_write_41bytes):
-	movl	37(%eax), %ecx
-	movl	%ecx, 37(%edx)
+	movq	37(%eax), %xmm0
+	movq	%xmm0, 37(%edx)
 L(bk_write_37bytes):
-	movl	33(%eax), %ecx
-	movl	%ecx, 33(%edx)
-L(bk_write_33bytes):
-	movl	29(%eax), %ecx
-	movl	%ecx, 29(%edx)
+	movq	29(%eax), %xmm0
+	movq	%xmm0, 29(%edx)
 L(bk_write_29bytes):
-	movl	25(%eax), %ecx
-	movl	%ecx, 25(%edx)
-L(bk_write_25bytes):
-	movl	21(%eax), %ecx
-	movl	%ecx, 21(%edx)
+	movq	21(%eax), %xmm0
+	movq	%xmm0, 21(%edx)
 L(bk_write_21bytes):
-	movl	17(%eax), %ecx
-	movl	%ecx, 17(%edx)
-L(bk_write_17bytes):
-	movl	13(%eax), %ecx
-	movl	%ecx, 13(%edx)
+	movq	13(%eax), %xmm0
+	movq	%xmm0, 13(%edx)
 L(bk_write_13bytes):
-	movl	9(%eax), %ecx
-	movl	%ecx, 9(%edx)
-L(bk_write_9bytes):
-	movl	5(%eax), %ecx
-	movl	%ecx, 5(%edx)
+	movq	5(%eax), %xmm0
+	movq	%xmm0, 5(%edx)
 L(bk_write_5bytes):
 	movl	1(%eax), %ecx
 	movl	%ecx, 1(%edx)
@@ -1462,40 +2749,79 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
+L(bk_write_41bytes):
+	movq	33(%eax), %xmm0
+	movq	%xmm0, 33(%edx)
+L(bk_write_33bytes):
+	movq	25(%eax), %xmm0
+	movq	%xmm0, 25(%edx)
+L(bk_write_25bytes):
+	movq	17(%eax), %xmm0
+	movq	%xmm0, 17(%edx)
+L(bk_write_17bytes):
+	movq	9(%eax), %xmm0
+	movq	%xmm0, 9(%edx)
+L(bk_write_9bytes):
+	movq	1(%eax), %xmm0
+	movq	%xmm0, 1(%edx)
+	movzbl	(%eax), %ecx
+	movb	%cl, (%edx)
+#ifndef USE_AS_BCOPY
+	movl	DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+	movl	LEN(%esp), %ecx
+	add	%ecx, %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
 L(bk_write_46bytes):
-	movl	42(%eax), %ecx
-	movl	%ecx, 42(%edx)
-L(bk_write_42bytes):
-	movl	38(%eax), %ecx
-	movl	%ecx, 38(%edx)
+	movq	38(%eax), %xmm0
+	movq	%xmm0, 38(%edx)
 L(bk_write_38bytes):
-	movl	34(%eax), %ecx
-	movl	%ecx, 34(%edx)
-L(bk_write_34bytes):
-	movl	30(%eax), %ecx
-	movl	%ecx, 30(%edx)
+	movq	30(%eax), %xmm0
+	movq	%xmm0, 30(%edx)
 L(bk_write_30bytes):
-	movl	26(%eax), %ecx
-	movl	%ecx, 26(%edx)
-L(bk_write_26bytes):
-	movl	22(%eax), %ecx
-	movl	%ecx, 22(%edx)
+	movq	22(%eax), %xmm0
+	movq	%xmm0, 22(%edx)
 L(bk_write_22bytes):
-	movl	18(%eax), %ecx
-	movl	%ecx, 18(%edx)
-L(bk_write_18bytes):
-	movl	14(%eax), %ecx
-	movl	%ecx, 14(%edx)
+	movq	14(%eax), %xmm0
+	movq	%xmm0, 14(%edx)
 L(bk_write_14bytes):
-	movl	10(%eax), %ecx
-	movl	%ecx, 10(%edx)
-L(bk_write_10bytes):
-	movl	6(%eax), %ecx
-	movl	%ecx, 6(%edx)
+	movq	6(%eax), %xmm0
+	movq	%xmm0, 6(%edx)
 L(bk_write_6bytes):
 	movl	2(%eax), %ecx
 	movl	%ecx, 2(%edx)
+	movzwl	(%eax), %ecx
+	movw	%cx, (%edx)
+#ifndef USE_AS_BCOPY
+	movl	DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+	movl	LEN(%esp), %ecx
+	add	%ecx, %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(bk_write_42bytes):
+	movq	34(%eax), %xmm0
+	movq	%xmm0, 34(%edx)
+L(bk_write_34bytes):
+	movq	26(%eax), %xmm0
+	movq	%xmm0, 26(%edx)
+L(bk_write_26bytes):
+	movq	18(%eax), %xmm0
+	movq	%xmm0, 18(%edx)
+L(bk_write_18bytes):
+	movq	10(%eax), %xmm0
+	movq	%xmm0, 10(%edx)
+L(bk_write_10bytes):
+	movq	2(%eax), %xmm0
+	movq	%xmm0, 2(%edx)
 L(bk_write_2bytes):
 	movzwl	(%eax), %ecx
 	movw	%cx, (%edx)
@@ -1508,40 +2834,54 @@
 #endif
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(bk_write_47bytes):
-	movl	43(%eax), %ecx
-	movl	%ecx, 43(%edx)
-L(bk_write_43bytes):
-	movl	39(%eax), %ecx
-	movl	%ecx, 39(%edx)
+	movq	39(%eax), %xmm0
+	movq	%xmm0, 39(%edx)
 L(bk_write_39bytes):
-	movl	35(%eax), %ecx
-	movl	%ecx, 35(%edx)
-L(bk_write_35bytes):
-	movl	31(%eax), %ecx
-	movl	%ecx, 31(%edx)
+	movq	31(%eax), %xmm0
+	movq	%xmm0, 31(%edx)
 L(bk_write_31bytes):
-	movl	27(%eax), %ecx
-	movl	%ecx, 27(%edx)
-L(bk_write_27bytes):
-	movl	23(%eax), %ecx
-	movl	%ecx, 23(%edx)
+	movq	23(%eax), %xmm0
+	movq	%xmm0, 23(%edx)
 L(bk_write_23bytes):
-	movl	19(%eax), %ecx
-	movl	%ecx, 19(%edx)
-L(bk_write_19bytes):
-	movl	15(%eax), %ecx
-	movl	%ecx, 15(%edx)
+	movq	15(%eax), %xmm0
+	movq	%xmm0, 15(%edx)
 L(bk_write_15bytes):
-	movl	11(%eax), %ecx
-	movl	%ecx, 11(%edx)
-L(bk_write_11bytes):
-	movl	7(%eax), %ecx
-	movl	%ecx, 7(%edx)
+	movq	7(%eax), %xmm0
+	movq	%xmm0, 7(%edx)
 L(bk_write_7bytes):
 	movl	3(%eax), %ecx
 	movl	%ecx, 3(%edx)
+	movzwl	1(%eax), %ecx
+	movw	%cx, 1(%edx)
+	movzbl	(%eax), %eax
+	movb	%al, (%edx)
+#ifndef USE_AS_BCOPY
+	movl	DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+	movl	LEN(%esp), %ecx
+	add	%ecx, %eax
+# endif
+#endif
+	RETURN
+
+	.p2align 4
+L(bk_write_43bytes):
+	movq	35(%eax), %xmm0
+	movq	%xmm0, 35(%edx)
+L(bk_write_35bytes):
+	movq	27(%eax), %xmm0
+	movq	%xmm0, 27(%edx)
+L(bk_write_27bytes):
+	movq	19(%eax), %xmm0
+	movq	%xmm0, 19(%edx)
+L(bk_write_19bytes):
+	movq	11(%eax), %xmm0
+	movq	%xmm0, 11(%edx)
+L(bk_write_11bytes):
+	movq	3(%eax), %xmm0
+	movq	%xmm0, 3(%edx)
 L(bk_write_3bytes):
 	movzwl	1(%eax), %ecx
 	movw	%cx, 1(%edx)
@@ -1558,7 +2898,7 @@
 
 
 	.pushsection .rodata.ssse3,"a",@progbits
-	ALIGN (2)
+	.p2align 2
 L(table_48bytes_fwd):
 	.int	JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
 	.int	JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
@@ -1609,7 +2949,58 @@
 	.int	JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
 	.int	JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
 
-	ALIGN (2)
+	.p2align 2
+L(table_48bytes_fwd_align):
+	.int	JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
+	.int	JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
+
+	.p2align 2
 L(shl_table):
 	.int	JMPTBL (L(shl_0), L(shl_table))
 	.int	JMPTBL (L(shl_1), L(shl_table))
@@ -1628,7 +3019,7 @@
 	.int	JMPTBL (L(shl_14), L(shl_table))
 	.int	JMPTBL (L(shl_15), L(shl_table))
 
-	ALIGN (2)
+	.p2align 2
 L(table_48_bytes_bwd):
 	.int	JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
 	.int	JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
@@ -1682,12 +3073,12 @@
 	.popsection
 
 #ifdef USE_AS_MEMMOVE
-	ALIGN (4)
+	.p2align 4
 L(copy_backward):
-	PUSH (%esi)
-	movl	%eax, %esi
+	PUSH (%edi)
+	movl	%eax, %edi
 	lea	(%ecx,%edx,1),%edx
-	lea	(%ecx,%esi,1),%esi
+	lea	(%ecx,%edi,1),%edi
 	testl	$0x3, %edx
 	jnz	L(bk_align)
 
@@ -1702,60 +3093,53 @@
 L(bk_write_more32bytes):
 	/* Copy 32 bytes at a time.  */
 	sub	$32, %ecx
-	movl	-4(%esi), %eax
-	movl	%eax, -4(%edx)
-	movl	-8(%esi), %eax
-	movl	%eax, -8(%edx)
-	movl	-12(%esi), %eax
-	movl	%eax, -12(%edx)
-	movl	-16(%esi), %eax
-	movl	%eax, -16(%edx)
-	movl	-20(%esi), %eax
-	movl	%eax, -20(%edx)
-	movl	-24(%esi), %eax
-	movl	%eax, -24(%edx)
-	movl	-28(%esi), %eax
-	movl	%eax, -28(%edx)
-	movl	-32(%esi), %eax
-	movl	%eax, -32(%edx)
+	movq	-8(%edi), %xmm0
+	movq	%xmm0, -8(%edx)
+	movq	-16(%edi), %xmm0
+	movq	%xmm0, -16(%edx)
+	movq	-24(%edi), %xmm0
+	movq	%xmm0, -24(%edx)
+	movq	-32(%edi), %xmm0
+	movq	%xmm0, -32(%edx)
 	sub	$32, %edx
-	sub	$32, %esi
+	sub	$32, %edi
 
 L(bk_write_less32bytes):
-	movl	%esi, %eax
+	movl	%edi, %eax
 	sub	%ecx, %edx
 	sub	%ecx, %eax
-	POP (%esi)
+	POP (%edi)
 L(bk_write_less32bytes_2):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
 
-	CFI_PUSH (%esi)
-	ALIGN (4)
+	CFI_PUSH (%edi)
+
+	.p2align 4
 L(bk_align):
 	cmp	$8, %ecx
 	jbe	L(bk_write_less32bytes)
 	testl	$1, %edx
 	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
-	   then (EDX & 2) must be != 0.  */
+	   then (EDX & 2) must be != 0.  */
 	jz	L(bk_got2)
-	sub	$1, %esi
+	sub	$1, %edi
 	sub	$1, %ecx
 	sub	$1, %edx
-	movzbl	(%esi), %eax
+	movzbl	(%edi), %eax
 	movb	%al, (%edx)
 
 	testl	$2, %edx
 	jz	L(bk_aligned_4)
 
 L(bk_got2):
-	sub	$2, %esi
+	sub	$2, %edi
 	sub	$2, %ecx
 	sub	$2, %edx
-	movzwl	(%esi), %eax
+	movzwl	(%edi), %eax
 	movw	%ax, (%edx)
 	jmp	L(bk_aligned_4)
 
-	ALIGN (4)
+	.p2align 4
 L(bk_write_more64bytes):
 	/* Check alignment of last byte.  */
 	testl	$15, %edx
@@ -1763,45 +3147,46 @@
 
 /* EDX is aligned 4 bytes, but not 16 bytes.  */
 L(bk_ssse3_align):
-	sub	$4, %esi
+	sub	$4, %edi
 	sub	$4, %ecx
 	sub	$4, %edx
-	movl	(%esi), %eax
+	movl	(%edi), %eax
 	movl	%eax, (%edx)
 
 	testl	$15, %edx
 	jz	L(bk_ssse3_cpy_pre)
 
-	sub	$4, %esi
+	sub	$4, %edi
 	sub	$4, %ecx
 	sub	$4, %edx
-	movl	(%esi), %eax
+	movl	(%edi), %eax
 	movl	%eax, (%edx)
 
 	testl	$15, %edx
 	jz	L(bk_ssse3_cpy_pre)
 
-	sub	$4, %esi
+	sub	$4, %edi
 	sub	$4, %ecx
 	sub	$4, %edx
-	movl	(%esi), %eax
+	movl	(%edi), %eax
 	movl	%eax, (%edx)
 
 L(bk_ssse3_cpy_pre):
 	cmp	$64, %ecx
 	jb	L(bk_write_more32bytes)
 
+	.p2align 4
 L(bk_ssse3_cpy):
-	sub	$64, %esi
+	sub	$64, %edi
 	sub	$64, %ecx
 	sub	$64, %edx
-	movdqu	0x30(%esi), %xmm3
+	movdqu	0x30(%edi), %xmm3
 	movdqa	%xmm3, 0x30(%edx)
-	movdqu	0x20(%esi), %xmm2
+	movdqu	0x20(%edi), %xmm2
 	movdqa	%xmm2, 0x20(%edx)
-	movdqu	0x10(%esi), %xmm1
+	movdqu	0x10(%edi), %xmm1
 	movdqa	%xmm1, 0x10(%edx)
-	movdqu	(%esi), %xmm0
+	movdqu	(%edi), %xmm0
 	movdqa	%xmm0, (%edx)
 	cmp	$64, %ecx
 	jae	L(bk_ssse3_cpy)
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index f611a21..c075d51 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -576,6 +576,17 @@
         _pthread_internal_remove(thread);
         _pthread_internal_free(thread);
     } else {
+        pthread_mutex_lock(&gThreadListLock);
+
+       /* make sure that the thread struct doesn't have stale pointers to a stack that
+        * will be unmapped after the exit call below.
+        */
+        if (!user_stack) {
+            thread->attr.stack_base = NULL;
+            thread->attr.stack_size = 0;
+            thread->tls = NULL;
+        }
+
        /* the join_count field is used to store the number of threads waiting for
         * the termination of this thread with pthread_join(),
         *
@@ -588,7 +599,6 @@
         * is gone (as well as its TLS area). when another thread calls pthread_join()
         * on it, it will immediately free the thread and return.
         */
-        pthread_mutex_lock(&gThreadListLock);
         thread->return_value = retval;
         if (thread->join_count > 0) {
             pthread_cond_broadcast(&thread->join_cond);
@@ -1712,7 +1722,9 @@
          * similarly, it is possible to have thr->tls == NULL for threads that
          * were just recently created through pthread_create() but whose
          * startup trampoline (__thread_entry) hasn't been run yet by the
-         * scheduler. so check for this too.
+         * scheduler. thr->tls will also be NULL after its stack has been
+         * unmapped but before the ongoing pthread_join() is finished.
+         * so check for this too.
          */
         if (thr->join_count < 0 || !thr->tls)
             continue;
diff --git a/libc/include/sys/linux-syscalls.h b/libc/include/sys/linux-syscalls.h
index 0cb3100..de64c6b 100644
--- a/libc/include/sys/linux-syscalls.h
+++ b/libc/include/sys/linux-syscalls.h
@@ -1,17 +1,93 @@
 /* auto-generated by gensyscalls.py, do not touch */
 #ifndef _BIONIC_LINUX_SYSCALLS_H_
+#define _BIONIC_LINUX_SYSCALLS_H_
 
-#if !defined __ASM_ARM_UNISTD_H && !defined __ASM_I386_UNISTD_H
+#if !defined __ASM_ARM_UNISTD_H && !defined __ASM_I386_UNISTD_H && !defined __ASM_MIPS_UNISTD_H
 #if defined __arm__ && !defined __ARM_EABI__ && !defined __thumb__
-  #  define __NR_SYSCALL_BASE  0x900000
-  #else
-  #  define  __NR_SYSCALL_BASE  0
-  #endif
+  #  define __NR_SYSCALL_BASE 0x900000
+#elif defined(__mips__)
+  #  define __NR_SYSCALL_BASE 4000
+#else
+  #  define __NR_SYSCALL_BASE 0
+#endif
 
 #define __NR_exit                         (__NR_SYSCALL_BASE + 1)
 #define __NR_fork                         (__NR_SYSCALL_BASE + 2)
-#define __NR_clone                        (__NR_SYSCALL_BASE + 120)
+#define __NR_read                         (__NR_SYSCALL_BASE + 3)
+#define __NR_write                        (__NR_SYSCALL_BASE + 4)
+#define __NR_open                         (__NR_SYSCALL_BASE + 5)
+#define __NR_close                        (__NR_SYSCALL_BASE + 6)
+#define __NR_link                         (__NR_SYSCALL_BASE + 9)
+#define __NR_unlink                       (__NR_SYSCALL_BASE + 10)
 #define __NR_execve                       (__NR_SYSCALL_BASE + 11)
+#define __NR_chdir                        (__NR_SYSCALL_BASE + 12)
+#define __NR_mknod                        (__NR_SYSCALL_BASE + 14)
+#define __NR_chmod                        (__NR_SYSCALL_BASE + 15)
+#define __NR_lseek                        (__NR_SYSCALL_BASE + 19)
+#define __NR_getpid                       (__NR_SYSCALL_BASE + 20)
+#define __NR_mount                        (__NR_SYSCALL_BASE + 21)
+#define __NR_ptrace                       (__NR_SYSCALL_BASE + 26)
+#define __NR_pause                        (__NR_SYSCALL_BASE + 29)
+#define __NR_access                       (__NR_SYSCALL_BASE + 33)
+#define __NR_sync                         (__NR_SYSCALL_BASE + 36)
+#define __NR_rename                       (__NR_SYSCALL_BASE + 38)
+#define __NR_mkdir                        (__NR_SYSCALL_BASE + 39)
+#define __NR_rmdir                        (__NR_SYSCALL_BASE + 40)
+#define __NR_dup                          (__NR_SYSCALL_BASE + 41)
+#define __NR_times                        (__NR_SYSCALL_BASE + 43)
+#define __NR_brk                          (__NR_SYSCALL_BASE + 45)
+#define __NR_acct                         (__NR_SYSCALL_BASE + 51)
+#define __NR_umount2                      (__NR_SYSCALL_BASE + 52)
+#define __NR_ioctl                        (__NR_SYSCALL_BASE + 54)
+#define __NR_fcntl                        (__NR_SYSCALL_BASE + 55)
+#define __NR_setpgid                      (__NR_SYSCALL_BASE + 57)
+#define __NR_umask                        (__NR_SYSCALL_BASE + 60)
+#define __NR_chroot                       (__NR_SYSCALL_BASE + 61)
+#define __NR_dup2                         (__NR_SYSCALL_BASE + 63)
+#define __NR_getppid                      (__NR_SYSCALL_BASE + 64)
+#define __NR_setsid                       (__NR_SYSCALL_BASE + 66)
+#define __NR_sigaction                    (__NR_SYSCALL_BASE + 67)
+#define __NR_sigsuspend                   (__NR_SYSCALL_BASE + 72)
+#define __NR_sigpending                   (__NR_SYSCALL_BASE + 73)
+#define __NR_setrlimit                    (__NR_SYSCALL_BASE + 75)
+#define __NR_getrusage                    (__NR_SYSCALL_BASE + 77)
+#define __NR_gettimeofday                 (__NR_SYSCALL_BASE + 78)
+#define __NR_settimeofday                 (__NR_SYSCALL_BASE + 79)
+#define __NR_symlink                      (__NR_SYSCALL_BASE + 83)
+#define __NR_readlink                     (__NR_SYSCALL_BASE + 85)
+#define __NR_reboot                       (__NR_SYSCALL_BASE + 88)
+#define __NR_munmap                       (__NR_SYSCALL_BASE + 91)
+#define __NR_truncate                     (__NR_SYSCALL_BASE + 92)
+#define __NR_ftruncate                    (__NR_SYSCALL_BASE + 93)
+#define __NR_fchmod                       (__NR_SYSCALL_BASE + 94)
+#define __NR_getpriority                  (__NR_SYSCALL_BASE + 96)
+#define __NR_setpriority                  (__NR_SYSCALL_BASE + 97)
+#define __NR_syslog                       (__NR_SYSCALL_BASE + 103)
+#define __NR_syslog                       (__NR_SYSCALL_BASE + 103)
+#define __NR_setitimer                    (__NR_SYSCALL_BASE + 104)
+#define __NR_getitimer                    (__NR_SYSCALL_BASE + 105)
+#define __NR_wait4                        (__NR_SYSCALL_BASE + 114)
+#define __NR_sysinfo                      (__NR_SYSCALL_BASE + 116)
+#define __NR_fsync                        (__NR_SYSCALL_BASE + 118)
+#define __NR_clone                        (__NR_SYSCALL_BASE + 120)
+#define __NR_uname                        (__NR_SYSCALL_BASE + 122)
+#define __NR_mprotect                     (__NR_SYSCALL_BASE + 125)
+#define __NR_sigprocmask                  (__NR_SYSCALL_BASE + 126)
+#define __NR_init_module                  (__NR_SYSCALL_BASE + 128)
+#define __NR_delete_module                (__NR_SYSCALL_BASE + 129)
+#define __NR_getpgid                      (__NR_SYSCALL_BASE + 132)
+#define __NR_fchdir                       (__NR_SYSCALL_BASE + 133)
+#define __NR_personality                  (__NR_SYSCALL_BASE + 136)
+#define __NR__llseek                      (__NR_SYSCALL_BASE + 140)
+#define __NR__newselect                   (__NR_SYSCALL_BASE + 142)
+#define __NR_flock                        (__NR_SYSCALL_BASE + 143)
+#define __NR_msync                        (__NR_SYSCALL_BASE + 144)
+#define __NR_readv                        (__NR_SYSCALL_BASE + 145)
+#define __NR_writev                       (__NR_SYSCALL_BASE + 146)
+
+#ifdef __arm__
+#define __NR_exit_group                   (__NR_SYSCALL_BASE + 248)
+#define __NR_waitid                       (__NR_SYSCALL_BASE + 280)
 #define __NR_setuid32                     (__NR_SYSCALL_BASE + 213)
 #define __NR_getuid32                     (__NR_SYSCALL_BASE + 199)
 #define __NR_getgid32                     (__NR_SYSCALL_BASE + 200)
@@ -22,88 +98,52 @@
 #define __NR_gettid                       (__NR_SYSCALL_BASE + 224)
 #define __NR_readahead                    (__NR_SYSCALL_BASE + 225)
 #define __NR_getgroups32                  (__NR_SYSCALL_BASE + 205)
-#define __NR_getpgid                      (__NR_SYSCALL_BASE + 132)
-#define __NR_getppid                      (__NR_SYSCALL_BASE + 64)
-#define __NR_setsid                       (__NR_SYSCALL_BASE + 66)
 #define __NR_setgid32                     (__NR_SYSCALL_BASE + 214)
 #define __NR_setreuid32                   (__NR_SYSCALL_BASE + 203)
 #define __NR_setresuid32                  (__NR_SYSCALL_BASE + 208)
 #define __NR_setresgid32                  (__NR_SYSCALL_BASE + 210)
-#define __NR_brk                          (__NR_SYSCALL_BASE + 45)
-#define __NR_ptrace                       (__NR_SYSCALL_BASE + 26)
-#define __NR_getpriority                  (__NR_SYSCALL_BASE + 96)
-#define __NR_setpriority                  (__NR_SYSCALL_BASE + 97)
-#define __NR_setrlimit                    (__NR_SYSCALL_BASE + 75)
 #define __NR_ugetrlimit                   (__NR_SYSCALL_BASE + 191)
-#define __NR_getrusage                    (__NR_SYSCALL_BASE + 77)
 #define __NR_setgroups32                  (__NR_SYSCALL_BASE + 206)
-#define __NR_setpgid                      (__NR_SYSCALL_BASE + 57)
+#define __NR_vfork                        (__NR_SYSCALL_BASE + 190)
 #define __NR_setregid32                   (__NR_SYSCALL_BASE + 204)
-#define __NR_chroot                       (__NR_SYSCALL_BASE + 61)
 #define __NR_prctl                        (__NR_SYSCALL_BASE + 172)
 #define __NR_capget                       (__NR_SYSCALL_BASE + 184)
 #define __NR_capset                       (__NR_SYSCALL_BASE + 185)
 #define __NR_sigaltstack                  (__NR_SYSCALL_BASE + 186)
-#define __NR_acct                         (__NR_SYSCALL_BASE + 51)
-#define __NR_read                         (__NR_SYSCALL_BASE + 3)
-#define __NR_write                        (__NR_SYSCALL_BASE + 4)
 #define __NR_pread64                      (__NR_SYSCALL_BASE + 180)
 #define __NR_pwrite64                     (__NR_SYSCALL_BASE + 181)
-#define __NR_open                         (__NR_SYSCALL_BASE + 5)
-#define __NR_close                        (__NR_SYSCALL_BASE + 6)
-#define __NR_lseek                        (__NR_SYSCALL_BASE + 19)
-#define __NR__llseek                      (__NR_SYSCALL_BASE + 140)
-#define __NR_getpid                       (__NR_SYSCALL_BASE + 20)
+#define __NR_openat                       (__NR_SYSCALL_BASE + 322)
 #define __NR_mmap2                        (__NR_SYSCALL_BASE + 192)
-#define __NR_munmap                       (__NR_SYSCALL_BASE + 91)
 #define __NR_mremap                       (__NR_SYSCALL_BASE + 163)
-#define __NR_msync                        (__NR_SYSCALL_BASE + 144)
-#define __NR_mprotect                     (__NR_SYSCALL_BASE + 125)
+#define __NR_madvise                      (__NR_SYSCALL_BASE + 220)
 #define __NR_mlock                        (__NR_SYSCALL_BASE + 150)
 #define __NR_munlock                      (__NR_SYSCALL_BASE + 151)
-#define __NR_ioctl                        (__NR_SYSCALL_BASE + 54)
-#define __NR_readv                        (__NR_SYSCALL_BASE + 145)
-#define __NR_writev                       (__NR_SYSCALL_BASE + 146)
-#define __NR_fcntl                        (__NR_SYSCALL_BASE + 55)
-#define __NR_flock                        (__NR_SYSCALL_BASE + 143)
-#define __NR_fchmod                       (__NR_SYSCALL_BASE + 94)
-#define __NR_dup                          (__NR_SYSCALL_BASE + 41)
+#define __NR_mincore                      (__NR_SYSCALL_BASE + 219)
 #define __NR_pipe                         (__NR_SYSCALL_BASE + 42)
-#define __NR_dup2                         (__NR_SYSCALL_BASE + 63)
-#define __NR__newselect                   (__NR_SYSCALL_BASE + 142)
-#define __NR_ftruncate                    (__NR_SYSCALL_BASE + 93)
+#define __NR_pipe2                        (__NR_SYSCALL_BASE + 359)
 #define __NR_ftruncate64                  (__NR_SYSCALL_BASE + 194)
-#define __NR_fsync                        (__NR_SYSCALL_BASE + 118)
+#define __NR_getdents64                   (__NR_SYSCALL_BASE + 217)
 #define __NR_fdatasync                    (__NR_SYSCALL_BASE + 148)
 #define __NR_fchown32                     (__NR_SYSCALL_BASE + 207)
-#define __NR_sync                         (__NR_SYSCALL_BASE + 36)
 #define __NR_fcntl64                      (__NR_SYSCALL_BASE + 221)
+#define __NR_fstatfs64                    (__NR_SYSCALL_BASE + 267)
 #define __NR_sendfile                     (__NR_SYSCALL_BASE + 187)
+#define __NR_fstatat64                    (__NR_SYSCALL_BASE + 327)
+#define __NR_mkdirat                      (__NR_SYSCALL_BASE + 323)
+#define __NR_fchownat                     (__NR_SYSCALL_BASE + 325)
+#define __NR_fchmodat                     (__NR_SYSCALL_BASE + 333)
+#define __NR_renameat                     (__NR_SYSCALL_BASE + 329)
 #define __NR_fsetxattr                    (__NR_SYSCALL_BASE + 228)
 #define __NR_fgetxattr                    (__NR_SYSCALL_BASE + 231)
 #define __NR_flistxattr                   (__NR_SYSCALL_BASE + 234)
 #define __NR_fremovexattr                 (__NR_SYSCALL_BASE + 237)
-#define __NR_link                         (__NR_SYSCALL_BASE + 9)
-#define __NR_unlink                       (__NR_SYSCALL_BASE + 10)
-#define __NR_chdir                        (__NR_SYSCALL_BASE + 12)
-#define __NR_mknod                        (__NR_SYSCALL_BASE + 14)
-#define __NR_chmod                        (__NR_SYSCALL_BASE + 15)
+#define __NR_unlinkat                     (__NR_SYSCALL_BASE + 328)
 #define __NR_chown32                      (__NR_SYSCALL_BASE + 212)
 #define __NR_lchown32                     (__NR_SYSCALL_BASE + 198)
-#define __NR_mount                        (__NR_SYSCALL_BASE + 21)
-#define __NR_umount2                      (__NR_SYSCALL_BASE + 52)
 #define __NR_fstat64                      (__NR_SYSCALL_BASE + 197)
 #define __NR_stat64                       (__NR_SYSCALL_BASE + 195)
 #define __NR_lstat64                      (__NR_SYSCALL_BASE + 196)
-#define __NR_mkdir                        (__NR_SYSCALL_BASE + 39)
-#define __NR_readlink                     (__NR_SYSCALL_BASE + 85)
-#define __NR_rmdir                        (__NR_SYSCALL_BASE + 40)
-#define __NR_rename                       (__NR_SYSCALL_BASE + 38)
 #define __NR_getcwd                       (__NR_SYSCALL_BASE + 183)
-#define __NR_access                       (__NR_SYSCALL_BASE + 33)
-#define __NR_symlink                      (__NR_SYSCALL_BASE + 83)
-#define __NR_fchdir                       (__NR_SYSCALL_BASE + 133)
-#define __NR_truncate                     (__NR_SYSCALL_BASE + 92)
 #define __NR_setxattr                     (__NR_SYSCALL_BASE + 226)
 #define __NR_lsetxattr                    (__NR_SYSCALL_BASE + 227)
 #define __NR_getxattr                     (__NR_SYSCALL_BASE + 229)
@@ -112,60 +152,8 @@
 #define __NR_llistxattr                   (__NR_SYSCALL_BASE + 233)
 #define __NR_removexattr                  (__NR_SYSCALL_BASE + 235)
 #define __NR_lremovexattr                 (__NR_SYSCALL_BASE + 236)
-#define __NR_pause                        (__NR_SYSCALL_BASE + 29)
-#define __NR_gettimeofday                 (__NR_SYSCALL_BASE + 78)
-#define __NR_settimeofday                 (__NR_SYSCALL_BASE + 79)
-#define __NR_times                        (__NR_SYSCALL_BASE + 43)
-#define __NR_nanosleep                    (__NR_SYSCALL_BASE + 162)
-#define __NR_getitimer                    (__NR_SYSCALL_BASE + 105)
-#define __NR_setitimer                    (__NR_SYSCALL_BASE + 104)
-#define __NR_sigaction                    (__NR_SYSCALL_BASE + 67)
-#define __NR_sigprocmask                  (__NR_SYSCALL_BASE + 126)
-#define __NR_sigsuspend                   (__NR_SYSCALL_BASE + 72)
-#define __NR_rt_sigaction                 (__NR_SYSCALL_BASE + 174)
-#define __NR_rt_sigprocmask               (__NR_SYSCALL_BASE + 175)
-#define __NR_rt_sigtimedwait              (__NR_SYSCALL_BASE + 177)
-#define __NR_sigpending                   (__NR_SYSCALL_BASE + 73)
-#define __NR_sched_setscheduler           (__NR_SYSCALL_BASE + 156)
-#define __NR_sched_getscheduler           (__NR_SYSCALL_BASE + 157)
-#define __NR_sched_yield                  (__NR_SYSCALL_BASE + 158)
-#define __NR_sched_setparam               (__NR_SYSCALL_BASE + 154)
-#define __NR_sched_getparam               (__NR_SYSCALL_BASE + 155)
-#define __NR_sched_get_priority_max       (__NR_SYSCALL_BASE + 159)
-#define __NR_sched_get_priority_min       (__NR_SYSCALL_BASE + 160)
-#define __NR_sched_rr_get_interval        (__NR_SYSCALL_BASE + 161)
-#define __NR_sched_setaffinity            (__NR_SYSCALL_BASE + 241)
-#define __NR_sched_getaffinity            (__NR_SYSCALL_BASE + 242)
-#define __NR_uname                        (__NR_SYSCALL_BASE + 122)
-#define __NR_wait4                        (__NR_SYSCALL_BASE + 114)
-#define __NR_umask                        (__NR_SYSCALL_BASE + 60)
-#define __NR_reboot                       (__NR_SYSCALL_BASE + 88)
-#define __NR_syslog                       (__NR_SYSCALL_BASE + 103)
-#define __NR_init_module                  (__NR_SYSCALL_BASE + 128)
-#define __NR_delete_module                (__NR_SYSCALL_BASE + 129)
-#define __NR_syslog                       (__NR_SYSCALL_BASE + 103)
-#define __NR_sysinfo                      (__NR_SYSCALL_BASE + 116)
-#define __NR_personality                  (__NR_SYSCALL_BASE + 136)
-#define __NR_futex                        (__NR_SYSCALL_BASE + 240)
-#define __NR_poll                         (__NR_SYSCALL_BASE + 168)
-
-#ifdef __arm__
-#define __NR_exit_group                   (__NR_SYSCALL_BASE + 248)
-#define __NR_waitid                       (__NR_SYSCALL_BASE + 280)
-#define __NR_vfork                        (__NR_SYSCALL_BASE + 190)
-#define __NR_openat                       (__NR_SYSCALL_BASE + 322)
-#define __NR_madvise                      (__NR_SYSCALL_BASE + 220)
-#define __NR_mincore                      (__NR_SYSCALL_BASE + 219)
-#define __NR_pipe2                        (__NR_SYSCALL_BASE + 359)
-#define __NR_getdents64                   (__NR_SYSCALL_BASE + 217)
-#define __NR_fstatfs64                    (__NR_SYSCALL_BASE + 267)
-#define __NR_fstatat64                    (__NR_SYSCALL_BASE + 327)
-#define __NR_mkdirat                      (__NR_SYSCALL_BASE + 323)
-#define __NR_fchownat                     (__NR_SYSCALL_BASE + 325)
-#define __NR_fchmodat                     (__NR_SYSCALL_BASE + 333)
-#define __NR_renameat                     (__NR_SYSCALL_BASE + 329)
-#define __NR_unlinkat                     (__NR_SYSCALL_BASE + 328)
 #define __NR_statfs64                     (__NR_SYSCALL_BASE + 266)
+#define __NR_nanosleep                    (__NR_SYSCALL_BASE + 162)
 #define __NR_clock_gettime                (__NR_SYSCALL_BASE + 263)
 #define __NR_clock_settime                (__NR_SYSCALL_BASE + 262)
 #define __NR_clock_getres                 (__NR_SYSCALL_BASE + 264)
@@ -177,6 +165,9 @@
 #define __NR_timer_delete                 (__NR_SYSCALL_BASE + 261)
 #define __NR_utimes                       (__NR_SYSCALL_BASE + 269)
 #define __NR_utimensat                    (__NR_SYSCALL_BASE + 348)
+#define __NR_rt_sigaction                 (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask               (__NR_SYSCALL_BASE + 175)
+#define __NR_rt_sigtimedwait              (__NR_SYSCALL_BASE + 177)
 #define __NR_socket                       (__NR_SYSCALL_BASE + 281)
 #define __NR_socketpair                   (__NR_SYSCALL_BASE + 288)
 #define __NR_bind                         (__NR_SYSCALL_BASE + 282)
@@ -192,62 +183,267 @@
 #define __NR_getsockopt                   (__NR_SYSCALL_BASE + 295)
 #define __NR_sendmsg                      (__NR_SYSCALL_BASE + 296)
 #define __NR_recvmsg                      (__NR_SYSCALL_BASE + 297)
+#define __NR_sched_setscheduler           (__NR_SYSCALL_BASE + 156)
+#define __NR_sched_getscheduler           (__NR_SYSCALL_BASE + 157)
+#define __NR_sched_yield                  (__NR_SYSCALL_BASE + 158)
+#define __NR_sched_setparam               (__NR_SYSCALL_BASE + 154)
+#define __NR_sched_getparam               (__NR_SYSCALL_BASE + 155)
+#define __NR_sched_get_priority_max       (__NR_SYSCALL_BASE + 159)
+#define __NR_sched_get_priority_min       (__NR_SYSCALL_BASE + 160)
+#define __NR_sched_rr_get_interval        (__NR_SYSCALL_BASE + 161)
+#define __NR_sched_setaffinity            (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity            (__NR_SYSCALL_BASE + 242)
 #define __NR_getcpu                       (__NR_SYSCALL_BASE + 345)
 #define __NR_ioprio_set                   (__NR_SYSCALL_BASE + 314)
 #define __NR_ioprio_get                   (__NR_SYSCALL_BASE + 315)
+#define __NR_futex                        (__NR_SYSCALL_BASE + 240)
 #define __NR_epoll_create                 (__NR_SYSCALL_BASE + 250)
 #define __NR_epoll_ctl                    (__NR_SYSCALL_BASE + 251)
 #define __NR_epoll_wait                   (__NR_SYSCALL_BASE + 252)
 #define __NR_inotify_init                 (__NR_SYSCALL_BASE + 316)
 #define __NR_inotify_add_watch            (__NR_SYSCALL_BASE + 317)
 #define __NR_inotify_rm_watch             (__NR_SYSCALL_BASE + 318)
+#define __NR_poll                         (__NR_SYSCALL_BASE + 168)
 #define __NR_eventfd2                     (__NR_SYSCALL_BASE + 356)
 #define __NR_ARM_set_tls                  (__NR_SYSCALL_BASE + 983045)
 #define __NR_ARM_cacheflush               (__NR_SYSCALL_BASE + 983042)
 #endif
 
 #ifdef __i386__
-#define __NR_exit_group                   (__NR_SYSCALL_BASE + 252)
 #define __NR_waitpid                      (__NR_SYSCALL_BASE + 7)
-#define __NR_waitid                       (__NR_SYSCALL_BASE + 284)
 #define __NR_kill                         (__NR_SYSCALL_BASE + 37)
-#define __NR_tkill                        (__NR_SYSCALL_BASE + 238)
-#define __NR_set_thread_area              (__NR_SYSCALL_BASE + 243)
-#define __NR_openat                       (__NR_SYSCALL_BASE + 295)
-#define __NR_madvise                      (__NR_SYSCALL_BASE + 219)
+#define __NR_pipe                         (__NR_SYSCALL_BASE + 42)
+#define __NR_socketcall                   (__NR_SYSCALL_BASE + 102)
+#define __NR_fdatasync                    (__NR_SYSCALL_BASE + 148)
+#define __NR_mlock                        (__NR_SYSCALL_BASE + 150)
+#define __NR_munlock                      (__NR_SYSCALL_BASE + 151)
+#define __NR_sched_setparam               (__NR_SYSCALL_BASE + 154)
+#define __NR_sched_getparam               (__NR_SYSCALL_BASE + 155)
+#define __NR_sched_setscheduler           (__NR_SYSCALL_BASE + 156)
+#define __NR_sched_getscheduler           (__NR_SYSCALL_BASE + 157)
+#define __NR_sched_yield                  (__NR_SYSCALL_BASE + 158)
+#define __NR_sched_get_priority_max       (__NR_SYSCALL_BASE + 159)
+#define __NR_sched_get_priority_min       (__NR_SYSCALL_BASE + 160)
+#define __NR_sched_rr_get_interval        (__NR_SYSCALL_BASE + 161)
+#define __NR_nanosleep                    (__NR_SYSCALL_BASE + 162)
+#define __NR_mremap                       (__NR_SYSCALL_BASE + 163)
+#define __NR_poll                         (__NR_SYSCALL_BASE + 168)
+#define __NR_prctl                        (__NR_SYSCALL_BASE + 172)
+#define __NR_rt_sigaction                 (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask               (__NR_SYSCALL_BASE + 175)
+#define __NR_rt_sigtimedwait              (__NR_SYSCALL_BASE + 177)
+#define __NR_pread64                      (__NR_SYSCALL_BASE + 180)
+#define __NR_pwrite64                     (__NR_SYSCALL_BASE + 181)
+#define __NR_getcwd                       (__NR_SYSCALL_BASE + 183)
+#define __NR_capget                       (__NR_SYSCALL_BASE + 184)
+#define __NR_capset                       (__NR_SYSCALL_BASE + 185)
+#define __NR_sigaltstack                  (__NR_SYSCALL_BASE + 186)
+#define __NR_sendfile                     (__NR_SYSCALL_BASE + 187)
+#define __NR_ugetrlimit                   (__NR_SYSCALL_BASE + 191)
+#define __NR_mmap2                        (__NR_SYSCALL_BASE + 192)
+#define __NR_ftruncate64                  (__NR_SYSCALL_BASE + 194)
+#define __NR_stat64                       (__NR_SYSCALL_BASE + 195)
+#define __NR_lstat64                      (__NR_SYSCALL_BASE + 196)
+#define __NR_fstat64                      (__NR_SYSCALL_BASE + 197)
+#define __NR_lchown32                     (__NR_SYSCALL_BASE + 198)
+#define __NR_getuid32                     (__NR_SYSCALL_BASE + 199)
+#define __NR_getgid32                     (__NR_SYSCALL_BASE + 200)
+#define __NR_geteuid32                    (__NR_SYSCALL_BASE + 201)
+#define __NR_getegid32                    (__NR_SYSCALL_BASE + 202)
+#define __NR_setreuid32                   (__NR_SYSCALL_BASE + 203)
+#define __NR_setregid32                   (__NR_SYSCALL_BASE + 204)
+#define __NR_getgroups32                  (__NR_SYSCALL_BASE + 205)
+#define __NR_setgroups32                  (__NR_SYSCALL_BASE + 206)
+#define __NR_fchown32                     (__NR_SYSCALL_BASE + 207)
+#define __NR_setresuid32                  (__NR_SYSCALL_BASE + 208)
+#define __NR_getresuid32                  (__NR_SYSCALL_BASE + 209)
+#define __NR_setresgid32                  (__NR_SYSCALL_BASE + 210)
+#define __NR_getresgid32                  (__NR_SYSCALL_BASE + 211)
+#define __NR_chown32                      (__NR_SYSCALL_BASE + 212)
+#define __NR_setuid32                     (__NR_SYSCALL_BASE + 213)
+#define __NR_setgid32                     (__NR_SYSCALL_BASE + 214)
 #define __NR_mincore                      (__NR_SYSCALL_BASE + 218)
-#define __NR_pipe2                        (__NR_SYSCALL_BASE + 331)
+#define __NR_madvise                      (__NR_SYSCALL_BASE + 219)
 #define __NR_getdents64                   (__NR_SYSCALL_BASE + 220)
-#define __NR_fstatfs64                    (__NR_SYSCALL_BASE + 269)
-#define __NR_fstatat64                    (__NR_SYSCALL_BASE + 300)
-#define __NR_mkdirat                      (__NR_SYSCALL_BASE + 296)
-#define __NR_fchownat                     (__NR_SYSCALL_BASE + 298)
-#define __NR_fchmodat                     (__NR_SYSCALL_BASE + 306)
-#define __NR_renameat                     (__NR_SYSCALL_BASE + 302)
-#define __NR_unlinkat                     (__NR_SYSCALL_BASE + 301)
-#define __NR_statfs64                     (__NR_SYSCALL_BASE + 268)
-#define __NR_clock_gettime                (__NR_SYSCALL_BASE + 265)
-#define __NR_clock_settime                (__NR_SYSCALL_BASE + 264)
-#define __NR_clock_getres                 (__NR_SYSCALL_BASE + 266)
-#define __NR_clock_nanosleep              (__NR_SYSCALL_BASE + 267)
+#define __NR_fcntl64                      (__NR_SYSCALL_BASE + 221)
+#define __NR_gettid                       (__NR_SYSCALL_BASE + 224)
+#define __NR_readahead                    (__NR_SYSCALL_BASE + 225)
+#define __NR_setxattr                     (__NR_SYSCALL_BASE + 226)
+#define __NR_lsetxattr                    (__NR_SYSCALL_BASE + 227)
+#define __NR_fsetxattr                    (__NR_SYSCALL_BASE + 228)
+#define __NR_getxattr                     (__NR_SYSCALL_BASE + 229)
+#define __NR_lgetxattr                    (__NR_SYSCALL_BASE + 230)
+#define __NR_fgetxattr                    (__NR_SYSCALL_BASE + 231)
+#define __NR_listxattr                    (__NR_SYSCALL_BASE + 232)
+#define __NR_llistxattr                   (__NR_SYSCALL_BASE + 233)
+#define __NR_flistxattr                   (__NR_SYSCALL_BASE + 234)
+#define __NR_removexattr                  (__NR_SYSCALL_BASE + 235)
+#define __NR_lremovexattr                 (__NR_SYSCALL_BASE + 236)
+#define __NR_fremovexattr                 (__NR_SYSCALL_BASE + 237)
+#define __NR_tkill                        (__NR_SYSCALL_BASE + 238)
+#define __NR_futex                        (__NR_SYSCALL_BASE + 240)
+#define __NR_sched_setaffinity            (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity            (__NR_SYSCALL_BASE + 242)
+#define __NR_set_thread_area              (__NR_SYSCALL_BASE + 243)
+#define __NR_exit_group                   (__NR_SYSCALL_BASE + 252)
+#define __NR_epoll_create                 (__NR_SYSCALL_BASE + 254)
+#define __NR_epoll_ctl                    (__NR_SYSCALL_BASE + 255)
+#define __NR_epoll_wait                   (__NR_SYSCALL_BASE + 256)
 #define __NR_timer_create                 (__NR_SYSCALL_BASE + 259)
 #define __NR_timer_settime                (__NR_SYSCALL_BASE + 260)
 #define __NR_timer_gettime                (__NR_SYSCALL_BASE + 261)
 #define __NR_timer_getoverrun             (__NR_SYSCALL_BASE + 262)
 #define __NR_timer_delete                 (__NR_SYSCALL_BASE + 263)
+#define __NR_clock_settime                (__NR_SYSCALL_BASE + 264)
+#define __NR_clock_gettime                (__NR_SYSCALL_BASE + 265)
+#define __NR_clock_getres                 (__NR_SYSCALL_BASE + 266)
+#define __NR_clock_nanosleep              (__NR_SYSCALL_BASE + 267)
+#define __NR_statfs64                     (__NR_SYSCALL_BASE + 268)
+#define __NR_fstatfs64                    (__NR_SYSCALL_BASE + 269)
 #define __NR_utimes                       (__NR_SYSCALL_BASE + 271)
-#define __NR_utimensat                    (__NR_SYSCALL_BASE + 320)
-#define __NR_socketcall                   (__NR_SYSCALL_BASE + 102)
-#define __NR_getcpu                       (__NR_SYSCALL_BASE + 318)
+#define __NR_waitid                       (__NR_SYSCALL_BASE + 284)
 #define __NR_ioprio_set                   (__NR_SYSCALL_BASE + 289)
 #define __NR_ioprio_get                   (__NR_SYSCALL_BASE + 290)
-#define __NR_epoll_create                 (__NR_SYSCALL_BASE + 254)
-#define __NR_epoll_ctl                    (__NR_SYSCALL_BASE + 255)
-#define __NR_epoll_wait                   (__NR_SYSCALL_BASE + 256)
 #define __NR_inotify_init                 (__NR_SYSCALL_BASE + 291)
 #define __NR_inotify_add_watch            (__NR_SYSCALL_BASE + 292)
 #define __NR_inotify_rm_watch             (__NR_SYSCALL_BASE + 293)
+#define __NR_openat                       (__NR_SYSCALL_BASE + 295)
+#define __NR_mkdirat                      (__NR_SYSCALL_BASE + 296)
+#define __NR_fchownat                     (__NR_SYSCALL_BASE + 298)
+#define __NR_fstatat64                    (__NR_SYSCALL_BASE + 300)
+#define __NR_unlinkat                     (__NR_SYSCALL_BASE + 301)
+#define __NR_renameat                     (__NR_SYSCALL_BASE + 302)
+#define __NR_fchmodat                     (__NR_SYSCALL_BASE + 306)
+#define __NR_getcpu                       (__NR_SYSCALL_BASE + 318)
+#define __NR_utimensat                    (__NR_SYSCALL_BASE + 320)
 #define __NR_eventfd2                     (__NR_SYSCALL_BASE + 328)
+#define __NR_pipe2                        (__NR_SYSCALL_BASE + 331)
+#endif
+
+#ifdef __mips__
+#define __NR_syscall                      (__NR_SYSCALL_BASE + 0)
+#define __NR_waitpid                      (__NR_SYSCALL_BASE + 7)
+#define __NR_lchown                       (__NR_SYSCALL_BASE + 16)
+#define __NR_setuid                       (__NR_SYSCALL_BASE + 23)
+#define __NR_getuid                       (__NR_SYSCALL_BASE + 24)
+#define __NR_kill                         (__NR_SYSCALL_BASE + 37)
+#define __NR_setgid                       (__NR_SYSCALL_BASE + 46)
+#define __NR_getgid                       (__NR_SYSCALL_BASE + 47)
+#define __NR_geteuid                      (__NR_SYSCALL_BASE + 49)
+#define __NR_getegid                      (__NR_SYSCALL_BASE + 50)
+#define __NR_setreuid                     (__NR_SYSCALL_BASE + 70)
+#define __NR_setregid                     (__NR_SYSCALL_BASE + 71)
+#define __NR_getrlimit                    (__NR_SYSCALL_BASE + 76)
+#define __NR_getgroups                    (__NR_SYSCALL_BASE + 80)
+#define __NR_setgroups                    (__NR_SYSCALL_BASE + 81)
+#define __NR_fchown                       (__NR_SYSCALL_BASE + 95)
+#define __NR_cacheflush                   (__NR_SYSCALL_BASE + 147)
+#define __NR_fdatasync                    (__NR_SYSCALL_BASE + 152)
+#define __NR_mlock                        (__NR_SYSCALL_BASE + 154)
+#define __NR_munlock                      (__NR_SYSCALL_BASE + 155)
+#define __NR_sched_setparam               (__NR_SYSCALL_BASE + 158)
+#define __NR_sched_getparam               (__NR_SYSCALL_BASE + 159)
+#define __NR_sched_setscheduler           (__NR_SYSCALL_BASE + 160)
+#define __NR_sched_getscheduler           (__NR_SYSCALL_BASE + 161)
+#define __NR_sched_yield                  (__NR_SYSCALL_BASE + 162)
+#define __NR_sched_get_priority_max       (__NR_SYSCALL_BASE + 163)
+#define __NR_sched_get_priority_min       (__NR_SYSCALL_BASE + 164)
+#define __NR_sched_rr_get_interval        (__NR_SYSCALL_BASE + 165)
+#define __NR_nanosleep                    (__NR_SYSCALL_BASE + 166)
+#define __NR_mremap                       (__NR_SYSCALL_BASE + 167)
+#define __NR_accept                       (__NR_SYSCALL_BASE + 168)
+#define __NR_bind                         (__NR_SYSCALL_BASE + 169)
+#define __NR_connect                      (__NR_SYSCALL_BASE + 170)
+#define __NR_getpeername                  (__NR_SYSCALL_BASE + 171)
+#define __NR_getsockname                  (__NR_SYSCALL_BASE + 172)
+#define __NR_getsockopt                   (__NR_SYSCALL_BASE + 173)
+#define __NR_listen                       (__NR_SYSCALL_BASE + 174)
+#define __NR_recvfrom                     (__NR_SYSCALL_BASE + 176)
+#define __NR_recvmsg                      (__NR_SYSCALL_BASE + 177)
+#define __NR_sendmsg                      (__NR_SYSCALL_BASE + 179)
+#define __NR_sendto                       (__NR_SYSCALL_BASE + 180)
+#define __NR_setsockopt                   (__NR_SYSCALL_BASE + 181)
+#define __NR_shutdown                     (__NR_SYSCALL_BASE + 182)
+#define __NR_socket                       (__NR_SYSCALL_BASE + 183)
+#define __NR_socketpair                   (__NR_SYSCALL_BASE + 184)
+#define __NR_setresuid                    (__NR_SYSCALL_BASE + 185)
+#define __NR_getresuid                    (__NR_SYSCALL_BASE + 186)
+#define __NR_poll                         (__NR_SYSCALL_BASE + 188)
+#define __NR_setresgid                    (__NR_SYSCALL_BASE + 190)
+#define __NR_getresgid                    (__NR_SYSCALL_BASE + 191)
+#define __NR_prctl                        (__NR_SYSCALL_BASE + 192)
+#define __NR_rt_sigaction                 (__NR_SYSCALL_BASE + 194)
+#define __NR_rt_sigprocmask               (__NR_SYSCALL_BASE + 195)
+#define __NR_rt_sigtimedwait              (__NR_SYSCALL_BASE + 197)
+#define __NR_pread64                      (__NR_SYSCALL_BASE + 200)
+#define __NR_pwrite64                     (__NR_SYSCALL_BASE + 201)
+#define __NR_chown                        (__NR_SYSCALL_BASE + 202)
+#define __NR_getcwd                       (__NR_SYSCALL_BASE + 203)
+#define __NR_capget                       (__NR_SYSCALL_BASE + 204)
+#define __NR_capset                       (__NR_SYSCALL_BASE + 205)
+#define __NR_sigaltstack                  (__NR_SYSCALL_BASE + 206)
+#define __NR_sendfile                     (__NR_SYSCALL_BASE + 207)
+#define __NR_mmap2                        (__NR_SYSCALL_BASE + 210)
+#define __NR_ftruncate64                  (__NR_SYSCALL_BASE + 212)
+#define __NR_stat64                       (__NR_SYSCALL_BASE + 213)
+#define __NR_lstat64                      (__NR_SYSCALL_BASE + 214)
+#define __NR_fstat64                      (__NR_SYSCALL_BASE + 215)
+#define __NR_mincore                      (__NR_SYSCALL_BASE + 217)
+#define __NR_madvise                      (__NR_SYSCALL_BASE + 218)
+#define __NR_getdents64                   (__NR_SYSCALL_BASE + 219)
+#define __NR_fcntl64                      (__NR_SYSCALL_BASE + 220)
+#define __NR_gettid                       (__NR_SYSCALL_BASE + 222)
+#define __NR_readahead                    (__NR_SYSCALL_BASE + 223)
+#define __NR_setxattr                     (__NR_SYSCALL_BASE + 224)
+#define __NR_lsetxattr                    (__NR_SYSCALL_BASE + 225)
+#define __NR_fsetxattr                    (__NR_SYSCALL_BASE + 226)
+#define __NR_getxattr                     (__NR_SYSCALL_BASE + 227)
+#define __NR_lgetxattr                    (__NR_SYSCALL_BASE + 228)
+#define __NR_fgetxattr                    (__NR_SYSCALL_BASE + 229)
+#define __NR_listxattr                    (__NR_SYSCALL_BASE + 230)
+#define __NR_llistxattr                   (__NR_SYSCALL_BASE + 231)
+#define __NR_flistxattr                   (__NR_SYSCALL_BASE + 232)
+#define __NR_removexattr                  (__NR_SYSCALL_BASE + 233)
+#define __NR_lremovexattr                 (__NR_SYSCALL_BASE + 234)
+#define __NR_fremovexattr                 (__NR_SYSCALL_BASE + 235)
+#define __NR_tkill                        (__NR_SYSCALL_BASE + 236)
+#define __NR_futex                        (__NR_SYSCALL_BASE + 238)
+#define __NR_sched_setaffinity            (__NR_SYSCALL_BASE + 239)
+#define __NR_sched_getaffinity            (__NR_SYSCALL_BASE + 240)
+#define __NR_exit_group                   (__NR_SYSCALL_BASE + 246)
+#define __NR_epoll_create                 (__NR_SYSCALL_BASE + 248)
+#define __NR_epoll_ctl                    (__NR_SYSCALL_BASE + 249)
+#define __NR_epoll_wait                   (__NR_SYSCALL_BASE + 250)
+#define __NR_statfs64                     (__NR_SYSCALL_BASE + 255)
+#define __NR_fstatfs64                    (__NR_SYSCALL_BASE + 256)
+#define __NR_timer_create                 (__NR_SYSCALL_BASE + 257)
+#define __NR_timer_settime                (__NR_SYSCALL_BASE + 258)
+#define __NR_timer_gettime                (__NR_SYSCALL_BASE + 259)
+#define __NR_timer_getoverrun             (__NR_SYSCALL_BASE + 260)
+#define __NR_timer_delete                 (__NR_SYSCALL_BASE + 261)
+#define __NR_clock_settime                (__NR_SYSCALL_BASE + 262)
+#define __NR_clock_gettime                (__NR_SYSCALL_BASE + 263)
+#define __NR_clock_getres                 (__NR_SYSCALL_BASE + 264)
+#define __NR_clock_nanosleep              (__NR_SYSCALL_BASE + 265)
+#define __NR_utimes                       (__NR_SYSCALL_BASE + 267)
+#define __NR_waitid                       (__NR_SYSCALL_BASE + 278)
+#define __NR_set_thread_area              (__NR_SYSCALL_BASE + 283)
+#define __NR_inotify_init                 (__NR_SYSCALL_BASE + 284)
+#define __NR_inotify_add_watch            (__NR_SYSCALL_BASE + 285)
+#define __NR_inotify_rm_watch             (__NR_SYSCALL_BASE + 286)
+#define __NR_openat                       (__NR_SYSCALL_BASE + 288)
+#define __NR_mkdirat                      (__NR_SYSCALL_BASE + 289)
+#define __NR_fchownat                     (__NR_SYSCALL_BASE + 291)
+#define __NR_fstatat64                    (__NR_SYSCALL_BASE + 293)
+#define __NR_unlinkat                     (__NR_SYSCALL_BASE + 294)
+#define __NR_renameat                     (__NR_SYSCALL_BASE + 295)
+#define __NR_fchmodat                     (__NR_SYSCALL_BASE + 299)
+#define __NR_getcpu                       (__NR_SYSCALL_BASE + 312)
+#define __NR_ioprio_set                   (__NR_SYSCALL_BASE + 314)
+#define __NR_ioprio_get                   (__NR_SYSCALL_BASE + 315)
+#define __NR_utimensat                    (__NR_SYSCALL_BASE + 316)
+#define __NR_eventfd2                     (__NR_SYSCALL_BASE + 325)
+#define __NR_pipe2                        (__NR_SYSCALL_BASE + 328)
 #endif
 
 #endif
diff --git a/libc/include/sys/linux-unistd.h b/libc/include/sys/linux-unistd.h
index 12a7ac4..d445f9c 100644
--- a/libc/include/sys/linux-unistd.h
+++ b/libc/include/sys/linux-unistd.h
@@ -13,22 +13,34 @@
 pid_t            __sys_clone (int, void*, int*, void*, int*);
 int              execve (const char*, char* const*, char* const*);
 int              __setuid (uid_t);
+int              __setuid (uid_t);
+uid_t            getuid (void);
 uid_t            getuid (void);
 gid_t            getgid (void);
+gid_t            getgid (void);
+uid_t            geteuid (void);
 uid_t            geteuid (void);
 gid_t            getegid (void);
+gid_t            getegid (void);
 uid_t            getresuid (uid_t *ruid, uid_t *euid, uid_t *suid);
+uid_t            getresuid (uid_t *ruid, uid_t *euid, uid_t *suid);
+gid_t            getresgid (gid_t *rgid, gid_t *egid, gid_t *sgid);
 gid_t            getresgid (gid_t *rgid, gid_t *egid, gid_t *sgid);
 pid_t            gettid (void);
 ssize_t          readahead (int, off64_t, size_t);
 int              getgroups (int, gid_t *);
+int              getgroups (int, gid_t *);
 pid_t            getpgid (pid_t);
 pid_t            getppid (void);
 pid_t            setsid (void);
 int              setgid (gid_t);
+int              setgid (gid_t);
 int              seteuid (uid_t);
 int              __setreuid (uid_t, uid_t);
+int              __setreuid (uid_t, uid_t);
 int              __setresuid (uid_t, uid_t, uid_t);
+int              __setresuid (uid_t, uid_t, uid_t);
+int              setresgid (gid_t, gid_t, gid_t);
 int              setresgid (gid_t, gid_t, gid_t);
 void*            __brk (void*);
 int              kill (pid_t, int);
@@ -39,12 +51,15 @@
 int              setpriority (int, int, int);
 int              setrlimit (int resource, const struct rlimit *rlp);
 int              getrlimit (int resource, struct rlimit *rlp);
+int              getrlimit (int resource, struct rlimit *rlp);
 int              getrusage (int who, struct rusage*  r_usage);
 int              setgroups (int, const gid_t *);
+int              setgroups (int, const gid_t *);
 pid_t            getpgrp (void);
 int              setpgid (pid_t, pid_t);
 pid_t            vfork (void);
 int              setregid (gid_t, gid_t);
+int              setregid (gid_t, gid_t);
 int              chroot (const char *);
 int              prctl (int option, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5);
 int              capget (cap_user_header_t header, cap_user_data_t data);
@@ -89,6 +104,7 @@
 int              fsync (int);
 int              fdatasync (int);
 int              fchown (int, uid_t, gid_t);
+int              fchown (int, uid_t, gid_t);
 void             sync (void);
 int              __fcntl64 (int, int, void *);
 int              __fstatfs64 (int, size_t, struct statfs *);
@@ -109,6 +125,8 @@
 int              mknod (const char*, mode_t, dev_t);
 int              chmod (const char*,mode_t);
 int              chown (const char *, uid_t, gid_t);
+int              chown (const char *, uid_t, gid_t);
+int              lchown (const char*, uid_t, gid_t);
 int              lchown (const char*, uid_t, gid_t);
 int              mount (const char*, const char*, const char*, unsigned long, const void*);
 int              umount (const char*);
@@ -223,6 +241,8 @@
 int              eventfd (unsigned int, int);
 int              __set_tls (void*);
 int              cacheflush (long start, long end, long flags);
+int              _flush_cache (char *addr, const int nbytes, const int op);
+int              syscall (int number,...);
 #ifdef __cplusplus
 }
 #endif
diff --git a/libc/tools/bionic_utils.py b/libc/tools/bionic_utils.py
index 0bc947b..abb7820 100644
--- a/libc/tools/bionic_utils.py
+++ b/libc/tools/bionic_utils.py
@@ -4,13 +4,13 @@
 
 # support Bionic architectures, add new ones as appropriate
 #
-bionic_archs = [ "arm", "x86" ]
+bionic_archs = [ "arm", "x86", "mips" ]
 
 # basic debugging trace support
 # call D_setlevel to set the verbosity level
 # and D(), D2(), D3(), D4() to add traces
 #
-verbose = 1
+verbose = 0
 
 def D(msg):
     global verbose
@@ -178,7 +178,7 @@
         self.syscalls = []
         self.lineno   = 0
 
-    def E(msg):
+    def E(self, msg):
         print "%d: %s" % (self.lineno, msg)
 
     def parse_line(self, line):
@@ -238,36 +238,55 @@
 
         number = line[pos_rparen+1:].strip()
         if number == "stub":
-            syscall_id  = -1
-            syscall_id2 = -1
+            syscall_common = -1
+            syscall_arm  = -1
+            syscall_x86 = -1
+            syscall_mips = -1
         else:
             try:
                 if number[0] == '#':
                     number = number[1:].strip()
                 numbers = string.split(number,',')
-                syscall_id  = int(numbers[0])
-                syscall_id2 = syscall_id
-                if len(numbers) > 1:
-                    syscall_id2 = int(numbers[1])
+                if len(numbers) == 1:  # one number shared by every architecture
+                    syscall_common = int(numbers[0])
+                    syscall_arm = -1
+                    syscall_x86 = -1
+                    syscall_mips = -1
+                else:
+                    if len(numbers) == 3:  # arm, x86 and mips numbers, in that order
+                        syscall_common = -1
+                        syscall_arm  = int(numbers[0])
+                        syscall_x86 = int(numbers[1])
+                        syscall_mips = int(numbers[2])
+                    else:
+                        self.E("invalid syscall number format in '%s'" % line)  # E is a method of this parser
+                        return
             except:
-                E("invalid syscall number in '%s'" % line)
+                self.E("invalid syscall number in '%s'" % line)  # E is a method of this parser
                 return
 
-		global verbose
+        global verbose
         if verbose >= 2:
-            if call_id < 0:
-                print "%s: %d,%d" % (syscall_name, syscall_id, syscall_id2)
+            if call_id == -1:
+                if syscall_common == -1:
+                    print "%s: %d,%d,%d" % (syscall_name, syscall_arm, syscall_x86, syscall_mips)
+                else:
+                    print "%s: %d" % (syscall_name, syscall_common)
             else:
-                print "%s(%d): %d,%d" % (syscall_name, call_id, syscall_id, syscall_id2)
+                if syscall_common == -1:
+                    print "%s(%d): %d,%d,%d" % (syscall_name, call_id, syscall_arm, syscall_x86, syscall_mips)
+                else:
+                    print "%s(%d): %d" % (syscall_name, call_id, syscall_common)
 
-        t = { "id"     : syscall_id,
-              "id2"    : syscall_id2,
+        t = { "armid"  : syscall_arm,
+              "x86id"  : syscall_x86,
+              "mipsid" : syscall_mips,
+              "common" : syscall_common,
               "cid"    : call_id,
               "name"   : syscall_name,
               "func"   : syscall_func,
               "params" : syscall_params,
               "decl"   : "%-15s  %s (%s);" % (return_type, syscall_func, params) }
-
         self.syscalls.append(t)
 
     def parse_file(self, file_path):
diff --git a/libc/tools/checksyscalls.py b/libc/tools/checksyscalls.py
index 2c563d7..286e727 100755
--- a/libc/tools/checksyscalls.py
+++ b/libc/tools/checksyscalls.py
@@ -70,10 +70,17 @@
 re_nr_clock_line = re.compile( r"#define __NR_(\w*)\s*\(__NR_timer_create\+(\w*)\)" )
 re_arm_nr_line   = re.compile( r"#define __ARM_NR_(\w*)\s*\(__ARM_NR_BASE\+\s*(\w*)\)" )
 re_x86_line      = re.compile( r"#define __NR_(\w*)\s*([0-9]*)" )
+re_mips_line     = re.compile( r"#define __NR_(\w*)\s*\(__NR_Linux\s*\+\s*([0-9]*)\)" )
 
 # now read the Linux arm header
 def process_nr_line(line,dict):
 
+    m = re_mips_line.match(line)
+    if m:
+        if dict["Linux"]==4000:
+            dict[m.group(1)] = int(m.group(2))
+        return
+
     m = re_nr_line.match(line)
     if m:
         dict[m.group(1)] = int(m.group(2))
@@ -118,6 +125,7 @@
 
 arm_dict = {}
 x86_dict = {}
+mips_dict = {}
 
 # remove trailing slash from the linux_root, if any
 if linux_root[-1] == '/':
@@ -141,8 +149,15 @@
         print "maybe using a different set of kernel headers might help."
         sys.exit(1)
 
+mips_unistd = find_arch_header(linux_root, "mips", "unistd.h")
+if not mips_unistd:
+    print "WEIRD: Could not locate the Mips unistd.h kernel header file,"
+    print "maybe using a different set of kernel headers might help."
+    sys.exit(1)
+
 process_header( arm_unistd, arm_dict )
 process_header( x86_unistd, x86_dict )
+process_header( mips_unistd, mips_dict )
 
 # now perform the comparison
 errors = 0
@@ -154,18 +169,19 @@
         sc_id   = sc[idname]
         if sc_id >= 0:
             if not arch_dict.has_key(sc_name):
-                print "%s syscall %s not defined, should be %d !!" % (archname, sc_name, sc_id)
+                print "error: %s syscall %s not defined, should be %d" % (archname, sc_name, sc_id)
                 errors += 1
             elif not arch_dict.has_key(sc_name):
-                print "%s syscall %s is not implemented!" % (archname, sc_name)
+                print "error: %s syscall %s is not implemented" % (archname, sc_name)
                 errors += 1
             elif arch_dict[sc_name] != sc_id:
-                print "%s syscall %s should be %d instead of %d !!" % (archname, sc_name, arch_dict[sc_name], sc_id)
+                print "error: %s syscall %s should be %d instead of %d" % (archname, sc_name, arch_dict[sc_name], sc_id)
                 errors += 1
     return errors
 
-errors += check_syscalls("arm", "id", arm_dict)
-errors += check_syscalls("x86", "id2", x86_dict)
+errors += check_syscalls("arm", "armid", arm_dict)
+errors += check_syscalls("x86", "x86id", x86_dict)
+errors += check_syscalls("mips", "mipsid", mips_dict)
 
 if errors == 0:
     print "congratulations, everything's fine !!"
diff --git a/libc/tools/gensyscalls.py b/libc/tools/gensyscalls.py
index bed9445..c113be1 100755
--- a/libc/tools/gensyscalls.py
+++ b/libc/tools/gensyscalls.py
@@ -1,11 +1,12 @@
 #!/usr/bin/python
 #
-# this tool is used to generate the syscall assmbler templates
-# to be placed into arch-x86/syscalls, as well as the content
-# of arch-x86/linux/_syscalls.h
+# this tool is used to generate the syscall assembler templates
+# to be placed into arch-{arm,x86,mips}/syscalls, as well as the content
+# of arch-{arm,x86,mips}/linux/_syscalls.h
 #
 
 import sys, os.path, glob, re, commands, filecmp, shutil
+import getpass
 
 from bionic_utils import *
 
@@ -31,9 +32,10 @@
 bionic_temp = "/tmp/bionic_gensyscalls/"
 
 # all architectures, update as you see fit
-all_archs = [ "arm", "x86" ]
+all_archs = [ "arm", "x86", "mips" ]
 
 def make_dir( path ):
+    path = os.path.abspath(path)
     if not os.path.exists(path):
         parent = os.path.dirname(path)
         if parent:
@@ -183,6 +185,32 @@
     .fnend
 """
 
+# mips assembler templates for each syscall stub
+#
+mips_call = """/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+    .text
+    .globl %(fname)s
+    .align 4
+    .ent %(fname)s
+
+%(fname)s:
+    .set noreorder
+    .cpload $t9
+    li $v0, %(idname)s
+    syscall
+    bnez $a3, 1f
+    move $a0, $v0
+    j $ra
+    nop
+1:
+    la $t9,__set_errno
+    j $t9
+    nop
+    .set reorder
+    .end %(fname)s
+"""
+
 def param_uses_64bits(param):
     """Returns True iff a syscall parameter description corresponds
        to a 64-bit type."""
@@ -331,23 +359,10 @@
                 return thumb_call_long % t
         return thumb_call_default % t
 
-
-    def superh_genstub(self, fname, flags, idname):
-        numargs = int(flags)
+    def mips_genstub(self,fname, idname):
         t = { "fname"  : fname,
-              "idname" : idname,
-              "numargs" : numargs }
-        superh_call = superh_header
-        if flags:
-            if numargs == 5:
-                superh_call += superh_5args_header
-            if numargs == 6:
-                superh_call += superh_6args_header
-            if numargs == 7:
-                superh_call += superh_7args_header
-        superh_call += superh_call_default
-        return superh_call % t
-
+              "idname" : idname }
+        return mips_call % t
 
     def process_file(self,input):
         parser = SysCallsTxtParser()
@@ -360,7 +375,7 @@
             syscall_params = t["params"]
             syscall_name   = t["name"]
 
-            if t["id"] >= 0:
+            if t["common"] >= 0 or t["armid"] >= 0:
                 num_regs = count_arm_param_registers(syscall_params)
                 if gen_thumb_stubs:
                     t["asm-thumb"] = self.thumb_genstub(syscall_func,num_regs,"__NR_"+syscall_name)
@@ -370,7 +385,7 @@
                     else:
                         t["asm-arm"]   = self.arm_genstub(syscall_func,num_regs,"__NR_"+syscall_name)
 
-            if t["id2"] >= 0:
+            if t["common"] >= 0 or t["x86id"] >= 0:
                 num_regs = count_generic_param_registers(syscall_params)
                 if t["cid"] >= 0:
                     t["asm-x86"] = self.x86_genstub_cid(syscall_func, num_regs, "__NR_"+syscall_name, t["cid"])
@@ -380,62 +395,71 @@
                 E("cid for dispatch syscalls is only supported for x86 in "
                   "'%s'" % syscall_name)
                 return
+            if t["common"] >= 0 or t["mipsid"] >= 0:
+                t["asm-mips"] = self.mips_genstub(syscall_func,"__NR_"+syscall_name)
 
 
     def gen_NR_syscall(self,fp,name,id):
         fp.write( "#define __NR_%-25s    (__NR_SYSCALL_BASE + %d)\n" % (name,id) )
 
-    # now dump the content of linux/_syscalls.h
+    # now dump the content of linux-syscalls.h
     def gen_linux_syscalls_h(self):
         path = "include/sys/linux-syscalls.h"
         D( "generating "+path )
         fp = create_file( path )
         fp.write( "/* auto-generated by gensyscalls.py, do not touch */\n" )
-        fp.write( "#ifndef _BIONIC_LINUX_SYSCALLS_H_\n\n" )
-        fp.write( "#if !defined __ASM_ARM_UNISTD_H && !defined __ASM_I386_UNISTD_H\n" )
+        fp.write( "#ifndef _BIONIC_LINUX_SYSCALLS_H_\n" )
+        fp.write( "#define _BIONIC_LINUX_SYSCALLS_H_\n\n" )
+        fp.write( "#if !defined __ASM_ARM_UNISTD_H && !defined __ASM_I386_UNISTD_H && !defined __ASM_MIPS_UNISTD_H\n" )
         fp.write( "#if defined __arm__ && !defined __ARM_EABI__ && !defined __thumb__\n" )
-        fp.write( "  #  define __NR_SYSCALL_BASE  0x900000\n" )
-        fp.write( "  #else\n" )
-        fp.write( "  #  define  __NR_SYSCALL_BASE  0\n" )
-        fp.write( "  #endif\n\n" )
+        fp.write( "  #  define __NR_SYSCALL_BASE 0x900000\n" )
+        fp.write( "#elif defined(__mips__)\n" )
+        fp.write( "  #  define __NR_SYSCALL_BASE 4000\n" )
+        fp.write( "#else\n" )
+        fp.write( "  #  define __NR_SYSCALL_BASE 0\n" )
+        fp.write( "#endif\n\n" )
 
         # first, all common syscalls
-        for sc in self.syscalls:
-            sc_id  = sc["id"]
-            sc_id2 = sc["id2"]
+        for sc in sorted(self.syscalls,key=lambda x:x["common"]):
+            sc_id  = sc["common"]
             sc_name = sc["name"]
-            if sc_id == sc_id2 and sc_id >= 0:
+            if sc_id >= 0:
                 self.gen_NR_syscall( fp, sc_name, sc_id )
 
         # now, all arm-specific syscalls
         fp.write( "\n#ifdef __arm__\n" );
         for sc in self.syscalls:
-            sc_id  = sc["id"]
-            sc_id2 = sc["id2"]
+            sc_id  = sc["armid"]
             sc_name = sc["name"]
-            if sc_id != sc_id2 and sc_id >= 0:
+            if sc_id >= 0:
                 self.gen_NR_syscall( fp, sc_name, sc_id )
         fp.write( "#endif\n" );
 
         gen_syscalls = {}
         # finally, all i386-specific syscalls
         fp.write( "\n#ifdef __i386__\n" );
-        for sc in self.syscalls:
-            sc_id  = sc["id"]
-            sc_id2 = sc["id2"]
+        for sc in sorted(self.syscalls,key=lambda x:x["x86id"]):
+            sc_id  = sc["x86id"]
             sc_name = sc["name"]
-            if sc_id != sc_id2 and sc_id2 >= 0 and sc_name not in gen_syscalls:
-                self.gen_NR_syscall( fp, sc_name, sc_id2 )
+            if sc_id >= 0 and sc_name not in gen_syscalls:
+                self.gen_NR_syscall( fp, sc_name, sc_id )
                 gen_syscalls[sc_name] = True
         fp.write( "#endif\n" );
 
+        # all mips-specific syscalls
+        fp.write( "\n#ifdef __mips__\n" );
+        for sc in sorted(self.syscalls,key=lambda x:x["mipsid"]):
+            sc_id = sc["mipsid"]
+            if sc_id >= 0:
+                self.gen_NR_syscall( fp, sc["name"], sc_id )
+        fp.write( "#endif\n" );
+
         fp.write( "\n#endif\n" )
         fp.write( "\n#endif /* _BIONIC_LINUX_SYSCALLS_H_ */\n" );
         fp.close()
         self.other_files.append( path )
 
-
-    # now dump the content of linux/_syscalls.h
+    # now dump the content of linux-unistd.h
     def gen_linux_unistd_h(self):
         path = "include/sys/linux-unistd.h"
         D( "generating "+path )
@@ -462,6 +486,7 @@
         arch_test = {
             "arm": lambda x: x.has_key("asm-arm") or x.has_key("asm-thumb"),
             "x86": lambda x: x.has_key("asm-x86"),
+            "mips": lambda x: x.has_key("asm-mips")
         }
 
         for sc in self.syscalls:
@@ -471,6 +496,7 @@
         fp.close()
         self.other_files.append( path )
 
+
     # now generate each syscall stub
     def gen_syscall_stubs(self):
         for sc in self.syscalls:
@@ -498,6 +524,13 @@
                 fp.close()
                 self.new_stubs.append( fname )
 
+            if sc.has_key("asm-mips") and 'mips' in all_archs:
+                fname = "arch-mips/syscalls/%s.S" % sc["func"]
+                D2( ">>> generating "+fname )
+                fp = create_file( fname )
+                fp.write(sc["asm-mips"])
+                fp.close()
+                self.new_stubs.append( fname )
 
     def  regenerate(self):
         D( "scanning for existing architecture-specific stub files" )
@@ -515,7 +548,7 @@
 
         if not os.path.exists( bionic_temp ):
             D( "creating %s" % bionic_temp )
-            os.mkdir( bionic_temp )
+            make_dir( bionic_temp )
 
 #        D( "p4 editing source files" )
 #        for arch in all_archs:
diff --git a/libdl/Android.mk b/libdl/Android.mk
index d2289f8..fb01ec2 100644
--- a/libdl/Android.mk
+++ b/libdl/Android.mk
@@ -34,21 +34,6 @@
 LOCAL_ALLOW_UNDEFINED_SYMBOLS := true
 LOCAL_SYSTEM_SHARED_LIBRARIES := 
 
-ifeq ($(TARGET_ARCH),sh)
-# for SuperH, additional code is necessary to handle .ctors section.
-GEN_SOBEGIN := $(TARGET_OUT_STATIC_LIBRARIES)/sobegin.o
-$(GEN_SOBEGIN): $(LOCAL_PATH)/arch-sh/sobegin.S
-	@mkdir -p $(dir $@)
-	$(TARGET_CC) -o $@ -c $<
-
-GEN_SOEND := $(TARGET_OUT_STATIC_LIBRARIES)/soend.o
-$(GEN_SOEND): $(LOCAL_PATH)/arch-sh/soend.S
-	@mkdir -p $(dir $@)
-	$(TARGET_CC) -o $@ -c $<
-
-LOCAL_ADDITIONAL_DEPENDENCIES := $(GEN_SOBEGIN) $(GEN_SOEND)
-endif
-
 include $(BUILD_SHARED_LIBRARY)
 
 BUILD_DLTEST:=0
diff --git a/libdl/arch-sh/sobegin.S b/libdl/arch-sh/sobegin.S
deleted file mode 100644
index 976b1a6..0000000
--- a/libdl/arch-sh/sobegin.S
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-    .text
-    .align 4
-    .type  _init,#function
-    .globl _init
-
-# The toolchain for SH-Linux does not produce INIT_ARRAY information which
-# bionic linker relies on.  Instead of it, The toolchain for SH-Linux produces
-# INIT information when it find the function whose name is '_init'.
-#
-_init:
-    sts.l   pr, @-r15
-    mov.l   r8, @-r15
-    mov.l   0f, r8      /* first entry is invalid */
-.L_loop:
-    add     #4, r8
-    mov.l   @r8, r0
-    cmp/eq  #0, r0      /* Zero terimnated. See 'soend.so'. */
-    bt      .L_end
-    jsr     @r0         /* invoke a constructor */
-    nop
-    bra     .L_loop
-    nop
-.L_end:
-    mov.l   @r15+, r8
-    lds.l   @r15+, pr
-
-    rts
-    nop
-
-    .balign 4
-0:  .long   __CTOR_LIST__
-
-# the .ctors section contains a list of pointers to "constructor"
-# functions that need to be called in order during C library initialization,
-# just before the program is being run. This is a C++ requirement
-#
-# the last entry shall be 0, and is defined in crtend.S
-#
-    .section .ctors, "aw"
-    .globl __CTOR_LIST__
-__CTOR_LIST__:
-    .long   -1
-
diff --git a/libdl/arch-sh/soend.S b/libdl/arch-sh/soend.S
deleted file mode 100644
index 7fa98a4..0000000
--- a/libdl/arch-sh/soend.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-    .section .ctors, "aw"
-    .long 0
-
diff --git a/libm/sh/_fpmath.h b/libm/sh/_fpmath.h
deleted file mode 100644
index f75ec7b..0000000
--- a/libm/sh/_fpmath.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2009 Android Open Source Project, All rights reserved.
- *   Derived from "bionic/libm/arm/_fpmath.h"
- *   Copyright (c) 2002, 2003 David Schultz <das@FreeBSD.ORG>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Assumes that 'long double' on SH-linux is just an alias for 'double'.
- */
-union IEEEl2bits {
-	long double	e;
-	struct {
-#if  __BYTE_ORDER == __LITTLE_ENDIAN
-		unsigned int	manl	:32;
-		unsigned int	manh	:20;
-		unsigned int	exp	:11;
-		unsigned int	sign	:1;
-#elif __BYTE_ORDER == __BIG_ENDIAN
-		unsigned int	sign	:1;
-		unsigned int	exp	:11;
-		unsigned int	manh	:20;
-		unsigned int	manl	:32;
-#endif
-	} bits;
-};
-
-/*
- * LDBL_NBIT is a mask indicating the position of the integer
- * bit in a long double.  But SH4 does not support it.
- */
-#define	LDBL_NBIT	0
-#define	mask_nbit_l(u)	((void)0)
-
-#define	LDBL_MANH_SIZE	20
-#define	LDBL_MANL_SIZE	32
diff --git a/libm/sh/fenv.c b/libm/sh/fenv.c
deleted file mode 100644
index ca8f476..0000000
--- a/libm/sh/fenv.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2009 Android Open Source Project, All rights reserved.
- *   Derived from "bionic/libm/arm/fenv.c"
- *   Copyright (c) 2004 David Schultz <das@FreeBSD.ORG>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-long __fpscr_values[2] = { 0L, 0x80000L };
diff --git a/libm/sh/fenv.h b/libm/sh/fenv.h
deleted file mode 100644
index e872f47..0000000
--- a/libm/sh/fenv.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (C) 2009 Android Open Source Project, All rights reserved.
- *   Derived from "bionic/libm/arm/fenv.h"
- *   Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _FENV_H_
-#define _FENV_H_
-
-#include <stdio.h>
-#include <sys/types.h>
-
-typedef	uint32_t	fenv_t;
-typedef	uint32_t	fexcept_t;
-
-/* Exception flags */
-#define	FE_INVALID		0x0010
-#define	FE_DIVBYZERO	0x0008
-#define	FE_OVERFLOW		0x0004
-#define	FE_UNDERFLOW	0x0002
-#define	FE_INEXACT		0x0001
-#define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
-				 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
-
-/* Rounding modes */
-#define	FE_TONEAREST	0x0000
-#define	FE_TOWARDZERO	0x0001
-#define	FE_UPWARD	0x0002 /* not supporetd */
-#define	FE_DOWNWARD	0x0003 /* not supporetd */
-#define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
-				 FE_UPWARD | FE_TOWARDZERO)
-
-/* bit shift for FPSCR mapping */
-#define	_FPUE_CAUSE_SHIFT	12
-#define	_FPUE_ENABLE_SHIFT	17
-#define	_FPUE_FLAG_SHIFT	 2
-
-/* bit shifters */
-#define	_FPUE_CAUSE(_EXCS)	((_EXCS) << _FPUE_CAUSE_SHIFT)
-#define	_FPUE_ENABLE(_EXCS)	((_EXCS) << _FPUE_ENABLE_SHIFT)
-#define	_FPUE_FLAG(_EXCS)	((_EXCS) << _FPUE_FLAG_SHIFT)
-
-#define	_GET_FPUE_CAUSE(_FPUE)		(((_FPUE) >> _FPUE_CAUSE_SHIFT) & FE_ALL_EXCEPT)
-#define	_GET_FPUE_ENABLE(_FPUE)	(((_FPUE) >> _FPUE_ENABLE_SHIFT)& FE_ALL_EXCEPT)
-#define	_GET_FPUE_FLAG(_FPUE)		(((_FPUE) >> _FPUE_FLAG_SHIFT) & FE_ALL_EXCEPT)
-
-
-/* FPSCR register accessors */
-#ifdef	__SH4_NOFPU__
-#define	__read_fpscr(_ptr)
-#define	__write_fpscr(_val)
-#else
-#define	__read_fpscr(_ptr)	__asm __volatile("sts fpscr, %0" : "=r" (*(_ptr)))
-#define	__write_fpscr(_val)	__asm __volatile("lds %0, fpscr" : : "r" (_val))
-#endif
-
-
-/* functions for libm */
-static __inline int
-feclearexcept(int __excepts)
-{
-	uint32_t __fpscr;
-
-	__read_fpscr(&__fpscr);
-	__fpscr &= ~_FPUE_FLAG(__excepts);
-	__write_fpscr(__fpscr);
-	return (0);
-}
-
-static __inline int
-fegetexceptflag(fexcept_t *__flagp, int __excepts)
-{
-	uint32_t __fpscr;
-
-	__read_fpscr(&__fpscr);
-	*__flagp = _GET_FPUE_FLAG(__fpscr) & __excepts;
-	return (0);
-}
-
-
-static __inline int
-fesetexceptflag(const fexcept_t *__flagp, int __excepts)
-{
-	uint32_t __fpscr;
-
-	__read_fpscr(&__fpscr);
-	__fpscr &= ~_FPUE_FLAG(__excepts);
-	__fpscr |= ~_FPUE_FLAG(*__flagp & __excepts);
-	__write_fpscr(__fpscr);
-	return (0);
-}
-
-
-static __inline int
-feraiseexcept(int __excepts)
-{
-	fexcept_t __ex = __excepts;
-
-	fesetexceptflag(&__ex, __excepts);	/* XXX */
-	return (0);
-}
-
-
-static __inline int
-fetestexcept(int __excepts)
-{
-	fexcept_t __ex;
-
-	fegetexceptflag(&__ex,  __excepts);
-	return (__ex);
-}
-
-
-static __inline int
-fegetround(void)
-{
-	uint32_t __fpscr = 0;
-
-	__read_fpscr(&__fpscr);
-	return (__fpscr & _ROUND_MASK);	
-}
-
-static __inline int
-fesetround(int __round)
-{
-	uint32_t __fpscr = 0;
-
-	if (__round == FE_UPWARD || __round == FE_DOWNWARD) {
-		fprintf(stderr, "libm superh : "
-			"upward/downward rounding not supporetd.\n");
-		return -1;
-	}
-
-	__read_fpscr(&__fpscr);
-	__fpscr &= ~_ROUND_MASK;
-	__fpscr |= (__round & _ROUND_MASK);
-	__write_fpscr(__fpscr);
-	return (0);
-}
-
-static __inline int
-fegetenv(fenv_t *__envp)
-{
-	__read_fpscr(__envp);
-	return (0);
-}
-
-static __inline int
-feholdexcept(fenv_t *__envp)
-{
-	uint32_t __fpscr;
-
-	__read_fpscr(&__fpscr);
-	*__envp = __fpscr;
-	__fpscr &= ~_FPUE_FLAG(FE_ALL_EXCEPT);
-	__write_fpscr(__fpscr);
-	return (0);
-}
-
-
-static __inline int
-fesetenv(const fenv_t *__envp)
-{
-	__write_fpscr(*__envp);
-	return (0);
-}
-
-
-static __inline int
-feupdateenv(const fenv_t *__envp)
-{
-	uint32_t __fpscr;
-
-	__read_fpscr(&__fpscr);
-	__write_fpscr(*__envp);
-	feraiseexcept(_GET_FPUE_FLAG(__fpscr));
-	return (0);
-}
-
-#if __BSD_VISIBLE
-
-static __inline int
-feenableexcept(int __mask)
-{
-	uint32_t __old_fpscr, __new_fpscr;
-
-	__read_fpscr(&__old_fpscr);
-	__new_fpscr = __old_fpscr | _FPUE_ENABLE(__mask & FE_ALL_EXCEPT);
-	__write_fpscr(__new_fpscr);
-	return (_GET_FPUE_ENABLE(__old_fpscr));
-}
-
-static __inline int
-fedisableexcept(int __mask)
-{
-	uint32_t __old_fpscr, __new_fpscr;
-
-	__read_fpscr(&__old_fpscr);
-	__new_fpscr = __old_fpscr & ~(_FPUE_ENABLE(__mask & FE_ALL_EXCEPT));
-	__write_fpscr(__new_fpscr);
-	return (_GET_FPUE_ENABLE(__old_fpscr));
-}
-
-static __inline int
-fegetexcept(void)
-{
-	uint32_t __fpscr;
-
-	__read_fpscr(&__fpscr);
-	return (_GET_FPUE_ENABLE(__fpscr));
-}
-
-#endif /* __BSD_VISIBLE */
-
-
-#endif /* _FENV_H_ */
-
diff --git a/linker/Android.mk b/linker/Android.mk
index 803b6c3..8f8cc2b 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -10,27 +10,9 @@
 	dlfcn.c \
 	debugger.c
 
-ifeq ($(TARGET_ARCH),sh)
-# SH-4A series virtual address range from 0x00000000 to 0x7FFFFFFF.
-LINKER_TEXT_BASE := 0x70000100
-else
-# This is aligned to 4K page boundary so that both GNU ld and gold work.  Gold
-# actually produces a correct binary with starting address 0xB0000100 but the
-# extra objcopy step to rename symbols causes the resulting binary to be misaligned
-# and unloadable.  Increasing the alignment adds an extra 3840 bytes in padding
-# but switching to gold saves about 1M of space.
-LINKER_TEXT_BASE := 0xB0001000
-endif
+LOCAL_LDFLAGS := -shared
 
-# The maximum size set aside for the linker, from
-# LINKER_TEXT_BASE rounded down to a megabyte.
-LINKER_AREA_SIZE := 0x01000000
-
-LOCAL_LDFLAGS := -Wl,-Ttext,$(LINKER_TEXT_BASE)
-
-LOCAL_CFLAGS += -DPRELINK
-LOCAL_CFLAGS += -DLINKER_TEXT_BASE=$(LINKER_TEXT_BASE)
-LOCAL_CFLAGS += -DLINKER_AREA_SIZE=$(LINKER_AREA_SIZE)
+LOCAL_CFLAGS += -fno-stack-protector
 
 # Set LINKER_DEBUG to either 1 or 0
 #
@@ -69,6 +51,9 @@
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE_SUFFIX := $(TARGET_EXECUTABLE_SUFFIX)
 
+# we don't want crtbegin.o (because we have begin.o), so unset it
+# just for this module
+LOCAL_NO_CRT := true
 
 include $(BUILD_SYSTEM)/dynamic_binary.mk
 
@@ -80,11 +65,3 @@
 #
 # end of BUILD_EXECUTABLE hack
 #
-
-# we don't want crtbegin.o (because we have begin.o), so unset it
-# just for this module
-$(LOCAL_BUILT_MODULE): TARGET_CRTBEGIN_STATIC_O :=
-# This line is not strictly necessary because the dynamic linker is built
-# as a static executable, but it won't hurt if in the future we start
-# building the linker as a dynamic one.
-$(LOCAL_BUILT_MODULE): TARGET_CRTBEGIN_DYNAMIC_O :=
diff --git a/linker/linker.c b/linker/linker.c
index bcfa8dc..883da3c 100644
--- a/linker/linker.c
+++ b/linker/linker.c
@@ -313,15 +313,6 @@
     freelist = si;
 }
 
-#ifndef LINKER_TEXT_BASE
-#error "linker's makefile must define LINKER_TEXT_BASE"
-#endif
-#ifndef LINKER_AREA_SIZE
-#error "linker's makefile must define LINKER_AREA_SIZE"
-#endif
-#define LINKER_BASE ((LINKER_TEXT_BASE) & 0xfff00000)
-#define LINKER_TOP  (LINKER_BASE + (LINKER_AREA_SIZE))
-
 const char *addr_to_name(unsigned addr)
 {
     soinfo *si;
@@ -332,10 +323,6 @@
         }
     }
 
-    if((addr >= LINKER_BASE) && (addr < LINKER_TOP)){
-        return "linker";
-    }
-
     return "";
 }
 
@@ -354,12 +341,10 @@
     soinfo *si;
     unsigned addr = (unsigned)pc;
 
-    if ((addr < LINKER_BASE) || (addr >= LINKER_TOP)) {
-        for (si = solist; si != 0; si = si->next){
-            if ((addr >= si->base) && (addr < (si->base + si->size))) {
-                *pcount = si->ARM_exidx_count;
-                return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
-            }
+    for (si = solist; si != 0; si = si->next){
+        if ((addr >= si->base) && (addr < (si->base + si->size))) {
+            *pcount = si->ARM_exidx_count;
+            return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
         }
     }
    *pcount = 0;
@@ -420,6 +405,33 @@
     return NULL;
 }
 
+/*
+ * Essentially the same method as _elf_lookup() above, but only
+ * searches for LOCAL symbols
+ */
+static Elf32_Sym *_elf_lookup_local(soinfo *si, unsigned hash, const char *name)
+{
+    Elf32_Sym *symtab = si->symtab;
+    const char *strtab = si->strtab;
+    unsigned n = hash % si->nbucket;;
+
+    TRACE_TYPE(LOOKUP, "%5d LOCAL SEARCH %s in %s@0x%08x %08x %d\n", pid,
+               name, si->name, si->base, hash, hash % si->nbucket);
+    for(n = si->bucket[hash % si->nbucket]; n != 0; n = si->chain[n]){
+        Elf32_Sym *s = symtab + n;
+        if (strcmp(strtab + s->st_name, name)) continue;
+        if (ELF32_ST_BIND(s->st_info) != STB_LOCAL) continue;
+        /* no section == undefined */
+        if(s->st_shndx == 0) continue;
+
+        TRACE_TYPE(LOOKUP, "%5d FOUND LOCAL %s in %s (%08x) %d\n", pid,
+                   name, si->name, s->st_value, s->st_size);
+        return s;
+    }
+
+    return NULL;
+}
+
 static unsigned elfhash(const char *_name)
 {
     const unsigned char *name = (const unsigned char *) _name;
@@ -443,7 +455,17 @@
     soinfo *lsi = si;
     int i;
 
-    /* Look for symbols in the local scope first (the object who is
+    /* If we are trying to find a symbol for the linker itself, look
+     * for LOCAL symbols first. Avoid using LOCAL symbols for other
+     * shared libraries until we have a better understanding of what
+     * might break by doing so. */
+    if (si->flags & FLAG_LINKER) {
+        s = _elf_lookup_local(si, elf_hash, name);
+        if(s != NULL)
+            goto done;
+    }
+
+    /* Look for symbols in the local scope (the object who is
      * searching). This happens with C++ templates on i386 for some
      * reason.
      *
@@ -452,6 +474,7 @@
      * dynamic linking.  Some systems return the first definition found
      * and some the first non-weak definition.   This is system dependent.
      * Here we return the first definition found for simplicity.  */
+
     s = _elf_lookup(si, elf_hash, name);
     if(s != NULL)
         goto done;
@@ -1628,10 +1651,10 @@
     DEBUG("%5d si->base = 0x%08x si->flags = 0x%08x\n", pid,
           si->base, si->flags);
 
-    if (si->flags & FLAG_EXE) {
+    if (si->flags & (FLAG_EXE | FLAG_LINKER)) {
         /* Locate the needed program segments (DYNAMIC/ARM_EXIDX) for
-         * linkage info if this is the executable. If this was a
-         * dynamic lib, that would have been done at load time.
+         * linkage info if this is the executable or the linker itself. 
+         * If this was a dynamic lib, that would have been done at load time.
          *
          * TODO: It's unfortunate that small pieces of this are
          * repeated from the load_library routine. Refactor this just
@@ -1650,16 +1673,17 @@
             if (phdr->p_type == PT_LOAD) {
                 /* For the executable, we use the si->size field only in
                    dl_unwind_find_exidx(), so the meaning of si->size
-                   is not the size of the executable; it is the last
-                   virtual address of the loadable part of the executable;
-                   since si->base == 0 for an executable, we use the
-                   range [0, si->size) to determine whether a PC value
-                   falls within the executable section.  Of course, if
-                   a value is below phdr->p_vaddr, it's not in the
-                   executable section, but a) we shouldn't be asking for
-                   such a value anyway, and b) if we have to provide
-                   an EXIDX for such a value, then the executable's
-                   EXIDX is probably the better choice.
+                   is not the size of the executable; it is the distance
+                   between the load location of the executable and the last
+                   address of the loadable part of the executable.
+                   We use the range [si->base, si->base + si->size) to
+                   determine whether a PC value falls within the executable
+                   section. Of course, if a value is between si->base and
+                   (si->base + phdr->p_vaddr), it's not in the executable
+                   section, but a) we shouldn't be asking for such a value
+                   anyway, and b) if we have to provide an EXIDX for such a
+                   value, then the executable's EXIDX is probably the better
+                   choice.
                 */
                 DEBUG_DUMP_PHDR(phdr, "PT_LOAD", pid);
                 if (phdr->p_vaddr + phdr->p_memsz > si->size)
@@ -1669,12 +1693,20 @@
                 if (!(phdr->p_flags & PF_W)) {
                     unsigned _end;
 
-                    if (phdr->p_vaddr < si->wrprotect_start)
-                        si->wrprotect_start = phdr->p_vaddr;
-                    _end = (((phdr->p_vaddr + phdr->p_memsz + PAGE_SIZE - 1) &
+                    if (si->base + phdr->p_vaddr < si->wrprotect_start)
+                        si->wrprotect_start = si->base + phdr->p_vaddr;
+                    _end = (((si->base + phdr->p_vaddr + phdr->p_memsz + PAGE_SIZE - 1) &
                              (~PAGE_MASK)));
                     if (_end > si->wrprotect_end)
                         si->wrprotect_end = _end;
+                    /* Make the section writable just in case we'll have to
+                     * write to it during relocation (i.e. text segment).
+                     * However, we will remember what range of addresses
+                     * should be write protected.
+                     */
+                    mprotect((void *) (si->base + phdr->p_vaddr),
+                             phdr->p_memsz,
+                             PFLAGS_TO_PROT(phdr->p_flags) | PROT_WRITE);
                 }
             } else if (phdr->p_type == PT_DYNAMIC) {
                 if (si->dynamic != (unsigned *)-1) {
@@ -1949,7 +1981,12 @@
 
 static void * __tls_area[ANDROID_TLS_SLOTS];
 
-unsigned __linker_init(unsigned **elfdata)
+/*
+ * This code is called after the linker has linked itself and
+ * fixed its own GOT. It is safe to make references to externs
+ * and other non-local data at this point.
+ */
+static unsigned __linker_init_post_relocation(unsigned **elfdata)
 {
     static soinfo linker_soinfo;
 
@@ -2069,7 +2106,18 @@
         vecs += 2;
     }
 
+    /* Compute the value of si->base. We can't rely on the fact that
+     * the first entry is the PHDR because this will not be true
+     * for certain executables (e.g. some in the NDK unit test suite)
+     */
+    int nn;
     si->base = 0;
+    for ( nn = 0; nn < si->phnum; nn++ ) {
+        if (si->phdr[nn].p_type == PT_PHDR) {
+            si->base = (Elf32_Addr) si->phdr - si->phdr[nn].p_vaddr;
+            break;
+        }
+    }
     si->dynamic = (unsigned *)-1;
     si->wrprotect_start = 0xffffffff;
     si->wrprotect_end = 0;
@@ -2138,3 +2186,69 @@
           si->entry);
     return si->entry;
 }
+
+/*
+ * Find the value of AT_BASE passed to us by the kernel. This is the load
+ * location of the linker. Returns 0 if no AT_BASE entry is found.
+ */
+static unsigned find_linker_base(unsigned **elfdata) {
+    int argc = (int) *elfdata; // first word of the block is argc
+    char **argv = (char**) (elfdata + 1); // argv[] starts right after argc
+    unsigned *vecs = (unsigned*) (argv + argc + 1); // first word past argv[argc]
+    while (vecs[0] != 0) {
+        vecs++;
+    }
+
+    /* Skip the zero word ending the environment block; (tag, value) pairs follow */
+    vecs++;
+
+    while(vecs[0]) { // a zero tag ends the list
+        if (vecs[0] == AT_BASE) {
+            return vecs[1];
+        }
+        vecs += 2;
+    }
+
+    return 0; // should never happen
+}
+
+/*
+ * This is the entry point for the linker, called from begin.S. It fixes
+ * the linker's own relocations by calling link_image() and then returns
+ * the result of __linker_init_post_relocation(elfdata).
+ *
+ * Because this method is called before the linker has fixed its own
+ * relocations, any attempt to reference an extern variable, extern
+ * function, or other GOT reference will generate a segfault.
+ */
+unsigned __linker_init(unsigned **elfdata) {
+    unsigned linker_addr = find_linker_base(elfdata);
+    Elf32_Ehdr *elf_hdr = (Elf32_Ehdr *) linker_addr; // ELF header is read at the base address
+    Elf32_Phdr *phdr =
+        (Elf32_Phdr *)((unsigned char *) linker_addr + elf_hdr->e_phoff);
+
+    soinfo linker_so;
+    memset(&linker_so, 0, sizeof(soinfo));
+
+    linker_so.base = linker_addr;
+    linker_so.dynamic = (unsigned *) -1; // NOTE(review): looks like the "not set yet" marker - confirm
+    linker_so.phdr = phdr;
+    linker_so.phnum = elf_hdr->e_phnum;
+    linker_so.flags |= FLAG_LINKER; // this soinfo describes the linker itself
+    linker_so.wrprotect_start = 0xffffffff; // start high / end low: an empty range
+    linker_so.wrprotect_end = 0;
+
+    if (link_image(&linker_so, 0)) {
+        // It would be nice to print an error message, but if the linker
+        // can't link itself, there's no guarantee that we'll be able to
+        // call write() (because it involves a GOT reference).
+        //
+        // This situation should never occur unless the linker itself
+        // is corrupt.
+        exit(-1);
+    }
+
+    // We have successfully fixed our own relocations. It's safe to run
+    // the main part of the linker now.
+    return __linker_init_post_relocation(elfdata);
+}
diff --git a/linker/linker.h b/linker/linker.h
index d29484c..e67ae52 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -83,6 +83,7 @@
 #define FLAG_LINKED     0x00000001
 #define FLAG_ERROR      0x00000002
 #define FLAG_EXE        0x00000004 // The main executable
+#define FLAG_LINKER     0x00000010 // The linker itself
 
 #define SOINFO_NAME_LEN 128