The Android Open Source Project | 1dc9e47 | 2009-03-03 19:28:35 -0800 | [diff] [blame^] | 1 | /* $OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */ |
| 2 | /* |
| 3 | * Written by J.T. Conklin <jtc@netbsd.org>. |
| 4 | * Public domain. |
| 5 | */ |
| 6 | |
| 7 | #include <machine/asm.h> |
| 8 | |
/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc, at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on an i386.
 */
| 14 | |
/*
 * swab(from, to, len) -- i386, AT&T syntax, stack-based (cdecl) arguments.
 *
 * Copies len bytes from `from` to `to`, exchanging the two bytes of every
 * 16-bit word on the way.
 *
 * In (offsets are relative to %esp after the two pushes below):
 *	12(%esp)  from  source buffer       -> %esi
 *	16(%esp)  to    destination buffer  -> %edi
 *	20(%esp)  len   byte count          -> %ecx
 *
 * len is treated as a signed 32-bit value.  When it is zero or negative
 * nothing is copied (POSIX: a negative nbytes makes swab() a no-op).
 * When it is odd, only len-1 bytes are processed; the trailing byte is
 * neither read nor written.
 *
 * Clobbers: %eax, %ecx, flags.  %esi and %edi are saved and restored.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* esi = from */
	movl	16(%esp),%edi		/* edi = to */
	movl	20(%esp),%ecx		/* ecx = len (bytes) */

	/*
	 * Reject len <= 0 up front.  Without this check the logical shift
	 * below would turn a negative length into a word count of roughly
	 * 2^31 and the copy would run far past both buffers.
	 */
	testl	%ecx,%ecx
	jle	.Lswab_done

	cld				/* lodsw/stosw must walk forward */

	shrl	$1,%ecx			/* ecx = number of 16-bit words */

	/*
	 * Head loop: handle (words mod 8) words one at a time, so that the
	 * count left for the unrolled loop is an exact multiple of 8.
	 */
	testl	$7,%ecx
	jz	.Lswab_blocks
	.p2align 2,0x90			/* 4-byte align the loop, pad with nop */
.Lswab_head:
	lodsw				/* ax = *from++ */
	rorw	$8,%ax			/* exchange the two bytes of ax */
	stosw				/* *to++ = ax */
	decl	%ecx
	testl	$7,%ecx
	jnz	.Lswab_head

	/*
	 * Main loop: ecx is now a multiple of 8; convert it to a count of
	 * 8-word groups and swap one whole group per iteration.
	 */
.Lswab_blocks:
	shrl	$3,%ecx			/* ecx = number of 8-word groups */
	jz	.Lswab_done
	.p2align 2,0x90
.Lswab_unrolled:
	.rept	8			/* eight swapped words per iteration */
	lodsw
	rorw	$8,%ax
	stosw
	.endr
	decl	%ecx
	jnz	.Lswab_unrolled

.Lswab_done:
	popl	%edi
	popl	%esi
	ret