blob: 305586023da8c1ade4ce576bf28ae3231987a578 [file] [log] [blame]
The Android Open Source Project1dc9e472009-03-03 19:28:35 -08001/* $OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
2/*
3 * Written by J.T. Conklin <jtc@netbsd.org>.
4 * Public domain.
5 */
6
7#include <machine/asm.h>
8
9/*
10 * On the i486, this code is negligibly faster than the code generated
11 * by gcc at about half the size. If my i386 databook is correct, it
12 * should be considerably faster than the gcc code on an i386.
13 */
14
/*
 * void swab(const void *src, void *dst, ssize_t len)   (POSIX prototype)
 *
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * 16-bit pair.  Only len/2 pairs are processed, so a trailing odd byte
 * is neither read nor written.  A zero or negative len copies nothing.
 *
 * ABI:   i386, all arguments on the stack.
 * In:    at entry 4(%esp) = src, 8(%esp) = dst, 12(%esp) = len
 * Out:   nothing
 * Clobb: %eax, %ecx, flags (direction flag left clear);
 *        %esi and %edi are saved and restored.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# %esi = src (offsets moved by the two pushes)
	movl	16(%esp),%edi		# %edi = dst
	movl	20(%esp),%ecx		# %ecx = len, a signed byte count

	cld				# set direction forward

	/*
	 * len is signed.  Without this check a negative value would be
	 * turned by the logical shift below into a word count of about
	 * 2^31 and the copy would run far past both buffers.
	 */
	testl	%ecx,%ecx
	jle	.Lswab_done		# len <= 0: nothing to copy

	shrl	$1,%ecx			# %ecx = number of 16-bit words
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	.Lswab_bulk		# while swapping alternate bytes.
	.align	2,0x90
.Lswab_head:
	lodsw				# %ax = next source word, %esi += 2
	rorw	$8,%ax			# exchange the two bytes
	stosw				# store to dst, %edi += 2
	decl	%ecx
	testl	$7,%ecx			# loop until the word count is a multiple of 8
	jnz	.Lswab_head

.Lswab_bulk:
	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	.Lswab_done		# while swapping alternate bytes.
	.align	2,0x90
.Lswab_loop:
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	decl	%ecx			# one group of 8 words done
	jnz	.Lswab_loop

.Lswab_done:
	popl	%edi
	popl	%esi
	ret
67 ret