/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

#if defined(__ARM_NEON__)

        .text
        .fpu    neon

#ifdef HAVE_32_BYTE_CACHE_LINE
/* a prefetch distance of 2 cache-lines */
#define CACHE_LINE_SIZE     32
#define PREFETCH_DISTANCE   (CACHE_LINE_SIZE*2)
#else
/* a prefetch distance of 4 cache-lines works best experimentally */
#define CACHE_LINE_SIZE     64
#define PREFETCH_DISTANCE   (CACHE_LINE_SIZE*4)
#endif

ENTRY(memcpy)
        .save       {r0, lr}
        /* start preloading as early as possible */
        pld         [r1, #(CACHE_LINE_SIZE*0)]
        stmfd       sp!, {r0, lr}
        pld         [r1, #(CACHE_LINE_SIZE*1)]

        /* do we have at least 16-bytes to copy (needed for alignment below) */
        cmp         r2, #16
        blo         5f

        /* align destination to cache-line for the write-buffer */
        rsb         r3, r0, #0
        ands        r3, r3, #0xF
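        /* r3 = (-dst) & 0xF: number of bytes needed to reach a 16-byte boundary */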
        beq         0f

        /* copy up to 15-bytes (count in r3) */
        sub         r2, r2, r3
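        /* lsl #31 moves bit 0 of r3 into N and bit 1 into C: the mi transfer
         * below copies one byte, the two cs transfers copy two more */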
        movs        ip, r3, lsl #31
        ldrmib      lr, [r1], #1
        strmib      lr, [r0], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1
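        /* lsl #29 moves bit 2 of r3 into N and bit 3 into C: the 4-byte copy
         * below runs when bit 2 is set, the 8-byte copy when bit 3 is set */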
        movs        ip, r3, lsl #29
        bge         1f
        // copies 4 bytes, destination 32-bits aligned
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1:      bcc         2f
        // copies 8 bytes, destination 64-bits aligned
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
2:

0:      /* preload immediately the next cache line, which we may need */
        pld         [r1, #(CACHE_LINE_SIZE*0)]
        pld         [r1, #(CACHE_LINE_SIZE*1)]

#ifdef HAVE_32_BYTE_CACHE_LINE
        /* make sure we have at least 32 bytes to copy */
        subs        r2, r2, #32
        blo         4f

        /* preload all the cache lines we need.
         * NOTE: the number of pld below depends on PREFETCH_DISTANCE,
         * ideally we would increase the distance in the main loop to
         * avoid the goofy code below. In practice this doesn't seem to make
         * a big difference.
         */
        pld         [r1, #(PREFETCH_DISTANCE)]

1:      /* The main loop copies 32 bytes at a time */
        vld1.8      {d0 - d3}, [r1]!
        pld         [r1, #(PREFETCH_DISTANCE)]
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0, :128]!
        bhs         1b
#else
        /* make sure we have at least 64 bytes to copy */
        subs        r2, r2, #64
        blo         2f

        /* preload all the cache lines we need.
         * NOTE: the number of pld below depends on PREFETCH_DISTANCE,
         * ideally we would increase the distance in the main loop to
         * avoid the goofy code below. In practice this doesn't seem to make
         * a big difference.
         */
        pld         [r1, #(CACHE_LINE_SIZE*2)]
        pld         [r1, #(CACHE_LINE_SIZE*3)]
        pld         [r1, #(PREFETCH_DISTANCE)]

1:      /* The main loop copies 64 bytes at a time */
        vld1.8      {d0 - d3}, [r1]!
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(PREFETCH_DISTANCE)]
        subs        r2, r2, #64
        vst1.8      {d0 - d3}, [r0, :128]!
        vst1.8      {d4 - d7}, [r0, :128]!
        bhs         1b

2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
        add         r2, r2, #64
        subs        r2, r2, #32
        blo         4f

3:      /* 32 bytes at a time. These cache lines were already preloaded */
        vld1.8      {d0 - d3}, [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0, :128]!
        bhs         3b
#endif
4:      /* less than 32 left */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         5f
        // copies 16 bytes, 128-bits aligned
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [r0, :128]!

5:      /* copy up to 15-bytes (count in r2) */
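        /* lsl #29 moves bit 3 of r2 into C and bit 2 into N: the 8-byte copy
         * below runs when bit 3 is set, the 4-byte copy when bit 2 is set;
         * lsl #31 then handles the last halfword and byte */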
        movs        ip, r2, lsl #29
        bcc         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0]!
1:      bge         2f
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2:      movs        ip, r2, lsl #31
        ldrmib      r3, [r1], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strmib      r3, [r0], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1

        ldmfd       sp!, {r0, lr}
        bx          lr
END(memcpy)


#else   /* __ARM_ARCH__ < 7 */


        /*
         * Optimized memcpy() for ARM.
         *
         * note that memcpy() always returns the destination pointer,
         * so we have to preserve R0.
         */

ENTRY(memcpy)
        /* The stack must always be 64-bits aligned to be compliant with the
         * ARM ABI. Since we have to save R0, we might as well save R4
         * which we can use for better pipelining of the reads below
         */
        .save       {r0, r4, lr}
        stmfd       sp!, {r0, r4, lr}
        /* Making room for r5-r11 which will be spilled later */
        .pad        #28
        sub         sp, sp, #28

        // preload the destination because we'll align it to a cache line
        // with small writes. Also start the source "pump".
        PLD         (r0, #0)
        PLD         (r1, #0)
        PLD         (r1, #32)

        /* it simplifies things to take care of len<4 early */
        cmp         r2, #4
        blo         copy_last_3_and_return

        /* compute the offset to align the source
         * offset = (4-(src&3))&3 = -src & 3
         */
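        /* e.g. src & 3 == 1 gives an offset of 3 bytes, src & 3 == 3 gives 1 */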
        rsb         r3, r1, #0
        ands        r3, r3, #3
        beq         src_aligned

        /* align source to 32 bits. We need to insert 2 instructions between
         * a ldr[b|h] and str[b|h] because byte and half-word instructions
         * stall 2 cycles.
         */
        movs        r12, r3, lsl #31
        sub         r2, r2, r3      /* we know that r3 <= r2 because r2 >= 4 */
        ldrmib      r3, [r1], #1
        ldrcsb      r4, [r1], #1
        ldrcsb      r12,[r1], #1
        strmib      r3, [r0], #1
        strcsb      r4, [r0], #1
        strcsb      r12,[r0], #1

src_aligned:

        /* see if src and dst are aligned together (congruent) */
        eor         r12, r0, r1
        tst         r12, #3
        bne         non_congruent

        /* Use post-increment mode for stm to spill r5-r11 to reserved stack
         * frame. Don't update sp.
         */
        stmea       sp, {r5-r11}

        /* align the destination to a cache-line */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C
        beq         congruent_aligned32
        cmp         r3, r2
        andhi       r3, r2, #0x1C

        /* conditionally copies 0 to 7 words (length in r3) */
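        /* lsl #28 moves bit 4 of r3 into C and bit 3 into N: the ldm/stm pairs
         * move 16 bytes on cs and 8 bytes on mi; the tst below covers the last word */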
        movs        r12, r3, lsl #28
        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
        stmcsia     r0!, {r4, r5, r6, r7}
        stmmiia     r0!, {r8, r9}
        tst         r3, #0x4
        ldrne       r10,[r1], #4            /*  4 bytes */
        strne       r10,[r0], #4
        sub         r2, r2, r3

congruent_aligned32:
        /*
         * here source is aligned to 32 bytes.
         */

cached_aligned32:
        subs        r2, r2, #32
        blo         less_than_32_left

        /*
         * We preload a cache-line up to 64 bytes ahead. On the 926, this will
         * stall only until the requested word is fetched, but the linefill
         * continues in the background.
         * While the linefill is going, we write our previous cache-line
         * into the write-buffer (which should have some free space).
         * When the linefill is done, the writebuffer will
         * start dumping its content into memory
         *
         * While all this is going, we then load a full cache line into
         * 8 registers, this cache line should be in the cache by now
         * (or partly in the cache).
         *
         * This code should work well regardless of the source/dest alignment.
         *
         */

        // Align the preload register to a cache-line because the cpu does
        // "critical word first" (the first word requested is loaded first).
        bic         r12, r1, #0x1F
        add         r12, r12, #64

1:      ldmia       r1!, { r4-r11 }
        PLD         (r12, #64)
        subs        r2, r2, #32

        // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
        // for the ARM9 preload will not be safely guarded by the preceding subs.
        // When it is safely guarded, the only way to get a SIGSEGV here
        // is for the caller to overstate the length.
        ldrhi       r3, [r12], #32      /* cheap ARM9 preload */
        stmia       r0!, { r4-r11 }
        bhs         1b

        add         r2, r2, #32




less_than_32_left:
        /*
         * less than 32 bytes left at this point (length in r2)
         */

        /* skip all this if there is nothing to do, which should
         * be a common case (when it does run, the code below takes
         * about 16 cycles)
         */
        tst         r2, #0x1F
        beq         1f

        /* conditionally copies 0 to 31 bytes */
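        /* the flags cascade through the count: lsl #28 exposes bits 4 and 3
         * (16/8 bytes), lsl #30 bits 2 and 1 (4/2 bytes), tst #1 the last byte */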
        movs        r12, r2, lsl #28
        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
        stmcsia     r0!, {r4, r5, r6, r7}
        stmmiia     r0!, {r8, r9}
        movs        r12, r2, lsl #30
        ldrcs       r3, [r1], #4            /*  4 bytes */
        ldrmih      r4, [r1], #2            /*  2 bytes */
        strcs       r3, [r0], #4
        strmih      r4, [r0], #2
        tst         r2, #0x1
        ldrneb      r3, [r1]                /* last byte  */
        strneb      r3, [r0]

        /* we're done! restore everything and return */
1:      ldmfd       sp!, {r5-r11}
        ldmfd       sp!, {r0, r4, lr}
        bx          lr

        /********************************************************************/

non_congruent:
        /*
         * here source is aligned to 4 bytes
         * but destination is not.
         *
         * in the code below r2 is the number of bytes read
         * (the number of bytes written is always smaller, because we have
         * partial words in the shift queue)
         */
        cmp         r2, #4
        blo         copy_last_3_and_return

        /* Use post-increment mode for stm to spill r5-r11 to reserved stack
         * frame. Don't update sp.
         */
        stmea       sp, {r5-r11}

        /* compute shifts needed to align src to dest */
        rsb         r5, r0, #0
        and         r5, r5, #3          /* r5 = # bytes in partial words */
        mov         r12, r5, lsl #3     /* r12 = right */
        rsb         lr, r12, #32        /* lr = left  */
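        /* r12 and lr are the right/left shifts (in bits) used to stitch the
         * word-aligned reads into word-aligned writes; loop8, loop16 and
         * loop24 below are specialized versions for r12 = 8, 16 and 24 */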

        /* read the first word */
        ldr         r3, [r1], #4
        sub         r2, r2, #4

        /* write a partial word (0 to 3 bytes), such that destination
         * becomes aligned to 32 bits (r5 = nb of bytes to copy for alignment)
         */
        movs        r5, r5, lsl #31
        strmib      r3, [r0], #1
        movmi       r3, r3, lsr #8
        strcsb      r3, [r0], #1
        movcs       r3, r3, lsr #8
        strcsb      r3, [r0], #1
        movcs       r3, r3, lsr #8

        cmp         r2, #4
        blo         partial_word_tail

        /* Align destination to 32 bytes (cache line boundary) */
1:      tst         r0, #0x1c
        beq         2f
        ldr         r5, [r1], #4
        sub         r2, r2, #4
        orr         r4, r3, r5, lsl lr
        mov         r3, r5, lsr r12
        str         r4, [r0], #4
        cmp         r2, #4
        bhs         1b
        blo         partial_word_tail

        /* copy 32 bytes at a time */
2:      subs        r2, r2, #32
        blo         less_than_thirtytwo

        /* Use immediate mode for the shifts, because there is an extra cycle
         * for register shifts, which could account for up to a 50%
         * performance hit.
         */

        cmp         r12, #24
        beq         loop24
        cmp         r12, #8
        beq         loop8

loop16:
        ldr         r12, [r1], #4
1:      mov         r4, r12
        ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
        PLD         (r1, #64)
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
        orr         r3, r3, r4, lsl #16
        mov         r4, r4, lsr #16
        orr         r4, r4, r5, lsl #16
        mov         r5, r5, lsr #16
        orr         r5, r5, r6, lsl #16
        mov         r6, r6, lsr #16
        orr         r6, r6, r7, lsl #16
        mov         r7, r7, lsr #16
        orr         r7, r7, r8, lsl #16
        mov         r8, r8, lsr #16
        orr         r8, r8, r9, lsl #16
        mov         r9, r9, lsr #16
        orr         r9, r9, r10, lsl #16
        mov         r10, r10, lsr #16
        orr         r10, r10, r11, lsl #16
        stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
        mov         r3, r11, lsr #16
        bhs         1b
        b           less_than_thirtytwo

loop8:
        ldr         r12, [r1], #4
1:      mov         r4, r12
        ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
        PLD         (r1, #64)
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
        orr         r3, r3, r4, lsl #24
        mov         r4, r4, lsr #8
        orr         r4, r4, r5, lsl #24
        mov         r5, r5, lsr #8
        orr         r5, r5, r6, lsl #24
        mov         r6, r6, lsr #8
        orr         r6, r6, r7, lsl #24
        mov         r7, r7, lsr #8
        orr         r7, r7, r8, lsl #24
        mov         r8, r8, lsr #8
        orr         r8, r8, r9, lsl #24
        mov         r9, r9, lsr #8
        orr         r9, r9, r10, lsl #24
        mov         r10, r10, lsr #8
        orr         r10, r10, r11, lsl #24
        stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
        mov         r3, r11, lsr #8
        bhs         1b
        b           less_than_thirtytwo

loop24:
        ldr         r12, [r1], #4
1:      mov         r4, r12
        ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
        PLD         (r1, #64)
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
        orr         r3, r3, r4, lsl #8
        mov         r4, r4, lsr #24
        orr         r4, r4, r5, lsl #8
        mov         r5, r5, lsr #24
        orr         r5, r5, r6, lsl #8
        mov         r6, r6, lsr #24
        orr         r6, r6, r7, lsl #8
        mov         r7, r7, lsr #24
        orr         r7, r7, r8, lsl #8
        mov         r8, r8, lsr #24
        orr         r8, r8, r9, lsl #8
        mov         r9, r9, lsr #24
        orr         r9, r9, r10, lsl #8
        mov         r10, r10, lsr #24
        orr         r10, r10, r11, lsl #8
        stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
        mov         r3, r11, lsr #24
        bhs         1b


less_than_thirtytwo:
        /* copy the last 0 to 31 bytes of the source */
        rsb         r12, lr, #32        /* we corrupted r12, recompute it  */
        add         r2, r2, #32
        cmp         r2, #4
        blo         partial_word_tail

1:      ldr         r5, [r1], #4
        sub         r2, r2, #4
        orr         r4, r3, r5, lsl lr
        mov         r3, r5, lsr r12
        str         r4, [r0], #4
        cmp         r2, #4
        bhs         1b

partial_word_tail:
        /* we have a partial word in the input buffer */
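        /* lr is eight times the number of bytes still queued in r3; lsl #(31-3)
         * maps that count onto N (one byte) and C (two bytes) for the stores below */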
        movs        r5, lr, lsl #(31-3)
        strmib      r3, [r0], #1
        movmi       r3, r3, lsr #8
        strcsb      r3, [r0], #1
        movcs       r3, r3, lsr #8
        strcsb      r3, [r0], #1

        /* Refill spilled registers from the stack. Don't update sp. */
        ldmfd       sp, {r5-r11}

copy_last_3_and_return:
        movs        r2, r2, lsl #31     /* copy remaining 0, 1, 2 or 3 bytes */
        ldrmib      r2, [r1], #1
        ldrcsb      r3, [r1], #1
        ldrcsb      r12,[r1]
        strmib      r2, [r0], #1
        strcsb      r3, [r0], #1
        strcsb      r12,[r0]

        /* we're done! restore sp and spilled registers and return */
        add         sp,  sp, #28
        ldmfd       sp!, {r0, r4, lr}
        bx          lr
END(memcpy)


#endif    /* __ARM_ARCH__ < 7 */