blob: 4b7f71bcada218574c0458fca7e6e6f9616eb7ef [file] [log] [blame]
Bruce Beare8ff1a272010-03-04 11:03:37 -08001/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
/* Thin preprocessor wrappers around assembler directives.  Each one is
   guarded by #ifndef, so a definition that is already in effect when this
   point is reached is left untouched.  */

/* L(label) spells a label with the ".L" prefix.  */
31#ifndef L
32# define L(label) .L##label
33#endif
34
/* ALIGN(n) expands to ".p2align n", i.e. n is a power-of-two exponent.  */
35#ifndef ALIGN
36# define ALIGN(n) .p2align n
37#endif
38
/* The cfi_* macros below forward to the assembler directive of the same
   name with a leading dot.  */
39#ifndef cfi_startproc
40# define cfi_startproc .cfi_startproc
41#endif
42
43#ifndef cfi_endproc
44# define cfi_endproc .cfi_endproc
45#endif
46
47#ifndef cfi_rel_offset
48# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
49#endif
50
51#ifndef cfi_restore
Bruce Beare124a5422010-10-11 12:24:41 -070052# define cfi_restore(reg) .cfi_restore reg
Bruce Beare8ff1a272010-03-04 11:03:37 -080053#endif
54
55#ifndef cfi_adjust_cfa_offset
56# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
57#endif
58
/* ENTRY(name) opens a function: it marks NAME as a global symbol of type
   @function, aligns it with .p2align 4, emits the label and starts a CFI
   region.  */
59#ifndef ENTRY
60# define ENTRY(name) \
61 .type name, @function; \
62 .globl name; \
63 .p2align 4; \
64name: \
65 cfi_startproc
66#endif
67
/* END(name) closes the CFI region opened by ENTRY and sets the symbol size
   to the distance from NAME to the current location.  */
68#ifndef END
69# define END(name) \
70 cfi_endproc; \
71 .size name, .-name
72#endif
73
/* CFI_PUSH(REG) describes a 4-byte push of REG to the unwinder: the CFA
   offset grows by 4 and REG is recorded as saved at offset 0.  It emits no
   machine instruction.  */
74#define CFI_PUSH(REG) \
75 cfi_adjust_cfa_offset (4); \
76 cfi_rel_offset (REG, 0)
77
/* CFI_POP(REG) is the inverse annotation: the CFA offset shrinks by 4 and
   REG is marked as restored.  */
78#define CFI_POP(REG) \
79 cfi_adjust_cfa_offset (-4); \
80 cfi_restore (REG)
81
/* PUSH/POP pair the real pushl/popl with the matching CFI annotation.  */
82#define PUSH(REG) pushl REG; CFI_PUSH (REG)
83#define POP(REG) popl REG; CFI_POP (REG)
84
/* Argument offsets, used as DEST(%esp), CHR(%esp) and LEN(%esp).  PARMS is
   the offset of the first argument and is defined further down.
   With USE_AS_BZERO there are two arguments (destination, length) and
   SETRTNVAL expands to nothing; otherwise there are three (destination,
   fill value, length) and SETRTNVAL reloads the destination argument into
   EAX just before returning.  */
85#ifdef USE_AS_BZERO
86# define DEST PARMS
87# define LEN DEST+4
88# define SETRTNVAL
89#else
90# define DEST PARMS
91# define CHR DEST+4
92# define LEN CHR+4
93# define SETRTNVAL movl DEST(%esp), %eax
94#endif
95
/* Build-mode dependent pieces.
   SHARED: EBX is pushed at entry (ENTRANCE) and popped before every ret,
   because the jump-table dispatch below overwrites it.  PARMS is 8: the
   return address plus the saved EBX.  Jump tables hold offsets relative to
   the table itself (JMPTBL(I, B) = I - B).
   Otherwise: nothing is pushed at entry, PARMS is 4, and jump tables hold
   the label values themselves (JMPTBL(I, B) = I).
   RETURN is RETURN_END followed, in the SHARED case, by a CFI_PUSH that
   re-describes EBX as saved for the code that follows the ret.  */
96#ifdef SHARED
97# define ENTRANCE PUSH (%ebx);
98# define RETURN_END POP (%ebx); ret
99# define RETURN RETURN_END; CFI_PUSH (%ebx)
100# define PARMS 8 /* Preserve EBX. */
101# define JMPTBL(I, B) I - B
102
103/* Load an entry in a jump table into EBX and branch to it. TABLE is a
104 jump table with relative offsets. ECX is the table index; it is also
   added to EDX before the jump. */
105# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
106 /* We first load PC into EBX. */ \
107 call __i686.get_pc_thunk.bx; \
108 /* Get the address of the jump table. */ \
109 add $(TABLE - .), %ebx; \
110 /* Get the entry and convert the relative offset to the \
111 absolute address. */ \
112 add (%ebx,%ecx,4), %ebx; \
113 add %ecx, %edx; \
114 /* We loaded the jump table entry and adjusted EDX. Go. */ \
115 jmp *%ebx
116
/* PC thunk: copies the word at the top of the stack (the caller's return
   address) into EBX and returns.  Emitted hidden, in its own linkonce
   section.  */
117 .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
118 .globl __i686.get_pc_thunk.bx
119 .hidden __i686.get_pc_thunk.bx
120 ALIGN (4)
121 .type __i686.get_pc_thunk.bx,@function
122__i686.get_pc_thunk.bx:
123 movl (%esp), %ebx
124 ret
125#else
126# define ENTRANCE
127# define RETURN_END ret
128# define RETURN RETURN_END
129# define PARMS 4
130# define JMPTBL(I, B) I
131
132/* Branch to an entry in a jump table. TABLE is a jump table with
133 absolute offsets. ECX is the table index; it is also added to EDX
   before the jump. */
134# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
135 add %ecx, %edx; \
136 jmp *TABLE(,%ecx,4)
137#endif
138
/* Function entry.
   Register roles established here and relied on by the rest of the code:
     ECX = byte count (LEN argument)
     EAX = fill byte replicated into all four bytes (0 for USE_AS_BZERO)
     EDX = destination pointer (DEST argument)
   Counts below 32 are dispatched through table_less_32bytes, indexed by
   ECX; everything else continues at 32bytesormore.  */
139 .section .text.sse2,"ax",@progbits
140 ALIGN (4)
141ENTRY (sse2_memset5_atom)
142 ENTRANCE
143
144 movl LEN(%esp), %ecx
145#ifdef USE_AS_BZERO
146 xor %eax, %eax
147#else
 /* Zero-extend the fill byte, then copy it into AH so AX holds it twice. */
148 movzbl CHR(%esp), %eax
149 movb %al, %ah
150 /* Fill the whole EAX with pattern. */
 /* EDX is only a scratch register here; it is loaded with DEST below. */
151 movl %eax, %edx
152 shl $16, %eax
153 or %edx, %eax
154#endif
155 movl DEST(%esp), %edx
 /* Unsigned comparison: the count is treated as an unsigned value. */
156 cmp $32, %ecx
157 jae L(32bytesormore)
158
159L(write_less32bytes):
160 BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
161
162
/* Jump table for counts 0..31, placed in .rodata.sse2 and aligned with
   ALIGN (2).  Entry N targets write_<N>bytes, so the entry order must match
   the index (ECX) used by BRANCH_TO_JMPTBL_ENTRY.  Each entry is a 4-byte
   .int, matching the scale of 4 used when the table is indexed.  */
163 .pushsection .rodata.sse2,"a",@progbits
164 ALIGN (2)
165L(table_less_32bytes):
166 .int JMPTBL (L(write_0bytes), L(table_less_32bytes))
167 .int JMPTBL (L(write_1bytes), L(table_less_32bytes))
168 .int JMPTBL (L(write_2bytes), L(table_less_32bytes))
169 .int JMPTBL (L(write_3bytes), L(table_less_32bytes))
170 .int JMPTBL (L(write_4bytes), L(table_less_32bytes))
171 .int JMPTBL (L(write_5bytes), L(table_less_32bytes))
172 .int JMPTBL (L(write_6bytes), L(table_less_32bytes))
173 .int JMPTBL (L(write_7bytes), L(table_less_32bytes))
174 .int JMPTBL (L(write_8bytes), L(table_less_32bytes))
175 .int JMPTBL (L(write_9bytes), L(table_less_32bytes))
176 .int JMPTBL (L(write_10bytes), L(table_less_32bytes))
177 .int JMPTBL (L(write_11bytes), L(table_less_32bytes))
178 .int JMPTBL (L(write_12bytes), L(table_less_32bytes))
179 .int JMPTBL (L(write_13bytes), L(table_less_32bytes))
180 .int JMPTBL (L(write_14bytes), L(table_less_32bytes))
181 .int JMPTBL (L(write_15bytes), L(table_less_32bytes))
182 .int JMPTBL (L(write_16bytes), L(table_less_32bytes))
183 .int JMPTBL (L(write_17bytes), L(table_less_32bytes))
184 .int JMPTBL (L(write_18bytes), L(table_less_32bytes))
185 .int JMPTBL (L(write_19bytes), L(table_less_32bytes))
186 .int JMPTBL (L(write_20bytes), L(table_less_32bytes))
187 .int JMPTBL (L(write_21bytes), L(table_less_32bytes))
188 .int JMPTBL (L(write_22bytes), L(table_less_32bytes))
189 .int JMPTBL (L(write_23bytes), L(table_less_32bytes))
190 .int JMPTBL (L(write_24bytes), L(table_less_32bytes))
191 .int JMPTBL (L(write_25bytes), L(table_less_32bytes))
192 .int JMPTBL (L(write_26bytes), L(table_less_32bytes))
193 .int JMPTBL (L(write_27bytes), L(table_less_32bytes))
194 .int JMPTBL (L(write_28bytes), L(table_less_32bytes))
195 .int JMPTBL (L(write_29bytes), L(table_less_32bytes))
196 .int JMPTBL (L(write_30bytes), L(table_less_32bytes))
197 .int JMPTBL (L(write_31bytes), L(table_less_32bytes))
198 .popsection
199
/* Tails for counts 0..31, reached through table_less_32bytes.
   BRANCH_TO_JMPTBL_ENTRY has already added the count to EDX, so EDX points
   one past the last byte to fill and every store uses a negative offset.
   The labels form four fall-through chains, one per value of (count mod 4):
   each label stores one 4-byte word and falls into the label for
   count - 4.  The last label of a chain stores the remaining 0..3 bytes,
   then sets the return value and returns.  */
200 ALIGN (4)
201L(write_28bytes):
202 movl %eax, -28(%edx)
203L(write_24bytes):
204 movl %eax, -24(%edx)
205L(write_20bytes):
206 movl %eax, -20(%edx)
207L(write_16bytes):
208 movl %eax, -16(%edx)
209L(write_12bytes):
210 movl %eax, -12(%edx)
211L(write_8bytes):
212 movl %eax, -8(%edx)
213L(write_4bytes):
214 movl %eax, -4(%edx)
215L(write_0bytes):
216 SETRTNVAL
217 RETURN
218
 /* count mod 4 == 1: the final byte is stored with movb. */
219 ALIGN (4)
220L(write_29bytes):
221 movl %eax, -29(%edx)
222L(write_25bytes):
223 movl %eax, -25(%edx)
224L(write_21bytes):
225 movl %eax, -21(%edx)
226L(write_17bytes):
227 movl %eax, -17(%edx)
228L(write_13bytes):
229 movl %eax, -13(%edx)
230L(write_9bytes):
231 movl %eax, -9(%edx)
232L(write_5bytes):
233 movl %eax, -5(%edx)
234L(write_1bytes):
235 movb %al, -1(%edx)
236 SETRTNVAL
237 RETURN
238
 /* count mod 4 == 2: the final two bytes are stored with movw. */
239 ALIGN (4)
240L(write_30bytes):
241 movl %eax, -30(%edx)
242L(write_26bytes):
243 movl %eax, -26(%edx)
244L(write_22bytes):
245 movl %eax, -22(%edx)
246L(write_18bytes):
247 movl %eax, -18(%edx)
248L(write_14bytes):
249 movl %eax, -14(%edx)
250L(write_10bytes):
251 movl %eax, -10(%edx)
252L(write_6bytes):
253 movl %eax, -6(%edx)
254L(write_2bytes):
255 movw %ax, -2(%edx)
256 SETRTNVAL
257 RETURN
258
 /* count mod 4 == 3: the final three bytes are a movw followed by a movb. */
259 ALIGN (4)
260L(write_31bytes):
261 movl %eax, -31(%edx)
262L(write_27bytes):
263 movl %eax, -27(%edx)
264L(write_23bytes):
265 movl %eax, -23(%edx)
266L(write_19bytes):
267 movl %eax, -19(%edx)
268L(write_15bytes):
269 movl %eax, -15(%edx)
270L(write_11bytes):
271 movl %eax, -11(%edx)
272L(write_7bytes):
273 movl %eax, -7(%edx)
274L(write_3bytes):
275 movw %ax, -3(%edx)
276 movb %al, -1(%edx)
277 SETRTNVAL
278 RETURN
279
280 ALIGN (4)
281/* ECX >= 32. Nothing is known about the alignment of EDX yet. */
282L(32bytesormore):
283 /* Fill xmm0 with the pattern. */
284#ifdef USE_AS_BZERO
285 pxor %xmm0, %xmm0
286#else
 /* Move the 4-byte pattern into the low dword, then broadcast that dword
    to all four dwords of xmm0. */
287 movd %eax, %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800288 pshufd $0, %xmm0, %xmm0
289#endif
290 testl $0xf, %edx
291 jz L(aligned_16)
292/* ECX >= 32 and EDX is not 16 byte aligned. */
293L(not_aligned_16):
 /* One unaligned 16-byte store covers the bytes up to the next 16-byte
    boundary (ECX >= 32, so it stays inside the buffer). */
294 movdqu %xmm0, (%edx)
 /* Round EDX up to the next 16-byte boundary.  EAX is used as scratch:
    it becomes old EDX - new EDX, a negative value, and adding it to ECX
    removes the bytes that were just skipped from the remaining count. */
295 movl %edx, %eax
296 and $-16, %edx
297 add $16, %edx
298 sub %edx, %eax
299 add %eax, %ecx
 /* Restore the fill pattern in EAX; the tail code stores from it. */
300 movd %xmm0, %eax
301
302 ALIGN (4)
 /* EDX is 16-byte aligned from here on.  Fewer than 128 remaining bytes go
    straight to the aligned tail table, indexed by ECX. */
303L(aligned_16):
304 cmp $128, %ecx
305 jae L(128bytesormore)
306
307L(aligned_16_less128bytes):
308 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
309
/* Bulk fill.  On entry ECX >= 128 bytes remain, EDX is 16-byte aligned,
   xmm0 holds the pattern in all 16 bytes and EAX holds it in 4 bytes.

   EBX is borrowed to hold the shared cache size.  This code pushes EBX
   itself exactly when SHARED_CACHE_SIZE is defined or SHARED is not
   defined; in the remaining case (SHARED without SHARED_CACHE_SIZE) the
   copy saved by ENTRANCE is relied on instead.  Every POP on this path is
   keyed on that same condition.  Previously the POPs were keyed on
   DATA_CACHE_SIZE, which unbalanced the stack in SHARED builds that define
   only one of SHARED_CACHE_SIZE / DATA_CACHE_SIZE.  Builds that define both
   or neither assemble to the same instructions as before.  */
        ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
        PUSH (%ebx)
        mov     $SHARED_CACHE_SIZE, %ebx
#else
# ifdef SHARED
        call    __i686.get_pc_thunk.bx
        add     $_GLOBAL_OFFSET_TABLE_, %ebx
        mov     __x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
        PUSH (%ebx)
        mov     __x86_shared_cache_size, %ebx
# endif
#endif
        /* Counts at or above the shared cache size take the
           non-temporal path, which keeps using EBX.  */
        cmp     %ebx, %ecx
        jae     L(128bytesormore_nt_start)

        /* Smaller than the shared cache size: EBX is no longer needed as a
           size, so undo the push made above, if any.  RESTORE_EBX_STATE is
           expanded just before L(128bytesormore_nt_start), which is entered
           with EBX still pushed, to re-describe that state to the
           unwinder.  */
#if defined SHARED_CACHE_SIZE || !defined SHARED
        POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif

#ifdef DATA_CACHE_SIZE
        cmp     $DATA_CACHE_SIZE, %ecx
#else
# ifdef SHARED
        call    __i686.get_pc_thunk.bx
        add     $_GLOBAL_OFFSET_TABLE_, %ebx
        cmp     __x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
        cmp     __x86_data_cache_size, %ecx
# endif
#endif

        jae     L(128bytes_L2_normal)

        /* Below the data cache size.  ECX is kept biased by -128 inside
           this loop so that the borrow from each `sub $128` signals that
           fewer than 128 bytes remain after the chunk being written.  The
           movdqa and lea instructions between the sub and the conditional
           jump leave the flags untouched.  The loop body is unrolled
           twice.  */
        subl    $128, %ecx
L(128bytesormore_normal):
        sub     $128, %ecx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        lea     128(%edx), %edx
        jb      L(128bytesless_normal)


        sub     $128, %ecx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        lea     128(%edx), %edx
        jae     L(128bytesormore_normal)

L(128bytesless_normal):
        /* Remove the -128 bias: ECX is now the true remainder, 0..127.  */
        add     $128, %ecx
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

        /* At or above the data cache size but below the shared cache size:
           128 bytes per iteration with two prefetches ahead of the write
           position.  ECX is the true remaining count here.  */
        ALIGN (4)
L(128bytes_L2_normal):
        prefetcht0 0x380(%edx)
        prefetcht0 0x3c0(%edx)
        sub     $128, %ecx
        movdqa  %xmm0, (%edx)
        movaps  %xmm0, 0x10(%edx)
        movaps  %xmm0, 0x20(%edx)
        movaps  %xmm0, 0x30(%edx)
        movaps  %xmm0, 0x40(%edx)
        movaps  %xmm0, 0x50(%edx)
        movaps  %xmm0, 0x60(%edx)
        movaps  %xmm0, 0x70(%edx)
        add     $128, %edx
        cmp     $128, %ecx
        jae     L(128bytes_L2_normal)

L(128bytesless_L2_normal):
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

        /* CFI only, no instruction: the label below is entered with EBX
           still pushed (when a push was made at all).  */
        RESTORE_EBX_STATE
L(128bytesormore_nt_start):
        /* EBX = shared cache size, ECX >= EBX.  The first loop below writes
           EBX rounded down to a multiple of 128 with ordinary stores, so
           set ECX = count - EBX + (EBX & 0x7f), the bytes left after it.
           EAX is scratch for the mask and is then reloaded with the
           pattern.  */
        sub     %ebx, %ecx
        mov     %ebx, %eax
        and     $0x7f, %eax
        add     %eax, %ecx
        movd    %xmm0, %eax
        ALIGN (4)
L(128bytesormore_shared_cache_loop):
        prefetcht0 0x3c0(%edx)
        prefetcht0 0x380(%edx)
        sub     $0x80, %ebx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        add     $0x80, %edx
        cmp     $0x80, %ebx
        jae     L(128bytesormore_shared_cache_loop)
        cmp     $0x80, %ecx
        jb      L(shared_cache_loop_end)
        /* Whatever is left in whole 128-byte chunks is written with
           non-temporal stores, followed by a store fence.  */
        ALIGN (4)
L(128bytesormore_nt):
        sub     $0x80, %ecx
        movntdq %xmm0, (%edx)
        movntdq %xmm0, 0x10(%edx)
        movntdq %xmm0, 0x20(%edx)
        movntdq %xmm0, 0x30(%edx)
        movntdq %xmm0, 0x40(%edx)
        movntdq %xmm0, 0x50(%edx)
        movntdq %xmm0, 0x60(%edx)
        movntdq %xmm0, 0x70(%edx)
        add     $0x80, %edx
        cmp     $0x80, %ecx
        jae     L(128bytesormore_nt)
        sfence
L(shared_cache_loop_end):
        /* Undo the push made at L(128bytesormore), if one was made.  The
           condition must match the one guarding that push.  */
#if defined SHARED_CACHE_SIZE || !defined SHARED
        POP (%ebx)
#endif
        /* ECX < 128 here: finish through the aligned tail table.  */
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
443
444
/* Jump table for remaining counts 0..127 once EDX is 16-byte aligned.
   Entry N targets aligned_16_<N>bytes, so the entry order must match the
   index (ECX) used by BRANCH_TO_JMPTBL_ENTRY.  Each entry is a 4-byte
   .int, matching the scale of 4 used when the table is indexed.  */
445 .pushsection .rodata.sse2,"a",@progbits
446 ALIGN (2)
447L(table_16_128bytes):
448 .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
449 .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
450 .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
451 .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
452 .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
453 .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
454 .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
455 .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
456 .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
457 .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
458 .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
459 .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
460 .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
461 .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
462 .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
463 .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
464 .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
465 .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
466 .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
467 .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
468 .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
469 .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
470 .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
471 .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
472 .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
473 .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
474 .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
475 .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
476 .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
477 .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
478 .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
479 .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
480 .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
481 .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
482 .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
483 .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
484 .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
485 .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
486 .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
487 .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
488 .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
489 .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
490 .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
491 .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
492 .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
493 .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
494 .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
495 .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
496 .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
497 .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
498 .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
499 .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
500 .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
501 .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
502 .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
503 .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
504 .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
505 .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
506 .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
507 .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
508 .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
509 .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
510 .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
511 .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
512 .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
513 .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
514 .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
515 .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
516 .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
517 .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
518 .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
519 .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
520 .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
521 .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
522 .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
523 .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
524 .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
525 .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
526 .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
527 .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
528 .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
529 .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
530 .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
531 .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
532 .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
533 .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
534 .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
535 .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
536 .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
537 .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
538 .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
539 .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
540 .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
541 .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
542 .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
543 .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
544 .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
545 .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
546 .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
547 .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
548 .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
549 .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
550 .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
551 .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
552 .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
553 .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
554 .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
555 .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
556 .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
557 .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
558 .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
559 .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
560 .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
561 .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
562 .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
563 .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
564 .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
565 .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
566 .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
567 .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
568 .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
569 .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
570 .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
571 .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
572 .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
573 .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
574 .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
575 .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
576 .popsection
577
/* Tails for remaining counts 0..127, reached through table_16_128bytes.
   BRANCH_TO_JMPTBL_ENTRY has already added the count N to EDX, so EDX
   points one past the last byte to fill and every store uses a negative
   offset.  There are sixteen fall-through chains, one per value of
   (N mod 16): each label stores 16 bytes with movdqa at -N(%edx) and falls
   into the label for N - 16.  Every dispatch site reaches the table with a
   16-byte-aligned EDX before N is added, so those movdqa addresses are
   aligned.  The last label of each chain writes the final 0..15 bytes using
   movq from xmm0 (8 bytes) and movl/movw/movb from EAX, then sets the
   return value and returns.  */
578 ALIGN (4)
579L(aligned_16_112bytes):
580 movdqa %xmm0, -112(%edx)
581L(aligned_16_96bytes):
582 movdqa %xmm0, -96(%edx)
583L(aligned_16_80bytes):
584 movdqa %xmm0, -80(%edx)
585L(aligned_16_64bytes):
586 movdqa %xmm0, -64(%edx)
587L(aligned_16_48bytes):
588 movdqa %xmm0, -48(%edx)
589L(aligned_16_32bytes):
590 movdqa %xmm0, -32(%edx)
591L(aligned_16_16bytes):
592 movdqa %xmm0, -16(%edx)
593L(aligned_16_0bytes):
594 SETRTNVAL
595 RETURN
596
597 ALIGN (4)
598L(aligned_16_113bytes):
599 movdqa %xmm0, -113(%edx)
600L(aligned_16_97bytes):
601 movdqa %xmm0, -97(%edx)
602L(aligned_16_81bytes):
603 movdqa %xmm0, -81(%edx)
604L(aligned_16_65bytes):
605 movdqa %xmm0, -65(%edx)
606L(aligned_16_49bytes):
607 movdqa %xmm0, -49(%edx)
608L(aligned_16_33bytes):
609 movdqa %xmm0, -33(%edx)
610L(aligned_16_17bytes):
611 movdqa %xmm0, -17(%edx)
612L(aligned_16_1bytes):
613 movb %al, -1(%edx)
614 SETRTNVAL
615 RETURN
616
617 ALIGN (4)
618L(aligned_16_114bytes):
619 movdqa %xmm0, -114(%edx)
620L(aligned_16_98bytes):
621 movdqa %xmm0, -98(%edx)
622L(aligned_16_82bytes):
623 movdqa %xmm0, -82(%edx)
624L(aligned_16_66bytes):
625 movdqa %xmm0, -66(%edx)
626L(aligned_16_50bytes):
627 movdqa %xmm0, -50(%edx)
628L(aligned_16_34bytes):
629 movdqa %xmm0, -34(%edx)
630L(aligned_16_18bytes):
631 movdqa %xmm0, -18(%edx)
632L(aligned_16_2bytes):
633 movw %ax, -2(%edx)
634 SETRTNVAL
635 RETURN
636
637 ALIGN (4)
638L(aligned_16_115bytes):
639 movdqa %xmm0, -115(%edx)
640L(aligned_16_99bytes):
641 movdqa %xmm0, -99(%edx)
642L(aligned_16_83bytes):
643 movdqa %xmm0, -83(%edx)
644L(aligned_16_67bytes):
645 movdqa %xmm0, -67(%edx)
646L(aligned_16_51bytes):
647 movdqa %xmm0, -51(%edx)
648L(aligned_16_35bytes):
649 movdqa %xmm0, -35(%edx)
650L(aligned_16_19bytes):
651 movdqa %xmm0, -19(%edx)
652L(aligned_16_3bytes):
653 movw %ax, -3(%edx)
654 movb %al, -1(%edx)
655 SETRTNVAL
656 RETURN
657
658 ALIGN (4)
659L(aligned_16_116bytes):
660 movdqa %xmm0, -116(%edx)
661L(aligned_16_100bytes):
662 movdqa %xmm0, -100(%edx)
663L(aligned_16_84bytes):
664 movdqa %xmm0, -84(%edx)
665L(aligned_16_68bytes):
666 movdqa %xmm0, -68(%edx)
667L(aligned_16_52bytes):
668 movdqa %xmm0, -52(%edx)
669L(aligned_16_36bytes):
670 movdqa %xmm0, -36(%edx)
671L(aligned_16_20bytes):
672 movdqa %xmm0, -20(%edx)
673L(aligned_16_4bytes):
674 movl %eax, -4(%edx)
675 SETRTNVAL
676 RETURN
677
678 ALIGN (4)
679L(aligned_16_117bytes):
680 movdqa %xmm0, -117(%edx)
681L(aligned_16_101bytes):
682 movdqa %xmm0, -101(%edx)
683L(aligned_16_85bytes):
684 movdqa %xmm0, -85(%edx)
685L(aligned_16_69bytes):
686 movdqa %xmm0, -69(%edx)
687L(aligned_16_53bytes):
688 movdqa %xmm0, -53(%edx)
689L(aligned_16_37bytes):
690 movdqa %xmm0, -37(%edx)
691L(aligned_16_21bytes):
692 movdqa %xmm0, -21(%edx)
693L(aligned_16_5bytes):
694 movl %eax, -5(%edx)
695 movb %al, -1(%edx)
696 SETRTNVAL
697 RETURN
698
699 ALIGN (4)
700L(aligned_16_118bytes):
701 movdqa %xmm0, -118(%edx)
702L(aligned_16_102bytes):
703 movdqa %xmm0, -102(%edx)
704L(aligned_16_86bytes):
705 movdqa %xmm0, -86(%edx)
706L(aligned_16_70bytes):
707 movdqa %xmm0, -70(%edx)
708L(aligned_16_54bytes):
709 movdqa %xmm0, -54(%edx)
710L(aligned_16_38bytes):
711 movdqa %xmm0, -38(%edx)
712L(aligned_16_22bytes):
713 movdqa %xmm0, -22(%edx)
714L(aligned_16_6bytes):
715 movl %eax, -6(%edx)
716 movw %ax, -2(%edx)
717 SETRTNVAL
718 RETURN
719
720 ALIGN (4)
721L(aligned_16_119bytes):
722 movdqa %xmm0, -119(%edx)
723L(aligned_16_103bytes):
724 movdqa %xmm0, -103(%edx)
725L(aligned_16_87bytes):
726 movdqa %xmm0, -87(%edx)
727L(aligned_16_71bytes):
728 movdqa %xmm0, -71(%edx)
729L(aligned_16_55bytes):
730 movdqa %xmm0, -55(%edx)
731L(aligned_16_39bytes):
732 movdqa %xmm0, -39(%edx)
733L(aligned_16_23bytes):
734 movdqa %xmm0, -23(%edx)
735L(aligned_16_7bytes):
736 movl %eax, -7(%edx)
737 movw %ax, -3(%edx)
738 movb %al, -1(%edx)
739 SETRTNVAL
740 RETURN
741
742 ALIGN (4)
743L(aligned_16_120bytes):
744 movdqa %xmm0, -120(%edx)
745L(aligned_16_104bytes):
746 movdqa %xmm0, -104(%edx)
747L(aligned_16_88bytes):
748 movdqa %xmm0, -88(%edx)
749L(aligned_16_72bytes):
750 movdqa %xmm0, -72(%edx)
751L(aligned_16_56bytes):
752 movdqa %xmm0, -56(%edx)
753L(aligned_16_40bytes):
754 movdqa %xmm0, -40(%edx)
755L(aligned_16_24bytes):
756 movdqa %xmm0, -24(%edx)
757L(aligned_16_8bytes):
758 movq %xmm0, -8(%edx)
759 SETRTNVAL
760 RETURN
761
762 ALIGN (4)
763L(aligned_16_121bytes):
764 movdqa %xmm0, -121(%edx)
765L(aligned_16_105bytes):
766 movdqa %xmm0, -105(%edx)
767L(aligned_16_89bytes):
768 movdqa %xmm0, -89(%edx)
769L(aligned_16_73bytes):
770 movdqa %xmm0, -73(%edx)
771L(aligned_16_57bytes):
772 movdqa %xmm0, -57(%edx)
773L(aligned_16_41bytes):
774 movdqa %xmm0, -41(%edx)
775L(aligned_16_25bytes):
776 movdqa %xmm0, -25(%edx)
777L(aligned_16_9bytes):
778 movq %xmm0, -9(%edx)
779 movb %al, -1(%edx)
780 SETRTNVAL
781 RETURN
782
783 ALIGN (4)
784L(aligned_16_122bytes):
785 movdqa %xmm0, -122(%edx)
786L(aligned_16_106bytes):
787 movdqa %xmm0, -106(%edx)
788L(aligned_16_90bytes):
789 movdqa %xmm0, -90(%edx)
790L(aligned_16_74bytes):
791 movdqa %xmm0, -74(%edx)
792L(aligned_16_58bytes):
793 movdqa %xmm0, -58(%edx)
794L(aligned_16_42bytes):
795 movdqa %xmm0, -42(%edx)
796L(aligned_16_26bytes):
797 movdqa %xmm0, -26(%edx)
798L(aligned_16_10bytes):
799 movq %xmm0, -10(%edx)
800 movw %ax, -2(%edx)
801 SETRTNVAL
802 RETURN
803
804 ALIGN (4)
805L(aligned_16_123bytes):
806 movdqa %xmm0, -123(%edx)
807L(aligned_16_107bytes):
808 movdqa %xmm0, -107(%edx)
809L(aligned_16_91bytes):
810 movdqa %xmm0, -91(%edx)
811L(aligned_16_75bytes):
812 movdqa %xmm0, -75(%edx)
813L(aligned_16_59bytes):
814 movdqa %xmm0, -59(%edx)
815L(aligned_16_43bytes):
816 movdqa %xmm0, -43(%edx)
817L(aligned_16_27bytes):
818 movdqa %xmm0, -27(%edx)
819L(aligned_16_11bytes):
820 movq %xmm0, -11(%edx)
821 movw %ax, -3(%edx)
822 movb %al, -1(%edx)
823 SETRTNVAL
824 RETURN
825
826 ALIGN (4)
827L(aligned_16_124bytes):
828 movdqa %xmm0, -124(%edx)
829L(aligned_16_108bytes):
830 movdqa %xmm0, -108(%edx)
831L(aligned_16_92bytes):
832 movdqa %xmm0, -92(%edx)
833L(aligned_16_76bytes):
834 movdqa %xmm0, -76(%edx)
835L(aligned_16_60bytes):
836 movdqa %xmm0, -60(%edx)
837L(aligned_16_44bytes):
838 movdqa %xmm0, -44(%edx)
839L(aligned_16_28bytes):
840 movdqa %xmm0, -28(%edx)
841L(aligned_16_12bytes):
842 movq %xmm0, -12(%edx)
843 movl %eax, -4(%edx)
844 SETRTNVAL
845 RETURN
846
847 ALIGN (4)
848L(aligned_16_125bytes):
849 movdqa %xmm0, -125(%edx)
850L(aligned_16_109bytes):
851 movdqa %xmm0, -109(%edx)
852L(aligned_16_93bytes):
853 movdqa %xmm0, -93(%edx)
854L(aligned_16_77bytes):
855 movdqa %xmm0, -77(%edx)
856L(aligned_16_61bytes):
857 movdqa %xmm0, -61(%edx)
858L(aligned_16_45bytes):
859 movdqa %xmm0, -45(%edx)
860L(aligned_16_29bytes):
861 movdqa %xmm0, -29(%edx)
862L(aligned_16_13bytes):
863 movq %xmm0, -13(%edx)
864 movl %eax, -5(%edx)
865 movb %al, -1(%edx)
866 SETRTNVAL
867 RETURN
868
869 ALIGN (4)
870L(aligned_16_126bytes):
871 movdqa %xmm0, -126(%edx)
872L(aligned_16_110bytes):
873 movdqa %xmm0, -110(%edx)
874L(aligned_16_94bytes):
875 movdqa %xmm0, -94(%edx)
876L(aligned_16_78bytes):
877 movdqa %xmm0, -78(%edx)
878L(aligned_16_62bytes):
879 movdqa %xmm0, -62(%edx)
880L(aligned_16_46bytes):
881 movdqa %xmm0, -46(%edx)
882L(aligned_16_30bytes):
883 movdqa %xmm0, -30(%edx)
884L(aligned_16_14bytes):
885 movq %xmm0, -14(%edx)
886 movl %eax, -6(%edx)
887 movw %ax, -2(%edx)
888 SETRTNVAL
889 RETURN
890
891 ALIGN (4)
892L(aligned_16_127bytes):
893 movdqa %xmm0, -127(%edx)
894L(aligned_16_111bytes):
895 movdqa %xmm0, -111(%edx)
896L(aligned_16_95bytes):
897 movdqa %xmm0, -95(%edx)
898L(aligned_16_79bytes):
899 movdqa %xmm0, -79(%edx)
900L(aligned_16_63bytes):
901 movdqa %xmm0, -63(%edx)
902L(aligned_16_47bytes):
903 movdqa %xmm0, -47(%edx)
904L(aligned_16_31bytes):
905 movdqa %xmm0, -31(%edx)
906L(aligned_16_15bytes):
907 movq %xmm0, -15(%edx)
908 movl %eax, -7(%edx)
909 movw %ax, -3(%edx)
910 movb %al, -1(%edx)
911 SETRTNVAL
 /* Last return in the function: RETURN_END is used instead of RETURN
    because no code follows that would need the CFI state re-established. */
912 RETURN_END
913
914END (sse2_memset5_atom)