/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cache.h"
/* NOTE(review): presumably here because some compiler configurations
   predefine __i686 as a macro, which would rewrite the
   __i686.get_pc_thunk.bx symbol used below -- confirm before removing.  */
#undef __i686

/* L(name): assembler-local label (.L prefix, not exported).  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n): pad to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler's .cfi_* unwind directives.  Each one
   is only defined here if the including file has not already done so.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): open a global, 16-byte aligned function and its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the function size.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves by 4 and REG
   is saved at the new top of stack.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching 4-byte pop of REG.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the unwind annotations above.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offsets of the stack arguments relative to %esp once ENTRANCE has run
   (PARMS accounts for the return address and any register ENTRANCE pushed).
   With USE_AS_BZERO there is no fill-byte argument and no return value is
   loaded; otherwise SETRTNVAL reloads the destination pointer into %eax.  */
#ifdef USE_AS_BZERO
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#if (defined SHARED || defined __PIC__)
/* Position-independent build: EBX is used as a scratch/PC register by
   BRANCH_TO_JMPTBL_ENTRY, so it is saved on entry and restored on return.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, re-assert
   the "EBX is pushed" unwind state for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the entry index; it is also
   added to EDX before the jump.  Clobbers EBX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__i686.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/* Returns its own return address (the caller's next PC) in EBX.  */
	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
	ALIGN (4)
	.type	__i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build: nothing is pushed on entry, so the arguments start right
   above the return address.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the entry index; it is also added to EDX
   before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif

/* Symbol name of the routine defined below; an including file may
   predefine MEMSET to emit the same body under a different name.  */
#ifndef MEMSET
# define MEMSET memset
#endif

/* Function entry.  Loads the arguments and dispatches on the length.
   Register roles from here on:
     EAX = fill byte replicated into all four bytes (zero for bzero)
     ECX = number of bytes still to write
     EDX = destination pointer  */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

L(write_less32bytes):
	/* ECX < 32: jump to the store sequence for exactly ECX bytes.  The
	   macro also advances EDX by ECX, so the targets address the buffer
	   backwards from its end.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))

/* Jump table for lengths 0..31, indexed by the byte count in ECX.
   Entry N refers to L(write_<N>bytes); JMPTBL decides whether the entry is
   an absolute address or an offset relative to the table.  */
#define LESS32_ENTRY(n) \
	.int	JMPTBL (L(write_##n##bytes), L(table_less_32bytes))

	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	LESS32_ENTRY (0);  LESS32_ENTRY (1);  LESS32_ENTRY (2);  LESS32_ENTRY (3)
	LESS32_ENTRY (4);  LESS32_ENTRY (5);  LESS32_ENTRY (6);  LESS32_ENTRY (7)
	LESS32_ENTRY (8);  LESS32_ENTRY (9);  LESS32_ENTRY (10); LESS32_ENTRY (11)
	LESS32_ENTRY (12); LESS32_ENTRY (13); LESS32_ENTRY (14); LESS32_ENTRY (15)
	LESS32_ENTRY (16); LESS32_ENTRY (17); LESS32_ENTRY (18); LESS32_ENTRY (19)
	LESS32_ENTRY (20); LESS32_ENTRY (21); LESS32_ENTRY (22); LESS32_ENTRY (23)
	LESS32_ENTRY (24); LESS32_ENTRY (25); LESS32_ENTRY (26); LESS32_ENTRY (27)
	LESS32_ENTRY (28); LESS32_ENTRY (29); LESS32_ENTRY (30); LESS32_ENTRY (31)
	.popsection
#undef LESS32_ENTRY

/* Store sequences for lengths 0..31.  On entry EDX points one byte past
   the end of the region and EAX holds the 4-byte fill pattern.
   L(write_<N>bytes) writes exactly the N bytes that end at EDX: each rung
   stores 4 bytes and falls through to the rung for N-4, and the last rung
   of each ladder writes the 0..3 byte remainder.

   WRITE_DWORD (n) emits one rung; the label and the displacement are both
   derived from n so they cannot get out of step.  */
#define WRITE_DWORD(n)	L(write_##n##bytes): movl %eax, -n(%edx)
#define WRITE_DWORD_LADDER(a, b, c, d, e, f, g)	\
	WRITE_DWORD (a);			\
	WRITE_DWORD (b);			\
	WRITE_DWORD (c);			\
	WRITE_DWORD (d);			\
	WRITE_DWORD (e);			\
	WRITE_DWORD (f);			\
	WRITE_DWORD (g)

	/* N = 0 (mod 4).  */
	ALIGN (4)
	WRITE_DWORD_LADDER (28, 24, 20, 16, 12, 8, 4)
L(write_0bytes):
	SETRTNVAL
	RETURN

	/* N = 1 (mod 4).  */
	ALIGN (4)
	WRITE_DWORD_LADDER (29, 25, 21, 17, 13, 9, 5)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 2 (mod 4).  */
	ALIGN (4)
	WRITE_DWORD_LADDER (30, 26, 22, 18, 14, 10, 6)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 3 (mod 4).  */
	ALIGN (4)
	WRITE_DWORD_LADDER (31, 27, 23, 19, 15, 11, 7)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

#undef WRITE_DWORD_LADDER
#undef WRITE_DWORD

/* ECX >= 32.  Nothing is known about the alignment of EDX yet.
   Broadcast the fill pattern into XMM0, then make EDX 16-byte aligned so
   the rest of the function can use aligned 16-byte stores.  */
	ALIGN (4)
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16-byte aligned: cover the unaligned head with
   one unaligned 16-byte store, then continue from the next boundary.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx		/* EDX = next 16-byte boundary.  */
	sub	%edx, %eax		/* EAX = -(bytes skipped, 1..15).  */
	add	%eax, %ecx		/* ECX = bytes left from the new EDX.  */
	movd	%xmm0, %eax		/* EAX was scratch; reload the pattern.  */

	ALIGN (4)
/* EDX is 16-byte aligned, XMM0 and EAX hold the pattern.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	/* ECX < 128: finish with the store sequence for exactly ECX bytes.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* ECX >= 128, EDX 16-byte aligned, XMM0 and EAX hold the pattern.
   Three strategies, chosen by comparing ECX with the cache sizes:
     ECX <  data cache size   : plain unrolled 128-byte movdqa loop
     ECX <  shared cache size : 128-byte loop with prefetcht0
     ECX >= shared cache size : cached stores for the first "shared cache
                                size" bytes, non-temporal stores after that
   Every path ends by dispatching the final 0..127 bytes through
   L(table_16_128bytes).

   EBX holds the shared cache size.  It is spilled with PUSH first unless
   the size is fetched through the GOT in a PIC build, where the PC thunk
   overwrites EBX without a push (ENTRANCE already saved it).
   MEMSET_EBX_SPILLED records that decision so every POP below uses the
   same condition as the PUSH.  The two used to be keyed on different
   macros (SHARED_CACHE_SIZE for the push, DATA_CACHE_SIZE for the pops),
   which unbalanced the stack in PIC builds that define only one of them.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
# define MEMSET_EBX_SPILLED 1
#endif

	ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Below the shared cache size: EBX is no longer needed as a counter.
	   RESTORE_EBX_STATE re-asserts the "EBX pushed" unwind state in front
	   of L(128bytesormore_nt_start), which is entered before this POP.  */
#ifdef MEMSET_EBX_SPILLED
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif

#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
	/* EBX is free to clobber here: ENTRANCE saved the caller's value.  */
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so the "sub $128" inside the loop sets the carry
	   flag exactly when fewer than 128 bytes remain after the block just
	   written; the bias is undone at L(128bytesless_normal).  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx		/* lea: keep the flags from sub.  */
	jb	L(128bytesless_normal)

	/* Second copy of the loop body (unrolled by two).  */
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx		/* lea: keep the flags from sub.  */
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	add	$128, %ecx		/* Undo the bias: ECX = 0..127 left.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Data cache size <= ECX < shared cache size: same 128-byte stores, with
   prefetches issued ahead of the store pointer.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* ECX >= shared cache size (EBX).  Split the work: EBX bytes, rounded down
   to a multiple of 128, go through the cached loop; ECX is reduced to what
   is left after that, i.e. ECX - EBX plus the sub-128 remainder of EBX.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	movd	%xmm0, %eax		/* EAX was scratch; reload the pattern.  */
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
/* Remaining full 128-byte blocks are written with non-temporal stores,
   followed by an sfence once the loop is done.  */
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	/* Undo the spill made at L(128bytesormore), if there was one.  */
#ifdef MEMSET_EBX_SPILLED
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
#undef MEMSET_EBX_SPILLED

/* Jump table for the final 0..127 bytes, indexed by the byte count in ECX.
   Entry N refers to L(aligned_16_<N>bytes); JMPTBL decides whether the
   entry is an absolute address or an offset relative to the table.
   Each row below holds eight consecutive entries.  */
#define JT16(n) \
	.int	JMPTBL (L(aligned_16_##n##bytes), L(table_16_128bytes))

	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	JT16 (0);   JT16 (1);   JT16 (2);   JT16 (3);   JT16 (4);   JT16 (5);   JT16 (6);   JT16 (7)
	JT16 (8);   JT16 (9);   JT16 (10);  JT16 (11);  JT16 (12);  JT16 (13);  JT16 (14);  JT16 (15)
	JT16 (16);  JT16 (17);  JT16 (18);  JT16 (19);  JT16 (20);  JT16 (21);  JT16 (22);  JT16 (23)
	JT16 (24);  JT16 (25);  JT16 (26);  JT16 (27);  JT16 (28);  JT16 (29);  JT16 (30);  JT16 (31)
	JT16 (32);  JT16 (33);  JT16 (34);  JT16 (35);  JT16 (36);  JT16 (37);  JT16 (38);  JT16 (39)
	JT16 (40);  JT16 (41);  JT16 (42);  JT16 (43);  JT16 (44);  JT16 (45);  JT16 (46);  JT16 (47)
	JT16 (48);  JT16 (49);  JT16 (50);  JT16 (51);  JT16 (52);  JT16 (53);  JT16 (54);  JT16 (55)
	JT16 (56);  JT16 (57);  JT16 (58);  JT16 (59);  JT16 (60);  JT16 (61);  JT16 (62);  JT16 (63)
	JT16 (64);  JT16 (65);  JT16 (66);  JT16 (67);  JT16 (68);  JT16 (69);  JT16 (70);  JT16 (71)
	JT16 (72);  JT16 (73);  JT16 (74);  JT16 (75);  JT16 (76);  JT16 (77);  JT16 (78);  JT16 (79)
	JT16 (80);  JT16 (81);  JT16 (82);  JT16 (83);  JT16 (84);  JT16 (85);  JT16 (86);  JT16 (87)
	JT16 (88);  JT16 (89);  JT16 (90);  JT16 (91);  JT16 (92);  JT16 (93);  JT16 (94);  JT16 (95)
	JT16 (96);  JT16 (97);  JT16 (98);  JT16 (99);  JT16 (100); JT16 (101); JT16 (102); JT16 (103)
	JT16 (104); JT16 (105); JT16 (106); JT16 (107); JT16 (108); JT16 (109); JT16 (110); JT16 (111)
	JT16 (112); JT16 (113); JT16 (114); JT16 (115); JT16 (116); JT16 (117); JT16 (118); JT16 (119)
	JT16 (120); JT16 (121); JT16 (122); JT16 (123); JT16 (124); JT16 (125); JT16 (126); JT16 (127)
	.popsection
#undef JT16

/* Store sequences for the final 0..127 bytes.  On entry EDX points one
   byte past the end of the region (a 16-byte aligned address plus the byte
   count N), XMM0 holds the 16-byte fill pattern and EAX its low 4 bytes.
   L(aligned_16_<N>bytes) writes exactly the N bytes that end at EDX: each
   rung stores 16 bytes and falls through to the rung for N-16, and the last
   rung of each ladder writes the 0..15 byte remainder with movq/movl/movw/
   movb.  Because every rung in a ladder shares the same N mod 16, the
   movdqa addresses EDX-N, EDX-N+16, ... stay 16-byte aligned.

   WRITE_XMM (n) emits one rung; the label and the displacement are both
   derived from n so they cannot get out of step.  */
#define WRITE_XMM(n)	L(aligned_16_##n##bytes): movdqa %xmm0, -n(%edx)
#define WRITE_XMM_LADDER(a, b, c, d, e, f, g)	\
	WRITE_XMM (a);				\
	WRITE_XMM (b);				\
	WRITE_XMM (c);				\
	WRITE_XMM (d);				\
	WRITE_XMM (e);				\
	WRITE_XMM (f);				\
	WRITE_XMM (g)

	/* N = 0 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (112, 96, 80, 64, 48, 32, 16)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* N = 1 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (113, 97, 81, 65, 49, 33, 17)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 2 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (114, 98, 82, 66, 50, 34, 18)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 3 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (115, 99, 83, 67, 51, 35, 19)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 4 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (116, 100, 84, 68, 52, 36, 20)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N = 5 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (117, 101, 85, 69, 53, 37, 21)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 6 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (118, 102, 86, 70, 54, 38, 22)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 7 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (119, 103, 87, 71, 55, 39, 23)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 8 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (120, 104, 88, 72, 56, 40, 24)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* N = 9 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (121, 105, 89, 73, 57, 41, 25)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 10 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (122, 106, 90, 74, 58, 42, 26)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 11 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (123, 107, 91, 75, 59, 43, 27)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 12 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (124, 108, 92, 76, 60, 44, 28)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N = 13 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (125, 109, 93, 77, 61, 45, 29)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 14 (mod 16).  */
	ALIGN (4)
	WRITE_XMM_LADDER (126, 110, 94, 78, 62, 46, 30)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 15 (mod 16).  This is the last return in the function, so it
	   uses RETURN_END (no unwind state to re-establish afterwards).  */
	ALIGN (4)
	WRITE_XMM_LADDER (127, 111, 95, 79, 63, 47, 31)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

#undef WRITE_XMM_LADDER
#undef WRITE_XMM

END (MEMSET)