/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/* NOTE(review): cache.h presumably supplies SHARED_CACHE_SIZE and
   DATA_CACHE_SIZE, which are tested further down -- confirm against the
   header.  */
#include "cache.h"

/* L(label): paste the .L prefix onto LABEL.  Every branch target and
   jump-table symbol in this file is spelled through this macro.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n): align the location counter to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers over the assembler .cfi_* directives.  Each wrapper is
   defined here only when the build environment has not already
   provided one with the same name.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif
60
/* ENTRY(name): mark NAME as a global function symbol, align it with
   .p2align 4, emit its label and open a CFI region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size as the
   distance from NAME to the current location.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif
75
/* CFI_PUSH(REG): unwind annotation for a 4-byte push of REG -- the CFA
   offset grows by 4 and REG is recorded as saved at offset 0.  */
#define CFI_PUSH(REG)				\
	cfi_adjust_cfa_offset (4);		\
	cfi_rel_offset (REG, 0)

/* CFI_POP(REG): the inverse annotation -- the CFA offset shrinks by 4
   and REG is marked as restored.  */
#define CFI_POP(REG)				\
	cfi_adjust_cfa_offset (-4);		\
	cfi_restore (REG)

/* PUSH/POP: the real pushl/popl together with the matching annotation,
   so the stack and the unwind description change in one step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
86
/* Argument offsets from ESP.  PARMS (defined with the PIC / non-PIC
   helpers further down) is the offset of the first argument.

   USE_AS_BZERO build: arguments are (DEST, LEN) and SETRTNVAL expands
   to nothing.
   Default build: arguments are (DEST, CHR, LEN), 4 bytes apart, and
   SETRTNVAL reloads DEST into EAX before each return.  */
#ifdef USE_AS_BZERO
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif
97
/* Prologue, epilogue and jump-table dispatch helpers.

   PIC build (SHARED or __PIC__ defined): EBX is used as a scratch
   register for PC-relative addressing, so ENTRANCE pushes it, every
   return pops it, and the first argument sits at 8(%esp).  Jump-table
   entries are stored as offsets from the table start.

   Non-PIC build: nothing is saved, the first argument sits at 4(%esp)
   and jump-table entries are absolute addresses.

   RETURN is for returns that are followed by more code: after the ret
   it re-applies CFI_PUSH so the unwind description that POP just undid
   is in force again for the instructions assembled after it.
   RETURN_END omits that and is meant for the final return.

   Both BRANCH_TO_JMPTBL_ENTRY variants expect the table index in ECX
   and add ECX to EDX before jumping.  */
#if (defined SHARED || defined __PIC__)
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/* Thunk that copies its own return address into EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif
140
#ifndef MEMSET
# define MEMSET memset
#endif

/* MEMSET entry point.

   Stack arguments (offsets given by DEST / CHR / LEN):
     default build      : DEST, CHR, LEN
     USE_AS_BZERO build : DEST, LEN

   Register roles established here and relied on by the rest of the
   function:
     ECX = number of bytes still to store
     EAX = fill byte replicated into all four bytes (zero for bzero)
     EDX = destination cursor

   Lengths below 32 are dispatched straight through the
   table_less_32bytes jump table; everything else continues at
   L(32bytesormore).  */
	.section	.text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	/* AL -> AX: duplicate the byte into the second byte.  */
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  EDX is free as a temporary
	   here because DEST is only loaded into it below.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
167
168
/* Dispatch table for lengths 0..31.  Entry N leads to L(write_Nbytes);
   the index used by BRANCH_TO_JMPTBL_ENTRY is the byte count in ECX.
   JMPTBL stores either the target itself or its distance from the
   start of the table, depending on how JMPTBL was defined above.  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection
205
/* Tail stores for lengths 0..31.

   On entry EDX points one past the last byte to set (the dispatch macro
   added ECX to it) and EAX holds the 4-byte fill pattern.  The labels
   are grouped by length modulo 4: entering at L(write_Nbytes) falls
   through a run of 4-byte stores and finishes with the 0, 1, 2 or 3
   leftover bytes before returning.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	/* Length = 4k + 1: dwords, then one byte.  */
	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Length = 4k + 2: dwords, then one word.  */
	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Length = 4k + 3: dwords, then one word and one byte.  */
	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
285
/* ECX >= 32 (reached through jae after cmp $32).  EDX is the caller's
   DEST and carries no alignment guarantee yet; the test below sorts
   that out.  EAX holds the 4-byte fill pattern.  */
	ALIGN (4)
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	/* Replicate the low dword into all four dwords.  */
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* One unaligned 16-byte store covers the head, then EDX is rounded
	   up to the next 16-byte boundary.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX (negative); adding it to ECX removes the
	   bytes that were skipped from the remaining count.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch: reload the pattern for the tail code.  */
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
315
/* ECX >= 128 and EDX is 16 byte aligned.

   Picks one of three strategies by comparing ECX with the cache sizes:
     ECX >= shared cache size -> L(128bytesormore_nt_start)
     ECX >= data cache size   -> L(128bytes_L2_normal)
     otherwise                -> the movdqa loop below

   The cache sizes come from the SHARED_CACHE_SIZE / DATA_CACHE_SIZE
   macros when defined, otherwise from the __x86_shared_cache_size /
   __x86_data_cache_size variables.

   EBX bookkeeping: in PIC builds the shared-cache step pushes EBX only
   when SHARED_CACHE_SIZE is defined, and the data-cache step pops it
   only when DATA_CACHE_SIZE is defined.  Defining just one of the two
   would leave the stack unbalanced at the return, so that combination
   is rejected at preprocessing time.  */
#if (defined SHARED || defined __PIC__) \
    && ((defined SHARED_CACHE_SIZE) != (defined DATA_CACHE_SIZE))
# error "SHARED_CACHE_SIZE and DATA_CACHE_SIZE must be defined together in PIC builds"
#endif
	ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	/* RESTORE_EBX_STATE is consumed just before
	   L(128bytesormore_nt_start): that label is reached from the jae
	   above, i.e. before any POP performed here.  */
#ifdef DATA_CACHE_SIZE
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
#  define RESTORE_EBX_STATE
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	POP (%ebx)
#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that a borrow from the sub inside the loop
	   means "fewer than 128 bytes remain after this chunk".  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	/* lea (like the stores) leaves the flags alone, so jb still tests
	   the sub at the top of this chunk.  */
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	/* Second copy of the same 128-byte chunk (loop unrolled twice).  */
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the last subtraction: ECX is the remaining count (0..127)
	   again and indexes the tail table.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
383
/* Loop taken when ECX is at least the data cache size but below the
   shared cache size.  Stores 128 bytes per iteration through aligned
   stores and issues prefetcht0 for the two 64-byte lines at
   EDX + 0x380 and EDX + 0x3c0.  Runs while ECX >= 128, then hands the
   remaining 0..127 bytes to the tail table.  */
	ALIGN (4)
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
403
/* Path for ECX >= shared cache size (EBX holds that size on entry).

   RESTORE_EBX_STATE emits no instruction; it only re-applies the CFI
   annotation for a pushed EBX in the configurations where the code
   assembled just before this point has already popped it.

   Phase 1 stores with movdqa, 128 bytes per iteration, counting EBX
   down.  Phase 2 stores whatever is left in 128-byte chunks with
   movntdq and finishes with sfence.  The final 0..127 bytes go through
   the tail table.  */
	RESTORE_EBX_STATE
L(128bytesormore_nt_start):
	/* ECX = LEN - EBX + (EBX mod 128): the count left over once
	   phase 1 has written EBX rounded down to a multiple of 128.  */
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch: reload the pattern for the tail code.  */
	movd	%xmm0, %eax
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	/* Skip the non-temporal loop when less than one chunk is left.  */
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	/* Drop the extra EBX save made at L(128bytesormore) in the
	   configurations that pushed one.  */
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
449
450
/* Dispatch table for the final 0..127 bytes once EDX is 16 byte
   aligned.  Entry N leads to L(aligned_16_Nbytes); the index used by
   BRANCH_TO_JMPTBL_ENTRY is the remaining byte count in ECX.  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection
583
/* Tail stores for 0..127 remaining bytes, lengths with N mod 16 in
   0..3.

   On entry EDX points one past the last byte to set (the dispatch macro
   added the count N to a 16-byte-aligned cursor), XMM0 holds the
   pattern in all 16 bytes and EAX holds it in 4 bytes.  Entering at
   L(aligned_16_Nbytes), the first store address -N(%edx) is that
   aligned cursor and each following store is 16 bytes further on, which
   is why movdqa can be used at these odd-looking displacements.  The
   last N mod 16 bytes are written with narrower stores that end exactly
   at EDX.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* N = 16k + 1: trailing byte.  */
	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 2: trailing word.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 3: trailing word + byte.  */
	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
663
/* Tail stores for remaining lengths N with N mod 16 in 4..7.  EDX is
   one past the last byte to set, XMM0 / EAX hold the fill pattern.
   Each chain runs 16-byte movdqa stores starting at -N(%edx) and then
   finishes the last 4..7 bytes with a dword store plus word / byte
   stores as needed.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 5: dword + byte.  */
	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 6: dword + word.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 7: dword + word + byte.  */
	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
747
/* Tail stores for remaining lengths N with N mod 16 in 8..11.  EDX is
   one past the last byte to set, XMM0 / EAX hold the fill pattern.
   Each chain runs 16-byte movdqa stores starting at -N(%edx) and then
   finishes the last 8..11 bytes with an 8-byte movq from XMM0 plus
   word / byte stores as needed.  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 9: qword + byte.  */
	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 10: qword + word.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 11: qword + word + byte.  */
	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
831
/* Tail stores for remaining lengths N with N mod 16 in 12..15, followed
   by the end of the function.  EDX is one past the last byte to set,
   XMM0 / EAX hold the fill pattern.  Each chain runs 16-byte movdqa
   stores starting at -N(%edx) and then finishes the last 12..15 bytes
   with an 8-byte movq, a dword store and word / byte stores as needed.
   The very last return uses RETURN_END because no code follows it.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 13: qword + dword + byte.  */
	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 14: qword + dword + word.  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 16k + 15: qword + dword + word + byte.  */
	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END (MEMSET)