/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

/*
 * Every macro below is a fallback: it is defined only when nothing earlier
 * (the build flags or an including file) has already defined it.
 */

/* Symbol name given to the routine. */
#ifndef MEMSET
# define MEMSET		memset
#endif

/* Turn a label into an assembler-local one by prefixing it with ".L". */
#ifndef L
# define L(label)	.L##label
#endif

/* Pad the location counter to a multiple of 2^n bytes. */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Call-frame-information bracket directives. */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/*
 * ENTRY(name): mark `name` as a global symbol of type function, place its
 * label here and open the CFI region for it.
 */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
name:				\
	cfi_startproc
#endif

/*
 * END(name): close the CFI region opened by ENTRY and record the symbol
 * size as the distance from `name` to the current location.
 */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/*
 * MEMSET - fill a byte range with one value using SSE2 stores.
 *
 * Syntax: AT&T (GAS), preprocessed with cpp.
 *
 * Default build:          MEMSET(dst, c, n)
 *   In:   %rdi = dst, %rsi = c (only the low byte is used), %rdx = n
 * With USE_AS_BZERO_P:    MEMSET(dst, n), fill value is 0
 *   In:   %rdi = dst, %rsi = n
 *
 * Out:    %rax = dst
 * Clobb:  %rcx, %rdx, %r8, %xmm0, flags
 *         (%rsi as well in the default build)
 * Stack:  not used.
 *
 * Strategy:
 *   n < 16      one or two scalar stores selected by the bits of n
 *   16..128     pairs of unaligned 16-byte stores anchored at both ends of
 *               the buffer; the two groups overlap in the middle
 *   n > 128     64 bytes at each end are written as above, then every
 *               64-byte-aligned line in between is filled by a loop of
 *               aligned stores.  When n is above the shared cache size the
 *               loop uses non-temporal stores followed by sfence.
 */
	.section .text.sse2,"ax",@progbits
ENTRY (MEMSET)
	movq	%rdi, %rax		/* return value = dst */
#ifdef USE_AS_BZERO_P
	movq	%rsi, %rdx		/* n arrives as the second argument */
	xorl	%ecx, %ecx		/* pattern = 0; 32-bit write clears all of %rcx */
#else
	movzbl	%sil, %esi		/* keep only the low byte of c */
	movabsq	$0x0101010101010101, %rcx
	imulq	%rsi, %rcx		/* %rcx = fill byte repeated 8 times */
#endif
	cmpq	$16, %rdx
	jae	L(16bytesormore)

	/* n < 16: the highest set bit of n picks the store width. */
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)		/* n == 0: nothing to write */
	movb	%cl, (%rdi)		/* n == 1 */
L(return):
	ret

	/*
	 * Each small case issues two stores of the same width, one at the
	 * first byte and one ending at the last byte; together they cover
	 * every length in the range named by the label.
	 */
L(8_15bytes):
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	/* Build the 16-byte pattern in %xmm0. */
#ifdef USE_AS_BZERO_P
	pxor	%xmm0, %xmm0
#else
	movq	%rcx, %xmm0		/* low 8 bytes = pattern */
	pshufd	$0, %xmm0, %xmm0	/* copy the low dword into all four dwords */
#endif
	/* First and last 16 bytes: enough for 16 <= n <= 32. */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	/* First and last 32 bytes are now written: enough for n <= 64. */
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	/* First and last 64 bytes are now written: enough for n <= 128. */
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
L(32bytesless):
L(64bytesless):
	ret

	ALIGN (4)
L(128bytesmore):
	/*
	 * The first and last 64 bytes are already filled.
	 *   %rcx = lowest 64-byte-aligned address above dst
	 *   %rdx = (dst + n) rounded down to a multiple of 64
	 *   %r8  = n, kept for the cache-size comparison
	 * Only the aligned span [%rcx, %rdx) can still hold unwritten bytes.
	 */
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rcx, %rdx
	je	L(return)		/* span is empty */

	/* Lengths above the shared cache size take the non-temporal loop. */
#ifdef SHARED_CACHE_SIZE
	cmpq	$SHARED_CACHE_SIZE, %r8
#else
	cmpq	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	/* One 64-byte line per iteration; %rcx is 64-byte aligned here. */
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

	ALIGN (4)
L(128bytesmore_nt):
	/* Same walk as the normal loop, using non-temporal stores. */
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				/* order the non-temporal stores before returning */
	ret

END (MEMSET)
173END (MEMSET)