blob: 22058bce818145988fb39d6babb7fcaae5a07dad [file] [log] [blame]
Andy McFadden4ce737f2011-02-04 14:45:57 -08001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
19
/*
 * Copies "n" bytes forward (lowest address first) from "s" to "d".
 *
 * The caller guarantees that "n" is nonzero, that "d", "s" and "n" are all
 * multiples of 2, and that a forward copy is safe for any overlap between
 * the two ranges.
 */
static void copy_words_forward(char* d, const char* s, size_t n)
{
    size_t count;

    /*
     * We prefer 32-bit loads and stores even for 16-bit data, so sort
     * out the alignment first.
     */
    if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) {
        /*
         * Not 32-bit aligned. Two possibilities:
         * (1) Congruent, we can align to 32-bit by copying one 16-bit val
         * (2) Non-congruent, we can do one of:
         *   a. copy whole buffer as a series of 16-bit values
         *   b. load/store 32 bits, using shifts to ensure alignment
         *   c. just copy them as 32-bit values and assume the CPU
         *      will do a reasonable job
         *
         * We're currently using (a), which is suboptimal.
         */
        const size_t head =
            ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0)
                ? n : sizeof(uint16_t);

        n -= head;
        for (count = head / sizeof(uint16_t); count != 0; count--) {
            *(uint16_t*) d = *(const uint16_t*) s;
            d += sizeof(uint16_t);
            s += sizeof(uint16_t);
        }
    }

    /* Both pointers are now 32-bit aligned (or nothing is left to copy). */
    for (count = n / sizeof(uint32_t); count != 0; count--) {
        *(uint32_t*) d = *(const uint32_t*) s;
        d += sizeof(uint32_t);
        s += sizeof(uint32_t);
    }

    /*
     * Check for leftovers. Either we finished exactly, or we have
     * one remaining 16-bit chunk.
     */
    if ((n & 0x02) != 0) {
        *(uint16_t*) d = *(const uint16_t*) s;
    }
}

/*
 * Copies "n" bytes backward (highest address first). "dEnd" and "sEnd"
 * point one byte past the end of the destination and source ranges.
 *
 * The caller guarantees that "n" is nonzero and that "dEnd", "sEnd" and
 * "n" are all multiples of 2.
 */
static void copy_words_backward(char* dEnd, const char* sEnd, size_t n)
{
    size_t count;

    /*
     * Try for 32-bit alignment of the end pointers, using the same
     * congruent / non-congruent strategy as the forward copy.
     */
    if ((((uintptr_t) dEnd | (uintptr_t) sEnd) & 0x03) != 0) {
        const size_t tail =
            ((((uintptr_t) dEnd ^ (uintptr_t) sEnd) & 0x03) != 0)
                ? n : sizeof(uint16_t);

        n -= tail;
        for (count = tail / sizeof(uint16_t); count != 0; count--) {
            dEnd -= sizeof(uint16_t);
            sEnd -= sizeof(uint16_t);
            *(uint16_t*) dEnd = *(const uint16_t*) sEnd;
        }
    }

    /* copy 32-bit aligned words */
    for (count = n / sizeof(uint32_t); count != 0; count--) {
        dEnd -= sizeof(uint32_t);
        sEnd -= sizeof(uint32_t);
        *(uint32_t*) dEnd = *(const uint32_t*) sEnd;
    }

    /* copy leftovers: at most one 16-bit chunk at the start of the range */
    if ((n & 0x02) != 0) {
        dEnd -= sizeof(uint16_t);
        sEnd -= sizeof(uint16_t);
        *(uint16_t*) dEnd = *(const uint16_t*) sEnd;
    }
}

/*
 * Works like memmove(), except:
 * - if all arguments are at least 32-bit aligned, we guarantee that we
 *   will use operations that preserve atomicity of 32-bit values
 * - if not, we guarantee atomicity of 16-bit values
 *
 * If any of the three arguments (dest, src, n) is not a multiple of 2,
 * the behavior of this function is undefined; the assert() below catches
 * this unless NDEBUG is defined. (We could remove this restriction by
 * testing for unaligned values and punting to memmove(), but that's
 * not currently useful.)
 *
 * NOTE(review): the atomicity guarantee relies on the compiler emitting
 * the 16/32-bit loads and stores as written. Confirm that the build does
 * not turn the copy loops into a byte-wise library call.
 *
 * TODO: add loop for 64-bit alignment
 * TODO: use __builtin_prefetch
 * TODO: write an ARM-optimized version
 */
void _memmove_words(void* dest, const void* src, size_t n)
{
    assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0);

    /*
     * Compare addresses as integers: relational comparison and
     * subtraction of pointers into unrelated objects is undefined in C.
     */
    const uintptr_t destAddr = (uintptr_t) dest;
    const uintptr_t srcAddr = (uintptr_t) src;

    /*
     * If the source and destination pointers are the same, this is
     * an expensive no-op. Testing for an empty move now allows the
     * copy helpers to assume n != 0.
     */
    if (n == 0 || destAddr == srcAddr)
        return;

    /*
     * Determine if the source and destination buffers will overlap if
     * we copy data forward (i.e. *dest++ = *src++).
     *
     * It's okay if the destination buffer starts before the source and
     * there is some overlap, because the reader is always ahead of the
     * writer.
     */
    if (__builtin_expect(destAddr < srcAddr || (destAddr - srcAddr) >= n, 1)) {
        copy_words_forward((char*) dest, (const char*) src, n);
    } else {
        /* Destination starts inside the source range: copy from the end. */
        copy_words_backward((char*) dest + n, (const char*) src + n, n);
    }
}