blob: 26df7fafa0e55b394908e0ef247ee0fd54c72f96 [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <stdlib.h>
Carl Shapiro2cc2b2b2011-03-21 20:01:03 -070018#include <stdint.h>
Andy McFadden4ce737f2011-02-04 14:45:57 -080019#include <assert.h>
20
/*
 * Returns how many bytes must be moved as individual 16-bit values before
 * the remainder of an n-byte move can proceed with 32-bit accesses.
 *
 * "d" and "s" are the addresses at which copying starts (the buffer starts
 * for a forward copy, the one-past-the-end addresses for a backward copy).
 * Both are expected to be at least 16-bit aligned.
 *
 *  - Both 32-bit aligned: nothing to do, returns 0.
 *  - Congruent (same offset within a 32-bit word): moving a single 16-bit
 *    value brings both pointers to 32-bit alignment, returns 2.
 *  - Non-congruent: the two pointers can never be 32-bit aligned at the
 *    same time.  Alternatives would be to load/store 32 bits using shifts,
 *    or to just copy them as 32-bit values and assume the CPU will do a
 *    reasonable job; we currently move the whole buffer as 16-bit values,
 *    which is suboptimal.  Returns n.
 */
static size_t memmoveWordsHeadBytes(uintptr_t d, uintptr_t s, size_t n)
{
    if (((d | s) & 0x03) == 0) {
        return 0;
    }
    if (((d ^ s) & 0x03) != 0) {
        return n;
    }
    return sizeof(uint16_t);
}

/*
 * Copies n bytes from "s" to "d" in ascending address order.  We prefer
 * 32-bit loads and stores even for 16-bit data; 16-bit accesses are only
 * used to reach alignment and for a trailing 16-bit chunk.
 */
static void memmoveWordsForward(char* d, const char* s, size_t n)
{
    size_t count = memmoveWordsHeadBytes((uintptr_t) d, (uintptr_t) s, n);

    n -= count;
    for (count /= sizeof(uint16_t); count != 0; count--) {
        *(uint16_t*) d = *(const uint16_t*) s;
        d += sizeof(uint16_t);
        s += sizeof(uint16_t);
    }

    /* copy 32-bit aligned words */
    for (count = n / sizeof(uint32_t); count != 0; count--) {
        *(uint32_t*) d = *(const uint32_t*) s;
        d += sizeof(uint32_t);
        s += sizeof(uint32_t);
    }

    /* either we finished exactly, or one 16-bit chunk remains */
    if ((n & 0x02) != 0) {
        *(uint16_t*) d = *(const uint16_t*) s;
    }
}

/*
 * Copies the n bytes that end just before "sEnd" to the n bytes that end
 * just before "dEnd", in descending address order.  Mirrors
 * memmoveWordsForward(): the alignment decision is made on the end
 * addresses because that is where copying starts.
 */
static void memmoveWordsBackward(char* dEnd, const char* sEnd, size_t n)
{
    size_t count = memmoveWordsHeadBytes((uintptr_t) dEnd, (uintptr_t) sEnd, n);

    n -= count;
    for (count /= sizeof(uint16_t); count != 0; count--) {
        dEnd -= sizeof(uint16_t);
        sEnd -= sizeof(uint16_t);
        *(uint16_t*) dEnd = *(const uint16_t*) sEnd;
    }

    /* copy 32-bit aligned words */
    for (count = n / sizeof(uint32_t); count != 0; count--) {
        dEnd -= sizeof(uint32_t);
        sEnd -= sizeof(uint32_t);
        *(uint32_t*) dEnd = *(const uint32_t*) sEnd;
    }

    /* copy leftovers */
    if ((n & 0x02) != 0) {
        dEnd -= sizeof(uint16_t);
        sEnd -= sizeof(uint16_t);
        *(uint16_t*) dEnd = *(const uint16_t*) sEnd;
    }
}

/*
 * Works like memmove(), except:
 * - if all arguments are at least 32-bit aligned, we guarantee that we
 *   will use operations that preserve atomicity of 32-bit values
 * - if not, we guarantee atomicity of 16-bit values
 *
 * "dest", "src" and "n" must all be at least 16-bit aligned (i.e. even).
 * If any of them is not, the behavior of this function is undefined; this
 * is only checked by an assert().  (We could remove this restriction by
 * testing for unaligned values and punting to memmove(), but that's not
 * currently useful.)
 *
 * "n" is a byte count.  The buffers may overlap.
 *
 * TODO: add loop for 64-bit alignment
 * TODO: use __builtin_prefetch
 * TODO: write an ARM-optimized version
 */
void _memmove_words(void* dest, const void* src, size_t n)
{
    assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0);

    /*
     * Work with integer addresses for the comparisons below.  The two
     * buffers are not required to be part of the same object, and C only
     * defines "<" and "-" on pointers into the same array.
     */
    const uintptr_t destAddr = (uintptr_t) dest;
    const uintptr_t srcAddr = (uintptr_t) src;

    /*
     * If the source and destination pointers are the same, this is
     * an expensive no-op.  Testing for an empty move now allows the
     * copy helpers to assume there is at least one 16-bit value to move.
     */
    if (n == 0 || destAddr == srcAddr) {
        return;
    }

    /*
     * Determine if the source and destination buffers will overlap if
     * we copy data forward (i.e. *dest++ = *src++).
     *
     * It's okay if the destination buffer starts before the source and
     * there is some overlap, because the reader is always ahead of the
     * writer.  Otherwise the destination must start at or beyond the end
     * of the source; if it starts inside the source we have to copy
     * backward, starting at the end.
     */
    if (__builtin_expect(destAddr < srcAddr || destAddr - srcAddr >= n, 1)) {
        memmoveWordsForward((char*) dest, (const char*) src, n);
    } else {
        memmoveWordsBackward((char*) dest + n, (const char*) src + n, n);
    }
}