Blame - libc/bionic/memmove_words.c - platform_bionic

blob: 22058bce818145988fb39d6babb7fcaae5a07dad [file] [log] [blame]

Andy McFadden	4ce737f	2011-02-04 14:45:57 -0800	[diff] [blame^]	1	/*
				2	* Copyright (C) 2011 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
				19
				20	/*
				21	* Works like memmove(), except:
				22	* - if all arguments are at least 32-bit aligned, we guarantee that we
				23	* will use operations that preserve atomicity of 32-bit values
				24	* - if not, we guarantee atomicity of 16-bit values
				25	*
				26	* If all three arguments are not at least 16-bit aligned, the behavior
				27	* of this function is undefined. (We could remove this restriction by
				28	* testing for unaligned values and punting to memmove(), but that's
				29	* not currently useful.)
				30	*
				31	* TODO: add loop for 64-bit alignment
				32	* TODO: use __builtin_prefetch
				33	* TODO: write an ARM-optimized version
				34	*/
				35	void _memmove_words(void* dest, const void* src, size_t n)
				36	{
				37	assert((((uintptr_t) dest \| (uintptr_t) src \| n) & 0x01) == 0);
				38
				39	char* d = (char*) dest;
				40	const char* s = (const char*) src;
				41	size_t copyCount;
				42
				43	/*
				44	* If the source and destination pointers are the same, this is
				45	* an expensive no-op. Testing for an empty move now allows us
				46	* to skip a check later.
				47	*/
				48	if (n == 0 \|\| d == s)
				49	return;
				50
				51	/*
				52	* Determine if the source and destination buffers will overlap if
				53	* we copy data forward (i.e. dest++ = src++).
				54	*
				55	* It's okay if the destination buffer starts before the source and
				56	* there is some overlap, because the reader is always ahead of the
				57	* writer.
				58	*/
				59	if (__builtin_expect((d < s) \|\| ((size_t)(d - s) >= n), 1)) {
				60	/*
				61	* Copy forward. We prefer 32-bit loads and stores even for 16-bit
				62	* data, so sort that out.
				63	*/
				64	if ((((uintptr_t) d \| (uintptr_t) s) & 0x03) != 0) {
				65	/*
				66	* Not 32-bit aligned. Two possibilities:
				67	* (1) Congruent, we can align to 32-bit by copying one 16-bit val
				68	* (2) Non-congruent, we can do one of:
				69	* a. copy whole buffer as a series of 16-bit values
				70	* b. load/store 32 bits, using shifts to ensure alignment
				71	* c. just copy the as 32-bit values and assume the CPU
				72	* will do a reasonable job
				73	*
				74	* We're currently using (a), which is suboptimal.
				75	*/
				76	if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) {
				77	copyCount = n;
				78	} else {
				79	copyCount = 2;
				80	}
				81	n -= copyCount;
				82	copyCount /= sizeof(uint16_t);
				83
				84	while (copyCount--) {
				85	(uint16_t)d = (uint16_t)s;
				86	d += sizeof(uint16_t);
				87	s += sizeof(uint16_t);
				88	}
				89	}
				90
				91	/*
				92	* Copy 32-bit aligned words.
				93	*/
				94	copyCount = n / sizeof(uint32_t);
				95	while (copyCount--) {
				96	(uint32_t)d = (uint32_t)s;
				97	d += sizeof(uint32_t);
				98	s += sizeof(uint32_t);
				99	}
				100
				101	/*
				102	* Check for leftovers. Either we finished exactly, or we have
				103	* one remaining 16-bit chunk.
				104	*/
				105	if ((n & 0x02) != 0) {
				106	(uint16_t)d = (uint16_t)s;
				107	}
				108	} else {
				109	/*
				110	* Copy backward, starting at the end.
				111	*/
				112	d += n;
				113	s += n;
				114
				115	if ((((uintptr_t) d \| (uintptr_t) s) & 0x03) != 0) {
				116	/* try for 32-bit alignment */
				117	if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) {
				118	copyCount = n;
				119	} else {
				120	copyCount = 2;
				121	}
				122	n -= copyCount;
				123	copyCount /= sizeof(uint16_t);
				124
				125	while (copyCount--) {
				126	d -= sizeof(uint16_t);
				127	s -= sizeof(uint16_t);
				128	(uint16_t)d = (uint16_t)s;
				129	}
				130	}
				131
				132	/* copy 32-bit aligned words */
				133	copyCount = n / sizeof(uint32_t);
				134	while (copyCount--) {
				135	d -= sizeof(uint32_t);
				136	s -= sizeof(uint32_t);
				137	(uint32_t)d = (uint32_t)s;
				138	}
				139
				140	/* copy leftovers */
				141	if ((n & 0x02) != 0) {
				142	d -= sizeof(uint16_t);
				143	s -= sizeof(uint16_t);
				144	(uint16_t)d = (uint16_t)s;
				145	}
				146	}
				147	}