/*
 * WORD_REF(ADDRESS, OFFSET)
 *   Lvalue of type WORD_TYPE located OFFSET bytes past ADDRESS.
 *   ADDRESS may be any object pointer; it is converted to char* so that
 *   OFFSET is always counted in bytes.  The access is a plain dereference,
 *   so ADDRESS + OFFSET must be suitably aligned for WORD_TYPE.
 *
 *   The expansion is fully parenthesized so that neighbouring operators
 *   (postfix ++, [], comparison, ...) apply to the referenced word rather
 *   than regrouping with the cast expression inside the macro.
 */
#define WORD_REF(ADDRESS, OFFSET) \
  (*((WORD_TYPE*)((char*)(ADDRESS) + (OFFSET))))

/*
 * WORD_COPY(OUT, IN, OFFSET)
 *   Copy one WORD_TYPE from IN + OFFSET to OUT + OFFSET (byte offsets).
 *   Expands to a parenthesized assignment expression whose value is the
 *   word that was stored, so it works both as a statement and inside a
 *   larger expression.  OUT, IN and OFFSET are each evaluated exactly once.
 */
#define WORD_COPY(OUT, IN, OFFSET) \
  (WORD_REF(OUT, OFFSET) = WORD_REF(IN, OFFSET))
/*
 * DWORD_REF(ADDRESS, OFFSET)
 *   Lvalue of type DWORD_TYPE located OFFSET bytes past ADDRESS.
 *   ADDRESS is converted to char* first, so OFFSET is a byte count.
 *   The access is a plain dereference: ADDRESS + OFFSET must be suitably
 *   aligned for DWORD_TYPE.
 *
 *   The whole expansion is parenthesized so that operators written next
 *   to the macro invocation bind to the referenced object.
 */
#define DWORD_REF(ADDRESS, OFFSET) \
  (*((DWORD_TYPE*)((char*)(ADDRESS) + (OFFSET))))

/*
 * DWORD_COPY(OUT, IN, OFFSET)
 *   Copy one DWORD_TYPE from IN + OFFSET to OUT + OFFSET (byte offsets).
 *   Expands to a parenthesized assignment expression yielding the stored
 *   value; each argument is evaluated exactly once.
 */
#define DWORD_COPY(OUT, IN, OFFSET) \
  (DWORD_REF(OUT, OFFSET) = DWORD_REF(IN, OFFSET))
/* Per-architecture choice of the wide copy unit (WORD_TYPE) and of the
   prefetch hint (MAYBE_PREFETCH).
   NOTE(review): this listing omits lines -- no #else, no #endif and no
   WORD_SIZE / DWORD_SIZE definitions are visible here; confirm the exact
   conditional structure against the complete file.  */
/* NEON targets: the wide word is a NEON vector of sixteen 8-bit lanes.  */
54 #if defined(__ARM_NEON__)
57 #define WORD_TYPE uint8x16_t
/* Read prefetch (second argument 0) with the lowest temporal-locality
   hint (third argument 0).  */
59 #define MAYBE_PREFETCH(IN) __builtin_prefetch((IN), 0, 0)
/* ARMv5E / ARMv5TE / ARMv5TEJ / ARMv6 targets: the wide word is 64 bits,
   with the same prefetch hint as above.  */
64 #elif defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
65 || defined(__ARM_ARCH_5TEJ__) || defined(_ISA_ARM_6)
66 #define WORD_TYPE uint64_t
68 #define MAYBE_PREFETCH(IN) __builtin_prefetch((IN), 0, 0)
/* 32-bit wide word; MAYBE_PREFETCH expands to nothing, so prefetch calls
   compile away.
   NOTE(review): presumably the fallback (#else) branch -- the directive
   itself is not visible in this listing; confirm.  */
73 #define WORD_TYPE uint32_t
75 #define MAYBE_PREFETCH(IN)
/* Element type accessed by DWORD_REF / DWORD_COPY.
   NOTE(review): cannot tell from here whether this definition is
   conditional or applies to every target; confirm.  */
76 #define DWORD_TYPE uint64_t
/* 'SHORTWORD' is the 32-bit copy unit; SHORTWORD_SIZE is its size in bytes.
   SHORTWORD_SIZE stays a preprocessor macro because it is compared inside
   #if directives (e.g. "#if WORD_SIZE > SHORTWORD_SIZE").  */
#define SHORTWORD_TYPE uint32_t
#define SHORTWORD_SIZE 4

/*
 * SHORTWORD_REF(ADDRESS, OFFSET)
 *   Lvalue of type SHORTWORD_TYPE located OFFSET bytes past ADDRESS.
 *   ADDRESS is converted to char* first, so OFFSET is a (possibly
 *   negative) byte count.  The access is a plain dereference, so
 *   ADDRESS + OFFSET must be suitably aligned for SHORTWORD_TYPE.
 *
 *   The whole expansion is parenthesized so that operators written next
 *   to the macro invocation bind to the referenced object.
 */
#define SHORTWORD_REF(ADDRESS, OFFSET) \
  (*((SHORTWORD_TYPE*)((char*)(ADDRESS) + (OFFSET))))

/*
 * SHORTWORD_COPY(OUT, IN, OFFSET)
 *   Copy one SHORTWORD_TYPE from IN + OFFSET to OUT + OFFSET.
 *   Expands to a parenthesized assignment expression yielding the stored
 *   value; each argument is evaluated exactly once.
 */
#define SHORTWORD_COPY(OUT, IN, OFFSET) \
  (SHORTWORD_REF(OUT, OFFSET) = SHORTWORD_REF(IN, OFFSET))
/* SHORTWORD_SHIFT(IN0, IN1, OFFSET): build one shortword out of two input
   shortwords by shifting them in opposite directions and OR-ing the
   results.  OFFSET is a byte count (it is multiplied by 8 to get bits).
   OFFSET must lie strictly between 0 and SHORTWORD_SIZE: with OFFSET == 0
   the second shift would be by the full width of the type, which is
   undefined in C.
   NOTE(review): the expansion has no outer parentheses, so it is only
   safe as a complete right-hand side.
   NOTE(review): two definitions of the same name follow back-to-back;
   presumably a conditional (endianness?) on lines missing from this
   listing selects one of them -- confirm against the complete file.  */
/* Variant 1: IN0 shifted left by OFFSET bytes, IN1 shifted right by
   (SHORTWORD_SIZE - OFFSET) bytes.  */
90 #define SHORTWORD_SHIFT(IN0, IN1, OFFSET) \
91 ((IN0) << ((OFFSET)*8)) | ((IN1) >> (SHORTWORD_SIZE*8 - (OFFSET)*8))
/* Variant 2: the mirror image -- IN0 shifted right by OFFSET bytes, IN1
   shifted left by (SHORTWORD_SIZE - OFFSET) bytes.  */
93 #define SHORTWORD_SHIFT(IN0, IN1, OFFSET) \
94 ((IN0) >> ((OFFSET)*8)) | ((IN1) << (SHORTWORD_SIZE*8 - (OFFSET)*8))
/*
 * memcpy (OUT, IN, N): copy N bytes from IN to OUT, using progressively
 * wider units (byte, SHORTWORD, WORD, DWORD) as the alignment of both
 * pointers allows.
 *
 * NOTE(review): this listing is an excerpt.  Parameter declarations, some
 * local declarations (e.g. the index 'i'), closing braces, #else/#endif
 * directives, some pointer/index increments and the return statement are
 * not visible.  The comments below describe only what the visible lines
 * do; confirm the surrounding structure against the complete file.
 */
98 _DEFUN (
memcpy, (OUT, IN, N),
/* Size-optimised build: a plain byte loop from OUT up to OUT_end
   (one past the last destination byte).  The pointer increments are on
   lines not shown here.  */
105 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
106 const char* OUT_end = (
char*)OUT + N;
107 while ((
char*)OUT < OUT_end) {
108 *((
char*)OUT) = *((
char*)IN);
/* Copies shorter than one shortword (hinted to the compiler as the likely
   case) are done byte-by-byte, indexed by i; the loop header is not
   visible in this listing.  */
116 if (__builtin_expect(N < SHORTWORD_SIZE, 1)) {
119 ((
char*)OUT)[i] = ((
char*)IN)[i];
/* OUT_end marks one past the last destination byte; the remaining loops
   compare against it instead of counting down N.  */
125 const char* OUT_end = (
char*)OUT + N;
/* Byte-copy until the destination is SHORTWORD_SIZE-aligned.  */
128 while ((uintptr_t)OUT % SHORTWORD_SIZE != 0) {
129 *(
char*) (OUT++) = *(
char*) (IN++);
/* Byte-copy until the destination is DWORD_SIZE-aligned.  */
133 while ((uintptr_t)OUT % DWORD_SIZE != 0) {
134 *(
char*) (OUT++) = *(
char*) (IN++);
/* If the source is DWORD_SIZE-aligned as well, move 16 DWORDs per
   iteration while at least that many bytes remain.  */
137 if ((uintptr_t) IN % DWORD_SIZE == 0) {
138 while (OUT_end - (
char*)OUT >= DWORD_SIZE * 16) {
139 DWORD_COPY(OUT, IN, 0);
140 DWORD_COPY(OUT, IN, DWORD_SIZE * 1);
141 DWORD_COPY(OUT, IN, DWORD_SIZE * 2);
142 DWORD_COPY(OUT, IN, DWORD_SIZE * 3);
143 DWORD_COPY(OUT, IN, DWORD_SIZE * 4);
144 DWORD_COPY(OUT, IN, DWORD_SIZE * 5);
145 DWORD_COPY(OUT, IN, DWORD_SIZE * 6);
146 DWORD_COPY(OUT, IN, DWORD_SIZE * 7);
147 DWORD_COPY(OUT, IN, DWORD_SIZE * 8);
148 DWORD_COPY(OUT, IN, DWORD_SIZE * 9);
149 DWORD_COPY(OUT, IN, DWORD_SIZE * 10);
150 DWORD_COPY(OUT, IN, DWORD_SIZE * 11);
151 DWORD_COPY(OUT, IN, DWORD_SIZE * 12);
152 DWORD_COPY(OUT, IN, DWORD_SIZE * 13);
153 DWORD_COPY(OUT, IN, DWORD_SIZE * 14);
154 DWORD_COPY(OUT, IN, DWORD_SIZE * 15);
155 OUT += DWORD_SIZE * 16;
156 IN += DWORD_SIZE * 16;
/* Source is shortword-aligned: aligned shortword/word copies are
   possible.  */
160 if ((uintptr_t) IN % SHORTWORD_SIZE == 0) {
/* Wide-word path, compiled only when WORD is wider than SHORTWORD.  */
162 #if WORD_SIZE > SHORTWORD_SIZE
/* Hinted as unlikely: at least one full WORD remains.  */
164 if (__builtin_expect(OUT_end - (
char*)OUT >= WORD_SIZE, 0)) {
/* Shortword-copy until the destination is WORD_SIZE-aligned.  */
165 while ((uintptr_t)OUT % WORD_SIZE != 0) {
166 SHORTWORD_COPY(OUT, IN, 0);
167 OUT += SHORTWORD_SIZE;
168 IN += SHORTWORD_SIZE;
/* Whole-WORD copies are used only if the source is WORD_SIZE-aligned
   too.  */
171 if ((uintptr_t) IN % WORD_SIZE == 0) {
/* NEON variant: the visible lines address data as (pointer + i) instead
   of advancing OUT/IN; the updates of i are on lines not shown.  N is
   recomputed as the number of bytes still to copy.  */
174 #if defined(__ARM_NEON__)
179 N = OUT_end - (
char*)OUT;
/* Prefetch ahead of the read position in 64-byte steps.  */
180 MAYBE_PREFETCH(IN + 64);
181 MAYBE_PREFETCH(IN + 128);
182 MAYBE_PREFETCH(IN + 192);
184 MAYBE_PREFETCH(IN + 256);
185 MAYBE_PREFETCH(IN + 320);
186 MAYBE_PREFETCH(IN + 384);
187 MAYBE_PREFETCH(IN + 448);
188 MAYBE_PREFETCH(IN + 512);
189 MAYBE_PREFETCH(IN + 576);
190 MAYBE_PREFETCH(IN + 640);
191 MAYBE_PREFETCH(IN + 704);
/* 16 WORDs per iteration with further prefetching, for as long as more
   than 640 bytes lie beyond the current 16-WORD group.
   NOTE(review): if N is unsigned and smaller than 640, 'N - 640' wraps
   around; a guard may exist on a line missing from this listing (the
   original line 183 is absent) -- confirm.  */
194 while (i + WORD_SIZE * 16 <= N - 640) {
195 MAYBE_PREFETCH(IN + 768);
196 MAYBE_PREFETCH(IN + 832);
197 MAYBE_PREFETCH(IN + 896);
198 MAYBE_PREFETCH(IN + 960);
199 WORD_COPY(OUT, IN, i);
200 WORD_COPY(OUT, IN, i + WORD_SIZE * 1);
201 WORD_COPY(OUT, IN, i + WORD_SIZE * 2);
202 WORD_COPY(OUT, IN, i + WORD_SIZE * 3);
203 WORD_COPY(OUT, IN, i + WORD_SIZE * 4);
204 WORD_COPY(OUT, IN, i + WORD_SIZE * 5);
205 WORD_COPY(OUT, IN, i + WORD_SIZE * 6);
206 WORD_COPY(OUT, IN, i + WORD_SIZE * 7);
207 WORD_COPY(OUT, IN, i + WORD_SIZE * 8);
208 WORD_COPY(OUT, IN, i + WORD_SIZE * 9);
209 WORD_COPY(OUT, IN, i + WORD_SIZE * 10);
210 WORD_COPY(OUT, IN, i + WORD_SIZE * 11);
211 WORD_COPY(OUT, IN, i + WORD_SIZE * 12);
212 WORD_COPY(OUT, IN, i + WORD_SIZE * 13);
213 WORD_COPY(OUT, IN, i + WORD_SIZE * 14);
214 WORD_COPY(OUT, IN, i + WORD_SIZE * 15);
/* 16 WORDs per iteration, no additional prefetching.  */
218 while (i + WORD_SIZE * 16 <= N) {
219 WORD_COPY(OUT, IN, i);
220 WORD_COPY(OUT, IN, i + WORD_SIZE * 1);
221 WORD_COPY(OUT, IN, i + WORD_SIZE * 2);
222 WORD_COPY(OUT, IN, i + WORD_SIZE * 3);
223 WORD_COPY(OUT, IN, i + WORD_SIZE * 4);
224 WORD_COPY(OUT, IN, i + WORD_SIZE * 5);
225 WORD_COPY(OUT, IN, i + WORD_SIZE * 6);
226 WORD_COPY(OUT, IN, i + WORD_SIZE * 7);
227 WORD_COPY(OUT, IN, i + WORD_SIZE * 8);
228 WORD_COPY(OUT, IN, i + WORD_SIZE * 9);
229 WORD_COPY(OUT, IN, i + WORD_SIZE * 10);
230 WORD_COPY(OUT, IN, i + WORD_SIZE * 11);
231 WORD_COPY(OUT, IN, i + WORD_SIZE * 12);
232 WORD_COPY(OUT, IN, i + WORD_SIZE * 13);
233 WORD_COPY(OUT, IN, i + WORD_SIZE * 14);
234 WORD_COPY(OUT, IN, i + WORD_SIZE * 15);
/* 4 WORDs per iteration.  */
237 while (i + WORD_SIZE * 4 <= N) {
238 WORD_COPY(OUT, IN, i);
239 WORD_COPY(OUT, IN, i + WORD_SIZE * 1);
240 WORD_COPY(OUT, IN, i + WORD_SIZE * 2);
241 WORD_COPY(OUT, IN, i + WORD_SIZE * 3);
/* One WORD per iteration.  */
244 while (i + WORD_SIZE <= N) {
245 WORD_COPY(OUT, IN, i);
/* Pointer-advancing variant of the same scheme.
   NOTE(review): presumably the non-NEON (#else) alternative; the
   directive is not visible in this listing -- confirm.  */
253 MAYBE_PREFETCH(IN + 64);
254 MAYBE_PREFETCH(IN + 128);
255 MAYBE_PREFETCH(IN + 192);
/* 16 WORDs per iteration, prefetching 256 and 320 bytes ahead.  */
256 while (OUT_end - (
char*)OUT >= WORD_SIZE * 16) {
257 MAYBE_PREFETCH(IN + 256);
258 MAYBE_PREFETCH(IN + 320);
259 WORD_COPY(OUT, IN, 0);
260 WORD_COPY(OUT, IN, WORD_SIZE * 1);
261 WORD_COPY(OUT, IN, WORD_SIZE * 2);
262 WORD_COPY(OUT, IN, WORD_SIZE * 3);
263 WORD_COPY(OUT, IN, WORD_SIZE * 4);
264 WORD_COPY(OUT, IN, WORD_SIZE * 5);
265 WORD_COPY(OUT, IN, WORD_SIZE * 6);
266 WORD_COPY(OUT, IN, WORD_SIZE * 7);
267 WORD_COPY(OUT, IN, WORD_SIZE * 8);
268 WORD_COPY(OUT, IN, WORD_SIZE * 9);
269 WORD_COPY(OUT, IN, WORD_SIZE * 10);
270 WORD_COPY(OUT, IN, WORD_SIZE * 11);
271 WORD_COPY(OUT, IN, WORD_SIZE * 12);
272 WORD_COPY(OUT, IN, WORD_SIZE * 13);
273 WORD_COPY(OUT, IN, WORD_SIZE * 14);
274 WORD_COPY(OUT, IN, WORD_SIZE * 15);
275 OUT += WORD_SIZE * 16;
276 IN += WORD_SIZE * 16;
/* 4 WORDs per iteration.
   NOTE(review): only the OUT increment is visible for this loop; the
   matching IN increment is presumably on a line missing from the
   listing -- confirm.  */
278 while (WORD_SIZE * 4 <= OUT_end - (
char*)OUT) {
279 WORD_COPY(OUT, IN, 0);
280 WORD_COPY(OUT, IN, WORD_SIZE * 1);
281 WORD_COPY(OUT, IN, WORD_SIZE * 2);
282 WORD_COPY(OUT, IN, WORD_SIZE * 3);
283 OUT += WORD_SIZE * 4;
/* One WORD per iteration; pointer increments are not visible here.  */
286 while (WORD_SIZE <= OUT_end - (
char*)OUT) {
287 WORD_COPY(OUT, IN, 0);
/* When WORD is wider than SHORTWORD, an unrolled 4-shortword loop is
   compiled in.  */
293 #if WORD_SIZE > SHORTWORD_SIZE
295 while (SHORTWORD_SIZE * 4 <= OUT_end - (
char*)OUT) {
296 SHORTWORD_COPY(OUT, IN, 0);
297 SHORTWORD_COPY(OUT, IN, SHORTWORD_SIZE * 1);
298 SHORTWORD_COPY(OUT, IN, SHORTWORD_SIZE * 2);
299 SHORTWORD_COPY(OUT, IN, SHORTWORD_SIZE * 3);
300 OUT += SHORTWORD_SIZE * 4;
301 IN += SHORTWORD_SIZE * 4;
/* Remaining whole shortwords, one per iteration.  */
306 while (SHORTWORD_SIZE <= OUT_end - (
char*)OUT) {
307 SHORTWORD_COPY(OUT, IN, 0);
308 OUT += SHORTWORD_SIZE;
309 IN += SHORTWORD_SIZE;
/* Source not shortword-aligned (presumably the else-branch of the
   alignment test above; the 'else' is not visible): read the source only
   at shortword-aligned addresses (IN - misalign) and splice neighbouring
   shortwords together with SHORTWORD_SHIFT.  The first load starts
   'misalign' bytes before IN.  */
314 ptrdiff_t misalign = (uintptr_t)IN % SHORTWORD_SIZE;
316 SHORTWORD_TYPE temp1, temp2;
317 temp1 = SHORTWORD_REF(IN, -misalign);
/* Per output shortword: advance IN, load the next aligned source
   shortword into temp2, combine it with temp1 and store the result.
   NOTE(review): the hand-over of temp2 to temp1 for the next iteration is
   presumably on a line missing from the listing -- confirm.  */
321 while (SHORTWORD_SIZE <= OUT_end - (
char*)OUT) {
322 IN += SHORTWORD_SIZE;
323 temp2 = SHORTWORD_REF(IN, -misalign);
324 SHORTWORD_REF(OUT, 0) = SHORTWORD_SHIFT(temp1, temp2, misalign);
326 OUT += SHORTWORD_SIZE;
/* Tail: copy whatever is left one byte at a time until OUT reaches
   OUT_end; the pointer increments are not visible in this listing.  */
331 while ((
char*)OUT < OUT_end) {
332 *((
char*)OUT) = *((
char*)IN);
void * memcpy(void *dest, const void *src, size_t n)