string: Add MTE support to string tests.

Set tags for every test case so that boundaries are as narrow as
possible. There is no handling of tag faults, so the test will
crash if there is an MTE problem.

The implementations that are not MTE compatible are excluded, including
the standard symbols that may come from an MTE-incompatible libc.
diff --git a/config.mk.dist b/config.mk.dist
index cac40eb..3e55c98 100644
--- a/config.mk.dist
+++ b/config.mk.dist
@@ -9,6 +9,9 @@
 # Target architecture: aarch64, arm or x86_64
 ARCH = aarch64
 
+# Use for cross compilation with gcc.
+#CROSS_COMPILE = aarch64-none-linux-gnu-
+
 # Compiler for the target
 CC = $(CROSS_COMPILE)gcc
 CFLAGS = -std=c99 -pipe -O3
@@ -27,8 +30,8 @@
 # Optimize the shared libraries on aarch64 assuming they fit in 1M.
 #CFLAGS_SHARED = -fPIC -mcmodel=tiny
 
-# Use for cross compilation with gcc.
-#CROSS_COMPILE = aarch64-none-linux-gnu-
+# Enable MTE support.
+#CFLAGS += -march=armv8.5-a+memtag -DWANT_MTE_TEST=1
 
 # Use with cross testing.
 #EMULATOR = qemu-aarch64-static
diff --git a/string/Dir.mk b/string/Dir.mk
index 3237e10..ae7c673 100644
--- a/string/Dir.mk
+++ b/string/Dir.mk
@@ -63,6 +63,8 @@
 $(string-objs): $(string-includes)
 $(string-objs): CFLAGS_ALL += $(string-cflags)
 
+$(string-test-objs): CFLAGS_ALL += -D_GNU_SOURCE
+
 build/lib/libstringlib.so: $(string-lib-objs:%.o=%.os)
 	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^
 
diff --git a/string/test/memchr.c b/string/test/memchr.c
index 9bc9c5a..0ff77f5 100644
--- a/string/test/memchr.c
+++ b/string/test/memchr.c
@@ -10,35 +10,37 @@
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   void *(*fun) (const void *s, int c, size_t n);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(memchr)
+  F(memchr, 0)
 #if __aarch64__
-  F(__memchr_aarch64)
-  F(__memchr_aarch64_mte)
+  F(__memchr_aarch64, 0)
+  F(__memchr_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__memchr_aarch64_sve)
+  F(__memchr_aarch64_sve, 1)
 # endif
 #elif __arm__
-  F(__memchr_arm)
+  F(__memchr_arm, 0)
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
@@ -70,7 +72,12 @@
   s[seekpos] = seekchar;
   s[((len ^ align) & 1) ? seekpos + 1 : len] = seekchar;
 
+  int mte_len = seekpos != -1 ? seekpos + 1 : maxlen;
+  s = tag_buffer (s, mte_len, fun->test_mte);
   p = fun->fun (s, seekchar, maxlen);
+  untag_buffer (s, mte_len, fun->test_mte);
+  p = untag_pointer (p);
+
   if (p != f)
     {
       ERR ("%s (%p, 0x%02x, %zu) returned %p, expected %p\n", fun->name, s,
@@ -82,6 +89,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -93,8 +101,8 @@
 	      test (funtab + i, a, sp, n, n);
 	    test (funtab + i, a, n, n, SIZE_MAX - a);
 	  }
-
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/memcmp.c b/string/test/memcmp.c
index 0b05a67..dd93698 100644
--- a/string/test/memcmp.c
+++ b/string/test/memcmp.c
@@ -9,33 +9,35 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   int (*fun) (const void *s1, const void *s2, size_t n);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(memcmp)
+  F(memcmp, 0)
 #if __aarch64__
-  F(__memcmp_aarch64)
+  F(__memcmp_aarch64, 1)
 # if __ARM_FEATURE_SVE
-  F(__memcmp_aarch64_sve)
+  F(__memcmp_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define A 32
 #define LEN 250000
-static unsigned char s1buf[LEN + 2 * A];
-static unsigned char s2buf[LEN + 2 * A];
+static unsigned char *s1buf;
+static unsigned char *s2buf;
 
 static void *
 alignup (void *p)
@@ -69,7 +71,11 @@
   if (delta)
     s1[diffpos] += delta;
 
+  s1 = tag_buffer (s1, len, fun->test_mte);
+  s2 = tag_buffer (s2, len, fun->test_mte);
   r = fun->fun (s1, s2, len);
+  untag_buffer (s1, len, fun->test_mte);
+  untag_buffer (s2, len, fun->test_mte);
 
   if ((delta == 0 && r != 0) || (delta > 0 && r <= 0) || (delta < 0 && r >= 0))
     {
@@ -83,6 +89,8 @@
 int
 main ()
 {
+  s1buf = mte_mmap (LEN + 2 * A);
+  s2buf = mte_mmap (LEN + 2 * A);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -108,7 +116,8 @@
 		test (funtab + i, d, s, n, n / 2, -1);
 	      }
 	  }
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/memcpy.c b/string/test/memcpy.c
index 7fd3903..346d920 100644
--- a/string/test/memcpy.c
+++ b/string/test/memcpy.c
@@ -9,35 +9,37 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   void *(*fun) (void *, const void *, size_t);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(memcpy)
+  F(memcpy, 0)
 #if __aarch64__
-  F(__memcpy_aarch64)
+  F(__memcpy_aarch64, 1)
 # if __ARM_NEON
-  F(__memcpy_aarch64_simd)
+  F(__memcpy_aarch64_simd, 1)
 # endif
 #elif __arm__
-  F(__memcpy_arm)
+  F(__memcpy_arm, 0)
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define A 32
 #define LEN 250000
-static unsigned char dbuf[LEN + 2 * A];
-static unsigned char sbuf[LEN + 2 * A];
+static unsigned char *dbuf;
+static unsigned char *sbuf;
 static unsigned char wbuf[LEN + 2 * A];
 
 static void *
@@ -70,7 +72,12 @@
   for (i = 0; i < len; i++)
     s[i] = w[i] = 'a' + i % 23;
 
+  s = tag_buffer (s, len, fun->test_mte);
+  d = tag_buffer (d, len, fun->test_mte);
   p = fun->fun (d, s, len);
+  untag_buffer (s, len, fun->test_mte);
+  untag_buffer (d, len, fun->test_mte);
+
   if (p != d)
     ERR ("%s(%p,..) returned %p\n", fun->name, d, p);
   for (i = 0; i < len + A; i++)
@@ -89,6 +96,8 @@
 int
 main ()
 {
+  dbuf = mte_mmap (LEN + 2 * A);
+  sbuf = mte_mmap (LEN + 2 * A);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -102,7 +111,8 @@
 	    for (; n < LEN; n *= 2)
 	      test (funtab + i, d, s, n);
 	  }
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/memmove.c b/string/test/memmove.c
index 7603546..af92fe3 100644
--- a/string/test/memmove.c
+++ b/string/test/memmove.c
@@ -9,33 +9,35 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   void *(*fun) (void *, const void *, size_t);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(memmove)
+  F(memmove, 0)
 #if __aarch64__
-  F(__memmove_aarch64)
+  F(__memmove_aarch64, 1)
 # if __ARM_NEON
-  F(__memmove_aarch64_simd)
+  F(__memmove_aarch64_simd, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define A 32
 #define LEN 250000
-static unsigned char dbuf[LEN + 2 * A];
-static unsigned char sbuf[LEN + 2 * A];
+static unsigned char *dbuf;
+static unsigned char *sbuf;
 static unsigned char wbuf[LEN + 2 * A];
 
 static void *
@@ -108,7 +110,12 @@
   for (int i = 0; i < len; i++)
     w[i] = s[i];
 
+  s = tag_buffer (s, len, fun->test_mte);
+  d = tag_buffer (d, len, fun->test_mte);
   p = fun->fun (d, s, len);
+  untag_buffer (s, len, fun->test_mte);
+  untag_buffer (d, len, fun->test_mte);
+
   if (p != d)
     ERR ("%s(%p,..) returned %p\n", fun->name, d, p);
   for (int i = 0; i < len + A; i++)
@@ -127,6 +134,8 @@
 int
 main ()
 {
+  dbuf = mte_mmap (LEN + 2 * A);
+  sbuf = mte_mmap (LEN + 2 * A);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -146,7 +155,8 @@
 		test_overlap (funtab + i, d, s, n);
 	      }
 	  }
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/memrchr.c b/string/test/memrchr.c
index 20bb3a1..adf96f0 100644
--- a/string/test/memrchr.c
+++ b/string/test/memrchr.c
@@ -5,41 +5,45 @@
  * SPDX-License-Identifier: MIT
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   void *(*fun) (const void *s, int c, size_t n);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(memrchr)
+  F(memrchr, 0)
 #if __aarch64__
-  F(__memrchr_aarch64)
+  F(__memrchr_aarch64, 1)
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
 {
-  return (void *) (((uintptr_t) p + ALIGN - 1) & -ALIGN);
+  return (void *) (((uintptr_t) p + ALIGN) & -ALIGN);
 }
 
 static void
@@ -66,7 +70,11 @@
   s[seekpos] = seekchar;
   s[((len ^ align) & 1) && seekpos < maxlen ? seekpos - 1 : len] = seekchar;
 
+  s = tag_buffer (s, maxlen, fun->test_mte);
   p = fun->fun (s, seekchar, maxlen);
+  untag_buffer (s, maxlen, fun->test_mte);
+  p = untag_pointer (p);
+
   if (p != f)
     {
       ERR ("%s (%p, 0x%02x, %zu) returned %p, expected %p\n", fun->name, s,
@@ -78,6 +86,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -88,8 +97,8 @@
 	    for (int sp = 0; sp < LEN; sp++)
 	      test (funtab + i, a, sp, n, n);
 	  }
-
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/memset.c b/string/test/memset.c
index 4ac5ba1..cebe9ad 100644
--- a/string/test/memset.c
+++ b/string/test/memset.c
@@ -9,31 +9,33 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   void *(*fun) (void *s, int c, size_t n);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(memset)
+  F(memset, 0)
 #if __aarch64__
-  F(__memset_aarch64)
+  F(__memset_aarch64, 1)
 #elif __arm__
-  F(__memset_arm)
+  F(__memset_arm, 0)
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define A 32
 #define LEN 250000
-static unsigned char sbuf[LEN + 2 * A];
+static unsigned char *sbuf;
 
 static void *
 alignup (void *p)
@@ -58,7 +60,10 @@
   for (i = 0; i < len; i++)
     s[i] = 'a' + i % 23;
 
+  s = tag_buffer (s, len, fun->test_mte);
   p = fun->fun (s, c, len);
+  untag_buffer (s, len, fun->test_mte);
+
   if (p != s)
     ERR ("%s(%p,..) returned %p\n", fun->name, s, p);
 
@@ -94,6 +99,7 @@
 int
 main ()
 {
+  sbuf = mte_mmap (LEN + 2 * A);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -114,7 +120,8 @@
 	      test (funtab + i, s, 0xaa25, n);
 	    }
 	}
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/mte.h b/string/test/mte.h
new file mode 100644
index 0000000..e67cbd9
--- /dev/null
+++ b/string/test/mte.h
@@ -0,0 +1,182 @@
+/*
+ * Memory tagging testing code.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef __TEST_MTE_H
+#define __TEST_MTE_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if __ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST
+#include <arm_acle.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+// These depend on a not yet merged kernel ABI.
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+#define PR_MTE_TCF_SHIFT 1
+#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+#define PR_MTE_TAG_SHIFT 3
+#define PROT_MTE 0x20
+
+// Tags are set one granule at a time by tag_buffer_helper.
+#define MTE_GRANULE_SIZE 16
+
+// Request tagged addresses with synchronous tag check faults from the kernel.
+// The prctl is issued on the first call only and its outcome is cached.
+// Returns nonzero if the request succeeded.
+int
+mte_enabled (void)
+{
+  static int enabled = -1;
+  if (enabled == -1)
+    {
+      int res = prctl (PR_SET_TAGGED_ADDR_CTRL,
+		       PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC
+			 | (0xfffe << PR_MTE_TAG_SHIFT),
+		       0, 0, 0);
+      enabled = (res == 0);
+    }
+  return enabled;
+}
+
+// Allocate SIZE bytes for a test buffer: an anonymous PROT_MTE mapping when
+// tagging is enabled, malloc memory otherwise.  The tests cannot run without
+// their buffers, so an allocation failure is reported and the process is
+// aborted instead of returning MAP_FAILED or NULL to the caller.
+static void *
+mte_mmap (size_t size)
+{
+  void *p;
+  if (mte_enabled ())
+    {
+      p = mmap (NULL, size, PROT_READ | PROT_WRITE | PROT_MTE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+      if (p == MAP_FAILED)
+	{
+	  perror ("mte_mmap: mmap");
+	  abort ();
+	}
+    }
+  else
+    {
+      p = malloc (size);
+      if (p == NULL && size != 0)
+	{
+	  perror ("mte_mmap: malloc");
+	  abort ();
+	}
+    }
+  return p;
+}
+
+// Round P up to the next multiple of MTE_GRANULE_SIZE.
+void *
+alignup_mte (void *p)
+{
+  return (void *) (((uintptr_t) p + MTE_GRANULE_SIZE - 1)
+		   & ~(MTE_GRANULE_SIZE - 1));
+}
+
+// Round P down to a multiple of MTE_GRANULE_SIZE.
+void *
+aligndown_mte (void *p)
+{
+  return (void *) ((uintptr_t) p & ~(MTE_GRANULE_SIZE - 1));
+}
+
+// Return P with its top 8 bits (which include the tag bits) cleared.
+void *
+untag_pointer (void *p)
+{
+  return (void *) ((unsigned long long) p & (~0ULL >> 8));
+}
+
+// Apply the tag carried in P to every granule that overlaps [P, P + LEN).
+void
+tag_buffer_helper (void *p, int len)
+{
+  char *ptr = p;
+  char *end = alignup_mte (ptr + len);
+  ptr = aligndown_mte (p);
+  for (; ptr < end; ptr += MTE_GRANULE_SIZE)
+    {
+      __arm_mte_set_tag (ptr);
+    }
+}
+
+// When TEST_MTE is set and tagging is enabled, bump the tag of P by one and
+// tag the LEN bytes at P to match.  Returns the pointer to use for the
+// access under test; P is returned unchanged when nothing was tagged.
+void *
+tag_buffer (void *p, int len, int test_mte)
+{
+  if (test_mte && mte_enabled ())
+    {
+      p = __arm_mte_increment_tag (p, 1);
+      tag_buffer_helper (p, len);
+    }
+  return p;
+}
+
+// Undo tag_buffer: retag the LEN bytes at P with the tag of the untagged
+// pointer (when TEST_MTE is set and tagging is enabled) and return P
+// without its tag.
+void *
+untag_buffer (void *p, int len, int test_mte)
+{
+  p = untag_pointer (p);
+  if (test_mte && mte_enabled ())
+    {
+      tag_buffer_helper (p, len);
+    }
+  return p;
+}
+
+#else // !(__ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST)
+// MTE testing is not compiled in: no tagging happens and pointers pass
+// through unchanged.
+int
+mte_enabled (void)
+{
+  return 0;
+}
+static void *
+mte_mmap (size_t size)
+{
+  void *p = malloc (size);
+  if (p == NULL && size != 0)
+    {
+      perror ("mte_mmap: malloc");
+      abort ();
+    }
+  return p;
+}
+void *
+tag_buffer (void *p, int len, int test_mte)
+{
+  (void) len;
+  (void) test_mte;
+  return p;
+}
+void *
+untag_buffer (void *p, int len, int test_mte)
+{
+  (void) len;
+  (void) test_mte;
+  return p;
+}
+void *
+untag_pointer (void *p)
+{
+  return p;
+}
+#endif // __ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST
+
+#endif
diff --git a/string/test/stpcpy.c b/string/test/stpcpy.c
index e13b42c..1827e68 100644
--- a/string/test/stpcpy.c
+++ b/string/test/stpcpy.c
@@ -5,39 +5,44 @@
  * SPDX-License-Identifier: MIT
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
+
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   char *(*fun) (char *dest, const char *src);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(stpcpy)
+  F(stpcpy, 0)
 #if __aarch64__
-  F(__stpcpy_aarch64)
-  F(__stpcpy_aarch64_mte)
+  F(__stpcpy_aarch64, 0)
+  F(__stpcpy_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__stpcpy_aarch64_sve)
+  F(__stpcpy_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char dbuf[LEN + 3 * ALIGN];
-static char sbuf[LEN + 3 * ALIGN];
+static char *dbuf;
+static char *sbuf;
 static char wbuf[LEN + 3 * ALIGN];
 
 static void *
@@ -75,7 +80,12 @@
     s[i] = w[i] = 'a' + (i & 31);
   s[len] = w[len] = '\0';
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
+  d = tag_buffer (d, len + 1, fun->test_mte);
   p = fun->fun (d, s);
+  untag_buffer (s, len + 1, fun->test_mte);
+  untag_buffer (d, len + 1, fun->test_mte);
+
   if (p != d + len)
     ERR ("%s (%p,..) returned %p expected %p\n", fun->name, d, p, d + len);
 
@@ -95,6 +105,8 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
+  dbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -104,7 +116,8 @@
 	  for (int n = 0; n < LEN; n++)
 	    test (funtab + i, d, s, n);
 
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strchr.c b/string/test/strchr.c
index 8535686..f3ae982 100644
--- a/string/test/strchr.c
+++ b/string/test/strchr.c
@@ -10,33 +10,35 @@
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   char *(*fun) (const char *s, int c);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strchr)
+  F(strchr, 0)
 #if __aarch64__
-  F(__strchr_aarch64)
-  F(__strchr_aarch64_mte)
+  F(__strchr_aarch64, 0)
+  F(__strchr_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strchr_aarch64_sve)
+  F(__strchr_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
@@ -70,7 +72,11 @@
     s[seekpos + 1] = seekchar;
   s[len] = '\0';
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
   p = fun->fun (s, seekchar);
+  untag_buffer (s, len + 1, fun->test_mte);
+  p = untag_pointer (p);
+
   if (p != f)
     {
       ERR ("%s (%p, 0x%02x) len %d returned %p, expected %p pos %d\n",
@@ -78,7 +84,10 @@
       quote ("input", s, len);
     }
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
   p = fun->fun (s, 0);
+  untag_buffer (s, len + 1, fun->test_mte);
+
   if (p != s + len)
     {
       ERR ("%s (%p, 0x%02x) len %d returned %p, expected %p pos %d\n",
@@ -90,6 +99,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -102,7 +112,8 @@
 	    test (funtab + i, a, -1, n);
 	  }
 
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strchrnul.c b/string/test/strchrnul.c
index 198c2ad..6c30ab2 100644
--- a/string/test/strchrnul.c
+++ b/string/test/strchrnul.c
@@ -5,40 +5,44 @@
  * SPDX-License-Identifier: MIT
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   char *(*fun) (const char *s, int c);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strchrnul)
+  F(strchrnul, 0)
 #if __aarch64__
-  F(__strchrnul_aarch64)
-  F(__strchrnul_aarch64_mte)
+  F(__strchrnul_aarch64, 0)
+  F(__strchrnul_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strchrnul_aarch64_sve)
+  F(__strchrnul_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
@@ -72,7 +76,12 @@
     s[seekpos + 1] = seekchar;
   s[len] = '\0';
 
+  int mte_len = seekpos != -1 ? seekpos + 1 : len + 1;
+  s = tag_buffer (s, mte_len, fun->test_mte);
   p = fun->fun (s, seekchar);
+  untag_buffer (s, mte_len, fun->test_mte);
+  p = untag_pointer (p);
+
   if (p != f)
     {
       ERR ("%s (%p, 0x%02x) len %d returned %p, expected %p pos %d\n",
@@ -80,7 +89,10 @@
       quote ("input", s, len);
     }
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
   p = fun->fun (s, 0);
+  untag_buffer (s, len + 1, fun->test_mte);
+
   if (p != s + len)
     {
       ERR ("%s (%p, 0x%02x) len %d returned %p, expected %p pos %d\n",
@@ -92,6 +104,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -104,7 +117,8 @@
 	    test (funtab + i, a, -1, n);
 	  }
 
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strcmp.c b/string/test/strcmp.c
index 9252233..4e718e3 100644
--- a/string/test/strcmp.c
+++ b/string/test/strcmp.c
@@ -9,40 +9,42 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   int (*fun) (const char *s1, const char *s2);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strcmp)
+  F(strcmp, 0)
 #if __aarch64__
-  F(__strcmp_aarch64)
-  F(__strcmp_aarch64_mte)
+  F(__strcmp_aarch64, 0)
+  F(__strcmp_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strcmp_aarch64_sve)
+  F(__strcmp_aarch64_sve, 1)
 # endif
 #elif __arm__
 # if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1
-  F(__strcmp_arm)
+  F(__strcmp_arm, 0)
 # elif __ARM_ARCH == 6 && __ARM_ARCH_6M__ >= 1
-  F(__strcmp_armv6m)
+  F(__strcmp_armv6m, 0)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define A 32
 #define LEN 250000
-static char s1buf[LEN + 2 * A + 1];
-static char s2buf[LEN + 2 * A + 1];
+static char *s1buf;
+static char *s2buf;
 
 static void *
 alignup (void *p)
@@ -77,7 +79,11 @@
     s1[diffpos] += delta;
   s1[len] = s2[len] = '\0';
 
+  s1 = tag_buffer (s1, len + 1, fun->test_mte);
+  s2 = tag_buffer (s2, len + 1, fun->test_mte);
   r = fun->fun (s1, s2);
+  untag_buffer (s1, len + 1, fun->test_mte);
+  untag_buffer (s2, len + 1, fun->test_mte);
 
   if ((delta == 0 && r != 0) || (delta > 0 && r <= 0) || (delta < 0 && r >= 0))
     {
@@ -91,6 +97,8 @@
 int
 main ()
 {
+  s1buf = mte_mmap (LEN + 2 * A + 1);
+  s2buf = mte_mmap (LEN + 2 * A + 1);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -115,7 +123,8 @@
 		test (funtab + i, d, s, n, n / 2, -1);
 	      }
 	  }
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strcpy.c b/string/test/strcpy.c
index d8f58f4..e84cace 100644
--- a/string/test/strcpy.c
+++ b/string/test/strcpy.c
@@ -9,36 +9,38 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   char *(*fun) (char *dest, const char *src);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strcpy)
+  F(strcpy, 0)
 #if __aarch64__
-  F(__strcpy_aarch64)
-  F(__strcpy_aarch64_mte)
+  F(__strcpy_aarch64, 0)
+  F(__strcpy_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strcpy_aarch64_sve)
+  F(__strcpy_aarch64_sve, 1)
 # endif
 #elif __arm__ && defined (__thumb2__) && !defined (__thumb__)
-  F(__strcpy_arm)
+  F(__strcpy_arm, 0)
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char dbuf[LEN + 3 * ALIGN];
-static char sbuf[LEN + 3 * ALIGN];
+static char *dbuf;
+static char *sbuf;
 static char wbuf[LEN + 3 * ALIGN];
 
 static void *
@@ -76,7 +78,12 @@
     s[i] = w[i] = 'a' + (i & 31);
   s[len] = w[len] = '\0';
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
+  d = tag_buffer (d, len + 1, fun->test_mte);
   p = fun->fun (d, s);
+  untag_buffer (s, len + 1, fun->test_mte);
+  untag_buffer (d, len + 1, fun->test_mte);
+
   if (p != d)
     ERR ("%s (%p,..) returned %p\n", fun->name, d, p);
 
@@ -96,6 +103,8 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
+  dbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -105,7 +114,8 @@
 	  for (int n = 0; n < LEN; n++)
 	    test (funtab + i, d, s, n);
 
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strlen.c b/string/test/strlen.c
index 2262660..6278380 100644
--- a/string/test/strlen.c
+++ b/string/test/strlen.c
@@ -9,38 +9,41 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mman.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   size_t (*fun) (const char *s);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strlen)
+  F(strlen, 0)
 #if __aarch64__
-  F(__strlen_aarch64)
-  F(__strlen_aarch64_mte)
+  F(__strlen_aarch64, 0)
+  F(__strlen_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strlen_aarch64_sve)
+  F(__strlen_aarch64_sve, 1)
 # endif
 #elif __arm__
 # if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
-  F(__strlen_armv6t2)
+  F(__strlen_armv6t2, 0)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
@@ -68,7 +71,10 @@
     s[i] = 'a' + (i & 31);
   s[len] = '\0';
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
   r = fun->fun (s);
+  untag_buffer (s, len + 1, fun->test_mte);
+
   if (r != len)
     {
       ERR ("%s (%p) returned %zu expected %d\n", fun->name, s, r, len);
@@ -79,6 +85,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -87,7 +94,8 @@
 	for (int n = 0; n < LEN; n++)
 	  test (funtab + i, a, n);
 
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strncmp.c b/string/test/strncmp.c
index c8ad1bc..23fbb0a 100644
--- a/string/test/strncmp.c
+++ b/string/test/strncmp.c
@@ -9,34 +9,36 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   int (*fun) (const char *, const char *, size_t);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strncmp)
+  F(strncmp, 0)
 #if __aarch64__
-  F(__strncmp_aarch64)
-  F(__strncmp_aarch64_mte)
+  F(__strncmp_aarch64, 0)
+  F(__strncmp_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strncmp_aarch64_sve)
+  F(__strncmp_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define A 32
 #define LEN 250000
-static char s1buf[LEN + 2 * A + 1];
-static char s2buf[LEN + 2 * A + 1];
+static char *s1buf;
+static char *s2buf;
 
 static void *
 alignup (void *p)
@@ -71,7 +73,12 @@
     s1[diffpos] += delta;
   s1[len] = s2[len] = '\0';
 
+  size_t mte_len = maxlen < len + 1 ? maxlen : len + 1;
+  s1 = tag_buffer (s1, mte_len, fun->test_mte);
+  s2 = tag_buffer (s2, mte_len, fun->test_mte);
   r = fun->fun (s1, s2, maxlen);
+  untag_buffer (s1, mte_len, fun->test_mte);
+  untag_buffer (s2, mte_len, fun->test_mte);
 
   if (diffpos >= maxlen)
     {
@@ -91,6 +98,8 @@
 int
 main ()
 {
+  s1buf = mte_mmap (LEN + 2 * A + 1);
+  s2buf = mte_mmap (LEN + 2 * A + 1);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -121,7 +130,8 @@
 		test (funtab + i, d, s, n / 2, n / 2, n, 1);
 	      }
 	  }
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strnlen.c b/string/test/strnlen.c
index c838092..0dea00e 100644
--- a/string/test/strnlen.c
+++ b/string/test/strnlen.c
@@ -5,39 +5,43 @@
  * SPDX-License-Identifier: MIT
  */
 
-#define _POSIX_C_SOURCE 200809L
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
 
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   size_t (*fun) (const char *s, size_t m);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strnlen)
+  F(strnlen, 0)
 #if __aarch64__
-  F(__strnlen_aarch64)
+  F(__strnlen_aarch64, 1)
 # if __ARM_FEATURE_SVE
-  F(__strnlen_aarch64_sve)
+  F(__strnlen_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
@@ -68,7 +72,11 @@
   if ((len + align) & 1)
     s[e + 1] = 0;
 
+  size_t mte_len = maxlen < len + 1 ? maxlen : len + 1;
+  s = tag_buffer (s, mte_len, fun->test_mte);
   r = fun->fun (s, maxlen);
+  untag_buffer (s, mte_len, fun->test_mte);
+
   if (r != e)
     {
       ERR ("%s (%p, %zu) len %zu returned %zu, expected %zu\n",
@@ -80,6 +88,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -91,8 +100,8 @@
 	      test (funtab + i, a, maxlen, n);
 	    test (funtab + i, a, SIZE_MAX - a, n);
 	  }
-
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }
diff --git a/string/test/strrchr.c b/string/test/strrchr.c
index 2ea676c..b968457 100644
--- a/string/test/strrchr.c
+++ b/string/test/strrchr.c
@@ -10,33 +10,35 @@
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include "mte.h"
 #include "stringlib.h"
 #include "stringtest.h"
 
-#define F(x) {#x, x},
+#define F(x, mte) {#x, x, mte},
 
 static const struct fun
 {
   const char *name;
   char *(*fun) (const char *s, int c);
+  int test_mte;
 } funtab[] = {
   // clang-format off
-  F(strrchr)
+  F(strrchr, 0)
 #if __aarch64__
-  F(__strrchr_aarch64)
-  F(__strrchr_aarch64_mte)
+  F(__strrchr_aarch64, 0)
+  F(__strrchr_aarch64_mte, 1)
 # if __ARM_FEATURE_SVE
-  F(__strrchr_aarch64_sve)
+  F(__strrchr_aarch64_sve, 1)
 # endif
 #endif
-  {0, 0}
+  {0, 0, 0}
   // clang-format on
 };
 #undef F
 
 #define ALIGN 32
 #define LEN 512
-static char sbuf[LEN + 3 * ALIGN];
+static char *sbuf;
 
 static void *
 alignup (void *p)
@@ -70,7 +72,11 @@
     s[seekpos - 1] = seekchar;
   s[len] = '\0';
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
   p = fun->fun (s, seekchar);
+  untag_buffer (s, len + 1, fun->test_mte);
+  p = untag_pointer (p);
+
   if (p != f)
     {
       ERR ("%s (%p, 0x%02x) len %d returned %p, expected %p pos %d\n",
@@ -78,7 +84,10 @@
       quote ("input", s, len);
     }
 
+  s = tag_buffer (s, len + 1, fun->test_mte);
   p = fun->fun (s, 0);
+  untag_buffer (s, len + 1, fun->test_mte);
+
   if (p != s + len)
     {
       ERR ("%s (%p, 0x%02x) len %d returned %p, expected %p pos %d\n",
@@ -90,6 +99,7 @@
 int
 main (void)
 {
+  sbuf = mte_mmap (LEN + 3 * ALIGN);
   int r = 0;
   for (int i = 0; funtab[i].name; i++)
     {
@@ -102,7 +112,8 @@
 	    test (funtab + i, a, -1, n);
 	  }
 
-      printf ("%s %s\n", err_count ? "FAIL" : "PASS", funtab[i].name);
+      char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS";
+      printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name);
       if (err_count)
 	r = -1;
     }