Add IFUNC support for arm64 and IRELATIVE reloc

 There are a number of changes in the way IFUNC-related relocations are done:
 1. IRELATIVE relocations are now supported for x86/x86_64 and arm64.
 2. IFUNC relocations now rely on the static linker to generate
    them in the correct order - this removes the necessity of an
    additional relocation pass for ifuncs.
 3. Related to 2: rela?.dyn relocations are performed before .plt ones.
 4. Ifuncs are resolved on symbol lookup; this approach avoids the
    mprotect(PROT_WRITE) call on r-x program segments.

Bug: 17399706
Bug: 17177284
Change-Id: I414dd3e82bd47cc03442c5dfc7c279949aec51ed
diff --git a/libc/arch-arm64/include/machine/elf_machdep.h b/libc/arch-arm64/include/machine/elf_machdep.h
index 2bf8189..6eab313 100644
--- a/libc/arch-arm64/include/machine/elf_machdep.h
+++ b/libc/arch-arm64/include/machine/elf_machdep.h
@@ -99,6 +99,7 @@
 #define R_AARCH64_RELATIVE              1027    /* Adjust by program base.  */
 #define R_AARCH64_TLS_TPREL64           1030
 #define R_AARCH64_TLS_DTPREL32          1031
+#define R_AARCH64_IRELATIVE             1032
 
 #define R_TYPE(name)        __CONCAT(R_AARCH64_,name)
 
diff --git a/libc/arch-x86/include/machine/elf_machdep.h b/libc/arch-x86/include/machine/elf_machdep.h
index 442c561..4bce933 100644
--- a/libc/arch-x86/include/machine/elf_machdep.h
+++ b/libc/arch-x86/include/machine/elf_machdep.h
@@ -59,5 +59,6 @@
 #define	R_386_TLS_GOTDESC	39
 #define	R_386_TLS_DESC_CALL	40
 #define	R_386_TLS_DESC		41
+#define	R_386_IRELATIVE		42
 
 #define	R_TYPE(name)	__CONCAT(R_386_,name)
diff --git a/libc/arch-x86_64/include/machine/elf_machdep.h b/libc/arch-x86_64/include/machine/elf_machdep.h
index 20f8c6d..bf1f273 100644
--- a/libc/arch-x86_64/include/machine/elf_machdep.h
+++ b/libc/arch-x86_64/include/machine/elf_machdep.h
@@ -46,6 +46,8 @@
 #define R_X86_64_GOTTPOFF	22
 #define R_X86_64_TPOFF32	23
 
+#define R_X86_64_IRELATIVE	37
+
 #define	R_TYPE(name)	__CONCAT(R_X86_64_,name)
 
 #else	/*	!__i386__	*/
diff --git a/linker/dlfcn.cpp b/linker/dlfcn.cpp
index 3024b3c..9801fa1 100644
--- a/linker/dlfcn.cpp
+++ b/linker/dlfcn.cpp
@@ -118,7 +118,7 @@
     unsigned bind = ELF_ST_BIND(sym->st_info);
 
     if ((bind == STB_GLOBAL || bind == STB_WEAK) && sym->st_shndx != 0) {
-      return reinterpret_cast<void*>(sym->st_value + found->load_bias);
+      return reinterpret_cast<void*>(found->resolve_symbol_address(sym));
     }
 
     __bionic_format_dlerror("symbol found but not global", symbol);
@@ -148,7 +148,7 @@
   ElfW(Sym)* sym = dladdr_find_symbol(si, addr);
   if (sym != nullptr) {
     info->dli_sname = si->strtab + sym->st_name;
-    info->dli_saddr = reinterpret_cast<void*>(si->load_bias + sym->st_value);
+    info->dli_saddr = reinterpret_cast<void*>(si->resolve_symbol_address(sym));
   }
 
   return 1;
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 793ffd5..0e5142a 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -470,32 +470,6 @@
   }
 }
 
-void soinfo::resolve_ifunc_symbols() {
-  if (!get_has_ifuncs()) {
-    return;
-  }
-
-  phdr_table_unprotect_segments(phdr, phnum, load_bias);
-
-  TRACE_TYPE(IFUNC, "CHECKING FOR IFUNCS AND PERFORMING SYMBOL UPDATES");
-
-  for (size_t i = 0; i < nchain; ++i) {
-    ElfW(Sym)* s = &symtab[i];
-    if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC) {
-      // The address of the ifunc in the symbol table is the address of the
-      // function that chooses the function to which the ifunc will refer.
-      // In order to return the proper value, we run the choosing function
-      // in the linker and then return its result (minus the base offset).
-      TRACE_TYPE(IFUNC, "FOUND IFUNC");
-      ElfW(Addr) (*ifunc_ptr)();
-      ifunc_ptr = reinterpret_cast<ElfW(Addr)(*)()>(s->st_value + base);
-      s->st_value = (ifunc_ptr() - base);
-      TRACE_TYPE(IFUNC, "NEW VALUE IS %p", (void*)s->st_value);
-    }
-  }
-  phdr_table_protect_segments(phdr, phnum, load_bias);
-}
-
 static unsigned elfhash(const char* _name) {
     const unsigned char* name = reinterpret_cast<const unsigned char*>(_name);
     unsigned h = 0, g;
@@ -1111,52 +1085,14 @@
   protect_data(PROT_READ);
 }
 
-// ifuncs are only defined for x86
-#if defined(__i386__)
-static void soinfo_ifunc_relocate(soinfo* si, ElfW(Rel)* rel, unsigned count) {
-  for (size_t idx = 0; idx < count; ++idx, ++rel) {
-    ElfW(Sym)* s;
-    soinfo* lsi;
-    unsigned type = ELFW(R_TYPE)(rel->r_info);
-    unsigned sym = ELFW(R_SYM)(rel->r_info);
-    ElfW(Addr) reloc = static_cast<ElfW(Addr)>(rel->r_offset + si->load_bias);
-    ElfW(Addr) sym_addr = 0;
-    const char* sym_name = nullptr;
-    sym_name = reinterpret_cast<const char*>(si->strtab + si->symtab[sym].st_name);
-    s = soinfo_do_lookup(si, sym_name, &lsi);
+static ElfW(Addr) call_ifunc_resolver(ElfW(Addr) resolver_addr) {
+  typedef ElfW(Addr) (*ifunc_resolver_t)(void);
+  ifunc_resolver_t ifunc_resolver = reinterpret_cast<ifunc_resolver_t>(resolver_addr);
+  ElfW(Addr) ifunc_addr = ifunc_resolver();
+  TRACE_TYPE(RELO, "Called ifunc_resolver@%p. The result is %p", ifunc_resolver, reinterpret_cast<void*>(ifunc_addr));
 
-    if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC && type == R_386_JMP_SLOT) {
-      TRACE("IFUNC RELOCATION, PASS 2: %p",  (void*)(sym_addr));
-      ElfW(Addr) (*ifunc_ptr)();
-      ifunc_ptr = reinterpret_cast<ElfW(Addr)(*)()>(s->st_value + si->base);
-      *reinterpret_cast<ElfW(Addr)*>(reloc) = ifunc_ptr();
-    }
-  }
+  return ifunc_addr;
 }
-#endif
-
-#if defined(__x86_64__)
-static void soinfo_ifunc_relocate(soinfo* si, ElfW(Rela)* rela, unsigned count) {
-  for (size_t idx = 0; idx < count; ++idx, ++rela) {
-    ElfW(Sym)* s;
-    soinfo* lsi;
-    unsigned type = ELFW(R_TYPE)(rela->r_info);
-    unsigned sym = ELFW(R_SYM)(rela->r_info);
-    ElfW(Addr) reloc = static_cast<ElfW(Addr)>(rela->r_offset + si->load_bias);
-    ElfW(Addr) sym_addr = 0;
-    const char* sym_name = nullptr;
-    sym_name = reinterpret_cast<const char*>(si->strtab + si->symtab[sym].st_name);
-    s = soinfo_do_lookup(si, sym_name, &lsi);
-
-    if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC && type == R_X86_64_JUMP_SLOT) {
-      TRACE("IFUNC RELOCATION, PASS 2: %p",  (void*)(sym_addr + rela->r_addend));
-      ElfW(Addr) (*ifunc_ptr)();
-      ifunc_ptr = reinterpret_cast<ElfW(Addr)(*)()>(s->st_value + si->base);
-      *reinterpret_cast<ElfW(Addr)*>(reloc) = ifunc_ptr();
-    }
-  }
-}
-#endif
 
 #if defined(USE_RELA)
 int soinfo::Relocate(ElfW(Rela)* rela, unsigned count) {
@@ -1206,6 +1142,7 @@
         case R_AARCH64_ABS32:
         case R_AARCH64_ABS16:
         case R_AARCH64_RELATIVE:
+        case R_AARCH64_IRELATIVE:
           /*
            * The sym_addr was initialized to be zero above, or the relocation
            * code below does not care about value of sym_addr.
@@ -1218,6 +1155,7 @@
         case R_X86_64_32:
         case R_X86_64_64:
         case R_X86_64_RELATIVE:
+        case R_X86_64_IRELATIVE:
           // No need to do anything.
           break;
         case R_X86_64_PC32:
@@ -1230,7 +1168,7 @@
         }
       } else {
         // We got a definition.
-        sym_addr = static_cast<ElfW(Addr)>(s->st_value + lsi->load_bias);
+        sym_addr = lsi->resolve_symbol_address(s);
       }
       count_relocation(kRelocSymbol);
     }
@@ -1342,6 +1280,13 @@
         *reinterpret_cast<ElfW(Addr)*>(reloc) = (base + rela->r_addend);
         break;
 
+    case R_AARCH64_IRELATIVE:
+      count_relocation(kRelocRelative);
+      MARK(rela->r_offset);
+      TRACE_TYPE(RELO, "RELO IRELATIVE %16llx <- %16llx\n", reloc, (base + rela->r_addend));
+      *reinterpret_cast<ElfW(Addr)*>(reloc) = call_ifunc_resolver(base + rela->r_addend);
+      break;
+
     case R_AARCH64_COPY:
         /*
          * ET_EXEC is not supported so this should not happen.
@@ -1368,11 +1313,7 @@
       MARK(rela->r_offset);
       TRACE_TYPE(RELO, "RELO JMP_SLOT %08zx <- %08zx %s", static_cast<size_t>(reloc),
                  static_cast<size_t>(sym_addr + rela->r_addend), sym_name);
-      if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC) {
-        set_has_ifuncs(true);
-      } else {
-        *reinterpret_cast<ElfW(Addr)*>(reloc) = sym_addr + rela->r_addend;
-      }
+      *reinterpret_cast<ElfW(Addr)*>(reloc) = sym_addr + rela->r_addend;
       break;
     case R_X86_64_GLOB_DAT:
       count_relocation(kRelocAbsolute);
@@ -1392,6 +1333,12 @@
                  static_cast<size_t>(base));
       *reinterpret_cast<ElfW(Addr)*>(reloc) = base + rela->r_addend;
       break;
+    case R_X86_64_IRELATIVE:
+      count_relocation(kRelocRelative);
+      MARK(rela->r_offset);
+      TRACE_TYPE(RELO, "RELO IRELATIVE %16llx <- %16llx\n", reloc, (base + rela->r_addend));
+      *reinterpret_cast<ElfW(Addr)*>(reloc) = call_ifunc_resolver(base + rela->r_addend);
+      break;
     case R_X86_64_32:
       count_relocation(kRelocRelative);
       MARK(rela->r_offset);
@@ -1481,6 +1428,7 @@
                 case R_386_GLOB_DAT:
                 case R_386_32:
                 case R_386_RELATIVE:    /* Don't care. */
+                case R_386_IRELATIVE:
                     // sym_addr was initialized to be zero above or relocation
                     // code below does not care about value of sym_addr.
                     // No need to do anything.
@@ -1500,7 +1448,7 @@
                 }
             } else {
                 // We got a definition.
-                sym_addr = static_cast<ElfW(Addr)>(s->st_value + lsi->load_bias);
+                sym_addr = lsi->resolve_symbol_address(s);
             }
             count_relocation(kRelocSymbol);
         }
@@ -1549,11 +1497,7 @@
             count_relocation(kRelocAbsolute);
             MARK(rel->r_offset);
             TRACE_TYPE(RELO, "RELO JMP_SLOT %08x <- %08x %s", reloc, sym_addr, sym_name);
-            if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC) {
-              set_has_ifuncs(true);
-            } else {
-              *reinterpret_cast<ElfW(Addr)*>(reloc) = sym_addr;
-            }
+            *reinterpret_cast<ElfW(Addr)*>(reloc) = sym_addr;
             break;
         case R_386_GLOB_DAT:
             count_relocation(kRelocAbsolute);
@@ -1614,6 +1558,14 @@
                        reinterpret_cast<void*>(reloc), reinterpret_cast<void*>(base));
             *reinterpret_cast<ElfW(Addr)*>(reloc) += base;
             break;
+#if defined(__i386__)
+        case R_386_IRELATIVE:
+          count_relocation(kRelocRelative);
+          MARK(rel->r_offset);
+          TRACE_TYPE(RELO, "RELO IRELATIVE %p <- %p", reinterpret_cast<void*>(reloc), reinterpret_cast<void*>(base));
+          *reinterpret_cast<ElfW(Addr)*>(reloc) = call_ifunc_resolver(base + *reinterpret_cast<ElfW(Addr)*>(reloc));
+          break;
+#endif
 
         default:
             DL_ERR("unknown reloc type %d @ %p (%zu)", type, rel, idx);
@@ -1671,7 +1623,7 @@
             // FIXME: is this sufficient?
             // For reference see NetBSD link loader
             // http://cvsweb.netbsd.org/bsdweb.cgi/src/libexec/ld.elf_so/arch/mips/mips_reloc.c?rev=1.53&content-type=text/x-cvsweb-markup
-            *got = reinterpret_cast<ElfW(Addr)*>(lsi->load_bias + s->st_value);
+            *got = reinterpret_cast<ElfW(Addr)*>(lsi->resolve_symbol_address(s));
         }
     }
     return true;
@@ -1749,8 +1701,6 @@
   // DT_INIT should be called before DT_INIT_ARRAY if both are present.
   CallFunction("DT_INIT", init_func);
   CallArray("DT_INIT_ARRAY", init_array, init_array_count, false);
-
-  resolve_ifunc_symbols();
 }
 
 void soinfo::CallDestructors() {
@@ -1812,12 +1762,6 @@
   }
 }
 
-void soinfo::set_has_ifuncs(bool ifuncs) {
-  if (has_min_version(1)) {
-    has_ifuncs = ifuncs;
-  }
-}
-
 dev_t soinfo::get_st_dev() {
   if (has_min_version(0)) {
     return st_dev;
@@ -1834,14 +1778,6 @@
   return 0;
 }
 
-bool soinfo::get_has_ifuncs() {
-  if (has_min_version(1)) {
-    return has_ifuncs;
-  }
-
-  return false;
-}
-
 // This is a return on get_children()/get_parents() if
 // 'this->flags' does not have FLAG_NEW_SOINFO set.
 static soinfo::soinfo_list_t g_empty_list;
@@ -1862,6 +1798,14 @@
   return this->parents;
 }
 
+ElfW(Addr) soinfo::resolve_symbol_address(ElfW(Sym)* s) {
+  if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC) {
+    return call_ifunc_resolver(s->st_value + load_bias);
+  }
+
+  return static_cast<ElfW(Addr)>(s->st_value + load_bias);
+}
+
 /* Force any of the closed stdin, stdout and stderr to be associated with
    /dev/null. */
 static int nullify_closed_stdio() {
@@ -2168,44 +2112,32 @@
 #endif
 
 #if defined(USE_RELA)
-    if (plt_rela != nullptr) {
-        DEBUG("[ relocating %s plt ]\n", name);
-        if (Relocate(plt_rela, plt_rela_count)) {
-            return false;
-        }
-    }
     if (rela != nullptr) {
-        DEBUG("[ relocating %s ]\n", name);
+        DEBUG("[ relocating %s ]", name);
         if (Relocate(rela, rela_count)) {
             return false;
         }
     }
-#else
-    if (plt_rel != nullptr) {
+    if (plt_rela != nullptr) {
         DEBUG("[ relocating %s plt ]", name);
-        if (Relocate(plt_rel, plt_rel_count)) {
+        if (Relocate(plt_rela, plt_rela_count)) {
             return false;
         }
     }
+#else
     if (rel != nullptr) {
         DEBUG("[ relocating %s ]", name);
         if (Relocate(rel, rel_count)) {
             return false;
         }
     }
-#endif
-
-    // if there are ifuncs, we need to do an additional relocation pass.
-    // they cannot be resolved until the rest of the relocations are done
-    // because we need to call the resolution function which may be waiting
-    // on relocations.
-    if(get_has_ifuncs()) {
-#if defined(__i386__)
-      soinfo_ifunc_relocate(this, plt_rel, plt_rel_count);
-#elif defined(__x86_64__)
-      soinfo_ifunc_relocate(this, plt_rela, plt_rela_count);
-#endif
+    if (plt_rel != nullptr) {
+        DEBUG("[ relocating %s plt ]", name);
+        if (Relocate(plt_rel, plt_rel_count)) {
+            return false;
+        }
     }
+#endif
 
 #if defined(__mips__)
     if (!mips_relocate_got(this)) {
diff --git a/linker/linker.h b/linker/linker.h
index 3024d3a..37d513e 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -197,6 +197,8 @@
 #if !defined(__LP64__)
   bool has_text_relocations;
 #endif
+  // TODO: remove this flag, dynamic linker
+  // should not use it in any way.
   bool has_DT_SYMBOLIC;
 
   soinfo(const char* name, const struct stat* file_stat);
@@ -212,21 +214,20 @@
 
   void set_st_dev(dev_t st_dev);
   void set_st_ino(ino_t st_ino);
-  void set_has_ifuncs(bool ifunc);
   ino_t get_st_ino();
   dev_t get_st_dev();
-  bool get_has_ifuncs();
 
   soinfo_list_t& get_children();
   soinfo_list_t& get_parents();
 
+  ElfW(Addr) resolve_symbol_address(ElfW(Sym)* s);
+
   bool inline has_min_version(uint32_t min_version) {
     return (flags & FLAG_NEW_SOINFO) != 0 && version >= min_version;
   }
  private:
   void CallArray(const char* array_name, linker_function_t* functions, size_t count, bool reverse);
   void CallFunction(const char* function_name, linker_function_t function);
-  void resolve_ifunc_symbols();
 #if defined(USE_RELA)
   int Relocate(ElfW(Rela)* rela, unsigned count);
 #else
@@ -247,7 +248,6 @@
   soinfo_list_t parents;
 
   // version >= 1
-  bool has_ifuncs;
 };
 
 extern soinfo* get_libdl_info();
diff --git a/tests/Android.build.mk b/tests/Android.build.mk
index d4b0396..d54c851 100644
--- a/tests/Android.build.mk
+++ b/tests/Android.build.mk
@@ -37,6 +37,10 @@
 
 LOCAL_FORCE_STATIC_EXECUTABLE := $($(module)_force_static_executable)
 
+ifneq ($($(module)_multilib),)
+    LOCAL_MULTILIB := $($(module)_multilib)
+endif
+
 LOCAL_CFLAGS := \
     $(common_cflags) \
     $($(module)_cflags) \
diff --git a/tests/dlfcn_test.cpp b/tests/dlfcn_test.cpp
index 3568f8f..4ba19c1 100644
--- a/tests/dlfcn_test.cpp
+++ b/tests/dlfcn_test.cpp
@@ -89,8 +89,8 @@
   ASSERT_EQ(0, dlclose(handle2));
 }
 
-// ifuncs are only supported on intel for now
-#if defined(__i386__) || defined(__x86_64__)
+// ifuncs are only supported on intel and arm64 for now
+#if defined (__aarch64__) || defined(__i386__) || defined(__x86_64__)
 TEST(dlfcn, ifunc) {
   typedef const char* (*fn_ptr)();
 
@@ -124,9 +124,13 @@
   typedef const char* (*fn_ptr)();
 
   void* handle = dlopen("libtest_ifunc.so", RTLD_NOW);
-  ASSERT_TRUE(handle != NULL) << dlerror();
-  fn_ptr is_ctor_called =  reinterpret_cast<fn_ptr>(dlsym(handle, "is_ctor_called"));
-  ASSERT_TRUE(is_ctor_called != NULL) << dlerror();
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  fn_ptr is_ctor_called =  reinterpret_cast<fn_ptr>(dlsym(handle, "is_ctor_called_irelative"));
+  ASSERT_TRUE(is_ctor_called != nullptr) << dlerror();
+  ASSERT_STREQ("false", is_ctor_called());
+
+  is_ctor_called =  reinterpret_cast<fn_ptr>(dlsym(handle, "is_ctor_called_jump_slot"));
+  ASSERT_TRUE(is_ctor_called != nullptr) << dlerror();
   ASSERT_STREQ("true", is_ctor_called());
   dlclose(handle);
 }
diff --git a/tests/libs/Android.mk b/tests/libs/Android.mk
index be6565b..a675a4f 100644
--- a/tests/libs/Android.mk
+++ b/tests/libs/Android.mk
@@ -295,7 +295,7 @@
 # -----------------------------------------------------------------------------
 # Library used by ifunc tests
 # -----------------------------------------------------------------------------
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),arm64 x86 x86_64))
     libtest_ifunc_src_files := \
         dlopen_testlib_ifunc.c
 
@@ -303,6 +303,14 @@
     module := libtest_ifunc
     build_type := target
     build_target := SHARED_LIBRARY
+
+    ifeq ($(TARGET_ARCH),arm64)
+      libtest_ifunc_multilib := 64
+      # TODO: This is a workaround - remove it once gcc
+      # removes its Android ifunc checks
+      libtest_ifunc_cflags := -mglibc
+    endif
+
     include $(TEST_PATH)/Android.build.mk
 endif
 
diff --git a/tests/libs/dlopen_testlib_ifunc.c b/tests/libs/dlopen_testlib_ifunc.c
index 4874841..b68a3dd 100644
--- a/tests/libs/dlopen_testlib_ifunc.c
+++ b/tests/libs/dlopen_testlib_ifunc.c
@@ -23,8 +23,17 @@
   g_flag = 1;
 }
 
+static const char* is_ctor_called() __attribute__ ((ifunc("is_ctor_called_ifun")));
+
 const char* foo() __attribute__ ((ifunc ("foo_ifunc")));
-const char* is_ctor_called() __attribute__ ((ifunc("is_ctor_called_ifun")));
+
+// Static linker creates GLOBAL/IFUNC symbol and JUMP_SLOT relocation type for plt segment
+const char* is_ctor_called_jump_slot() __attribute__ ((ifunc("is_ctor_called_ifun")));
+
+const char* is_ctor_called_irelative() {
+  // Call internal ifunc-resolved function with IRELATIVE reloc
+  return is_ctor_called();
+}
 
 const char* return_true() {
   return "true";