init: Add support for ambient capabilities.

Ambient capabilities are inherited in a straightforward way across
execve(2):

"
If you are nonroot but you have a capability, you can add it to pA.
If you do so, your children get that capability in pA, pP, and pE.
For example, you can set pA = CAP_NET_BIND_SERVICE, and your
children can automatically bind low-numbered ports.
"

This will allow us to get rid of the special meaning for AID_NET_ADMIN
and AID_NET_RAW, and if desired, to reduce the use of file capabilities
(which grant capabilities to any process that can execute the file). An
additional benefit of the latter is that a single .rc file can specify
all properties for a service, without having to rely on a separate file
for file capabilities.

Ambient capabilities are supported starting with kernel 4.3 and have
been backported to all Android common kernels back to 3.10.

I chose to not use Minijail here (though I'm still using libcap) for
two reasons:

1-The Minijail code is designed to work in situations where the process
is holding any set of capabilities, so it's more complex. The situation
when forking from init allows for simpler code.

2-The way Minijail is structured right now, we would not be able to
make the required SELinux calls between UID/GID dropping and other priv
dropping code. In the future, it will make sense to add some sort of
"hook" to Minijail so that it can be used in situations where we want
to do other operations between some of the privilege-dropping
operations carried out by Minijail.

Bug: 32438163
Test: Use sample service.
Change-Id: I3226cc95769d1beacbae619cb6c6e6a5425890fb
diff --git a/init/Android.mk b/init/Android.mk
index 1be064c..4bfd743 100644
--- a/init/Android.mk
+++ b/init/Android.mk
@@ -45,6 +45,7 @@
 LOCAL_CPPFLAGS := $(init_cflags)
 LOCAL_SRC_FILES:= \
     action.cpp \
+    capabilities.cpp \
     import_parser.cpp \
     init_parser.cpp \
     log.cpp \
@@ -53,6 +54,7 @@
     util.cpp \
 
 LOCAL_STATIC_LIBRARIES := libbase libselinux liblog libprocessgroup
+LOCAL_WHOLE_STATIC_LIBRARIES := libcap
 LOCAL_MODULE := libinit
 LOCAL_SANITIZE := integer
 LOCAL_CLANG := true
@@ -102,7 +104,7 @@
     libz \
     libprocessgroup
 
-# Create symlinks
+# Create symlinks.
 LOCAL_POST_INSTALL_CMD := $(hide) mkdir -p $(TARGET_ROOT_OUT)/sbin; \
     ln -sf ../init $(TARGET_ROOT_OUT)/sbin/ueventd; \
     ln -sf ../init $(TARGET_ROOT_OUT)/sbin/watchdogd
@@ -112,8 +114,8 @@
 include $(BUILD_EXECUTABLE)
 
 
-
-
+# Unit tests.
+# =========================================================
 include $(CLEAR_VARS)
 LOCAL_MODULE := init_tests
 LOCAL_SRC_FILES := \
diff --git a/init/capabilities.cpp b/init/capabilities.cpp
new file mode 100644
index 0000000..4592adc
--- /dev/null
+++ b/init/capabilities.cpp
@@ -0,0 +1,166 @@
+// Copyright (C) 2016 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "capabilities.h"
+
+#include <sys/capability.h>
+#include <sys/prctl.h>
+
+#include <map>
+#include <memory>
+
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+
+#define CAP_MAP_ENTRY(cap) { #cap, CAP_##cap }
+
+namespace {
+const std::map<std::string, int> cap_map = {
+    CAP_MAP_ENTRY(CHOWN),
+    CAP_MAP_ENTRY(DAC_OVERRIDE),
+    CAP_MAP_ENTRY(DAC_READ_SEARCH),
+    CAP_MAP_ENTRY(FOWNER),
+    CAP_MAP_ENTRY(FSETID),
+    CAP_MAP_ENTRY(KILL),
+    CAP_MAP_ENTRY(SETGID),
+    CAP_MAP_ENTRY(SETUID),
+    CAP_MAP_ENTRY(SETPCAP),
+    CAP_MAP_ENTRY(LINUX_IMMUTABLE),
+    CAP_MAP_ENTRY(NET_BIND_SERVICE),
+    CAP_MAP_ENTRY(NET_BROADCAST),
+    CAP_MAP_ENTRY(NET_ADMIN),
+    CAP_MAP_ENTRY(NET_RAW),
+    CAP_MAP_ENTRY(IPC_LOCK),
+    CAP_MAP_ENTRY(IPC_OWNER),
+    CAP_MAP_ENTRY(SYS_MODULE),
+    CAP_MAP_ENTRY(SYS_RAWIO),
+    CAP_MAP_ENTRY(SYS_CHROOT),
+    CAP_MAP_ENTRY(SYS_PTRACE),
+    CAP_MAP_ENTRY(SYS_PACCT),
+    CAP_MAP_ENTRY(SYS_ADMIN),
+    CAP_MAP_ENTRY(SYS_BOOT),
+    CAP_MAP_ENTRY(SYS_NICE),
+    CAP_MAP_ENTRY(SYS_RESOURCE),
+    CAP_MAP_ENTRY(SYS_TIME),
+    CAP_MAP_ENTRY(SYS_TTY_CONFIG),
+    CAP_MAP_ENTRY(MKNOD),
+    CAP_MAP_ENTRY(LEASE),
+    CAP_MAP_ENTRY(AUDIT_WRITE),
+    CAP_MAP_ENTRY(AUDIT_CONTROL),
+    CAP_MAP_ENTRY(SETFCAP),
+    CAP_MAP_ENTRY(MAC_OVERRIDE),
+    CAP_MAP_ENTRY(MAC_ADMIN),
+    CAP_MAP_ENTRY(SYSLOG),
+    CAP_MAP_ENTRY(WAKE_ALARM),
+    CAP_MAP_ENTRY(BLOCK_SUSPEND),
+    CAP_MAP_ENTRY(AUDIT_READ),
+};
+
+static_assert(CAP_LAST_CAP == CAP_AUDIT_READ, "CAP_LAST_CAP is not CAP_AUDIT_READ");
+
+bool DropBoundingSet(const CapSet& to_keep) {
+    for (size_t cap = 0; cap < to_keep.size(); ++cap) {
+        if (to_keep.test(cap)) {
+            // No need to drop this capability.
+            continue;
+        }
+        if (cap_drop_bound(cap) == -1) {
+            PLOG(ERROR) << "cap_drop_bound(" << cap << ") failed";
+            return false;
+        }
+    }
+    return true;
+}
+
+bool SetProcCaps(const CapSet& to_keep, bool add_setpcap) {
+    cap_t caps = cap_init();
+    auto deleter = [](cap_t* p) { cap_free(*p); };
+    std::unique_ptr<cap_t, decltype(deleter)> ptr_caps(&caps, deleter);
+
+    cap_clear(caps);
+    cap_value_t value[1];
+    for (size_t cap = 0; cap <= to_keep.size(); ++cap) {
+        if (to_keep.test(cap)) {
+            value[0] = cap;
+            if (cap_set_flag(caps, CAP_INHERITABLE, arraysize(value), value, CAP_SET) != 0 ||
+                cap_set_flag(caps, CAP_PERMITTED, arraysize(value), value, CAP_SET) != 0) {
+                PLOG(ERROR) << "cap_set_flag(INHERITABLE|PERMITTED, " << cap << ") failed";
+                return false;
+            }
+        }
+    }
+
+    if (add_setpcap) {
+        value[0] = CAP_SETPCAP;
+        if (cap_set_flag(caps, CAP_PERMITTED, arraysize(value), value, CAP_SET) != 0 ||
+            cap_set_flag(caps, CAP_EFFECTIVE, arraysize(value), value, CAP_SET) != 0) {
+            PLOG(ERROR) << "cap_set_flag(PERMITTED|EFFECTIVE, " << CAP_SETPCAP << ") failed";
+            return false;
+        }
+    }
+
+    if (cap_set_proc(caps) != 0) {
+        PLOG(ERROR) << "cap_set_proc(" << to_keep.to_ulong() << ") failed";
+        return false;
+    }
+    return true;
+}
+
+bool SetAmbientCaps(const CapSet& to_raise) {
+    for (size_t cap = 0; cap < to_raise.size(); ++cap) {
+        if (to_raise.test(cap)) {
+            if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) != 0) {
+                PLOG(ERROR) << "prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, " << cap << ") failed";
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+}  // namespace anonymous
+
+int LookupCap(const std::string& cap_name) {
+    auto e = cap_map.find(cap_name);
+    if (e != cap_map.end()) {
+        return e->second;
+    } else {
+        return -1;
+    }
+}
+
+bool SetCapsForExec(const CapSet& to_keep) {
+    // Need to keep SETPCAP to drop bounding set below.
+    bool add_setpcap = true;
+    if (!SetProcCaps(to_keep, add_setpcap)) {
+        LOG(ERROR) << "failed to apply initial capset";
+        return false;
+    }
+
+    if (!DropBoundingSet(to_keep)) {
+        return false;
+    }
+
+    // If SETPCAP wasn't specifically requested, drop it now.
+    add_setpcap = false;
+    if (!SetProcCaps(to_keep, add_setpcap)) {
+        LOG(ERROR) << "failed to apply final capset";
+        return false;
+    }
+
+    // Add the capabilities to the ambient set so that they are preserved across
+    // execve(2).
+    // See http://man7.org/linux/man-pages/man7/capabilities.7.html.
+    return SetAmbientCaps(to_keep);
+}
diff --git a/init/capabilities.h b/init/capabilities.h
new file mode 100644
index 0000000..368178d
--- /dev/null
+++ b/init/capabilities.h
@@ -0,0 +1,23 @@
+// Copyright (C) 2016 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/capability.h>
+
+#include <bitset>
+#include <string>
+
+using CapSet = std::bitset<CAP_LAST_CAP + 1>;
+
+int LookupCap(const std::string& cap_name);
+bool SetCapsForExec(const CapSet& to_keep);
diff --git a/init/readme.txt b/init/readme.txt
index 26225b2..e7e369c 100644
--- a/init/readme.txt
+++ b/init/readme.txt
@@ -149,25 +149,33 @@
   computed based on the service executable file security context.
 
 user <username>
-  Change to username before exec'ing this service.
+  Change to 'username' before exec'ing this service.
   Currently defaults to root.  (??? probably should default to nobody)
   As of Android M, processes should use this option even if they
-  require linux capabilities.  Previously, to acquire linux
+  require Linux capabilities.  Previously, to acquire Linux
   capabilities, a process would need to run as root, request the
   capabilities, then drop to its desired uid.  There is a new
   mechanism through fs_config that allows device manufacturers to add
-  linux capabilities to specific binaries on a file system that should
+  Linux capabilities to specific binaries on a file system that should
   be used instead. This mechanism is described on
   http://source.android.com/devices/tech/config/filesystem.html.  When
   using this new mechanism, processes can use the user option to
   select their desired uid without ever running as root.
+  As of Android O, processes can also request capabilities directly in their .rc
+  files. See the "capabilities" option below.
 
 group <groupname> [ <groupname> ]*
-  Change to groupname before exec'ing this service.  Additional
+  Change to 'groupname' before exec'ing this service.  Additional
   groupnames beyond the (required) first one are used to set the
   supplemental groups of the process (via setgroups()).
   Currently defaults to root.  (??? probably should default to nobody)
 
+capabilities <capability> [ <capability> ]*
+  Set capabilities when exec'ing this service. 'capability' should be a Linux
+  capability without the "CAP_" prefix, like "NET_ADMIN" or "SETPCAP". See
+  http://man7.org/linux/man-pages/man7/capabilities.7.html for a list of Linux
+  capabilities.
+
 seclabel <seclabel>
   Change to 'seclabel' before exec'ing this service.
   Primarily for use by services run from the rootfs, e.g. ueventd, adbd.
diff --git a/init/service.cpp b/init/service.cpp
index 92f1615..e052b45 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -17,6 +17,7 @@
 #include "service.h"
 
 #include <fcntl.h>
+#include <linux/securebits.h>
 #include <sched.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
@@ -171,14 +172,16 @@
 
 Service::Service(const std::string& name, const std::string& classname,
                  unsigned flags, uid_t uid, gid_t gid,
-                 const std::vector<gid_t>& supp_gids, unsigned namespace_flags,
+                 const std::vector<gid_t>& supp_gids,
+                 const CapSet& capabilities, unsigned namespace_flags,
                  const std::string& seclabel,
                  const std::vector<std::string>& args)
     : name_(name), classname_(classname), flags_(flags), pid_(0),
       time_started_(0), time_crashed_(0), nr_crashed_(0), uid_(uid), gid_(gid),
-      supp_gids_(supp_gids), namespace_flags_(namespace_flags),
-      seclabel_(seclabel), ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0),
-      priority_(0), oom_score_adjust_(-1000), args_(args) {
+      supp_gids_(supp_gids), capabilities_(capabilities),
+      namespace_flags_(namespace_flags), seclabel_(seclabel),
+      ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0), priority_(0),
+      oom_score_adjust_(-1000), args_(args) {
     onrestart_.InitSingleTrigger("onrestart");
 }
 
@@ -225,6 +228,13 @@
 }
 
 void Service::SetProcessAttributes() {
+    // Keep capabilites on uid change.
+    if (capabilities_.any() && uid_) {
+        if (prctl(PR_SET_SECUREBITS, SECBIT_KEEP_CAPS | SECBIT_KEEP_CAPS_LOCKED) != 0) {
+            PLOG(FATAL) << "prtcl(PR_SET_KEEPCAPS) failed for " << name_;
+        }
+    }
+
     // TODO: work out why this fails for `console` then upgrade to FATAL.
     if (setpgid(0, getpid()) == -1) PLOG(ERROR) << "setpgid failed for " << name_;
 
@@ -251,6 +261,11 @@
             PLOG(FATAL) << "setpriority failed for " << name_;
         }
     }
+    if (capabilities_.any()) {
+        if (!SetCapsForExec(capabilities_)) {
+            LOG(FATAL) << "cannot set capabilities for " << name_;
+        }
+    }
 }
 
 bool Service::Reap() {
@@ -320,6 +335,21 @@
     }
 }
 
+bool Service::ParseCapabilities(const std::vector<std::string>& args, std::string* err) {
+    capabilities_ = 0;
+
+    for (size_t i = 1; i < args.size(); i++) {
+        const std::string& arg = args[i];
+        int cap = LookupCap(arg);
+        if (cap == -1) {
+            *err = StringPrintf("invalid capability '%s'", arg.c_str());
+            return false;
+        }
+        capabilities_[cap] = true;
+    }
+    return true;
+}
+
 bool Service::ParseClass(const std::vector<std::string>& args, std::string* err) {
     classname_ = args[1];
     return true;
@@ -476,6 +506,8 @@
 Service::OptionParserMap::Map& Service::OptionParserMap::map() const {
     constexpr std::size_t kMax = std::numeric_limits<std::size_t>::max();
     static const Map option_parsers = {
+        {"capabilities",
+                        {1,     kMax, &Service::ParseCapabilities}},
         {"class",       {1,     1,    &Service::ParseClass}},
         {"console",     {0,     1,    &Service::ParseConsole}},
         {"critical",    {0,     0,    &Service::ParseCritical}},
@@ -807,6 +839,7 @@
     exec_count_++;
     std::string name = StringPrintf("exec %d (%s)", exec_count_, str_args[0].c_str());
     unsigned flags = SVC_EXEC | SVC_ONESHOT;
+    CapSet no_capabilities;
     unsigned namespace_flags = 0;
 
     std::string seclabel = "";
@@ -827,9 +860,9 @@
         }
     }
 
-    std::unique_ptr<Service> svc_p(new Service(name, "default", flags, uid, gid,
-                                               supp_gids, namespace_flags,
-                                               seclabel, str_args));
+    std::unique_ptr<Service> svc_p(new Service(name, "default", flags, uid, gid, supp_gids,
+                                               no_capabilities, namespace_flags, seclabel,
+                                               str_args));
     if (!svc_p) {
         LOG(ERROR) << "Couldn't allocate service for exec of '" << str_args[0] << "'";
         return nullptr;
diff --git a/init/service.h b/init/service.h
index 4a3412c..7f035ef 100644
--- a/init/service.h
+++ b/init/service.h
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "action.h"
+#include "capabilities.h"
 #include "init_parser.h"
 #include "keyword_map.h"
 
@@ -73,8 +74,9 @@
 
     Service(const std::string& name, const std::string& classname,
             unsigned flags, uid_t uid, gid_t gid,
-            const std::vector<gid_t>& supp_gids, unsigned namespace_flags,
-            const std::string& seclabel, const std::vector<std::string>& args);
+            const std::vector<gid_t>& supp_gids, const CapSet& capabilities,
+            unsigned namespace_flags, const std::string& seclabel,
+            const std::vector<std::string>& args);
 
     bool ParseLine(const std::vector<std::string>& args, std::string* err);
     bool Start();
@@ -116,6 +118,7 @@
     void CreateSockets(const std::string& scon);
     void SetProcessAttributes();
 
+    bool ParseCapabilities(const std::vector<std::string>& args, std::string *err);
     bool ParseClass(const std::vector<std::string>& args, std::string* err);
     bool ParseConsole(const std::vector<std::string>& args, std::string* err);
     bool ParseCritical(const std::vector<std::string>& args, std::string* err);
@@ -147,6 +150,7 @@
     uid_t uid_;
     gid_t gid_;
     std::vector<gid_t> supp_gids_;
+    CapSet capabilities_;
     unsigned namespace_flags_;
 
     std::string seclabel_;