Add taskstats reporting tool

Add a tool for querying and printing out the kernel's taskstats
structure. This structure contains various profiling information about a
given process or thread group.

Sample output:

    Basic task statistics
    ---------------------
    Stats version:           8
    Exit code:               0
    Flags:                   0x2
    Nice value:              0
    Command name:            init
    Scheduling discipline:   0
    UID:                     0
    GID:                     0
    PID:                     1
    PPID:                    0
    Begin time:              Fri Sep 20 17:06:14 2013
    Elapsed time:            611384577 usec
    User CPU time:           980000 usec
    Minor page faults:       515
    Major page faults:       0
    Scaled user time:        980000 usec
    Scaled system time:      2590000 usec

    Delay accounting
    ----------------
                     Count     Delay (ms)  Average delay     Real delay    Scaled real  Virtual delay
    CPU               1101         90.615          0.082       3570.000       3570.000       3620.204
    IO                  45         50.155          1.115
    Swap                 0          0.000          0.000
    Reclaim              0          0.000          0.000

    Extended accounting fields
    --------------------------
    Average RSS usage:       0.348 MB
    Average VM usage:        0.727 MB
    RSS high water mark:     548 KB
    VM high water mark:      688 KB
    IO bytes read:           330752
    IO bytes written:        979968
    IO read syscalls:        17408
    IO write syscalls:       0

    Per-task/thread statistics
    --------------------------
    Voluntary switches:      768
    Involuntary switches:    333

Change-Id: I4dab68d98de5db0f9112bec014872a067684f799
diff --git a/taskstats/taskstats.c b/taskstats/taskstats.c
new file mode 100644
index 0000000..66cc0c5
--- /dev/null
+++ b/taskstats/taskstats.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Linux task stats reporting tool. Queries and prints out the kernel's
+ * taskstats structure for a given process or thread group id. See
+ * https://www.kernel.org/doc/Documentation/accounting/ for more information
+ * about the reported fields.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <netlink-types.h>
+#include <netlink/attr.h>
+#include <netlink/genl/genl.h>
+#include <netlink/handlers.h>
+#include <netlink/msg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/taskstats.h>
+
+struct TaskStatistics {  /* one parsed taskstats reply */
+    int pid;  /* set from a TASKSTATS_TYPE_PID attribute */
+    int tgid;  /* set from a TASKSTATS_TYPE_TGID attribute */
+    struct taskstats stats;  /* copied from a TASKSTATS_TYPE_STATS attribute */
+};
+
+/* Builds a single-attribute generic netlink request and sends it on the
+ * socket. Returns nl_send()'s result, or -ENOMEM if it cannot be built. */
+int send_command(struct nl_sock* netlink_socket, uint16_t nlmsg_type,
+                 uint32_t nlmsg_pid, uint8_t genl_cmd, uint16_t nla_type,
+                 void* nla_data, int nla_len) {
+    const int seq = 0, version = 1, header_length = 0, flags = NLM_F_REQUEST;
+    int result = -ENOMEM;
+    struct nl_msg* message = nlmsg_alloc();
+    if (!message) {
+        return result;
+    }
+    if (genlmsg_put(message, nlmsg_pid, seq, nlmsg_type, header_length, flags,
+                    genl_cmd, version) &&
+        nla_put(message, nla_type, nla_len, nla_data) >= 0) {
+        nlmsg_hdr(message)->nlmsg_flags = flags; /* We don't want NLM_F_ACK. */
+        result = nl_send(netlink_socket, message);
+    }
+    nlmsg_free(message);
+    return result;
+}
+
+int print_receive_error(struct sockaddr_nl* address, struct nlmsgerr* error,
+                        void* arg) { /* error callback; address, arg unused */
+    fprintf(stderr, "Netlink receive error: %s\n", strerror(-error->error));
+    return NL_STOP; /* same result for every error */
+}
+
+/* Valid-message callback: copies CTRL_ATTR_FAMILY_ID into *(int*)arg. */
+int parse_family_id(struct nl_msg* msg, void* arg) {
+    struct genlmsghdr* gnlh = (struct genlmsghdr*)nlmsg_data(nlmsg_hdr(msg));
+    struct nlattr* attr = genlmsg_attrdata(gnlh, 0);
+    int remaining = genlmsg_attrlen(gnlh, 0);
+    for (; nla_ok(attr, remaining); attr = nla_next(attr, &remaining)) {
+        if (attr->nla_type == CTRL_ATTR_FAMILY_ID) {
+            *((int*)arg) = nla_get_u16(attr);
+            return NL_STOP; /* found it; no need to look further */
+        }
+    }
+    return NL_OK; /* attribute absent (nla_ok() bounds the walk) */
+}
+
+/* Asks the genl controller for the numeric id of family `name`.
+ * Returns the id, or 0 if the request or the reply handling fails. */
+int get_family_id(struct nl_sock* netlink_socket, const char* name) {
+    int family_id = 0;
+    /* One reference from nl_cb_alloc(); NL_CB_CUSTOM installs our handlers. */
+    struct nl_cb* cb = nl_cb_alloc(NL_CB_DEFAULT);
+    if (!cb) {
+        return 0;
+    }
+    nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, &parse_family_id, &family_id);
+    nl_cb_err(cb, NL_CB_CUSTOM, &print_receive_error, NULL);
+    if (send_command(netlink_socket, GENL_ID_CTRL, getpid(),
+                     CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME,
+                     (void*)name, strlen(name) + 1) < 0 ||
+        nl_recvmsgs(netlink_socket, cb) < 0) {
+        family_id = 0;
+    }
+    nl_cb_put(cb); /* released on error paths too */
+    return family_id;
+}
+
+/* Walks the attributes nested inside a TASKSTATS_TYPE_AGGR_* attribute and
+ * copies the pid, tgid and struct taskstats payload it finds into *stats.
+ * attr/attr_size describe the nested payload; other attribute types are
+ * skipped. */
+void parse_aggregate_task_stats(struct nlattr* attr, int attr_size,
+                                struct TaskStatistics* stats) {
+    /* nla_ok() bounds the walk, so an empty or truncated payload is safe. */
+    for (; nla_ok(attr, attr_size); attr = nla_next(attr, &attr_size)) {
+        const int type = attr->nla_type;
+        if (type == TASKSTATS_TYPE_PID) {
+            stats->pid = nla_get_u32(attr);
+        } else if (type == TASKSTATS_TYPE_TGID) {
+            stats->tgid = nla_get_u32(attr);
+        } else if (type == TASKSTATS_TYPE_STATS) {
+            nla_memcpy(&stats->stats, attr, sizeof(stats->stats));
+        }
+    }
+}
+
+/* Valid-message callback for TASKSTATS_CMD_GET replies: hands every
+ * TASKSTATS_TYPE_AGGR_PID/AGGR_TGID attribute to
+ * parse_aggregate_task_stats(), filling the struct TaskStatistics passed as
+ * arg. Always returns NL_STOP. */
+int parse_task_stats(struct nl_msg* msg, void* arg) {
+    struct TaskStatistics* stats = (struct TaskStatistics*)arg;
+    struct genlmsghdr* gnlh = (struct genlmsghdr*)nlmsg_data(nlmsg_hdr(msg));
+    struct nlattr* attr = genlmsg_attrdata(gnlh, 0);
+    int remaining = genlmsg_attrlen(gnlh, 0);
+
+    /* nla_ok() bounds the walk, so an empty or truncated payload is safe. */
+    for (; nla_ok(attr, remaining); attr = nla_next(attr, &remaining)) {
+        if (attr->nla_type == TASKSTATS_TYPE_AGGR_PID ||
+            attr->nla_type == TASKSTATS_TYPE_AGGR_TGID) {
+            parse_aggregate_task_stats(nla_data(attr), nla_len(attr), stats);
+        }
+    }
+    return NL_STOP;
+}
+
+/* Requests the taskstats selected by command_type (a TASKSTATS_CMD_ATTR_*
+ * value) and parameter (the id), storing the reply in *stats. Returns < 0 on
+ * failure, else 1 if a pid or tgid was received and 0 if not. */
+int query_task_stats(struct nl_sock* netlink_socket, int family_id,
+                     int command_type, int parameter,
+                     struct TaskStatistics* stats) {
+    memset(stats, 0, sizeof(*stats));
+    struct nl_cb* cb = nl_cb_alloc(NL_CB_DEFAULT); /* one owned reference */
+    if (!cb) {
+        return -ENOMEM;
+    }
+    nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, &parse_task_stats, stats);
+    nl_cb_err(cb, NL_CB_CUSTOM, &print_receive_error, NULL);
+    int result = send_command(netlink_socket, family_id, getpid(),
+                              TASKSTATS_CMD_GET, command_type, &parameter,
+                              sizeof(parameter));
+    if (result >= 0) {
+        result = nl_recvmsgs(netlink_socket, cb);
+    }
+    nl_cb_put(cb); /* released on error paths too */
+    return result < 0 ? result : (stats->pid || stats->tgid);
+}
+
+/* Returns total / count divided by 1e6 (callers pass ns to get ms), or 0 when
+ * count is 0. */
+double average_ms(uint64_t total, uint64_t count) {
+    const double per_event = count ? ((double)total) / count : 0;
+    return per_event / 1e6;
+}
+
+/* Returns the truncating integer quotient total / count, or 0 when count is
+ * 0 (which would otherwise be a division by zero). */
+uint64_t average_ns(uint64_t total, uint64_t count) {
+    const uint64_t mean = count ? total / count : 0;
+    return mean;
+}
+
+/* Prints every reported field of stats->stats to stdout. With human_readable
+ * set, delays are shown in ms and (when ac_stime is non-zero) memory usage as
+ * averages; otherwise the raw kernel counters are printed. */
+void print_task_stats(const struct TaskStatistics* stats, int human_readable) {
+    const struct taskstats* s = &stats->stats;
+    printf("Basic task statistics\n");
+    printf("---------------------\n");
+    printf("%-25s%d\n", "Stats version:", s->version);
+    printf("%-25s%d\n", "Exit code:", s->ac_exitcode);
+    printf("%-25s0x%x\n", "Flags:", s->ac_flag);
+    /* ac_nice is an unsigned byte; the cast recovers negative nice values. */
+    printf("%-25s%d\n", "Nice value:", (signed char)s->ac_nice);
+    printf("%-25s%s\n", "Command name:", s->ac_comm);
+    printf("%-25s%d\n", "Scheduling discipline:", s->ac_sched);
+    printf("%-25s%u\n", "UID:", s->ac_uid);
+    printf("%-25s%u\n", "GID:", s->ac_gid);
+    printf("%-25s%u\n", "PID:", s->ac_pid);
+    printf("%-25s%u\n", "PPID:", s->ac_ppid);
+
+    if (human_readable) {
+        time_t begin_time = s->ac_btime;
+        printf("%-25s%s", "Begin time:", ctime(&begin_time));
+    } else {
+        printf("%-25s%u sec\n", "Begin time:", s->ac_btime);
+    }
+    printf("%-25s%llu usec\n", "Elapsed time:", s->ac_etime);
+    printf("%-25s%llu usec\n", "User CPU time:", s->ac_utime);
+    printf("%-25s%llu\n", "Minor page faults:", s->ac_minflt);
+    printf("%-25s%llu\n", "Major page faults:", s->ac_majflt);
+    printf("%-25s%llu usec\n", "Scaled user time:", s->ac_utimescaled);
+    printf("%-25s%llu usec\n", "Scaled system time:", s->ac_stimescaled);
+
+    printf("\nDelay accounting\n");
+    printf("----------------\n");
+    printf("       %15s%15s%15s%15s%15s%15s\n", "Count",
+           human_readable ? "Delay (ms)" : "Delay (ns)", "Average delay",
+           "Real delay", "Scaled real", "Virtual delay");
+
+    if (!human_readable) {
+        printf("CPU    %15llu%15llu%15llu%15llu%15llu%15llu\n",
+               s->cpu_count,
+               s->cpu_delay_total,
+               average_ns(s->cpu_delay_total, s->cpu_count),
+               s->cpu_run_real_total,
+               s->cpu_scaled_run_real_total,
+               s->cpu_run_virtual_total);
+        printf("IO     %15llu%15llu%15llu\n",
+               s->blkio_count,
+               s->blkio_delay_total,
+               average_ns(s->blkio_delay_total, s->blkio_count));
+        printf("Swap   %15llu%15llu%15llu\n",
+               s->swapin_count,
+               s->swapin_delay_total,
+               average_ns(s->swapin_delay_total, s->swapin_count));
+        printf("Reclaim%15llu%15llu%15llu\n",
+               s->freepages_count,
+               s->freepages_delay_total,
+               average_ns(s->freepages_delay_total, s->freepages_count));
+    } else {
+        const double ns_per_ms = 1e6;
+        printf("CPU    %15llu%15.3f%15.3f%15.3f%15.3f%15.3f\n",
+               s->cpu_count,
+               s->cpu_delay_total / ns_per_ms,
+               average_ms(s->cpu_delay_total, s->cpu_count),
+               s->cpu_run_real_total / ns_per_ms,
+               s->cpu_scaled_run_real_total / ns_per_ms,
+               s->cpu_run_virtual_total / ns_per_ms);
+        printf("IO     %15llu%15.3f%15.3f\n",
+               s->blkio_count,
+               s->blkio_delay_total / ns_per_ms,
+               average_ms(s->blkio_delay_total, s->blkio_count));
+        printf("Swap   %15llu%15.3f%15.3f\n",
+               s->swapin_count,
+               s->swapin_delay_total / ns_per_ms,
+               average_ms(s->swapin_delay_total, s->swapin_count));
+        printf("Reclaim%15llu%15.3f%15.3f\n",
+               s->freepages_count,
+               s->freepages_delay_total / ns_per_ms,
+               average_ms(s->freepages_delay_total, s->freepages_count));
+    }
+
+    printf("\nExtended accounting fields\n");
+    printf("--------------------------\n");
+    if (human_readable && s->ac_stime) {
+        printf("%-25s%.3f MB\n", "Average RSS usage:",
+               (double)s->coremem / s->ac_stime);
+        printf("%-25s%.3f MB\n", "Average VM usage:",
+               (double)s->virtmem / s->ac_stime);
+    } else {
+        /* The raw counters accumulate MB * usec (see linux/taskstats.h). */
+        printf("%-25s%llu MB-usec\n", "Accumulated RSS usage:", s->coremem);
+        printf("%-25s%llu MB-usec\n", "Accumulated VM usage:", s->virtmem);
+    }
+    printf("%-25s%llu KB\n", "RSS high water mark:", s->hiwater_rss);
+    printf("%-25s%llu KB\n", "VM high water mark:", s->hiwater_vm);
+    printf("%-25s%llu\n", "IO bytes read:", s->read_char);
+    printf("%-25s%llu\n", "IO bytes written:", s->write_char);
+    printf("%-25s%llu\n", "IO read syscalls:", s->read_syscalls);
+    printf("%-25s%llu\n", "IO write syscalls:", s->write_syscalls);
+
+    printf("\nPer-task/thread statistics\n");
+    printf("--------------------------\n");
+    printf("%-25s%llu\n", "Voluntary switches:", s->nvcsw);
+    printf("%-25s%llu\n", "Involuntary switches:", s->nivcsw);
+}
+
+/* Writes the command-line help text to stdout. The text has no conversion
+ * specifiers, so it is emitted verbatim with fputs(). */
+void print_usage() {
+    fputs("Linux task stats reporting tool\n\n"
+          "Usage: taskstats [options]\n\n"
+          "Options:\n"
+          "  --help        This text\n"
+          "  --pid PID     Print stats for the process id PID\n"
+          "  --tgid TGID   Print stats for the thread group id TGID\n"
+          "  --raw         Print raw numbers instead of human readable units\n"
+          "\n"
+          "Either PID or TGID must be specified. For more documentation about "
+          "the reported fields, see\n"
+          "https://www.kernel.org/doc/Documentation/accounting/"
+          "taskstats-struct.txt\n", stdout);
+}
+
+/* Parses --pid/--tgid/--raw/--help, queries the kernel's taskstats for the
+ * requested id over generic netlink and prints them. Returns EXIT_SUCCESS
+ * once stats were printed (or for --help), EXIT_FAILURE otherwise. */
+int main(int argc, char** argv) {
+    int command_type = 0;
+    int pid = 0;
+    int human_readable = 1;
+    int exit_status = EXIT_FAILURE;
+
+    /* Distinct `val`s let us dispatch on getopt's return value, which (unlike
+     * the long-option index) is also defined for unrecognized options. */
+    const struct option long_options[] = {
+        {"help", no_argument, 0, 'h'},
+        {"pid", required_argument, 0, 'p'},
+        {"tgid", required_argument, 0, 't'},
+        {"raw", no_argument, 0, 'r'},
+        {0, 0, 0, 0}
+    };
+
+    int option_char;
+    while ((option_char = getopt_long_only(argc, argv, "", long_options,
+                                           NULL)) != -1) {
+        switch (option_char) {
+            case 'h':
+                print_usage();
+                return EXIT_SUCCESS;
+            case 'p':
+            case 't':
+                command_type = option_char == 'p' ? TASKSTATS_CMD_ATTR_PID
+                                                  : TASKSTATS_CMD_ATTR_TGID;
+                pid = atoi(optarg);
+                break;
+            case 'r':
+                human_readable = 0;
+                break;
+            default: /* '?': getopt has already reported the bad option */
+                return EXIT_FAILURE;
+        }
+    }
+
+    if (!pid) {
+        printf("Either PID or TGID must be specified\n");
+        return EXIT_FAILURE;
+    }
+
+    struct nl_sock* netlink_socket = nl_socket_alloc();
+    if (!netlink_socket || genl_connect(netlink_socket) < 0) {
+        perror("Unable to open netlink socket (are you root?)");
+        goto out;
+    }
+
+    int family_id = get_family_id(netlink_socket, TASKSTATS_GENL_NAME);
+    if (!family_id) {
+        perror("Unable to determine taskstats family id "
+               "(does your kernel support taskstats?)");
+        goto out;
+    }
+    struct TaskStatistics stats;
+    /* 0 means no pid/tgid came back (e.g. the kernel answered with an error),
+     * so there is nothing meaningful to print. */
+    if (query_task_stats(netlink_socket, family_id, command_type, pid,
+                         &stats) <= 0) {
+        fprintf(stderr, "Failed to query taskstats\n");
+        goto out;
+    }
+    print_task_stats(&stats, human_readable);
+    exit_status = EXIT_SUCCESS;
+
+out:
+    if (netlink_socket) {
+        nl_socket_free(netlink_socket);
+    }
+    return exit_status;
+}