Fix histogram memory issues.
Before we had buckets of 5 micro second size. For a 100 MS GC
this used at least 16 * (100 / .005) bytes of storage inside of the
histogram data structure. If you consider the 3 different GC types,
and each timing logger having its own histogram its easy to see
how memory used was significant.
We now have an upper bound on the number of buckets (default 100).
When we hit the upper bound we simply combine adjacent buckets
together. This reduces the total number of buckets by a factor of
2, while increasing the bucket size by a factor of 2. This means
that we may lose a slight amount of precision, but the confidence
intervals remain nearly as useful.
Total unknown memory (occam-svelte):
Before: 45648 kB:
After: 33304 kB
There are probably still some additional optimizations which can be
done as disabling histograms altogether reduces the memory used by
another ~2mB.
A bit of other cleanup in image_space.cc and dlmalloc_space.cc.
Bug: 9967927
Change-Id: I87bb6fe4a3e0e790f104abf3cf07f67677cd7ab3
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index 33a1e65..f508af9 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -31,19 +31,33 @@
template <class Value> class Histogram {
const double kAdjust;
- const Value kBucketWidth;
const size_t kInitialBucketCount;
public:
- explicit Histogram(std::string);
+ class CumulativeData {
+ friend class Histogram<Value>;
+ std::vector<uint64_t> freq_;
+ std::vector<double> perc_;
+ };
+
+ Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
void AddValue(Value);
- void CreateHistogram();
+ // Builds the cumulative distribution function from the frequency data.
+ // Accumulative summation of frequencies.
+ // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
+ // Accumulative summation of percentiles; which is the frequency / SampleSize
+ // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
+ void CreateHistogram(CumulativeData& data);
+ // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
void Reset();
double Mean() const;
double Variance() const;
- double Percentile(double) const;
- void PrintConfidenceIntervals(std::ostream &, double) const;
- void PrintBins(std::ostream &);
+ double Percentile(double per, const CumulativeData& data) const;
+ void PrintConfidenceIntervals(std::ostream& os, double interval,
+ const CumulativeData& data) const;
+ void PrintBins(std::ostream& os, const CumulativeData& data) const;
+ Value GetRange(size_t bucket_idx) const;
+ size_t GetBucketCount() const;
uint64_t SampleSize() const {
return sample_size_;
@@ -61,49 +75,33 @@
return max_value_added_;
}
- const std::string &Name() const {
+ const std::string& Name() const {
return name_;
}
-
private:
- void BuildRanges();
void Initialize();
- size_t FindBucket(Value);
- void BucketiseValue(Value);
- // Builds the cumulative distribution function from the frequency data.
- // Must be called before using the percentile function.
- void BuildCDF();
+ size_t FindBucket(Value val) const;
+ void BucketiseValue(Value val);
// Add more buckets to the histogram to fill in a new value that exceeded
// the max_read_value_.
- void GrowBuckets(Value);
- bool new_values_added_;
+ void GrowBuckets(Value val);
std::string name_;
+ // Maximum number of buckets.
+ const size_t max_buckets_;
// Number of samples placed in histogram.
- uint64_t sample_size_;
+ size_t sample_size_;
// Width of the bucket range. The lower the value is the more accurate
- // histogram percentiles are.
+ // histogram percentiles are. Grows adaptively when we hit max buckets.
Value bucket_width_;
- // Number of bucket to have in the histogram. this value will increase
- // to accommodate for big values that don't fit in initial bucket ranges.
- size_t bucket_count_;
- // Represents the ranges of the histograms. Has SampleSize() + 1 elements
- // e.g. 0,5,10,15 represents ranges 0-5, 5-10, 10-15
- std::vector<Value> ranges_;
- // How many occurrences of values fall within a corresponding range that is
- // saved in the ranges_ vector.
- std::vector<uint64_t> frequency_;
- // Accumulative summation of frequencies.
- // cumulative_freq_[i] = sum(cumulative_freq_[j] : 0 < j < i )
- std::vector<uint64_t> cumulative_freq_;
- // Accumulative summation of percentiles; which is the frequency / SampleSize
- // cumulative_freq_[i] = sum(cumulative_freq_[j] : 0 < j < i )
- std::vector<double> cumulative_perc_;
+ // How many occurrences of values fall within a bucket at index i where i covers the range
+ // starting at min_ + i * bucket_width_ with size bucket_size_.
+ std::vector<uint32_t> frequency_;
// Summation of all the elements inputed by the user.
Value sum_;
- // Maximum value that can fit in the histogram, grows adaptively.
- Value min_;
// Minimum value that can fit in the histogram. Fixed to zero for now.
+ Value min_;
+ // Maximum value that can fit in the histogram, grows adaptively.
Value max_;
// Summation of the values entered. Used to calculate variance.
Value sum_of_squares_;