1 /* 2 * Copyright 2021 Google LLC 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * https://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include "aggregator.pb.h" 20 #include "compactor_stack.h" 21 #include "random_generator.h" 22 23 // KLL Quantile - Implementation of Approximate quantiles algorithm based on 24 // the KLL16 paper (up to section 3), see https://arxiv.org/abs/1603.05346. 25 // 26 // Simplified, single-type library for use on Android devices which is 27 // compatible with internal libraries. Cannot use Abseil; trimmed down feature 28 // set, doing leaf aggregation only; no multi-type support/templates, no dynamic 29 // types. 30 namespace dist_proc { 31 namespace aggregation { 32 class KllQuantileOptions; 33 34 class KllQuantile { 35 public: 36 static std::unique_ptr<KllQuantile> Create(std::string* error = nullptr); 37 static std::unique_ptr<KllQuantile> Create(const KllQuantileOptions& options, 38 std::string* error = nullptr); num_values()39 int64_t num_values() const { 40 return num_values_; 41 } inv_eps()42 int64_t inv_eps() const { 43 return inv_eps_; 44 } k()45 int k() const { 46 return compactor_stack_.k(); 47 } num_stored_values()48 int64_t num_stored_values() const { 49 return compactor_stack_.num_stored_items(); 50 } 51 // Reset the aggregator to its state just after construction. 52 void Reset(); 53 void Add(int64_t value); 54 55 // Adds a value to the aggregator with multiplicity 'weight' (same as adding 56 // the value with Add(value) 'weight' times). Does nothing if weight <= 0. 57 // 58 // If your weights exceed the max int32 size, we recommend to scale all 59 // weights down to make them fit within that bound, and use (randomized) 60 // rounding where needed. 61 // Background: Even at high precision values (inv_eps ~100k), the compactor 62 // stack will only accurately track weights in a range of ~31 consecutive 63 // powers of 2, covering the largest weights encountered, while reservoir 64 // sampling is used for weights below that range. So the precision loss by 65 // downscaling and randomized rounding is negligible. 66 void AddWeighted(int64_t value, int weight); 67 68 // Not safe to be called concurrently. 69 zetasketch::android::AggregatorStateProto SerializeToProto(); 70 IsSamplerOn()71 bool IsSamplerOn() const { 72 return compactor_stack_.IsSamplerOn(); 73 } 74 75 private: 76 // Constructor. KllQuantile(int64_t inv_eps,int64_t inv_delta,int k,RandomGenerator * random)77 KllQuantile(int64_t inv_eps, int64_t inv_delta, int k, RandomGenerator* random) 78 : inv_eps_(inv_eps), 79 owned_random_(random != nullptr ? nullptr : std::make_unique<MTRandomGenerator>()), 80 compactor_stack_(inv_eps_, inv_delta, k, 81 random != nullptr ? random : owned_random_.get()) { 82 Reset(); 83 } 84 void UpdateMin(const int64_t value); 85 void UpdateMax(const int64_t value); 86 int64_t inv_eps_; 87 // The (exact) minimum item encountered among all items. 88 int64_t min_{}; 89 // The (exact) maximum item encountered among all items. 90 int64_t max_{}; 91 // Number of items added into the aggregator. 92 int64_t num_values_; 93 // Owned MTRandom instance, if not given a RandomGenerator. 94 std::unique_ptr<MTRandomGenerator> owned_random_; 95 // Stack of compactors to which newly added items are added; 96 // it maintains a 'sketch' of hitherto added items. 97 internal::CompactorStack compactor_stack_; 98 99 KllQuantile(const KllQuantile&) = delete; 100 KllQuantile& operator=(const KllQuantile&) = delete; 101 }; 102 103 // Explicitly set KLL's epsilon and delta parameters that control 104 // approximation error and failure probability. 105 // *inv_eps is 1/epsilon, where epsilon is the approximation error parameter: 106 // When a user queries for a quantile phi, the rank of the returned 107 // approximate answer may be +/- (epsilon * n) off from the correct 108 // rank ceil(phi * n), where n is the number of aggregated items. 109 // *inv_delta is 1/delta, where delta is the failure probability parameter: 110 // with delta probability, at most one out of all possible quantile 111 // queries can be further off than the approximation guarantee. 112 class KllQuantileOptions { 113 public: 114 // Set inv_eps. Default value: 1000 set_inv_eps(int64_t inv_eps)115 void set_inv_eps(int64_t inv_eps) { 116 inv_eps_ = inv_eps; 117 } 118 // Set inv_delta. Default value: 100000 set_inv_delta(int64_t inv_delta)119 void set_inv_delta(int64_t inv_delta) { 120 inv_delta_ = inv_delta; 121 } 122 // Set k, overriding the default calculation of this parameter from inv_eps 123 // and inv_delta. set_k(int k)124 void set_k(int k) { 125 k_ = k; 126 } 127 // Set RandomGenerator pointer to use (caller retains ownership). Default is 128 // to use an owned MTRandomGenerator instance. set_random(RandomGenerator * random)129 void set_random(RandomGenerator* random) { 130 random_ = random; 131 } inv_eps()132 int64_t inv_eps() const { 133 return inv_eps_; 134 } inv_delta()135 int64_t inv_delta() const { 136 return inv_delta_; 137 } k()138 int k() const { 139 return k_; 140 } random()141 RandomGenerator* random() const { 142 return random_; 143 } 144 145 private: 146 int64_t inv_eps_ = 1000; 147 int64_t inv_delta_ = 100000; 148 int k_ = 0; 149 RandomGenerator* random_ = nullptr; 150 }; 151 152 } // namespace aggregation 153 } // namespace dist_proc 154