1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     https://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "aggregator.pb.h"
20 #include "compactor_stack.h"
21 #include "random_generator.h"
22 
23 // KLL Quantile - Implementation of Approximate quantiles algorithm based on
24 // the KLL16 paper (up to section 3), see https://arxiv.org/abs/1603.05346.
25 //
26 // Simplified, single-type library for use on Android devices which is
27 // compatible with internal libraries. Cannot use Abseil; trimmed down feature
28 // set, doing leaf aggregation only; no multi-type support/templates, no dynamic
29 // types.
30 namespace dist_proc {
31 namespace aggregation {
32 class KllQuantileOptions;
33 
34 class KllQuantile {
35 public:
36     static std::unique_ptr<KllQuantile> Create(std::string* error = nullptr);
37     static std::unique_ptr<KllQuantile> Create(const KllQuantileOptions& options,
38                                                std::string* error = nullptr);
num_values()39     int64_t num_values() const {
40         return num_values_;
41     }
inv_eps()42     int64_t inv_eps() const {
43         return inv_eps_;
44     }
k()45     int k() const {
46         return compactor_stack_.k();
47     }
num_stored_values()48     int64_t num_stored_values() const {
49         return compactor_stack_.num_stored_items();
50     }
51     // Reset the aggregator to its state just after construction.
52     void Reset();
53     void Add(int64_t value);
54 
55     // Adds a value to the aggregator with multiplicity 'weight' (same as adding
56     // the value with Add(value) 'weight' times). Does nothing if weight <= 0.
57     //
58     // If your weights exceed the max int32 size, we recommend to scale all
59     // weights down to make them fit within that bound, and use (randomized)
60     // rounding where needed.
61     // Background: Even at high precision values (inv_eps ~100k), the compactor
62     // stack will only accurately track weights in a range of ~31 consecutive
63     // powers of 2, covering the largest weights encountered, while reservoir
64     // sampling is used for weights below that range. So the precision loss by
65     // downscaling and randomized rounding is negligible.
66     void AddWeighted(int64_t value, int weight);
67 
68     // Not safe to be called concurrently.
69     zetasketch::android::AggregatorStateProto SerializeToProto();
70 
IsSamplerOn()71     bool IsSamplerOn() const {
72         return compactor_stack_.IsSamplerOn();
73     }
74 
75 private:
76     // Constructor.
KllQuantile(int64_t inv_eps,int64_t inv_delta,int k,RandomGenerator * random)77     KllQuantile(int64_t inv_eps, int64_t inv_delta, int k, RandomGenerator* random)
78         : inv_eps_(inv_eps),
79           owned_random_(random != nullptr ? nullptr : std::make_unique<MTRandomGenerator>()),
80           compactor_stack_(inv_eps_, inv_delta, k,
81                            random != nullptr ? random : owned_random_.get()) {
82         Reset();
83     }
84     void UpdateMin(const int64_t value);
85     void UpdateMax(const int64_t value);
86     int64_t inv_eps_;
87     // The (exact) minimum item encountered among all items.
88     int64_t min_{};
89     // The (exact) maximum item encountered among all items.
90     int64_t max_{};
91     // Number of items added into the aggregator.
92     int64_t num_values_;
93     // Owned MTRandom instance, if not given a RandomGenerator.
94     std::unique_ptr<MTRandomGenerator> owned_random_;
95     // Stack of compactors to which newly added items are added;
96     // it maintains a 'sketch' of hitherto added items.
97     internal::CompactorStack compactor_stack_;
98 
99     KllQuantile(const KllQuantile&) = delete;
100     KllQuantile& operator=(const KllQuantile&) = delete;
101 };
102 
103 // Explicitly set KLL's epsilon and delta parameters that control
104 // approximation error and failure probability.
105 // *inv_eps is 1/epsilon, where epsilon is the approximation error parameter:
106 // When a user queries for a quantile phi, the rank of the returned
107 // approximate answer may be +/- (epsilon * n) off from the correct
108 // rank ceil(phi * n), where n is the number of aggregated items.
109 // *inv_delta is 1/delta, where delta is the failure probability parameter:
110 // with delta probability, at most one out of all possible quantile
111 // queries can be further off than the approximation guarantee.
112 class KllQuantileOptions {
113 public:
114     // Set inv_eps. Default value: 1000
set_inv_eps(int64_t inv_eps)115     void set_inv_eps(int64_t inv_eps) {
116         inv_eps_ = inv_eps;
117     }
118     // Set inv_delta. Default value: 100000
set_inv_delta(int64_t inv_delta)119     void set_inv_delta(int64_t inv_delta) {
120         inv_delta_ = inv_delta;
121     }
122     // Set k, overriding the default calculation of this parameter from inv_eps
123     // and inv_delta.
set_k(int k)124     void set_k(int k) {
125         k_ = k;
126     }
127     // Set RandomGenerator pointer to use (caller retains ownership). Default is
128     // to use an owned MTRandomGenerator instance.
set_random(RandomGenerator * random)129     void set_random(RandomGenerator* random) {
130         random_ = random;
131     }
inv_eps()132     int64_t inv_eps() const {
133         return inv_eps_;
134     }
inv_delta()135     int64_t inv_delta() const {
136         return inv_delta_;
137     }
k()138     int k() const {
139         return k_;
140     }
random()141     RandomGenerator* random() const {
142         return random_;
143     }
144 
145 private:
146     int64_t inv_eps_ = 1000;
147     int64_t inv_delta_ = 100000;
148     int k_ = 0;
149     RandomGenerator* random_ = nullptr;
150 };
151 
152 }  // namespace aggregation
153 }  // namespace dist_proc
154