// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package cobalt;

// NOTE(review): presumably declares WindowSize, the type of
// ReportDefinition.local_aggregation_period -- confirm.
import "window_size.proto";

// Java code generation: emit one top-level class per message/enum, placed in
// the com.google.cobalt package.
option java_multiple_files = true;
option java_package = "com.google.cobalt";
22
////////////////////////////////////////////////////////////////////////////////
// NOTE: This file is used by the Cobalt client and the Cobalt servers.
// The source-of-truth of this file is located in Cobalt's open source code
// repository, and the file is copied to Android where it is used by the Cobalt
// client. Do not edit the copy of this file in this Android repo as those edits
// will be overwritten when the file is next copied.
////////////////////////////////////////////////////////////////////////////////
30
// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// device where Cobalt analyzes the logged Events in order to form Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all devices in order to generate a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
// Next ID: 33
54message ReportDefinition {
55  reserved 4, 5, 6, 7, 8, 9, 11, 14, 15, 16, 12, 101, 102, 31, 21;
56  reserved "aggregation_type", "aggregation_window", "candidate_lis", "dp_release_config",
57      "expected_population_size", "expected_string_set_size", "export_location_override",
58      "local_privacy_noise_level", "output_location", "percentiles", "threshold", "window_size",
59      "use_poisson_mechanism_for_privacy", "prob_bit_flip", "candidate_file";
60
61  // Unique name for this Report within its owning MetricDefinition.
62  // The name must obey the syntax of a C variable name and must have length
63  // at most 64. The integer |id| field is the stable identifier for a report
64  // so this name may be changed. However doing this may affect the
65  // names and locations of some artifacts produced by Cobalt's report
66  // generation pipeline.
67  string report_name = 1;
68
69  // The unique integer ID for this report within its owning metric.
70  // The user must manually set this |id| field. This is the stable identifier
71  // for a report and should not be changed once data collection begins.
72  uint32 id = 2;
73
74  // A Report has one of the following types.
75  // Next standard report type ID: 22
76  enum ReportType {
77    reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999;
78    reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS",
79        "INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
80        "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
81        "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";
82
83    REPORT_TYPE_UNSET = 0;
84
85    // For each system_profile SP and each event_vector EV, produces the total
86    // count of all occurrences on all devices in the fleet with system profile
87    // SP of the event associated with EV over the course of the report day.
88    // For example, a report of this type might give the total number of times
89    // a medium, red widget was used across the fleet yesterday.
90    //
91    // Input metric types: OCCURRENCE
92    //
93    // Local aggregation: COUNT
94    // Local aggregation period: 1 hour
95    // Global aggregation: OCCURRENCE_COUNTS
96    // System Profile Selection Policy: REPORT_ALL
97    //
98    // Output report row type: OccurrenceCountReportRow
99    // (See report_row.proto)
100    //
101    // ReportDefinition fields particular to this type:
102    //    none
103    FLEETWIDE_OCCURRENCE_COUNTS = 11;
104
105    // For each system_profile SP and each event_vector EV, produces the count
106    // of the number of unique devices with system profile SP for which EV
107    // “is accepted” during the aggregation period, which must be DAYS_1,
108    // DAYS_7, DAYS_28 or DAYS_30.
109    //
110    // There are different versions of what “is accepted” means depending on
111    // which local aggregation procedure is specified:
112    //
113    // AT_LEAST_ONCE. In this case EV is accepted if EV was logged at least once
114    // during the aggregation period. For example, a report of this type might
115    // give the total number of devices with system profile SP on which a
116    // medium, red widget was used at least once in the seven-day period
117    // ending yesterday.
118    //
119    // SELECT_FIRST, SELECT_MOST_COMMON. In this case EV is accepted if the
120    // category selection procedure selected EV. For example, a report of this
121    // type using SELECT_MOST_COMMON might give the total number of devices
122    // with system profile SP on which most of the widgets used during the
123    // seven-day period ending yesterday were medium-red.
124    //
125    // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
126    // SELECT_FIRST, in combination with setting expedited_sending, results in
127    // the count being sent by the device when the event occurs (instead of at
128    // the end of the day). This can be desirable for having data for the
129    // current day appear faster in the reports output by Cobalt.
130    //
131    // Input metric types: OCCURRENCE
132    //
133    // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
134    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
135    // Global aggregation: OCCURRENCE_COUNTS
136    //
137    // Output report row type: OccurrenceCountReportRow
138    // (See report_row.proto)
139    //
140    // ReportDefinition fields particular to this type:
141    //   - local_aggregation_procedure
142    //   - local_aggregation_period
143    //   - expedited_sending
144    //   - system_profile_selection (SELECT_FIRST and SELECT LAST will maintain
145    //     uniqueness, REPORT_ALL may be useful in some cases)
146    UNIQUE_DEVICE_COUNTS = 12;
147
148    // For each system_profile SP and each event_vector EV, produces an
149    // int-range histogram such that in each int range bucket it gives the
150    // number of unique devices with system_profile SP for which an integer
151    // value, aggregated locally on device over the aggregation period,
152    // associated with EV, falls into the bucket.
153    //
154    // There are two versions of this depending on the metric type:
155    //
156    // With metrics of type OCCURRENCE the integer values are occurrence counts.
157    // For example, for the integer bucket 10-100, a report of this type might
158    // give the number of devices with system profile SP on which a medium,
159    // red widget was used between 10 and 100 times in the seven-day period
160    // ending yesterday.
161    //
162    // With metrics of type INTEGER the integer values are computed statistics.
163    // For example, for the integer bucket 10-100, a report of this type that
164    // specifies the MINIMUM local aggregation procedure might give the number
165    // of devices with system profile SP on which the minimum temperature of a
166    // medium red widget over the seven-day period ending yesterday was between
167    // 10 and 100 degrees.
168    //
169    // Input metric types: OCCURRENCE or INTEGER
170    //
171    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
172    //                    NUMERIC_STAT (used with INTEGER metrics)
173    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
174    // Global aggregation: INTEGER_HISTOGRAMS
175    //
176    // Output report row type: IntegerHistogramReportRow
177    // (See report_row.proto)
178    //
179    // ReportDefinition fields particular to this type:
180    //   - local_aggregation_procedure (only when the metric type is INTEGER)
181    //   - local_aggregation_period
182    //   - int_buckets (this is used only on the server for reports without
183    //     added privacy, but is used on the client for reports with added
184    //     privacy)
185    //   - system_profile_selection (SELECT_FIRST and SELECT LAST will maintain
186    //     uniqueness, REPORT_ALL may be useful in some cases)
187    UNIQUE_DEVICE_HISTOGRAMS = 13;
188
189    // For each system_profile SP and each event_vector EV, produces an
190    // int-range histogram such that in each int range bucket it gives the
191    // number of values, associated with EV, from devices
192    // with system_profile SP, that fall into the bucket, where each device
193    // computes one such value per hour.
194    //
195    // Computationally this report type is identical to
196    // UNQIQUE_DEVICE_HISTOGRAMS except that the local aggregation period
197    // used is one hour and so the counts in each buckets are not interpreted
198    // as a number of unique devices.
199    //
200    // There are two versions of this depending on the metric type:
201    //
202    // With metrics of type OCCURRENCE the integer values are occurrence counts.
203    // For example, for the integer bucket 10-100, a report of this type might
204    // give the number of times that the hourly count of medium red widgets
205    // used was between 10 and 100 over devices with system profile SP,
206    // yesterday.
207    //
208    // With metrics of type INTEGER the integer values are computed statistics.
209    // For example, for the integer bucket 10-100, a report of this that
210    // specifies the MINIMUM local aggregation procedure might give the number
211    // of times that the minimum temperature over an hour of all medium red
212    // widgets used was between 10 and 100 degrees over all devices with
213    // system profile SP, yesterday.
214    //
215    // Input metric types: OCCURRENCE or INTEGER
216    //
217    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
218    //                    NUMERIC_STAT (used with INTEGER metrics)
219    // Local aggregation period: one hour
220    // Global aggregation: INTEGER_HISTOGRAMS
221    //
222    // Output report row type: IntegerHistogramReportRow
223    // (See report_row.proto)
224    //
225    // ReportDefinition fields particular to this type:
226    //   - local_aggregation_procedure (only when the metric type is INTEGER)
227    //   - int_buckets (this is used only on the server for reports without
228    //     added privacy, but is used on the client for reports with added
229    //     privacy)
230    //   - system_profile_selection (SELECT_FIRST and SELECT LAST will maintain
231    //     uniqueness, REPORT_ALL may be useful in some cases)
232    HOURLY_VALUE_HISTOGRAMS = 14;
233
234    // For each system_profile SP and each event_vector EV, produces an
235    // int-range histogram such that in each int range bucket it gives the
236    // number of integer measurements, associated with EV, logged on devices
237    // with system_profile SP, that fall into the bucket. Here we are counting
238    // each value logged by the instrumented code individually and so the rate
239    // at which values are being recorded is arbitrary and varies from device
240    // to device. For example, for the integer bucket 10-100, a report of this
241    // type might give the number of times that a medium red widget's
242    // temperature was measured as being between 10 and 100 degrees over all
243    // devices with system profile SP, yesterday. The rate at which these
244    // widget temperature measurements are taken is arbitrary and may vary
245    // from device to device.
246    //
247    // Input metric types: INTEGER or INTEGER_HISTOGRAM
248    //
249    // Local aggregation: INTEGER_HISTOGRAM
250    // Local aggregation period: one hour
251    // Global aggregation: INTEGER_HISTOGRAMS
252    // System Profile Selection Policy: REPORT_ALL
253    //
254    // Output report row type: IntegerHistogramReportRow
255    // (See report_row.proto)
256    //
257    // ReportDefinition fields particular to this type:
258    //   - int_buckets (Only with metric_type = INTEGER)
259    FLEETWIDE_HISTOGRAMS = 15;
260
261    // For each system_profile SP and each event_vector EV, produces the sum
262    // and count of many integer measurements associated with EV, logged on
263    // devices with system_profile SP. Here we are counting each value logged
264    // by the instrumented code individually and so the rate at which values are
265    // being recorded is arbitrary and varies from device to device. This allows
266    // us to  produce a fleetwide mean. For example, a report of this type might
267    // give the mean of all temperature measurements of medium-red widgets
268    // yesterday, across all devices with system profile SP, regardless of how
269    // many temperature measurements were taken on each device individually.
270    //
271    // Input metric types: INTEGER
272    //
273    // Local aggregation: SUM_AND_COUNT
274    // Local aggregation period: one hour
275    // Global aggregation: SUM_AND_COUNTS
276    // System Profile Selection Policy: REPORT_ALL
277    //
278    // Output report row type: SumAndCountReportRow
279    // (See report_row.proto)
280    //
281    // ReportDefinition fields particular to this type:
282    //   none
283    FLEETWIDE_MEANS = 16;
284
285    // For each system_profile SP and each event_vector EV, produces several
286    // numeric statistics (e.g. 95%-ile) over a set of integers associated
287    // with EV, collected from all devices with system_profile SP. Each unique
288    // device contributes a single value and so the distribution of the values
289    // may be thought of as a distribution of unique devices.
290    //
291    // There are different versions of this depending on the metric type:
292    //
293    // With metrics of type OCCURRENCE the integer values are occurrence counts
294    // over the course of the aggregation period. For example a report of this
295    // type might give the 95%-ile of the counts of medium-red widgets used by
296    // each device over the 7-day period ending yesterday.
297    //
298    // With metrics of type INTEGER the integer values are computed statistics.
299    // For example, a report of this type that specifies the MINIMUM local
300    // aggregation procedure might give the 95%-ile of the minimum temperature
301    // over the 7-day period ending yesterday of all medium-red widgets over
302    // all devices with system profile SP.
303    //
304    // Input metric types: OCCURRENCE or INTEGER
305    //
306    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
307    //                    NUMERIC_STAT (used with INTEGER metrics)
308    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
309    // Global aggregation: NUMERIC_STATS
310    // System Profile Selection Policy: REPORT_ALL
311    //
312    // Output report row type: NumericStatsReportRow
313    // (See report_row.proto)
314    //
315    // ReportDefinition fields particular to this type:
316    //   - local_aggregation_procedure (only when the metric type is INTEGER)
317    //   - local_aggregation_period
318    UNIQUE_DEVICE_NUMERIC_STATS = 17;
319
320    // For each system_profile SP and each event_vector EV, produces several
321    // numeric statistics (e.g. 95%-ile) over a set of integers associated
322    // with EV, collected from all devices with system_profile SP. Each unique
323    // device contributes a value every hour and so the distribution of the
324    // values may NOT be thought of as a distribution of unique devices.
325    //
326    // Computationally this report type is identical to
327    // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
328    // used is one hour.
329    //
330    // There are different versions of this depending on the metric type:
331    //
332    // With metrics of type OCCURRENCE the integer values are occurrence counts
333    // over the course of the hour. For example a report of this
334    // type might give the 95%-ile of the counts of medium-red widgets used in
335    // any one hour period on any device with System profile SP, yesterday.
336    //
337    // With metrics of type INTEGER the integer values are computed statistics.
338    // For example, a report of this type that specifies the MINIMUM local
339    // aggregation procedure might give the 95%-ile of the minimum temperature
340    // over any one-hour period of medium-red widgets use on any device
341    // with system profile SP, yesterday.
342    //
343    // Input metric types: OCCURRENCE or INTEGER
344    //
345    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
346    //                    NUMERIC_STAT (used with INTEGER metrics)
347    // Local aggregation period: 1 hour
348    // Global aggregation: NUMERIC_STATS
349    // System Profile Selection Policy: REPORT_ALL
350    //
351    // Output report row type: NumericStatsReportRow
352    // (See report_row.proto)
353    //
354    // ReportDefinition fields particular to this type:
355    //   - local_aggregation_procedure (only when the metric type is INTEGER)
356    HOURLY_VALUE_NUMERIC_STATS = 18;
357
358    // For each system_profile SP and each event_vector EV, produces the total
359    // count of all occurrences of a string value on all devices in the fleet
360    // with system profile SP of the event associated with EV over the course
361    // of the report day.
362    //
363    // Input metric types: STRING
364    //
365    // Local aggregation: STRING_HISTOGRAM
366    // Local aggregation period: 1 hour
367    // Global aggregation: STRING_HISTOGRAMS
368    // System Profile Selection Policy: REPORT_ALL
369    //
370    // Output report row type: StringCountReportRow
371    // (See report_row.proto)
372    //
373    // ReportDefinition fields particular to this type:
374    //   - string_buffer_max
375    STRING_COUNTS = 20;
376
377    // For each system_profile SP, each event_vector EV, and each string value
378    // produces the count of the number of unique devices with system profile
379    // SP on which the string value was logged in connection with the EV during
380    // the aggregation period, which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
381    //
382    // This is similar to the AT_LEAST_ONCE local aggregation procedure for
383    // UNIQUE_DEVICE_COUNTS. For example, a report of this type might
384    // give the total number of devices with system profile SP on which a
385    // medium, red widget was used in conjunction with the component name
386    // "widget-consumer" at least once in the seven-day period ending
387    // yesterday.
388    //
389    // Input metric types: STRING
390    //
391    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
392    // Global aggregation: STRING_HISTOGRAMS
393    //
394    // Output report row type: StringCountReportRow
395    // (See report_row.proto)
396    //
397    // ReportDefinition fields particular to this type:
398    //   - local_aggregation_period
399    //   - string_buffer_max
400    //   - system_profile_selection (SELECT_FIRST and SELECT LAST will maintain
401    //     uniqueness, REPORT_ALL may be useful in some cases)
402    UNIQUE_DEVICE_STRING_COUNTS = 21;
403  }
404  ReportType report_type = 3;
405
406  ////////////////  Fields for reports with privacy enabled  /////////////////
407
408  // The level of differential privacy applied to the report. Each level
409  // corresponds to an epsilon value in the shuffled model. The mapping
410  // from enum values to epsilon values is hard-coded in makePrivacyConstants()
411  // in the file //src/bin/config_parser/src/privacy/privacy_encoding_params.go
412  enum PrivacyLevel {
413    PRIVACY_LEVEL_UNKNOWN = 0;
414
415    // epsilon = infinity
416    NO_ADDED_PRIVACY = 1;
417
418    LOW_PRIVACY = 2;
419
420    MEDIUM_PRIVACY = 3;
421
422    HIGH_PRIVACY = 4;
423  }
424
425  // This field is used to specify the privacy level for a Cobalt report.
426  // All Cobalt report types support differential privacy and are required
427  // to set this field (use NO_ADDED_PRIVACY to disable differential privacy).
428  PrivacyLevel privacy_level = 20;
429
430  // The mean number of observations added per index point when performing the
431  // Poisson mechanism encoding for Cobalt reports. Should be set if and only if
432  // `privacy_level` is not NO_ADDED_PRIVACY.
433  //
434  // In the future, the value of this field will be computed by the registry
435  // parser as a function of other privacy-related fields and an estimate of the
436  // user population size. For now, it should be set manually in the Cobalt
437  // registry in consultation with the Cobalt team.
438  //
439  // TODO(b/295053509): update this comment once the field is populated by
440  // the registry parser.
441  double poisson_mean = 30;
442
443  // When reporting numerical values with privacy, the values are mapped to
444  // indices from 0 to num_index_points-1 with a randomized rounding method.
445  //
446  // In the future, the value of this field will be computed by the registry
447  // parser as a function of other privacy-related fields and an estimate of the
448  // user population size. For now, it should be set manually in the Cobalt
449  // registry in consultation with the Cobalt team.
450  //
451  // TODO(b/278932979): update this comment once the field is populated by
452  // the registry parser.
453  uint32 num_index_points = 22;
454
455  // When reporting strings with privacy, the strings are counted using a linear
456  // sketch.
457  //
458  // In the future, the value of this field will be computed by the registry
459  // parser as a function of other privacy-related fields and an estimate of the
460  // user population size. For now, it should be set manually in the Cobalt
461  // registry in consultation with the Cobalt team.
462  //
463  // TODO(b/278932979): update this comment once the field is populated by
464  // the registry parser.
465  StringSketchParameters string_sketch_params = 27;
466
467  // These fields specify the range of values that can be reported by a device
468  // in the specified local_aggregation_period. If the true value to be reported
469  // falls outside specified range, the value is clipped.
470  //
471  // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
472  // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
473  // computed for the device over the aggregation period specified in the
474  // report.
475  //
476  // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
477  // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
478  // is also required in order to bound the `count` value.)
479  //
480  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
481  // required for reports of type:
482  // * FLEETWIDE_OCCURRENCE_COUNTS
483  // * UNIQUE_DEVICE_NUMERIC_STATS
484  // * HOURLY_VALUE_NUMERIC_STATS
485  // * FLEETWIDE_MEANS
486  int64 min_value = 23;
487  int64 max_value = 24;
488
489  // This field specifies the maximum count to be reported by a device in the
490  // specified local_aggregation_period. If the true count is greater than
491  // max_count, then the count will be reported as max_count.
492  //
493  // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
494  // individual histogram bucket over the aggregation period of one hour. For
495  // STRING_COUNTS, it applies to the count for each string over one hour.
496  //
497  // For FLEETWIDE_MEANS, the bound applies to the per-device count of the
498  // values to be averaged over one hour.
499  //
500  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
501  // required for reports of type:
502  // * FLEETWIDE_HISTOGRAMS
503  // * FLEETWIDE_MEANS
504  // * STRING_COUNTS
505  uint64 max_count = 25;
506
507  ////////////////  Fields specific to some report types /////////////////
508
509  // A specification of integer-range buckets for a histogram.
510  //
511  // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
512  // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
513  // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
514  // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
515  // contains an instance of IntegerBuckets.
516  IntegerBuckets int_buckets = 10;
517
518  // The interval with which clients will generate and upload observations.
519  enum ReportingInterval {
520    REPORTING_INTERVAL_UNSET = 0;
521    HOURS_1 = 1;
522    DAYS_1 = 2;
523  }
524
525  // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
526  // supported by some client platforms. If not set, the reporting interval
527  // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
528  ReportingInterval reporting_interval = 32;
529
530  // This field can be used with all Report types. When set, the generated
531  // report will exclude an Observation if there are not at least
532  // |reporting_threshold| number of distinct devices reporting Observations
533  // with the same ObservationMetadata.
534  uint32 reporting_threshold = 13;
535
536  // The on-device function computed on the metric during the aggregation
537  // window.
538  enum LocalAggregationProcedure {
539    LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;
540
541    // Numerical statistic aggregation procedures to be used with reports
542    // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
543    // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS.
544    // TODO(fxbug.dev/87151): Rename these to remove the '_PROCEDURE' suffix.
545    SUM_PROCEDURE = 1;
546    MIN_PROCEDURE = 2;
547    MAX_PROCEDURE = 3;
548    MEAN = 4;
549    MEDIAN = 5;
550    // The value of N is set in the field
551    // |local_aggregation_procedure_percentile_n|.
552    PERCENTILE_N = 6;
553
554    // Logical aggregation procedures to be used with reports of type
555    // UNIQUE_DEVICE_COUNTS
556    AT_LEAST_ONCE = 7;
557    SELECT_FIRST = 8;
558    SELECT_MOST_COMMON = 9;
559  }
560
561  // This field is required for reports of type
562  // UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
563  // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS
564  // and UNIQUE_DEVICE_COUNTS. Different report types support
565  // different values of this field. See the comments on the
566  // enum values in LocalAggregationProcedure.
567  LocalAggregationProcedure local_aggregation_procedure = 17;
568
569  // This field is required when
570  // local_aggregation_procedure = LOCAL_AGGREGATION_PROCEDURE_PERCENTILE_N.
571  // In this case it gives the value of N to use. Otherwise this field is
572  // ignored.
573  uint32 local_aggregation_procedure_percentile_n = 18;
574
575  // Time window over which the metric is aggregated. The local aggregation
576  // period is specified for UNIQUE_DEVICE_* report types.
577  WindowSize local_aggregation_period = 19;
578
579  // The maximum number of distinct event vectors for which an instance of the Cobalt
580  // client should produce an observation, for a given local aggregation period. Event
581  // vectors are prioritized in order of first arrival during the aggregation period.
582  //
583  // For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event
584  // vectors are logged for this metric over an aggregation period, then Cobalt will send
585  // observations of the first 10 event vectors for that aggregation period and drop the
586  // last 2.
587  //
588  // If this field is unset, the registry parser assigns to it the total number of event
589  // vectors for the report's parent metric (i.e., the product over all metric dimensions
590  // of the number of event codes per dimension).
591  //
592  // The report's project will be charged against a resource budget for this value
593  // so project owners are encouraged to set this as small as possible.  For example,
594  // the report's parent metric may include a dimension with thousands of event codes,
595  // but it is expected that any one device will log only a few distinct event vectors
596  // per day. In that case we may set event_vector_buffer_max to a relatively small number,
597  // say 20. For reports which use differential privacy, setting event_vector_buffer_max
598  // to a smaller number will improve the signal for event vectors which are included in
599  // observations.
600  uint64 event_vector_buffer_max = 26;
601
602  // The maximum number of distinct strings that Cobalt must keep in its in-memory buffer
603  // on any single device. During local aggregation for reports of type STRING_COUNTS and
604  // UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per
605  // aggregation period. The report's project will be charged against a resource budget for this
606  // value so project owners are encouraged to set this as small as possible. A STRING metric
607  // includes a file of candidate strings that may contain many thousands of strings. But it is
608  // expected that any one device will log only a few of these strings per day. We may set
609  // string_buffer_max to a relatively small number, say 20.
610  //
611  // This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
612  uint32 string_buffer_max = 28;
613
614  // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
615  // event occurs, instead of waiting for the end of the day.
616  //
617  // This can only be enabled when using a local aggregation procedure of
618  // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy level is
619  // NO_ADDED_PRIVACY. When used with a system_profile_selection of REPORT_ALL
620  // or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
621  // for the current day when the event occurs instead of at the end of the day.
622  // For a system_profile_selection of SELECT_LAST, this may also be desirable,
623  // though it may result in a slight change in the current day's system profile
624  // that is used, as Cobalt won't wait until the end of the day to determine
625  // the final system profile, but will instead send the count immediately with
626  // the system profile that is currently active on the device.
627  bool expedited_sending = 29;
628
629  ///////////////////  Fields used by all report types ///////////////////
630  // Next id: 109
631
632  // The list of SystemProfileFields to include in each row of the report.
633  // Optional.
634  repeated SystemProfileField system_profile_field = 100;
635
636  // The list of Experiments to include in each row of the report.
637  //
638  // Each report row lists the intersection of the experiment ids active on the device and
639  // experiment ids specified in this field.
640  //
641  // The specified experiment ids must be found in one of the project's experiments_namespaces.
642  repeated int64 experiment_id = 104;
643
644  // This field is required for reports of type UNIQUE_DEVICE_COUNTS,
645  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
646  // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
647  // SELECT_FIRST, or occasionally REPORT_ALL.
648  //
649  // If the system profile value changed during the aggregation window specified
650  // for this report, system_profile_selection specifies which system profile to
651  // report for each device.
652  SystemProfileSelectionPolicy system_profile_selection = 103;
653
654  // Maximum ReleaseStage for which this Report is allowed to be collected.
655  ReleaseStage max_release_stage = 105;
656
657  // Report can be collected even if the user/device has not consented.
658  // This field can only be set to true on reports that use privacy mechanisms
659  // that include differential privacy (i.e. not DE_IDENTIFICATION). The use of
660  // this field is for collecting anonymized data that is allowed even when
661  // the consent is not given. These use cases need to be specially approved
662  // by privacy reviewers.
663  bool exempt_from_consent = 108;
664
665  // New Privacy API
666
667  // This enum identifies what privacy protection is applied to the report.
668  enum PrivacyMechanism {
669    PRIVACY_MECHANISM_UNSPECIFIED = 0;
670    // If you specify this value the data will be de-identified without
671    // additional privacy protections.
672    DE_IDENTIFICATION = 1;
673    // If you specify this value the data will be protected with Shuffled
674    // Differential Privacy guarantees (e.g., the noise wll be added on the
675    // devices)
676    SHUFFLED_DIFFERENTIAL_PRIVACY = 2;
677  }
678
679  // This field identifies what privacy protection is applied to the report.
680  // It will eventually be required once migration from privacy_level
681  // is complete.
682  PrivacyMechanism privacy_mechanism = 106;
683
684  // The object for grouping all parameters needed for SHUFFLED DP mode.
685  message ShuffledDifferentialPrivacyConfig {
686    // This field represents an upper bound on the amount of information which
687    // can be learned about a device from a report including that device.
688    // Lower values correspond to higher privacy.
689    // Epsilon must be > 0.
690    double epsilon = 1;
691    // This field represents the risk of the epsilon guarantee not holding. This
692    // is usually set as 1 over the expected number of participating devices.
693    // Delta must be > 0 and < 1.
694    double delta = 2;
695    // The generated report will exclude an Observation if there are not at
696    // least |reporting_threshold| number of distinct devices reporting
697    // Observations with the same ObservationMetadata.
698    uint32 reporting_threshold = 3;
699
700    // The mean number of observations added per index point when performing the
701    // Poisson mechanism encoding for Cobalt reports. Required.
702    //
703    // In the future, the value of this field will be computed by the registry
704    // parser as a function of other fields in this
705    // ShuffledDifferentialPrivacyConfig. For now, it should be set manually in
706    // the Cobalt registry in consultation with the Cobalt team.
707    //
708    // TODO(b/295053509): update this comment once the field is auto populated by
709    // the registry parser.
710    double poisson_mean = 4;
711  }
712
713  // If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY then privacy_config
714  // must contain valid ShuffledDifferentialPrivacyConfig otherwise empty.
715  oneof privacy_config {
716    ShuffledDifferentialPrivacyConfig shuffled_dp = 107;
717  }
718}
719
// Specifies the policy used to select the SystemProfile(s) to report.
enum SystemProfileSelectionPolicy {
  // Use the default value.
  //
  // This will resolve to 'REPORT_ALL', and should not be changed, for reports
  // of type:
  //   - FLEETWIDE_OCCURRENCE_COUNTS
  //   - FLEETWIDE_HISTOGRAMS
  //   - FLEETWIDE_MEANS
  //   - UNIQUE_DEVICE_NUMERIC_STATS
  //   - HOURLY_VALUE_NUMERIC_STATS
  //   - STRING_COUNTS
  //
  // For all other report types, SELECT_DEFAULT must not be used.
  SELECT_DEFAULT = 0;

  // Always report the last SystemProfile seen in the aggregation window, i.e.
  // the last SystemProfile seen *at the time of an event* in that window.
  SELECT_LAST = 1;

  // Always report the first SystemProfile seen in the aggregation window,
  // i.e. the first SystemProfile seen *at the time of an event* in that
  // window.
  SELECT_FIRST = 2;

  // Report all system profiles in the aggregation window. This is the most
  // sensible value to use for most report types.
  //
  // It may not be the best choice for reports that depend on some concept of
  // uniqueness (such as UNIQUE_DEVICE_COUNTS, UNIQUE_DEVICE_HISTOGRAMS,
  // UNIQUE_DEVICE_STRING_COUNTS, and HOURLY_VALUE_HISTOGRAMS): a single
  // device will no longer upload only one observation per time period, but
  // one observation per time period *per unique system_profile*.
  REPORT_ALL = 3;
}
749
// Identifies a single field of SystemProfile. A ReportDefinition uses these
// to specify which fields should be included in the generated Observations
// and reports.
//
// The meaning of each field is described on the corresponding field of
// SystemProfile in: cobalt/proto/common.proto
//
// NOTE: the values below are not declared in strictly numeric order; the
// declaration order is kept unchanged.
enum SystemProfileField {
  // Reserved numbers and names: protoc rejects any value that reuses them.
  reserved 6, 8;
  reserved "REALM", "EXPERIMENT_TOKENS";

  OS = 0;
  ARCH = 1;
  BOARD_NAME = 2;
  PRODUCT_NAME = 3;
  SYSTEM_VERSION = 4;
  APP_VERSION = 10;
  CHANNEL = 5;
  BUILD_TYPE = 7;
  EXPERIMENT_IDS = 9;
}
769
// The stages in the release cycle of a component.
//
// Each Cobalt customer determines its current ReleaseStage when initializing
// the CobaltService, and each Metric and Report can declare the maximum
// ReleaseStage for which it is allowed to be collected. For example, a DEBUG
// Metric will not be collected from a device running a FISHFOOD release.
enum ReleaseStage {
  RELEASE_STAGE_NOT_SET = 0;

  // A test build, also called "eng". Only use this value when the device is
  // running test builds, as all metrics/reports will be collected.
  DEBUG = 10;

  // A small, internal prototype. Used for testing a new feature internally,
  // usually within the team or a small group.
  FISHFOOD = 20;

  // An internal release, for testing with internal users.
  DOGFOOD = 40;

  // An open beta, for testing with internal and external users.
  OPEN_BETA = 60;

  // Generally available: the final stage of a release, also called
  // "production". If unsure of which release stage the device is running, it
  // is safest to fall back to this value (which is the default if no value is
  // set), to avoid inadvertently collecting metric/report data.
  GA = 99;
}
795
// ExponentialIntegerBuckets defines a partition of the integers into a finite
// number of exponentially increasing buckets.
//
// Let n = num_buckets. Then there are n+2 buckets, indexed 0,...,n+1.
//
// Bucket boundaries:
//   a[0] = floor
//   a[i] = floor + initial_step * step_multiplier ^ (i-1)  for i = 1,...,n
// that is:
//   a[1] = floor + initial_step
//   a[2] = floor + initial_step * step_multiplier
//   a[3] = floor + initial_step * step_multiplier ^ 2
//   a[4] = floor + initial_step * step_multiplier ^ 3
//
// Buckets:
//   Bucket 0 is the underflow bucket:   (-infinity, floor)
//   Bucket i for 0 < i < n+1:           [a[i-1], a[i])
//   Bucket n+1 is the overflow bucket:  [a[n], +infinity)
//
// Examples:
//
//   floor = 0, num_buckets = 3, initial_step = 10, step_multiplier = 10
//   gives the buckets:
//     (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity)
//
//   floor = 0, num_buckets = 3, initial_step = 2, step_multiplier = 2
//   gives the buckets:
//     (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity)
//
//   floor = 10, num_buckets = 3, initial_step = 2, step_multiplier = 2
//   gives the buckets:
//     (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity)
//
//   floor = 0, num_buckets = 3, initial_step = 100, step_multiplier = 10
//   gives the buckets:
//     (-infinity, 0), [0, 100), [100, 1000), [1000, 10000),
//     [10000, +infinity)
//
message ExponentialIntegerBuckets {
  // The boundary a[0]: values below it fall into the underflow bucket.
  int64 floor = 1;

  // The value n above. Must be at least 1.
  uint32 num_buckets = 2;

  // The distance from floor to the boundary a[1]. Must be at least one.
  uint32 initial_step = 3;

  // The factor by which the offset from floor grows from one boundary to the
  // next. Must be at least one.
  uint32 step_multiplier = 4;
}
856
// LinearIntegerBuckets defines a partition of the integers into a finite
// number of buckets of equal size.
//
// Let n = num_buckets. Then there are n+2 buckets, indexed 0,...,n+1.
//   Bucket 0 is the underflow bucket:   (-infinity, floor)
//   Bucket n+1 is the overflow bucket:  [floor + step_size * n, +infinity)
//
// For i = 1 to n, bucket i is defined as
//   [floor + step_size * (i-1), floor + step_size * i)
//
// Example: floor = 0, num_buckets = 3, step_size = 10 gives the buckets
//   (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +infinity)
message LinearIntegerBuckets {
  // The lower bound of bucket 1; values below it fall into the underflow
  // bucket.
  int64 floor = 1;

  // The value n above. Must be at least one.
  uint32 num_buckets = 2;

  // The width of each of the n regular buckets. Must be at least one.
  uint32 step_size = 3;
}
878
// An integer bucketing scheme, either exponential or linear, together with an
// output option.
message IntegerBuckets {
  // The bucketing scheme to use.
  oneof buckets {
    ExponentialIntegerBuckets exponential = 1;
    LinearIntegerBuckets linear = 2;
  }

  // If set to true, the report data is not padded with empty buckets, i.e.
  // empty buckets are not added merely so that every histogram contains a row
  // for every bucket. Buckets with a zero count may still occur if data is
  // logged that contains a zero count. This field cannot be set on reports
  // with added privacy.
  bool sparse_output = 3;
}
891
// The dimensions of a Count-Min Sketch.
message StringSketchParameters {
  // How many hashes the Count-Min Sketch uses.
  int32 num_hashes = 1;

  // How many cells the Count-Min Sketch has per hash.
  int32 num_cells_per_hash = 2;
}
899