// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package cobalt;

import "window_size.proto";

option java_multiple_files = true;
option java_package = "com.google.cobalt";

////////////////////////////////////////////////////////////////////////////////
// NOTE: This file is used by the Cobalt client and the Cobalt servers.
// The source-of-truth of this file is located in Cobalt's open source code
// repository, and the file is copied to Android where it is used by the Cobalt
// client. Do not edit the copy of this file in this Android repo as those edits
// will be overwritten when the file is next copied.
////////////////////////////////////////////////////////////////////////////////

// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// device where Cobalt analyzes the logged Events in order to form Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all devices in order to generate a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
// Next ID: 33
message ReportDefinition {
  // Field numbers and field names that are reserved in this message; the
  // protobuf compiler rejects any new field that reuses one of them.
  reserved 4, 5, 6, 7, 8, 9, 11, 14, 15, 16, 12, 101, 102, 31, 21;
  // NOTE(review): "candidate_lis" looks like a truncated "candidate_list".
  // Confirm against the upstream Cobalt registry before relying on this
  // reservation.
  reserved "aggregation_type", "aggregation_window", "candidate_lis", "dp_release_config",
           "expected_population_size", "expected_string_set_size", "export_location_override",
           "local_privacy_noise_level", "output_location", "percentiles", "threshold", "window_size",
           "use_poisson_mechanism_for_privacy", "prob_bit_flip", "candidate_file";

  // Unique name for this Report within its owning MetricDefinition.
  // The name must obey the syntax of a C variable name and must have length
  // at most 64. The integer |id| field is the stable identifier for a report
  // so this name may be changed. However doing this may affect the
  // names and locations of some artifacts produced by Cobalt's report
  // generation pipeline.
  string report_name = 1;

  // The unique integer ID for this report within its owning metric.
  // The user must manually set this |id| field. This is the stable identifier
  // for a report and should not be changed once data collection begins.
  uint32 id = 2;

  // A Report has one of the following types.
  // Next standard report type ID: 22
  enum ReportType {
    // Enum numbers and names that are reserved in this enum; the protobuf
    // compiler rejects any new value that reuses one of them.
    reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999;
    reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS",
             "INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
             "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
             "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";

    REPORT_TYPE_UNSET = 0;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences on all devices in the fleet with system profile
    // SP of the event associated with EV over the course of the report day.
    // For example, a report of this type might give the total number of times
    // a medium, red widget was used across the fleet yesterday.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: COUNT
    // Local aggregation period: 1 hour
    // Global aggregation: OCCURRENCE_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: OccurrenceCountReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   none
    FLEETWIDE_OCCURRENCE_COUNTS = 11;

    // For each system_profile SP and each event_vector EV, produces the count
    // of the number of unique devices with system profile SP for which EV
    // "is accepted" during the aggregation period, which must be DAYS_1,
    // DAYS_7, DAYS_28 or DAYS_30.
    //
    // There are different versions of what "is accepted" means depending on
    // which local aggregation procedure is specified:
    //
    // AT_LEAST_ONCE. In this case EV is accepted if EV was logged at least once
    // during the aggregation period. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used at least once in the seven-day period
    // ending yesterday.
    //
    // SELECT_FIRST, SELECT_MOST_COMMON. In this case EV is accepted if the
    // category selection procedure selected EV. For example, a report of this
    // type using SELECT_MOST_COMMON might give the total number of devices
    // with system profile SP on which most of the widgets used during the
    // seven-day period ending yesterday were medium-red.
    //
    // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
    // SELECT_FIRST, in combination with setting expedited_sending, results in
    // the count being sent by the device when the event occurs (instead of at
    // the end of the day). This can be desirable for having data for the
    // current day appear faster in the reports output by Cobalt.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: OCCURRENCE_COUNTS
    //
    // Output report row type: OccurrenceCountReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure
    //   - local_aggregation_period
    //   - expedited_sending
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_COUNTS = 12;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of unique devices with system_profile SP for which an integer
    // value, aggregated locally on device over the aggregation period,
    // associated with EV, falls into the bucket.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of devices with system profile SP on which a medium,
    // red widget was used between 10 and 100 times in the seven-day period
    // ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of devices with system profile SP on which the minimum temperature of a
    // medium red widget over the seven-day period ending yesterday was between
    // 10 and 100 degrees.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //   NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_HISTOGRAMS = 13;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of values, associated with EV, from devices
    // with system_profile SP, that fall into the bucket, where each device
    // computes one such value per hour.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_HISTOGRAMS except that the local aggregation period
    // used is one hour and so the counts in each bucket are not interpreted
    // as a number of unique devices.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of times that the hourly count of medium red widgets
    // used was between 10 and 100 over devices with system profile SP,
    // yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of times that the minimum temperature over an hour of all medium red
    // widgets used was between 10 and 100 degrees over all devices with
    // system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //   NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    HOURLY_VALUE_HISTOGRAMS = 14;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of integer measurements, associated with EV, logged on devices
    // with system_profile SP, that fall into the bucket. Here we are counting
    // each value logged by the instrumented code individually and so the rate
    // at which values are being recorded is arbitrary and varies from device
    // to device. For example, for the integer bucket 10-100, a report of this
    // type might give the number of times that a medium red widget's
    // temperature was measured as being between 10 and 100 degrees over all
    // devices with system profile SP, yesterday. The rate at which these
    // widget temperature measurements are taken is arbitrary and may vary
    // from device to device.
    //
    // Input metric types: INTEGER or INTEGER_HISTOGRAM
    //
    // Local aggregation: INTEGER_HISTOGRAM
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: IntegerHistogramReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - int_buckets (Only with metric_type = INTEGER)
    FLEETWIDE_HISTOGRAMS = 15;

    // For each system_profile SP and each event_vector EV, produces the sum
    // and count of many integer measurements associated with EV, logged on
    // devices with system_profile SP. Here we are counting each value logged
    // by the instrumented code individually and so the rate at which values are
    // being recorded is arbitrary and varies from device to device. This allows
    // us to produce a fleetwide mean. For example, a report of this type might
    // give the mean of all temperature measurements of medium-red widgets
    // yesterday, across all devices with system profile SP, regardless of how
    // many temperature measurements were taken on each device individually.
    //
    // Input metric types: INTEGER
    //
    // Local aggregation: SUM_AND_COUNT
    // Local aggregation period: one hour
    // Global aggregation: SUM_AND_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: SumAndCountReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   none
    FLEETWIDE_MEANS = 16;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a single value and so the distribution of the values
    // may be thought of as a distribution of unique devices.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the aggregation period. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used by
    // each device over the 7-day period ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over the 7-day period ending yesterday of all medium-red widgets over
    // all devices with system profile SP.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //   NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    UNIQUE_DEVICE_NUMERIC_STATS = 17;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a value every hour and so the distribution of the
    // values may NOT be thought of as a distribution of unique devices.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
    // used is one hour.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the hour. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used in
    // any one hour period on any device with system profile SP, yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over any one-hour period of medium-red widget use on any device
    // with system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //   NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: 1 hour
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    HOURLY_VALUE_NUMERIC_STATS = 18;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences of a string value on all devices in the fleet
    // with system profile SP of the event associated with EV over the course
    // of the report day.
    //
    // Input metric types: STRING
    //
    // Local aggregation: STRING_HISTOGRAM
    // Local aggregation period: 1 hour
    // Global aggregation: STRING_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: StringCountReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - string_buffer_max
    STRING_COUNTS = 20;

    // For each system_profile SP, each event_vector EV, and each string value
    // produces the count of the number of unique devices with system profile
    // SP on which the string value was logged in connection with the EV during
    // the aggregation period, which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    //
    // This is similar to the AT_LEAST_ONCE local aggregation procedure for
    // UNIQUE_DEVICE_COUNTS. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used in conjunction with the component name
    // "widget-consumer" at least once in the seven-day period ending
    // yesterday.
    //
    // Input metric types: STRING
    //
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: STRING_HISTOGRAMS
    //
    // Output report row type: StringCountReportRow
    //   (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_period
    //   - string_buffer_max
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_STRING_COUNTS = 21;
  }
  ReportType report_type = 3;

  //////////////// Fields for reports with privacy enabled /////////////////

  // The level of differential privacy applied to the report. Each level
  // corresponds to an epsilon value in the shuffled model. The mapping
  // from enum values to epsilon values is hard-coded in makePrivacyConstants()
  // in the file //src/bin/config_parser/src/privacy/privacy_encoding_params.go
  enum PrivacyLevel {
    PRIVACY_LEVEL_UNKNOWN = 0;

    // epsilon = infinity
    NO_ADDED_PRIVACY = 1;

    LOW_PRIVACY = 2;

    MEDIUM_PRIVACY = 3;

    HIGH_PRIVACY = 4;
  }

  // This field is used to specify the privacy level for a Cobalt report.
  // All Cobalt report types support differential privacy and are required
  // to set this field (use NO_ADDED_PRIVACY to disable differential privacy).
  PrivacyLevel privacy_level = 20;

  // The mean number of observations added per index point when performing the
  // Poisson mechanism encoding for Cobalt reports. Should be set if and only if
  // `privacy_level` is not NO_ADDED_PRIVACY.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/295053509): update this comment once the field is populated by
  // the registry parser.
  double poisson_mean = 30;

  // When reporting numerical values with privacy, the values are mapped to
  // indices from 0 to num_index_points-1 with a randomized rounding method.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  uint32 num_index_points = 22;

  // When reporting strings with privacy, the strings are counted using a linear
  // sketch.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  StringSketchParameters string_sketch_params = 27;

  // These fields specify the range of values that can be reported by a device
  // in the specified local_aggregation_period. If the true value to be reported
  // falls outside the specified range, the value is clipped.
  //
  // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
  // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
  // computed for the device over the aggregation period specified in the
  // report.
  //
  // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
  // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
  // is also required in order to bound the `count` value.)
  //
  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
  // required for reports of type:
  //   * FLEETWIDE_OCCURRENCE_COUNTS
  //   * UNIQUE_DEVICE_NUMERIC_STATS
  //   * HOURLY_VALUE_NUMERIC_STATS
  //   * FLEETWIDE_MEANS
  int64 min_value = 23;
  int64 max_value = 24;

  // This field specifies the maximum count to be reported by a device in the
  // specified local_aggregation_period. If the true count is greater than
  // max_count, then the count will be reported as max_count.
  //
  // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
  // individual histogram bucket over the aggregation period of one hour. For
  // STRING_COUNTS, it applies to the count for each string over one hour.
  //
  // For FLEETWIDE_MEANS, the bound applies to the per-device count of the
  // values to be averaged over one hour.
  //
  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
  // required for reports of type:
  //   * FLEETWIDE_HISTOGRAMS
  //   * FLEETWIDE_MEANS
  //   * STRING_COUNTS
  uint64 max_count = 25;

  //////////////// Fields specific to some report types /////////////////

  // A specification of integer-range buckets for a histogram.
  //
  // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
  // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
  // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
  // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
  // contains an instance of IntegerBuckets.
  IntegerBuckets int_buckets = 10;

  // The interval with which clients will generate and upload observations.
  enum ReportingInterval {
    REPORTING_INTERVAL_UNSET = 0;
    HOURS_1 = 1;
    DAYS_1 = 2;
  }

  // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
  // supported by some client platforms. If not set, the reporting interval
  // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
  ReportingInterval reporting_interval = 32;

  // This field can be used with all Report types. When set, the generated
  // report will exclude an Observation if there are not at least
  // |reporting_threshold| number of distinct devices reporting Observations
  // with the same ObservationMetadata.
  uint32 reporting_threshold = 13;

  // The on-device function computed on the metric during the aggregation
  // window.
  enum LocalAggregationProcedure {
    LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;

    // Numerical statistic aggregation procedures to be used with reports
    // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
    // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS.
    // TODO(fxbug.dev/87151): Rename these to remove the '_PROCEDURE' suffix.
    SUM_PROCEDURE = 1;
    MIN_PROCEDURE = 2;
    MAX_PROCEDURE = 3;
    MEAN = 4;
    MEDIAN = 5;
    // The value of N is set in the field
    // |local_aggregation_procedure_percentile_n|.
    PERCENTILE_N = 6;

    // Logical aggregation procedures to be used with reports of type
    // UNIQUE_DEVICE_COUNTS
    AT_LEAST_ONCE = 7;
    SELECT_FIRST = 8;
    SELECT_MOST_COMMON = 9;
  }

  // This field is required for reports of type
  // UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
  // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS
  // and UNIQUE_DEVICE_COUNTS. Different report types support
  // different values of this field. See the comments on the
  // enum values in LocalAggregationProcedure.
  LocalAggregationProcedure local_aggregation_procedure = 17;

  // This field is required when
  // local_aggregation_procedure = PERCENTILE_N.
  // In this case it gives the value of N to use. Otherwise this field is
  // ignored.
  uint32 local_aggregation_procedure_percentile_n = 18;

  // Time window over which the metric is aggregated. The local aggregation
  // period is specified for UNIQUE_DEVICE_* report types.
  WindowSize local_aggregation_period = 19;

  // The maximum number of distinct event vectors for which an instance of the Cobalt
  // client should produce an observation, for a given local aggregation period. Event
  // vectors are prioritized in order of first arrival during the aggregation period.
  //
  // For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event
  // vectors are logged for this metric over an aggregation period, then Cobalt will send
  // observations of the first 10 event vectors for that aggregation period and drop the
  // last 2.
  //
  // If this field is unset, the registry parser assigns to it the total number of event
  // vectors for the report's parent metric (i.e., the product over all metric dimensions
  // of the number of event codes per dimension).
  //
  // The report's project will be charged against a resource budget for this value
  // so project owners are encouraged to set this as small as possible. For example,
  // the report's parent metric may include a dimension with thousands of event codes,
  // but it is expected that any one device will log only a few distinct event vectors
  // per day. In that case we may set event_vector_buffer_max to a relatively small number,
  // say 20. For reports which use differential privacy, setting event_vector_buffer_max
  // to a smaller number will improve the signal for event vectors which are included in
  // observations.
  uint64 event_vector_buffer_max = 26;

  // The maximum number of distinct strings that Cobalt must keep in its in-memory buffer
  // on any single device. During local aggregation for reports of type STRING_COUNTS and
  // UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per
  // aggregation period. The report's project will be charged against a resource budget for this
  // value so project owners are encouraged to set this as small as possible. A STRING metric
  // includes a file of candidate strings that may contain many thousands of strings. But it is
  // expected that any one device will log only a few of these strings per day. We may set
  // string_buffer_max to a relatively small number, say 20.
  //
  // This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
  uint32 string_buffer_max = 28;

  // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
  // event occurs, instead of waiting for the end of the day.
  //
  // This can only be enabled when using a local aggregation procedure of
  // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy level is
  // NO_ADDED_PRIVACY. When used with a system_profile_selection of REPORT_ALL
  // or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
  // for the current day when the event occurs instead of at the end of the day.
  // For a system_profile_selection of SELECT_LAST, this may also be desirable,
  // though it may result in a slight change in the current day's system profile
  // that is used, as Cobalt won't wait until the end of the day to determine
  // the final system profile, but will instead send the count immediately with
  // the system profile that is currently active on the device.
  bool expedited_sending = 29;

  /////////////////// Fields used by all report types ///////////////////
  // Next id: 109

  // The list of SystemProfileFields to include in each row of the report.
  // Optional.
  repeated SystemProfileField system_profile_field = 100;

  // The list of Experiments to include in each row of the report.
  //
  // Each report row lists the intersection of the experiment ids active on the device and
  // experiment ids specified in this field.
  //
  // The specified experiment ids must be found in one of the project's experiments_namespaces.
  repeated int64 experiment_id = 104;

  // This field is required for reports of type UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
  // SELECT_FIRST, or occasionally REPORT_ALL.
  //
  // If the system profile value changed during the aggregation window specified
  // for this report, system_profile_selection specifies which system profile to
  // report for each device.
  SystemProfileSelectionPolicy system_profile_selection = 103;

  // Maximum ReleaseStage for which this Report is allowed to be collected.
  ReleaseStage max_release_stage = 105;

  // Report can be collected even if the user/device has not consented.
  // This field can only be set to true on reports that use privacy mechanisms
  // that include differential privacy (i.e. not DE_IDENTIFICATION). The use of
  // this field is for collecting anonymized data that is allowed even when
  // the consent is not given. These use cases need to be specially approved
  // by privacy reviewers.
  bool exempt_from_consent = 108;

  // New Privacy API

  // This enum identifies what privacy protection is applied to the report.
  enum PrivacyMechanism {
    PRIVACY_MECHANISM_UNSPECIFIED = 0;
    // If you specify this value the data will be de-identified without
    // additional privacy protections.
    DE_IDENTIFICATION = 1;
    // If you specify this value the data will be protected with Shuffled
    // Differential Privacy guarantees (e.g., the noise will be added on the
    // devices)
    SHUFFLED_DIFFERENTIAL_PRIVACY = 2;
  }

  // This field identifies what privacy protection is applied to the report.
  // It will eventually be required once migration from privacy_level
  // is complete.
  PrivacyMechanism privacy_mechanism = 106;

  // The object for grouping all parameters needed for SHUFFLED DP mode.
  message ShuffledDifferentialPrivacyConfig {
    // This field represents an upper bound on the amount of information which
    // can be learned about a device from a report including that device.
    // Lower values correspond to higher privacy.
    // Epsilon must be > 0.
    double epsilon = 1;
    // This field represents the risk of the epsilon guarantee not holding. This
    // is usually set as 1 over the expected number of participating devices.
    // Delta must be > 0 and < 1.
    double delta = 2;
    // The generated report will exclude an Observation if there are not at
    // least |reporting_threshold| number of distinct devices reporting
    // Observations with the same ObservationMetadata.
    uint32 reporting_threshold = 3;

    // The mean number of observations added per index point when performing the
    // Poisson mechanism encoding for Cobalt reports. Required.
    //
    // In the future, the value of this field will be computed by the registry
    // parser as a function of other fields in this
    // ShuffledDifferentialPrivacyConfig. For now, it should be set manually in
    // the Cobalt registry in consultation with the Cobalt team.
    //
    // TODO(b/295053509): update this comment once the field is auto populated by
    // the registry parser.
    double poisson_mean = 4;
  }

  // If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY then privacy_config
  // must contain a valid ShuffledDifferentialPrivacyConfig; otherwise it must
  // be empty.
  oneof privacy_config {
    ShuffledDifferentialPrivacyConfig shuffled_dp = 107;
  }
}

// A specification for SystemProfile selection policy.
enum SystemProfileSelectionPolicy {
  // Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS,
  // FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS,
  // HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS this will resolve to
  // 'REPORT_ALL' and should not be changed. For all other report types,
  // SELECT_DEFAULT must not be used.
  SELECT_DEFAULT = 0;

  // Always report the last SystemProfile seen in the aggregation window. This
  // will be the last SystemProfile seen *at the time of an event* in the
  // aggregation window.
  SELECT_LAST = 1;

  // Always report the first SystemProfile seen in the aggregation window. This
  // will be the first SystemProfile seen *at the time of an event* in the
  // aggregation window.
  SELECT_FIRST = 2;

  // Report all system profiles in the aggregation window. For most report
  // types, this is the most sensible value to use. For reports that depend on
  // some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since it will no
  // longer be the case that a single device will only upload one observation
  // per time period (It will upload one observation per time period *per unique
  // system_profile*).
  REPORT_ALL = 3;
}

// A specification of a field from SystemProfile. These are used in a
// ReportDefinition to specify which fields should be included in the generated
// Observations and reports.
//
// For a description of the meaning of each field, see the fields in the
// SystemProfile in: cobalt/proto/common.proto
enum SystemProfileField {
  OS = 0;
  ARCH = 1;
  BOARD_NAME = 2;
  PRODUCT_NAME = 3;
  SYSTEM_VERSION = 4;
  APP_VERSION = 10;
  CHANNEL = 5;
  BUILD_TYPE = 7;
  EXPERIMENT_IDS = 9;
  // Enum numbers and names that are reserved in this enum; the protobuf
  // compiler rejects any new value that reuses one of them.
  reserved 6, 8;
  reserved "REALM", "EXPERIMENT_TOKENS";
}

// Stages in the release cycle of a component. Each Cobalt customer determines
// its current ReleaseStage when initializing the CobaltService. Each Metric
// and Report can declare the maximum ReleaseStage for which it is allowed to
// be collected. For example a DEBUG Metric will not be collected from a device
// running a FISHFOOD release.
enum ReleaseStage {
  RELEASE_STAGE_NOT_SET = 0;

  // A test build. Also called "eng".
Only use this value when the device is 779 // running test builds as all metrics/reports will be collected. 780 DEBUG = 10; 781 // Small, internal prototype. Used for testing a new feature internally, 782 // usually within the team or a small group. 783 FISHFOOD = 20; 784 // An internal release for testing with internal users. 785 DOGFOOD = 40; 786 // An open beta, for testing with internal and external users. 787 OPEN_BETA = 60; 788 789 // Generally-available. The final stage of a release. Also called 790 // "production". If unsure of which release stage the device is running, it 791 // is safest to fallback to this value (which is the default if no value is 792 // set), to avoid inadvertently collecting metric/report data. 793 GA = 99; 794} 795 796// ExponentialIntegerBuckets is used to define a partition of the integers into 797// a finite number of exponentially increasing buckets. 798// 799// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. 800// 801// The bucket boundaries are: 802// a[0] = floor 803// a[1] = floor + initial_step 804// a[2] = floor + initial_step * step_multiplier 805// a[3] = floor + initial_step * step_multiplier ^ 2 806// a[4] = floor + initial_step * step_multiplier ^ 3 807// and in general, for i = 1, 2, 3 ... 
n 808// a[i] = floor + initial_step * step_multiplier ^ (i-1) 809// 810// Then, the buckets are defined as follows: 811// Bucket 0 is the underflow bucket: (-infinity, floor) 812// Bucket i for 0 < i < n+1: [a[i-1], a[i]) 813// Bucket n+1 is the overflow bucket: [a[n], +infinity) 814// 815// Examples: 816// floor = 0 817// num_buckets = 3 818// initial_step = 10 819// step_multiplier = 10 820// Then, the buckets are: 821// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity) 822// 823// floor = 0 824// num_buckets = 3 825// initial_step = 2 826// step_multiplier = 2 827// Then, the buckets are: 828// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity) 829// 830// floor = 10 831// num_buckets = 3 832// initial_step = 2 833// step_multiplier = 2 834// Then, the buckets are: 835// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity) 836// 837// floor = 0 838// num_buckets = 3 839// initial_step = 100 840// step_multiplier = 10 841// Then, the buckets are: 842// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity) 843// 844message ExponentialIntegerBuckets { 845 int64 floor = 1; 846 847 // num_buckets must be at least 1. 848 uint32 num_buckets = 2; 849 850 // Must be at least one. 851 uint32 initial_step = 3; 852 853 // Must be at least one. 854 uint32 step_multiplier = 4; 855} 856 857// LinearIntegerBuckets is used to define a partition of the integers into a 858// finite number of buckets of equal size. 859// 860// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. 861// Bucket 0 is the underflow bucket: (-infinity, floor) 862// Bucket n+1 is the overflow bucket: [lower + step_size * n, +infinity) 863// 864// For i = 1 to n, the bucket i is defined as 865// [floor + step_size * (i-1), floor + step_size * i) 866// 867// Example: floor = 0, num_buckets = 3, step_size = 10. 
868// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +inifinity) 869message LinearIntegerBuckets { 870 int64 floor = 1; 871 872 // Must be at least one. 873 uint32 num_buckets = 2; 874 875 // Must be at least one. 876 uint32 step_size = 3; 877} 878 879message IntegerBuckets { 880 oneof buckets { 881 ExponentialIntegerBuckets exponential = 1; 882 LinearIntegerBuckets linear = 2; 883 } 884 885 // If set to true, empty buckets will not be added to the report data such 886 // that all histograms contain a row for every bucket. Buckets with a zero 887 // count may still occur if data is logged that contains a zero count. This 888 // field can not be set on reports with added privacy. 889 bool sparse_output = 3; 890} 891 892message StringSketchParameters { 893 // Number of hashes in Count-Min Sketch. 894 int32 num_hashes = 1; 895 896 // Number of cells per hash in Count-Min Sketch. 897 int32 num_cells_per_hash = 2; 898} 899