1// Copyright 2021 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package compliance 16 17import ( 18 "crypto/md5" 19 "fmt" 20 "io" 21 "io/fs" 22 "net/url" 23 "path/filepath" 24 "regexp" 25 "sort" 26 "strings" 27 28 "android/soong/tools/compliance/projectmetadata" 29) 30 31var ( 32 licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`) 33) 34 35// NoticeIndex transforms license metadata into license text hashes, library 36// names, and install paths indexing them for fast lookup/iteration. 37type NoticeIndex struct { 38 // lg identifies the license graph to which the index applies. 39 lg *LicenseGraph 40 // pmix indexes project metadata 41 pmix *projectmetadata.Index 42 // rs identifies the set of resolutions upon which the index is based. 43 rs ResolutionSet 44 // shipped identifies the set of target nodes shipped directly or as derivative works. 45 shipped TargetNodeSet 46 // rootFS locates the root of the file system from which to read the files. 47 rootFS fs.FS 48 // hash maps license text filenames to content hashes 49 hash map[string]hash 50 // text maps content hashes to content 51 text map[hash][]byte 52 // hashLibInstall maps hashes to libraries to install paths. 53 hashLibInstall map[hash]map[string]map[string]struct{} 54 // installHashLib maps install paths to libraries to hashes. 55 installHashLib map[string]map[hash]map[string]struct{} 56 // libHash maps libraries to hashes. 57 libHash map[string]map[hash]struct{} 58 // targetHash maps target nodes to hashes. 59 targetHashes map[*TargetNode]map[hash]struct{} 60 // projectName maps project directory names to project name text. 61 projectName map[string]string 62 // files lists all the files accessed during indexing 63 files []string 64} 65 66// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs` 67// using the files rooted at `rootFS`. 68func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) { 69 if rs == nil { 70 rs = ResolveNotices(lg) 71 } 72 ni := &NoticeIndex{ 73 lg: lg, 74 pmix: projectmetadata.NewIndex(rootFS), 75 rs: rs, 76 shipped: ShippedNodes(lg), 77 rootFS: rootFS, 78 hash: make(map[string]hash), 79 text: make(map[hash][]byte), 80 hashLibInstall: make(map[hash]map[string]map[string]struct{}), 81 installHashLib: make(map[string]map[hash]map[string]struct{}), 82 libHash: make(map[string]map[hash]struct{}), 83 targetHashes: make(map[*TargetNode]map[hash]struct{}), 84 projectName: make(map[string]string), 85 } 86 87 // index adds all license texts for `tn` to the index. 88 index := func(tn *TargetNode) (map[hash]struct{}, error) { 89 if hashes, ok := ni.targetHashes[tn]; ok { 90 return hashes, nil 91 } 92 hashes := make(map[hash]struct{}) 93 for _, text := range tn.LicenseTexts() { 94 fname := strings.SplitN(text, ":", 2)[0] 95 if _, ok := ni.hash[fname]; !ok { 96 err := ni.addText(fname) 97 if err != nil { 98 return nil, err 99 } 100 } 101 hash := ni.hash[fname] 102 if _, ok := hashes[hash]; !ok { 103 hashes[hash] = struct{}{} 104 } 105 } 106 ni.targetHashes[tn] = hashes 107 return hashes, nil 108 } 109 110 link := func(tn *TargetNode, hashes map[hash]struct{}, installPaths []string) error { 111 for h := range hashes { 112 libName, err := ni.getLibName(tn, h) 113 if err != nil { 114 return err 115 } 116 if _, ok := ni.libHash[libName]; !ok { 117 ni.libHash[libName] = make(map[hash]struct{}) 118 } 119 if _, ok := ni.hashLibInstall[h]; !ok { 120 ni.hashLibInstall[h] = make(map[string]map[string]struct{}) 121 } 122 if _, ok := ni.libHash[libName][h]; !ok { 123 ni.libHash[libName][h] = struct{}{} 124 } 125 for _, installPath := range installPaths { 126 if _, ok := ni.installHashLib[installPath]; !ok { 127 ni.installHashLib[installPath] = make(map[hash]map[string]struct{}) 128 ni.installHashLib[installPath][h] = make(map[string]struct{}) 129 ni.installHashLib[installPath][h][libName] = struct{}{} 130 } else if _, ok = ni.installHashLib[installPath][h]; !ok { 131 ni.installHashLib[installPath][h] = make(map[string]struct{}) 132 ni.installHashLib[installPath][h][libName] = struct{}{} 133 } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok { 134 ni.installHashLib[installPath][h][libName] = struct{}{} 135 } 136 if _, ok := ni.hashLibInstall[h]; !ok { 137 ni.hashLibInstall[h] = make(map[string]map[string]struct{}) 138 ni.hashLibInstall[h][libName] = make(map[string]struct{}) 139 ni.hashLibInstall[h][libName][installPath] = struct{}{} 140 } else if _, ok = ni.hashLibInstall[h][libName]; !ok { 141 ni.hashLibInstall[h][libName] = make(map[string]struct{}) 142 ni.hashLibInstall[h][libName][installPath] = struct{}{} 143 } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok { 144 ni.hashLibInstall[h][libName][installPath] = struct{}{} 145 } 146 } 147 } 148 return nil 149 } 150 151 cacheMetadata := func(tn *TargetNode) { 152 ni.pmix.MetadataForProjects(tn.Projects()...) 153 } 154 155 // returns error from walk below. 156 var err error 157 158 WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool { 159 if err != nil { 160 return false 161 } 162 if !ni.shipped.Contains(tn) { 163 return false 164 } 165 go cacheMetadata(tn) 166 installPaths := getInstallPaths(tn, path) 167 var hashes map[hash]struct{} 168 hashes, err = index(tn) 169 if err != nil { 170 return false 171 } 172 err = link(tn, hashes, installPaths) 173 if err != nil { 174 return false 175 } 176 if tn.IsContainer() { 177 return true 178 } 179 180 for _, r := range rs.Resolutions(tn) { 181 hashes, err = index(r.actsOn) 182 if err != nil { 183 return false 184 } 185 err = link(r.actsOn, hashes, installPaths) 186 if err != nil { 187 return false 188 } 189 } 190 return false 191 }) 192 193 if err != nil { 194 return nil, err 195 } 196 197 return ni, nil 198} 199 200// Hashes returns an ordered channel of the hashed license texts. 201func (ni *NoticeIndex) Hashes() chan hash { 202 c := make(chan hash) 203 go func() { 204 libs := make([]string, 0, len(ni.libHash)) 205 for libName := range ni.libHash { 206 libs = append(libs, libName) 207 } 208 sort.Strings(libs) 209 hashes := make(map[hash]struct{}) 210 for _, libName := range libs { 211 hl := make([]hash, 0, len(ni.libHash[libName])) 212 for h := range ni.libHash[libName] { 213 if _, ok := hashes[h]; ok { 214 continue 215 } 216 hashes[h] = struct{}{} 217 hl = append(hl, h) 218 } 219 if len(hl) > 0 { 220 sort.Sort(hashList{ni, libName, "", &hl}) 221 for _, h := range hl { 222 c <- h 223 } 224 } 225 } 226 close(c) 227 }() 228 return c 229 230} 231 232// InputFiles returns the complete list of files read during indexing. 233func (ni *NoticeIndex) InputFiles() []string { 234 projectMeta := ni.pmix.AllMetadataFiles() 235 files := make([]string, 0, len(ni.files) + len(ni.lg.targets) + len(projectMeta)) 236 files = append(files, ni.files...) 237 for f := range ni.lg.targets { 238 files = append(files, f) 239 } 240 files = append(files, projectMeta...) 241 return files 242} 243 244// HashLibs returns the ordered array of library names using the license text 245// hashed as `h`. 246func (ni *NoticeIndex) HashLibs(h hash) []string { 247 libs := make([]string, 0, len(ni.hashLibInstall[h])) 248 for libName := range ni.hashLibInstall[h] { 249 libs = append(libs, libName) 250 } 251 sort.Strings(libs) 252 return libs 253} 254 255// HashLibInstalls returns the ordered array of install paths referencing 256// library `libName` using the license text hashed as `h`. 257func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string { 258 installs := make([]string, 0, len(ni.hashLibInstall[h][libName])) 259 for installPath := range ni.hashLibInstall[h][libName] { 260 installs = append(installs, installPath) 261 } 262 sort.Strings(installs) 263 return installs 264} 265 266// InstallPaths returns the ordered channel of indexed install paths. 267func (ni *NoticeIndex) InstallPaths() chan string { 268 c := make(chan string) 269 go func() { 270 paths := make([]string, 0, len(ni.installHashLib)) 271 for path := range ni.installHashLib { 272 paths = append(paths, path) 273 } 274 sort.Strings(paths) 275 for _, installPath := range paths { 276 c <- installPath 277 } 278 close(c) 279 }() 280 return c 281} 282 283// InstallHashes returns the ordered array of hashes attached to `installPath`. 284func (ni *NoticeIndex) InstallHashes(installPath string) []hash { 285 result := make([]hash, 0, len(ni.installHashLib[installPath])) 286 for h := range ni.installHashLib[installPath] { 287 result = append(result, h) 288 } 289 if len(result) > 0 { 290 sort.Sort(hashList{ni, "", installPath, &result}) 291 } 292 return result 293} 294 295// InstallHashLibs returns the ordered array of library names attached to 296// `installPath` as hash `h`. 297func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string { 298 result := make([]string, 0, len(ni.installHashLib[installPath][h])) 299 for libName := range ni.installHashLib[installPath][h] { 300 result = append(result, libName) 301 } 302 sort.Strings(result) 303 return result 304} 305 306// Libraries returns the ordered channel of indexed library names. 307func (ni *NoticeIndex) Libraries() chan string { 308 c := make(chan string) 309 go func() { 310 libs := make([]string, 0, len(ni.libHash)) 311 for lib := range ni.libHash { 312 libs = append(libs, lib) 313 } 314 sort.Strings(libs) 315 for _, lib := range libs { 316 c <- lib 317 } 318 close(c) 319 }() 320 return c 321} 322 323// HashText returns the file content of the license text hashed as `h`. 324func (ni *NoticeIndex) HashText(h hash) []byte { 325 return ni.text[h] 326} 327 328// getLibName returns the name of the library associated with `noticeFor`. 329func (ni *NoticeIndex) getLibName(noticeFor *TargetNode, h hash) (string, error) { 330 for _, text := range noticeFor.LicenseTexts() { 331 if !strings.Contains(text, ":") { 332 if ni.hash[text].key != h.key { 333 continue 334 } 335 ln, err := ni.checkMetadataForLicenseText(noticeFor, text) 336 if err != nil { 337 return "", err 338 } 339 if len(ln) > 0 { 340 return ln, nil 341 } 342 continue 343 } 344 345 fields := strings.SplitN(text, ":", 2) 346 fname, pname := fields[0], fields[1] 347 if ni.hash[fname].key != h.key { 348 continue 349 } 350 351 ln, err := url.QueryUnescape(pname) 352 if err != nil { 353 continue 354 } 355 return ln, nil 356 } 357 // use name from METADATA if available 358 ln, err := ni.checkMetadata(noticeFor) 359 if err != nil { 360 return "", err 361 } 362 if len(ln) > 0 { 363 return ln, nil 364 } 365 // use package_name: from license{} module if available 366 pn := noticeFor.PackageName() 367 if len(pn) > 0 { 368 return pn, nil 369 } 370 for _, p := range noticeFor.Projects() { 371 if strings.HasPrefix(p, "prebuilts/") { 372 for _, licenseText := range noticeFor.LicenseTexts() { 373 if !strings.HasPrefix(licenseText, "prebuilts/") { 374 continue 375 } 376 if !strings.Contains(licenseText, ":") { 377 if ni.hash[licenseText].key != h.key { 378 continue 379 } 380 } else { 381 fields := strings.SplitN(licenseText, ":", 2) 382 fname := fields[0] 383 if ni.hash[fname].key != h.key { 384 continue 385 } 386 } 387 for _, safePrebuiltPrefix := range safePrebuiltPrefixes { 388 match := safePrebuiltPrefix.re.FindString(licenseText) 389 if len(match) == 0 { 390 continue 391 } 392 if safePrebuiltPrefix.strip { 393 // strip entire prefix 394 match = licenseText[len(match):] 395 } else { 396 // strip from prebuilts/ until safe prefix 397 match = licenseText[len(match)-len(safePrebuiltPrefix.prefix):] 398 } 399 // remove LICENSE or NOTICE or other filename 400 li := strings.LastIndex(match, "/") 401 if li > 0 { 402 match = match[:li] 403 } 404 // remove *licenses/ path segment and subdirectory if in path 405 if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 { 406 match = match[:offsets[len(offsets)-1][0]] 407 li = strings.LastIndex(match, "/") 408 if li > 0 { 409 match = match[:li] 410 } 411 } 412 return match, nil 413 } 414 break 415 } 416 } 417 for _, safePathPrefix := range safePathPrefixes { 418 if strings.HasPrefix(p, safePathPrefix.prefix) { 419 if safePathPrefix.strip { 420 return p[len(safePathPrefix.prefix):], nil 421 } else { 422 return p, nil 423 } 424 } 425 } 426 } 427 // strip off [./]meta_lic from license metadata path and extract base name 428 n := noticeFor.name[:len(noticeFor.name)-9] 429 li := strings.LastIndex(n, "/") 430 if li > 0 { 431 n = n[li+1:] 432 } 433 fi := strings.Index(n, "@") 434 if fi > 0 { 435 n = n[:fi] 436 } 437 return n, nil 438} 439 440// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`. 441func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) (string, error) { 442 pms, err := ni.pmix.MetadataForProjects(noticeFor.Projects()...) 443 if err != nil { 444 return "", err 445 } 446 for _, pm := range pms { 447 name := pm.VersionedName() 448 if name != "" { 449 return name, nil 450 } 451 } 452 return "", nil 453} 454 455// checkMetadataForLicenseText 456func (ni *NoticeIndex) checkMetadataForLicenseText(noticeFor *TargetNode, licenseText string) (string, error) { 457 p := "" 458 for _, proj := range noticeFor.Projects() { 459 if strings.HasPrefix(licenseText, proj) { 460 p = proj 461 } 462 } 463 if len(p) == 0 { 464 p = filepath.Dir(licenseText) 465 for { 466 fi, err := fs.Stat(ni.rootFS, filepath.Join(p, ".git")) 467 if err == nil && fi.IsDir() { 468 break 469 } 470 if strings.Contains(p, "/") && p != "/" { 471 p = filepath.Dir(p) 472 continue 473 } 474 return "", nil 475 } 476 } 477 pms, err := ni.pmix.MetadataForProjects(p) 478 if err != nil { 479 return "", err 480 } 481 if pms == nil { 482 return "", nil 483 } 484 return pms[0].VersionedName(), nil 485} 486 487// addText reads and indexes the content of a license text file. 488func (ni *NoticeIndex) addText(file string) error { 489 f, err := ni.rootFS.Open(filepath.Clean(file)) 490 if err != nil { 491 return fmt.Errorf("error opening license text file %q: %w", file, err) 492 } 493 494 // read the file 495 text, err := io.ReadAll(f) 496 if err != nil { 497 return fmt.Errorf("error reading license text file %q: %w", file, err) 498 } 499 500 hash := hash{fmt.Sprintf("%x", md5.Sum(text))} 501 ni.hash[file] = hash 502 if _, alreadyPresent := ni.text[hash]; !alreadyPresent { 503 ni.text[hash] = text 504 } 505 506 ni.files = append(ni.files, file) 507 508 return nil 509} 510 511// getInstallPaths returns the names of the used dependencies mapped to their 512// installed locations. 513func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string { 514 if len(path) == 0 { 515 installs := attachesTo.Installed() 516 if 0 == len(installs) { 517 installs = attachesTo.Built() 518 } 519 return installs 520 } 521 522 var getInstalls func(path TargetEdgePath) []string 523 524 getInstalls = func(path TargetEdgePath) []string { 525 // deps contains the output targets from the dependencies in the path 526 var deps []string 527 if len(path) > 1 { 528 // recursively get the targets from the sub-path skipping 1 path segment 529 deps = getInstalls(path[1:]) 530 } else { 531 // stop recursion at 1 path segment 532 deps = path[0].Dependency().TargetFiles() 533 } 534 size := 0 535 prefixes := path[0].Target().TargetFiles() 536 installMap := path[0].Target().InstallMap() 537 sources := path[0].Target().Sources() 538 for _, dep := range deps { 539 found := false 540 for _, source := range sources { 541 if strings.HasPrefix(dep, source) { 542 found = true 543 break 544 } 545 } 546 if !found { 547 continue 548 } 549 for _, im := range installMap { 550 if strings.HasPrefix(dep, im.FromPath) { 551 size += len(prefixes) 552 break 553 } 554 } 555 } 556 557 installs := make([]string, 0, size) 558 for _, dep := range deps { 559 found := false 560 for _, source := range sources { 561 if strings.HasPrefix(dep, source) { 562 found = true 563 break 564 } 565 } 566 if !found { 567 continue 568 } 569 for _, im := range installMap { 570 if strings.HasPrefix(dep, im.FromPath) { 571 for _, prefix := range prefixes { 572 installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):]) 573 } 574 break 575 } 576 } 577 } 578 return installs 579 } 580 allInstalls := getInstalls(path) 581 installs := path[0].Target().Installed() 582 if len(installs) == 0 { 583 return allInstalls 584 } 585 result := make([]string, 0, len(allInstalls)) 586 for _, install := range allInstalls { 587 for _, prefix := range installs { 588 if strings.HasPrefix(install, prefix) { 589 result = append(result, install) 590 } 591 } 592 } 593 return result 594} 595 596// hash is an opaque string derived from md5sum. 597type hash struct { 598 key string 599} 600 601// String returns the hexadecimal representation of the hash. 602func (h hash) String() string { 603 return h.key 604} 605 606// hashList orders an array of hashes 607type hashList struct { 608 ni *NoticeIndex 609 libName string 610 installPath string 611 hashes *[]hash 612} 613 614// Len returns the count of elements in the slice. 615func (l hashList) Len() int { return len(*l.hashes) } 616 617// Swap rearranges 2 elements of the slice so that each occupies the other's 618// former position. 619func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] } 620 621// Less returns true when the `i`th element is lexicographically less than 622// the `j`th element. 623func (l hashList) Less(i, j int) bool { 624 var insti, instj int 625 if len(l.libName) > 0 { 626 insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName]) 627 instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName]) 628 } else { 629 libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i]) 630 libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j]) 631 libsis := strings.Join(libsi, " ") 632 libsjs := strings.Join(libsj, " ") 633 if libsis != libsjs { 634 return libsis < libsjs 635 } 636 } 637 if insti == instj { 638 leni := len(l.ni.text[(*l.hashes)[i]]) 639 lenj := len(l.ni.text[(*l.hashes)[j]]) 640 if leni == lenj { 641 // all else equal, just order by hash value 642 return (*l.hashes)[i].key < (*l.hashes)[j].key 643 } 644 // put shortest texts first within same # of installs 645 return leni < lenj 646 } 647 // reverse order of # installs so that most popular appears first 648 return instj < insti 649} 650