1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18	"crypto/md5"
19	"fmt"
20	"io"
21	"io/fs"
22	"net/url"
23	"path/filepath"
24	"regexp"
25	"sort"
26	"strings"
27
28	"android/soong/tools/compliance/projectmetadata"
29)
30
31var (
32	licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
33)
34
35// NoticeIndex transforms license metadata into license text hashes, library
36// names, and install paths indexing them for fast lookup/iteration.
37type NoticeIndex struct {
38	// lg identifies the license graph to which the index applies.
39	lg *LicenseGraph
40	// pmix indexes project metadata
41	pmix *projectmetadata.Index
42	// rs identifies the set of resolutions upon which the index is based.
43	rs ResolutionSet
44	// shipped identifies the set of target nodes shipped directly or as derivative works.
45	shipped TargetNodeSet
46	// rootFS locates the root of the file system from which to read the files.
47	rootFS fs.FS
48	// hash maps license text filenames to content hashes
49	hash map[string]hash
50	// text maps content hashes to content
51	text map[hash][]byte
52	// hashLibInstall maps hashes to libraries to install paths.
53	hashLibInstall map[hash]map[string]map[string]struct{}
54	// installHashLib maps install paths to libraries to hashes.
55	installHashLib map[string]map[hash]map[string]struct{}
56	// libHash maps libraries to hashes.
57	libHash map[string]map[hash]struct{}
58	// targetHash maps target nodes to hashes.
59	targetHashes map[*TargetNode]map[hash]struct{}
60	// projectName maps project directory names to project name text.
61	projectName map[string]string
62	// files lists all the files accessed during indexing
63	files []string
64}
65
66// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
67// using the files rooted at `rootFS`.
68func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
69	if rs == nil {
70		rs = ResolveNotices(lg)
71	}
72	ni := &NoticeIndex{
73		lg:             lg,
74		pmix:           projectmetadata.NewIndex(rootFS),
75		rs:             rs,
76		shipped:        ShippedNodes(lg),
77		rootFS:         rootFS,
78		hash:           make(map[string]hash),
79		text:           make(map[hash][]byte),
80		hashLibInstall: make(map[hash]map[string]map[string]struct{}),
81		installHashLib: make(map[string]map[hash]map[string]struct{}),
82		libHash:        make(map[string]map[hash]struct{}),
83		targetHashes:   make(map[*TargetNode]map[hash]struct{}),
84		projectName:    make(map[string]string),
85	}
86
87	// index adds all license texts for `tn` to the index.
88	index := func(tn *TargetNode) (map[hash]struct{}, error) {
89		if hashes, ok := ni.targetHashes[tn]; ok {
90			return hashes, nil
91		}
92		hashes := make(map[hash]struct{})
93		for _, text := range tn.LicenseTexts() {
94			fname := strings.SplitN(text, ":", 2)[0]
95			if _, ok := ni.hash[fname]; !ok {
96				err := ni.addText(fname)
97				if err != nil {
98					return nil, err
99				}
100			}
101			hash := ni.hash[fname]
102			if _, ok := hashes[hash]; !ok {
103				hashes[hash] = struct{}{}
104			}
105		}
106		ni.targetHashes[tn] = hashes
107		return hashes, nil
108	}
109
110	link := func(tn *TargetNode, hashes map[hash]struct{}, installPaths []string) error {
111		for h := range hashes {
112			libName, err := ni.getLibName(tn, h)
113			if err != nil {
114				return err
115			}
116			if _, ok := ni.libHash[libName]; !ok {
117				ni.libHash[libName] = make(map[hash]struct{})
118			}
119			if _, ok := ni.hashLibInstall[h]; !ok {
120				ni.hashLibInstall[h] = make(map[string]map[string]struct{})
121			}
122			if _, ok := ni.libHash[libName][h]; !ok {
123				ni.libHash[libName][h] = struct{}{}
124			}
125			for _, installPath := range installPaths {
126				if _, ok := ni.installHashLib[installPath]; !ok {
127					ni.installHashLib[installPath] = make(map[hash]map[string]struct{})
128					ni.installHashLib[installPath][h] = make(map[string]struct{})
129					ni.installHashLib[installPath][h][libName] = struct{}{}
130				} else if _, ok = ni.installHashLib[installPath][h]; !ok {
131					ni.installHashLib[installPath][h] = make(map[string]struct{})
132					ni.installHashLib[installPath][h][libName] = struct{}{}
133				} else if _, ok = ni.installHashLib[installPath][h][libName]; !ok {
134					ni.installHashLib[installPath][h][libName] = struct{}{}
135				}
136				if _, ok := ni.hashLibInstall[h]; !ok {
137					ni.hashLibInstall[h] = make(map[string]map[string]struct{})
138					ni.hashLibInstall[h][libName] = make(map[string]struct{})
139					ni.hashLibInstall[h][libName][installPath] = struct{}{}
140				} else if _, ok = ni.hashLibInstall[h][libName]; !ok {
141					ni.hashLibInstall[h][libName] = make(map[string]struct{})
142					ni.hashLibInstall[h][libName][installPath] = struct{}{}
143				} else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
144					ni.hashLibInstall[h][libName][installPath] = struct{}{}
145				}
146			}
147		}
148		return nil
149	}
150
151	cacheMetadata := func(tn *TargetNode) {
152		ni.pmix.MetadataForProjects(tn.Projects()...)
153	}
154
155	// returns error from walk below.
156	var err error
157
158	WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
159		if err != nil {
160			return false
161		}
162		if !ni.shipped.Contains(tn) {
163			return false
164		}
165		go cacheMetadata(tn)
166		installPaths := getInstallPaths(tn, path)
167		var hashes map[hash]struct{}
168		hashes, err = index(tn)
169		if err != nil {
170			return false
171		}
172		err = link(tn, hashes, installPaths)
173		if err != nil {
174			return false
175		}
176		if tn.IsContainer() {
177			return true
178		}
179
180		for _, r := range rs.Resolutions(tn) {
181			hashes, err = index(r.actsOn)
182			if err != nil {
183				return false
184			}
185			err = link(r.actsOn, hashes, installPaths)
186			if err != nil {
187				return false
188			}
189		}
190		return false
191	})
192
193	if err != nil {
194		return nil, err
195	}
196
197	return ni, nil
198}
199
200// Hashes returns an ordered channel of the hashed license texts.
201func (ni *NoticeIndex) Hashes() chan hash {
202	c := make(chan hash)
203	go func() {
204		libs := make([]string, 0, len(ni.libHash))
205		for libName := range ni.libHash {
206			libs = append(libs, libName)
207		}
208		sort.Strings(libs)
209		hashes := make(map[hash]struct{})
210		for _, libName := range libs {
211			hl := make([]hash, 0, len(ni.libHash[libName]))
212			for h := range ni.libHash[libName] {
213				if _, ok := hashes[h]; ok {
214					continue
215				}
216				hashes[h] = struct{}{}
217				hl = append(hl, h)
218			}
219			if len(hl) > 0 {
220				sort.Sort(hashList{ni, libName, "", &hl})
221				for _, h := range hl {
222					c <- h
223				}
224			}
225		}
226		close(c)
227	}()
228	return c
229
230}
231
232// InputFiles returns the complete list of files read during indexing.
233func (ni *NoticeIndex) InputFiles() []string {
234	projectMeta := ni.pmix.AllMetadataFiles()
235	files := make([]string, 0, len(ni.files) + len(ni.lg.targets) + len(projectMeta))
236	files = append(files, ni.files...)
237	for f := range ni.lg.targets {
238		files = append(files, f)
239	}
240	files = append(files, projectMeta...)
241	return files
242}
243
244// HashLibs returns the ordered array of library names using the license text
245// hashed as `h`.
246func (ni *NoticeIndex) HashLibs(h hash) []string {
247	libs := make([]string, 0, len(ni.hashLibInstall[h]))
248	for libName := range ni.hashLibInstall[h] {
249		libs = append(libs, libName)
250	}
251	sort.Strings(libs)
252	return libs
253}
254
255// HashLibInstalls returns the ordered array of install paths referencing
256// library `libName` using the license text hashed as `h`.
257func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
258	installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
259	for installPath := range ni.hashLibInstall[h][libName] {
260		installs = append(installs, installPath)
261	}
262	sort.Strings(installs)
263	return installs
264}
265
266// InstallPaths returns the ordered channel of indexed install paths.
267func (ni *NoticeIndex) InstallPaths() chan string {
268	c := make(chan string)
269	go func() {
270		paths := make([]string, 0, len(ni.installHashLib))
271		for path := range ni.installHashLib {
272			paths = append(paths, path)
273		}
274		sort.Strings(paths)
275		for _, installPath := range paths {
276			c <- installPath
277		}
278		close(c)
279	}()
280	return c
281}
282
283// InstallHashes returns the ordered array of hashes attached to `installPath`.
284func (ni *NoticeIndex) InstallHashes(installPath string) []hash {
285	result := make([]hash, 0, len(ni.installHashLib[installPath]))
286	for h := range ni.installHashLib[installPath] {
287		result = append(result, h)
288	}
289	if len(result) > 0 {
290		sort.Sort(hashList{ni, "", installPath, &result})
291	}
292	return result
293}
294
295// InstallHashLibs returns the ordered array of library names attached to
296// `installPath` as hash `h`.
297func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string {
298	result := make([]string, 0, len(ni.installHashLib[installPath][h]))
299	for libName := range ni.installHashLib[installPath][h] {
300		result = append(result, libName)
301	}
302	sort.Strings(result)
303	return result
304}
305
306// Libraries returns the ordered channel of indexed library names.
307func (ni *NoticeIndex) Libraries() chan string {
308	c := make(chan string)
309	go func() {
310		libs := make([]string, 0, len(ni.libHash))
311		for lib := range ni.libHash {
312			libs = append(libs, lib)
313		}
314		sort.Strings(libs)
315		for _, lib := range libs {
316			c <- lib
317		}
318		close(c)
319	}()
320	return c
321}
322
323// HashText returns the file content of the license text hashed as `h`.
324func (ni *NoticeIndex) HashText(h hash) []byte {
325	return ni.text[h]
326}
327
328// getLibName returns the name of the library associated with `noticeFor`.
329func (ni *NoticeIndex) getLibName(noticeFor *TargetNode, h hash) (string, error) {
330	for _, text := range noticeFor.LicenseTexts() {
331		if !strings.Contains(text, ":") {
332			if ni.hash[text].key != h.key {
333				continue
334			}
335			ln, err := ni.checkMetadataForLicenseText(noticeFor, text)
336			if err != nil {
337				return "", err
338			}
339			if len(ln) > 0 {
340				return ln, nil
341			}
342			continue
343		}
344
345		fields := strings.SplitN(text, ":", 2)
346		fname, pname := fields[0], fields[1]
347		if ni.hash[fname].key != h.key {
348			continue
349		}
350
351		ln, err := url.QueryUnescape(pname)
352		if err != nil {
353			continue
354		}
355		return ln, nil
356	}
357	// use name from METADATA if available
358	ln, err := ni.checkMetadata(noticeFor)
359	if err != nil {
360		return "", err
361	}
362	if len(ln) > 0 {
363		return ln, nil
364	}
365	// use package_name: from license{} module if available
366	pn := noticeFor.PackageName()
367	if len(pn) > 0 {
368		return pn, nil
369	}
370	for _, p := range noticeFor.Projects() {
371		if strings.HasPrefix(p, "prebuilts/") {
372			for _, licenseText := range noticeFor.LicenseTexts() {
373				if !strings.HasPrefix(licenseText, "prebuilts/") {
374					continue
375				}
376				if !strings.Contains(licenseText, ":") {
377					if ni.hash[licenseText].key != h.key {
378						continue
379					}
380				} else {
381					fields := strings.SplitN(licenseText, ":", 2)
382					fname := fields[0]
383					if ni.hash[fname].key != h.key {
384						continue
385					}
386				}
387				for _, safePrebuiltPrefix := range safePrebuiltPrefixes {
388					match := safePrebuiltPrefix.re.FindString(licenseText)
389					if len(match) == 0 {
390						continue
391					}
392					if safePrebuiltPrefix.strip {
393						// strip entire prefix
394						match = licenseText[len(match):]
395					} else {
396						// strip from prebuilts/ until safe prefix
397						match = licenseText[len(match)-len(safePrebuiltPrefix.prefix):]
398					}
399					// remove LICENSE or NOTICE or other filename
400					li := strings.LastIndex(match, "/")
401					if li > 0 {
402						match = match[:li]
403					}
404					// remove *licenses/ path segment and subdirectory if in path
405					if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 {
406						match = match[:offsets[len(offsets)-1][0]]
407						li = strings.LastIndex(match, "/")
408						if li > 0 {
409							match = match[:li]
410						}
411					}
412					return match, nil
413				}
414				break
415			}
416		}
417		for _, safePathPrefix := range safePathPrefixes {
418			if strings.HasPrefix(p, safePathPrefix.prefix) {
419				if safePathPrefix.strip {
420					return p[len(safePathPrefix.prefix):], nil
421				} else {
422					return p, nil
423				}
424			}
425		}
426	}
427	// strip off [./]meta_lic from license metadata path and extract base name
428	n := noticeFor.name[:len(noticeFor.name)-9]
429	li := strings.LastIndex(n, "/")
430	if li > 0 {
431		n = n[li+1:]
432	}
433	fi := strings.Index(n, "@")
434	if fi > 0 {
435		n = n[:fi]
436	}
437	return n, nil
438}
439
440// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
441func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) (string, error) {
442	pms, err := ni.pmix.MetadataForProjects(noticeFor.Projects()...)
443	if err != nil {
444		return "", err
445	}
446	for _, pm := range pms {
447		name := pm.VersionedName()
448		if name != "" {
449			return name, nil
450		}
451	}
452	return "", nil
453}
454
455// checkMetadataForLicenseText
456func (ni *NoticeIndex) checkMetadataForLicenseText(noticeFor *TargetNode, licenseText string) (string, error) {
457	p := ""
458	for _, proj := range noticeFor.Projects() {
459		if strings.HasPrefix(licenseText, proj) {
460			p = proj
461		}
462	}
463	if len(p) == 0 {
464		p = filepath.Dir(licenseText)
465		for {
466			fi, err := fs.Stat(ni.rootFS, filepath.Join(p, ".git"))
467			if err == nil && fi.IsDir() {
468				break
469			}
470			if strings.Contains(p, "/") && p != "/" {
471				p = filepath.Dir(p)
472				continue
473			}
474			return "", nil
475		}
476	}
477	pms, err := ni.pmix.MetadataForProjects(p)
478	if err != nil {
479		return "", err
480	}
481	if pms == nil {
482		return "", nil
483	}
484	return pms[0].VersionedName(), nil
485}
486
487// addText reads and indexes the content of a license text file.
488func (ni *NoticeIndex) addText(file string) error {
489	f, err := ni.rootFS.Open(filepath.Clean(file))
490	if err != nil {
491		return fmt.Errorf("error opening license text file %q: %w", file, err)
492	}
493
494	// read the file
495	text, err := io.ReadAll(f)
496	if err != nil {
497		return fmt.Errorf("error reading license text file %q: %w", file, err)
498	}
499
500	hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
501	ni.hash[file] = hash
502	if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
503		ni.text[hash] = text
504	}
505
506	ni.files = append(ni.files, file)
507
508	return nil
509}
510
511// getInstallPaths returns the names of the used dependencies mapped to their
512// installed locations.
513func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
514	if len(path) == 0 {
515		installs := attachesTo.Installed()
516		if 0 == len(installs) {
517			installs = attachesTo.Built()
518		}
519		return installs
520	}
521
522	var getInstalls func(path TargetEdgePath) []string
523
524	getInstalls = func(path TargetEdgePath) []string {
525		// deps contains the output targets from the dependencies in the path
526		var deps []string
527		if len(path) > 1 {
528			// recursively get the targets from the sub-path skipping 1 path segment
529			deps = getInstalls(path[1:])
530		} else {
531			// stop recursion at 1 path segment
532			deps = path[0].Dependency().TargetFiles()
533		}
534		size := 0
535		prefixes := path[0].Target().TargetFiles()
536		installMap := path[0].Target().InstallMap()
537		sources := path[0].Target().Sources()
538		for _, dep := range deps {
539			found := false
540			for _, source := range sources {
541				if strings.HasPrefix(dep, source) {
542					found = true
543					break
544				}
545			}
546			if !found {
547				continue
548			}
549			for _, im := range installMap {
550				if strings.HasPrefix(dep, im.FromPath) {
551					size += len(prefixes)
552					break
553				}
554			}
555		}
556
557		installs := make([]string, 0, size)
558		for _, dep := range deps {
559			found := false
560			for _, source := range sources {
561				if strings.HasPrefix(dep, source) {
562					found = true
563					break
564				}
565			}
566			if !found {
567				continue
568			}
569			for _, im := range installMap {
570				if strings.HasPrefix(dep, im.FromPath) {
571					for _, prefix := range prefixes {
572						installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
573					}
574					break
575				}
576			}
577		}
578		return installs
579	}
580	allInstalls := getInstalls(path)
581	installs := path[0].Target().Installed()
582	if len(installs) == 0 {
583		return allInstalls
584	}
585	result := make([]string, 0, len(allInstalls))
586	for _, install := range allInstalls {
587		for _, prefix := range installs {
588			if strings.HasPrefix(install, prefix) {
589				result = append(result, install)
590			}
591		}
592	}
593	return result
594}
595
596// hash is an opaque string derived from md5sum.
597type hash struct {
598	key string
599}
600
601// String returns the hexadecimal representation of the hash.
602func (h hash) String() string {
603	return h.key
604}
605
606// hashList orders an array of hashes
607type hashList struct {
608	ni          *NoticeIndex
609	libName     string
610	installPath string
611	hashes      *[]hash
612}
613
614// Len returns the count of elements in the slice.
615func (l hashList) Len() int { return len(*l.hashes) }
616
617// Swap rearranges 2 elements of the slice so that each occupies the other's
618// former position.
619func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
620
621// Less returns true when the `i`th element is lexicographically less than
622// the `j`th element.
623func (l hashList) Less(i, j int) bool {
624	var insti, instj int
625	if len(l.libName) > 0 {
626		insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
627		instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
628	} else {
629		libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i])
630		libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j])
631		libsis := strings.Join(libsi, " ")
632		libsjs := strings.Join(libsj, " ")
633		if libsis != libsjs {
634			return libsis < libsjs
635		}
636	}
637	if insti == instj {
638		leni := len(l.ni.text[(*l.hashes)[i]])
639		lenj := len(l.ni.text[(*l.hashes)[j]])
640		if leni == lenj {
641			// all else equal, just order by hash value
642			return (*l.hashes)[i].key < (*l.hashes)[j].key
643		}
644		// put shortest texts first within same # of installs
645		return leni < lenj
646	}
647	// reverse order of # installs so that most popular appears first
648	return instj < insti
649}
650