1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18	"fmt"
19	"io"
20	"io/fs"
21	"os"
22	"strings"
23	"sync"
24
25	"android/soong/compliance/license_metadata_proto"
26
27	"google.golang.org/protobuf/encoding/prototext"
28)
29
// ConcurrentReaders sets how many license metadata files may be read at the
// same time. It bounds resource usage such as the number of open files.
// ReadLicenseGraph rejects values below 1.
var ConcurrentReaders = 5
34
// globalFS is a stateless file system that forwards every call straight to the
// os package, so names are interpreted exactly as os.Open and os.Stat would
// interpret them.
type globalFS struct{}

// Compile-time checks that globalFS satisfies the interfaces it is used as.
var (
	_ fs.FS     = globalFS{}
	_ fs.StatFS = globalFS{}
)

// Open opens the named file by delegating to os.Open.
func (globalFS) Open(name string) (fs.File, error) {
	return os.Open(name)
}

// Stat describes the named file by delegating to os.Stat.
func (globalFS) Stat(name string) (fs.FileInfo, error) {
	return os.Stat(name)
}

// FS is a ready-to-use globalFS value.
var FS globalFS
49
// GetFS returns a filesystem rooted at `outDir`, falling back to the current
// directory (".") when `outDir` is empty.
func GetFS(outDir string) fs.FS {
	root := "."
	if outDir != "" {
		root = outDir
	}
	return os.DirFS(root)
}
57
// result describes the outcome of reading and parsing a single license metadata file.
//
// readFile sends one result per file on receiver.results, and ReadLicenseGraph
// consumes them. readFile builds values positionally (`&result{file, tn, err}`),
// so the field order below must not change.
type result struct {
	// file identifies the path to the license metadata file that was read.
	file string

	// target contains the parsed metadata, or nil if an error occurred.
	target *TargetNode

	// err is nil unless opening, reading or parsing the file failed.
	err error
}
69
// receiver coordinates the tasks for reading and parsing license metadata files.
//
// A single receiver is created by ReadLicenseGraph and shared by every readFile task.
type receiver struct {
	// lg accumulates the read metadata and becomes the final resulting LicenseGraph.
	// Its `targets` map is accessed under `lg.mu` while tasks are running.
	lg *LicenseGraph

	// rootFS locates the root of the file system from which to read the files.
	rootFS fs.FS

	// stderr identifies the error output writer.
	stderr io.Writer

	// task provides a fixed-size task pool to limit concurrent open files etc.
	// ReadLicenseGraph pre-fills it with ConcurrentReaders tokens; readFile takes a
	// token before starting a read and puts it back once the file has been parsed.
	task chan bool

	// results returns one license metadata file result at a time.
	results chan *result

	// wg counts scheduled readFile tasks; it reaches zero once every task has
	// finished scheduling its dependencies, at which point the channels are closed.
	wg sync.WaitGroup
}
90
91// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
92//
93// `files` become the root files of the graph for top-down walks of the graph.
94func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
95	if len(files) == 0 {
96		return nil, fmt.Errorf("no license metadata to analyze")
97	}
98	if ConcurrentReaders < 1 {
99		return nil, fmt.Errorf("need at least one task in pool")
100	}
101
102	lg := newLicenseGraph()
103	for _, f := range files {
104		if strings.HasSuffix(f, "meta_lic") {
105			lg.rootFiles = append(lg.rootFiles, f)
106		} else {
107			lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
108		}
109	}
110
111	recv := &receiver{
112		lg:      lg,
113		rootFS:  rootFS,
114		stderr:  stderr,
115		task:    make(chan bool, ConcurrentReaders),
116		results: make(chan *result, ConcurrentReaders),
117		wg:      sync.WaitGroup{},
118	}
119	for i := 0; i < ConcurrentReaders; i++ {
120		recv.task <- true
121	}
122
123	readFiles := func() {
124		lg.mu.Lock()
125		// identify the metadata files to schedule reading tasks for
126		for _, f := range lg.rootFiles {
127			lg.targets[f] = nil
128		}
129		lg.mu.Unlock()
130
131		// schedule tasks to read the files
132		for _, f := range lg.rootFiles {
133			readFile(recv, f)
134		}
135
136		// schedule a task to wait until finished and close the channel.
137		go func() {
138			recv.wg.Wait()
139			close(recv.task)
140			close(recv.results)
141		}()
142	}
143	go readFiles()
144
145	// tasks to read license metadata files are scheduled; read and process results from channel
146	var err error
147	for recv.results != nil {
148		select {
149		case r, ok := <-recv.results:
150			if ok {
151				// handle errors by nil'ing ls, setting err, and clobbering results channel
152				if r.err != nil {
153					err = r.err
154					fmt.Fprintf(recv.stderr, "%s\n", err.Error())
155					lg = nil
156					recv.results = nil
157					continue
158				}
159
160				// record the parsed metadata (guarded by mutex)
161				recv.lg.mu.Lock()
162				lg.targets[r.target.name] = r.target
163				recv.lg.mu.Unlock()
164			} else {
165				// finished -- nil the results channel
166				recv.results = nil
167			}
168		}
169	}
170
171	if lg != nil {
172		esize := 0
173		for _, tn := range lg.targets {
174			esize += len(tn.proto.Deps)
175		}
176		lg.edges = make(TargetEdgeList, 0, esize)
177		for _, tn := range lg.targets {
178			tn.licenseConditions = LicenseConditionSetFromNames(tn.proto.LicenseConditions...)
179			err = addDependencies(lg, tn)
180			if err != nil {
181				return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err)
182			}
183			tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
184		}
185	}
186	return lg, err
187
188}
189
// targetNode contains the license metadata for a node in the license graph.
//
// NOTE(review): the rest of this file refers to the type as `TargetNode`; presumably
// that name is declared elsewhere in the package in terms of targetNode -- confirm.
type targetNode struct {
	// proto holds the license metadata as unmarshaled from the metadata file.
	proto license_metadata_proto.LicenseMetadata

	// name is the path to the metadata file.
	name string

	// lg is the license graph the node belongs to.
	lg *LicenseGraph

	// edges identifies the dependencies of the target; filled in by addDependencies.
	edges TargetEdgeList

	// licenseConditions identifies the set of license conditions originating at the target node.
	licenseConditions LicenseConditionSet

	// resolution identifies the set of conditions resolved by acting on the target node.
	resolution LicenseConditionSet

	// pure indicates whether to treat the node as a pure aggregate (no internal linkage)
	pure bool
}
212
213// addDependencies converts the proto AnnotatedDependencies into `edges`
214func addDependencies(lg *LicenseGraph, tn *TargetNode) error {
215	tn.edges = make(TargetEdgeList, 0, len(tn.proto.Deps))
216	for _, ad := range tn.proto.Deps {
217		dependency := ad.GetFile()
218		if len(dependency) == 0 {
219			return fmt.Errorf("missing dependency name")
220		}
221		dtn, ok := lg.targets[dependency]
222		if !ok {
223			return fmt.Errorf("unknown dependency name %q", dependency)
224		}
225		if dtn == nil {
226			return fmt.Errorf("nil dependency for name %q", dependency)
227		}
228		annotations := newEdgeAnnotations()
229		for _, a := range ad.Annotations {
230			// look up a common constant annotation string from a small map
231			// instead of creating 1000's of copies of the same 3 strings.
232			if ann, ok := RecognizedAnnotations[a]; ok {
233				annotations.annotations[ann] = struct{}{}
234			}
235		}
236		edge := &TargetEdge{tn, dtn, annotations}
237		lg.edges = append(lg.edges, edge)
238		tn.edges = append(tn.edges, edge)
239	}
240	return nil
241}
242
243// readFile is a task to read and parse a single license metadata file, and to schedule
244// additional tasks for reading and parsing dependencies as necessary.
245func readFile(recv *receiver, file string) {
246	recv.wg.Add(1)
247	<-recv.task
248	go func() {
249		f, err := recv.rootFS.Open(file)
250		if err != nil {
251			recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
252			return
253		}
254
255		// read the file
256		data, err := io.ReadAll(f)
257		if err != nil {
258			recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
259			return
260		}
261		f.Close()
262
263		tn := &TargetNode{lg: recv.lg, name: file}
264
265		err = prototext.Unmarshal(data, &tn.proto)
266		if err != nil {
267			recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
268			return
269		}
270
271		// send result for this file and release task before scheduling dependencies,
272		// but do not signal done to WaitGroup until dependencies are scheduled.
273		recv.results <- &result{file, tn, nil}
274		recv.task <- true
275
276		// schedule tasks as necessary to read dependencies
277		for _, ad := range tn.proto.Deps {
278			dependency := ad.GetFile()
279			// decide, signal and record whether to schedule task in critical section
280			recv.lg.mu.Lock()
281			_, alreadyScheduled := recv.lg.targets[dependency]
282			if !alreadyScheduled {
283				recv.lg.targets[dependency] = nil
284			}
285			recv.lg.mu.Unlock()
286			// schedule task to read dependency file outside critical section
287			if !alreadyScheduled {
288				readFile(recv, dependency)
289			}
290		}
291
292		// signal task done after scheduling dependencies
293		recv.wg.Done()
294	}()
295}
296