1// Copyright 2021 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package compliance 16 17import ( 18 "fmt" 19 "io" 20 "io/fs" 21 "os" 22 "strings" 23 "sync" 24 25 "android/soong/compliance/license_metadata_proto" 26 27 "google.golang.org/protobuf/encoding/prototext" 28) 29 30var ( 31 // ConcurrentReaders is the size of the task pool for limiting resource usage e.g. open files. 32 ConcurrentReaders = 5 33) 34 35type globalFS struct{} 36 37var _ fs.FS = globalFS{} 38var _ fs.StatFS = globalFS{} 39 40func (s globalFS) Open(name string) (fs.File, error) { 41 return os.Open(name) 42} 43 44func (s globalFS) Stat(name string) (fs.FileInfo, error) { 45 return os.Stat(name) 46} 47 48var FS globalFS 49 50// GetFS returns a filesystem for accessing files under the OUT_DIR environment variable. 51func GetFS(outDir string) fs.FS { 52 if len(outDir) > 0 { 53 return os.DirFS(outDir) 54 } 55 return os.DirFS(".") 56} 57 58// result describes the outcome of reading and parsing a single license metadata file. 59type result struct { 60 // file identifies the path to the license metadata file 61 file string 62 63 // target contains the parsed metadata or nil if an error 64 target *TargetNode 65 66 // err is nil unless an error occurs 67 err error 68} 69 70// receiver coordinates the tasks for reading and parsing license metadata files. 71type receiver struct { 72 // lg accumulates the read metadata and becomes the final resulting LicenseGraph. 73 lg *LicenseGraph 74 75 // rootFS locates the root of the file system from which to read the files. 76 rootFS fs.FS 77 78 // stderr identifies the error output writer. 79 stderr io.Writer 80 81 // task provides a fixed-size task pool to limit concurrent open files etc. 82 task chan bool 83 84 // results returns one license metadata file result at a time. 85 results chan *result 86 87 // wg detects when done 88 wg sync.WaitGroup 89} 90 91// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph. 92// 93// `files` become the root files of the graph for top-down walks of the graph. 94func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) { 95 if len(files) == 0 { 96 return nil, fmt.Errorf("no license metadata to analyze") 97 } 98 if ConcurrentReaders < 1 { 99 return nil, fmt.Errorf("need at least one task in pool") 100 } 101 102 lg := newLicenseGraph() 103 for _, f := range files { 104 if strings.HasSuffix(f, "meta_lic") { 105 lg.rootFiles = append(lg.rootFiles, f) 106 } else { 107 lg.rootFiles = append(lg.rootFiles, f+".meta_lic") 108 } 109 } 110 111 recv := &receiver{ 112 lg: lg, 113 rootFS: rootFS, 114 stderr: stderr, 115 task: make(chan bool, ConcurrentReaders), 116 results: make(chan *result, ConcurrentReaders), 117 wg: sync.WaitGroup{}, 118 } 119 for i := 0; i < ConcurrentReaders; i++ { 120 recv.task <- true 121 } 122 123 readFiles := func() { 124 lg.mu.Lock() 125 // identify the metadata files to schedule reading tasks for 126 for _, f := range lg.rootFiles { 127 lg.targets[f] = nil 128 } 129 lg.mu.Unlock() 130 131 // schedule tasks to read the files 132 for _, f := range lg.rootFiles { 133 readFile(recv, f) 134 } 135 136 // schedule a task to wait until finished and close the channel. 137 go func() { 138 recv.wg.Wait() 139 close(recv.task) 140 close(recv.results) 141 }() 142 } 143 go readFiles() 144 145 // tasks to read license metadata files are scheduled; read and process results from channel 146 var err error 147 for recv.results != nil { 148 select { 149 case r, ok := <-recv.results: 150 if ok { 151 // handle errors by nil'ing ls, setting err, and clobbering results channel 152 if r.err != nil { 153 err = r.err 154 fmt.Fprintf(recv.stderr, "%s\n", err.Error()) 155 lg = nil 156 recv.results = nil 157 continue 158 } 159 160 // record the parsed metadata (guarded by mutex) 161 recv.lg.mu.Lock() 162 lg.targets[r.target.name] = r.target 163 recv.lg.mu.Unlock() 164 } else { 165 // finished -- nil the results channel 166 recv.results = nil 167 } 168 } 169 } 170 171 if lg != nil { 172 esize := 0 173 for _, tn := range lg.targets { 174 esize += len(tn.proto.Deps) 175 } 176 lg.edges = make(TargetEdgeList, 0, esize) 177 for _, tn := range lg.targets { 178 tn.licenseConditions = LicenseConditionSetFromNames(tn.proto.LicenseConditions...) 179 err = addDependencies(lg, tn) 180 if err != nil { 181 return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err) 182 } 183 tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{} 184 } 185 } 186 return lg, err 187 188} 189 190// targetNode contains the license metadata for a node in the license graph. 191type targetNode struct { 192 proto license_metadata_proto.LicenseMetadata 193 194 // name is the path to the metadata file. 195 name string 196 197 // lg is the license graph the node belongs to. 198 lg *LicenseGraph 199 200 // edges identifies the dependencies of the target. 201 edges TargetEdgeList 202 203 // licenseConditions identifies the set of license conditions originating at the target node. 204 licenseConditions LicenseConditionSet 205 206 // resolution identifies the set of conditions resolved by acting on the target node. 207 resolution LicenseConditionSet 208 209 // pure indicates whether to treat the node as a pure aggregate (no internal linkage) 210 pure bool 211} 212 213// addDependencies converts the proto AnnotatedDependencies into `edges` 214func addDependencies(lg *LicenseGraph, tn *TargetNode) error { 215 tn.edges = make(TargetEdgeList, 0, len(tn.proto.Deps)) 216 for _, ad := range tn.proto.Deps { 217 dependency := ad.GetFile() 218 if len(dependency) == 0 { 219 return fmt.Errorf("missing dependency name") 220 } 221 dtn, ok := lg.targets[dependency] 222 if !ok { 223 return fmt.Errorf("unknown dependency name %q", dependency) 224 } 225 if dtn == nil { 226 return fmt.Errorf("nil dependency for name %q", dependency) 227 } 228 annotations := newEdgeAnnotations() 229 for _, a := range ad.Annotations { 230 // look up a common constant annotation string from a small map 231 // instead of creating 1000's of copies of the same 3 strings. 232 if ann, ok := RecognizedAnnotations[a]; ok { 233 annotations.annotations[ann] = struct{}{} 234 } 235 } 236 edge := &TargetEdge{tn, dtn, annotations} 237 lg.edges = append(lg.edges, edge) 238 tn.edges = append(tn.edges, edge) 239 } 240 return nil 241} 242 243// readFile is a task to read and parse a single license metadata file, and to schedule 244// additional tasks for reading and parsing dependencies as necessary. 245func readFile(recv *receiver, file string) { 246 recv.wg.Add(1) 247 <-recv.task 248 go func() { 249 f, err := recv.rootFS.Open(file) 250 if err != nil { 251 recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)} 252 return 253 } 254 255 // read the file 256 data, err := io.ReadAll(f) 257 if err != nil { 258 recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)} 259 return 260 } 261 f.Close() 262 263 tn := &TargetNode{lg: recv.lg, name: file} 264 265 err = prototext.Unmarshal(data, &tn.proto) 266 if err != nil { 267 recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)} 268 return 269 } 270 271 // send result for this file and release task before scheduling dependencies, 272 // but do not signal done to WaitGroup until dependencies are scheduled. 273 recv.results <- &result{file, tn, nil} 274 recv.task <- true 275 276 // schedule tasks as necessary to read dependencies 277 for _, ad := range tn.proto.Deps { 278 dependency := ad.GetFile() 279 // decide, signal and record whether to schedule task in critical section 280 recv.lg.mu.Lock() 281 _, alreadyScheduled := recv.lg.targets[dependency] 282 if !alreadyScheduled { 283 recv.lg.targets[dependency] = nil 284 } 285 recv.lg.mu.Unlock() 286 // schedule task to read dependency file outside critical section 287 if !alreadyScheduled { 288 readFile(recv, dependency) 289 } 290 } 291 292 // signal task done after scheduling dependencies 293 recv.wg.Done() 294 }() 295} 296