1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
18	"crypto/sha256"
19	"encoding/base64"
20	"encoding/json"
21	"errors"
22	"fmt"
23	"path/filepath"
24	"reflect"
25	"sort"
26	"strings"
27	"sync"
28
29	analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
30
31	"github.com/google/blueprint/metrics"
32	"github.com/google/blueprint/proptools"
33	"google.golang.org/protobuf/proto"
34)
35
// Distinct integer types for the three kinds of identifiers handled while
// processing an aquery response. Keeping them as separate types prevents an
// artifact, depset or path fragment ID from being passed where another kind
// is expected.
type (
	artifactId     int
	depsetId       int
	pathFragmentId int
)
39
// KeyValuePair mirrors the KeyValuePair message of Bazel's aquery proto: one
// string key together with its string value.
type KeyValuePair struct {
	Key, Value string
}
45
// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A depset holds direct artifact paths and/or references to "child"
// depsets; populateDepsetMaps never produces one in which both are empty.
//
// ContentHash is computed by depsetContentHash from the other two fields.
// DirectArtifacts holds the paths of the artifacts directly in this depset,
// and TransitiveDepSetHashes holds the ContentHash of each child depset.
type AqueryDepset struct {
	ContentHash            string
	DirectArtifacts        []string
	TransitiveDepSetHashes []string
}
59
// BuildStatement contains information to register a build statement corresponding (one to one)
// with a Bazel action from Bazel's action graph.
//
// Depfile is the action output whose extension is ".d", or nil when the action has none;
// such an output is not listed in OutputPaths. SymlinkPaths is populated only for symlink
// actions, where it lists the created symlinks. Env and Mnemonic are copied from the
// Bazel action.
type BuildStatement struct {
	Command      string
	Depfile      *string
	OutputPaths  []string
	SymlinkPaths []string
	Env          []*analysis_v2_proto.KeyValuePair
	Mnemonic     string

	// Inputs of this build statement, either as unexpanded depsets or expanded
	// input paths. There should be no overlap between these fields; an input
	// path should either be included as part of an unexpanded depset or a raw
	// input path string, but not both.
	//
	// FileContents is unrelated to the inputs: it carries the file contents
	// reported by Bazel for file-writing actions.
	InputDepsetHashes []string
	InputPaths        []string
	FileContents      string
	// If ShouldRunInSbox is true, Soong will use sbox to create an isolated environment
	// and run the mixed build action there.
	ShouldRunInSbox bool
	// A list of files to add as implicit deps to the outputs of this BuildStatement.
	// Unlike most properties in BuildStatement, these paths must be relative to the root of
	// the whole out/ folder, instead of relative to ctx.Config().BazelContext.OutputBase()
	ImplicitDeps []string
	// IsExecutable is copied from the Bazel action for file-writing actions.
	IsExecutable bool
}
86
// A helper type for aquery processing which facilitates retrieval of path IDs from their
// less readable Bazel structures (depset and path fragment).
type aqueryArtifactHandler struct {
	// depsetIdToAqueryDepset maps depset id to AqueryDepset, a representation
	// of depset which is post-processed for middleman artifact handling,
	// unhandled artifact dropping, content hashing, etc.
	//
	// emptyDepsetIds records the ids of depsets that were dropped because,
	// after that post-processing, they had no direct artifacts and no
	// non-empty child depsets. Such ids have no entry in depsetIdToAqueryDepset.
	depsetIdToAqueryDepset map[depsetId]AqueryDepset
	emptyDepsetIds         map[depsetId]struct{}
	// Maps content hash to AqueryDepset.
	depsetHashToAqueryDepset map[string]AqueryDepset

	// depsetHashToArtifactPathsCache is a memoization of depset flattening, because
	// flattening may be an expensive operation. Keys are depset content hashes and
	// values are the flattened []string of artifact paths.
	depsetHashToArtifactPathsCache sync.Map
	// Maps artifact ids to fully expanded paths.
	artifactIdToPath map[artifactId]string
}
104
// templateActionOverriddenTokens lists tokens that should be substituted with
// the value specified here, instead of the one returned in 'substitutions' of
// the TemplateExpand action. It is consulted by expandTemplateContent.
var templateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". The py3wrapper.sh artifact itself is dropped
	// from depset inputs in populateDepsetMaps.
	"%python_binary%": "python3",
}
112
const (
	// The mnemonic of middleman actions. These actions are not converted to
	// build statements; the inputs of a middleman action are substituted for
	// its output artifacts wherever those appear in a depset.
	middlemanMnemonic = "Middleman"
	// The file name of py3wrapper.sh, which is used by py_binary targets. It
	// is matched as a path suffix, so the leading slash restricts the match to
	// a whole file name.
	py3wrapperFileName = "/py3wrapper.sh"
)
118
// indexBy builds a lookup table from values, keyed by the result of applying
// keyFn to each element. If several elements map to the same key, the one that
// appears last in values is kept. The returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	index := make(map[K]V)
	for i := range values {
		element := values[i]
		index[keyFn(element)] = element
	}
	return index
}
126
127func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aqueryArtifactHandler, error) {
128	pathFragments := indexBy(aqueryResult.PathFragments, func(pf *analysis_v2_proto.PathFragment) pathFragmentId {
129		return pathFragmentId(pf.Id)
130	})
131
132	artifactIdToPath := make(map[artifactId]string, len(aqueryResult.Artifacts))
133	for _, artifact := range aqueryResult.Artifacts {
134		artifactPath, err := expandPathFragment(pathFragmentId(artifact.PathFragmentId), pathFragments)
135		if err != nil {
136			return nil, err
137		}
138		if artifact.IsTreeArtifact &&
139			!strings.HasPrefix(artifactPath, "bazel-out/io_bazel_rules_go/") &&
140			!strings.HasPrefix(artifactPath, "bazel-out/rules_java_builtin/") {
141			// Since we're using ninja as an executor, we can't use tree artifacts. Ninja only
142			// considers a file/directory "dirty" when it's mtime changes. Directories' mtimes will
143			// only change when a file in the directory is added/removed, but not when files in
144			// the directory are changed, or when files in subdirectories are changed/added/removed.
145			// Bazel handles this by walking the directory and generating a hash for it after the
146			// action runs, which we would have to do as well if we wanted to support these
147			// artifacts in mixed builds.
148			//
149			// However, there are some bazel built-in rules that use tree artifacts. Allow those,
150			// but keep in mind that they'll have incrementality issues.
151			return nil, fmt.Errorf("tree artifacts are currently not supported in mixed builds: " + artifactPath)
152		}
153		artifactIdToPath[artifactId(artifact.Id)] = artifactPath
154	}
155
156	// Map middleman artifact ContentHash to input artifact depset ID.
157	// Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
158	// if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
159	// for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
160	// that action instead.
161	middlemanIdToDepsetIds := map[artifactId][]uint32{}
162	for _, actionEntry := range aqueryResult.Actions {
163		if actionEntry.Mnemonic == middlemanMnemonic {
164			for _, outputId := range actionEntry.OutputIds {
165				middlemanIdToDepsetIds[artifactId(outputId)] = actionEntry.InputDepSetIds
166			}
167		}
168	}
169
170	depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d *analysis_v2_proto.DepSetOfFiles) depsetId {
171		return depsetId(d.Id)
172	})
173
174	aqueryHandler := aqueryArtifactHandler{
175		depsetIdToAqueryDepset:         map[depsetId]AqueryDepset{},
176		depsetHashToAqueryDepset:       map[string]AqueryDepset{},
177		depsetHashToArtifactPathsCache: sync.Map{},
178		emptyDepsetIds:                 make(map[depsetId]struct{}, 0),
179		artifactIdToPath:               artifactIdToPath,
180	}
181
182	// Validate and adjust aqueryResult.DepSetOfFiles values.
183	for _, depset := range aqueryResult.DepSetOfFiles {
184		_, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
185		if err != nil {
186			return nil, err
187		}
188	}
189
190	return &aqueryHandler, nil
191}
192
193// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
194// depset.
195func (a *aqueryArtifactHandler) populateDepsetMaps(depset *analysis_v2_proto.DepSetOfFiles, middlemanIdToDepsetIds map[artifactId][]uint32, depsetIdToDepset map[depsetId]*analysis_v2_proto.DepSetOfFiles) (*AqueryDepset, error) {
196	if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depsetId(depset.Id)]; containsDepset {
197		return &aqueryDepset, nil
198	}
199	transitiveDepsetIds := depset.TransitiveDepSetIds
200	directArtifactPaths := make([]string, 0, len(depset.DirectArtifactIds))
201	for _, id := range depset.DirectArtifactIds {
202		aId := artifactId(id)
203		path, pathExists := a.artifactIdToPath[aId]
204		if !pathExists {
205			return nil, fmt.Errorf("undefined input artifactId %d", aId)
206		}
207		// Filter out any inputs which are universally dropped, and swap middleman
208		// artifacts with their corresponding depsets.
209		if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[aId]; isMiddleman {
210			// Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
211			transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
212		} else if strings.HasSuffix(path, py3wrapperFileName) ||
213			strings.HasPrefix(path, "../bazel_tools") {
214			continue
215			// Drop these artifacts.
216			// See go/python-binary-host-mixed-build for more details.
217			// 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by the
218			// TemplateExpandAction handles everything necessary to launch a Pythin application.
219			// 2) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
220			// containing depset to always be considered newer than their outputs.
221		} else {
222			directArtifactPaths = append(directArtifactPaths, path)
223		}
224	}
225
226	childDepsetHashes := make([]string, 0, len(transitiveDepsetIds))
227	for _, id := range transitiveDepsetIds {
228		childDepsetId := depsetId(id)
229		childDepset, exists := depsetIdToDepset[childDepsetId]
230		if !exists {
231			if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
232				continue
233			} else {
234				return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
235			}
236		}
237		if childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset); err != nil {
238			return nil, err
239		} else if childAqueryDepset == nil {
240			continue
241		} else {
242			childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
243		}
244	}
245	if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
246		a.emptyDepsetIds[depsetId(depset.Id)] = struct{}{}
247		return nil, nil
248	}
249	aqueryDepset := AqueryDepset{
250		ContentHash:            depsetContentHash(directArtifactPaths, childDepsetHashes),
251		DirectArtifacts:        directArtifactPaths,
252		TransitiveDepSetHashes: childDepsetHashes,
253	}
254	a.depsetIdToAqueryDepset[depsetId(depset.Id)] = aqueryDepset
255	a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
256	return &aqueryDepset, nil
257}
258
259// getInputPaths flattens the depsets of the given IDs and returns all transitive
260// input paths contained in these depsets.
261// This is a potentially expensive operation, and should not be invoked except
262// for actions which need specialized input handling.
263func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, error) {
264	var inputPaths []string
265
266	for _, id := range depsetIds {
267		inputDepSetId := depsetId(id)
268		depset := a.depsetIdToAqueryDepset[inputDepSetId]
269		inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
270		if err != nil {
271			return nil, err
272		}
273		for _, inputPath := range inputArtifacts {
274			inputPaths = append(inputPaths, inputPath)
275		}
276	}
277
278	return inputPaths, nil
279}
280
281func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
282	if result, exists := a.depsetHashToArtifactPathsCache.Load(depsetHash); exists {
283		return result.([]string), nil
284	}
285	if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
286		result := depset.DirectArtifacts
287		for _, childHash := range depset.TransitiveDepSetHashes {
288			childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
289			if err != nil {
290				return nil, err
291			}
292			result = append(result, childArtifactIds...)
293		}
294		a.depsetHashToArtifactPathsCache.Store(depsetHash, result)
295		return result, nil
296	} else {
297		return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
298	}
299}
300
301// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
302// which should be registered (and output to a ninja file) to correspond with Bazel's
303// action graph, as described by the given action graph json proto.
304// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
305// are one-to-one with Bazel's depSetOfFiles objects.
306func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]*BuildStatement, []AqueryDepset, error) {
307	aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
308	err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
309	if err != nil {
310		return nil, nil, err
311	}
312
313	var aqueryHandler *aqueryArtifactHandler
314	{
315		eventHandler.Begin("init_handler")
316		defer eventHandler.End("init_handler")
317		aqueryHandler, err = newAqueryHandler(aqueryProto)
318		if err != nil {
319			return nil, nil, err
320		}
321	}
322
323	// allocate both length and capacity so each goroutine can write to an index independently without
324	// any need for synchronization for slice access.
325	buildStatements := make([]*BuildStatement, len(aqueryProto.Actions))
326	{
327		eventHandler.Begin("build_statements")
328		defer eventHandler.End("build_statements")
329		wg := sync.WaitGroup{}
330		var errOnce sync.Once
331		id2targets := make(map[uint32]string, len(aqueryProto.Targets))
332		for _, t := range aqueryProto.Targets {
333			id2targets[t.GetId()] = t.GetLabel()
334		}
335		for i, actionEntry := range aqueryProto.Actions {
336			wg.Add(1)
337			go func(i int, actionEntry *analysis_v2_proto.Action) {
338				if strings.HasPrefix(id2targets[actionEntry.TargetId], "@bazel_tools//") {
339					// bazel_tools are removed depsets in `populateDepsetMaps()` so skipping
340					// conversion to build statements as well
341					buildStatements[i] = nil
342				} else if buildStatement, aErr := aqueryHandler.actionToBuildStatement(actionEntry); aErr != nil {
343					errOnce.Do(func() {
344						aErr = fmt.Errorf("%s: [%s] [%s]", aErr.Error(), actionEntry.GetMnemonic(), id2targets[actionEntry.TargetId])
345						err = aErr
346					})
347				} else {
348					// set build statement at an index rather than appending such that each goroutine does not
349					// impact other goroutines
350					buildStatements[i] = buildStatement
351				}
352				wg.Done()
353			}(i, actionEntry)
354		}
355		wg.Wait()
356	}
357	if err != nil {
358		return nil, nil, err
359	}
360
361	depsetsByHash := map[string]AqueryDepset{}
362	depsets := make([]AqueryDepset, 0, len(aqueryHandler.depsetIdToAqueryDepset))
363	{
364		eventHandler.Begin("depsets")
365		defer eventHandler.End("depsets")
366		for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
367			if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
368				// Two depsets collide on hash. Ensure that their contents are identical.
369				if !reflect.DeepEqual(aqueryDepset, prevEntry) {
370					return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
371				}
372			} else {
373				depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
374				depsets = append(depsets, aqueryDepset)
375			}
376		}
377	}
378
379	eventHandler.Do("build_statement_sort", func() {
380		// Build Statements and depsets must be sorted by their content hash to
381		// preserve determinism between builds (this will result in consistent ninja file
382		// output). Note they are not sorted by their original IDs nor their Bazel ordering,
383		// as Bazel gives nondeterministic ordering / identifiers in aquery responses.
384		sort.Slice(buildStatements, func(i, j int) bool {
385			// Sort all nil statements to the end of the slice
386			if buildStatements[i] == nil {
387				return false
388			} else if buildStatements[j] == nil {
389				return true
390			}
391			//For build statements, compare output lists. In Bazel, each output file
392			// may only have one action which generates it, so this will provide
393			// a deterministic ordering.
394			outputs_i := buildStatements[i].OutputPaths
395			outputs_j := buildStatements[j].OutputPaths
396			if len(outputs_i) != len(outputs_j) {
397				return len(outputs_i) < len(outputs_j)
398			}
399			if len(outputs_i) == 0 {
400				// No outputs for these actions, so compare commands.
401				return buildStatements[i].Command < buildStatements[j].Command
402			}
403			// There may be multiple outputs, but the output ordering is deterministic.
404			return outputs_i[0] < outputs_j[0]
405		})
406	})
407	eventHandler.Do("depset_sort", func() {
408		sort.Slice(depsets, func(i, j int) bool {
409			return depsets[i].ContentHash < depsets[j].ContentHash
410		})
411	})
412	return buildStatements, depsets, nil
413}
414
// depsetContentHash returns an identifier for a depset derived purely from its
// contents: the unpadded, URL-safe base64 encoding of the SHA256 checksum of
// the direct paths (joined by newlines) immediately followed by the
// concatenated child depset hashes.
//
// A content-derived identifier is deterministic, unlike an integer identifier
// that would depend on the order in which depsets happen to be created.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	var content strings.Builder
	// Newline is a safe separator because paths cannot contain one.
	content.WriteString(strings.Join(directPaths, "\n"))
	for _, childHash := range transitiveDepsetHashes {
		content.WriteString(childHash)
	}
	checksum := sha256.Sum256([]byte(content.String()))
	return base64.RawURLEncoding.EncodeToString(checksum[:])
}
428
429func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []uint32) ([]string, error) {
430	var hashes []string
431	for _, id := range inputDepsetIds {
432		dId := depsetId(id)
433		if aqueryDepset, exists := a.depsetIdToAqueryDepset[dId]; !exists {
434			if _, empty := a.emptyDepsetIds[dId]; !empty {
435				return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", dId)
436			}
437		} else {
438			hashes = append(hashes, aqueryDepset.ContentHash)
439		}
440	}
441	return hashes, nil
442}
443
444// escapes the args received from aquery and creates a command string
445func commandString(actionEntry *analysis_v2_proto.Action) string {
446	argsEscaped := make([]string, len(actionEntry.Arguments))
447	for i, arg := range actionEntry.Arguments {
448		if arg == "" {
449			// If this is an empty string, add ''
450			// And not
451			// 1. (literal empty)
452			// 2. `''\'''\'''` (escaped version of '')
453			//
454			// If we had used (1), then this would appear as a whitespace when we strings.Join
455			argsEscaped[i] = "''"
456		} else {
457			argsEscaped[i] = proptools.ShellEscapeIncludingSpaces(arg)
458		}
459	}
460	return strings.Join(argsEscaped, " ")
461}
462
463func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
464	command := commandString(actionEntry)
465	inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
466	if err != nil {
467		return nil, err
468	}
469	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
470	if err != nil {
471		return nil, err
472	}
473
474	buildStatement := &BuildStatement{
475		Command:           command,
476		Depfile:           depfile,
477		OutputPaths:       outputPaths,
478		InputDepsetHashes: inputDepsetHashes,
479		Env:               actionEntry.EnvironmentVariables,
480		Mnemonic:          actionEntry.Mnemonic,
481	}
482	if buildStatement.Mnemonic == "GoToolchainBinaryBuild" {
483		// Unlike b's execution root, mixed build execution root contains a symlink to prebuilts/go
484		// This causes issues for `GOCACHE=$(mktemp -d) go build ...`
485		// To prevent this, sandbox this action in mixed builds as well
486		buildStatement.ShouldRunInSbox = true
487	}
488	return buildStatement, nil
489}
490
491func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
492	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
493	if err != nil {
494		return nil, err
495	}
496	if len(outputPaths) != 1 {
497		return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
498	}
499	expandedTemplateContent := expandTemplateContent(actionEntry)
500	// The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
501	// and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
502	// change \n to space and mess up the format of Python programs.
503	// sed is used to convert \\n back to \n before saving to output file.
504	// See go/python-binary-host-mixed-build for more details.
505	command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
506		escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
507	inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
508	if err != nil {
509		return nil, err
510	}
511
512	buildStatement := &BuildStatement{
513		Command:           command,
514		Depfile:           depfile,
515		OutputPaths:       outputPaths,
516		InputDepsetHashes: inputDepsetHashes,
517		Env:               actionEntry.EnvironmentVariables,
518		Mnemonic:          actionEntry.Mnemonic,
519	}
520	return buildStatement, nil
521}
522
523func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
524	outputPaths, _, err := a.getOutputPaths(actionEntry)
525	var depsetHashes []string
526	if err == nil {
527		depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
528	}
529	if err != nil {
530		return nil, err
531	}
532	return &BuildStatement{
533		Depfile:           nil,
534		OutputPaths:       outputPaths,
535		Env:               actionEntry.EnvironmentVariables,
536		Mnemonic:          actionEntry.Mnemonic,
537		InputDepsetHashes: depsetHashes,
538		FileContents:      actionEntry.FileContents,
539		IsExecutable:      actionEntry.IsExecutable,
540	}, nil
541}
542
543func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
544	outputPaths, _, err := a.getOutputPaths(actionEntry)
545	if err != nil {
546		return nil, err
547	}
548	inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
549	if err != nil {
550		return nil, err
551	}
552	if len(inputPaths) != 1 || len(outputPaths) != 1 {
553		return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
554	}
555	// The actual command is generated in bazelSingleton.GenerateBuildActions
556	return &BuildStatement{
557		Depfile:     nil,
558		OutputPaths: outputPaths,
559		Env:         actionEntry.EnvironmentVariables,
560		Mnemonic:    actionEntry.Mnemonic,
561		InputPaths:  inputPaths,
562	}, nil
563}
564
// bazelSandwichJson is the JSON payload that follows the "bazel_sandwich:"
// prefix in the target of an UnresolvedSymlink action (see
// unresolvedSymlinkActionBuildStatement).
//
// Target is the path the symlink should point to. DependOnTarget controls
// whether Target is also added as an implicit dep; it is read with
// proptools.BoolDefault(..., true), so leaving it unset presumably means true.
// ImplicitDeps lists additional implicit deps for the symlink.
type bazelSandwichJson struct {
	Target         string   `json:"target"`
	DependOnTarget *bool    `json:"depend_on_target,omitempty"`
	ImplicitDeps   []string `json:"implicit_deps"`
}
570
571func (a *aqueryArtifactHandler) unresolvedSymlinkActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
572	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
573	if err != nil {
574		return nil, err
575	}
576	if len(actionEntry.InputDepSetIds) != 0 || len(outputPaths) != 1 {
577		return nil, fmt.Errorf("expected 0 inputs and 1 output to symlink action, got: input %q, output %q", actionEntry.InputDepSetIds, outputPaths)
578	}
579	target := actionEntry.UnresolvedSymlinkTarget
580	if target == "" {
581		return nil, fmt.Errorf("expected an unresolved_symlink_target, but didn't get one")
582	}
583	if filepath.Clean(target) != target {
584		return nil, fmt.Errorf("expected %q, got %q", filepath.Clean(target), target)
585	}
586	if strings.HasPrefix(target, "/") {
587		return nil, fmt.Errorf("no absolute symlinks allowed: %s", target)
588	}
589
590	out := outputPaths[0]
591	outDir := filepath.Dir(out)
592	var implicitDeps []string
593	if strings.HasPrefix(target, "bazel_sandwich:") {
594		j := bazelSandwichJson{}
595		err := json.Unmarshal([]byte(target[len("bazel_sandwich:"):]), &j)
596		if err != nil {
597			return nil, err
598		}
599		if proptools.BoolDefault(j.DependOnTarget, true) {
600			implicitDeps = append(implicitDeps, j.Target)
601		}
602		implicitDeps = append(implicitDeps, j.ImplicitDeps...)
603		dotDotsToReachCwd := ""
604		if outDir != "." {
605			dotDotsToReachCwd = strings.Repeat("../", strings.Count(outDir, "/")+1)
606		}
607		target = proptools.ShellEscapeIncludingSpaces(j.Target)
608		target = "{DOTDOTS_TO_OUTPUT_ROOT}" + dotDotsToReachCwd + target
609	} else {
610		target = proptools.ShellEscapeIncludingSpaces(target)
611	}
612
613	outDir = proptools.ShellEscapeIncludingSpaces(outDir)
614	out = proptools.ShellEscapeIncludingSpaces(out)
615	// Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
616	command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, target)
617	symlinkPaths := outputPaths[:]
618
619	buildStatement := &BuildStatement{
620		Command:      command,
621		Depfile:      depfile,
622		OutputPaths:  outputPaths,
623		Env:          actionEntry.EnvironmentVariables,
624		Mnemonic:     actionEntry.Mnemonic,
625		SymlinkPaths: symlinkPaths,
626		ImplicitDeps: implicitDeps,
627	}
628	return buildStatement, nil
629}
630
631func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
632	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
633	if err != nil {
634		return nil, err
635	}
636
637	inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
638	if err != nil {
639		return nil, err
640	}
641	if len(inputPaths) != 1 || len(outputPaths) != 1 {
642		return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
643	}
644	out := outputPaths[0]
645	outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
646	out = proptools.ShellEscapeIncludingSpaces(out)
647	in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
648	// Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
649	command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
650	symlinkPaths := outputPaths[:]
651
652	buildStatement := &BuildStatement{
653		Command:      command,
654		Depfile:      depfile,
655		OutputPaths:  outputPaths,
656		InputPaths:   inputPaths,
657		Env:          actionEntry.EnvironmentVariables,
658		Mnemonic:     actionEntry.Mnemonic,
659		SymlinkPaths: symlinkPaths,
660	}
661	return buildStatement, nil
662}
663
664func (a *aqueryArtifactHandler) getOutputPaths(actionEntry *analysis_v2_proto.Action) (outputPaths []string, depfile *string, err error) {
665	for _, outputId := range actionEntry.OutputIds {
666		outputPath, exists := a.artifactIdToPath[artifactId(outputId)]
667		if !exists {
668			err = fmt.Errorf("undefined outputId %d", outputId)
669			return
670		}
671		ext := filepath.Ext(outputPath)
672		if ext == ".d" {
673			if depfile != nil {
674				err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
675				return
676			} else {
677				depfile = &outputPath
678			}
679		} else {
680			outputPaths = append(outputPaths, outputPath)
681		}
682	}
683	return
684}
685
686// expandTemplateContent substitutes the tokens in a template.
687func expandTemplateContent(actionEntry *analysis_v2_proto.Action) string {
688	replacerString := make([]string, len(actionEntry.Substitutions)*2)
689	for i, pair := range actionEntry.Substitutions {
690		value := pair.Value
691		if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
692			value = val
693		}
694		replacerString[i*2] = pair.Key
695		replacerString[i*2+1] = value
696	}
697	replacer := strings.NewReplacer(replacerString...)
698	return replacer.Replace(actionEntry.TemplateContent)
699}
700
// commandLineArgumentReplacer performs, in a single pass, the substitutions
//
//	\  ->  \\
//	$  ->  \$
//	`  ->  \`
//	"  ->  \"
//	newline  ->  \n (a backslash followed by the letter n)
//	'  ->  '"'"'
var commandLineArgumentReplacer = func() *strings.Replacer {
	escapes := [][2]string{
		{`\`, `\\`},
		{`$`, `\$`},
		{"`", "\\`"},
		{`"`, `\"`},
		{"\n", "\\n"},
		{`'`, `'"'"'`},
	}
	oldNew := make([]string, 0, 2*len(escapes))
	for _, escape := range escapes {
		oldNew = append(oldNew, escape[0], escape[1])
	}
	return strings.NewReplacer(oldNew...)
}()

// escapeCommandlineArgument returns str with the characters handled by
// commandLineArgumentReplacer escaped.
func escapeCommandlineArgument(str string) string {
	escaped := commandLineArgumentReplacer.Replace(str)
	return escaped
}
714
715func (a *aqueryArtifactHandler) actionToBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
716	switch actionEntry.Mnemonic {
717	// Middleman actions are not handled like other actions; they are handled separately as a
718	// preparatory step so that their inputs may be relayed to actions depending on middleman
719	// artifacts.
720	case middlemanMnemonic:
721		return nil, nil
722	// PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
723	case "PythonZipper":
724		return nil, nil
725	// Skip "Fail" actions, which are placeholder actions designed to always fail.
726	case "Fail":
727		return nil, nil
728	case "BaselineCoverage":
729		return nil, nil
730	case "Symlink", "SolibSymlink", "ExecutableSymlink":
731		return a.symlinkActionBuildStatement(actionEntry)
732	case "TemplateExpand":
733		if len(actionEntry.Arguments) < 1 {
734			return a.templateExpandActionBuildStatement(actionEntry)
735		}
736	case "FileWrite", "SourceSymlinkManifest", "RepoMappingManifest":
737		return a.fileWriteActionBuildStatement(actionEntry)
738	case "SymlinkTree":
739		return a.symlinkTreeActionBuildStatement(actionEntry)
740	case "UnresolvedSymlink":
741		return a.unresolvedSymlinkActionBuildStatement(actionEntry)
742	}
743
744	if len(actionEntry.Arguments) < 1 {
745		return nil, errors.New("received action with no command")
746	}
747	return a.normalActionBuildStatement(actionEntry)
748
749}
750
751func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]*analysis_v2_proto.PathFragment) (string, error) {
752	var labels []string
753	currId := id
754	// Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
755	for currId > 0 {
756		currFragment, ok := pathFragmentsMap[currId]
757		if !ok {
758			return "", fmt.Errorf("undefined path fragment id %d", currId)
759		}
760		labels = append([]string{currFragment.Label}, labels...)
761		parentId := pathFragmentId(currFragment.ParentId)
762		if currId == parentId {
763			return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
764		}
765		currId = parentId
766	}
767	return filepath.Join(labels...), nil
768}
769