1 // Copyright (C) 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 use super::metadata::WorkspaceMetadata;
16 use super::{Crate, CrateType, Extern, ExternType};
17 use crate::CargoOutput;
18 use anyhow::anyhow;
19 use anyhow::bail;
20 use anyhow::Context;
21 use anyhow::Result;
22 use log::debug;
23 use once_cell::sync::Lazy;
24 use regex::Regex;
25 use std::collections::BTreeMap;
26 use std::env;
27 use std::path::Path;
28 use std::path::PathBuf;
29 
30 /// Reads the given `cargo.out` and `cargo.metadata` files, and generates a list of crates based on
31 /// the rustc invocations.
32 ///
33 /// Ignores crates outside the current directory and build script crates.
parse_cargo_out(cargo_output: &CargoOutput) -> Result<Vec<Crate>>34 pub fn parse_cargo_out(cargo_output: &CargoOutput) -> Result<Vec<Crate>> {
35     let metadata = serde_json::from_str(&cargo_output.cargo_metadata)
36         .context("failed to parse cargo metadata")?;
37     parse_cargo_out_str(
38         &cargo_output.cargo_out,
39         &metadata,
40         env::current_dir().unwrap().canonicalize().unwrap(),
41     )
42 }
43 
44 /// Parses the given `cargo.out` and `cargo.metadata` file contents and generates a list of crates
45 /// based on the rustc invocations.
46 ///
47 /// Ignores crates outside `base_directory` and build script crates.
parse_cargo_out_str( cargo_out: &str, metadata: &WorkspaceMetadata, base_directory: impl AsRef<Path>, ) -> Result<Vec<Crate>>48 fn parse_cargo_out_str(
49     cargo_out: &str,
50     metadata: &WorkspaceMetadata,
51     base_directory: impl AsRef<Path>,
52 ) -> Result<Vec<Crate>> {
53     let cargo_out = CargoOut::parse(cargo_out).context("failed to parse cargo.out")?;
54     debug!("Parsed cargo output: {:?}", cargo_out);
55 
56     assert!(cargo_out.cc_invocations.is_empty(), "cc not supported yet");
57     assert!(cargo_out.ar_invocations.is_empty(), "ar not supported yet");
58 
59     let mut crates = Vec::new();
60     for rustc in cargo_out.rustc_invocations.iter() {
61         let c = Crate::from_rustc_invocation(rustc, metadata, &cargo_out.tests)
62             .with_context(|| format!("failed to process rustc invocation: {rustc}"))?;
63         // Ignore build.rs crates.
64         if c.name.starts_with("build_script_") {
65             continue;
66         }
67         // Ignore crates outside the base directory.
68         if !c.package_dir.starts_with(&base_directory) {
69             continue;
70         }
71         crates.push(c);
72     }
73     crates.dedup();
74     Ok(crates)
75 }
76 
/// Whether a test target contains any tests or benchmarks.
///
/// Filled in from the `N tests, M benchmarks` summary line of `cargo test -- --list` output.
#[derive(Debug)]
struct TestContents {
    // True when the reported number of tests is non-zero.
    tests: bool,
    // True when the reported number of benchmarks is non-zero.
    benchmarks: bool,
}
83 
/// Raw-ish data extracted from cargo.out file.
///
/// Populated by `CargoOut::parse`, which classifies each line of the cargo output.
#[derive(Debug, Default)]
struct CargoOut {
    // Argument strings of each `rustc` command found in the output (the text following `rustc `),
    // in the order they appear.
    rustc_invocations: Vec<String>,

    // package name => cmd args
    // A later invocation for the same package replaces an earlier one.
    cc_invocations: BTreeMap<String, String>,
    ar_invocations: BTreeMap<String, String>,

    // lines starting with "warning: ".
    // line number => line
    // Line numbers are zero-based indices into the cargo output.
    warning_lines: BTreeMap<usize, String>,
    // File paths taken from `--> path:line` lines that directly follow a warning line.
    warning_files: Vec<String>,

    // output filename => test filename => whether it contains any tests or benchmarks
    tests: BTreeMap<String, BTreeMap<PathBuf, TestContents>>,

    // Lines starting with "error: " or "error[E" seen outside of a `cargo test ... --list` run.
    errors: Vec<String>,
    // Lines starting with "error: " or "error[E" seen during a `cargo test ... --list` run.
    test_errors: Vec<String>,
}
104 
match1(regex: &Regex, s: &str) -> Option<String>105 fn match1(regex: &Regex, s: &str) -> Option<String> {
106     regex.captures(s).and_then(|x| x.get(1)).map(|x| x.as_str().to_string())
107 }
108 
match3(regex: &Regex, s: &str) -> Option<(String, String, String)>109 fn match3(regex: &Regex, s: &str) -> Option<(String, String, String)> {
110     regex.captures(s).and_then(|x| match (x.get(1), x.get(2), x.get(3)) {
111         (Some(a), Some(b), Some(c)) => {
112             Some((a.as_str().to_string(), b.as_str().to_string(), c.as_str().to_string()))
113         }
114         _ => None,
115     })
116 }
117 
118 impl CargoOut {
119     /// Parse the output of a `cargo build -v` run.
parse(contents: &str) -> Result<CargoOut>120     fn parse(contents: &str) -> Result<CargoOut> {
121         let mut result = CargoOut::default();
122         let mut in_tests = false;
123         let mut cur_test_key = None;
124         let mut lines_iter = contents.lines().enumerate();
125         while let Some((n, line)) = lines_iter.next() {
126             if line.starts_with("warning: ") {
127                 result.warning_lines.insert(n, line.to_string());
128                 continue;
129             }
130 
131             // Cargo -v output of a call to rustc.
132             static RUSTC_REGEX: Lazy<Regex> =
133                 Lazy::new(|| Regex::new(r"^ +Running `rustc (.*)`$").unwrap());
134             if let Some(args) = match1(&RUSTC_REGEX, line) {
135                 result.rustc_invocations.push(args);
136                 continue;
137             }
138             // Cargo -vv output of a call to rustc could be split into multiple lines.
139             // Assume that the first line will contain some CARGO_* env definition.
140             static RUSTC_VV_REGEX: Lazy<Regex> =
141                 Lazy::new(|| Regex::new(r"^ +Running `.*CARGO_.*=.*$").unwrap());
142             if RUSTC_VV_REGEX.is_match(line) {
143                 // cargo build -vv output can have multiple lines for a rustc command due to
144                 // '\n' in strings for environment variables.
145                 let mut line = line.to_string();
146                 loop {
147                     // Use an heuristic to detect the completions of a multi-line command.
148                     if line.ends_with('`') && line.chars().filter(|c| *c == '`').count() % 2 == 0 {
149                         break;
150                     }
151                     if let Some((_, next_line)) = lines_iter.next() {
152                         line += next_line;
153                         continue;
154                     }
155                     break;
156                 }
157                 // The combined -vv output rustc command line pattern.
158                 static RUSTC_VV_CMD_ARGS: Lazy<Regex> =
159                     Lazy::new(|| Regex::new(r"^ *Running `.*CARGO_.*=.* rustc (.*)`$").unwrap());
160                 if let Some(args) = match1(&RUSTC_VV_CMD_ARGS, &line) {
161                     result.rustc_invocations.push(args);
162                 } else {
163                     bail!("failed to parse cargo.out line: {}", line);
164                 }
165                 continue;
166             }
167             // Cargo -vv output of a "cc" or "ar" command; all in one line.
168             static CC_AR_VV_REGEX: Lazy<Regex> = Lazy::new(|| {
169                 Regex::new(r#"^\[([^ ]*)[^\]]*\] running:? "(cc|ar)" (.*)$"#).unwrap()
170             });
171             if let Some((pkg, cmd, args)) = match3(&CC_AR_VV_REGEX, line) {
172                 match cmd.as_str() {
173                     "ar" => result.ar_invocations.insert(pkg, args),
174                     "cc" => result.cc_invocations.insert(pkg, args),
175                     _ => unreachable!(),
176                 };
177                 continue;
178             }
179             // Rustc output of file location path pattern for a warning message.
180             static WARNING_FILE_REGEX: Lazy<Regex> =
181                 Lazy::new(|| Regex::new(r"^ *--> ([^:]*):[0-9]+").unwrap());
182             if result.warning_lines.contains_key(&n.saturating_sub(1)) {
183                 if let Some(fpath) = match1(&WARNING_FILE_REGEX, line) {
184                     result.warning_files.push(fpath);
185                     continue;
186                 }
187             }
188             if line.starts_with("error: ") || line.starts_with("error[E") {
189                 if in_tests {
190                     result.test_errors.push(line.to_string());
191                 } else {
192                     result.errors.push(line.to_string());
193                 }
194                 continue;
195             }
196             static CARGO2ANDROID_RUNNING_REGEX: Lazy<Regex> =
197                 Lazy::new(|| Regex::new(r"^### Running: .*$").unwrap());
198             if CARGO2ANDROID_RUNNING_REGEX.is_match(line) {
199                 in_tests = line.contains("cargo test") && line.contains("--list");
200                 continue;
201             }
202 
203             // `cargo test -- --list` output
204             // Example: Running unittests src/lib.rs (target.tmp/x86_64-unknown-linux-gnu/debug/deps/aarch64-58b675be7dc09833)
205             static CARGO_TEST_LIST_START_PAT: Lazy<Regex> =
206                 Lazy::new(|| Regex::new(r"^\s*Running (?:unittests )?(.*) \(.*/(.*)\)$").unwrap());
207             static CARGO_TEST_LIST_END_PAT: Lazy<Regex> =
208                 Lazy::new(|| Regex::new(r"^(\d+) tests?, (\d+) benchmarks$").unwrap());
209             if let Some(captures) = CARGO_TEST_LIST_START_PAT.captures(line) {
210                 cur_test_key =
211                     Some((captures.get(2).unwrap().as_str(), captures.get(1).unwrap().as_str()));
212             } else if let Some((output_filename, main_src)) = cur_test_key {
213                 if let Some(captures) = CARGO_TEST_LIST_END_PAT.captures(line) {
214                     let num_tests = captures.get(1).unwrap().as_str().parse::<u32>().unwrap();
215                     let num_benchmarks = captures.get(2).unwrap().as_str().parse::<u32>().unwrap();
216                     result.tests.entry(output_filename.to_owned()).or_default().insert(
217                         PathBuf::from(main_src),
218                         TestContents { tests: num_tests != 0, benchmarks: num_benchmarks != 0 },
219                     );
220                     cur_test_key = None;
221                 }
222             }
223         }
224 
225         // self.find_warning_owners()
226 
227         Ok(result)
228     }
229 }
230 
231 impl Crate {
from_rustc_invocation( rustc: &str, metadata: &WorkspaceMetadata, tests: &BTreeMap<String, BTreeMap<PathBuf, TestContents>>, ) -> Result<Crate>232     fn from_rustc_invocation(
233         rustc: &str,
234         metadata: &WorkspaceMetadata,
235         tests: &BTreeMap<String, BTreeMap<PathBuf, TestContents>>,
236     ) -> Result<Crate> {
237         let mut out = Crate::default();
238         let mut extra_filename = String::new();
239 
240         // split into args
241         let args: Vec<&str> = rustc.split_whitespace().collect();
242         let mut arg_iter = args
243             .iter()
244             // Remove quotes from simple strings, panic for others.
245             .map(|arg| match (arg.chars().next(), arg.chars().skip(1).last()) {
246                 (Some('"'), Some('"')) => &arg[1..arg.len() - 1],
247                 (Some('\''), Some('\'')) => &arg[1..arg.len() - 1],
248                 (Some('"'), _) => panic!("can't handle strings with whitespace"),
249                 (Some('\''), _) => panic!("can't handle strings with whitespace"),
250                 _ => arg,
251             });
252         // process each arg
253         while let Some(arg) = arg_iter.next() {
254             match arg {
255                 "--crate-name" => out.name = arg_iter.next().unwrap().to_string(),
256                 "--crate-type" => out
257                     .types
258                     .push(CrateType::from_str(arg_iter.next().unwrap().to_string().as_str())),
259                 "--test" => out.types.push(CrateType::Test),
260                 "--target" => out.target = Some(arg_iter.next().unwrap().to_string()),
261                 "--cfg" => {
262                     // example: feature=\"sink\"
263                     let arg = arg_iter.next().unwrap();
264                     if let Some(feature) =
265                         arg.strip_prefix("feature=\"").and_then(|s| s.strip_suffix('\"'))
266                     {
267                         out.features.push(feature.to_string());
268                     } else {
269                         out.cfgs.push(arg.to_string());
270                     }
271                 }
272                 "--extern" => {
273                     // example: proc_macro
274                     // example: memoffset=/some/path/libmemoffset-2cfda327d156e680.rmeta
275                     let arg = arg_iter.next().unwrap();
276                     if let Some((name, path)) = arg.split_once('=') {
277                         let filename = path.split('/').last().unwrap();
278 
279                         // Example filename: "libgetrandom-fd8800939535fc59.rmeta" or "libmls_rs_uniffi.rlib".
280                         static REGEX: Lazy<Regex> = Lazy::new(|| {
281                             Regex::new(r"^lib([^-]*)(?:-[0-9a-f]*)?.(rlib|so|rmeta)$").unwrap()
282                         });
283 
284                         let Some(lib_name) = REGEX.captures(filename).and_then(|x| x.get(1)) else {
285                             bail!("bad filename for extern {}: {}", name, filename);
286                         };
287                         let extern_type =
288                             if filename.ends_with(".rlib") || filename.ends_with(".rmeta") {
289                                 ExternType::Rust
290                             } else if filename.ends_with(".so") {
291                                 // Assume .so files are always proc_macros. May not always be right.
292                                 ExternType::ProcMacro
293                             } else {
294                                 bail!("Unexpected extension for extern filename {}", filename);
295                             };
296                         out.externs.push(Extern {
297                             name: name.to_string(),
298                             lib_name: lib_name.as_str().to_string(),
299                             extern_type,
300                         });
301                     } else if arg != "proc_macro" {
302                         panic!("No filename for {}", arg);
303                     }
304                 }
305                 _ if arg.starts_with("-C") => {
306                     // handle both "-Cfoo" and "-C foo"
307                     let arg = if arg == "-C" {
308                         arg_iter.next().unwrap()
309                     } else {
310                         arg.strip_prefix("-C").unwrap()
311                     };
312                     // 'prefer-dynamic' does not work with common flag -C lto
313                     // 'embed-bitcode' is ignored; we might control LTO with other .bp flag
314                     // 'codegen-units' is set in Android global config or by default
315                     //
316                     // TODO: this is business logic. move it out of the parsing code
317                     if !arg.starts_with("codegen-units=")
318                         && !arg.starts_with("debuginfo=")
319                         && !arg.starts_with("embed-bitcode=")
320                         && !arg.starts_with("extra-filename=")
321                         && !arg.starts_with("incremental=")
322                         && !arg.starts_with("metadata=")
323                         && arg != "prefer-dynamic"
324                     {
325                         out.codegens.push(arg.to_string());
326                     }
327                     if let Some(x) = arg.strip_prefix("extra-filename=") {
328                         extra_filename = x.to_string();
329                     }
330                 }
331                 "--cap-lints" => out.cap_lints = arg_iter.next().unwrap().to_string(),
332                 "-l" => {
333                     let arg = arg_iter.next().unwrap();
334                     if let Some(lib) = arg.strip_prefix("static=") {
335                         out.static_libs.push(lib.to_string());
336                     } else if let Some(lib) = arg.strip_prefix("dylib=") {
337                         out.shared_libs.push(lib.to_string());
338                     } else {
339                         out.shared_libs.push(arg.to_string());
340                     }
341                 }
342                 _ if !arg.starts_with('-') => {
343                     (out.package_dir, out.main_src) = split_src_path(Path::new(arg))?;
344                 }
345 
346                 // ignored flags
347                 "-L" => {
348                     arg_iter.next().unwrap();
349                 }
350                 "--out-dir" => {
351                     arg_iter.next().unwrap();
352                 }
353                 "--color" => {
354                     arg_iter.next().unwrap();
355                 }
356                 _ if arg.starts_with("--error-format=") => {}
357                 _ if arg.starts_with("--emit=") => {}
358                 _ if arg.starts_with("--edition=") => {}
359                 _ if arg.starts_with("--json=") => {}
360                 _ if arg.starts_with("-Aclippy") => {}
361                 _ if arg.starts_with("--allow=clippy") => {}
362                 _ if arg.starts_with("-Wclippy") => {}
363                 _ if arg.starts_with("--warn=clippy") => {}
364                 _ if arg.starts_with("-A=rustdoc") => {}
365                 _ if arg.starts_with("--allow=rustdoc") => {}
366                 _ if arg.starts_with("-D") => {}
367                 _ if arg.starts_with("--deny=") => {}
368                 _ if arg.starts_with("-W") => {}
369                 _ if arg.starts_with("--warn=") => {}
370 
371                 arg => bail!("unsupported rustc argument: {arg:?}"),
372             }
373         }
374         out.cfgs.sort();
375         out.cfgs.dedup();
376         out.codegens.sort();
377         out.features.sort();
378 
379         if out.name.is_empty() {
380             bail!("missing --crate-name");
381         }
382         if out.main_src.as_os_str().is_empty() {
383             bail!("missing main source file");
384         }
385         // Must have at least one type.
386         if out.types.is_empty() {
387             if out.cfgs.contains(&"test".to_string()) {
388                 out.types.push(CrateType::TestNoHarness);
389             } else {
390                 bail!("failed to detect crate type. did not have --crate-type or --test or '--cfg test'");
391             }
392         }
393         if out.types.contains(&CrateType::Test) && out.types.len() != 1 {
394             bail!("cannot specify both --test and --crate-type");
395         }
396         if out.types.contains(&CrateType::Lib) && out.types.contains(&CrateType::RLib) {
397             bail!("cannot both have lib and rlib crate types");
398         }
399 
400         // Find the metadata for the crates containing package by matching the manifest's path.
401         let manifest_path = out.package_dir.join("Cargo.toml");
402         let package_metadata = metadata
403             .packages
404             .iter()
405             .find(|p| Path::new(&p.manifest_path).canonicalize().unwrap() == manifest_path)
406             .ok_or_else(|| {
407                 anyhow!(
408                     "can't find metadata for crate {:?} with manifest path {:?}",
409                     out.name,
410                     manifest_path,
411                 )
412             })?;
413         out.package_name.clone_from(&package_metadata.name);
414         out.version = Some(package_metadata.version.clone());
415         out.edition.clone_from(&package_metadata.edition);
416 
417         let output_filename = out.name.clone() + &extra_filename;
418         if let Some(test_contents) = tests.get(&output_filename).and_then(|m| m.get(&out.main_src))
419         {
420             out.empty_test = !test_contents.tests && !test_contents.benchmarks;
421         }
422 
423         Ok(out)
424     }
425 }
426 
427 /// Given a path to the main source file of some Rust crate, returns the canonical path to the
428 /// package directory, and the relative path to the source file within that directory.
split_src_path(src_path: &Path) -> Result<(PathBuf, PathBuf)>429 fn split_src_path(src_path: &Path) -> Result<(PathBuf, PathBuf)> {
430     // Canonicalize the path because:
431     //
432     // 1. We don't consistently get relative or absolute paths elsewhere. If we
433     //    canonicalize everything, it becomes easy to compare paths.
434     //
435     // 2. We don't want to consider symlinks to code outside the cwd as part of the
436     //    project (e.g. AOSP's import of crosvm has symlinks from crosvm's own 3p
437     //    directory to the android 3p directories).
438     let src_path = src_path
439         .canonicalize()
440         .unwrap_or_else(|e| panic!("failed to canonicalize {src_path:?}: {}", e));
441     let package_dir = find_cargo_toml(&src_path)?;
442     let main_src = src_path.strip_prefix(&package_dir).unwrap().to_path_buf();
443 
444     Ok((package_dir, main_src))
445 }
446 
447 /// Given a path to a Rust source file, finds the closest ancestor directory containing a
448 /// `Cargo.toml` file.
find_cargo_toml(src_path: &Path) -> Result<PathBuf>449 fn find_cargo_toml(src_path: &Path) -> Result<PathBuf> {
450     let mut package_dir = src_path.parent().unwrap();
451     while !package_dir.join("Cargo.toml").try_exists()? {
452         package_dir = package_dir
453             .parent()
454             .ok_or_else(|| anyhow!("No Cargo.toml found in parents of {:?}", src_path))?;
455     }
456     Ok(package_dir.to_path_buf())
457 }
458