// Copyright (C) 2021 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include <benchmark.pb.h>

#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>

// One strace record, split into its textual components. Nothing is interpreted here: every field
// keeps the text exactly as it appeared in the trace.
struct Syscall {
  // Text preceding the opening parenthesis, e.g. "openat".
  std::string name;
  // Arguments in call order, each kept as raw text (quotes included).
  std::vector<std::string> arguments;
  // Text following the "= " that comes after the argument list.
  std::string return_value;
};

// Reads the strace output stored at `file_path`. Returns the lines of the file in order, without
// their line terminators. If the file cannot be opened, the returned list is empty.
std::vector<std::string> ReadLines(const std::string& file_path) {
  std::vector<std::string> lines;

  // The stream is closed by its destructor when the function returns.
  std::ifstream input(file_path);
  for (std::string line; std::getline(input, line);) {
    lines.push_back(line);
  }

  return lines;
}

46 // Processes the given line into syscall name, arguments and return value
ProcessLine(const std::string & line)47 Syscall ProcessLine(const std::string& line) {
48   Syscall syscall;
49 
50   syscall.name = line.substr(0, line.find('('));
51   std::string raw_arguments = line.substr(line.find('(') + 1, line.find(')') - line.find('(') - 1);
52   syscall.return_value = line.substr(line.find(')'));
53   syscall.return_value = syscall.return_value.substr(syscall.return_value.find("= ") + 2);
54 
55   size_t next = 0;
56   size_t last = 0;
57   while ((next = raw_arguments.find(", ", last)) != std::string::npos) {
58     std::string part = raw_arguments.substr(last, next - last);
59     last = next + 2;
60     if (part.size() != 0) syscall.arguments.push_back(part);
61   }
62   std::string part = raw_arguments.substr(last);
63   if (part.size() != 0) syscall.arguments.push_back(part);
64 
65   return syscall;
66 }
67 
// Splits lines by pid. Returns a map where pid maps to the list of its lines, in input order.
//
// The pid is parsed from the text before the first space of the line; a line whose first token is
// not a number is filed under pid 0. Empty lines carry no record and are skipped.
std::map<int, std::vector<std::string>> SplitByPid(const std::vector<std::string>& lines) {
  std::map<int, std::vector<std::string>> lines_by_pid;

  for (const auto& line : lines) {
    if (line.empty()) continue;

    const std::string pid_text = line.substr(0, line.find(' '));
    const int pid = static_cast<int>(std::strtoll(pid_text.c_str(), nullptr, 10));
    lines_by_pid[pid].push_back(line);
  }

  return lines_by_pid;
}

80 // Goes through all the lines for each pid, merges lines with unfinished and resumed tags, then
81 // calls ProcessLine on each of those merged lines. Returns a map where pid maps to a list of
82 // processed lines/syscalls
ProcessLines(const std::map<int,std::vector<std::string>> & lines_by_pid)83 std::map<int, std::vector<Syscall>> ProcessLines(
84     const std::map<int, std::vector<std::string>>& lines_by_pid) {
85   std::map<int, std::vector<Syscall>> processed_syscalls_by_pid;
86 
87   for (const auto& [pid, lines] : lines_by_pid) {
88     for (std::size_t i = 0; i < lines.size(); ++i) {
89       auto line = lines[i];
90 
91       // If only the resumed part of the syscall was found, ignore it
92       if (line.find("resumed>") != std::string::npos) continue;
93 
94       // If the syscall is detached, ignore it
95       if (line.find("<detached ...>") != std::string::npos) continue;
96 
97       // If the line contains "unfinished", concatenate it with the next line, which should contain
98       // "resumed"
99       if (line.find("<unfinished ...>") != std::string::npos) {
100         // Remove the "unfinished" tag
101         line = line.substr(0, line.find("<unfinished ...>"));
102 
103         // If the next line does not exist, ignore the syscall altogether
104         if (i + 1 >= lines.size()) continue;
105 
106         auto second_line = lines[++i];
107 
108         // Remove the "resumed" tag
109         second_line = second_line.substr(second_line.find("resumed>") + std::strlen("resumed>"));
110         // Concatenate both lines
111         line += second_line;
112       }
113 
114       // Remove the pid
115       line = line.substr(line.find("  ") + 2);
116 
117       // If the line starts with "---" or "+++", ignore it
118       if (line.length() >= 3 && (line.substr(0, 3) == "---" || line.substr(0, 3) == "+++"))
119         continue;
120 
121       auto processed_syscall = ProcessLine(line);
122       processed_syscalls_by_pid[pid].push_back(processed_syscall);
123     }
124   }
125 
126   return processed_syscalls_by_pid;
127 }
128 
main(int argc,char ** argv)129 int main(int argc, char** argv) {
130   if (argc != 3) {
131     std::cerr << "Invalid number of arguments.\n";
132     exit(EXIT_FAILURE);
133   }
134 
135   auto raw_lines = ReadLines(argv[1]);
136   auto raw_lines_by_pid = SplitByPid(raw_lines);
137   auto processed_syscalls_by_pid = ProcessLines(raw_lines_by_pid);
138 
139   std::string absolute_path = argv[2];
140 
141   // Initialize .ditto file
142   auto benchmark = std::make_unique<dittosuiteproto::Benchmark>();
143   auto main_instruction_set = benchmark->mutable_main()->mutable_instruction_set();
144   benchmark->mutable_global()->set_absolute_path(absolute_path);
145 
146   // Iterate over each pid and its processed lines. Start creating instructions after first openat()
147   // syscall, whose file name includes the provided absolute path, is found
148   for (const auto& [pid, syscalls] : processed_syscalls_by_pid) {
149     std::map<int, std::unique_ptr<dittosuiteproto::InstructionSet>> instruction_set_by_fd;
150     for (const auto& syscall : syscalls) {
151       if (syscall.name == "openat" &&
152           syscall.arguments[1].find(absolute_path) != std::string::npos) {
153         // Remove absolute_path
154         std::string path_name = syscall.arguments[1].substr(absolute_path.size() + 2);
155         // Remove quotes at the end
156         path_name.pop_back();
157 
158         // If the return value is -1, ignore it
159         if (syscall.return_value.find("-1") != std::string::npos) continue;
160 
161         int fd = strtoll(syscall.return_value.c_str(), nullptr, 10);
162 
163         // Create .ditto instruction set for this fd with open file instruction
164         instruction_set_by_fd[fd] = std::make_unique<dittosuiteproto::InstructionSet>();
165         auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_open_file();
166         instruction->set_path_name(path_name);
167         instruction->set_output_fd("fd");
168       } else if (syscall.name == "pread64") {
169         int fd = strtoll(syscall.arguments[0].c_str(), nullptr, 10);
170 
171         if (syscall.arguments.size() != 4) continue;
172         if (instruction_set_by_fd.find(fd) == instruction_set_by_fd.end()) continue;
173 
174         int64_t size = strtoll(syscall.arguments[2].c_str(), nullptr, 10);
175         int64_t offset = strtoll(syscall.arguments[3].c_str(), nullptr, 10);
176 
177         // Create .ditto read file instruction
178         auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_read_file();
179         instruction->set_input_fd("fd");
180         instruction->set_size(size);
181         instruction->set_block_size(size);
182         instruction->set_starting_offset(offset);
183       } else if (syscall.name == "pwrite64") {
184         int fd = strtoll(syscall.arguments[0].c_str(), nullptr, 10);
185 
186         if (syscall.arguments.size() != 4) continue;
187         if (instruction_set_by_fd.find(fd) == instruction_set_by_fd.end()) continue;
188 
189         int64_t size = strtoll(syscall.arguments[2].c_str(), nullptr, 10);
190         int64_t offset = strtoll(syscall.arguments[3].c_str(), nullptr, 10);
191 
192         // Create .ditto write file instruction
193         auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_write_file();
194         instruction->set_input_fd("fd");
195         instruction->set_size(size);
196         instruction->set_block_size(size);
197         instruction->set_starting_offset(offset);
198       } else if (syscall.name == "close") {
199         int fd = strtoll(syscall.arguments[0].c_str(), nullptr, 10);
200 
201         if (instruction_set_by_fd.find(fd) == instruction_set_by_fd.end()) continue;
202 
203         // Create .ditto close file instruction
204         auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_close_file();
205         instruction->set_input_fd("fd");
206 
207         // Add the instruction set for this fd to the main instruction set
208         main_instruction_set->add_instructions()->set_allocated_instruction_set(
209             instruction_set_by_fd[fd].release());
210         instruction_set_by_fd.erase(instruction_set_by_fd.find(fd));
211       }
212     }
213   }
214 
215   auto output = std::make_unique<google::protobuf::io::OstreamOutputStream>(&std::cout);
216   google::protobuf::TextFormat::Print(*benchmark, output.get());
217   return 0;
218 }
219