1 /*
2  * Copyright 2016, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <debuggerd/client.h>

#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <chrono>
#include <iomanip>
#include <limits>

#include <android-base/cmsg.h>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <bionic/reserved_signals.h>
#include <cutils/sockets.h>
#include <procinfo/process.h>

#include "debuggerd/handler.h"
#include "protocol.h"
#include "util.h"
45 
46 using namespace std::chrono_literals;
47 
48 using android::base::ReadFileToString;
49 using android::base::SendFileDescriptors;
50 using android::base::StringAppendV;
51 using android::base::unique_fd;
52 using android::base::WriteStringToFd;
53 
54 #define TAG "libdebuggerd_client: "
55 
56 // Log an error both to the log (via LOG(ERROR)) and to the given fd.
log_error(int fd,int errno_value,const char * format,...)57 static void log_error(int fd, int errno_value, const char* format, ...) __printflike(3, 4) {
58   std::string message(TAG);
59 
60   va_list ap;
61   va_start(ap, format);
62   StringAppendV(&message, format, ap);
63   va_end(ap);
64 
65   if (errno_value != 0) {
66     message = message + ": " + strerror(errno_value);
67   }
68 
69   if (fd != -1) {
70     dprintf(fd, "%s\n", message.c_str());
71   }
72 
73   LOG(ERROR) << message;
74 }
75 
// Splits |duration| into whole seconds plus the leftover microseconds and stores the two
// parts in |tv|. Anything finer than a microsecond is truncated by duration_cast.
template <typename Duration>
static void populate_timeval(struct timeval* tv, const Duration& duration) {
  using std::chrono::duration_cast;

  const auto whole_secs = duration_cast<std::chrono::seconds>(duration);
  const auto leftover_us = duration_cast<std::chrono::microseconds>(duration - whole_secs);

  tv->tv_sec = static_cast<long>(whole_secs.count());
  tv->tv_usec = static_cast<long>(leftover_us.count());
}
83 
84 /**
85  * Returns the wchan data for each thread in the process,
86  * or empty string if unable to obtain any data.
87  */
get_wchan_data(int fd,pid_t pid)88 static std::string get_wchan_data(int fd, pid_t pid) {
89   std::vector<pid_t> tids;
90   if (!android::procinfo::GetProcessTids(pid, &tids)) {
91     log_error(fd, 0, "failed to get process tids");
92     return "";
93   }
94 
95   std::stringstream data;
96   for (int tid : tids) {
97     std::string path = "/proc/" + std::to_string(pid) + "/task/" + std::to_string(tid) + "/wchan";
98     std::string wchan_str;
99     if (!ReadFileToString(path, &wchan_str, true)) {
100       log_error(fd, errno, "failed to read \"%s\"", path.c_str());
101       continue;
102     }
103     data << "sysTid=" << std::left << std::setw(10) << tid << wchan_str << "\n";
104   }
105 
106   std::stringstream buffer;
107   if (std::string str = data.str(); !str.empty()) {
108     buffer << "\n----- Waiting Channels: pid " << pid << " at " << get_timestamp() << " -----\n"
109            << "Cmd line: " << android::base::Join(get_command_line(pid), " ") << "\n";
110     buffer << "\n" << str << "\n";
111     buffer << "----- end " << std::to_string(pid) << " -----\n";
112     buffer << "\n";
113   }
114   return buffer.str();
115 }
116 
debuggerd_trigger_dump(pid_t tid,DebuggerdDumpType dump_type,unsigned int timeout_ms,unique_fd output_fd)117 bool debuggerd_trigger_dump(pid_t tid, DebuggerdDumpType dump_type, unsigned int timeout_ms,
118                             unique_fd output_fd) {
119   if (dump_type == kDebuggerdJavaBacktrace) {
120     // Java dumps always get sent to the tgid, so we need to resolve our tid to a tgid.
121     android::procinfo::ProcessInfo procinfo;
122     std::string error;
123     if (!android::procinfo::GetProcessInfo(tid, &procinfo, &error)) {
124       log_error(output_fd, 0, "failed to get process info: %s", error.c_str());
125       return false;
126     }
127     tid = procinfo.pid;
128   }
129 
130   LOG(INFO) << TAG "started dumping process " << tid;
131 
132   // Rather than try to deal with poll() all the way through the flow, we update
133   // the socket timeout between each step (and only use poll() during the final
134   // copy loop).
135   const auto end = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms);
136   auto update_timeout = [timeout_ms, &output_fd](int sockfd, auto end) {
137     if (timeout_ms <= 0) return true;
138 
139     auto remaining = end - std::chrono::steady_clock::now();
140     if (remaining < decltype(remaining)::zero()) {
141       log_error(output_fd, 0, "timeout expired");
142       return false;
143     }
144 
145     struct timeval timeout;
146     populate_timeval(&timeout, remaining);
147     if (setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != 0) {
148       log_error(output_fd, errno, "failed to set receive timeout");
149       return false;
150     }
151     if (setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) != 0) {
152       log_error(output_fd, errno, "failed to set send timeout");
153       return false;
154     }
155     return true;
156   };
157 
158   unique_fd sockfd(socket(AF_LOCAL, SOCK_SEQPACKET, 0));
159   if (sockfd == -1) {
160     log_error(output_fd, errno, "failed to create socket");
161     return false;
162   }
163 
164   if (!update_timeout(sockfd, end)) return false;
165 
166   if (socket_local_client_connect(sockfd.get(), kTombstonedInterceptSocketName,
167                                   ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET) == -1) {
168     log_error(output_fd, errno, "failed to connect to tombstoned");
169     return false;
170   }
171 
172   InterceptRequest req = {
173       .dump_type = dump_type,
174       .pid = tid,
175   };
176 
177   // Create an intermediate pipe to pass to the other end.
178   unique_fd pipe_read, pipe_write;
179   if (!Pipe(&pipe_read, &pipe_write)) {
180     log_error(output_fd, errno, "failed to create pipe");
181     return false;
182   }
183 
184   std::string pipe_size_str;
185   int pipe_buffer_size = 1024 * 1024;
186   if (android::base::ReadFileToString("/proc/sys/fs/pipe-max-size", &pipe_size_str)) {
187     pipe_size_str = android::base::Trim(pipe_size_str);
188 
189     if (!android::base::ParseInt(pipe_size_str.c_str(), &pipe_buffer_size, 0)) {
190       LOG(FATAL) << "failed to parse pipe max size '" << pipe_size_str << "'";
191     }
192   }
193 
194   if (fcntl(pipe_read.get(), F_SETPIPE_SZ, pipe_buffer_size) != pipe_buffer_size) {
195     log_error(output_fd, errno, "failed to set pipe buffer size");
196   }
197 
198   if (!update_timeout(sockfd, end)) return false;
199   ssize_t rc = SendFileDescriptors(sockfd, &req, sizeof(req), pipe_write.get());
200   pipe_write.reset();
201   if (rc != sizeof(req)) {
202     log_error(output_fd, errno, "failed to send output fd to tombstoned");
203     return false;
204   }
205 
206   auto get_response = [&output_fd](const char* kind, int sockfd, InterceptResponse* response) {
207     ssize_t rc = TEMP_FAILURE_RETRY(recv(sockfd, response, sizeof(*response), MSG_TRUNC));
208     if (rc == 0) {
209       log_error(output_fd, 0, "failed to read %s response from tombstoned: timeout reached?", kind);
210       return false;
211     } else if (rc == -1) {
212       log_error(output_fd, errno, "failed to read %s response from tombstoned", kind);
213       return false;
214     } else if (rc != sizeof(*response)) {
215       log_error(output_fd, 0,
216                 "received packet of unexpected length from tombstoned while reading %s response: "
217                 "expected %zd, received %zd",
218                 kind, sizeof(*response), rc);
219       return false;
220     }
221     return true;
222   };
223 
224   // Check to make sure we've successfully registered.
225   InterceptResponse response;
226   if (!update_timeout(sockfd, end)) return false;
227   if (!get_response("initial", sockfd, &response)) return false;
228   if (response.status != InterceptStatus::kRegistered) {
229     log_error(output_fd, 0, "unexpected registration response: %d",
230               static_cast<int>(response.status));
231     return false;
232   }
233 
234   // Send the signal.
235   const int signal = (dump_type == kDebuggerdJavaBacktrace) ? SIGQUIT : BIONIC_SIGNAL_DEBUGGER;
236   sigval val = {.sival_int = (dump_type == kDebuggerdNativeBacktrace) ? 1 : 0};
237   if (sigqueue(tid, signal, val) != 0) {
238     log_error(output_fd, errno, "failed to send signal to pid %d", tid);
239     return false;
240   }
241 
242   if (!update_timeout(sockfd, end)) return false;
243   if (!get_response("status", sockfd, &response)) return false;
244   if (response.status != InterceptStatus::kStarted) {
245     response.error_message[sizeof(response.error_message) - 1] = '\0';
246     log_error(output_fd, 0, "tombstoned reported failure: %s", response.error_message);
247     return false;
248   }
249 
250   // Forward output from the pipe to the output fd.
251   while (true) {
252     auto remaining = end - std::chrono::steady_clock::now();
253     auto remaining_ms = std::chrono::duration_cast<std::chrono::milliseconds>(remaining).count();
254     if (timeout_ms <= 0) {
255       remaining_ms = -1;
256     } else if (remaining_ms < 0) {
257       log_error(output_fd, 0, "timeout expired");
258       return false;
259     }
260 
261     struct pollfd pfd = {
262         .fd = pipe_read.get(), .events = POLLIN, .revents = 0,
263     };
264 
265     rc = poll(&pfd, 1, remaining_ms);
266     if (rc == -1) {
267       if (errno == EINTR) {
268         continue;
269       } else {
270         log_error(output_fd, errno, "error while polling");
271         return false;
272       }
273     } else if (rc == 0) {
274       log_error(output_fd, 0, "timeout expired");
275       return false;
276     }
277 
278     // WARNING: It's not possible to replace the below with a splice call.
279     // Due to the way debuggerd does many small writes across the pipe,
280     // this would cause splice to copy a page for each write. The second
281     // pipe fills up based on the number of pages being copied, even
282     // though there is not much data being transferred per page. When
283     // the second pipe is full, everything stops since there is nothing
284     // reading the second pipe to clear it.
285     char buf[1024];
286     rc = TEMP_FAILURE_RETRY(read(pipe_read.get(), buf, sizeof(buf)));
287     if (rc == 0) {
288       // Done.
289       break;
290     } else if (rc == -1) {
291       log_error(output_fd, errno, "error while reading");
292       return false;
293     }
294 
295     if (!android::base::WriteFully(output_fd.get(), buf, rc)) {
296       log_error(output_fd, errno, "error while writing");
297       return false;
298     }
299   }
300 
301   LOG(INFO) << TAG "done dumping process " << tid;
302 
303   return true;
304 }
305 
dump_backtrace_to_file(pid_t tid,DebuggerdDumpType dump_type,int fd)306 int dump_backtrace_to_file(pid_t tid, DebuggerdDumpType dump_type, int fd) {
307   return dump_backtrace_to_file_timeout(tid, dump_type, 0, fd);
308 }
309 
dump_backtrace_to_file_timeout(pid_t tid,DebuggerdDumpType dump_type,int timeout_secs,int fd)310 int dump_backtrace_to_file_timeout(pid_t tid, DebuggerdDumpType dump_type, int timeout_secs,
311                                    int fd) {
312   android::base::unique_fd copy(dup(fd));
313   if (copy == -1) {
314     return -1;
315   }
316 
317   // debuggerd_trigger_dump results in every thread in the process being interrupted
318   // by a signal, so we need to fetch the wchan data before calling that.
319   std::string wchan_data = get_wchan_data(fd, tid);
320 
321   int timeout_ms = timeout_secs > 0 ? timeout_secs * 1000 : 0;
322   int ret = debuggerd_trigger_dump(tid, dump_type, timeout_ms, std::move(copy)) ? 0 : -1;
323 
324   // Dump wchan data, since only privileged processes (CAP_SYS_ADMIN) can read
325   // kernel stack traces (/proc/*/stack).
326   if (!WriteStringToFd(wchan_data, fd)) {
327     LOG(WARNING) << TAG "Failed to dump wchan data for pid: " << tid;
328   }
329 
330   return ret;
331 }
332