1 /*
2 * Copyright 2016, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <fcntl.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <unistd.h>
23
24 #include <array>
25 #include <deque>
26 #include <string>
27 #include <unordered_map>
28 #include <utility>
29
30 #include <event2/event.h>
31 #include <event2/listener.h>
32 #include <event2/thread.h>
33
34 #include <android-base/cmsg.h>
35 #include <android-base/logging.h>
36 #include <android-base/properties.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/unique_fd.h>
39 #include <cutils/sockets.h>
40
41 #include "debuggerd/handler.h"
42 #include "dump_type.h"
43 #include "protocol.h"
44 #include "util.h"
45
46 #include "intercept_manager.h"
47
48 using android::base::GetIntProperty;
49 using android::base::SendFileDescriptors;
50 using android::base::StringPrintf;
51
52 using android::base::borrowed_fd;
53 using android::base::unique_fd;
54
55 static InterceptManager* intercept_manager;
56
// States a crash request can be in. Nothing in the code visible in this file
// refers to these enumerators.
enum CrashStatus {
  kCrashStatusRunning = 0,
  kCrashStatusQueued = 1,
};
61
62 struct CrashArtifact {
63 unique_fd fd;
64
devnullCrashArtifact65 static CrashArtifact devnull() {
66 CrashArtifact result;
67 result.fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC));
68 return result;
69 }
70 };
71
// File names chosen for the artifacts of a single crash.
struct CrashArtifactPaths {
  // Name of the text artifact.
  std::string text;

  // Name of the protobuf artifact; empty when no proto artifact is produced.
  std::optional<std::string> proto = std::nullopt;
};
76
77 struct CrashOutput {
78 CrashArtifact text;
79 std::optional<CrashArtifact> proto;
80 };
81
82 // Ownership of Crash is a bit messy.
83 // It's either owned by an active event that must have a timeout, or owned by
84 // queued_requests, in the case that multiple crashes come in at the same time.
85 struct Crash {
~CrashCrash86 ~Crash() { event_free(crash_event); }
87
88 CrashOutput output;
89 unique_fd crash_socket_fd;
90 pid_t crash_pid;
91 event* crash_event = nullptr;
92
93 DebuggerdDumpType crash_type;
94 };
95
96 class CrashQueue {
97 public:
CrashQueue(const std::string & dir_path,const std::string & file_name_prefix,size_t max_artifacts,size_t max_concurrent_dumps,bool supports_proto,bool world_readable)98 CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
99 size_t max_concurrent_dumps, bool supports_proto, bool world_readable)
100 : file_name_prefix_(file_name_prefix),
101 dir_path_(dir_path),
102 dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
103 max_artifacts_(max_artifacts),
104 next_artifact_(0),
105 max_concurrent_dumps_(max_concurrent_dumps),
106 num_concurrent_dumps_(0),
107 supports_proto_(supports_proto),
108 world_readable_(world_readable) {
109 if (dir_fd_ == -1) {
110 PLOG(FATAL) << "failed to open directory: " << dir_path;
111 }
112
113 // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
114 // same filename could be handed out to multiple processes.
115 CHECK(max_artifacts_ > max_concurrent_dumps_);
116
117 find_oldest_artifact();
118 }
119
for_crash(const Crash * crash)120 static CrashQueue* for_crash(const Crash* crash) {
121 return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
122 }
123
for_crash(const std::unique_ptr<Crash> & crash)124 static CrashQueue* for_crash(const std::unique_ptr<Crash>& crash) {
125 return for_crash(crash.get());
126 }
127
for_tombstones()128 static CrashQueue* for_tombstones() {
129 static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
130 GetIntProperty("tombstoned.max_tombstone_count", 32),
131 1 /* max_concurrent_dumps */, true /* supports_proto */,
132 true /* world_readable */);
133 return &queue;
134 }
135
for_anrs()136 static CrashQueue* for_anrs() {
137 static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
138 GetIntProperty("tombstoned.max_anr_count", 64),
139 4 /* max_concurrent_dumps */, false /* supports_proto */,
140 false /* world_readable */);
141 return &queue;
142 }
143
create_temporary_file() const144 CrashArtifact create_temporary_file() const {
145 CrashArtifact result;
146
147 std::optional<std::string> path;
148 result.fd.reset(openat(dir_fd_, ".", O_WRONLY | O_APPEND | O_TMPFILE | O_CLOEXEC, 0660));
149 if (result.fd == -1) {
150 PLOG(FATAL) << "failed to create temporary tombstone in " << dir_path_;
151 }
152
153 if (world_readable_) {
154 // We need to fchmodat after creating to avoid getting the umask applied.
155 std::string fd_path = StringPrintf("/proc/self/fd/%d", result.fd.get());
156 if (fchmodat(dir_fd_, fd_path.c_str(), 0664, 0) != 0) {
157 PLOG(ERROR) << "Failed to make tombstone world-readable";
158 }
159 }
160
161 return std::move(result);
162 }
163
get_output(DebuggerdDumpType dump_type)164 std::optional<CrashOutput> get_output(DebuggerdDumpType dump_type) {
165 CrashOutput result;
166
167 switch (dump_type) {
168 case kDebuggerdNativeBacktrace:
169 // Don't generate tombstones for native backtrace requests.
170 return {};
171
172 case kDebuggerdTombstoneProto:
173 if (!supports_proto_) {
174 LOG(ERROR) << "received kDebuggerdTombstoneProto on a queue that doesn't support proto";
175 return {};
176 }
177 result.proto = create_temporary_file();
178 result.text = create_temporary_file();
179 break;
180
181 case kDebuggerdJavaBacktrace:
182 case kDebuggerdTombstone:
183 result.text = create_temporary_file();
184 break;
185
186 default:
187 LOG(ERROR) << "unexpected dump type: " << dump_type;
188 return {};
189 }
190
191 return result;
192 }
193
dir_fd()194 borrowed_fd dir_fd() { return dir_fd_; }
195
get_next_artifact_paths()196 CrashArtifactPaths get_next_artifact_paths() {
197 CrashArtifactPaths result;
198 result.text = StringPrintf("%s%02d", file_name_prefix_.c_str(), next_artifact_);
199
200 if (supports_proto_) {
201 result.proto = StringPrintf("%s%02d.pb", file_name_prefix_.c_str(), next_artifact_);
202 }
203
204 next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
205 return result;
206 }
207
208 // Consumes crash if it returns true, otherwise leaves it untouched.
maybe_enqueue_crash(std::unique_ptr<Crash> && crash)209 bool maybe_enqueue_crash(std::unique_ptr<Crash>&& crash) {
210 if (num_concurrent_dumps_ == max_concurrent_dumps_) {
211 queued_requests_.emplace_back(std::move(crash));
212 return true;
213 }
214
215 return false;
216 }
217
maybe_dequeue_crashes(void (* handler)(std::unique_ptr<Crash> crash))218 void maybe_dequeue_crashes(void (*handler)(std::unique_ptr<Crash> crash)) {
219 while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
220 std::unique_ptr<Crash> next_crash = std::move(queued_requests_.front());
221 queued_requests_.pop_front();
222 handler(std::move(next_crash));
223 }
224 }
225
on_crash_started()226 void on_crash_started() { ++num_concurrent_dumps_; }
227
on_crash_completed()228 void on_crash_completed() { --num_concurrent_dumps_; }
229
230 private:
find_oldest_artifact()231 void find_oldest_artifact() {
232 size_t oldest_tombstone = 0;
233 time_t oldest_time = std::numeric_limits<time_t>::max();
234
235 for (size_t i = 0; i < max_artifacts_; ++i) {
236 std::string path =
237 StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
238 struct stat st;
239 if (stat(path.c_str(), &st) != 0) {
240 if (errno == ENOENT) {
241 oldest_tombstone = i;
242 break;
243 } else {
244 PLOG(ERROR) << "failed to stat " << path;
245 continue;
246 }
247 }
248
249 if (st.st_mtime < oldest_time) {
250 oldest_tombstone = i;
251 oldest_time = st.st_mtime;
252 }
253 }
254
255 next_artifact_ = oldest_tombstone;
256 }
257
258 const std::string file_name_prefix_;
259
260 const std::string dir_path_;
261 const int dir_fd_;
262
263 const size_t max_artifacts_;
264 int next_artifact_;
265
266 const size_t max_concurrent_dumps_;
267 size_t num_concurrent_dumps_;
268
269 bool supports_proto_;
270 bool world_readable_;
271
272 std::deque<std::unique_ptr<Crash>> queued_requests_;
273
274 DISALLOW_COPY_AND_ASSIGN(CrashQueue);
275 };
276
// Whether java trace dumps are produced via tombstoned. When set, main() also
// listens on the java trace control socket.
static constexpr bool kJavaTraceDumpsEnabled{true};
279
280 // Forward declare the callbacks so they can be placed in a sensible order.
281 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
282 void*);
283 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
284 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
285
perform_request(std::unique_ptr<Crash> crash)286 static void perform_request(std::unique_ptr<Crash> crash) {
287 unique_fd output_fd;
288 if (intercept_manager->FindIntercept(crash->crash_pid, crash->crash_type, &output_fd)) {
289 if (crash->crash_type == kDebuggerdTombstoneProto) {
290 crash->output.proto = CrashArtifact::devnull();
291 }
292 } else {
293 if (auto o = CrashQueue::for_crash(crash.get())->get_output(crash->crash_type); o) {
294 crash->output = std::move(*o);
295 output_fd.reset(dup(crash->output.text.fd));
296 } else {
297 LOG(ERROR) << "failed to get crash output for type " << crash->crash_type;
298 return;
299 }
300 }
301
302 TombstonedCrashPacket response = {.packet_type = CrashPacketType::kPerformDump};
303
304 ssize_t rc = -1;
305 if (crash->output.proto) {
306 rc = SendFileDescriptors(crash->crash_socket_fd, &response, sizeof(response), output_fd.get(),
307 crash->output.proto->fd.get());
308 } else {
309 rc = SendFileDescriptors(crash->crash_socket_fd, &response, sizeof(response), output_fd.get());
310 }
311
312 output_fd.reset();
313
314 if (rc == -1) {
315 PLOG(WARNING) << "failed to send response to CrashRequest";
316 return;
317 } else if (rc != sizeof(response)) {
318 PLOG(WARNING) << "crash socket write returned short";
319 return;
320 }
321
322 // TODO: Make this configurable by the interceptor?
323 struct timeval timeout = {10 * android::base::HwTimeoutMultiplier(), 0};
324
325 event_base* base = event_get_base(crash->crash_event);
326
327 event_assign(crash->crash_event, base, crash->crash_socket_fd, EV_TIMEOUT | EV_READ,
328 crash_completed_cb, crash.get());
329 event_add(crash->crash_event, &timeout);
330 CrashQueue::for_crash(crash)->on_crash_started();
331
332 // The crash is now owned by the event loop.
333 crash.release();
334 }
335
crash_accept_cb(evconnlistener * listener,evutil_socket_t sockfd,sockaddr *,int,void *)336 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
337 void*) {
338 event_base* base = evconnlistener_get_base(listener);
339 Crash* crash = new Crash();
340
341 // TODO: Make sure that only java crashes come in on the java socket
342 // and only native crashes on the native socket.
343 struct timeval timeout = {1 * android::base::HwTimeoutMultiplier(), 0};
344 event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
345 crash->crash_socket_fd.reset(sockfd);
346 crash->crash_event = crash_event;
347 event_add(crash_event, &timeout);
348 }
349
crash_request_cb(evutil_socket_t sockfd,short ev,void * arg)350 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
351 std::unique_ptr<Crash> crash(static_cast<Crash*>(arg));
352 TombstonedCrashPacket request = {};
353
354 if ((ev & EV_TIMEOUT) != 0) {
355 LOG(WARNING) << "crash request timed out";
356 return;
357 } else if ((ev & EV_READ) == 0) {
358 LOG(WARNING) << "tombstoned received unexpected event from crash socket";
359 return;
360 }
361
362 ssize_t rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
363 if (rc == -1) {
364 PLOG(WARNING) << "failed to read from crash socket";
365 return;
366 } else if (rc != sizeof(request)) {
367 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
368 << sizeof(request) << ")";
369 return;
370 }
371
372 if (request.packet_type != CrashPacketType::kDumpRequest) {
373 LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
374 << StringPrintf("%#2hhX", request.packet_type);
375 return;
376 }
377
378 crash->crash_type = request.packet.dump_request.dump_type;
379 if (crash->crash_type < 0 || crash->crash_type > kDebuggerdTombstoneProto) {
380 LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
381 return;
382 }
383
384 if (crash->crash_type != kDebuggerdJavaBacktrace) {
385 crash->crash_pid = request.packet.dump_request.pid;
386 } else {
387 // Requests for java traces are sent from untrusted processes, so we
388 // must not trust the PID sent down with the request. Instead, we ask the
389 // kernel.
390 ucred cr = {};
391 socklen_t len = sizeof(cr);
392 int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
393 if (ret != 0) {
394 PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
395 return;
396 }
397
398 crash->crash_pid = cr.pid;
399 }
400
401 pid_t crash_pid = crash->crash_pid;
402 LOG(INFO) << "received crash request for pid " << crash_pid;
403
404 if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(std::move(crash))) {
405 LOG(INFO) << "enqueueing crash request for pid " << crash_pid;
406 } else {
407 perform_request(std::move(crash));
408 }
409 }
410
rename_tombstone_fd(borrowed_fd fd,borrowed_fd dirfd,const std::string & path)411 static bool rename_tombstone_fd(borrowed_fd fd, borrowed_fd dirfd, const std::string& path) {
412 // Always try to unlink the tombstone file.
413 // linkat doesn't let us replace a file, so we need to unlink before linking
414 // our results onto disk, and if we fail for some reason, we should delete
415 // stale tombstones to avoid confusing inconsistency.
416 int rc = unlinkat(dirfd.get(), path.c_str(), 0);
417 if (rc != 0 && errno != ENOENT) {
418 PLOG(ERROR) << "failed to unlink tombstone at " << path;
419 return false;
420 }
421
422 // This fd is created inside of dirfd in CrashQueue::create_temporary_file.
423 std::string fd_path = StringPrintf("/proc/self/fd/%d", fd.get());
424 rc = linkat(AT_FDCWD, fd_path.c_str(), dirfd.get(), path.c_str(), AT_SYMLINK_FOLLOW);
425 if (rc != 0) {
426 PLOG(ERROR) << "failed to link tombstone at " << path;
427 return false;
428 }
429 return true;
430 }
431
crash_completed(borrowed_fd sockfd,std::unique_ptr<Crash> crash)432 static void crash_completed(borrowed_fd sockfd, std::unique_ptr<Crash> crash) {
433 TombstonedCrashPacket request = {};
434 CrashQueue* queue = CrashQueue::for_crash(crash);
435
436 ssize_t rc = TEMP_FAILURE_RETRY(read(sockfd.get(), &request, sizeof(request)));
437 if (rc == -1) {
438 PLOG(WARNING) << "failed to read from crash socket";
439 return;
440 } else if (rc != sizeof(request)) {
441 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
442 << sizeof(request) << ")";
443 return;
444 }
445
446 if (request.packet_type != CrashPacketType::kCompletedDump) {
447 LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
448 << uint32_t(request.packet_type);
449 return;
450 }
451
452 if (crash->output.text.fd == -1) {
453 LOG(WARNING) << "skipping tombstone file creation due to intercept";
454 return;
455 }
456
457 CrashArtifactPaths paths = queue->get_next_artifact_paths();
458
459 if (crash->output.proto && crash->output.proto->fd != -1) {
460 if (!paths.proto) {
461 LOG(ERROR) << "missing path for proto tombstone";
462 } else {
463 rename_tombstone_fd(crash->output.proto->fd, queue->dir_fd(), *paths.proto);
464 }
465 }
466
467 if (rename_tombstone_fd(crash->output.text.fd, queue->dir_fd(), paths.text)) {
468 if (crash->crash_type == kDebuggerdJavaBacktrace) {
469 LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << paths.text;
470 } else {
471 // NOTE: Several tools parse this log message to figure out where the
472 // tombstone associated with a given native crash was written. Any changes
473 // to this message must be carefully considered.
474 LOG(ERROR) << "Tombstone written to: " << paths.text;
475 }
476 }
477 }
478
crash_completed_cb(evutil_socket_t sockfd,short ev,void * arg)479 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
480 std::unique_ptr<Crash> crash(static_cast<Crash*>(arg));
481 CrashQueue* queue = CrashQueue::for_crash(crash);
482
483 queue->on_crash_completed();
484
485 if ((ev & EV_READ) == EV_READ) {
486 crash_completed(sockfd, std::move(crash));
487 }
488
489 // If there's something queued up, let them proceed.
490 queue->maybe_dequeue_crashes(perform_request);
491 }
492
main(int,char * [])493 int main(int, char* []) {
494 umask(0117);
495
496 // Don't try to connect to ourselves if we crash.
497 struct sigaction action = {};
498 action.sa_handler = [](int signal) {
499 LOG(ERROR) << "received fatal signal " << signal;
500 _exit(1);
501 };
502 debuggerd_register_handlers(&action);
503
504 int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
505 int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
506
507 if (intercept_socket == -1 || crash_socket == -1) {
508 PLOG(FATAL) << "failed to get socket from init";
509 }
510
511 evutil_make_socket_nonblocking(intercept_socket);
512 evutil_make_socket_nonblocking(crash_socket);
513
514 event_base* base = event_base_new();
515 if (!base) {
516 LOG(FATAL) << "failed to create event_base";
517 }
518
519 intercept_manager = new InterceptManager(base, intercept_socket);
520
521 evconnlistener* tombstone_listener =
522 evconnlistener_new(base, crash_accept_cb, CrashQueue::for_tombstones(), LEV_OPT_CLOSE_ON_FREE,
523 -1 /* backlog */, crash_socket);
524 if (!tombstone_listener) {
525 LOG(FATAL) << "failed to create evconnlistener for tombstones.";
526 }
527
528 if (kJavaTraceDumpsEnabled) {
529 const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
530 if (java_trace_socket == -1) {
531 PLOG(FATAL) << "failed to get socket from init";
532 }
533
534 evutil_make_socket_nonblocking(java_trace_socket);
535 evconnlistener* java_trace_listener =
536 evconnlistener_new(base, crash_accept_cb, CrashQueue::for_anrs(), LEV_OPT_CLOSE_ON_FREE,
537 -1 /* backlog */, java_trace_socket);
538 if (!java_trace_listener) {
539 LOG(FATAL) << "failed to create evconnlistener for java traces.";
540 }
541 }
542
543 LOG(INFO) << "tombstoned successfully initialized";
544 event_base_dispatch(base);
545 }
546