1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "IptablesRestoreController"
18 #include "IptablesRestoreController.h"
19 
20 #include <poll.h>
21 #include <signal.h>
22 #include <sys/wait.h>
23 #include <unistd.h>
24 
25 #include <android-base/file.h>
26 #include <android-base/logging.h>
27 #include <android-base/properties.h>
28 #include <netdutils/Syscalls.h>
29 
30 #include "Controllers.h"
31 #include "NetdConstants.h"
32 
33 using android::netdutils::StatusOr;
34 using android::netdutils::sSyscalls;
35 
// Marker line written to the child's stdin after every command. Its echo on
// the child's stdout is what drainAndWaitForAck() looks for to decide that
// the command has been fully processed.
constexpr char PING[] = "#PING\n";

// Number of bytes in PING, not counting the terminating NUL.
constexpr size_t PING_SIZE = sizeof(PING) - 1;

// Absolute paths of the restore binaries launched by forkAndExec().
constexpr char IPTABLES_RESTORE_PATH[] = "/system/bin/iptables-restore";
constexpr char IP6TABLES_RESTORE_PATH[] = "/system/bin/ip6tables-restore";
42 
43 // Not compile-time constants because they are changed by the unit tests.
44 int IptablesRestoreController::MAX_RETRIES = 50;
45 int IptablesRestoreController::POLL_TIMEOUT_MS = 100 * android::base::HwTimeoutMultiplier();
46 
47 class IptablesProcess {
48 public:
IptablesProcess(const IptablesRestoreController::IptablesProcessType type,pid_t pid,int stdIn,int stdOut,int stdErr)49     IptablesProcess(const IptablesRestoreController::IptablesProcessType type,
50             pid_t pid, int stdIn, int stdOut, int stdErr) :
51         type(type),
52         pid(pid),
53         stdIn(stdIn),
54         processTerminated(false) {
55 
56         pollFds[STDOUT_IDX] = { .fd = stdOut, .events = POLLIN };
57         pollFds[STDERR_IDX] = { .fd = stdErr, .events = POLLIN };
58     }
59 
~IptablesProcess()60     ~IptablesProcess() {
61         close(stdIn);
62         close(pollFds[STDOUT_IDX].fd);
63         close(pollFds[STDERR_IDX].fd);
64     }
65 
outputReady()66     bool outputReady() {
67         struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT };
68         int ret = poll(&pollfd, 1, 0);
69         if (ret == -1) {
70             ALOGE("outputReady poll failed: %s", strerror(errno));
71             return false;
72         }
73         return (ret == 1) && !(pollfd.revents & POLLERR);
74     }
75 
stop()76     void stop() {
77         if (processTerminated) return;
78 
79         // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the
80         // process was killed by something else on the system). In both cases, it's safe to send the
81         // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls
82         // waitpid on it, so there's no risk that the PID is reused.
83         ::stopProcess(pid, (type == IptablesRestoreController::IPTABLES_PROCESS) ?
84                 "iptables-restore" : "ip6tables-restore");
85 
86         processTerminated = true;
87     }
88 
89     const IptablesRestoreController::IptablesProcessType type;
90     const pid_t pid;  // NOLINT(misc-non-private-member-variables-in-classes)
91     const int stdIn;  // NOLINT(misc-non-private-member-variables-in-classes)
92 
93     struct pollfd pollFds[2];
94     std::string errBuf;
95 
96     std::atomic_bool processTerminated;
97 
98     static constexpr size_t STDOUT_IDX = 0;
99     static constexpr size_t STDERR_IDX = 1;
100 };
101 
IptablesRestoreController()102 IptablesRestoreController::IptablesRestoreController() {
103     Init();
104 }
105 
~IptablesRestoreController()106 IptablesRestoreController::~IptablesRestoreController() {
107 }
108 
Init()109 void IptablesRestoreController::Init() {
110     // We cannot fork these in parallel or a child process could inherit the pipe fds intended for
111     // use by the other child process. see https://android-review.googlesource.com/469559 for what
112     // breaks. This does not cause a latency hit, because the parent only has to wait for
113     // forkAndExec, which is sub-millisecond, and the child processes then call exec() in parallel.
114     mIpRestore.reset(forkAndExec(IPTABLES_PROCESS));
115     mIp6Restore.reset(forkAndExec(IP6TABLES_PROCESS));
116 }
117 
118 /* static */
forkAndExec(const IptablesProcessType type)119 IptablesProcess* IptablesRestoreController::forkAndExec(const IptablesProcessType type) {
120     const char* const cmd = (type == IPTABLES_PROCESS) ?
121         IPTABLES_RESTORE_PATH : IP6TABLES_RESTORE_PATH;
122 
123     // Create the pipes we'll use for communication with the child
124     // process. One each for the child's in, out and err files.
125     int stdin_pipe[2];
126     int stdout_pipe[2];
127     int stderr_pipe[2];
128 
129     // Assumes stdin, stdout, stderr are already in use.
130     if (pipe2(stdin_pipe,  O_CLOEXEC) == -1 ||
131         pipe2(stdout_pipe, O_NONBLOCK | O_CLOEXEC) == -1 ||
132         pipe2(stderr_pipe, O_NONBLOCK | O_CLOEXEC) == -1) {
133 
134         ALOGE("pipe2() failed: %s", strerror(errno));
135         return nullptr;
136     }
137 
138     const auto& sys = sSyscalls.get();
139     StatusOr<pid_t> child_pid = sys.fork();
140     if (!isOk(child_pid)) {
141         ALOGE("fork() failed: %s", strerror(child_pid.status().code()));
142         return nullptr;
143     }
144 
145     if (child_pid.value() == 0) {
146         // The child process. Reads from stdin, writes to stderr and stdout.
147 
148         // stdin_pipe[0] : The read end of the stdin pipe.
149         // stdout_pipe[1] : The write end of the stdout pipe.
150         // stderr_pipe[1] : The write end of the stderr pipe.
151         // Note: dup2 does not set O_CLOEXEC. std*_pipe[*] is closed by execl.
152         if (dup2(stdin_pipe[0], 0) == -1 ||
153             dup2(stdout_pipe[1], 1) == -1 ||
154             dup2(stderr_pipe[1], 2) == -1) {
155             ALOGE("dup2() failed: %s", strerror(errno));
156             abort();
157         }
158 
159         if (execl(cmd,
160                   cmd,
161                   "--noflush",  // Don't flush the whole table.
162                   "-w",         // Wait instead of failing if the lock is held.
163                   "-v",         // Verbose mode, to make sure our ping is echoed
164                                 // back to us.
165                   nullptr) == -1) {
166             ALOGE("execl(%s, ...) failed: %s", cmd, strerror(errno));
167             abort();
168         }
169 
170         // This statement is unreachable. We abort() upon error, and execl
171         // if everything goes well.
172         return nullptr;
173     }
174 
175     // The parent process.
176 
177     if (close(stdin_pipe[0]) == -1 ||
178         close(stdout_pipe[1]) == -1 ||
179         close(stderr_pipe[1]) == -1) {
180         ALOGW("close() failed: %s", strerror(errno));
181     }
182 
183     // stdin_pipe[1] : The write end of the stdin pipe.
184     // stdout_pipe[0] : The read end of the stdout pipe.
185     // stderr_pipe[0] : The read end of the stderr pipe.
186     return new IptablesProcess(type,
187             child_pid.value(), stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]);
188 }
189 
190 // TODO: Return -errno on failure instead of -1.
191 // TODO: Maybe we should keep a rotating buffer of the last N commands
192 // so that they can be dumped on dumpsys.
sendCommand(const IptablesProcessType type,const std::string & command,std::string * output)193 int IptablesRestoreController::sendCommand(const IptablesProcessType type,
194                                            const std::string& command,
195                                            std::string *output) {
196    std::unique_ptr<IptablesProcess> *process =
197            (type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore;
198 
199 
200     // We might need to fork a new process if we haven't forked one yet, or
201     // if the forked process terminated.
202     //
203     // NOTE: For a given command, this is the last point at which we try to
204     // recover from a child death. If the child dies at some later point during
205     // the execution of this method, we will receive an EPIPE and return an
206     // error. The command will then need to be retried at a higher level.
207     IptablesProcess *existingProcess = process->get();
208     if (existingProcess != nullptr && !existingProcess->outputReady()) {
209         existingProcess->stop();
210         existingProcess = nullptr;
211     }
212 
213     if (existingProcess == nullptr) {
214         // Fork a new iptables[6]-restore process.
215         IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type);
216         if (newProcess == nullptr) {
217             LOG(ERROR) << "Unable to fork ip[6]tables-restore, type: " << type;
218             return -1;
219         }
220 
221         process->reset(newProcess);
222     }
223 
224     if (!android::base::WriteFully((*process)->stdIn, command.data(), command.length())) {
225         ALOGE("Unable to send command: %s", strerror(errno));
226         return -1;
227     }
228 
229     if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) {
230         ALOGE("Unable to send ping command: %s", strerror(errno));
231         return -1;
232     }
233 
234     if (!drainAndWaitForAck(*process, command, output)) {
235         // drainAndWaitForAck has already logged an error.
236         return -1;
237     }
238 
239     return 0;
240 }
241 
maybeLogStderr(const std::unique_ptr<IptablesProcess> & process,const std::string & command)242 void IptablesRestoreController::maybeLogStderr(const std::unique_ptr<IptablesProcess> &process,
243                                                const std::string& command) {
244     if (process->errBuf.empty()) {
245         return;
246     }
247 
248     ALOGE("iptables error:");
249     ALOGE("------- COMMAND -------");
250     ALOGE("%s", command.c_str());
251     ALOGE("-------  ERROR -------");
252     ALOGE("%s", process->errBuf.c_str());
253     ALOGE("----------------------");
254     process->errBuf.clear();
255 }
256 
257 /* static */
drainAndWaitForAck(const std::unique_ptr<IptablesProcess> & process,const std::string & command,std::string * output)258 bool IptablesRestoreController::drainAndWaitForAck(const std::unique_ptr<IptablesProcess> &process,
259                                                    const std::string& command,
260                                                    std::string *output) {
261     bool receivedAck = false;
262     int timeout = 0;
263     while (!receivedAck && (timeout++ < MAX_RETRIES)) {
264         int numEvents = TEMP_FAILURE_RETRY(
265             poll(process->pollFds, ARRAY_SIZE(process->pollFds), POLL_TIMEOUT_MS));
266         if (numEvents == -1) {
267             ALOGE("Poll failed: %s", strerror(errno));
268             return false;
269         }
270 
271         // We've timed out, which means something has gone wrong - we know that stdout should have
272         // become available to read with the ACK message, or that stderr should have been available
273         // to read with an error message.
274         if (numEvents == 0) {
275             continue;
276         }
277 
278         char buffer[PIPE_BUF];
279         for (size_t i = 0; i < ARRAY_SIZE(process->pollFds); ++i) {
280             const struct pollfd &pollfd = process->pollFds[i];
281             if (pollfd.revents & POLLIN) {
282                 ssize_t size;
283                 do {
284                     size = TEMP_FAILURE_RETRY(read(pollfd.fd, buffer, sizeof(buffer)));
285 
286                     if (size == -1) {
287                         if (errno != EAGAIN) {
288                             ALOGE("Unable to read from descriptor: %s", strerror(errno));
289                         }
290                         break;
291                     }
292 
293                     if (i == IptablesProcess::STDOUT_IDX) {
294                         // i == STDOUT_IDX: accumulate stdout into *output, and look
295                         // for the ping response.
296                         output->append(buffer, size);
297                         size_t pos = output->find(PING);
298                         if (pos != std::string::npos) {
299                             if (output->size() > pos + PING_SIZE) {
300                                 size_t extra = output->size() - (pos + PING_SIZE);
301                                 ALOGW("%zd extra characters after iptables response: '%s...'",
302                                       extra, output->substr(pos + PING_SIZE, 128).c_str());
303                             }
304                             output->resize(pos);
305                             receivedAck = true;
306                         }
307                     } else {
308                         // i == STDERR_IDX: accumulate stderr into errBuf.
309                         process->errBuf.append(buffer, size);
310                     }
311                 } while (size > 0);
312             }
313             if (pollfd.revents & POLLHUP) {
314                 // The pipe was closed. This likely means the subprocess is exiting, since
315                 // iptables-restore only closes stdin on error.
316                 process->stop();
317                 break;
318             }
319         }
320     }
321 
322     if (!receivedAck && !process->processTerminated) {
323         ALOGE("Timed out waiting for response from iptables process %d", process->pid);
324         // Kill the process so that if it eventually recovers, we don't misinterpret the ping
325         // response (or any output) of the command we just sent as coming from future commands.
326         process->stop();
327     }
328 
329     maybeLogStderr(process, command);
330 
331     return receivedAck;
332 }
333 
execute(const IptablesTarget target,const std::string & command,std::string * output)334 int IptablesRestoreController::execute(const IptablesTarget target, const std::string& command,
335                                        std::string *output) {
336     std::lock_guard lock(mLock);
337 
338     std::string buffer;
339     if (output == nullptr) {
340         output = &buffer;
341     } else {
342         output->clear();
343     }
344 
345     int res = 0;
346     if (target == V4 || target == V4V6) {
347         res |= sendCommand(IPTABLES_PROCESS, command, output);
348     }
349     if (target == V6 || target == V4V6) {
350         res |= sendCommand(IP6TABLES_PROCESS, command, output);
351     }
352     return res;
353 }
354 
getIpRestorePid(const IptablesProcessType type)355 int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) {
356     return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid;
357 }
358