|
57 | 57 | import signal |
58 | 58 | import subprocess |
59 | 59 | import threading |
| 60 | +import time |
60 | 61 |
|
61 | 62 | try: |
62 | 63 | from pathlib import PurePath |
@@ -1249,61 +1250,36 @@ def __init__(self, *args, **kwargs): |
1249 | 1250 | # blocked on wait(). |
1250 | 1251 | self._condvar = threading.Condition(threading.Lock()) |
1251 | 1252 |
|
1252 | | - def wait(self): |
1253 | | - # Take the condvar/lock. If we need to do a blocking wait, we'll |
1254 | | - # release it first. |
| 1253 | + def wait(self, timeout=None): |
| 1254 | + deadline = time.monotonic() + timeout if timeout is not None else None |
| 1255 | + # Start by taking the condvar/lock, but note that we need to release it |
| 1256 | + # before waiting, or else we'd block .poll(), .kill(), and other calls |
| 1257 | + # to .wait(). |
1255 | 1258 | with self._condvar: |
1256 | | - # As long as another thread is in a blocking wait, sleep on the |
1257 | | - # condvar. Once we wake from this sleep, it's overwhelmingly likely |
1258 | | - # that the child will have been reaped, but we'll still check in |
1259 | | - # case waiting fails somehow. |
1260 | | - while self._someone_is_waiting: |
1261 | | - self._condvar.wait() |
1262 | | - |
1263 | | - # Check whether the child has already been reaped. (The .poll() |
1264 | | - # below actually does the same check internally, so we could skip |
1265 | | - # this, but it feels clearer to me to check it.) |
1266 | | - if self._child.returncode is not None: |
1267 | | - return self._child.returncode |
1268 | | - |
1269 | | - # We have the condvar/lock, and we're the thread responsible for |
1270 | | - # waiting on the child. We're about to release the lock and do a |
1271 | | - # blocking, non-reaping wait. However, there's another subtle race |
1272 | | - # condition we need to worry about, apart from the kill/wait PID |
1273 | | - # reuse race that this class is all about. Here's a possible order |
1274 | | - # of events: |
1275 | | - # 1. The child process exits. |
1276 | | - # 2. Lots of time passes. Now any waiter will certainly see that |
1277 | | - # the child has exited. |
1278 | | - # 3. One thread calls .wait(). It acquires the lock, sets |
1279 | | - # _someone_is_waiting to True, and releases the lock, in |
1280 | | - # preparation for attempting its blocking wait. |
1281 | | - # 4. Suddently, another thread swoops in and calls .poll(). |
1282 | | - # That thread acquires the lock, sees _someone_is_waiting, and |
1283 | | - # returns None. ***THIS IS A BUG.*** |
1284 | | - # 5. The first thread sees that the child has exited, reacquires |
1285 | | - # the lock, and cleans up. |
1286 | | - # If the call to .poll() were racing against the child's actual |
1287 | | - # exit, then we wouldn't care what it returned. That would be an |
1288 | | - # "honest" race, and it would be correct for the result to be a |
1289 | | - # coin flip. But that's not what happens in this situation. Here |
1290 | | - # the child has definitely exited, maybe seconds or minutes ago, |
1291 | | - # and a single call to .poll() would certainly have returned |
1292 | | - # non-None. It's only by racing against .wait() that this situation |
1293 | | - # could incorrectly report that the child hasn't exited. |
1294 | | - # |
1295 | | - # To fix this, .wait() must do a non-blocking wait (that is, |
1296 | | - # actually check the status of the child process) *before* |
1297 | | - # releasing the lock. (In fact, doing it before acquiring the lock |
1298 | | - # could also be correct.) If that returns false, then the only |
1299 | | - # possible race is a race against the child itself, where again |
1300 | | - # it's expected and fine for the result to be a coin flip. |
1301 | | - if self._child.poll() is not None: |
1302 | | - return self._child.returncode |
| 1259 | + while True: |
| 1260 | + now = time.monotonic() |
| 1261 | + if self._child.returncode is not None: |
| 1262 | + # The child has already been reaped. Return its saved exit |
| 1263 | + # status. |
| 1264 | + return self._child.returncode |
| 1265 | + if deadline is not None and time.monotonic() > deadline: |
| 1266 | + # The deadline has passed. Poll the child on the way out, |
| 1267 | + # to make sure we always poll at least once. |
| 1268 | + return self.poll() |
| 1269 | + if self._someone_is_waiting: |
| 1270 | + # There is another blocking waiter. Sleep until it signals |
| 1271 | + # the condvar or the deadline passes. Spurious wakeups are |
| 1272 | + # acceptable here. |
| 1273 | + condvar_timeout = deadline - now if deadline is not None else None |
| 1274 | + self._condvar.wait(condvar_timeout) |
| 1275 | + else: |
| 1276 | + # There are no other blocking waiters. Proceed to the |
| 1277 | + # blocking wait. |
| 1278 | + break |
1303 | 1279 |
|
1304 | | - # Finally, before releasing the condvar/lock, set |
1305 | | - # _someone_is_waiting. We must unset it before returning, or else |
1306 | | - # we'll deadlock other callers. |
| 1280 | + # We are the blocking waiter. Set the _someone_is_waiting flag and |
| 1281 | + # release the lock (dedent) before blocking. After this, we must |
| 1282 | + # clear this flag and notify the condvar before returning. |
1307 | 1283 | self._someone_is_waiting = True |
1308 | 1284 |
|
1309 | 1285 | # Dedent to release the condvar/lock, and do the blocking wait. We have |
|
0 commit comments