changeset: 91294:2a7375bd09f9 branch: 3.4 parent: 91291:ec0aae4df38b user: Charles-François Natali date: Fri Jun 20 22:37:35 2014 +0100 files: Lib/socketserver.py Misc/NEWS description: Issue #21491: socketserver: Fix a race condition in child processes reaping. diff -r ec0aae4df38b -r 2a7375bd09f9 Lib/socketserver.py --- a/Lib/socketserver.py Fri Jun 20 15:16:35 2014 -0400 +++ b/Lib/socketserver.py Fri Jun 20 22:37:35 2014 +0100 @@ -523,35 +523,39 @@ def collect_children(self): """Internal routine to wait for children that have exited.""" - if self.active_children is None: return - while len(self.active_children) >= self.max_children: - # XXX: This will wait for any child process, not just ones - # spawned by this library. This could confuse other - # libraries that expect to be able to wait for their own - # children. - try: - pid, status = os.waitpid(0, 0) - except OSError: - pid = None - if pid not in self.active_children: continue - self.active_children.remove(pid) + if self.active_children is None: + return - # XXX: This loop runs more system calls than it ought - # to. There should be a way to put the active_children into a - # process group and then use os.waitpid(-pgid) to wait for any - # of that set, but I couldn't find a way to allocate pgids - # that couldn't collide. - for child in self.active_children: + # If we're above the max number of children, wait and reap them until + # we go back below threshold. Note that we use waitpid(-1) below to be + # able to collect children in size() syscalls instead + # of size(): the downside is that this might reap children + # which we didn't spawn, which is why we only resort to this when we're + # above max_children. + while len(self.active_children) >= self.max_children: try: - pid, status = os.waitpid(child, os.WNOHANG) + pid, _ = os.waitpid(-1, 0) + self.active_children.discard(pid) + except InterruptedError: + pass + except ChildProcessError: + # we don't have any children, we're done + self.active_children.clear() except OSError: - pid = None - if not pid: continue + break + + # Now reap all defunct children. + for pid in self.active_children.copy(): try: - self.active_children.remove(pid) - except ValueError as e: - raise ValueError('%s. x=%d and list=%r' % (e.message, pid, - self.active_children)) + pid, _ = os.waitpid(pid, os.WNOHANG) + # if the child hasn't exited yet, pid will be 0 and ignored by + # discard() below + self.active_children.discard(pid) + except ChildProcessError: + # someone else reaped it + self.active_children.discard(pid) + except OSError: + pass def handle_timeout(self): """Wait for zombies after self.timeout seconds of inactivity. @@ -573,8 +577,8 @@ if pid: # Parent process if self.active_children is None: - self.active_children = [] - self.active_children.append(pid) + self.active_children = set() + self.active_children.add(pid) self.close_request(request) return else: diff -r ec0aae4df38b -r 2a7375bd09f9 Misc/NEWS --- a/Misc/NEWS Fri Jun 20 15:16:35 2014 -0400 +++ b/Misc/NEWS Fri Jun 20 22:37:35 2014 +0100 @@ -27,6 +27,8 @@ Library ------- +- Issue #21491: socketserver: Fix a race condition in child processes reaping. + - Issue #21722: The distutils "upload" command now exits with a non-zero return code when uploading fails. Patch by Martin Dengler.