changeset: 91293:aa5e3f7a5501 branch: 2.7 parent: 91288:b0c850121ded user: Charles-François Natali date: Fri Jun 20 22:03:08 2014 +0100 files: Lib/SocketServer.py Misc/NEWS description: Issue #21491: SocketServer: Fix a race condition in child processes reaping. diff -r b0c850121ded -r aa5e3f7a5501 Lib/SocketServer.py --- a/Lib/SocketServer.py Fri Jun 20 14:59:07 2014 -0400 +++ b/Lib/SocketServer.py Fri Jun 20 22:03:08 2014 +0100 @@ -513,35 +513,37 @@ def collect_children(self): """Internal routine to wait for children that have exited.""" - if self.active_children is None: return - while len(self.active_children) >= self.max_children: - # XXX: This will wait for any child process, not just ones - # spawned by this library. This could confuse other - # libraries that expect to be able to wait for their own - # children. - try: - pid, status = os.waitpid(0, 0) - except os.error: - pid = None - if pid not in self.active_children: continue - self.active_children.remove(pid) + if self.active_children is None: + return - # XXX: This loop runs more system calls than it ought - # to. There should be a way to put the active_children into a - # process group and then use os.waitpid(-pgid) to wait for any - # of that set, but I couldn't find a way to allocate pgids - # that couldn't collide. - for child in self.active_children: + # If we're above the max number of children, wait and reap them until + # we go back below threshold. Note that we use waitpid(-1) below to be + # able to collect children in size() syscalls instead + # of size(): the downside is that this might reap children + # which we didn't spawn, which is why we only resort to this when we're + # above max_children. + while len(self.active_children) >= self.max_children: try: - pid, status = os.waitpid(child, os.WNOHANG) - except os.error: - pid = None - if not pid: continue + pid, _ = os.waitpid(-1, 0) + self.active_children.discard(pid) + except OSError as e: + if e.errno == errno.ECHILD: + # we don't have any children, we're done + self.active_children.clear() + elif e.errno != errno.EINTR: + break + + # Now reap all defunct children. + for pid in self.active_children.copy(): try: - self.active_children.remove(pid) - except ValueError, e: - raise ValueError('%s. x=%d and list=%r' % (e.message, pid, - self.active_children)) + pid, _ = os.waitpid(pid, os.WNOHANG) + # if the child hasn't exited yet, pid will be 0 and ignored by + # discard() below + self.active_children.discard(pid) + except OSError as e: + if e.errno == errno.ECHILD: + # someone else reaped it + self.active_children.discard(pid) def handle_timeout(self): """Wait for zombies after self.timeout seconds of inactivity. @@ -557,8 +559,8 @@ if pid: # Parent process if self.active_children is None: - self.active_children = [] - self.active_children.append(pid) + self.active_children = set() + self.active_children.add(pid) self.close_request(request) #close handle in parent process return else: diff -r b0c850121ded -r aa5e3f7a5501 Misc/NEWS --- a/Misc/NEWS Fri Jun 20 14:59:07 2014 -0400 +++ b/Misc/NEWS Fri Jun 20 22:03:08 2014 +0100 @@ -29,6 +29,8 @@ Library ------- +- Issue #21491: SocketServer: Fix a race condition in child processes reaping. + - Issue #21722: The distutils "upload" command now exits with a non-zero return code when uploading fails. Patch by Martin Dengler.