Browse Source

fix race condition in Service.create_container()

The Service.create_container() method fetches a list of the current
containers in order to determine the next container number. In doing so,
it makes several API calls: one to fetch the list of containers, then
one per container in order to inspect it.

In some situations it can happen that a container is removed after
having been listed: in that case, the call to inspect will get a 404 and
raise a NotFound.

One situation in which this has been observed is when trying to
concurrently create multiple one-off containers for the same service
(using `docker-compose run` and a unique `--name`), as described in
more detail in gh-5179.

This patch adds a unit test that simulates the race between the
calls to list and to inspect, and changes Service._next_container_number
to skip removed containers instead of blowing up.

Fixes gh-5179

Signed-off-by: Alberto Piai <[email protected]>
Alberto Piai 7 years ago
parent
commit
394c8efe98
2 changed files with 51 additions and 6 deletions
  1. 18 6
      compose/service.py
  2. 33 0
      tests/unit/service_test.py

+ 18 - 6
compose/service.py

@@ -685,15 +685,27 @@ class Service(object):
     # TODO: this would benefit from github.com/docker/docker/pull/14699
     # to remove the need to inspect every container
     def _next_container_number(self, one_off=False):
-        containers = filter(None, [
-            Container.from_ps(self.client, container)
-            for container in self.client.containers(
-                all=True,
-                filters={'label': self.labels(one_off=one_off)})
-        ])
+        containers = self._fetch_containers(
+            all=True,
+            filters={'label': self.labels(one_off=one_off)}
+        )
         numbers = [c.number for c in containers]
         return 1 if not numbers else max(numbers) + 1
 
+    def _fetch_containers(self, **fetch_options):
+        # Account for containers that might have been removed since we fetched
+        # the list.
+        def soft_inspect(container):
+            try:
+                return Container.from_id(self.client, container['Id'])
+            except NotFound:
+                return None
+
+        return filter(None, [
+            soft_inspect(container)
+            for container in self.client.containers(**fetch_options)
+        ])
+
     def _get_aliases(self, network, container=None):
         return list(
             {self.name} |

+ 33 - 0
tests/unit/service_test.py

@@ -5,6 +5,7 @@ import docker
 import pytest
 from docker.constants import DEFAULT_DOCKER_API_VERSION
 from docker.errors import APIError
+from docker.errors import NotFound
 
 from .. import mock
 from .. import unittest
@@ -888,6 +889,38 @@ class ServiceTest(unittest.TestCase):
             'ftp_proxy': override_options['environment']['FTP_PROXY'],
         }))
 
+    def test_create_when_removed_containers_are_listed(self):
+        # This is aimed at simulating a race between the API call to list the
+        # containers, and the ones to inspect each of the listed containers.
+        # It can happen that a container has been removed after we listed it.
+
+        # containers() returns a container that is about to be removed
+        self.mock_client.containers.return_value = [
+            {'Id': 'rm_cont_id', 'Name': 'rm_cont', 'Image': 'img_id'},
+        ]
+
+        # inspect_container() will raise a NotFound when trying to inspect
+        # rm_cont_id, which at this point has been removed
+        def inspect(name):
+            if name == 'rm_cont_id':
+                raise NotFound(message='Not Found')
+
+            if name == 'new_cont_id':
+                return {'Id': 'new_cont_id'}
+
+            raise NotImplementedError("incomplete mock")
+
+        self.mock_client.inspect_container.side_effect = inspect
+
+        self.mock_client.inspect_image.return_value = {'Id': 'imageid'}
+
+        self.mock_client.create_container.return_value = {'Id': 'new_cont_id'}
+
+        # We should nonetheless be able to create a new container
+        service = Service('foo', client=self.mock_client)
+
+        assert service.create_container().id == 'new_cont_id'
+
 
 class TestServiceNetwork(unittest.TestCase):
     def setUp(self):