# db_locks_monitor_test.py
  1. # --- BEGIN COPYRIGHT BLOCK ---
  2. # Copyright (C) 2021 Red Hat, Inc.
  3. # All rights reserved.
  4. #
  5. # License: GPL (version 3 or any later version).
  6. # See LICENSE for details.
  7. # --- END COPYRIGHT BLOCK ---
  8. #
  9. import logging
  10. import pytest
  11. import datetime
  12. import subprocess
  13. from multiprocessing import Process, Queue
  14. from lib389 import pid_from_file
  15. from lib389.utils import ldap, os
  16. from lib389._constants import DEFAULT_SUFFIX, ReplicaRole
  17. from lib389.cli_base import LogCapture
  18. from lib389.idm.user import UserAccounts
  19. from lib389.idm.organizationalunit import OrganizationalUnits
  20. from lib389.tasks import AccessLog
  21. from lib389.backend import Backends
  22. from lib389.ldclt import Ldclt
  23. from lib389.dbgen import dbgen_users
  24. from lib389.tasks import ImportTask
  25. from lib389.index import Indexes
  26. from lib389.plugins import AttributeUniquenessPlugin
  27. from lib389.config import BDB_LDBMConfig
  28. from lib389.monitor import MonitorLDBM
  29. from lib389.topologies import create_topology, _remove_ssca_db
pytestmark = pytest.mark.tier2

# Opt-in gate: these tests are very slow and need a large lock table, so they
# only run when the DB_LOCKS_MONITORING_ACK environment variable is set.
db_locks_monitoring_ack = pytest.mark.skipif(not os.environ.get('DB_LOCKS_MONITORING_ACK', False),
                                             reason="DB locks monitoring tests may take hours if the feature is not present or another failure exists. "
                                                    "Also, the feature requires a big amount of space as we set nsslapd-db-locks to 1300000.")

# DEBUGGING raises log verbosity; the topology fixture also consults it to
# decide how much clean-up to perform after a test.
DEBUGGING = os.getenv('DEBUGGING', default=False)
if DEBUGGING:
    logging.getLogger(__name__).setLevel(logging.DEBUG)
else:
    logging.getLogger(__name__).setLevel(logging.INFO)
log = logging.getLogger(__name__)
  40. def _kill_ns_slapd(inst):
  41. pid = str(pid_from_file(inst.ds_paths.pid_file))
  42. cmd = ['kill', '-9', pid]
  43. subprocess.Popen(cmd, stdout=subprocess.PIPE)
  44. @pytest.fixture(scope="function")
  45. def topology_st_fn(request):
  46. """Create DS standalone instance for each test case"""
  47. topology = create_topology({ReplicaRole.STANDALONE: 1})
  48. def fin():
  49. # Kill the hanging process at the end of test to prevent failures in the following tests
  50. if DEBUGGING:
  51. [_kill_ns_slapd(inst) for inst in topology]
  52. else:
  53. [_kill_ns_slapd(inst) for inst in topology]
  54. assert _remove_ssca_db(topology)
  55. [inst.stop() for inst in topology if inst.exists()]
  56. [inst.delete() for inst in topology if inst.exists()]
  57. request.addfinalizer(fin)
  58. topology.logcap = LogCapture()
  59. return topology
  60. @pytest.fixture(scope="function")
  61. def setup_attruniq_index_be_import(topology_st_fn):
  62. """Enable Attribute Uniqueness, disable indexes and
  63. import 120000 entries to the default backend
  64. """
  65. inst = topology_st_fn.standalone
  66. inst.config.loglevel([AccessLog.DEFAULT, AccessLog.INTERNAL], service='access')
  67. inst.config.set('nsslapd-plugin-logging', 'on')
  68. inst.restart()
  69. attruniq = AttributeUniquenessPlugin(inst, dn="cn=attruniq,cn=plugins,cn=config")
  70. attruniq.create(properties={'cn': 'attruniq'})
  71. for cn in ['uid', 'cn', 'sn', 'uidNumber', 'gidNumber', 'homeDirectory', 'givenName', 'description']:
  72. attruniq.add_unique_attribute(cn)
  73. attruniq.add_unique_subtree(DEFAULT_SUFFIX)
  74. attruniq.enable_all_subtrees()
  75. attruniq.enable()
  76. indexes = Indexes(inst)
  77. for cn in ['uid', 'cn', 'sn', 'uidNumber', 'gidNumber', 'homeDirectory', 'givenName', 'description']:
  78. indexes.ensure_state(properties={
  79. 'cn': cn,
  80. 'nsSystemIndex': 'false',
  81. 'nsIndexType': 'none'})
  82. bdb_config = BDB_LDBMConfig(inst)
  83. bdb_config.replace("nsslapd-db-locks", "130000")
  84. inst.restart()
  85. ldif_dir = inst.get_ldif_dir()
  86. import_ldif = ldif_dir + '/perf_import.ldif'
  87. # Valid online import
  88. import_task = ImportTask(inst)
  89. dbgen_users(inst, 120000, import_ldif, DEFAULT_SUFFIX, entry_name="userNew")
  90. import_task.import_suffix_from_ldif(ldiffile=import_ldif, suffix=DEFAULT_SUFFIX)
  91. import_task.wait()
  92. assert import_task.is_complete()
  93. def create_user_wrapper(q, users):
  94. try:
  95. users.create_test_user()
  96. except Exception as ex:
  97. q.put(ex)
  98. def spawn_worker_thread(function, users, log, timeout, info):
  99. log.info(f"Starting the thread - {info}")
  100. q = Queue()
  101. p = Process(target=function, args=(q,users,))
  102. p.start()
  103. log.info(f"Waiting for {timeout} seconds for the thread to finish")
  104. p.join(timeout)
  105. if p.is_alive():
  106. log.info("Killing the thread as it's still running")
  107. p.terminate()
  108. p.join()
  109. raise RuntimeError(f"Function call was aborted: {info}")
  110. result = q.get()
  111. if isinstance(result, Exception):
  112. raise result
  113. else:
  114. return result
@db_locks_monitoring_ack
@pytest.mark.parametrize("lock_threshold", [("70"), ("80"), ("95")])
def test_exhaust_db_locks_basic(topology_st_fn, setup_attruniq_index_be_import, lock_threshold):
    """Test that when all of the locks are exhausted the instance still working
    and database is not corrupted

    :id: 299108cc-04d8-4ddc-b58e-99157fccd643
    :setup: Standalone instance with Attr Uniq plugin and user indexes disabled
    :steps: 1. Set nsslapd-db-locks to 11000
            2. Check that we stop acquiring new locks when the threshold is reached
            3. Check that we can regulate a pause interval for DB locks monitoring thread
            4. Make sure the feature works for different backends on the same suffix
    :expectedresults:
            1. Success
            2. Success
            3. Success
            4. Success
    """

    inst = topology_st_fn.standalone
    ADDITIONAL_SUFFIX = 'ou=newpeople,dc=example,dc=com'

    # Second backend on a sub-suffix so the monitoring feature can be
    # exercised against more than one backend (step 4).
    backends = Backends(inst)
    backends.create(properties={'nsslapd-suffix': ADDITIONAL_SUFFIX,
                                'name': ADDITIONAL_SUFFIX[-3:]})
    ous = OrganizationalUnits(inst, DEFAULT_SUFFIX)
    ous.create(properties={'ou': 'newpeople'})

    # Deliberately small lock table so the add below exhausts it quickly.
    bdb_config = BDB_LDBMConfig(inst)
    bdb_config.replace("nsslapd-db-locks", "11000")

    # Restart server
    inst.restart()

    for lock_enabled in ["on", "off"]:
        for lock_pause in ["100", "500", "1000"]:
            bdb_config.replace("nsslapd-db-locks-monitoring-enabled", lock_enabled)
            bdb_config.replace("nsslapd-db-locks-monitoring-threshold", lock_threshold)
            bdb_config.replace("nsslapd-db-locks-monitoring-pause", lock_pause)
            inst.restart()

            # With monitoring off the server runs out of locks and goes down
            # (or the worker has to be aborted); with monitoring on the add
            # is rejected cleanly with OPERATIONS_ERROR.
            if lock_enabled == "off":
                raised_exception = (RuntimeError, ldap.SERVER_DOWN)
            else:
                raised_exception = ldap.OPERATIONS_ERROR

            users = UserAccounts(inst, DEFAULT_SUFFIX)
            with pytest.raises(raised_exception):
                spawn_worker_thread(create_user_wrapper, users, log, 30,
                                    f"Adding user with monitoring enabled='{lock_enabled}'; "
                                    f"threshold='{lock_threshold}'; pause='{lock_pause}'.")
            # Restart because we already run out of locks and the next unindexed searches will fail eventually
            if lock_enabled == "off":
                _kill_ns_slapd(inst)
            inst.restart()

            # Repeat the same scenario against the additional backend.
            users = UserAccounts(inst, ADDITIONAL_SUFFIX, rdn=None)
            with pytest.raises(raised_exception):
                spawn_worker_thread(create_user_wrapper, users, log, 30,
                                    f"Adding user with monitoring enabled='{lock_enabled}'; "
                                    f"threshold='{lock_threshold}'; pause='{lock_pause}'.")
            # In case feature is disabled - restart for the clean up
            if lock_enabled == "off":
                _kill_ns_slapd(inst)
            inst.restart()
@db_locks_monitoring_ack
def test_exhaust_db_locks_big_pause(topology_st_fn, setup_attruniq_index_be_import):
    """Test that DB lock pause setting increases the wait interval value for the monitoring thread

    :id: 7d5bf838-5d4e-4ad5-8c03-5716afb84ea6
    :setup: Standalone instance with Attr Uniq plugin and user indexes disabled
    :steps: 1. Set nsslapd-db-locks to 20000 while using the default threshold value (95%)
            2. Set nsslapd-db-locks-monitoring-pause to 10000 (10 seconds)
            3. Make sure that the pause is successfully increased a few times in a row
    :expectedresults:
            1. Success
            2. Success
            3. Success
    """

    inst = topology_st_fn.standalone

    bdb_config = BDB_LDBMConfig(inst)
    bdb_config.replace("nsslapd-db-locks", "20000")
    # Sanity-check the shipped default pause before overriding it.
    lock_pause = bdb_config.get_attr_val_int("nsslapd-db-locks-monitoring-pause")
    assert lock_pause == 500
    lock_pause = "10000"
    bdb_config.replace("nsslapd-db-locks-monitoring-pause", lock_pause)

    # Restart server
    inst.restart()

    lock_enabled = bdb_config.get_attr_val_utf8_l("nsslapd-db-locks-monitoring-enabled")
    lock_threshold = bdb_config.get_attr_val_int("nsslapd-db-locks-monitoring-threshold")
    assert lock_enabled == "on"
    # NOTE(review): the docstring says "default threshold value (95%)" but the
    # code asserts 90 — confirm which is the intended shipped default.
    assert lock_threshold == 90

    users = UserAccounts(inst, DEFAULT_SUFFIX)
    start = datetime.datetime.now()
    with pytest.raises(ldap.OPERATIONS_ERROR):
        spawn_worker_thread(create_user_wrapper, users, log, 30,
                            f"Adding user with monitoring enabled='{lock_enabled}'; "
                            f"threshold='{lock_threshold}'; pause='{lock_pause}'. Expect it to 'Work'")
    end = datetime.datetime.now()
    time_delta = end - start
    # With a 10-second pause the monitoring thread cannot have reacted in
    # under ~9 seconds; a faster failure means the pause was not honoured.
    if time_delta.seconds < 9:
        raise RuntimeError("nsslapd-db-locks-monitoring-pause attribute doesn't function correctly. "
                           f"Finished the execution in {time_delta.seconds} seconds")
    # In case something has failed - restart for the clean up
    inst.restart()