# global_domains.py (2.5 KB)

  1. #!/usr/bin/env python3
  2. #
  3. # This script generates a global equivalent domains JSON file from
  4. # the upstream Bitwarden source repo.
  5. #
  6. import json
  7. import re
  8. import sys
  9. import urllib.request
  10. from collections import OrderedDict
  11. if not 2 <= len(sys.argv) <= 3:
  12. print(f"usage: {sys.argv[0]} <OUTPUT-FILE> [GIT-REF]")
  13. print()
  14. print("This script generates a global equivalent domains JSON file from")
  15. print("the upstream Bitwarden source repo.")
  16. sys.exit(1)
  17. OUTPUT_FILE = sys.argv[1]
  18. GIT_REF = 'main' if len(sys.argv) == 2 else sys.argv[2]
  19. BASE_URL = f'https://github.com/bitwarden/server/raw/{GIT_REF}'
  20. ENUMS_URL = f'{BASE_URL}/src/Core/Enums/GlobalEquivalentDomainsType.cs'
  21. DOMAIN_LISTS_URL = f'{BASE_URL}/src/Core/Utilities/StaticStore.cs'
  22. # Enum lines look like:
  23. #
  24. # EnumName0 = 0,
  25. # EnumName1 = 1,
  26. #
  27. ENUM_RE = re.compile(
  28. r'\s*' # Leading whitespace (optional).
  29. r'([_0-9a-zA-Z]+)' # Enum name (capture group 1).
  30. r'\s*=\s*' # '=' with optional surrounding whitespace.
  31. r'([0-9]+)' # Enum value (capture group 2).
  32. )
  33. # Global domains lines look like:
  34. #
  35. # GlobalDomains.Add(GlobalEquivalentDomainsType.EnumName, new List<string> { "x.com", "y.com" });
  36. #
  37. DOMAIN_LIST_RE = re.compile(
  38. r'\s*' # Leading whitespace (optional).
  39. r'GlobalDomains\.Add\(GlobalEquivalentDomainsType\.'
  40. r'([_0-9a-zA-Z]+)' # Enum name (capture group 1).
  41. r'\s*,\s*new List<string>\s*{'
  42. r'([^}]+)' # Domain list (capture group 2).
  43. r'}\);'
  44. )
  45. enums = dict()
  46. domain_lists = OrderedDict()
  47. # Read in the enum names and values.
  48. with urllib.request.urlopen(ENUMS_URL) as response:
  49. for ln in response.read().decode('utf-8').split('\n'):
  50. m = ENUM_RE.match(ln)
  51. if m:
  52. enums[m.group(1)] = int(m.group(2))
  53. # Read in the domain lists.
  54. with urllib.request.urlopen(DOMAIN_LISTS_URL) as response:
  55. for ln in response.read().decode('utf-8').split('\n'):
  56. m = DOMAIN_LIST_RE.match(ln)
  57. if m:
  58. # Strip double quotes and extraneous spaces in each domain.
  59. domain_lists[m.group(1)] = [d.strip(' "') for d in m.group(2).split(",")]
  60. # Build the global domains data structure.
  61. global_domains = []
  62. for name, domain_list in domain_lists.items():
  63. entry = OrderedDict()
  64. entry["type"] = enums[name]
  65. entry["domains"] = domain_list
  66. entry["excluded"] = False
  67. global_domains.append(entry)
  68. # Write out the global domains JSON file.
  69. with open(file=OUTPUT_FILE, mode='w', encoding='utf-8') as f:
  70. json.dump(global_domains, f, indent=2)