# issue-stats.py
  1. import requests
  2. import os
  3. from collections import defaultdict, Counter
  4. import json
  5. from pprint import pprint
  6. from datetime import datetime, timedelta
  7. headers = {"Authorization": "bearer " + os.environ["GITHUB_TOKEN"]}
  8. this = datetime.now()
  9. USE_EXISTING = False
  10. releases_per_month = {}
  11. try:
  12. from tqdm.contrib.concurrent import process_map # or thread_map
  13. except Exception:
  14. process_map = map
  15. def run_query(query):
  16. request = requests.post(
  17. "https://api.github.com/graphql", json={"query": query}, headers=headers
  18. )
  19. if request.status_code == 200:
  20. return request.json()
  21. else:
  22. raise Exception(
  23. "Query failed to run by returning code of {}. {}".format(
  24. request.status_code, query
  25. )
  26. )
  27. def do_one_repo(reponame):
  28. issues = []
  29. pagination = ""
  30. while True:
  31. query = """
  32. query {
  33. organization(login: "notofonts") {
  34. repository(name: "%s") {
  35. issues(first: 100%s) {
  36. nodes {
  37. createdAt
  38. closedAt
  39. url
  40. }
  41. pageInfo {
  42. hasNextPage
  43. endCursor
  44. }
  45. }
  46. }
  47. }
  48. }
  49. """ % (
  50. reponame,
  51. pagination,
  52. )
  53. result = run_query(query)
  54. tier = sources[reponame].get("tier", 3)
  55. if "data" in result:
  56. result = result["data"]["organization"]["repository"]["issues"]
  57. else:
  58. print(result)
  59. break
  60. for issue in result["nodes"]:
  61. if issue["closedAt"] and issue["closedAt"] <= "2024-01-01":
  62. continue
  63. issue["repo"] = reponame
  64. issue["tier"] = tier
  65. issues.append(issue)
  66. if result["pageInfo"]["hasNextPage"]:
  67. endcursor = result["pageInfo"]["endCursor"]
  68. pagination = f'after:"{endcursor}"'
  69. else:
  70. break
  71. return issues
  72. def get_releases(rpm):
  73. pagination = ""
  74. while True:
  75. query = """
  76. query {
  77. organization(login: "notofonts") {
  78. repositories(first: 100%s) {
  79. nodes {
  80. releases(last: 100) {
  81. nodes {
  82. publishedAt
  83. tagName
  84. url
  85. }
  86. }
  87. }
  88. pageInfo {
  89. hasNextPage
  90. endCursor
  91. }
  92. }
  93. }
  94. }
  95. """ % (
  96. pagination
  97. )
  98. result = run_query(query)
  99. if "data" in result:
  100. result = result["data"]["organization"]["repositories"]
  101. else:
  102. print(result)
  103. return
  104. for repo in result["nodes"]:
  105. for release in repo["releases"]["nodes"]:
  106. if not release["publishedAt"] or release["publishedAt"] <= "2024-01-01":
  107. continue
  108. if "notofonts.github.io" in release["url"]:
  109. continue
  110. published = datetime.fromisoformat(release["publishedAt"].replace("Z",""))
  111. rpm.setdefault(published.month, []).append(
  112. {"tag": release["tagName"], "url": release["url"]}
  113. )
  114. if result["pageInfo"]["hasNextPage"]:
  115. endcursor = result["pageInfo"]["endCursor"]
  116. pagination = f'after:"{endcursor}"'
  117. else:
  118. break
  119. all_results = []
  120. sources = json.load(open("fontrepos.json"))
  121. if __name__ == '__main__':
  122. get_releases(releases_per_month)
  123. all_results = process_map(do_one_repo, list(sources.keys()))
  124. all_results = [item for sublist in all_results for item in sublist]
  125. json.dump(
  126. all_results,
  127. open("all-results.json", "w"),
  128. indent=True,
  129. sort_keys=True,
  130. )
  131. def last_day_of_month(any_day):
  132. next_month = any_day.replace(day=28) + timedelta(days=4)
  133. return next_month - timedelta(days=next_month.day)
  134. def open_at(date, issues):
  135. return [x for x in issues if (not x["closedAt"] or x["closedAt"] > str(date)) and x["createdAt"] <= str(date)]
  136. def opened_during(start, end, issues):
  137. return [
  138. x for x in issues if x["createdAt"] >= str(start) and x["createdAt"] <= str(end)
  139. ]
  140. def closed_during(start, end, issues):
  141. return [
  142. x
  143. for x in issues
  144. if x["closedAt"] and x["closedAt"] >= str(start) and x["closedAt"] <= str(end)
  145. ]
  146. open_issues = [x for x in all_results if not x["closedAt"]]
  147. open_per_repo = Counter([x["repo"] for x in open_issues])
  148. open_per_tier = Counter([x["tier"] for x in open_issues])
  149. totals_per_month = {}
  150. closed_per_month = {}
  151. opened_per_month = {}
  152. year_to_date = range(1, this.month + 1)
  153. for i in year_to_date:
  154. start_of_month = this.replace(month=i, day=1)
  155. end_of_month = last_day_of_month(start_of_month)
  156. totals_per_month[i] = len(open_at(start_of_month, all_results))
  157. closed_per_month[i] = len(closed_during(start_of_month, end_of_month, all_results))
  158. opened_per_month[i] = len(opened_during(start_of_month, end_of_month, all_results))
  159. # Save it
  160. json.dump(
  161. {
  162. "opened_per_month": opened_per_month,
  163. "closed_per_month": closed_per_month,
  164. "totals_per_month": totals_per_month,
  165. "open_per_repo": open_per_repo,
  166. "open_per_tier": open_per_tier,
  167. "releases_per_month": releases_per_month,
  168. },
  169. open("docs/issues.json", "w"),
  170. indent=True,
  171. sort_keys=True,
  172. )
  173. months = [
  174. "Jan",
  175. "Feb",
  176. "Mar",
  177. "Apr",
  178. "May",
  179. "Jun",
  180. "Jul",
  181. "Aug",
  182. "Sep",
  183. "Oct",
  184. "Nov",
  185. "Dec",
  186. ][: this.month]
  187. totals = [totals_per_month[i] for i in year_to_date]
  188. from pybars import Compiler
  189. import matplotlib
  190. matplotlib.use("Agg")
  191. import matplotlib.pyplot as plt
  192. fig, ax1 = plt.subplots()
  193. ax2 = ax1.twinx()
  194. bars = ax1.bar(
  195. months,
  196. [totals_per_month[i] for i in year_to_date],
  197. label="Total",
  198. color="#aaaaffaa",
  199. )
  200. ax1.bar_label(bars)
  201. ax1.axes.get_yaxis().set_visible(False)
  202. lns1 = ax2.plot(
  203. months,
  204. [opened_per_month[i] for i in year_to_date],
  205. marker=".",
  206. label="Opened",
  207. color="red",
  208. linewidth=3,
  209. )
  210. lns2 = ax2.plot(
  211. months,
  212. [closed_per_month[i] for i in year_to_date],
  213. marker="+",
  214. label="Closed",
  215. color="green",
  216. linewidth=3,
  217. )
  218. lines, labels = ax1.get_legend_handles_labels()
  219. lines2, labels2 = ax2.get_legend_handles_labels()
  220. ax2.legend(lines + lines2, labels + labels2, loc="lower left")
  221. plt.title("Issues opened, closed, and open")
  222. plt.savefig("docs/open-closed.png")
  223. ## Top 10 scripts
  224. top_10 = sorted(open_per_repo.most_common(10), key=lambda x: -x[1])
  225. labels, values = list(zip(*top_10))
  226. fig, ax = plt.subplots()
  227. bars = ax.bar(labels, values)
  228. ax.bar_label(bars)
  229. plt.title("Repositories with most open issues")
  230. plt.xticks(rotation=60)
  231. plt.tight_layout()
  232. plt.savefig("docs/top-10.png")
  233. # Low hanging fruit and tiers
  234. low_hanging = {}
  235. tiers = {1: [], 2: [], 3: [], 4: [], 5: []}
  236. for k, v in open_per_repo.items():
  237. if v == 0:
  238. continue
  239. tier = sources[k].get("tier", 3)
  240. tiers[tier].append({"repo": k, "issues": v})
  241. if v > 10:
  242. continue
  243. low_hanging.setdefault(v, []).append(k)
  244. low_hanging = [
  245. {"issues": k, "repos": low_hanging[k]} for k in sorted(low_hanging.keys())
  246. ]
  247. tiers = {k: sorted(v, key=lambda i: -i["issues"]) for k, v in tiers.items()}
  248. labels = [1, 2, 3, 4, 5]
  249. values = [open_per_tier.get(l, 0) for l in labels]
  250. fig, ax = plt.subplots()
  251. bars = ax.bar(labels, values)
  252. ax.bar_label(bars)
  253. plt.title("Open issues per tier")
  254. plt.tight_layout()
  255. plt.savefig("docs/per-tier.png")
  256. ## Releases per month
  257. release_count_per_month = [len(releases_per_month.get(i, [])) for i in year_to_date]
  258. fig, ax = plt.subplots()
  259. bars = ax.bar(months, release_count_per_month)
  260. ax.bar_label(bars)
  261. plt.title("Releases per month")
  262. plt.savefig("docs/releases.png")
  263. monthly_stats = [
  264. {
  265. "month": months[i - 1],
  266. "opened": opened_per_month.get(i, 0),
  267. "closed": closed_per_month.get(i, 0),
  268. "releases": releases_per_month.get(i, []),
  269. "releases_count": len(releases_per_month.get(i, [])),
  270. }
  271. for i in year_to_date
  272. ]
  273. compiler = Compiler()
  274. template = open("scripts/analytics-template.html", "r").read()
  275. template = compiler.compile(template)
  276. output = template(
  277. {
  278. "monthly_stats": monthly_stats,
  279. "top_10": [{"repo": k, "count": v} for k, v in top_10],
  280. "low_hanging": low_hanging,
  281. "tiers": tiers,
  282. }
  283. )
  284. with open("docs/analytics.html", "w") as fh:
  285. fh.write(output)