/* profiler.c */

#include "profiler.h"
#include "darray.h"
#include "dstr.h"
#include "platform.h"
#include "threading.h"

#include <inttypes.h>
#include <math.h>
#include <zlib.h>

//#define TRACK_OVERHEAD
struct profiler_snapshot {
        DARRAY(profiler_snapshot_entry_t) roots;
};

struct profiler_snapshot_entry {
        const char *name;
        profiler_time_entries_t times;
        uint64_t min_time;
        uint64_t max_time;
        uint64_t overall_count;
        profiler_time_entries_t times_between_calls;
        uint64_t expected_time_between_calls;
        uint64_t min_time_between_calls;
        uint64_t max_time_between_calls;
        uint64_t overall_between_calls_count;
        DARRAY(profiler_snapshot_entry_t) children;
};
typedef struct profiler_time_entry profiler_time_entry;

typedef struct profile_call profile_call;
struct profile_call {
        const char *name;
#ifdef TRACK_OVERHEAD
        uint64_t overhead_start;
#endif
        uint64_t start_time;
        uint64_t end_time;
#ifdef TRACK_OVERHEAD
        uint64_t overhead_end;
#endif
        uint64_t expected_time_between_calls;
        DARRAY(profile_call) children;
        profile_call *parent;
};
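
/* Each thread builds a tree of profile_call nodes while profiling is
 * active: profile_start() pushes a child under the current thread's
 * context, profile_end() pops it. Only when a root call (one with no
 * parent) ends is the finished tree merged into the shared entries. */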
typedef struct profile_times_table_entry profile_times_table_entry;
struct profile_times_table_entry {
        size_t probes;
        profiler_time_entry entry;
};

typedef struct profile_times_table profile_times_table;
struct profile_times_table {
        size_t size;
        size_t occupied;
        size_t max_probe_count;
        profile_times_table_entry *entries;

        size_t old_start_index;
        size_t old_occupied;
        profile_times_table_entry *old_entries;
};
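
/* profile_times_table is an open-addressing hash map used as a histogram:
 * the key is a time delta in microseconds, the value a count of how often
 * that delta was observed. probes stores each slot's probe distance (0 means
 * empty). When the table grows, the previous generation is kept in
 * old_entries and drained incrementally (see migrate_old_entries) so that a
 * single merge never pays the full rehash cost. */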
typedef struct profile_entry profile_entry;
struct profile_entry {
        const char *name;
        profile_times_table times;
#ifdef TRACK_OVERHEAD
        profile_times_table overhead;
#endif
        uint64_t expected_time_between_calls;
        profile_times_table times_between_calls;
        DARRAY(profile_entry) children;
};

typedef struct profile_root_entry profile_root_entry;
struct profile_root_entry {
        pthread_mutex_t *mutex;
        const char *name;
        profile_entry *entry;
        profile_call *prev_call;
};
static inline uint64_t diff_ns_to_usec(uint64_t prev, uint64_t next)
{
        return (next - prev + 500) / 1000;
}
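
/* Rounds to the nearest microsecond rather than truncating, e.g.
 * diff_ns_to_usec(0, 1499) == 1 but diff_ns_to_usec(0, 1500) == 2. */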
static inline void update_max_probes(profile_times_table *map, size_t val)
{
        map->max_probe_count = map->max_probe_count < val ?
                        val : map->max_probe_count;
}

static void migrate_old_entries(profile_times_table *map, bool limit_items);
static void grow_hashmap(profile_times_table *map, uint64_t usec,
                uint64_t count);
static void add_hashmap_entry(profile_times_table *map, uint64_t usec,
                uint64_t count)
{
        size_t probes = 1;
        size_t start = usec % map->size;
        for (;; probes += 1) {
                size_t idx = (start + probes) % map->size;
                profile_times_table_entry *entry = &map->entries[idx];

                /* Empty slot: claim it. */
                if (!entry->probes) {
                        entry->probes = probes;
                        entry->entry.time_delta = usec;
                        entry->entry.count = count;
                        map->occupied += 1;
                        update_max_probes(map, probes);
                        return;
                }

                /* Same key: just bump the count. */
                if (entry->entry.time_delta == usec) {
                        entry->entry.count += count;
                        return;
                }

                if (entry->probes >= probes)
                        continue;

                if (map->occupied / (double)map->size > 0.7) {
                        grow_hashmap(map, usec, count);
                        return;
                }

                /* Robin Hood step: displace the "richer" resident entry and
                 * continue inserting it instead. */
                size_t old_probes = entry->probes;
                uint64_t old_count = entry->entry.count;
                uint64_t old_usec = entry->entry.time_delta;

                entry->probes = probes;
                entry->entry.count = count;
                entry->entry.time_delta = usec;
                update_max_probes(map, probes);

                probes = old_probes;
                count = old_count;
                usec = old_usec;
                start = usec % map->size;
        }
}
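
/* Note the Robin Hood invariant above: an inserting entry that has probed
 * farther than the resident entry steals its slot, which keeps worst-case
 * probe counts low without tombstones. Probe counts start at 1, so slot
 * (start + 1) % size is the "home" position for a key. */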
static void init_hashmap(profile_times_table *map, size_t size)
{
        map->size = size;
        map->occupied = 0;
        map->max_probe_count = 0;
        map->entries = bzalloc(sizeof(profile_times_table_entry) * size);
        map->old_start_index = 0;
        map->old_occupied = 0;
        map->old_entries = NULL;
}

static void migrate_old_entries(profile_times_table *map, bool limit_items)
{
        if (!map->old_entries)
                return;

        if (!map->old_occupied) {
                bfree(map->old_entries);
                map->old_entries = NULL;
                return;
        }

        /* Move at most 8 old entries per call when limit_items is set. */
        for (size_t i = 0; !limit_items || i < 8;
                        i++, map->old_start_index++) {
                if (!map->old_occupied)
                        return;

                profile_times_table_entry *entry =
                        &map->old_entries[map->old_start_index];
                if (!entry->probes)
                        continue;

                add_hashmap_entry(map, entry->entry.time_delta,
                                entry->entry.count);
                map->old_occupied -= 1;
        }
}
static void grow_hashmap(profile_times_table *map, uint64_t usec,
                uint64_t count)
{
        migrate_old_entries(map, false);

        size_t old_size = map->size;
        size_t old_occupied = map->occupied;
        profile_times_table_entry *entries = map->entries;

        init_hashmap(map, (old_size * 2 < 16) ? 16 : (old_size * 2));

        map->old_occupied = old_occupied;
        map->old_entries = entries;

        add_hashmap_entry(map, usec, count);
}
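
/* Growth first flushes any remaining previous generation, then doubles the
 * table (minimum size 16) and parks the full old table in old_entries. The
 * old entries are then re-inserted lazily, 8 at a time, from merge_call(). */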
static profile_entry *init_entry(profile_entry *entry, const char *name)
{
        entry->name = name;
        init_hashmap(&entry->times, 1);
#ifdef TRACK_OVERHEAD
        init_hashmap(&entry->overhead, 1);
#endif
        entry->expected_time_between_calls = 0;
        init_hashmap(&entry->times_between_calls, 1);
        return entry;
}

static profile_entry *get_child(profile_entry *parent, const char *name)
{
        const size_t num = parent->children.num;
        for (size_t i = 0; i < num; i++) {
                profile_entry *child = &parent->children.array[i];
                if (child->name == name)
                        return child;
        }

        return init_entry(da_push_back_new(parent->children), name);
}
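
/* Names are compared by pointer identity (child->name == name), not with
 * strcmp(); the profiler assumes every name is either a string literal or a
 * string interned via profile_store_name(), so equal names share an address.
 * A sketch of what this implies for callers:
 *
 *     profile_start("render");          // fine: literal, stable address
 *     profile_end("render");
 *
 *     char buf[32];
 *     snprintf(buf, sizeof(buf), "src %d", i);
 *     profile_start(buf);               // wrong: stack buffer, not interned
 *
 * Dynamically built names must go through profile_store_name() instead. */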
static void merge_call(profile_entry *entry, profile_call *call,
                profile_call *prev_call)
{
        const size_t num = call->children.num;
        for (size_t i = 0; i < num; i++) {
                profile_call *child = &call->children.array[i];
                merge_call(get_child(entry, child->name), child, NULL);
        }

        if (entry->expected_time_between_calls != 0 && prev_call) {
                migrate_old_entries(&entry->times_between_calls, true);
                uint64_t usec = diff_ns_to_usec(prev_call->start_time,
                                call->start_time);
                add_hashmap_entry(&entry->times_between_calls, usec, 1);
        }

        migrate_old_entries(&entry->times, true);
        uint64_t usec = diff_ns_to_usec(call->start_time, call->end_time);
        add_hashmap_entry(&entry->times, usec, 1);

#ifdef TRACK_OVERHEAD
        migrate_old_entries(&entry->overhead, true);
        usec = diff_ns_to_usec(call->overhead_start, call->start_time);
        usec += diff_ns_to_usec(call->end_time, call->overhead_end);
        add_hashmap_entry(&entry->overhead, usec, 1);
#endif
}
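
/* merge_call() folds one finished call tree into the persistent
 * profile_entry tree: each call contributes one sample (duration in µs) to
 * its entry's times histogram, and root calls additionally record the gap
 * since the previous root call when an expected interval was registered. */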
static bool enabled = false;
static pthread_mutex_t root_mutex = PTHREAD_MUTEX_INITIALIZER;
static DARRAY(profile_root_entry) root_entries;

#ifdef _MSC_VER
static __declspec(thread) profile_call *thread_context = NULL;
static __declspec(thread) bool thread_enabled = true;
#else
static __thread profile_call *thread_context = NULL;
static __thread bool thread_enabled = true;
#endif
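
/* thread_enabled caches the global enabled flag per thread so the hot path
 * (profile_start/profile_end) can skip locking root_mutex. Once a thread
 * observes the profiler as disabled it stays disabled until it calls
 * profile_reenable_thread(). */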
void profiler_start(void)
{
        pthread_mutex_lock(&root_mutex);
        enabled = true;
        pthread_mutex_unlock(&root_mutex);
}

void profiler_stop(void)
{
        pthread_mutex_lock(&root_mutex);
        enabled = false;
        pthread_mutex_unlock(&root_mutex);
}

void profile_reenable_thread(void)
{
        if (thread_enabled)
                return;

        pthread_mutex_lock(&root_mutex);
        thread_enabled = enabled;
        pthread_mutex_unlock(&root_mutex);
}

/* Returns with root_mutex held on success. */
static bool lock_root(void)
{
        pthread_mutex_lock(&root_mutex);
        if (!enabled) {
                pthread_mutex_unlock(&root_mutex);
                thread_enabled = false;
                return false;
        }

        return true;
}
static profile_root_entry *get_root_entry(const char *name)
{
        profile_root_entry *r_entry = NULL;

        for (size_t i = 0; i < root_entries.num; i++) {
                if (root_entries.array[i].name == name) {
                        r_entry = &root_entries.array[i];
                        break;
                }
        }

        if (!r_entry) {
                r_entry = da_push_back_new(root_entries);
                r_entry->mutex = bmalloc(sizeof(pthread_mutex_t));
                pthread_mutex_init(r_entry->mutex, NULL);

                r_entry->name = name;
                r_entry->entry = bzalloc(sizeof(profile_entry));
                init_entry(r_entry->entry, name);
        }

        return r_entry;
}

void profile_register_root(const char *name,
                uint64_t expected_time_between_calls)
{
        if (!lock_root())
                return;

        /* expected_time_between_calls is passed in ns, stored in µs. */
        get_root_entry(name)->entry->expected_time_between_calls =
                (expected_time_between_calls + 500) / 1000;
        pthread_mutex_unlock(&root_mutex);
}
static void free_call_context(profile_call *context);

static void merge_context(profile_call *context)
{
        pthread_mutex_t *mutex = NULL;
        profile_entry *entry = NULL;
        profile_call *prev_call = NULL;

        if (!lock_root()) {
                free_call_context(context);
                return;
        }

        profile_root_entry *r_entry = get_root_entry(context->name);

        mutex = r_entry->mutex;
        entry = r_entry->entry;
        prev_call = r_entry->prev_call;

        r_entry->prev_call = context;

        pthread_mutex_lock(mutex);
        pthread_mutex_unlock(&root_mutex);

        merge_call(entry, context, prev_call);

        pthread_mutex_unlock(mutex);

        free_call_context(prev_call);
}
void profile_start(const char *name)
{
        if (!thread_enabled)
                return;

        profile_call new_call = {
                .name = name,
#ifdef TRACK_OVERHEAD
                .overhead_start = os_gettime_ns(),
#endif
                .parent = thread_context,
        };

        profile_call *call = NULL;

        if (new_call.parent) {
                size_t idx = da_push_back(new_call.parent->children,
                                &new_call);
                call = &new_call.parent->children.array[idx];
        } else {
                call = bmalloc(sizeof(profile_call));
                memcpy(call, &new_call, sizeof(profile_call));
        }

        thread_context = call;
        call->start_time = os_gettime_ns();
}
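
/* Typical pairing on one thread (sketch; render_frame is a hypothetical
 * workload):
 *
 *     profile_start("frame_render");
 *     render_frame();
 *     profile_end("frame_render");
 *
 * The timestamp is taken as the very last step here and as the very first
 * step in profile_end(), keeping darray bookkeeping out of the measured
 * interval as far as possible. */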
void profile_end(const char *name)
{
        uint64_t end = os_gettime_ns();
        if (!thread_enabled)
                return;

        profile_call *call = thread_context;
        if (!call) {
                blog(LOG_ERROR, "Called profile end with no active profile");
                return;
        }

        if (!call->name)
                call->name = name;

        if (call->name != name) {
                blog(LOG_ERROR, "Called profile end with mismatching name: "
                                "start(\"%s\"[%p]) <-> end(\"%s\"[%p])",
                                call->name, call->name, name, name);

                profile_call *parent = call->parent;
                while (parent && parent->parent && parent->name != name)
                        parent = parent->parent;

                if (!parent || parent->name != name)
                        return;

                /* Unwind unmatched inner calls until we reach `name`. */
                while (call->name != name) {
                        profile_end(call->name);
                        call = call->parent;
                }
        }

        thread_context = call->parent;

        call->end_time = end;
#ifdef TRACK_OVERHEAD
        call->overhead_end = os_gettime_ns();
#endif

        if (call->parent)
                return;

        merge_context(call);
}
/* Sorts descending by time_delta. */
static int profiler_time_entry_compare(const void *first, const void *second)
{
        int64_t diff = ((profiler_time_entry *)second)->time_delta -
                        ((profiler_time_entry *)first)->time_delta;
        return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
}
static uint64_t copy_map_to_array(profile_times_table *map,
                profiler_time_entries_t *entry_buffer,
                uint64_t *min_, uint64_t *max_)
{
        migrate_old_entries(map, false);

        da_reserve((*entry_buffer), map->occupied);
        da_resize((*entry_buffer), 0);

        uint64_t min__ = ~(uint64_t)0;
        uint64_t max__ = 0;
        uint64_t calls = 0;

        for (size_t i = 0; i < map->size; i++) {
                if (!map->entries[i].probes)
                        continue;

                profiler_time_entry *entry = &map->entries[i].entry;
                da_push_back((*entry_buffer), entry);

                calls += entry->count;
                min__ = (min__ < entry->time_delta) ? min__
                                : entry->time_delta;
                max__ = (max__ > entry->time_delta) ? max__
                                : entry->time_delta;
        }

        if (min_)
                *min_ = min__;
        if (max_)
                *max_ = max__;

        return calls;
}
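
/* The buffer comes back in hash-table order; sort_snapshot_entry() later
 * sorts it descending via profiler_time_entry_compare(), which is what the
 * percentile scans in gather_stats()/gather_stats_between() rely on. The
 * return value is the total number of recorded calls (sum of all counts). */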
typedef void (*profile_entry_print_func)(profiler_snapshot_entry_t *entry,
                struct dstr *indent_buffer, struct dstr *output_buffer,
                unsigned indent, uint64_t active, uint64_t parent_calls);

static void make_indent_string(struct dstr *indent_buffer, unsigned indent,
                uint64_t active)
{
        indent_buffer->len = 0;

        if (!indent) {
                dstr_cat_ch(indent_buffer, 0);
                return;
        }

        for (size_t i = 0; i < indent; i++) {
                const char *fragment = "";
                bool last = i + 1 == indent;
                if (active & ((uint64_t)1 << i))
                        fragment = last ? " ┣" : " ┃";
                else
                        fragment = last ? " ┗" : "  ";

                dstr_cat(indent_buffer, fragment);
        }
}
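
/* `active` is a bitmask with one bit per indent level: bit i is set while
 * the ancestor at depth i still has siblings to print below it, which picks
 * the ┃/┣ connectors over ┗ and blank padding when drawing the tree. */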
static void gather_stats(uint64_t expected_time_between_calls,
                profiler_time_entries_t *entries, uint64_t calls,
                uint64_t *percentile99, uint64_t *median,
                double *percent_within_bounds)
{
        if (!entries->num) {
                *percentile99 = 0;
                *median = 0;
                *percent_within_bounds = 0.;
                return;
        }

        /*if (entry_buffer->num > 2)
                blog(LOG_INFO, "buffer-size %lu, overall count %llu\n"
                                "map-size %lu, occupied %lu, probes %lu",
                                entry_buffer->num, calls,
                                map->size, map->occupied,
                                map->max_probe_count);*/

        /* Entries are sorted descending, so the slot where the running count
         * crosses 1% of all calls is the 99th percentile, and the slot where
         * it crosses 50% is the median. */
        uint64_t accu = 0;
        for (size_t i = 0; i < entries->num; i++) {
                uint64_t old_accu = accu;
                accu += entries->array[i].count;

                if (old_accu < calls * 0.01 && accu >= calls * 0.01)
                        *percentile99 = entries->array[i].time_delta;

                else if (old_accu < calls * 0.5 && accu >= calls * 0.5) {
                        *median = entries->array[i].time_delta;
                        break;
                }
        }

        *percent_within_bounds = 0.;
        if (!expected_time_between_calls)
                return;

        accu = 0;
        for (size_t i = 0; i < entries->num; i++) {
                profiler_time_entry *entry = &entries->array[i];
                if (entry->time_delta < expected_time_between_calls)
                        break;

                accu += entry->count;
        }

        *percent_within_bounds = (1. - (double)accu / calls) * 100;
}
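
/* Worked example: with descending entries {16 ms ×2, 10 ms ×58, 9 ms ×40}
 * and calls == 100, the running count crosses 1 (1% of 100) inside the
 * 16 ms bucket, so percentile99 = 16 ms; it crosses 50 inside the 10 ms
 * bucket, so median = 10 ms. */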
static void profile_print_entry(profiler_snapshot_entry_t *entry,
                struct dstr *indent_buffer, struct dstr *output_buffer,
                unsigned indent, uint64_t active, uint64_t parent_calls)
{
        uint64_t calls = entry->overall_count;
        uint64_t min_ = entry->min_time;
        uint64_t max_ = entry->max_time;
        uint64_t percentile99 = 0;
        uint64_t median = 0;
        double percent_within_bounds = 0.;
        gather_stats(entry->expected_time_between_calls, &entry->times,
                        calls, &percentile99, &median,
                        &percent_within_bounds);

        make_indent_string(indent_buffer, indent, active);

        if (min_ == max_) {
                dstr_printf(output_buffer, "%s%s: %g ms",
                                indent_buffer->array, entry->name,
                                min_ / 1000.);
        } else {
                dstr_printf(output_buffer, "%s%s: min=%g ms, median=%g ms, "
                                "max=%g ms, 99th percentile=%g ms",
                                indent_buffer->array, entry->name,
                                min_ / 1000., median / 1000., max_ / 1000.,
                                percentile99 / 1000.);

                if (entry->expected_time_between_calls) {
                        double expected_ms =
                                entry->expected_time_between_calls / 1000.;
                        dstr_catf(output_buffer, ", %g%% below %g ms",
                                        percent_within_bounds, expected_ms);
                }
        }

        if (parent_calls && calls != parent_calls) {
                double calls_per_parent = (double)calls / parent_calls;
                if (lround(calls_per_parent * 10) != 10)
                        dstr_catf(output_buffer,
                                        ", %g calls per parent call",
                                        calls_per_parent);
        }

        blog(LOG_INFO, "%s", output_buffer->array);

        active |= (uint64_t)1 << indent;
        for (size_t i = 0; i < entry->children.num; i++) {
                /* Clear this level's bit for the last child. */
                if ((i + 1) == entry->children.num)
                        active &= ((uint64_t)1 << indent) - 1;

                profile_print_entry(&entry->children.array[i],
                                indent_buffer, output_buffer,
                                indent + 1, active, calls);
        }
}
static void gather_stats_between(profiler_time_entries_t *entries,
                uint64_t calls, uint64_t lower_bound, uint64_t upper_bound,
                uint64_t min_, uint64_t max_, uint64_t *median,
                double *percent, double *lower, double *higher)
{
        *median = 0;
        *percent = 0.;
        *lower = 0.;
        *higher = 0.;

        if (!entries->num)
                return;

        uint64_t accu = 0;
        for (size_t i = 0; i < entries->num; i++) {
                accu += entries->array[i].count;
                if (accu < calls * 0.5)
                        continue;

                *median = entries->array[i].time_delta;
                break;
        }

        bool found_upper_bound = max_ <= upper_bound;
        bool found_lower_bound = false;

        if (min_ >= upper_bound) {
                *higher = 100.;
                return;
        }

        if (found_upper_bound && min_ >= lower_bound) {
                *percent = 100.;
                return;
        }

        accu = 0;
        for (size_t i = 0; i < entries->num; i++) {
                uint64_t delta = entries->array[i].time_delta;

                if (!found_upper_bound && delta <= upper_bound) {
                        *higher = (double)accu / calls * 100;
                        accu = 0;
                        found_upper_bound = true;
                }

                if (!found_lower_bound && delta < lower_bound) {
                        *percent = (double)accu / calls * 100;
                        accu = 0;
                        found_lower_bound = true;
                }

                accu += entries->array[i].count;
        }

        if (!found_upper_bound) {
                *higher = 100.;
        } else if (!found_lower_bound) {
                *percent = (double)accu / calls * 100;
        } else {
                *lower = (double)accu / calls * 100;
        }
}
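
/* One descending pass splits the samples into three buckets relative to
 * [lower_bound, upper_bound]: `higher` accumulates everything above the
 * upper bound, `percent` everything inside the bounds, and `lower` the
 * remainder below. The caller passes expected_time * 0.98 and
 * expected_time * 1.02 as bounds, i.e. a ±2% window. */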
static void profile_print_entry_expected(profiler_snapshot_entry_t *entry,
                struct dstr *indent_buffer, struct dstr *output_buffer,
                unsigned indent, uint64_t active, uint64_t parent_calls)
{
        UNUSED_PARAMETER(parent_calls);

        if (!entry->expected_time_between_calls)
                return;

        uint64_t expected_time = entry->expected_time_between_calls;
        uint64_t min_ = entry->min_time_between_calls;
        uint64_t max_ = entry->max_time_between_calls;
        uint64_t median = 0;
        double percent = 0.;
        double lower = 0.;
        double higher = 0.;
        gather_stats_between(&entry->times_between_calls,
                        entry->overall_between_calls_count,
                        (uint64_t)(expected_time * 0.98),
                        (uint64_t)(expected_time * 1.02 + 0.5),
                        min_, max_,
                        &median, &percent, &lower, &higher);

        make_indent_string(indent_buffer, indent, active);

        blog(LOG_INFO, "%s%s: min=%g ms, median=%g ms, max=%g ms, %g%% "
                        "within ±2%% of %g ms (%g%% lower, %g%% higher)",
                        indent_buffer->array, entry->name,
                        min_ / 1000., median / 1000., max_ / 1000., percent,
                        expected_time / 1000.,
                        lower, higher);

        active |= (uint64_t)1 << indent;
        for (size_t i = 0; i < entry->children.num; i++) {
                if ((i + 1) == entry->children.num)
                        active &= ((uint64_t)1 << indent) - 1;

                profile_print_entry_expected(&entry->children.array[i],
                                indent_buffer, output_buffer,
                                indent + 1, active, 0);
        }
}
void profile_print_func(const char *intro, profile_entry_print_func print,
                profiler_snapshot_t *snap)
{
        struct dstr indent_buffer = {0};
        struct dstr output_buffer = {0};

        bool free_snapshot = !snap;
        if (!snap)
                snap = profile_snapshot_create();

        blog(LOG_INFO, "%s", intro);
        for (size_t i = 0; i < snap->roots.num; i++) {
                print(&snap->roots.array[i],
                                &indent_buffer, &output_buffer, 0, 0, 0);
        }
        blog(LOG_INFO, "=================================================");

        if (free_snapshot)
                profile_snapshot_free(snap);

        dstr_free(&output_buffer);
        dstr_free(&indent_buffer);
}

void profiler_print(profiler_snapshot_t *snap)
{
        profile_print_func("== Profiler Results =============================",
                        profile_print_entry, snap);
}

void profiler_print_time_between_calls(profiler_snapshot_t *snap)
{
        profile_print_func("== Profiler Time Between Calls ==================",
                        profile_print_entry_expected, snap);
}
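
/* Minimal end-to-end sketch of the public API as used from one thread
 * (function names are real, the workload and frame count are hypothetical):
 *
 *     profiler_start();
 *     profile_register_root("main_loop", 16666667);   // ~60 Hz, in ns
 *     for (int i = 0; i < n_frames; i++) {
 *             profile_start("main_loop");
 *             tick();
 *             profile_end("main_loop");
 *     }
 *     profiler_stop();
 *     profiler_print(NULL);                 // NULL: takes its own snapshot
 *     profiler_print_time_between_calls(NULL);
 *     profiler_free();
 */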
static void free_call_children(profile_call *call)
{
        if (!call)
                return;

        const size_t num = call->children.num;
        for (size_t i = 0; i < num; i++)
                free_call_children(&call->children.array[i]);

        da_free(call->children);
}

static void free_call_context(profile_call *context)
{
        free_call_children(context);
        bfree(context);
}

static void free_hashmap(profile_times_table *map)
{
        map->size = 0;
        bfree(map->entries);
        map->entries = NULL;
        bfree(map->old_entries);
        map->old_entries = NULL;
}

static void free_profile_entry(profile_entry *entry)
{
        for (size_t i = 0; i < entry->children.num; i++)
                free_profile_entry(&entry->children.array[i]);

        free_hashmap(&entry->times);
#ifdef TRACK_OVERHEAD
        free_hashmap(&entry->overhead);
#endif
        free_hashmap(&entry->times_between_calls);

        da_free(entry->children);
}
void profiler_free(void)
{
        DARRAY(profile_root_entry) old_root_entries = {0};

        pthread_mutex_lock(&root_mutex);
        enabled = false;
        da_move(old_root_entries, root_entries);
        pthread_mutex_unlock(&root_mutex);

        for (size_t i = 0; i < old_root_entries.num; i++) {
                profile_root_entry *entry = &old_root_entries.array[i];

                /* Wait for any in-flight merge on this root to finish. */
                pthread_mutex_lock(entry->mutex);
                pthread_mutex_unlock(entry->mutex);

                pthread_mutex_destroy(entry->mutex);
                bfree(entry->mutex);
                entry->mutex = NULL;

                free_call_context(entry->prev_call);
                free_profile_entry(entry->entry);
                bfree(entry->entry);
        }

        da_free(old_root_entries);
}
/* ------------------------------------------------------------------------- */
/* Profiler name storage */

struct profiler_name_store {
        pthread_mutex_t mutex;
        DARRAY(char*) names;
};
profiler_name_store_t *profiler_name_store_create(void)
{
        profiler_name_store_t *store = bzalloc(sizeof(profiler_name_store_t));

        if (pthread_mutex_init(&store->mutex, NULL))
                goto error;

        return store;

error:
        bfree(store);
        return NULL;
}

void profiler_name_store_free(profiler_name_store_t *store)
{
        if (!store)
                return;

        for (size_t i = 0; i < store->names.num; i++)
                bfree(store->names.array[i]);

        da_free(store->names);
        bfree(store);
}
const char *profile_store_name(profiler_name_store_t *store,
                const char *format, ...)
{
        va_list args;
        va_start(args, format);

        struct dstr str = {0};
        dstr_vprintf(&str, format, args);

        va_end(args);

        const char *result = NULL;

        pthread_mutex_lock(&store->mutex);
        size_t idx = da_push_back(store->names, &str.array);
        result = store->names.array[idx];
        pthread_mutex_unlock(&store->mutex);

        return result;
}
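
/* The returned pointer stays valid and stable until the store is freed
 * (the darray holds char* elements, so growth never moves the strings),
 * which is what makes it usable as an interned profiler name. Sketch,
 * where store and src_id are hypothetical caller-owned values:
 *
 *     const char *name = profile_store_name(store, "render(%s)", src_id);
 *     profile_start(name);
 *     ...
 *     profile_end(name);   // same pointer, so the identity check passes
 */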
/* ------------------------------------------------------------------------- */
/* Profiler data access */

static void add_entry_to_snapshot(profile_entry *entry,
                profiler_snapshot_entry_t *s_entry)
{
        s_entry->name = entry->name;

        s_entry->overall_count = copy_map_to_array(&entry->times,
                        &s_entry->times,
                        &s_entry->min_time, &s_entry->max_time);

        if ((s_entry->expected_time_between_calls =
                                entry->expected_time_between_calls))
                s_entry->overall_between_calls_count =
                        copy_map_to_array(&entry->times_between_calls,
                                        &s_entry->times_between_calls,
                                        &s_entry->min_time_between_calls,
                                        &s_entry->max_time_between_calls);

        da_reserve(s_entry->children, entry->children.num);
        for (size_t i = 0; i < entry->children.num; i++)
                add_entry_to_snapshot(&entry->children.array[i],
                                da_push_back_new(s_entry->children));
}
static void sort_snapshot_entry(profiler_snapshot_entry_t *entry)
{
        qsort(entry->times.array, entry->times.num,
                        sizeof(profiler_time_entry),
                        profiler_time_entry_compare);

        if (entry->expected_time_between_calls)
                qsort(entry->times_between_calls.array,
                                entry->times_between_calls.num,
                                sizeof(profiler_time_entry),
                                profiler_time_entry_compare);

        for (size_t i = 0; i < entry->children.num; i++)
                sort_snapshot_entry(&entry->children.array[i]);
}
profiler_snapshot_t *profile_snapshot_create(void)
{
        profiler_snapshot_t *snap = bzalloc(sizeof(profiler_snapshot_t));

        pthread_mutex_lock(&root_mutex);
        da_reserve(snap->roots, root_entries.num);
        for (size_t i = 0; i < root_entries.num; i++) {
                pthread_mutex_lock(root_entries.array[i].mutex);
                add_entry_to_snapshot(root_entries.array[i].entry,
                                da_push_back_new(snap->roots));
                pthread_mutex_unlock(root_entries.array[i].mutex);
        }
        pthread_mutex_unlock(&root_mutex);

        for (size_t i = 0; i < snap->roots.num; i++)
                sort_snapshot_entry(&snap->roots.array[i]);

        return snap;
}
static void free_snapshot_entry(profiler_snapshot_entry_t *entry)
{
        for (size_t i = 0; i < entry->children.num; i++)
                free_snapshot_entry(&entry->children.array[i]);

        da_free(entry->children);
        da_free(entry->times_between_calls);
        da_free(entry->times);
}

void profile_snapshot_free(profiler_snapshot_t *snap)
{
        if (!snap)
                return;

        for (size_t i = 0; i < snap->roots.num; i++)
                free_snapshot_entry(&snap->roots.array[i]);

        da_free(snap->roots);
        bfree(snap);
}
typedef void (*dump_csv_func)(void *data, struct dstr *buffer);

static void entry_dump_csv(struct dstr *buffer,
                const profiler_snapshot_entry_t *parent,
                const profiler_snapshot_entry_t *entry,
                dump_csv_func func, void *data)
{
        const char *parent_name = parent ? parent->name : NULL;

        for (size_t i = 0; i < entry->times.num; i++) {
                dstr_printf(buffer,
                                "%p,%p,%p,%p,%s,0,%" PRIu64 ",%" PRIu64 "\n",
                                entry, parent, entry->name, parent_name,
                                entry->name,
                                entry->times.array[i].time_delta,
                                entry->times.array[i].count);
                func(data, buffer);
        }

        for (size_t i = 0; i < entry->times_between_calls.num; i++) {
                dstr_printf(buffer,
                                "%p,%p,%p,%p,%s,%" PRIu64 ",%" PRIu64
                                ",%" PRIu64 "\n",
                                entry, parent, entry->name, parent_name,
                                entry->name,
                                entry->expected_time_between_calls,
                                entry->times_between_calls.array[i].time_delta,
                                entry->times_between_calls.array[i].count);
                func(data, buffer);
        }

        for (size_t i = 0; i < entry->children.num; i++)
                entry_dump_csv(buffer, entry, &entry->children.array[i],
                                func, data);
}
static void profiler_snapshot_dump(const profiler_snapshot_t *snap,
                dump_csv_func func, void *data)
{
        struct dstr buffer = {0};

        dstr_init_copy(&buffer, "id,parent_id,name_id,parent_name_id,name,"
                        "time_between_calls,time_delta_µs,count\n");
        func(data, &buffer);

        for (size_t i = 0; i < snap->roots.num; i++)
                entry_dump_csv(&buffer, NULL,
                                &snap->roots.array[i], func, data);

        dstr_free(&buffer);
}
static void dump_csv_fwrite(void *data, struct dstr *buffer)
{
        fwrite(buffer->array, 1, buffer->len, data);
}

bool profiler_snapshot_dump_csv(const profiler_snapshot_t *snap,
                const char *filename)
{
        FILE *f = os_fopen(filename, "wb+");
        if (!f)
                return false;

        profiler_snapshot_dump(snap, dump_csv_fwrite, f);

        fclose(f);
        return true;
}
static void dump_csv_gzwrite(void *data, struct dstr *buffer)
{
        gzwrite(data, buffer->array, (unsigned)buffer->len);
}

bool profiler_snapshot_dump_csv_gz(const profiler_snapshot_t *snap,
                const char *filename)
{
        FILE *f = os_fopen(filename, "wb");
        if (!f)
                return false;

        gzFile gz = gzdopen(fileno(f), "wb");
        if (!gz) {
                /* gzdopen did not take ownership of the descriptor. */
                fclose(f);
                return false;
        }

        profiler_snapshot_dump(snap, dump_csv_gzwrite, gz);

        /* gzclose_w also closes the descriptor obtained via fileno(f). */
        gzclose_w(gz);
        return true;
}
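
/* Dumping a snapshot to disk (sketch; "profile.csv.gz" is an arbitrary
 * example path):
 *
 *     profiler_snapshot_t *snap = profile_snapshot_create();
 *     if (!profiler_snapshot_dump_csv_gz(snap, "profile.csv.gz"))
 *             blog(LOG_WARNING, "failed to write profiler dump");
 *     profile_snapshot_free(snap);
 */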
size_t profiler_snapshot_num_roots(profiler_snapshot_t *snap)
{
        return snap ? snap->roots.num : 0;
}

void profiler_snapshot_enumerate_roots(profiler_snapshot_t *snap,
                profiler_entry_enum_func func, void *context)
{
        if (!snap)
                return;

        for (size_t i = 0; i < snap->roots.num; i++)
                if (!func(context, &snap->roots.array[i]))
                        break;
}

void profiler_snapshot_filter_roots(profiler_snapshot_t *snap,
                profiler_name_filter_func func, void *data)
{
        for (size_t i = 0; i < snap->roots.num;) {
                bool remove = false;
                bool res = func(data, snap->roots.array[i].name, &remove);

                if (remove) {
                        free_snapshot_entry(&snap->roots.array[i]);
                        da_erase(snap->roots, i);
                }

                if (!res)
                        break;

                if (!remove)
                        i += 1;
        }
}

size_t profiler_snapshot_num_children(profiler_snapshot_entry_t *entry)
{
        return entry ? entry->children.num : 0;
}

void profiler_snapshot_enumerate_children(profiler_snapshot_entry_t *entry,
                profiler_entry_enum_func func, void *context)
{
        if (!entry)
                return;

        for (size_t i = 0; i < entry->children.num; i++)
                if (!func(context, &entry->children.array[i]))
                        break;
}

const char *profiler_snapshot_entry_name(profiler_snapshot_entry_t *entry)
{
        return entry ? entry->name : NULL;
}

profiler_time_entries_t *profiler_snapshot_entry_times(
                profiler_snapshot_entry_t *entry)
{
        return entry ? &entry->times : NULL;
}

uint64_t profiler_snapshot_entry_overall_count(
                profiler_snapshot_entry_t *entry)
{
        return entry ? entry->overall_count : 0;
}

uint64_t profiler_snapshot_entry_min_time(profiler_snapshot_entry_t *entry)
{
        return entry ? entry->min_time : 0;
}

uint64_t profiler_snapshot_entry_max_time(profiler_snapshot_entry_t *entry)
{
        return entry ? entry->max_time : 0;
}

profiler_time_entries_t *profiler_snapshot_entry_times_between_calls(
                profiler_snapshot_entry_t *entry)
{
        return entry ? &entry->times_between_calls : NULL;
}

uint64_t profiler_snapshot_entry_expected_time_between_calls(
                profiler_snapshot_entry_t *entry)
{
        return entry ? entry->expected_time_between_calls : 0;
}

uint64_t profiler_snapshot_entry_min_time_between_calls(
                profiler_snapshot_entry_t *entry)
{
        return entry ? entry->min_time_between_calls : 0;
}

uint64_t profiler_snapshot_entry_max_time_between_calls(
                profiler_snapshot_entry_t *entry)
{
        return entry ? entry->max_time_between_calls : 0;
}

uint64_t profiler_snapshot_entry_overall_between_calls_count(
                profiler_snapshot_entry_t *entry)
{
        return entry ? entry->overall_between_calls_count : 0;
}