profiler.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177
  1. #include <inttypes.h>
  2. #include "profiler.h"
  3. #include "darray.h"
  4. #include "dstr.h"
  5. #include "platform.h"
  6. #include "threading.h"
  7. #include <math.h>
  8. #include <zlib.h>
  9. //#define TRACK_OVERHEAD
/* A snapshot is a tree of per-root timing entries captured from the live
 * profiler state; see profile_snapshot_create(). */
struct profiler_snapshot {
	DARRAY(profiler_snapshot_entry_t) roots;
};
/* One node of a snapshot tree: aggregated call times (and, for registered
 * roots, times between successive calls) for a single named scope. */
struct profiler_snapshot_entry {
	const char *name;              /* interned; compared by pointer */
	profiler_time_entries_t times; /* (time_delta µs, count) buckets */
	uint64_t min_time;
	uint64_t max_time;
	uint64_t overall_count;        /* total calls across all buckets */
	profiler_time_entries_t times_between_calls;
	uint64_t expected_time_between_calls; /* µs; 0 unless registered */
	uint64_t min_time_between_calls;
	uint64_t max_time_between_calls;
	uint64_t overall_between_calls_count;
	DARRAY(profiler_snapshot_entry_t) children;
};
typedef struct profiler_time_entry profiler_time_entry;

typedef struct profile_call profile_call;

/* One active/recorded call in the per-thread call tree. Root calls are
 * heap-allocated (see profile_start); child calls live inside their
 * parent's children darray. */
struct profile_call {
	const char *name; /* interned; compared by pointer */
#ifdef TRACK_OVERHEAD
	uint64_t overhead_start;
#endif
	uint64_t start_time; /* os_gettime_ns() timestamps */
	uint64_t end_time;
#ifdef TRACK_OVERHEAD
	uint64_t overhead_end;
#endif
	uint64_t expected_time_between_calls;
	DARRAY(profile_call) children;
	profile_call *parent;
};
typedef struct profile_times_table_entry profile_times_table_entry;

/* Open-addressing hash slot: probes == 0 marks an empty slot, otherwise
 * it is the probe distance of the stored entry (Robin-Hood hashing). */
struct profile_times_table_entry {
	size_t probes;
	profiler_time_entry entry;
};
typedef struct profile_times_table profile_times_table;

/* Hash map from time delta (µs) to call count. On growth the previous
 * entry array is kept in old_entries and drained incrementally by
 * migrate_old_entries(). */
struct profile_times_table {
	size_t size;
	size_t occupied;
	size_t max_probe_count;
	profile_times_table_entry *entries;

	size_t old_start_index; /* next old slot to migrate */
	size_t old_occupied;    /* old entries still pending migration */
	profile_times_table_entry *old_entries;
};
typedef struct profile_entry profile_entry;

/* Aggregated timing data for one named scope in the merged call tree. */
struct profile_entry {
	const char *name;
	profile_times_table times;
#ifdef TRACK_OVERHEAD
	profile_times_table overhead;
#endif
	uint64_t expected_time_between_calls; /* µs; 0 unless root */
	profile_times_table times_between_calls;
	DARRAY(profile_entry) children;
};
typedef struct profile_root_entry profile_root_entry;

/* Per-root bookkeeping: mutex serializes merges into entry; prev_call is
 * the previously completed call tree, kept for between-calls stats. */
struct profile_root_entry {
	pthread_mutex_t *mutex;
	const char *name;
	profile_entry *entry;
	profile_call *prev_call;
};
  75. static inline uint64_t diff_ns_to_usec(uint64_t prev, uint64_t next)
  76. {
  77. return (next - prev + 500) / 1000;
  78. }
  79. static inline void update_max_probes(profile_times_table *map, size_t val)
  80. {
  81. map->max_probe_count = map->max_probe_count < val ?
  82. val : map->max_probe_count;
  83. }
/* Forward declarations: insertion, migration and growth are mutually
 * recursive. */
static void migrate_old_entries(profile_times_table *map, bool limit_items);
static void grow_hashmap(profile_times_table *map,
		uint64_t usec, uint64_t count);
/* Insert or accumulate a (usec -> count) bucket using Robin-Hood open
 * addressing: on collision, the resident entry with the shorter probe
 * distance is displaced and re-inserted. Grows the map once it is more
 * than ~70% full. */
static void add_hashmap_entry(profile_times_table *map, uint64_t usec,
		uint64_t count)
{
	size_t probes = 1;
	size_t start = usec % map->size;
	for (;; probes += 1) {
		/* probes starts at 1, so the home slot itself is start+1 */
		size_t idx = (start + probes) % map->size;
		profile_times_table_entry *entry = &map->entries[idx];
		if (!entry->probes) {
			/* empty slot: claim it */
			entry->probes = probes;
			entry->entry.time_delta = usec;
			entry->entry.count = count;
			map->occupied += 1;
			update_max_probes(map, probes);
			return;
		}

		if (entry->entry.time_delta == usec) {
			/* same bucket: just accumulate the call count */
			entry->entry.count += count;
			return;
		}

		if (entry->probes >= probes)
			continue;

		/* resident entry is "richer" (shorter probe distance):
		 * displace it — unless the map is too full, in which case
		 * grow and restart the insertion there */
		if (map->occupied/(double)map->size > 0.7) {
			grow_hashmap(map, usec, count);
			return;
		}

		size_t old_probes = entry->probes;
		uint64_t old_count = entry->entry.count;
		uint64_t old_usec = entry->entry.time_delta;

		entry->probes = probes;
		entry->entry.count = count;
		entry->entry.time_delta = usec;
		update_max_probes(map, probes);

		/* keep inserting the displaced entry, resuming from its
		 * own home position and previous probe distance */
		probes = old_probes;
		count = old_count;
		usec = old_usec;
		start = usec % map->size;
	}
}
  126. static void init_hashmap(profile_times_table *map, size_t size)
  127. {
  128. map->size = size;
  129. map->occupied = 0;
  130. map->max_probe_count = 0;
  131. map->entries = bzalloc(sizeof(profile_times_table_entry) * size);
  132. map->old_start_index = 0;
  133. map->old_occupied = 0;
  134. map->old_entries = NULL;
  135. }
/* Move entries from the pre-growth array into the current one. With
 * limit_items set, at most 8 slots are visited per call, amortizing the
 * migration cost over many insertions; otherwise everything is migrated.
 * The old array is freed once fully drained (on a later call). */
static void migrate_old_entries(profile_times_table *map, bool limit_items)
{
	if (!map->old_entries)
		return;

	if (!map->old_occupied) {
		bfree(map->old_entries);
		map->old_entries = NULL;
		return;
	}

	/* old_start_index only advances; termination relies on
	 * old_occupied reaching 0 before the array end */
	for (size_t i = 0; !limit_items || i < 8; i++, map->old_start_index++) {
		if (!map->old_occupied)
			return;

		profile_times_table_entry *entry =
			&map->old_entries[map->old_start_index];
		if (!entry->probes)
			continue;

		add_hashmap_entry(map, entry->entry.time_delta,
				entry->entry.count);
		map->old_occupied -= 1;
	}
}
/* Double the table (minimum size 16). Any still-pending old entries are
 * fully migrated first so only one old array exists at a time; the
 * current array becomes the new old array, then the entry that triggered
 * the growth is inserted into the fresh table. */
static void grow_hashmap(profile_times_table *map,
		uint64_t usec, uint64_t count)
{
	migrate_old_entries(map, false);

	size_t old_size = map->size;
	size_t old_occupied = map->occupied;
	profile_times_table_entry *entries = map->entries;

	init_hashmap(map, (old_size * 2 < 16) ? 16 : (old_size * 2));

	map->old_occupied = old_occupied;
	map->old_entries = entries;

	add_hashmap_entry(map, usec, count);
}
/* Prepare a freshly added profile_entry: set its name and start all time
 * tables at minimal size (they grow on demand). Returns `entry` for
 * convenient chaining with da_push_back_new(). */
static profile_entry *init_entry(profile_entry *entry, const char *name)
{
	entry->name = name;
	init_hashmap(&entry->times, 1);
#ifdef TRACK_OVERHEAD
	init_hashmap(&entry->overhead, 1);
#endif
	entry->expected_time_between_calls = 0;
	init_hashmap(&entry->times_between_calls, 1);
	return entry;
}
  180. static profile_entry *get_child(profile_entry *parent, const char *name)
  181. {
  182. const size_t num = parent->children.num;
  183. for (size_t i = 0; i < num; i++) {
  184. profile_entry *child = &parent->children.array[i];
  185. if (child->name == name)
  186. return child;
  187. }
  188. return init_entry(da_push_back_new(parent->children), name);
  189. }
/* Recursively fold a completed call tree into the aggregated entry tree.
 * prev_call is non-NULL only for the root call (children pass NULL) and
 * supplies the start time for the time-between-calls statistics. */
static void merge_call(profile_entry *entry, profile_call *call,
		profile_call *prev_call)
{
	const size_t num = call->children.num;
	for (size_t i = 0; i < num; i++) {
		profile_call *child = &call->children.array[i];
		merge_call(get_child(entry, child->name), child, NULL);
	}

	if (entry->expected_time_between_calls != 0 && prev_call) {
		migrate_old_entries(&entry->times_between_calls, true);
		uint64_t usec = diff_ns_to_usec(prev_call->start_time,
				call->start_time);
		add_hashmap_entry(&entry->times_between_calls, usec, 1);
	}

	migrate_old_entries(&entry->times, true);
	uint64_t usec = diff_ns_to_usec(call->start_time, call->end_time);
	add_hashmap_entry(&entry->times, usec, 1);

#ifdef TRACK_OVERHEAD
	migrate_old_entries(&entry->overhead, true);
	usec = diff_ns_to_usec(call->overhead_start, call->start_time);
	usec += diff_ns_to_usec(call->end_time, call->overhead_end);
	add_hashmap_entry(&entry->overhead, usec, 1);
#endif
}
/* Global profiler state: `enabled` and root_entries are guarded by
 * root_mutex; the active call context and enable flag are per-thread. */
static bool enabled = false;
static pthread_mutex_t root_mutex = PTHREAD_MUTEX_INITIALIZER;
static DARRAY(profile_root_entry) root_entries;

#ifdef _MSC_VER
static __declspec(thread) profile_call *thread_context = NULL;
static __declspec(thread) bool thread_enabled = true;
#else
static __thread profile_call *thread_context = NULL;
static __thread bool thread_enabled = true;
#endif
/* Globally enable profiling; disabled threads pick the change up via
 * profile_reenable_thread(). */
void profiler_start(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = true;
	pthread_mutex_unlock(&root_mutex);
}
/* Globally disable profiling; each thread disables itself on its next
 * failed lock_root(). */
void profiler_stop(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = false;
	pthread_mutex_unlock(&root_mutex);
}
/* Re-sync this thread's enable flag with the global state after it was
 * cleared by a failed lock_root(); cheap no-op while still enabled. */
void profile_reenable_thread(void)
{
	if (thread_enabled)
		return;

	pthread_mutex_lock(&root_mutex);
	thread_enabled = enabled;
	pthread_mutex_unlock(&root_mutex);
}
/* Acquire root_mutex if profiling is enabled. On true the mutex is HELD
 * and the caller must unlock it; on false the mutex has been released
 * and this thread's profiling is disabled until the next
 * profile_reenable_thread(). */
static bool lock_root(void)
{
	pthread_mutex_lock(&root_mutex);
	if (!enabled) {
		pthread_mutex_unlock(&root_mutex);
		thread_enabled = false;
		return false;
	}

	return true;
}
/* Look up, or lazily create, the root entry for an interned name
 * (pointer-identity comparison). Must be called with root_mutex held. */
static profile_root_entry *get_root_entry(const char *name)
{
	profile_root_entry *r_entry = NULL;

	for (size_t i = 0; i < root_entries.num; i++) {
		if (root_entries.array[i].name == name) {
			r_entry = &root_entries.array[i];
			break;
		}
	}

	if (!r_entry) {
		r_entry = da_push_back_new(root_entries);
		r_entry->mutex = bmalloc(sizeof(pthread_mutex_t));
		pthread_mutex_init(r_entry->mutex, NULL);

		r_entry->name = name;
		r_entry->entry = bzalloc(sizeof(profile_entry));
		init_entry(r_entry->entry, name);
	}

	return r_entry;
}
/* Register `name` as a profiler root with an expected interval between
 * calls, given in nanoseconds and stored rounded to microseconds.
 * Silently does nothing while the profiler is disabled. */
void profile_register_root(const char *name,
		uint64_t expected_time_between_calls)
{
	if (!lock_root())
		return;

	get_root_entry(name)->entry->expected_time_between_calls =
		(expected_time_between_calls + 500) / 1000;
	pthread_mutex_unlock(&root_mutex);
}
static void free_call_context(profile_call *context);

/* Merge a finished root call tree into its root entry. The per-root
 * mutex is taken BEFORE root_mutex is released, so merges for the same
 * root stay serialized without blocking other roots. The new context is
 * retained as prev_call (for between-calls stats) and the previous one
 * is freed. If profiling was disabled, the context is simply freed. */
static void merge_context(profile_call *context)
{
	pthread_mutex_t *mutex = NULL;
	profile_entry *entry = NULL;
	profile_call *prev_call = NULL;

	if (!lock_root()) {
		free_call_context(context);
		return;
	}

	profile_root_entry *r_entry = get_root_entry(context->name);

	mutex = r_entry->mutex;
	entry = r_entry->entry;
	prev_call = r_entry->prev_call;

	r_entry->prev_call = context;

	pthread_mutex_lock(mutex);
	pthread_mutex_unlock(&root_mutex);

	merge_call(entry, context, prev_call);

	pthread_mutex_unlock(mutex);

	free_call_context(prev_call);
}
/* Begin a named profile scope on the current thread. Root calls are heap
 * allocated; nested calls are appended to the parent's children darray
 * (note: the push may reallocate that darray). The start timestamp is
 * taken last so setup cost is excluded from the measured time. */
void profile_start(const char *name)
{
	if (!thread_enabled)
		return;

	profile_call new_call = {
		.name = name,
#ifdef TRACK_OVERHEAD
		.overhead_start = os_gettime_ns(),
#endif
		.parent = thread_context,
	};

	profile_call *call = NULL;

	if (new_call.parent) {
		size_t idx = da_push_back(new_call.parent->children,
				&new_call);
		call = &new_call.parent->children.array[idx];
	} else {
		call = bmalloc(sizeof(profile_call));
		memcpy(call, &new_call, sizeof(profile_call));
	}

	thread_context = call;
	call->start_time = os_gettime_ns();
}
/* End the current profile scope. The end timestamp is captured first so
 * the bookkeeping below is excluded from the measured time. A name
 * mismatch is logged and, when some enclosing scope matches `name`,
 * recovered by ending every intermediate scope; otherwise the call is
 * ignored. Completed root calls are merged into the global data. */
void profile_end(const char *name)
{
	uint64_t end = os_gettime_ns();
	if (!thread_enabled)
		return;

	profile_call *call = thread_context;
	if (!call) {
		blog(LOG_ERROR, "Called profile end with no active profile");
		return;
	}

	if (!call->name)
		call->name = name;

	if (call->name != name) {
		blog(LOG_ERROR, "Called profile end with mismatching name: "
				"start(\"%s\"[%p]) <-> end(\"%s\"[%p])",
				call->name, call->name, name, name);

		/* only recover if an enclosing scope matches `name` */
		profile_call *parent = call->parent;
		while (parent && parent->parent && parent->name != name)
			parent = parent->parent;
		if (!parent || parent->name != name)
			return;

		/* unwind: end each unmatched scope on the way up */
		while (call->name != name) {
			profile_end(call->name);
			call = call->parent;
		}
	}

	thread_context = call->parent;

	call->end_time = end;
#ifdef TRACK_OVERHEAD
	call->overhead_end = os_gettime_ns();
#endif

	if (call->parent)
		return;

	merge_context(call);
}
  360. static int profiler_time_entry_compare(const void *first, const void *second)
  361. {
  362. int64_t diff = ((profiler_time_entry*)second)->time_delta -
  363. ((profiler_time_entry*)first)->time_delta;
  364. return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
  365. }
/* Flush any pending old entries, then copy every occupied bucket of the
 * map into entry_buffer (unsorted). Returns the total call count and,
 * via the optional out-params, the smallest/largest time_delta seen
 * (min comes back as ~0 and max as 0 when the map is empty). */
static uint64_t copy_map_to_array(profile_times_table *map,
		profiler_time_entries_t *entry_buffer,
		uint64_t *min_, uint64_t *max_)
{
	migrate_old_entries(map, false);

	da_reserve((*entry_buffer), map->occupied);
	da_resize((*entry_buffer), 0);

	uint64_t min__ = ~(uint64_t)0;
	uint64_t max__ = 0;

	uint64_t calls = 0;
	for (size_t i = 0; i < map->size; i++) {
		if (!map->entries[i].probes)
			continue;

		profiler_time_entry *entry = &map->entries[i].entry;
		da_push_back((*entry_buffer), entry);

		calls += entry->count;
		min__ = (min__ < entry->time_delta) ? min__ : entry->time_delta;
		max__ = (max__ > entry->time_delta) ? max__ : entry->time_delta;
	}

	if (min_)
		*min_ = min__;
	if (max_)
		*max_ = max__;

	return calls;
}
/* Printer callback shared by both output flavors of profile_print_func. */
typedef void (*profile_entry_print_func)(profiler_snapshot_entry_t *entry,
		struct dstr *indent_buffer, struct dstr *output_buffer,
		unsigned indent, uint64_t active, uint64_t parent_calls);

/* UTF-8 box-drawing fragments used to render the tree indentation */
#define VPIPE_RIGHT " \xe2\x94\xa3"
#define VPIPE " \xe2\x94\x83"
#define DOWN_RIGHT " \xe2\x94\x97"
/* Build the tree-drawing prefix for a row at `indent` levels. Bit i of
 * `active` marks whether the ancestor at depth i still has siblings
 * below (draw a pipe) or this is its last child (draw an elbow). Depth 0
 * gets a buffer holding just a NUL so ->array is always printable. */
static void make_indent_string(struct dstr *indent_buffer, unsigned indent,
		uint64_t active)
{
	indent_buffer->len = 0;

	if (!indent) {
		dstr_cat_ch(indent_buffer, 0);
		return;
	}

	for (size_t i = 0; i < indent; i++) {
		const char *fragment = "";
		bool last = i + 1 == indent;

		if (active & ((uint64_t)1 << i))
			fragment = last ? VPIPE_RIGHT : VPIPE;
		else
			fragment = last ? DOWN_RIGHT : " ";

		dstr_cat(indent_buffer, fragment);
	}
}
  416. static void gather_stats(uint64_t expected_time_between_calls,
  417. profiler_time_entries_t *entries,
  418. uint64_t calls, uint64_t *percentile99, uint64_t *median,
  419. double *percent_within_bounds)
  420. {
  421. if (!entries->num) {
  422. *percentile99 = 0;
  423. *median = 0;
  424. *percent_within_bounds = 0.;
  425. return;
  426. }
  427. /*if (entry_buffer->num > 2)
  428. blog(LOG_INFO, "buffer-size %lu, overall count %llu\n"
  429. "map-size %lu, occupied %lu, probes %lu",
  430. entry_buffer->num, calls,
  431. map->size, map->occupied,
  432. map->max_probe_count);*/
  433. uint64_t accu = 0;
  434. for (size_t i = 0; i < entries->num; i++) {
  435. uint64_t old_accu = accu;
  436. accu += entries->array[i].count;
  437. if (old_accu < calls * 0.01 && accu >= calls * 0.01)
  438. *percentile99 = entries->array[i].time_delta;
  439. else if (old_accu < calls * 0.5 && accu >= calls * 0.5) {
  440. *median = entries->array[i].time_delta;
  441. break;
  442. }
  443. }
  444. *percent_within_bounds = 0.;
  445. if (!expected_time_between_calls)
  446. return;
  447. accu = 0;
  448. for (size_t i = 0; i < entries->num; i++) {
  449. profiler_time_entry *entry = &entries->array[i];
  450. if (entry->time_delta < expected_time_between_calls)
  451. break;
  452. accu += entry->count;
  453. }
  454. *percent_within_bounds = (1. - (double)accu / calls) * 100;
  455. }
  456. #define G_MS "g\xC2\xA0ms"
  457. static void profile_print_entry(profiler_snapshot_entry_t *entry,
  458. struct dstr *indent_buffer, struct dstr *output_buffer,
  459. unsigned indent, uint64_t active, uint64_t parent_calls)
  460. {
  461. uint64_t calls = entry->overall_count;
  462. uint64_t min_ = entry->min_time;
  463. uint64_t max_ = entry->max_time;
  464. uint64_t percentile99 = 0;
  465. uint64_t median = 0;
  466. double percent_within_bounds = 0.;
  467. gather_stats(entry->expected_time_between_calls,
  468. &entry->times, calls,
  469. &percentile99, &median, &percent_within_bounds);
  470. make_indent_string(indent_buffer, indent, active);
  471. if (min_ == max_) {
  472. dstr_printf(output_buffer, "%s%s: %"G_MS,
  473. indent_buffer->array, entry->name,
  474. min_ / 1000.);
  475. } else {
  476. dstr_printf(output_buffer, "%s%s: min=%"G_MS", median=%"G_MS", "
  477. "max=%"G_MS", 99th percentile=%"G_MS,
  478. indent_buffer->array, entry->name,
  479. min_ / 1000., median / 1000., max_ / 1000.,
  480. percentile99 / 1000.);
  481. if (entry->expected_time_between_calls) {
  482. double expected_ms =
  483. entry->expected_time_between_calls / 1000.;
  484. dstr_catf(output_buffer, ", %g%% below %"G_MS,
  485. percent_within_bounds, expected_ms);
  486. }
  487. }
  488. if (parent_calls && calls != parent_calls) {
  489. double calls_per_parent = (double)calls / parent_calls;
  490. if (lround(calls_per_parent * 10) != 10)
  491. dstr_catf(output_buffer, ", %g calls per parent call",
  492. calls_per_parent);
  493. }
  494. blog(LOG_INFO, "%s", output_buffer->array);
  495. active |= (uint64_t)1 << indent;
  496. for (size_t i = 0; i < entry->children.num; i++) {
  497. if ((i + 1) == entry->children.num)
  498. active &= (1 << indent) - 1;
  499. profile_print_entry(&entry->children.array[i],
  500. indent_buffer, output_buffer,
  501. indent + 1, active, calls);
  502. }
  503. }
/* Statistics for times between calls. `entries` are sorted by time_delta
 * DESCENDING. Outputs the median interval plus the percentage of
 * intervals inside [lower_bound, upper_bound] (*percent), below
 * lower_bound (*lower) and above upper_bound (*higher). */
static void gather_stats_between(profiler_time_entries_t *entries,
		uint64_t calls, uint64_t lower_bound, uint64_t upper_bound,
		uint64_t min_, uint64_t max_, uint64_t *median,
		double *percent, double *lower, double *higher)
{
	*median = 0;
	*percent = 0.;
	*lower = 0.;
	*higher = 0.;

	if (!entries->num)
		return;

	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		accu += entries->array[i].count;
		if (accu < calls * 0.5)
			continue;

		*median = entries->array[i].time_delta;
		break;
	}

	/* fast paths when every interval falls into a single band */
	bool found_upper_bound = max_ <= upper_bound;
	bool found_lower_bound = false;

	if (min_ >= upper_bound) {
		*higher = 100.;
		return;
	}

	if (found_upper_bound && min_ >= lower_bound) {
		*percent = 100.;
		return;
	}

	/* descending order: accumulate counts and, each time a bound is
	 * crossed, emit the band completed so far and restart the sum */
	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t delta = entries->array[i].time_delta;

		if (!found_upper_bound && delta <= upper_bound) {
			*higher = (double)accu / calls * 100;
			accu = 0;
			found_upper_bound = true;
		}

		if (!found_lower_bound && delta < lower_bound) {
			*percent = (double)accu / calls * 100;
			accu = 0;
			found_lower_bound = true;
		}

		accu += entries->array[i].count;
	}

	/* whatever remains in accu belongs to the last band entered */
	if (!found_upper_bound) {
		*higher = 100.;

	} else if (!found_lower_bound) {
		*percent = (double)accu / calls * 100;

	} else {
		*lower = (double)accu / calls * 100;
	}
}
  556. static void profile_print_entry_expected(profiler_snapshot_entry_t *entry,
  557. struct dstr *indent_buffer, struct dstr *output_buffer,
  558. unsigned indent, uint64_t active, uint64_t parent_calls)
  559. {
  560. UNUSED_PARAMETER(parent_calls);
  561. if (!entry->expected_time_between_calls)
  562. return;
  563. uint64_t expected_time = entry->expected_time_between_calls;
  564. uint64_t min_ = entry->min_time_between_calls;
  565. uint64_t max_ = entry->max_time_between_calls;
  566. uint64_t median = 0;
  567. double percent = 0.;
  568. double lower = 0.;
  569. double higher = 0.;
  570. gather_stats_between(&entry->times_between_calls,
  571. entry->overall_between_calls_count,
  572. (uint64_t)(expected_time * 0.98),
  573. (uint64_t)(expected_time * 1.02 + 0.5),
  574. min_, max_,
  575. &median, &percent, &lower, &higher);
  576. make_indent_string(indent_buffer, indent, active);
  577. blog(LOG_INFO, "%s%s: min=%"G_MS", median=%"G_MS", max=%"G_MS", %g%% "
  578. "within ±2%% of %"G_MS" (%g%% lower, %g%% higher)",
  579. indent_buffer->array, entry->name,
  580. min_ / 1000., median / 1000., max_ / 1000., percent,
  581. expected_time / 1000.,
  582. lower, higher);
  583. active |= (uint64_t)1 << indent;
  584. for (size_t i = 0; i < entry->children.num; i++) {
  585. if ((i + 1) == entry->children.num)
  586. active &= (1 << indent) - 1;
  587. profile_print_entry_expected(&entry->children.array[i],
  588. indent_buffer, output_buffer,
  589. indent + 1, active, 0);
  590. }
  591. }
/* Shared driver for both print flavors: use the given snapshot or create
 * a temporary one, print the intro line, every root via `print`, then a
 * footer. The snapshot is freed only if it was created here. */
void profile_print_func(const char *intro, profile_entry_print_func print,
		profiler_snapshot_t *snap)
{
	struct dstr indent_buffer = {0};
	struct dstr output_buffer = {0};

	bool free_snapshot = !snap;
	if (!snap)
		snap = profile_snapshot_create();

	blog(LOG_INFO, "%s", intro);
	for (size_t i = 0; i < snap->roots.num; i++) {
		print(&snap->roots.array[i],
				&indent_buffer, &output_buffer, 0, 0, 0);
	}
	blog(LOG_INFO, "=================================================");

	if (free_snapshot)
		profile_snapshot_free(snap);

	dstr_free(&output_buffer);
	dstr_free(&indent_buffer);
}
/* Log per-call timing results; snap may be NULL to use a fresh one. */
void profiler_print(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Results =============================",
			profile_print_entry, snap);
}
/* Log time-between-calls results; snap may be NULL to use a fresh one. */
void profiler_print_time_between_calls(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Time Between Calls ==================",
			profile_print_entry_expected, snap);
}
  621. static void free_call_children(profile_call *call)
  622. {
  623. if (!call)
  624. return;
  625. const size_t num = call->children.num;
  626. for (size_t i = 0; i < num; i++)
  627. free_call_children(&call->children.array[i]);
  628. da_free(call->children);
  629. }
/* Free a heap-allocated root call and its entire subtree; NULL is safe
 * (bfree/free_call_children both accept it). */
static void free_call_context(profile_call *context)
{
	free_call_children(context);
	bfree(context);
}
/* Release both entry arrays and reset the map to an empty state. */
static void free_hashmap(profile_times_table *map)
{
	map->size = 0;
	bfree(map->entries);
	map->entries = NULL;
	bfree(map->old_entries);
	map->old_entries = NULL;
}
  643. static void free_profile_entry(profile_entry *entry)
  644. {
  645. for (size_t i = 0; i < entry->children.num; i++)
  646. free_profile_entry(&entry->children.array[i]);
  647. free_hashmap(&entry->times);
  648. #ifdef TRACK_OVERHEAD
  649. free_hashmap(&entry->overhead);
  650. #endif
  651. free_hashmap(&entry->times_between_calls);
  652. da_free(entry->children);
  653. }
/* Disable the profiler and free all accumulated data. The root list is
 * detached under root_mutex; each per-root mutex is then locked and
 * unlocked once to wait out any merge still in flight before the mutex
 * is destroyed and the entry data freed. */
void profiler_free(void)
{
	DARRAY(profile_root_entry) old_root_entries = {0};

	pthread_mutex_lock(&root_mutex);
	enabled = false;
	da_move(old_root_entries, root_entries);
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < old_root_entries.num; i++) {
		profile_root_entry *entry = &old_root_entries.array[i];

		/* drain: wait for any in-flight merge on this root */
		pthread_mutex_lock(entry->mutex);
		pthread_mutex_unlock(entry->mutex);

		pthread_mutex_destroy(entry->mutex);
		bfree(entry->mutex);
		entry->mutex = NULL;

		free_call_context(entry->prev_call);
		free_profile_entry(entry->entry);
		bfree(entry->entry);
	}

	da_free(old_root_entries);
}
/* ------------------------------------------------------------------------- */
/* Profiler name storage */

/* Owns dynamically formatted name strings so profile scopes can rely on
 * pointer-identity names; mutex guards concurrent profile_store_name. */
struct profiler_name_store {
	pthread_mutex_t mutex;
	DARRAY(char*) names;
};
  680. profiler_name_store_t *profiler_name_store_create(void)
  681. {
  682. profiler_name_store_t *store = bzalloc(sizeof(profiler_name_store_t));
  683. if (pthread_mutex_init(&store->mutex, NULL))
  684. goto error;
  685. return store;
  686. error:
  687. bfree(store);
  688. return NULL;
  689. }
  690. void profiler_name_store_free(profiler_name_store_t *store)
  691. {
  692. if (!store)
  693. return;
  694. for (size_t i = 0; i < store->names.num; i++)
  695. bfree(store->names.array[i]);
  696. da_free(store->names);
  697. bfree(store);
  698. }
/* printf-style format a name and hand ownership of the allocation to
 * `store`. The returned pointer stays valid until
 * profiler_name_store_free() and is suitable as an interned scope name. */
const char *profile_store_name(profiler_name_store_t *store,
		const char *format, ...)
{
	va_list args;
	va_start(args, format);

	struct dstr str = {0};
	dstr_vprintf(&str, format, args);

	va_end(args);

	const char *result = NULL;

	pthread_mutex_lock(&store->mutex);
	/* the darray takes over str.array — no dstr_free here */
	size_t idx = da_push_back(store->names, &str.array);
	result = store->names.array[idx];
	pthread_mutex_unlock(&store->mutex);

	return result;
}
/* ------------------------------------------------------------------------- */
/* Profiler data access */

/* Deep-copy one live profile_entry (and its children) into a snapshot
 * entry, flattening each hash map into an (unsorted) time-entry array. */
static void add_entry_to_snapshot(profile_entry *entry,
		profiler_snapshot_entry_t *s_entry)
{
	s_entry->name = entry->name;
	s_entry->overall_count = copy_map_to_array(&entry->times,
			&s_entry->times,
			&s_entry->min_time, &s_entry->max_time);

	if ((s_entry->expected_time_between_calls =
				entry->expected_time_between_calls))
		s_entry->overall_between_calls_count =
			copy_map_to_array(&entry->times_between_calls,
					&s_entry->times_between_calls,
					&s_entry->min_time_between_calls,
					&s_entry->max_time_between_calls);

	da_reserve(s_entry->children, entry->children.num);
	for (size_t i = 0; i < entry->children.num; i++)
		add_entry_to_snapshot(&entry->children.array[i],
				da_push_back_new(s_entry->children));
}
/* Sort all time-entry arrays of a snapshot subtree by descending
 * time_delta — the order gather_stats/gather_stats_between expect. */
static void sort_snapshot_entry(profiler_snapshot_entry_t *entry)
{
	qsort(entry->times.array, entry->times.num,
			sizeof(profiler_time_entry),
			profiler_time_entry_compare);

	if (entry->expected_time_between_calls)
		qsort(entry->times_between_calls.array,
				entry->times_between_calls.num,
				sizeof(profiler_time_entry),
				profiler_time_entry_compare);

	for (size_t i = 0; i < entry->children.num; i++)
		sort_snapshot_entry(&entry->children.array[i]);
}
/* Capture a sorted snapshot of all current profiler data. Each root is
 * copied under its own mutex (with root_mutex held to keep the list
 * stable); sorting happens after every lock is released. The caller owns
 * the result and frees it with profile_snapshot_free(). */
profiler_snapshot_t *profile_snapshot_create(void)
{
	profiler_snapshot_t *snap = bzalloc(sizeof(profiler_snapshot_t));

	pthread_mutex_lock(&root_mutex);
	da_reserve(snap->roots, root_entries.num);
	for (size_t i = 0; i < root_entries.num; i++) {
		pthread_mutex_lock(root_entries.array[i].mutex);
		add_entry_to_snapshot(root_entries.array[i].entry,
				da_push_back_new(snap->roots));
		pthread_mutex_unlock(root_entries.array[i].mutex);
	}
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < snap->roots.num; i++)
		sort_snapshot_entry(&snap->roots.array[i]);

	return snap;
}
  764. static void free_snapshot_entry(profiler_snapshot_entry_t *entry)
  765. {
  766. for (size_t i = 0; i < entry->children.num; i++)
  767. free_snapshot_entry(&entry->children.array[i]);
  768. da_free(entry->children);
  769. da_free(entry->times_between_calls);
  770. da_free(entry->times);
  771. }
  772. void profile_snapshot_free(profiler_snapshot_t *snap)
  773. {
  774. if (!snap)
  775. return;
  776. for (size_t i = 0; i < snap->roots.num; i++)
  777. free_snapshot_entry(&snap->roots.array[i]);
  778. da_free(snap->roots);
  779. bfree(snap);
  780. }
/* Sink callback receiving each formatted CSV row. */
typedef void (*dump_csv_func)(void *data, struct dstr *buffer);

/* Emit CSV rows for one entry and its subtree. Pointer values act as
 * stable ids (%p of the entry/parent/name). Per-call rows carry 0 in the
 * time_between_calls column; between-calls rows carry the expected
 * interval. */
static void entry_dump_csv(struct dstr *buffer,
		const profiler_snapshot_entry_t *parent,
		const profiler_snapshot_entry_t *entry,
		dump_csv_func func, void *data)
{
	const char *parent_name = parent ? parent->name : NULL;

	for (size_t i = 0; i < entry->times.num; i++) {
		dstr_printf(buffer, "%p,%p,%p,%p,%s,0,"
				"%"PRIu64",%"PRIu64"\n", entry,
				parent, entry->name, parent_name, entry->name,
				entry->times.array[i].time_delta,
				entry->times.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->times_between_calls.num; i++) {
		dstr_printf(buffer, "%p,%p,%p,%p,%s,"
				"%"PRIu64",%"PRIu64",%"PRIu64"\n", entry,
				parent, entry->name, parent_name, entry->name,
				entry->expected_time_between_calls,
				entry->times_between_calls.array[i].time_delta,
				entry->times_between_calls.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->children.num; i++)
		entry_dump_csv(buffer, entry, &entry->children.array[i],
				func, data);
}
  809. static void profiler_snapshot_dump(const profiler_snapshot_t *snap,
  810. dump_csv_func func, void *data)
  811. {
  812. struct dstr buffer = {0};
  813. dstr_init_copy(&buffer, "id,parent_id,name_id,parent_name_id,name,"
  814. "time_between_calls,time_delta_µs,count\n");
  815. func(data, &buffer);
  816. for (size_t i = 0; i < snap->roots.num; i++)
  817. entry_dump_csv(&buffer, NULL,
  818. &snap->roots.array[i], func, data);
  819. dstr_free(&buffer);
  820. }
  821. static void dump_csv_fwrite(void *data, struct dstr *buffer)
  822. {
  823. fwrite(buffer->array, 1, buffer->len, data);
  824. }
  825. bool profiler_snapshot_dump_csv(const profiler_snapshot_t *snap,
  826. const char *filename)
  827. {
  828. FILE *f = os_fopen(filename, "wb+");
  829. if (!f)
  830. return false;
  831. profiler_snapshot_dump(snap, dump_csv_fwrite, f);
  832. fclose(f);
  833. return true;
  834. }
  835. static void dump_csv_gzwrite(void *data, struct dstr *buffer)
  836. {
  837. gzwrite(data, buffer->array, (unsigned)buffer->len);
  838. }
  839. bool profiler_snapshot_dump_csv_gz(const profiler_snapshot_t *snap,
  840. const char *filename)
  841. {
  842. gzFile gz;
  843. #ifdef _WIN32
  844. wchar_t *filename_w = NULL;
  845. os_utf8_to_wcs_ptr(filename, 0, &filename_w);
  846. if (!filename_w)
  847. return false;
  848. gz = gzopen_w(filename_w, "wb");
  849. bfree(filename_w);
  850. #else
  851. gz = gzopen(filename, "wb");
  852. #endif
  853. if (!gz)
  854. return false;
  855. profiler_snapshot_dump(snap, dump_csv_gzwrite, gz);
  856. gzclose_w(gz);
  857. return true;
  858. }
  859. size_t profiler_snapshot_num_roots(profiler_snapshot_t *snap)
  860. {
  861. return snap ? snap->roots.num : 0;
  862. }
  863. void profiler_snapshot_enumerate_roots(profiler_snapshot_t *snap,
  864. profiler_entry_enum_func func, void *context)
  865. {
  866. if (!snap)
  867. return;
  868. for (size_t i = 0; i < snap->roots.num; i++)
  869. if (!func(context, &snap->roots.array[i]))
  870. break;
  871. }
  872. void profiler_snapshot_filter_roots(profiler_snapshot_t *snap,
  873. profiler_name_filter_func func, void *data)
  874. {
  875. for (size_t i = 0; i < snap->roots.num;) {
  876. bool remove = false;
  877. bool res = func(data, snap->roots.array[i].name, &remove);
  878. if (remove) {
  879. free_snapshot_entry(&snap->roots.array[i]);
  880. da_erase(snap->roots, i);
  881. }
  882. if (!res)
  883. break;
  884. if (!remove)
  885. i += 1;
  886. }
  887. }
  888. size_t profiler_snapshot_num_children(profiler_snapshot_entry_t *entry)
  889. {
  890. return entry ? entry->children.num : 0;
  891. }
  892. void profiler_snapshot_enumerate_children(profiler_snapshot_entry_t *entry,
  893. profiler_entry_enum_func func, void *context)
  894. {
  895. if (!entry)
  896. return;
  897. for (size_t i = 0; i < entry->children.num; i++)
  898. if (!func(context, &entry->children.array[i]))
  899. break;
  900. }
  901. const char *profiler_snapshot_entry_name(profiler_snapshot_entry_t *entry)
  902. {
  903. return entry ? entry->name : NULL;
  904. }
  905. profiler_time_entries_t *profiler_snapshot_entry_times(
  906. profiler_snapshot_entry_t *entry)
  907. {
  908. return entry ? &entry->times : NULL;
  909. }
  910. uint64_t profiler_snapshot_entry_overall_count(
  911. profiler_snapshot_entry_t *entry)
  912. {
  913. return entry ? entry->overall_count : 0;
  914. }
  915. uint64_t profiler_snapshot_entry_min_time(profiler_snapshot_entry_t *entry)
  916. {
  917. return entry ? entry->min_time : 0;
  918. }
  919. uint64_t profiler_snapshot_entry_max_time(profiler_snapshot_entry_t *entry)
  920. {
  921. return entry ? entry->max_time : 0;
  922. }
  923. profiler_time_entries_t *profiler_snapshot_entry_times_between_calls(
  924. profiler_snapshot_entry_t *entry)
  925. {
  926. return entry ? &entry->times_between_calls : NULL;
  927. }
  928. uint64_t profiler_snapshot_entry_expected_time_between_calls(
  929. profiler_snapshot_entry_t *entry)
  930. {
  931. return entry ? entry->expected_time_between_calls : 0;
  932. }
  933. uint64_t profiler_snapshot_entry_min_time_between_calls(
  934. profiler_snapshot_entry_t *entry)
  935. {
  936. return entry ? entry->min_time_between_calls : 0;
  937. }
  938. uint64_t profiler_snapshot_entry_max_time_between_calls(
  939. profiler_snapshot_entry_t *entry)
  940. {
  941. return entry ? entry->max_time_between_calls : 0;
  942. }
  943. uint64_t profiler_snapshot_entry_overall_between_calls_count(
  944. profiler_snapshot_entry_t *entry)
  945. {
  946. return entry ? entry->overall_between_calls_count : 0;
  947. }