1
0

profiler.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182
  1. #include <inttypes.h>
  2. #include "profiler.h"
  3. #include "darray.h"
  4. #include "dstr.h"
  5. #include "platform.h"
  6. #include "threading.h"
  7. #include <math.h>
  8. #include <zlib.h>
  9. //#define TRACK_OVERHEAD
/* A profiler snapshot: a copy of the per-root entry trees captured at
 * one point in time (see profile_snapshot_create). Opaque to callers. */
struct profiler_snapshot {
	DARRAY(profiler_snapshot_entry_t) roots;
};
/* One node of a snapshot tree: flattened timing data for a single named
 * profile point plus its children. */
struct profiler_snapshot_entry {
	const char *name;

	/* per-call durations in microseconds, plus overall totals/extremes */
	profiler_time_entries_t times;
	uint64_t min_time;
	uint64_t max_time;
	uint64_t overall_count;

	/* intervals between successive calls (µs); only populated when an
	 * expected interval was registered via profile_register_root() */
	profiler_time_entries_t times_between_calls;
	uint64_t expected_time_between_calls;
	uint64_t min_time_between_calls;
	uint64_t max_time_between_calls;
	uint64_t overall_between_calls_count;

	DARRAY(profiler_snapshot_entry_t) children;
};
typedef struct profiler_time_entry profiler_time_entry;

typedef struct profile_call profile_call;

/* One recorded invocation of a profiled section. While sections are
 * active these form a per-thread call tree (see thread_context). */
struct profile_call {
	const char *name;
#ifdef TRACK_OVERHEAD
	/* timestamp taken before profiler bookkeeping in profile_start */
	uint64_t overhead_start;
#endif
	uint64_t start_time; /* os_gettime_ns() at profile_start */
	uint64_t end_time;   /* os_gettime_ns() at profile_end */
#ifdef TRACK_OVERHEAD
	/* timestamp taken after profiler bookkeeping in profile_end */
	uint64_t overhead_end;
#endif
	uint64_t expected_time_between_calls;
	DARRAY(profile_call) children;
	profile_call *parent; /* NULL for root calls */
};
typedef struct profile_times_table_entry profile_times_table_entry;

/* One hash-map slot: probes == 0 marks the slot unused; otherwise it is
 * the probe distance at which `entry` was placed. */
struct profile_times_table_entry {
	size_t probes;
	profiler_time_entry entry;
};
typedef struct profile_times_table profile_times_table;

/* Open-addressed hash map from time delta (µs) to occurrence count,
 * with incremental rehashing: after a grow, the previous table lives on
 * in old_entries and is drained a few slots at a time (see
 * migrate_old_entries). */
struct profile_times_table {
	size_t size;            /* slot count of entries */
	size_t occupied;        /* used slots in entries */
	size_t max_probe_count; /* longest probe sequence seen */
	profile_times_table_entry *entries;

	size_t old_start_index; /* where the next migration scan resumes */
	size_t old_occupied;    /* old entries still to migrate */
	profile_times_table_entry *old_entries; /* NULL when fully migrated */
};
typedef struct profile_entry profile_entry;

/* Accumulated statistics for one named profile point, aggregated over
 * all completed calls (persists between snapshots). */
struct profile_entry {
	const char *name;
	profile_times_table times; /* per-call durations (µs) */
#ifdef TRACK_OVERHEAD
	profile_times_table overhead; /* profiler bookkeeping cost (µs) */
#endif
	uint64_t expected_time_between_calls; /* µs; 0 = not registered */
	profile_times_table times_between_calls;

	DARRAY(profile_entry) children;
};
typedef struct profile_root_entry profile_root_entry;

/* Per-root bookkeeping. Each root has its own mutex so merges for
 * different roots can run concurrently; prev_call keeps the most recent
 * completed root call so the next merge can record time between calls. */
struct profile_root_entry {
	pthread_mutex_t *mutex;
	const char *name;
	profile_entry *entry;
	profile_call *prev_call; /* most recently completed root call */
};
  75. static inline uint64_t diff_ns_to_usec(uint64_t prev, uint64_t next)
  76. {
  77. return (next - prev + 500) / 1000;
  78. }
  79. static inline void update_max_probes(profile_times_table *map, size_t val)
  80. {
  81. map->max_probe_count =
  82. map->max_probe_count < val ? val : map->max_probe_count;
  83. }
/* Forward declarations: add_hashmap_entry, grow_hashmap and
 * migrate_old_entries are mutually recursive. */
static void migrate_old_entries(profile_times_table *map, bool limit_items);
static void grow_hashmap(profile_times_table *map, uint64_t usec,
			 uint64_t count);
/* Insert (usec, count) into the open-addressed map using Robin-Hood
 * style probing: an incoming entry that has probed further than a
 * resident one evicts it, and the evicted entry is then re-inserted.
 * Equal keys simply accumulate their counts. Grows the map when it
 * passes ~70% load. */
static void add_hashmap_entry(profile_times_table *map, uint64_t usec,
			      uint64_t count)
{
	size_t probes = 1;
	size_t start = usec % map->size;
	for (;; probes += 1) {
		size_t idx = (start + probes) % map->size;
		profile_times_table_entry *entry = &map->entries[idx];

		/* Empty slot (probes == 0 marks unused): claim it. */
		if (!entry->probes) {
			entry->probes = probes;
			entry->entry.time_delta = usec;
			entry->entry.count = count;
			map->occupied += 1;
			update_max_probes(map, probes);
			return;
		}

		/* Same key: merge counts. */
		if (entry->entry.time_delta == usec) {
			entry->entry.count += count;
			return;
		}

		/* Resident entry probed at least as far as we have:
		 * keep probing. */
		if (entry->probes >= probes)
			continue;

		/* Over ~70% load: grow instead of displacing further. */
		if (map->occupied / (double)map->size > 0.7) {
			grow_hashmap(map, usec, count);
			return;
		}

		/* Displace the "richer" resident entry and continue the
		 * loop inserting it instead of the original value. */
		size_t old_probes = entry->probes;
		uint64_t old_count = entry->entry.count;
		uint64_t old_usec = entry->entry.time_delta;

		entry->probes = probes;
		entry->entry.count = count;
		entry->entry.time_delta = usec;
		update_max_probes(map, probes);

		probes = old_probes;
		count = old_count;
		usec = old_usec;
		start = usec % map->size;
	}
}
  126. static void init_hashmap(profile_times_table *map, size_t size)
  127. {
  128. map->size = size;
  129. map->occupied = 0;
  130. map->max_probe_count = 0;
  131. map->entries = bzalloc(sizeof(profile_times_table_entry) * size);
  132. map->old_start_index = 0;
  133. map->old_occupied = 0;
  134. map->old_entries = NULL;
  135. }
/* Move entries from the previous (pre-grow) table into the current one.
 * With limit_items set, at most 8 slots are scanned per call so the
 * rehashing cost is amortized across many insertions; otherwise the old
 * table is drained completely. The old table is freed once empty. */
static void migrate_old_entries(profile_times_table *map, bool limit_items)
{
	if (!map->old_entries)
		return;

	if (!map->old_occupied) {
		bfree(map->old_entries);
		map->old_entries = NULL;
		return;
	}

	/* old_start_index persists between calls, resuming the scan where
	 * the previous (limited) pass stopped. */
	for (size_t i = 0; !limit_items || i < 8; i++, map->old_start_index++) {
		if (!map->old_occupied)
			return;

		profile_times_table_entry *entry =
			&map->old_entries[map->old_start_index];
		if (!entry->probes) /* unused slot */
			continue;

		add_hashmap_entry(map, entry->entry.time_delta,
				  entry->entry.count);
		map->old_occupied -= 1;
	}
}
  157. static void grow_hashmap(profile_times_table *map, uint64_t usec,
  158. uint64_t count)
  159. {
  160. migrate_old_entries(map, false);
  161. size_t old_size = map->size;
  162. size_t old_occupied = map->occupied;
  163. profile_times_table_entry *entries = map->entries;
  164. init_hashmap(map, (old_size * 2 < 16) ? 16 : (old_size * 2));
  165. map->old_occupied = old_occupied;
  166. map->old_entries = entries;
  167. add_hashmap_entry(map, usec, count);
  168. }
  169. static profile_entry *init_entry(profile_entry *entry, const char *name)
  170. {
  171. entry->name = name;
  172. init_hashmap(&entry->times, 1);
  173. #ifdef TRACK_OVERHEAD
  174. init_hashmap(&entry->overhead, 1);
  175. #endif
  176. entry->expected_time_between_calls = 0;
  177. init_hashmap(&entry->times_between_calls, 1);
  178. return entry;
  179. }
  180. static profile_entry *get_child(profile_entry *parent, const char *name)
  181. {
  182. const size_t num = parent->children.num;
  183. for (size_t i = 0; i < num; i++) {
  184. profile_entry *child = &parent->children.array[i];
  185. if (child->name == name)
  186. return child;
  187. }
  188. return init_entry(da_push_back_new(parent->children), name);
  189. }
/* Recursively fold one completed call tree into the persistent entry
 * tree, recording per-call durations and, for roots with a registered
 * expected interval, the time since the previous root call. */
static void merge_call(profile_entry *entry, profile_call *call,
		       profile_call *prev_call)
{
	/* Children first; between-calls tracking only applies to roots,
	 * so children pass NULL for prev_call. */
	const size_t num = call->children.num;
	for (size_t i = 0; i < num; i++) {
		profile_call *child = &call->children.array[i];
		merge_call(get_child(entry, child->name), child, NULL);
	}

	if (entry->expected_time_between_calls != 0 && prev_call) {
		migrate_old_entries(&entry->times_between_calls, true);
		uint64_t usec = diff_ns_to_usec(prev_call->start_time,
						call->start_time);
		add_hashmap_entry(&entry->times_between_calls, usec, 1);
	}

	migrate_old_entries(&entry->times, true);
	uint64_t usec = diff_ns_to_usec(call->start_time, call->end_time);
	add_hashmap_entry(&entry->times, usec, 1);

#ifdef TRACK_OVERHEAD
	/* Overhead = profiler bookkeeping time surrounding the measured
	 * span (before start_time plus after end_time). */
	migrate_old_entries(&entry->overhead, true);
	usec = diff_ns_to_usec(call->overhead_start, call->start_time);
	usec += diff_ns_to_usec(call->end_time, call->overhead_end);
	add_hashmap_entry(&entry->overhead, usec, 1);
#endif
}
/* Global profiler state; enabled and root_entries are guarded by
 * root_mutex. */
static bool enabled = false;
static pthread_mutex_t root_mutex = PTHREAD_MUTEX_INITIALIZER;
static DARRAY(profile_root_entry) root_entries;

/* Per-thread call stack currently being recorded, and a per-thread
 * enable flag synced from `enabled` lazily (see lock_root). */
static THREAD_LOCAL profile_call *thread_context = NULL;
static THREAD_LOCAL bool thread_enabled = true;
/* Enable profiling globally. Threads that were lazily disabled must
 * additionally call profile_reenable_thread() to resume recording. */
void profiler_start(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = true;
	pthread_mutex_unlock(&root_mutex);
}
/* Disable profiling globally; recording threads drop out lazily the
 * next time they reach lock_root(). */
void profiler_stop(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = false;
	pthread_mutex_unlock(&root_mutex);
}
/* Re-sync this thread's enable flag with the global state after
 * lock_root() disabled it (which happens when the profiler was stopped
 * while this thread tried to record). */
void profile_reenable_thread(void)
{
	if (thread_enabled)
		return;

	pthread_mutex_lock(&root_mutex);
	thread_enabled = enabled;
	pthread_mutex_unlock(&root_mutex);
}
/* Acquire root_mutex if the profiler is enabled.
 *
 * Returns true with root_mutex HELD (caller must unlock). Returns false
 * with the mutex released, additionally disabling profiling for this
 * thread until profile_reenable_thread() is called. */
static bool lock_root(void)
{
	pthread_mutex_lock(&root_mutex);
	if (!enabled) {
		pthread_mutex_unlock(&root_mutex);

		thread_enabled = false;
		return false;
	}

	return true;
}
  249. static profile_root_entry *get_root_entry(const char *name)
  250. {
  251. profile_root_entry *r_entry = NULL;
  252. for (size_t i = 0; i < root_entries.num; i++) {
  253. if (root_entries.array[i].name == name) {
  254. r_entry = &root_entries.array[i];
  255. break;
  256. }
  257. }
  258. if (!r_entry) {
  259. r_entry = da_push_back_new(root_entries);
  260. r_entry->mutex = bmalloc(sizeof(pthread_mutex_t));
  261. pthread_mutex_init(r_entry->mutex, NULL);
  262. r_entry->name = name;
  263. r_entry->entry = bzalloc(sizeof(profile_entry));
  264. init_entry(r_entry->entry, name);
  265. }
  266. return r_entry;
  267. }
  268. void profile_register_root(const char *name,
  269. uint64_t expected_time_between_calls)
  270. {
  271. if (!lock_root())
  272. return;
  273. get_root_entry(name)->entry->expected_time_between_calls =
  274. (expected_time_between_calls + 500) / 1000;
  275. pthread_mutex_unlock(&root_mutex);
  276. }
static void free_call_context(profile_call *context);

/* Merge a finished root call into the global statistics.
 *
 * The per-root mutex is taken before root_mutex is released, so
 * profiler_free() (which locks each root mutex before destroying it)
 * cannot free the entry out from under us, while merges for other roots
 * can still proceed concurrently. The context is retained as prev_call
 * so the next call to the same root can record the time between calls;
 * the previously retained call is freed. */
static void merge_context(profile_call *context)
{
	pthread_mutex_t *mutex = NULL;
	profile_entry *entry = NULL;
	profile_call *prev_call = NULL;

	if (!lock_root()) {
		/* Profiler disabled: discard the recorded data. */
		free_call_context(context);
		return;
	}

	profile_root_entry *r_entry = get_root_entry(context->name);

	mutex = r_entry->mutex;
	entry = r_entry->entry;
	prev_call = r_entry->prev_call;

	r_entry->prev_call = context;

	pthread_mutex_lock(mutex);
	pthread_mutex_unlock(&root_mutex);

	merge_call(entry, context, prev_call);

	pthread_mutex_unlock(mutex);

	free_call_context(prev_call);
}
/* Begin a profiled section on the current thread.
 *
 * Root calls (no active parent) are heap-allocated; nested calls are
 * appended to their parent's children array. start_time is sampled
 * last so allocation cost is not attributed to the measured span. */
void profile_start(const char *name)
{
	if (!thread_enabled)
		return;

	profile_call new_call = {
		.name = name,
#ifdef TRACK_OVERHEAD
		.overhead_start = os_gettime_ns(),
#endif
		.parent = thread_context,
	};

	profile_call *call = NULL;

	if (new_call.parent) {
		size_t idx = da_push_back(new_call.parent->children, &new_call);
		/* NOTE(review): pointer into the parent's darray; assumes no
		 * sibling push reallocates it while this call is active
		 * (only the innermost call receives children) — confirm. */
		call = &new_call.parent->children.array[idx];
	} else {
		call = bmalloc(sizeof(profile_call));
		memcpy(call, &new_call, sizeof(profile_call));
	}

	thread_context = call;
	call->start_time = os_gettime_ns();
}
/* End the innermost profiled section on the current thread.
 *
 * The end timestamp is taken first so the checks below don't inflate
 * the measurement. If `name` doesn't match the innermost active call,
 * recovery is attempted by ending the intervening calls — but only if
 * a matching ancestor actually exists. A completed root call is merged
 * into the global statistics. */
void profile_end(const char *name)
{
	uint64_t end = os_gettime_ns();
	if (!thread_enabled)
		return;

	profile_call *call = thread_context;
	if (!call) {
		blog(LOG_ERROR, "Called profile end with no active profile");
		return;
	}

	if (!call->name)
		call->name = name;

	if (call->name != name) {
		blog(LOG_ERROR,
		     "Called profile end with mismatching name: "
		     "start(\"%s\"[%p]) <-> end(\"%s\"[%p])",
		     call->name, call->name, name, name);

		/* Search for a matching ancestor; if none, ignore this
		 * mismatched end entirely. */
		profile_call *parent = call->parent;
		while (parent && parent->parent && parent->name != name)
			parent = parent->parent;

		if (!parent || parent->name != name)
			return;

		/* Close the unterminated calls between here and the
		 * matching ancestor. */
		while (call->name != name) {
			profile_end(call->name);
			call = call->parent;
		}
	}

	thread_context = call->parent;

	call->end_time = end;
#ifdef TRACK_OVERHEAD
	call->overhead_end = os_gettime_ns();
#endif

	if (call->parent)
		return;

	merge_context(call);
}
  356. static int profiler_time_entry_compare(const void *first, const void *second)
  357. {
  358. int64_t diff = ((profiler_time_entry *)second)->time_delta -
  359. ((profiler_time_entry *)first)->time_delta;
  360. return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
  361. }
/* Flatten a times hash map into entry_buffer (unsorted), returning the
 * total call count and optionally the min/max time delta seen. Any
 * pending old-table migration is fully drained first. */
static uint64_t copy_map_to_array(profile_times_table *map,
				  profiler_time_entries_t *entry_buffer,
				  uint64_t *min_, uint64_t *max_)
{
	migrate_old_entries(map, false);

	da_reserve(*entry_buffer, map->occupied);
	da_resize(*entry_buffer, 0);

	uint64_t min__ = ~(uint64_t)0; /* UINT64_MAX; unchanged if empty */
	uint64_t max__ = 0;
	uint64_t calls = 0;

	for (size_t i = 0; i < map->size; i++) {
		if (!map->entries[i].probes)
			continue;

		profiler_time_entry *entry = &map->entries[i].entry;
		da_push_back(*entry_buffer, entry);

		calls += entry->count;
		min__ = (min__ < entry->time_delta) ? min__ : entry->time_delta;
		max__ = (max__ > entry->time_delta) ? max__ : entry->time_delta;
	}

	if (min_)
		*min_ = min__;
	if (max_)
		*max_ = max__;

	return calls;
}
/* Callback used by profile_print_func to render one snapshot entry and,
 * recursively, its children. `active` is a bitmask of indent levels
 * that still have pending siblings (used for tree drawing). */
typedef void (*profile_entry_print_func)(profiler_snapshot_entry_t *entry,
					 struct dstr *indent_buffer,
					 struct dstr *output_buffer,
					 unsigned indent, uint64_t active,
					 uint64_t parent_calls);

/* UTF-8 box-drawing fragments; each is a space plus one glyph,
 * i.e. two display columns wide */
#define VPIPE_RIGHT " \xe2\x94\xa3" /* " ┣" */
#define VPIPE " \xe2\x94\x83"       /* " ┃" */
#define DOWN_RIGHT " \xe2\x94\x97"  /* " ┗" */
  396. static void make_indent_string(struct dstr *indent_buffer, unsigned indent,
  397. uint64_t active)
  398. {
  399. indent_buffer->len = 0;
  400. if (!indent) {
  401. dstr_cat_ch(indent_buffer, 0);
  402. return;
  403. }
  404. for (size_t i = 0; i < indent; i++) {
  405. const char *fragment = "";
  406. bool last = i + 1 == indent;
  407. if (active & ((uint64_t)1 << i))
  408. fragment = last ? VPIPE_RIGHT : VPIPE;
  409. else
  410. fragment = last ? DOWN_RIGHT : " ";
  411. dstr_cat(indent_buffer, fragment);
  412. }
  413. }
  414. static void gather_stats(uint64_t expected_time_between_calls,
  415. profiler_time_entries_t *entries, uint64_t calls,
  416. uint64_t *percentile99, uint64_t *median,
  417. double *percent_within_bounds)
  418. {
  419. if (!entries->num) {
  420. *percentile99 = 0;
  421. *median = 0;
  422. *percent_within_bounds = 0.;
  423. return;
  424. }
  425. /*if (entry_buffer->num > 2)
  426. blog(LOG_INFO, "buffer-size %lu, overall count %llu\n"
  427. "map-size %lu, occupied %lu, probes %lu",
  428. entry_buffer->num, calls,
  429. map->size, map->occupied,
  430. map->max_probe_count);*/
  431. uint64_t accu = 0;
  432. for (size_t i = 0; i < entries->num; i++) {
  433. uint64_t old_accu = accu;
  434. accu += entries->array[i].count;
  435. if (old_accu < calls * 0.01 && accu >= calls * 0.01)
  436. *percentile99 = entries->array[i].time_delta;
  437. else if (old_accu < calls * 0.5 && accu >= calls * 0.5) {
  438. *median = entries->array[i].time_delta;
  439. break;
  440. }
  441. }
  442. *percent_within_bounds = 0.;
  443. if (!expected_time_between_calls)
  444. return;
  445. accu = 0;
  446. for (size_t i = 0; i < entries->num; i++) {
  447. profiler_time_entry *entry = &entries->array[i];
  448. if (entry->time_delta < expected_time_between_calls)
  449. break;
  450. accu += entry->count;
  451. }
  452. *percent_within_bounds = (1. - (double)accu / calls) * 100;
  453. }
  454. #define G_MS "g\xC2\xA0ms"
  455. static void profile_print_entry(profiler_snapshot_entry_t *entry,
  456. struct dstr *indent_buffer,
  457. struct dstr *output_buffer, unsigned indent,
  458. uint64_t active, uint64_t parent_calls)
  459. {
  460. uint64_t calls = entry->overall_count;
  461. uint64_t min_ = entry->min_time;
  462. uint64_t max_ = entry->max_time;
  463. uint64_t percentile99 = 0;
  464. uint64_t median = 0;
  465. double percent_within_bounds = 0.;
  466. gather_stats(entry->expected_time_between_calls, &entry->times, calls,
  467. &percentile99, &median, &percent_within_bounds);
  468. make_indent_string(indent_buffer, indent, active);
  469. if (min_ == max_) {
  470. dstr_printf(output_buffer, "%s%s: %" G_MS, indent_buffer->array,
  471. entry->name, min_ / 1000.);
  472. } else {
  473. dstr_printf(output_buffer,
  474. "%s%s: min=%" G_MS ", median=%" G_MS ", "
  475. "max=%" G_MS ", 99th percentile=%" G_MS,
  476. indent_buffer->array, entry->name, min_ / 1000.,
  477. median / 1000., max_ / 1000., percentile99 / 1000.);
  478. if (entry->expected_time_between_calls) {
  479. double expected_ms =
  480. entry->expected_time_between_calls / 1000.;
  481. dstr_catf(output_buffer, ", %g%% below %" G_MS,
  482. percent_within_bounds, expected_ms);
  483. }
  484. }
  485. if (parent_calls && calls != parent_calls) {
  486. double calls_per_parent = (double)calls / parent_calls;
  487. if (lround(calls_per_parent * 10) != 10)
  488. dstr_catf(output_buffer, ", %g calls per parent call",
  489. calls_per_parent);
  490. }
  491. blog(LOG_INFO, "%s", output_buffer->array);
  492. active |= (uint64_t)1 << indent;
  493. for (size_t i = 0; i < entry->children.num; i++) {
  494. if ((i + 1) == entry->children.num)
  495. active &= (1 << indent) - 1;
  496. profile_print_entry(&entry->children.array[i], indent_buffer,
  497. output_buffer, indent + 1, active, calls);
  498. }
  499. }
/* Compute statistics for time-between-calls entries (sorted by
 * time_delta, largest first): the median interval and the percentage of
 * intervals below lower_bound (*lower), within [lower_bound,
 * upper_bound] (*percent), and above upper_bound (*higher). min_/max_
 * let the all-in/all-out cases short-circuit. */
static void gather_stats_between(profiler_time_entries_t *entries,
				 uint64_t calls, uint64_t lower_bound,
				 uint64_t upper_bound, uint64_t min_,
				 uint64_t max_, uint64_t *median,
				 double *percent, double *lower, double *higher)
{
	*median = 0;
	*percent = 0.;
	*lower = 0.;
	*higher = 0.;

	if (!entries->num)
		return;

	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		accu += entries->array[i].count;
		if (accu < calls * 0.5)
			continue;

		*median = entries->array[i].time_delta;
		break;
	}

	/* Descending order: everything before the first delta <=
	 * upper_bound counts as "higher", everything after the first
	 * delta < lower_bound counts as "lower". */
	bool found_upper_bound = max_ <= upper_bound;
	bool found_lower_bound = false;

	if (min_ >= upper_bound) {
		*higher = 100.;
		return;
	}

	if (found_upper_bound && min_ >= lower_bound) {
		*percent = 100.;
		return;
	}

	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t delta = entries->array[i].time_delta;

		if (!found_upper_bound && delta <= upper_bound) {
			*higher = (double)accu / calls * 100;
			accu = 0;
			found_upper_bound = true;
		}

		if (!found_lower_bound && delta < lower_bound) {
			*percent = (double)accu / calls * 100;
			accu = 0;
			found_lower_bound = true;
		}

		accu += entries->array[i].count;
	}

	if (!found_upper_bound) {
		*higher = 100.;

	} else if (!found_lower_bound) {
		*percent = (double)accu / calls * 100;

	} else {
		*lower = (double)accu / calls * 100;
	}
}
  553. static void profile_print_entry_expected(profiler_snapshot_entry_t *entry,
  554. struct dstr *indent_buffer,
  555. struct dstr *output_buffer,
  556. unsigned indent, uint64_t active,
  557. uint64_t parent_calls)
  558. {
  559. UNUSED_PARAMETER(parent_calls);
  560. if (!entry->expected_time_between_calls)
  561. return;
  562. uint64_t expected_time = entry->expected_time_between_calls;
  563. uint64_t min_ = entry->min_time_between_calls;
  564. uint64_t max_ = entry->max_time_between_calls;
  565. uint64_t median = 0;
  566. double percent = 0.;
  567. double lower = 0.;
  568. double higher = 0.;
  569. gather_stats_between(&entry->times_between_calls,
  570. entry->overall_between_calls_count,
  571. (uint64_t)(expected_time * 0.98),
  572. (uint64_t)(expected_time * 1.02 + 0.5), min_, max_,
  573. &median, &percent, &lower, &higher);
  574. make_indent_string(indent_buffer, indent, active);
  575. blog(LOG_INFO,
  576. "%s%s: min=%" G_MS ", median=%" G_MS ", max=%" G_MS ", %g%% "
  577. "within ±2%% of %" G_MS " (%g%% lower, %g%% higher)",
  578. indent_buffer->array, entry->name, min_ / 1000., median / 1000.,
  579. max_ / 1000., percent, expected_time / 1000., lower, higher);
  580. active |= (uint64_t)1 << indent;
  581. for (size_t i = 0; i < entry->children.num; i++) {
  582. if ((i + 1) == entry->children.num)
  583. active &= (1 << indent) - 1;
  584. profile_print_entry_expected(&entry->children.array[i],
  585. indent_buffer, output_buffer,
  586. indent + 1, active, 0);
  587. }
  588. }
/* Shared driver for the printing entry points: log `intro`, run `print`
 * over every root of `snap`, then a separator line. When snap is NULL,
 * a fresh snapshot is created and freed internally. */
void profile_print_func(const char *intro, profile_entry_print_func print,
			profiler_snapshot_t *snap)
{
	struct dstr indent_buffer = {0};
	struct dstr output_buffer = {0};

	bool free_snapshot = !snap;
	if (!snap)
		snap = profile_snapshot_create();

	blog(LOG_INFO, "%s", intro);
	for (size_t i = 0; i < snap->roots.num; i++) {
		print(&snap->roots.array[i], &indent_buffer, &output_buffer, 0,
		      0, 0);
	}
	blog(LOG_INFO, "=================================================");

	if (free_snapshot)
		profile_snapshot_free(snap);

	dstr_free(&output_buffer);
	dstr_free(&indent_buffer);
}
/* Log per-call timing statistics for all roots; takes (and frees) a
 * temporary snapshot when snap is NULL. */
void profiler_print(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Results =============================",
			   profile_print_entry, snap);
}
/* Log time-between-calls statistics for roots registered with an
 * expected interval; takes (and frees) a temporary snapshot when snap
 * is NULL. */
void profiler_print_time_between_calls(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Time Between Calls ==================",
			   profile_print_entry_expected, snap);
}
  618. static void free_call_children(profile_call *call)
  619. {
  620. if (!call)
  621. return;
  622. const size_t num = call->children.num;
  623. for (size_t i = 0; i < num; i++)
  624. free_call_children(&call->children.array[i]);
  625. da_free(call->children);
  626. }
/* Free a heap-allocated root call and its entire subtree. Callers pass
 * NULL here (e.g. merge_context on a root's first completion), relying
 * on free_call_children's NULL check and on bfree(NULL) being a no-op
 * like free(). */
static void free_call_context(profile_call *context)
{
	free_call_children(context);
	bfree(context);
}
  632. static void free_hashmap(profile_times_table *map)
  633. {
  634. map->size = 0;
  635. bfree(map->entries);
  636. map->entries = NULL;
  637. bfree(map->old_entries);
  638. map->old_entries = NULL;
  639. }
  640. static void free_profile_entry(profile_entry *entry)
  641. {
  642. for (size_t i = 0; i < entry->children.num; i++)
  643. free_profile_entry(&entry->children.array[i]);
  644. free_hashmap(&entry->times);
  645. #ifdef TRACK_OVERHEAD
  646. free_hashmap(&entry->overhead);
  647. #endif
  648. free_hashmap(&entry->times_between_calls);
  649. da_free(entry->children);
  650. }
/* Tear down all profiler state. Disables profiling and detaches the
 * root entries while holding root_mutex; then, per root, briefly takes
 * its mutex to wait out any in-flight merge_context() before destroying
 * everything. */
void profiler_free(void)
{
	DARRAY(profile_root_entry) old_root_entries = {0};

	pthread_mutex_lock(&root_mutex);
	enabled = false;
	da_move(old_root_entries, root_entries);
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < old_root_entries.num; i++) {
		profile_root_entry *entry = &old_root_entries.array[i];

		/* Lock/unlock pair: ensures no merge still holds the
		 * mutex before it is destroyed. */
		pthread_mutex_lock(entry->mutex);
		pthread_mutex_unlock(entry->mutex);

		pthread_mutex_destroy(entry->mutex);
		bfree(entry->mutex);
		entry->mutex = NULL;

		free_call_context(entry->prev_call);
		free_profile_entry(entry->entry);
		bfree(entry->entry);
	}

	da_free(old_root_entries);

	pthread_mutex_destroy(&root_mutex);
}
/* ------------------------------------------------------------------------- */
/* Profiler name storage */

/* Owns heap-allocated formatted name strings so their pointers stay
 * stable for the store's lifetime (profile names are compared by
 * pointer identity throughout this file). */
struct profiler_name_store {
	pthread_mutex_t mutex;
	DARRAY(char *) names;
};
  678. profiler_name_store_t *profiler_name_store_create(void)
  679. {
  680. profiler_name_store_t *store = bzalloc(sizeof(profiler_name_store_t));
  681. if (pthread_mutex_init(&store->mutex, NULL))
  682. goto error;
  683. return store;
  684. error:
  685. bfree(store);
  686. return NULL;
  687. }
  688. void profiler_name_store_free(profiler_name_store_t *store)
  689. {
  690. if (!store)
  691. return;
  692. for (size_t i = 0; i < store->names.num; i++)
  693. bfree(store->names.array[i]);
  694. da_free(store->names);
  695. pthread_mutex_destroy(&store->mutex);
  696. bfree(store);
  697. }
/* Format and permanently store a profile name, returning a stable
 * pointer valid until the store is freed — suitable for the
 * identity-compared name parameters of profile_start()/profile_end().
 * Thread-safe. */
const char *profile_store_name(profiler_name_store_t *store, const char *format,
			       ...)
{
	va_list args;
	va_start(args, format);

	struct dstr str = {0};
	dstr_vprintf(&str, format, args);

	va_end(args);

	const char *result = NULL;

	pthread_mutex_lock(&store->mutex);
	/* The dstr's buffer is adopted by the store (freed later in
	 * profiler_name_store_free), so str is deliberately not freed. */
	size_t idx = da_push_back(store->names, &str.array);
	result = store->names.array[idx];
	pthread_mutex_unlock(&store->mutex);

	return result;
}
/* ------------------------------------------------------------------------- */
/* Profiler data access */

/* Deep-copy one profile entry (and its subtree) into snapshot form,
 * flattening the hash maps into arrays and recording totals/extremes. */
static void add_entry_to_snapshot(profile_entry *entry,
				  profiler_snapshot_entry_t *s_entry)
{
	s_entry->name = entry->name;
	s_entry->overall_count =
		copy_map_to_array(&entry->times, &s_entry->times,
				  &s_entry->min_time, &s_entry->max_time);

	/* Assignment intended: between-calls data is only copied when an
	 * expected interval was registered (non-zero). */
	if ((s_entry->expected_time_between_calls =
		     entry->expected_time_between_calls))
		s_entry->overall_between_calls_count =
			copy_map_to_array(&entry->times_between_calls,
					  &s_entry->times_between_calls,
					  &s_entry->min_time_between_calls,
					  &s_entry->max_time_between_calls);

	da_reserve(s_entry->children, entry->children.num);
	for (size_t i = 0; i < entry->children.num; i++)
		add_entry_to_snapshot(&entry->children.array[i],
				      da_push_back_new(s_entry->children));
}
  734. static void sort_snapshot_entry(profiler_snapshot_entry_t *entry)
  735. {
  736. qsort(entry->times.array, entry->times.num, sizeof(profiler_time_entry),
  737. profiler_time_entry_compare);
  738. if (entry->expected_time_between_calls)
  739. qsort(entry->times_between_calls.array,
  740. entry->times_between_calls.num,
  741. sizeof(profiler_time_entry), profiler_time_entry_compare);
  742. for (size_t i = 0; i < entry->children.num; i++)
  743. sort_snapshot_entry(&entry->children.array[i]);
  744. }
/* Capture a snapshot of all current profiler data. Each root is copied
 * while holding its own mutex; sorting happens afterwards outside any
 * lock. Caller owns the result (free with profile_snapshot_free). */
profiler_snapshot_t *profile_snapshot_create(void)
{
	profiler_snapshot_t *snap = bzalloc(sizeof(profiler_snapshot_t));

	pthread_mutex_lock(&root_mutex);
	da_reserve(snap->roots, root_entries.num);
	for (size_t i = 0; i < root_entries.num; i++) {
		pthread_mutex_lock(root_entries.array[i].mutex);
		add_entry_to_snapshot(root_entries.array[i].entry,
				      da_push_back_new(snap->roots));
		pthread_mutex_unlock(root_entries.array[i].mutex);
	}
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < snap->roots.num; i++)
		sort_snapshot_entry(&snap->roots.array[i]);

	return snap;
}
  761. static void free_snapshot_entry(profiler_snapshot_entry_t *entry)
  762. {
  763. for (size_t i = 0; i < entry->children.num; i++)
  764. free_snapshot_entry(&entry->children.array[i]);
  765. da_free(entry->children);
  766. da_free(entry->times_between_calls);
  767. da_free(entry->times);
  768. }
  769. void profile_snapshot_free(profiler_snapshot_t *snap)
  770. {
  771. if (!snap)
  772. return;
  773. for (size_t i = 0; i < snap->roots.num; i++)
  774. free_snapshot_entry(&snap->roots.array[i]);
  775. da_free(snap->roots);
  776. bfree(snap);
  777. }
/* Sink that receives each formatted CSV line (e.g. fwrite/gzwrite). */
typedef void (*dump_csv_func)(void *data, struct dstr *buffer);

/* Emit one snapshot entry's rows, then recurse into its children.
 * Pointer values act as stable ids (%p of the entry/parent structs and
 * of the interned name strings); per-call rows carry a literal 0 in the
 * time_between_calls column. */
static void entry_dump_csv(struct dstr *buffer,
			   const profiler_snapshot_entry_t *parent,
			   const profiler_snapshot_entry_t *entry,
			   dump_csv_func func, void *data)
{
	const char *parent_name = parent ? parent->name : NULL;

	for (size_t i = 0; i < entry->times.num; i++) {
		dstr_printf(buffer,
			    "%p,%p,%p,%p,%s,0,"
			    "%" PRIu64 ",%" PRIu64 "\n",
			    entry, parent, entry->name, parent_name,
			    entry->name, entry->times.array[i].time_delta,
			    entry->times.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->times_between_calls.num; i++) {
		dstr_printf(buffer,
			    "%p,%p,%p,%p,%s,"
			    "%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n",
			    entry, parent, entry->name, parent_name,
			    entry->name, entry->expected_time_between_calls,
			    entry->times_between_calls.array[i].time_delta,
			    entry->times_between_calls.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->children.num; i++)
		entry_dump_csv(buffer, entry, &entry->children.array[i], func,
			       data);
}
  808. static void profiler_snapshot_dump(const profiler_snapshot_t *snap,
  809. dump_csv_func func, void *data)
  810. {
  811. struct dstr buffer = {0};
  812. dstr_init_copy(&buffer, "id,parent_id,name_id,parent_name_id,name,"
  813. "time_between_calls,time_delta_µs,count\n");
  814. func(data, &buffer);
  815. for (size_t i = 0; i < snap->roots.num; i++)
  816. entry_dump_csv(&buffer, NULL, &snap->roots.array[i], func,
  817. data);
  818. dstr_free(&buffer);
  819. }
  820. static void dump_csv_fwrite(void *data, struct dstr *buffer)
  821. {
  822. fwrite(buffer->array, 1, buffer->len, data);
  823. }
  824. bool profiler_snapshot_dump_csv(const profiler_snapshot_t *snap,
  825. const char *filename)
  826. {
  827. FILE *f = os_fopen(filename, "wb+");
  828. if (!f)
  829. return false;
  830. profiler_snapshot_dump(snap, dump_csv_fwrite, f);
  831. fclose(f);
  832. return true;
  833. }
  834. static void dump_csv_gzwrite(void *data, struct dstr *buffer)
  835. {
  836. gzwrite(data, buffer->array, (unsigned)buffer->len);
  837. }
  838. bool profiler_snapshot_dump_csv_gz(const profiler_snapshot_t *snap,
  839. const char *filename)
  840. {
  841. gzFile gz;
  842. #ifdef _WIN32
  843. wchar_t *filename_w = NULL;
  844. os_utf8_to_wcs_ptr(filename, 0, &filename_w);
  845. if (!filename_w)
  846. return false;
  847. gz = gzopen_w(filename_w, "wb");
  848. bfree(filename_w);
  849. #else
  850. gz = gzopen(filename, "wb");
  851. #endif
  852. if (!gz)
  853. return false;
  854. profiler_snapshot_dump(snap, dump_csv_gzwrite, gz);
  855. #ifdef _WIN32
  856. gzclose_w(gz);
  857. #else
  858. gzclose(gz);
  859. #endif
  860. return true;
  861. }
  862. size_t profiler_snapshot_num_roots(profiler_snapshot_t *snap)
  863. {
  864. return snap ? snap->roots.num : 0;
  865. }
  866. void profiler_snapshot_enumerate_roots(profiler_snapshot_t *snap,
  867. profiler_entry_enum_func func,
  868. void *context)
  869. {
  870. if (!snap)
  871. return;
  872. for (size_t i = 0; i < snap->roots.num; i++)
  873. if (!func(context, &snap->roots.array[i]))
  874. break;
  875. }
  876. void profiler_snapshot_filter_roots(profiler_snapshot_t *snap,
  877. profiler_name_filter_func func, void *data)
  878. {
  879. for (size_t i = 0; i < snap->roots.num;) {
  880. bool remove = false;
  881. bool res = func(data, snap->roots.array[i].name, &remove);
  882. if (remove) {
  883. free_snapshot_entry(&snap->roots.array[i]);
  884. da_erase(snap->roots, i);
  885. }
  886. if (!res)
  887. break;
  888. if (!remove)
  889. i += 1;
  890. }
  891. }
  892. size_t profiler_snapshot_num_children(profiler_snapshot_entry_t *entry)
  893. {
  894. return entry ? entry->children.num : 0;
  895. }
  896. void profiler_snapshot_enumerate_children(profiler_snapshot_entry_t *entry,
  897. profiler_entry_enum_func func,
  898. void *context)
  899. {
  900. if (!entry)
  901. return;
  902. for (size_t i = 0; i < entry->children.num; i++)
  903. if (!func(context, &entry->children.array[i]))
  904. break;
  905. }
  906. const char *profiler_snapshot_entry_name(profiler_snapshot_entry_t *entry)
  907. {
  908. return entry ? entry->name : NULL;
  909. }
  910. profiler_time_entries_t *
  911. profiler_snapshot_entry_times(profiler_snapshot_entry_t *entry)
  912. {
  913. return entry ? &entry->times : NULL;
  914. }
  915. uint64_t profiler_snapshot_entry_overall_count(profiler_snapshot_entry_t *entry)
  916. {
  917. return entry ? entry->overall_count : 0;
  918. }
  919. uint64_t profiler_snapshot_entry_min_time(profiler_snapshot_entry_t *entry)
  920. {
  921. return entry ? entry->min_time : 0;
  922. }
  923. uint64_t profiler_snapshot_entry_max_time(profiler_snapshot_entry_t *entry)
  924. {
  925. return entry ? entry->max_time : 0;
  926. }
  927. profiler_time_entries_t *
  928. profiler_snapshot_entry_times_between_calls(profiler_snapshot_entry_t *entry)
  929. {
  930. return entry ? &entry->times_between_calls : NULL;
  931. }
  932. uint64_t profiler_snapshot_entry_expected_time_between_calls(
  933. profiler_snapshot_entry_t *entry)
  934. {
  935. return entry ? entry->expected_time_between_calls : 0;
  936. }
  937. uint64_t
  938. profiler_snapshot_entry_min_time_between_calls(profiler_snapshot_entry_t *entry)
  939. {
  940. return entry ? entry->min_time_between_calls : 0;
  941. }
  942. uint64_t
  943. profiler_snapshot_entry_max_time_between_calls(profiler_snapshot_entry_t *entry)
  944. {
  945. return entry ? entry->max_time_between_calls : 0;
  946. }
  947. uint64_t profiler_snapshot_entry_overall_between_calls_count(
  948. profiler_snapshot_entry_t *entry)
  949. {
  950. return entry ? entry->overall_between_calls_count : 0;
  951. }