/* profiler.c */
  1. #include <inttypes.h>
  2. #include "profiler.h"
  3. #include "darray.h"
  4. #include "dstr.h"
  5. #include "platform.h"
  6. #include "threading.h"
  7. #include <math.h>
  8. #include <zlib.h>
  9. //#define TRACK_OVERHEAD
/* A frozen copy of all profiler data; the tree of roots is built by
 * profile_snapshot_create(). */
struct profiler_snapshot {
	DARRAY(profiler_snapshot_entry_t) roots;
};

/* One node of a snapshot tree: aggregated call durations for a single
 * profile name, plus (for entries with a registered expected interval)
 * the times between successive calls. */
struct profiler_snapshot_entry {
	const char *name;
	profiler_time_entries_t times;
	uint64_t min_time;
	uint64_t max_time;
	uint64_t overall_count;
	profiler_time_entries_t times_between_calls;
	uint64_t expected_time_between_calls;
	uint64_t min_time_between_calls;
	uint64_t max_time_between_calls;
	uint64_t overall_between_calls_count;
	DARRAY(profiler_snapshot_entry_t) children;
};
typedef struct profiler_time_entry profiler_time_entry;

/* One call currently on (or just popped from) a thread's profile call
 * stack; nested calls live inline in the parent's children array. */
typedef struct profile_call profile_call;
struct profile_call {
	const char *name;
#ifdef TRACK_OVERHEAD
	uint64_t overhead_start; /* timestamp taken before start_time */
#endif
	uint64_t start_time;
	uint64_t end_time;
#ifdef TRACK_OVERHEAD
	uint64_t overhead_end;   /* timestamp taken after end_time */
#endif
	uint64_t expected_time_between_calls;
	DARRAY(profile_call) children;
	profile_call *parent;    /* NULL for a root call */
};
/* One slot of the open-addressing time table; probes is the number of
 * probe steps used to place the entry, 0 marking an empty slot. */
typedef struct profile_times_table_entry profile_times_table_entry;
struct profile_times_table_entry {
	size_t probes;
	profiler_time_entry entry;
};

/* Open-addressing hash map from time delta (µs) to call count.  When
 * the table grows, the previous entries array is kept in old_entries
 * and drained incrementally (see migrate_old_entries). */
typedef struct profile_times_table profile_times_table;
struct profile_times_table {
	size_t size;
	size_t occupied;
	size_t max_probe_count;
	profile_times_table_entry *entries;
	size_t old_start_index; /* next old slot to migrate */
	size_t old_occupied;    /* old entries not yet migrated */
	profile_times_table_entry *old_entries;
};
/* Aggregated timing data for one profile name, with children mirroring
 * the call tree. */
typedef struct profile_entry profile_entry;
struct profile_entry {
	const char *name;
	profile_times_table times;
#ifdef TRACK_OVERHEAD
	profile_times_table overhead;
#endif
	uint64_t expected_time_between_calls;
	profile_times_table times_between_calls;
	DARRAY(profile_entry) children;
};

/* A top-level profiled call site; each root carries its own mutex so
 * merging one root's data does not block the others. */
typedef struct profile_root_entry profile_root_entry;
struct profile_root_entry {
	pthread_mutex_t *mutex;
	const char *name;
	profile_entry *entry;
	profile_call *prev_call; /* previously completed root call */
};
  75. static inline uint64_t diff_ns_to_usec(uint64_t prev, uint64_t next)
  76. {
  77. return (next - prev + 500) / 1000;
  78. }
  79. static inline void update_max_probes(profile_times_table *map, size_t val)
  80. {
  81. map->max_probe_count = map->max_probe_count < val ?
  82. val : map->max_probe_count;
  83. }
static void migrate_old_entries(profile_times_table *map, bool limit_items);
static void grow_hashmap(profile_times_table *map,
		uint64_t usec, uint64_t count);

/* Add `count` calls of duration `usec` to the table, using Robin Hood
 * style open addressing: an occupant that probed less far than the
 * incoming entry is displaced and re-inserted.  Grows the table when
 * it is more than 70% full. */
static void add_hashmap_entry(profile_times_table *map, uint64_t usec,
		uint64_t count)
{
	size_t probes = 1;
	size_t start = usec % map->size;
	for (;; probes += 1) {
		size_t idx = (start + probes) % map->size;
		profile_times_table_entry *entry = &map->entries[idx];

		/* Empty slot: claim it. */
		if (!entry->probes) {
			entry->probes = probes;
			entry->entry.time_delta = usec;
			entry->entry.count = count;
			map->occupied += 1;
			update_max_probes(map, probes);
			return;
		}

		/* Bucket for this delta already exists: merge counts. */
		if (entry->entry.time_delta == usec) {
			entry->entry.count += count;
			return;
		}

		/* Occupant probed at least as far as we have: keep going. */
		if (entry->probes >= probes)
			continue;

		/* Load factor above 0.7: rehash into a larger table. */
		if (map->occupied/(double)map->size > 0.7) {
			grow_hashmap(map, usec, count);
			return;
		}

		/* Robin Hood swap: take the slot and continue inserting
		 * the displaced occupant instead. */
		size_t old_probes = entry->probes;
		uint64_t old_count = entry->entry.count;
		uint64_t old_usec = entry->entry.time_delta;

		entry->probes = probes;
		entry->entry.count = count;
		entry->entry.time_delta = usec;
		update_max_probes(map, probes);

		probes = old_probes;
		count = old_count;
		usec = old_usec;
		start = usec % map->size;
	}
}
  126. static void init_hashmap(profile_times_table *map, size_t size)
  127. {
  128. map->size = size;
  129. map->occupied = 0;
  130. map->max_probe_count = 0;
  131. map->entries = bzalloc(sizeof(profile_times_table_entry) * size);
  132. map->old_start_index = 0;
  133. map->old_occupied = 0;
  134. map->old_entries = NULL;
  135. }
/* Move entries left over from before the last grow into the current
 * table.  With limit_items set, at most 8 old slots are scanned per
 * call so the cost is amortized over many insertions; otherwise the
 * old table is drained completely. */
static void migrate_old_entries(profile_times_table *map, bool limit_items)
{
	if (!map->old_entries)
		return;

	/* Everything migrated already: release the old array. */
	if (!map->old_occupied) {
		bfree(map->old_entries);
		map->old_entries = NULL;
		return;
	}

	for (size_t i = 0; !limit_items || i < 8; i++, map->old_start_index++) {
		if (!map->old_occupied)
			return;

		profile_times_table_entry *entry =
			&map->old_entries[map->old_start_index];
		/* Empty old slot does not count against the 8-item limit's
		 * purpose but does consume one scan step. */
		if (!entry->probes)
			continue;

		add_hashmap_entry(map, entry->entry.time_delta,
				entry->entry.count);
		map->old_occupied -= 1;
	}
}
/* Double the table (minimum size 16), stashing the current entries as
 * old_entries for incremental migration, then insert the value that
 * triggered the grow. */
static void grow_hashmap(profile_times_table *map,
		uint64_t usec, uint64_t count)
{
	/* Finish any migration still pending from a previous grow. */
	migrate_old_entries(map, false);

	size_t old_size = map->size;
	size_t old_occupied = map->occupied;
	profile_times_table_entry *entries = map->entries;

	init_hashmap(map, (old_size * 2 < 16) ? 16 : (old_size * 2));

	map->old_occupied = old_occupied;
	map->old_entries = entries;

	add_hashmap_entry(map, usec, count);
}
  169. static profile_entry *init_entry(profile_entry *entry, const char *name)
  170. {
  171. entry->name = name;
  172. init_hashmap(&entry->times, 1);
  173. #ifdef TRACK_OVERHEAD
  174. init_hashmap(&entry->overhead, 1);
  175. #endif
  176. entry->expected_time_between_calls = 0;
  177. init_hashmap(&entry->times_between_calls, 1);
  178. return entry;
  179. }
  180. static profile_entry *get_child(profile_entry *parent, const char *name)
  181. {
  182. const size_t num = parent->children.num;
  183. for (size_t i = 0; i < num; i++) {
  184. profile_entry *child = &parent->children.array[i];
  185. if (child->name == name)
  186. return child;
  187. }
  188. return init_entry(da_push_back_new(parent->children), name);
  189. }
/* Fold a completed call tree into the aggregated entry tree.  When
 * prev_call is given (root calls only) and an expected interval is
 * registered, also record the time between the two calls' starts. */
static void merge_call(profile_entry *entry, profile_call *call,
		profile_call *prev_call)
{
	const size_t num = call->children.num;
	for (size_t i = 0; i < num; i++) {
		profile_call *child = &call->children.array[i];
		merge_call(get_child(entry, child->name), child, NULL);
	}

	if (entry->expected_time_between_calls != 0 && prev_call) {
		migrate_old_entries(&entry->times_between_calls, true);
		uint64_t usec = diff_ns_to_usec(prev_call->start_time,
				call->start_time);
		add_hashmap_entry(&entry->times_between_calls, usec, 1);
	}

	migrate_old_entries(&entry->times, true);
	uint64_t usec = diff_ns_to_usec(call->start_time, call->end_time);
	add_hashmap_entry(&entry->times, usec, 1);

#ifdef TRACK_OVERHEAD
	migrate_old_entries(&entry->overhead, true);
	usec = diff_ns_to_usec(call->overhead_start, call->start_time);
	usec += diff_ns_to_usec(call->end_time, call->overhead_end);
	add_hashmap_entry(&entry->overhead, usec, 1);
#endif
}
/* Global profiler state, guarded by root_mutex. */
static bool enabled = false;
static pthread_mutex_t root_mutex = PTHREAD_MUTEX_INITIALIZER;
static DARRAY(profile_root_entry) root_entries;

/* Per-thread state: innermost active call, and whether this thread is
 * currently recording (lazily synced with `enabled`). */
static THREAD_LOCAL profile_call *thread_context = NULL;
static THREAD_LOCAL bool thread_enabled = true;
/* Globally enable profiling.  Threads that disabled themselves while
 * the profiler was off pick the flag up via profile_reenable_thread. */
void profiler_start(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = true;
	pthread_mutex_unlock(&root_mutex);
}
/* Globally disable profiling; threads stop recording the next time
 * they hit lock_root(). */
void profiler_stop(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = false;
	pthread_mutex_unlock(&root_mutex);
}
/* Re-sync this thread's enable flag with the global one; no-op (and
 * lock-free) when the thread is already enabled. */
void profile_reenable_thread(void)
{
	if (thread_enabled)
		return;

	pthread_mutex_lock(&root_mutex);
	thread_enabled = enabled;
	pthread_mutex_unlock(&root_mutex);
}
/* Take root_mutex if the profiler is enabled.  On true, the caller
 * HOLDS root_mutex and must unlock it.  On false, the mutex has been
 * released and the calling thread is marked disabled until it calls
 * profile_reenable_thread(). */
static bool lock_root(void)
{
	pthread_mutex_lock(&root_mutex);
	if (!enabled) {
		pthread_mutex_unlock(&root_mutex);

		thread_enabled = false;
		return false;
	}

	return true;
}
/* Find (or create) the root entry for `name`.  Must be called with
 * root_mutex held; names are compared by pointer identity, so callers
 * must pass interned/static strings (see profile_store_name). */
static profile_root_entry *get_root_entry(const char *name)
{
	profile_root_entry *r_entry = NULL;
	for (size_t i = 0; i < root_entries.num; i++) {
		if (root_entries.array[i].name == name) {
			r_entry = &root_entries.array[i];
			break;
		}
	}

	if (!r_entry) {
		r_entry = da_push_back_new(root_entries);
		r_entry->mutex = bmalloc(sizeof(pthread_mutex_t));
		pthread_mutex_init(r_entry->mutex, NULL);

		r_entry->name = name;
		r_entry->entry = bzalloc(sizeof(profile_entry));
		init_entry(r_entry->entry, name);
	}

	return r_entry;
}
  268. void profile_register_root(const char *name,
  269. uint64_t expected_time_between_calls)
  270. {
  271. if (!lock_root())
  272. return;
  273. get_root_entry(name)->entry->expected_time_between_calls =
  274. (expected_time_between_calls + 500) / 1000;
  275. pthread_mutex_unlock(&root_mutex);
  276. }
static void free_call_context(profile_call *context);

/* Merge a finished root call into the global data.  The root's own
 * mutex is taken before root_mutex is released so a snapshot cannot
 * observe a half-merged entry; the previously stored root call is
 * freed once the merge completes. */
static void merge_context(profile_call *context)
{
	pthread_mutex_t *mutex = NULL;
	profile_entry *entry = NULL;
	profile_call *prev_call = NULL;

	if (!lock_root()) {
		/* Profiler disabled: discard the data. */
		free_call_context(context);
		return;
	}

	profile_root_entry *r_entry = get_root_entry(context->name);

	mutex = r_entry->mutex;
	entry = r_entry->entry;
	prev_call = r_entry->prev_call;

	/* Keep this call around so the next merge can compute the time
	 * between successive root calls. */
	r_entry->prev_call = context;

	pthread_mutex_lock(mutex);
	pthread_mutex_unlock(&root_mutex);

	merge_call(entry, context, prev_call);

	pthread_mutex_unlock(mutex);

	free_call_context(prev_call);
}
/* Open a named profile scope on the calling thread.  Nested calls are
 * stored inline in the parent's children array; root calls are heap
 * allocated.  start_time is sampled last so setup cost is excluded
 * from the measured interval. */
void profile_start(const char *name)
{
	if (!thread_enabled)
		return;

	profile_call new_call = {
		.name = name,
#ifdef TRACK_OVERHEAD
		.overhead_start = os_gettime_ns(),
#endif
		.parent = thread_context,
	};

	profile_call *call = NULL;

	if (new_call.parent) {
		size_t idx = da_push_back(new_call.parent->children,
				&new_call);
		call = &new_call.parent->children.array[idx];
	} else {
		call = bmalloc(sizeof(profile_call));
		memcpy(call, &new_call, sizeof(profile_call));
	}

	thread_context = call;
	call->start_time = os_gettime_ns();
}
/* Close the innermost profile scope.  The end timestamp is sampled
 * first so the bookkeeping below is excluded from the measurement.
 * A name mismatch is logged; if a matching ancestor scope exists, the
 * intervening scopes are closed automatically.  When a root scope
 * closes, the whole call tree is merged into the global data. */
void profile_end(const char *name)
{
	uint64_t end = os_gettime_ns();
	if (!thread_enabled)
		return;

	profile_call *call = thread_context;
	if (!call) {
		blog(LOG_ERROR, "Called profile end with no active profile");
		return;
	}

	/* NOTE(review): a NULL name is adopted from the end call here —
	 * confirm which profile_start caller produces unnamed scopes. */
	if (!call->name)
		call->name = name;
	if (call->name != name) {
		blog(LOG_ERROR, "Called profile end with mismatching name: "
				"start(\"%s\"[%p]) <-> end(\"%s\"[%p])",
				call->name, call->name, name, name);

		/* Search for an ancestor scope with the matching name. */
		profile_call *parent = call->parent;
		while (parent && parent->parent && parent->name != name)
			parent = parent->parent;
		if (!parent || parent->name != name)
			return;

		/* Close every open scope up to (excluding) the match. */
		while (call->name != name) {
			profile_end(call->name);
			call = call->parent;
		}
	}

	thread_context = call->parent;

	call->end_time = end;
#ifdef TRACK_OVERHEAD
	call->overhead_end = os_gettime_ns();
#endif

	if (call->parent)
		return;

	merge_context(call);
}
  355. static int profiler_time_entry_compare(const void *first, const void *second)
  356. {
  357. int64_t diff = ((profiler_time_entry*)second)->time_delta -
  358. ((profiler_time_entry*)first)->time_delta;
  359. return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
  360. }
/* Flatten the hash map into entry_buffer (after completing any pending
 * migration), optionally reporting the min/max time delta seen.
 * Returns the total number of calls across all buckets. */
static uint64_t copy_map_to_array(profile_times_table *map,
		profiler_time_entries_t *entry_buffer,
		uint64_t *min_, uint64_t *max_)
{
	migrate_old_entries(map, false);

	da_reserve((*entry_buffer), map->occupied);
	da_resize((*entry_buffer), 0);

	uint64_t min__ = ~(uint64_t)0;
	uint64_t max__ = 0;

	uint64_t calls = 0;
	for (size_t i = 0; i < map->size; i++) {
		if (!map->entries[i].probes)
			continue;

		profiler_time_entry *entry = &map->entries[i].entry;

		da_push_back((*entry_buffer), entry);

		calls += entry->count;
		min__ = (min__ < entry->time_delta) ? min__ : entry->time_delta;
		max__ = (max__ > entry->time_delta) ? max__ : entry->time_delta;
	}

	if (min_)
		*min_ = min__;
	if (max_)
		*max_ = max__;

	return calls;
}
typedef void (*profile_entry_print_func)(profiler_snapshot_entry_t *entry,
		struct dstr *indent_buffer, struct dstr *output_buffer,
		unsigned indent, uint64_t active, uint64_t parent_calls);

/* UTF-8 box-drawing fragments used for the printed tree. */
#define VPIPE_RIGHT " \xe2\x94\xa3" /* " ┣" */
#define VPIPE " \xe2\x94\x83"       /* " ┃" */
#define DOWN_RIGHT " \xe2\x94\x97"  /* " ┗" */

/* Build the tree-drawing prefix for one output line.  Bit i of
 * `active` is set while the entry at depth i still has siblings to be
 * printed below it. */
static void make_indent_string(struct dstr *indent_buffer, unsigned indent,
		uint64_t active)
{
	indent_buffer->len = 0;

	if (!indent) {
		/* Depth 0: emit an empty (NUL-only) string. */
		dstr_cat_ch(indent_buffer, 0);
		return;
	}

	for (size_t i = 0; i < indent; i++) {
		const char *fragment = "";
		bool last = i + 1 == indent;
		if (active & ((uint64_t)1 << i))
			fragment = last ? VPIPE_RIGHT : VPIPE;
		else
			fragment = last ? DOWN_RIGHT : " ";

		dstr_cat(indent_buffer, fragment);
	}
}
  411. static void gather_stats(uint64_t expected_time_between_calls,
  412. profiler_time_entries_t *entries,
  413. uint64_t calls, uint64_t *percentile99, uint64_t *median,
  414. double *percent_within_bounds)
  415. {
  416. if (!entries->num) {
  417. *percentile99 = 0;
  418. *median = 0;
  419. *percent_within_bounds = 0.;
  420. return;
  421. }
  422. /*if (entry_buffer->num > 2)
  423. blog(LOG_INFO, "buffer-size %lu, overall count %llu\n"
  424. "map-size %lu, occupied %lu, probes %lu",
  425. entry_buffer->num, calls,
  426. map->size, map->occupied,
  427. map->max_probe_count);*/
  428. uint64_t accu = 0;
  429. for (size_t i = 0; i < entries->num; i++) {
  430. uint64_t old_accu = accu;
  431. accu += entries->array[i].count;
  432. if (old_accu < calls * 0.01 && accu >= calls * 0.01)
  433. *percentile99 = entries->array[i].time_delta;
  434. else if (old_accu < calls * 0.5 && accu >= calls * 0.5) {
  435. *median = entries->array[i].time_delta;
  436. break;
  437. }
  438. }
  439. *percent_within_bounds = 0.;
  440. if (!expected_time_between_calls)
  441. return;
  442. accu = 0;
  443. for (size_t i = 0; i < entries->num; i++) {
  444. profiler_time_entry *entry = &entries->array[i];
  445. if (entry->time_delta < expected_time_between_calls)
  446. break;
  447. accu += entry->count;
  448. }
  449. *percent_within_bounds = (1. - (double)accu / calls) * 100;
  450. }
/* printf fragment: %g value followed by a non-breaking space and "ms" */
#define G_MS "g\xC2\xA0ms"

/* Log one snapshot entry's call-duration stats and recurse into its
 * children.  `active` is the sibling bitmask consumed by
 * make_indent_string(). */
static void profile_print_entry(profiler_snapshot_entry_t *entry,
		struct dstr *indent_buffer, struct dstr *output_buffer,
		unsigned indent, uint64_t active, uint64_t parent_calls)
{
	uint64_t calls = entry->overall_count;
	uint64_t min_ = entry->min_time;
	uint64_t max_ = entry->max_time;
	uint64_t percentile99 = 0;
	uint64_t median = 0;
	double percent_within_bounds = 0.;
	gather_stats(entry->expected_time_between_calls,
			&entry->times, calls,
			&percentile99, &median, &percent_within_bounds);

	make_indent_string(indent_buffer, indent, active);

	if (min_ == max_) {
		/* All recorded calls fell into a single time bucket. */
		dstr_printf(output_buffer, "%s%s: %"G_MS,
				indent_buffer->array, entry->name,
				min_ / 1000.);
	} else {
		dstr_printf(output_buffer, "%s%s: min=%"G_MS", median=%"G_MS", "
				"max=%"G_MS", 99th percentile=%"G_MS,
				indent_buffer->array, entry->name,
				min_ / 1000., median / 1000., max_ / 1000.,
				percentile99 / 1000.);

		if (entry->expected_time_between_calls) {
			double expected_ms =
				entry->expected_time_between_calls / 1000.;
			dstr_catf(output_buffer, ", %g%% below %"G_MS,
					percent_within_bounds, expected_ms);
		}
	}

	/* Only mention calls-per-parent when it rounds away from 1.0. */
	if (parent_calls && calls != parent_calls) {
		double calls_per_parent = (double)calls / parent_calls;
		if (lround(calls_per_parent * 10) != 10)
			dstr_catf(output_buffer, ", %g calls per parent call",
					calls_per_parent);
	}

	blog(LOG_INFO, "%s", output_buffer->array);

	active |= (uint64_t)1 << indent;
	for (size_t i = 0; i < entry->children.num; i++) {
		/* Last child: clear this depth's "more siblings" bit.
		 * NOTE(review): mask uses int literal 1 — would misbehave
		 * for indent >= 31; confirm trees never get that deep. */
		if ((i + 1) == entry->children.num)
			active &= (1 << indent) - 1;
		profile_print_entry(&entry->children.array[i],
				indent_buffer, output_buffer,
				indent + 1, active, calls);
	}
}
/* Compute time-between-calls stats: the median, plus the percentage of
 * intervals below lower_bound (*lower), within the bounds (*percent),
 * and above upper_bound (*higher).  `entries` must be sorted in
 * descending time_delta order; min_/max_ are used for fast paths. */
static void gather_stats_between(profiler_time_entries_t *entries,
		uint64_t calls, uint64_t lower_bound, uint64_t upper_bound,
		uint64_t min_, uint64_t max_, uint64_t *median,
		double *percent, double *lower, double *higher)
{
	*median = 0;
	*percent = 0.;
	*lower = 0.;
	*higher = 0.;

	if (!entries->num)
		return;

	/* Median: first bucket where the running count reaches 50%. */
	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		accu += entries->array[i].count;
		if (accu < calls * 0.5)
			continue;

		*median = entries->array[i].time_delta;
		break;
	}

	bool found_upper_bound = max_ <= upper_bound;
	bool found_lower_bound = false;

	/* Fast path: every interval exceeds the upper bound. */
	if (min_ >= upper_bound) {
		*higher = 100.;
		return;
	}

	/* Fast path: every interval lies within the bounds. */
	if (found_upper_bound && min_ >= lower_bound) {
		*percent = 100.;
		return;
	}

	/* Walk the descending buckets, flushing the accumulator into the
	 * appropriate output each time a bound is crossed. */
	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t delta = entries->array[i].time_delta;

		if (!found_upper_bound && delta <= upper_bound) {
			*higher = (double)accu / calls * 100;
			accu = 0;
			found_upper_bound = true;
		}

		if (!found_lower_bound && delta < lower_bound) {
			*percent = (double)accu / calls * 100;
			accu = 0;
			found_lower_bound = true;
		}

		accu += entries->array[i].count;
	}

	/* Assign whatever remains in the accumulator to the last region
	 * that was entered. */
	if (!found_upper_bound) {
		*higher = 100.;

	} else if (!found_lower_bound) {
		*percent = (double)accu / calls * 100;

	} else {
		*lower = (double)accu / calls * 100;
	}
}
/* Log time-between-calls stats for entries that registered an expected
 * interval; intervals within ±2% of the expectation count as on time.
 * Recurses into children (parent_calls is unused here). */
static void profile_print_entry_expected(profiler_snapshot_entry_t *entry,
		struct dstr *indent_buffer, struct dstr *output_buffer,
		unsigned indent, uint64_t active, uint64_t parent_calls)
{
	UNUSED_PARAMETER(parent_calls);

	if (!entry->expected_time_between_calls)
		return;

	uint64_t expected_time = entry->expected_time_between_calls;

	uint64_t min_ = entry->min_time_between_calls;
	uint64_t max_ = entry->max_time_between_calls;
	uint64_t median = 0;
	double percent = 0.;
	double lower = 0.;
	double higher = 0.;
	gather_stats_between(&entry->times_between_calls,
			entry->overall_between_calls_count,
			(uint64_t)(expected_time * 0.98),
			(uint64_t)(expected_time * 1.02 + 0.5),
			min_, max_,
			&median, &percent, &lower, &higher);

	make_indent_string(indent_buffer, indent, active);

	blog(LOG_INFO, "%s%s: min=%"G_MS", median=%"G_MS", max=%"G_MS", %g%% "
			"within ±2%% of %"G_MS" (%g%% lower, %g%% higher)",
			indent_buffer->array, entry->name,
			min_ / 1000., median / 1000., max_ / 1000., percent,
			expected_time / 1000.,
			lower, higher);

	active |= (uint64_t)1 << indent;
	for (size_t i = 0; i < entry->children.num; i++) {
		if ((i + 1) == entry->children.num)
			active &= (1 << indent) - 1;
		profile_print_entry_expected(&entry->children.array[i],
				indent_buffer, output_buffer,
				indent + 1, active, 0);
	}
}
/* Print every root of a snapshot using `print`.  When snap is NULL, a
 * temporary snapshot is created and freed before returning. */
void profile_print_func(const char *intro, profile_entry_print_func print,
		profiler_snapshot_t *snap)
{
	struct dstr indent_buffer = {0};
	struct dstr output_buffer = {0};

	bool free_snapshot = !snap;
	if (!snap)
		snap = profile_snapshot_create();

	blog(LOG_INFO, "%s", intro);
	for (size_t i = 0; i < snap->roots.num; i++) {
		print(&snap->roots.array[i],
				&indent_buffer, &output_buffer, 0, 0, 0);
	}
	blog(LOG_INFO, "=================================================");

	if (free_snapshot)
		profile_snapshot_free(snap);

	dstr_free(&output_buffer);
	dstr_free(&indent_buffer);
}
/* Log per-call duration statistics (uses a fresh snapshot when snap is
 * NULL). */
void profiler_print(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Results =============================",
			profile_print_entry, snap);
}
/* Log time-between-calls statistics for roots with a registered
 * expected interval (uses a fresh snapshot when snap is NULL). */
void profiler_print_time_between_calls(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Time Between Calls ==================",
			profile_print_entry_expected, snap);
}
  616. static void free_call_children(profile_call *call)
  617. {
  618. if (!call)
  619. return;
  620. const size_t num = call->children.num;
  621. for (size_t i = 0; i < num; i++)
  622. free_call_children(&call->children.array[i]);
  623. da_free(call->children);
  624. }
/* Free a heap-allocated root call and its entire subtree.
 * NOTE(review): relies on free_call_children and bfree both accepting
 * NULL — confirm bfree(NULL) is a no-op in this codebase. */
static void free_call_context(profile_call *context)
{
	free_call_children(context);
	bfree(context);
}
  630. static void free_hashmap(profile_times_table *map)
  631. {
  632. map->size = 0;
  633. bfree(map->entries);
  634. map->entries = NULL;
  635. bfree(map->old_entries);
  636. map->old_entries = NULL;
  637. }
  638. static void free_profile_entry(profile_entry *entry)
  639. {
  640. for (size_t i = 0; i < entry->children.num; i++)
  641. free_profile_entry(&entry->children.array[i]);
  642. free_hashmap(&entry->times);
  643. #ifdef TRACK_OVERHEAD
  644. free_hashmap(&entry->overhead);
  645. #endif
  646. free_hashmap(&entry->times_between_calls);
  647. da_free(entry->children);
  648. }
/* Tear down all profiler state.  The entries are moved out under
 * root_mutex first so no new data can be added; each root's mutex is
 * then locked and unlocked once to wait out any in-flight merge before
 * the entry is freed. */
void profiler_free(void)
{
	DARRAY(profile_root_entry) old_root_entries = {0};

	pthread_mutex_lock(&root_mutex);
	enabled = false;
	da_move(old_root_entries, root_entries);
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < old_root_entries.num; i++) {
		profile_root_entry *entry = &old_root_entries.array[i];

		/* Wait for a merge that may still hold this mutex. */
		pthread_mutex_lock(entry->mutex);
		pthread_mutex_unlock(entry->mutex);

		pthread_mutex_destroy(entry->mutex);
		bfree(entry->mutex);
		entry->mutex = NULL;

		free_call_context(entry->prev_call);
		free_profile_entry(entry->entry);
		bfree(entry->entry);
	}

	da_free(old_root_entries);
}
/* ------------------------------------------------------------------------- */
/* Profiler name storage */

/* Owns the heap-allocated strings handed out by profile_store_name()
 * so profiler names stay valid (and pointer-comparable) for the
 * store's lifetime. */
struct profiler_name_store {
	pthread_mutex_t mutex;
	DARRAY(char*) names;
};
  675. profiler_name_store_t *profiler_name_store_create(void)
  676. {
  677. profiler_name_store_t *store = bzalloc(sizeof(profiler_name_store_t));
  678. if (pthread_mutex_init(&store->mutex, NULL))
  679. goto error;
  680. return store;
  681. error:
  682. bfree(store);
  683. return NULL;
  684. }
  685. void profiler_name_store_free(profiler_name_store_t *store)
  686. {
  687. if (!store)
  688. return;
  689. for (size_t i = 0; i < store->names.num; i++)
  690. bfree(store->names.array[i]);
  691. da_free(store->names);
  692. bfree(store);
  693. }
/* Format and permanently store a name string; the returned pointer is
 * owned by the store and freed in profiler_name_store_free().  Useful
 * for profile_start()/profile_end() names, which are compared by
 * pointer identity. */
const char *profile_store_name(profiler_name_store_t *store,
		const char *format, ...)
{
	va_list args;
	va_start(args, format);

	struct dstr str = {0};
	dstr_vprintf(&str, format, args);

	va_end(args);

	const char *result = NULL;

	pthread_mutex_lock(&store->mutex);
	/* Ownership of str.array transfers to the names array, so no
	 * dstr_free here. */
	size_t idx = da_push_back(store->names, &str.array);
	result = store->names.array[idx];
	pthread_mutex_unlock(&store->mutex);

	return result;
}
/* ------------------------------------------------------------------------- */
/* Profiler data access */

/* Recursively copy aggregated timing data into snapshot form,
 * flattening each hash map into a sorted-later array and recording the
 * min/max values and total counts. */
static void add_entry_to_snapshot(profile_entry *entry,
		profiler_snapshot_entry_t *s_entry)
{
	s_entry->name = entry->name;
	s_entry->overall_count = copy_map_to_array(&entry->times,
			&s_entry->times,
			&s_entry->min_time, &s_entry->max_time);

	if ((s_entry->expected_time_between_calls =
				entry->expected_time_between_calls))
		s_entry->overall_between_calls_count =
			copy_map_to_array(&entry->times_between_calls,
					&s_entry->times_between_calls,
					&s_entry->min_time_between_calls,
					&s_entry->max_time_between_calls);

	da_reserve(s_entry->children, entry->children.num);
	for (size_t i = 0; i < entry->children.num; i++)
		add_entry_to_snapshot(&entry->children.array[i],
				da_push_back_new(s_entry->children));
}
  730. static void sort_snapshot_entry(profiler_snapshot_entry_t *entry)
  731. {
  732. qsort(entry->times.array, entry->times.num,
  733. sizeof(profiler_time_entry),
  734. profiler_time_entry_compare);
  735. if (entry->expected_time_between_calls)
  736. qsort(entry->times_between_calls.array,
  737. entry->times_between_calls.num,
  738. sizeof(profiler_time_entry),
  739. profiler_time_entry_compare);
  740. for (size_t i = 0; i < entry->children.num; i++)
  741. sort_snapshot_entry(&entry->children.array[i]);
  742. }
/* Create a snapshot of all current profiler data.  Each root's own
 * mutex is held while it is copied, so no half-merged entry can be
 * observed; the copied time arrays are then sorted descending for the
 * stats/print functions.  Caller frees with profile_snapshot_free(). */
profiler_snapshot_t *profile_snapshot_create(void)
{
	profiler_snapshot_t *snap = bzalloc(sizeof(profiler_snapshot_t));

	pthread_mutex_lock(&root_mutex);
	da_reserve(snap->roots, root_entries.num);
	for (size_t i = 0; i < root_entries.num; i++) {
		pthread_mutex_lock(root_entries.array[i].mutex);
		add_entry_to_snapshot(root_entries.array[i].entry,
				da_push_back_new(snap->roots));
		pthread_mutex_unlock(root_entries.array[i].mutex);
	}
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < snap->roots.num; i++)
		sort_snapshot_entry(&snap->roots.array[i]);

	return snap;
}
  759. static void free_snapshot_entry(profiler_snapshot_entry_t *entry)
  760. {
  761. for (size_t i = 0; i < entry->children.num; i++)
  762. free_snapshot_entry(&entry->children.array[i]);
  763. da_free(entry->children);
  764. da_free(entry->times_between_calls);
  765. da_free(entry->times);
  766. }
  767. void profile_snapshot_free(profiler_snapshot_t *snap)
  768. {
  769. if (!snap)
  770. return;
  771. for (size_t i = 0; i < snap->roots.num; i++)
  772. free_snapshot_entry(&snap->roots.array[i]);
  773. da_free(snap->roots);
  774. bfree(snap);
  775. }
typedef void (*dump_csv_func)(void *data, struct dstr *buffer);

/* Emit one CSV row per time bucket of this entry (durations first with
 * a 0 time_between_calls column, then times between calls), handing
 * each row to `func`, then recurse into children.  Pointer values are
 * printed as stable row ids.
 * NOTE(review): const char* is passed for %p without a (void*) cast —
 * works on common ABIs but is not strictly conforming. */
static void entry_dump_csv(struct dstr *buffer,
		const profiler_snapshot_entry_t *parent,
		const profiler_snapshot_entry_t *entry,
		dump_csv_func func, void *data)
{
	const char *parent_name = parent ? parent->name : NULL;

	for (size_t i = 0; i < entry->times.num; i++) {
		dstr_printf(buffer, "%p,%p,%p,%p,%s,0,"
				"%"PRIu64",%"PRIu64"\n", entry,
				parent, entry->name, parent_name, entry->name,
				entry->times.array[i].time_delta,
				entry->times.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->times_between_calls.num; i++) {
		dstr_printf(buffer,"%p,%p,%p,%p,%s,"
				"%"PRIu64",%"PRIu64",%"PRIu64"\n", entry,
				parent, entry->name, parent_name, entry->name,
				entry->expected_time_between_calls,
				entry->times_between_calls.array[i].time_delta,
				entry->times_between_calls.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->children.num; i++)
		entry_dump_csv(buffer, entry, &entry->children.array[i],
				func, data);
}
  804. static void profiler_snapshot_dump(const profiler_snapshot_t *snap,
  805. dump_csv_func func, void *data)
  806. {
  807. struct dstr buffer = {0};
  808. dstr_init_copy(&buffer, "id,parent_id,name_id,parent_name_id,name,"
  809. "time_between_calls,time_delta_µs,count\n");
  810. func(data, &buffer);
  811. for (size_t i = 0; i < snap->roots.num; i++)
  812. entry_dump_csv(&buffer, NULL,
  813. &snap->roots.array[i], func, data);
  814. dstr_free(&buffer);
  815. }
/* dump_csv_func sink writing rows to a FILE* passed as `data`.
 * NOTE(review): fwrite's return is ignored here — the callback type
 * returns void, so short writes are only caught by the caller's
 * fclose() result (if checked). */
static void dump_csv_fwrite(void *data, struct dstr *buffer)
{
fwrite(buffer->array, 1, buffer->len, data);
}
  820. bool profiler_snapshot_dump_csv(const profiler_snapshot_t *snap,
  821. const char *filename)
  822. {
  823. FILE *f = os_fopen(filename, "wb+");
  824. if (!f)
  825. return false;
  826. profiler_snapshot_dump(snap, dump_csv_fwrite, f);
  827. fclose(f);
  828. return true;
  829. }
/* dump_csv_func sink writing rows to a gzFile passed as `data`.
 * NOTE(review): gzwrite's return is ignored — the void callback type
 * leaves error detection to the caller's gzclose result. */
static void dump_csv_gzwrite(void *data, struct dstr *buffer)
{
gzwrite(data, buffer->array, (unsigned)buffer->len);
}
  834. bool profiler_snapshot_dump_csv_gz(const profiler_snapshot_t *snap,
  835. const char *filename)
  836. {
  837. gzFile gz;
  838. #ifdef _WIN32
  839. wchar_t *filename_w = NULL;
  840. os_utf8_to_wcs_ptr(filename, 0, &filename_w);
  841. if (!filename_w)
  842. return false;
  843. gz = gzopen_w(filename_w, "wb");
  844. bfree(filename_w);
  845. #else
  846. gz = gzopen(filename, "wb");
  847. #endif
  848. if (!gz)
  849. return false;
  850. profiler_snapshot_dump(snap, dump_csv_gzwrite, gz);
  851. gzclose_w(gz);
  852. return true;
  853. }
  854. size_t profiler_snapshot_num_roots(profiler_snapshot_t *snap)
  855. {
  856. return snap ? snap->roots.num : 0;
  857. }
  858. void profiler_snapshot_enumerate_roots(profiler_snapshot_t *snap,
  859. profiler_entry_enum_func func, void *context)
  860. {
  861. if (!snap)
  862. return;
  863. for (size_t i = 0; i < snap->roots.num; i++)
  864. if (!func(context, &snap->roots.array[i]))
  865. break;
  866. }
  867. void profiler_snapshot_filter_roots(profiler_snapshot_t *snap,
  868. profiler_name_filter_func func, void *data)
  869. {
  870. for (size_t i = 0; i < snap->roots.num;) {
  871. bool remove = false;
  872. bool res = func(data, snap->roots.array[i].name, &remove);
  873. if (remove) {
  874. free_snapshot_entry(&snap->roots.array[i]);
  875. da_erase(snap->roots, i);
  876. }
  877. if (!res)
  878. break;
  879. if (!remove)
  880. i += 1;
  881. }
  882. }
  883. size_t profiler_snapshot_num_children(profiler_snapshot_entry_t *entry)
  884. {
  885. return entry ? entry->children.num : 0;
  886. }
  887. void profiler_snapshot_enumerate_children(profiler_snapshot_entry_t *entry,
  888. profiler_entry_enum_func func, void *context)
  889. {
  890. if (!entry)
  891. return;
  892. for (size_t i = 0; i < entry->children.num; i++)
  893. if (!func(context, &entry->children.array[i]))
  894. break;
  895. }
  896. const char *profiler_snapshot_entry_name(profiler_snapshot_entry_t *entry)
  897. {
  898. return entry ? entry->name : NULL;
  899. }
  900. profiler_time_entries_t *profiler_snapshot_entry_times(
  901. profiler_snapshot_entry_t *entry)
  902. {
  903. return entry ? &entry->times : NULL;
  904. }
  905. uint64_t profiler_snapshot_entry_overall_count(
  906. profiler_snapshot_entry_t *entry)
  907. {
  908. return entry ? entry->overall_count : 0;
  909. }
  910. uint64_t profiler_snapshot_entry_min_time(profiler_snapshot_entry_t *entry)
  911. {
  912. return entry ? entry->min_time : 0;
  913. }
  914. uint64_t profiler_snapshot_entry_max_time(profiler_snapshot_entry_t *entry)
  915. {
  916. return entry ? entry->max_time : 0;
  917. }
  918. profiler_time_entries_t *profiler_snapshot_entry_times_between_calls(
  919. profiler_snapshot_entry_t *entry)
  920. {
  921. return entry ? &entry->times_between_calls : NULL;
  922. }
  923. uint64_t profiler_snapshot_entry_expected_time_between_calls(
  924. profiler_snapshot_entry_t *entry)
  925. {
  926. return entry ? entry->expected_time_between_calls : 0;
  927. }
  928. uint64_t profiler_snapshot_entry_min_time_between_calls(
  929. profiler_snapshot_entry_t *entry)
  930. {
  931. return entry ? entry->min_time_between_calls : 0;
  932. }
  933. uint64_t profiler_snapshot_entry_max_time_between_calls(
  934. profiler_snapshot_entry_t *entry)
  935. {
  936. return entry ? entry->max_time_between_calls : 0;
  937. }
  938. uint64_t profiler_snapshot_entry_overall_between_calls_count(
  939. profiler_snapshot_entry_t *entry)
  940. {
  941. return entry ? entry->overall_between_calls_count : 0;
  942. }