1
0

tuklib_integer.h 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file tuklib_integer.h
  4. /// \brief Various integer and bit operations
  5. ///
  6. /// This file provides macros or functions to do some basic integer and bit
  7. /// operations.
  8. ///
  9. /// Native endian inline functions (XX = 16, 32, or 64):
  10. /// - Unaligned native endian reads: readXXne(ptr)
  11. /// - Unaligned native endian writes: writeXXne(ptr, num)
  12. /// - Aligned native endian reads: aligned_readXXne(ptr)
  13. /// - Aligned native endian writes: aligned_writeXXne(ptr, num)
  14. ///
  15. /// Endianness-converting integer operations (these can be macros!)
  16. /// (XX = 16, 32, or 64; Y = b or l):
  17. /// - Byte swapping: bswapXX(num)
  18. /// - Byte order conversions to/from native (byteswaps if Y isn't
  19. /// the native endianness): convXXYe(num)
  20. /// - Unaligned reads (16/32-bit only): readXXYe(ptr)
  21. /// - Unaligned writes (16/32-bit only): writeXXYe(ptr, num)
  22. /// - Aligned reads: aligned_readXXYe(ptr)
  23. /// - Aligned writes: aligned_writeXXYe(ptr, num)
  24. ///
  25. /// Since the above can be macros, the arguments should have no side effects
  26. /// because they may be evaluated more than once.
  27. ///
  28. /// Bit scan operations for non-zero 32-bit integers (inline functions):
  29. /// - Bit scan reverse (find highest non-zero bit): bsr32(num)
  30. /// - Count leading zeros: clz32(num)
  31. /// - Count trailing zeros: ctz32(num)
  32. /// - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  33. ///
  34. /// The above bit scan operations return 0-31. If num is zero,
  35. /// the result is undefined.
  36. //
  37. // Authors: Lasse Collin
  38. // Joachim Henke
  39. //
  40. // This file has been put into the public domain.
  41. // You can do whatever you want with this file.
  42. //
  43. ///////////////////////////////////////////////////////////////////////////////
  44. #ifndef TUKLIB_INTEGER_H
  45. #define TUKLIB_INTEGER_H
  46. #include "tuklib_common.h"
  47. #include <string.h>
  48. // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
  49. // and such functions.
  50. #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
  51. # include <immintrin.h>
  52. #endif
  53. ///////////////////
  54. // Byte swapping //
  55. ///////////////////
  56. #if defined(HAVE___BUILTIN_BSWAPXX)
  57. // GCC >= 4.8 and Clang
  58. # define bswap16(n) __builtin_bswap16(n)
  59. # define bswap32(n) __builtin_bswap32(n)
  60. # define bswap64(n) __builtin_bswap64(n)
  61. #elif defined(HAVE_BYTESWAP_H)
  62. // glibc, uClibc, dietlibc
  63. # include <byteswap.h>
  64. # ifdef HAVE_BSWAP_16
  65. # define bswap16(num) bswap_16(num)
  66. # endif
  67. # ifdef HAVE_BSWAP_32
  68. # define bswap32(num) bswap_32(num)
  69. # endif
  70. # ifdef HAVE_BSWAP_64
  71. # define bswap64(num) bswap_64(num)
  72. # endif
  73. #elif defined(HAVE_SYS_ENDIAN_H)
  74. // *BSDs and Darwin
  75. # include <sys/endian.h>
  76. #elif defined(HAVE_SYS_BYTEORDER_H)
  77. // Solaris
  78. # include <sys/byteorder.h>
  79. # ifdef BSWAP_16
  80. # define bswap16(num) BSWAP_16(num)
  81. # endif
  82. # ifdef BSWAP_32
  83. # define bswap32(num) BSWAP_32(num)
  84. # endif
  85. # ifdef BSWAP_64
  86. # define bswap64(num) BSWAP_64(num)
  87. # endif
  88. # ifdef BE_16
  89. # define conv16be(num) BE_16(num)
  90. # endif
  91. # ifdef BE_32
  92. # define conv32be(num) BE_32(num)
  93. # endif
  94. # ifdef BE_64
  95. # define conv64be(num) BE_64(num)
  96. # endif
  97. # ifdef LE_16
  98. # define conv16le(num) LE_16(num)
  99. # endif
  100. # ifdef LE_32
  101. # define conv32le(num) LE_32(num)
  102. # endif
  103. # ifdef LE_64
  104. # define conv64le(num) LE_64(num)
  105. # endif
  106. #endif
  107. #ifndef bswap16
  108. # define bswap16(n) (uint16_t)( \
  109. (((n) & 0x00FFU) << 8) \
  110. | (((n) & 0xFF00U) >> 8) \
  111. )
  112. #endif
  113. #ifndef bswap32
  114. # define bswap32(n) (uint32_t)( \
  115. (((n) & UINT32_C(0x000000FF)) << 24) \
  116. | (((n) & UINT32_C(0x0000FF00)) << 8) \
  117. | (((n) & UINT32_C(0x00FF0000)) >> 8) \
  118. | (((n) & UINT32_C(0xFF000000)) >> 24) \
  119. )
  120. #endif
  121. #ifndef bswap64
  122. # define bswap64(n) (uint64_t)( \
  123. (((n) & UINT64_C(0x00000000000000FF)) << 56) \
  124. | (((n) & UINT64_C(0x000000000000FF00)) << 40) \
  125. | (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
  126. | (((n) & UINT64_C(0x00000000FF000000)) << 8) \
  127. | (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
  128. | (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
  129. | (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
  130. | (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
  131. )
  132. #endif
  133. // Define conversion macros using the basic byte swapping macros.
  134. #ifdef WORDS_BIGENDIAN
  135. # ifndef conv16be
  136. # define conv16be(num) ((uint16_t)(num))
  137. # endif
  138. # ifndef conv32be
  139. # define conv32be(num) ((uint32_t)(num))
  140. # endif
  141. # ifndef conv64be
  142. # define conv64be(num) ((uint64_t)(num))
  143. # endif
  144. # ifndef conv16le
  145. # define conv16le(num) bswap16(num)
  146. # endif
  147. # ifndef conv32le
  148. # define conv32le(num) bswap32(num)
  149. # endif
  150. # ifndef conv64le
  151. # define conv64le(num) bswap64(num)
  152. # endif
  153. #else
  154. # ifndef conv16be
  155. # define conv16be(num) bswap16(num)
  156. # endif
  157. # ifndef conv32be
  158. # define conv32be(num) bswap32(num)
  159. # endif
  160. # ifndef conv64be
  161. # define conv64be(num) bswap64(num)
  162. # endif
  163. # ifndef conv16le
  164. # define conv16le(num) ((uint16_t)(num))
  165. # endif
  166. # ifndef conv32le
  167. # define conv32le(num) ((uint32_t)(num))
  168. # endif
  169. # ifndef conv64le
  170. # define conv64le(num) ((uint64_t)(num))
  171. # endif
  172. #endif
  173. ////////////////////////////////
  174. // Unaligned reads and writes //
  175. ////////////////////////////////
  176. // The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
  177. // is bad even if the uint8_pointer is properly aligned because this kind
  178. // of casts break strict aliasing rules and result in undefined behavior.
  179. // With unaligned pointers it's even worse: compilers may emit vector
  180. // instructions that require aligned pointers even if non-vector
  181. // instructions work with unaligned pointers.
  182. //
  183. // Using memcpy() is the standard compliant way to do unaligned access.
  184. // Many modern compilers inline it so there is no function call overhead.
  185. // For those compilers that don't handle the memcpy() method well, the
  186. // old casting method (that violates strict aliasing) can be requested at
  187. // build time. A third method, casting to a packed struct, would also be
  188. // an option but isn't provided to keep things simpler (it's already a mess).
  189. // Hopefully this is flexible enough in practice.
  190. static inline uint16_t
  191. read16ne(const uint8_t *buf)
  192. {
  193. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  194. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  195. return *(const uint16_t *)buf;
  196. #else
  197. uint16_t num;
  198. memcpy(&num, buf, sizeof(num));
  199. return num;
  200. #endif
  201. }
  202. static inline uint32_t
  203. read32ne(const uint8_t *buf)
  204. {
  205. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  206. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  207. return *(const uint32_t *)buf;
  208. #else
  209. uint32_t num;
  210. memcpy(&num, buf, sizeof(num));
  211. return num;
  212. #endif
  213. }
  214. static inline uint64_t
  215. read64ne(const uint8_t *buf)
  216. {
  217. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  218. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  219. return *(const uint64_t *)buf;
  220. #else
  221. uint64_t num;
  222. memcpy(&num, buf, sizeof(num));
  223. return num;
  224. #endif
  225. }
  226. static inline void
  227. write16ne(uint8_t *buf, uint16_t num)
  228. {
  229. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  230. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  231. *(uint16_t *)buf = num;
  232. #else
  233. memcpy(buf, &num, sizeof(num));
  234. #endif
  235. return;
  236. }
  237. static inline void
  238. write32ne(uint8_t *buf, uint32_t num)
  239. {
  240. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  241. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  242. *(uint32_t *)buf = num;
  243. #else
  244. memcpy(buf, &num, sizeof(num));
  245. #endif
  246. return;
  247. }
  248. static inline void
  249. write64ne(uint8_t *buf, uint64_t num)
  250. {
  251. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  252. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  253. *(uint64_t *)buf = num;
  254. #else
  255. memcpy(buf, &num, sizeof(num));
  256. #endif
  257. return;
  258. }
  259. static inline uint16_t
  260. read16be(const uint8_t *buf)
  261. {
  262. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  263. uint16_t num = read16ne(buf);
  264. return conv16be(num);
  265. #else
  266. uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
  267. return num;
  268. #endif
  269. }
  270. static inline uint16_t
  271. read16le(const uint8_t *buf)
  272. {
  273. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  274. uint16_t num = read16ne(buf);
  275. return conv16le(num);
  276. #else
  277. uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
  278. return num;
  279. #endif
  280. }
  281. static inline uint32_t
  282. read32be(const uint8_t *buf)
  283. {
  284. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  285. uint32_t num = read32ne(buf);
  286. return conv32be(num);
  287. #else
  288. uint32_t num = (uint32_t)buf[0] << 24;
  289. num |= (uint32_t)buf[1] << 16;
  290. num |= (uint32_t)buf[2] << 8;
  291. num |= (uint32_t)buf[3];
  292. return num;
  293. #endif
  294. }
  295. static inline uint32_t
  296. read32le(const uint8_t *buf)
  297. {
  298. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  299. uint32_t num = read32ne(buf);
  300. return conv32le(num);
  301. #else
  302. uint32_t num = (uint32_t)buf[0];
  303. num |= (uint32_t)buf[1] << 8;
  304. num |= (uint32_t)buf[2] << 16;
  305. num |= (uint32_t)buf[3] << 24;
  306. return num;
  307. #endif
  308. }
  309. // NOTE: Possible byte swapping must be done in a macro to allow the compiler
  310. // to optimize byte swapping of constants when using glibc's or *BSD's
  311. // byte swapping macros. The actual write is done in an inline function
  312. // to make type checking of the buf pointer possible.
  313. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  314. # define write16be(buf, num) write16ne(buf, conv16be(num))
  315. # define write32be(buf, num) write32ne(buf, conv32be(num))
  316. #endif
  317. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  318. # define write16le(buf, num) write16ne(buf, conv16le(num))
  319. # define write32le(buf, num) write32ne(buf, conv32le(num))
  320. #endif
  321. #ifndef write16be
  322. static inline void
  323. write16be(uint8_t *buf, uint16_t num)
  324. {
  325. buf[0] = (uint8_t)(num >> 8);
  326. buf[1] = (uint8_t)num;
  327. return;
  328. }
  329. #endif
  330. #ifndef write16le
  331. static inline void
  332. write16le(uint8_t *buf, uint16_t num)
  333. {
  334. buf[0] = (uint8_t)num;
  335. buf[1] = (uint8_t)(num >> 8);
  336. return;
  337. }
  338. #endif
  339. #ifndef write32be
  340. static inline void
  341. write32be(uint8_t *buf, uint32_t num)
  342. {
  343. buf[0] = (uint8_t)(num >> 24);
  344. buf[1] = (uint8_t)(num >> 16);
  345. buf[2] = (uint8_t)(num >> 8);
  346. buf[3] = (uint8_t)num;
  347. return;
  348. }
  349. #endif
  350. #ifndef write32le
  351. static inline void
  352. write32le(uint8_t *buf, uint32_t num)
  353. {
  354. buf[0] = (uint8_t)num;
  355. buf[1] = (uint8_t)(num >> 8);
  356. buf[2] = (uint8_t)(num >> 16);
  357. buf[3] = (uint8_t)(num >> 24);
  358. return;
  359. }
  360. #endif
  361. //////////////////////////////
  362. // Aligned reads and writes //
  363. //////////////////////////////
  364. // Separate functions for aligned reads and writes are provided since on
  365. // strict-align archs aligned access is much faster than unaligned access.
  366. //
  367. // Just like in the unaligned case, memcpy() is needed to avoid
  368. // strict aliasing violations. However, on archs that don't support
  369. // unaligned access the compiler cannot know that the pointers given
  370. // to memcpy() are aligned which results in slow code. As of C11 there is
  371. // no standard way to tell the compiler that we know that the address is
  372. // aligned but some compilers have language extensions to do that. With
  373. // such language extensions the memcpy() method gives excellent results.
  374. //
  375. // What to do on a strict-align system when no known language extensions
  376. // are available? Falling back to byte-by-byte access would be safe but ruin
  377. // optimizations that have been made specifically with aligned access in mind.
  378. // As a compromise, aligned reads will fall back to non-compliant type punning
  379. // but aligned writes will be byte-by-byte, that is, fast reads are preferred
  380. // over fast writes. This obviously isn't great but hopefully it's a working
  381. // compromise for now.
  382. //
  383. // __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
  384. #ifdef HAVE___BUILTIN_ASSUME_ALIGNED
  385. # define tuklib_memcpy_aligned(dest, src, size) \
  386. memcpy(dest, __builtin_assume_aligned(src, size), size)
  387. #else
  388. # define tuklib_memcpy_aligned(dest, src, size) \
  389. memcpy(dest, src, size)
  390. # ifndef TUKLIB_FAST_UNALIGNED_ACCESS
  391. # define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
  392. # endif
  393. #endif
  394. static inline uint16_t
  395. aligned_read16ne(const uint8_t *buf)
  396. {
  397. #if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
  398. || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
  399. return *(const uint16_t *)buf;
  400. #else
  401. uint16_t num;
  402. tuklib_memcpy_aligned(&num, buf, sizeof(num));
  403. return num;
  404. #endif
  405. }
  406. static inline uint32_t
  407. aligned_read32ne(const uint8_t *buf)
  408. {
  409. #if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
  410. || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
  411. return *(const uint32_t *)buf;
  412. #else
  413. uint32_t num;
  414. tuklib_memcpy_aligned(&num, buf, sizeof(num));
  415. return num;
  416. #endif
  417. }
  418. static inline uint64_t
  419. aligned_read64ne(const uint8_t *buf)
  420. {
  421. #if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
  422. || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
  423. return *(const uint64_t *)buf;
  424. #else
  425. uint64_t num;
  426. tuklib_memcpy_aligned(&num, buf, sizeof(num));
  427. return num;
  428. #endif
  429. }
  430. static inline void
  431. aligned_write16ne(uint8_t *buf, uint16_t num)
  432. {
  433. #ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
  434. *(uint16_t *)buf = num;
  435. #else
  436. tuklib_memcpy_aligned(buf, &num, sizeof(num));
  437. #endif
  438. return;
  439. }
  440. static inline void
  441. aligned_write32ne(uint8_t *buf, uint32_t num)
  442. {
  443. #ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
  444. *(uint32_t *)buf = num;
  445. #else
  446. tuklib_memcpy_aligned(buf, &num, sizeof(num));
  447. #endif
  448. return;
  449. }
  450. static inline void
  451. aligned_write64ne(uint8_t *buf, uint64_t num)
  452. {
  453. #ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
  454. *(uint64_t *)buf = num;
  455. #else
  456. tuklib_memcpy_aligned(buf, &num, sizeof(num));
  457. #endif
  458. return;
  459. }
  460. static inline uint16_t
  461. aligned_read16be(const uint8_t *buf)
  462. {
  463. uint16_t num = aligned_read16ne(buf);
  464. return conv16be(num);
  465. }
  466. static inline uint16_t
  467. aligned_read16le(const uint8_t *buf)
  468. {
  469. uint16_t num = aligned_read16ne(buf);
  470. return conv16le(num);
  471. }
  472. static inline uint32_t
  473. aligned_read32be(const uint8_t *buf)
  474. {
  475. uint32_t num = aligned_read32ne(buf);
  476. return conv32be(num);
  477. }
  478. static inline uint32_t
  479. aligned_read32le(const uint8_t *buf)
  480. {
  481. uint32_t num = aligned_read32ne(buf);
  482. return conv32le(num);
  483. }
  484. static inline uint64_t
  485. aligned_read64be(const uint8_t *buf)
  486. {
  487. uint64_t num = aligned_read64ne(buf);
  488. return conv64be(num);
  489. }
  490. static inline uint64_t
  491. aligned_read64le(const uint8_t *buf)
  492. {
  493. uint64_t num = aligned_read64ne(buf);
  494. return conv64le(num);
  495. }
  496. // These need to be macros like in the unaligned case.
  497. #define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
  498. #define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
  499. #define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
  500. #define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
  501. #define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
  502. #define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))
  503. ////////////////////
  504. // Bit operations //
  505. ////////////////////
  506. static inline uint32_t
  507. bsr32(uint32_t n)
  508. {
  509. // Check for ICC first, since it tends to define __GNUC__ too.
  510. #if defined(__INTEL_COMPILER)
  511. return _bit_scan_reverse(n);
  512. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  513. // GCC >= 3.4 has __builtin_clz(), which gives good results on
  514. // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
  515. // either plain BSR (so the XOR gets optimized away) or LZCNT and
  516. // XOR (if -march indicates that SSE4a instructions are supported).
  517. return (uint32_t)__builtin_clz(n) ^ 31U;
  518. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  519. uint32_t i;
  520. __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
  521. return i;
  522. #else
  523. uint32_t i = 31;
  524. if ((n & 0xFFFF0000) == 0) {
  525. n <<= 16;
  526. i = 15;
  527. }
  528. if ((n & 0xFF000000) == 0) {
  529. n <<= 8;
  530. i -= 8;
  531. }
  532. if ((n & 0xF0000000) == 0) {
  533. n <<= 4;
  534. i -= 4;
  535. }
  536. if ((n & 0xC0000000) == 0) {
  537. n <<= 2;
  538. i -= 2;
  539. }
  540. if ((n & 0x80000000) == 0)
  541. --i;
  542. return i;
  543. #endif
  544. }
  545. static inline uint32_t
  546. clz32(uint32_t n)
  547. {
  548. #if defined(__INTEL_COMPILER)
  549. return _bit_scan_reverse(n) ^ 31U;
  550. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  551. return (uint32_t)__builtin_clz(n);
  552. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  553. uint32_t i;
  554. __asm__("bsrl %1, %0\n\t"
  555. "xorl $31, %0"
  556. : "=r" (i) : "rm" (n));
  557. return i;
  558. #else
  559. uint32_t i = 0;
  560. if ((n & 0xFFFF0000) == 0) {
  561. n <<= 16;
  562. i = 16;
  563. }
  564. if ((n & 0xFF000000) == 0) {
  565. n <<= 8;
  566. i += 8;
  567. }
  568. if ((n & 0xF0000000) == 0) {
  569. n <<= 4;
  570. i += 4;
  571. }
  572. if ((n & 0xC0000000) == 0) {
  573. n <<= 2;
  574. i += 2;
  575. }
  576. if ((n & 0x80000000) == 0)
  577. ++i;
  578. return i;
  579. #endif
  580. }
  581. static inline uint32_t
  582. ctz32(uint32_t n)
  583. {
  584. #if defined(__INTEL_COMPILER)
  585. return _bit_scan_forward(n);
  586. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
  587. return (uint32_t)__builtin_ctz(n);
  588. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  589. uint32_t i;
  590. __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
  591. return i;
  592. #else
  593. uint32_t i = 0;
  594. if ((n & 0x0000FFFF) == 0) {
  595. n >>= 16;
  596. i = 16;
  597. }
  598. if ((n & 0x000000FF) == 0) {
  599. n >>= 8;
  600. i += 8;
  601. }
  602. if ((n & 0x0000000F) == 0) {
  603. n >>= 4;
  604. i += 4;
  605. }
  606. if ((n & 0x00000003) == 0) {
  607. n >>= 2;
  608. i += 2;
  609. }
  610. if ((n & 0x00000001) == 0)
  611. ++i;
  612. return i;
  613. #endif
  614. }
  615. #define bsf32 ctz32
  616. #endif