tuklib_integer.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file tuklib_integer.h
  4. /// \brief Various integer and bit operations
  5. ///
  6. /// This file provides macros or functions to do some basic integer and bit
  7. /// operations.
  8. ///
  9. /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
  10. /// - Byte swapping: bswapXX(num)
  11. /// - Byte order conversions to/from native: convXXYe(num)
  12. /// - Aligned reads: readXXYe(ptr)
  13. /// - Aligned writes: writeXXYe(ptr, num)
  14. /// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
  15. /// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
  16. ///
  17. /// Since they can macros, the arguments should have no side effects since
  18. /// they may be evaluated more than once.
  19. ///
  20. /// \todo PowerPC and possibly some other architectures support
  21. /// byte swapping load and store instructions. This file
  22. /// doesn't take advantage of those instructions.
  23. ///
  24. /// Bit scan operations for non-zero 32-bit integers:
  25. /// - Bit scan reverse (find highest non-zero bit): bsr32(num)
  26. /// - Count leading zeros: clz32(num)
  27. /// - Count trailing zeros: ctz32(num)
  28. /// - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  29. ///
  30. /// The above bit scan operations return 0-31. If num is zero,
  31. /// the result is undefined.
  32. //
  33. // Authors: Lasse Collin
  34. // Joachim Henke
  35. //
  36. // This file has been put into the public domain.
  37. // You can do whatever you want with this file.
  38. //
  39. ///////////////////////////////////////////////////////////////////////////////
  40. #ifndef TUKLIB_INTEGER_H
  41. #define TUKLIB_INTEGER_H
  42. #include "sysdefs.h"
  43. #if defined(__GNUC__) && defined(__GNUC_MINOR__)
  44. # define TUKLIB_GNUC_REQ(major, minor) \
  45. ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \
  46. || __GNUC__ > (major))
  47. #else
  48. # define TUKLIB_GNUC_REQ(major, minor) 0
  49. #endif
  50. ////////////////////////////////////////
  51. // Operating system specific features //
  52. ////////////////////////////////////////
  53. #if defined(HAVE_BYTESWAP_H)
  54. // glibc, uClibc, dietlibc
  55. # include <byteswap.h>
  56. # ifdef HAVE_BSWAP_16
  57. # define bswap16(num) bswap_16(num)
  58. # endif
  59. # ifdef HAVE_BSWAP_32
  60. # define bswap32(num) bswap_32(num)
  61. # endif
  62. # ifdef HAVE_BSWAP_64
  63. # define bswap64(num) bswap_64(num)
  64. # endif
  65. #elif defined(HAVE_SYS_ENDIAN_H)
  66. // *BSDs and Darwin
  67. # include <sys/endian.h>
  68. #elif defined(HAVE_SYS_BYTEORDER_H)
  69. // Solaris
  70. # include <sys/byteorder.h>
  71. # ifdef BSWAP_16
  72. # define bswap16(num) BSWAP_16(num)
  73. # endif
  74. # ifdef BSWAP_32
  75. # define bswap32(num) BSWAP_32(num)
  76. # endif
  77. # ifdef BSWAP_64
  78. # define bswap64(num) BSWAP_64(num)
  79. # endif
  80. # ifdef BE_16
  81. # define conv16be(num) BE_16(num)
  82. # endif
  83. # ifdef BE_32
  84. # define conv32be(num) BE_32(num)
  85. # endif
  86. # ifdef BE_64
  87. # define conv64be(num) BE_64(num)
  88. # endif
  89. # ifdef LE_16
  90. # define conv16le(num) LE_16(num)
  91. # endif
  92. # ifdef LE_32
  93. # define conv32le(num) LE_32(num)
  94. # endif
  95. # ifdef LE_64
  96. # define conv64le(num) LE_64(num)
  97. # endif
  98. #endif
  99. ////////////////////////////////
  100. // Compiler-specific features //
  101. ////////////////////////////////
  102. // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
  103. // and such functions.
  104. #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
  105. # include <immintrin.h>
  106. #endif
  107. ///////////////////
  108. // Byte swapping //
  109. ///////////////////
  110. #ifndef bswap16
  111. # define bswap16(num) \
  112. (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
  113. #endif
  114. #ifndef bswap32
  115. # define bswap32(num) \
  116. ( (((uint32_t)(num) << 24) ) \
  117. | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \
  118. | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \
  119. | (((uint32_t)(num) >> 24) ) )
  120. #endif
  121. #ifndef bswap64
  122. # define bswap64(num) \
  123. ( (((uint64_t)(num) << 56) ) \
  124. | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
  125. | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
  126. | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \
  127. | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \
  128. | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
  129. | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
  130. | (((uint64_t)(num) >> 56) ) )
  131. #endif
  132. // Define conversion macros using the basic byte swapping macros.
  133. #ifdef WORDS_BIGENDIAN
  134. # ifndef conv16be
  135. # define conv16be(num) ((uint16_t)(num))
  136. # endif
  137. # ifndef conv32be
  138. # define conv32be(num) ((uint32_t)(num))
  139. # endif
  140. # ifndef conv64be
  141. # define conv64be(num) ((uint64_t)(num))
  142. # endif
  143. # ifndef conv16le
  144. # define conv16le(num) bswap16(num)
  145. # endif
  146. # ifndef conv32le
  147. # define conv32le(num) bswap32(num)
  148. # endif
  149. # ifndef conv64le
  150. # define conv64le(num) bswap64(num)
  151. # endif
  152. #else
  153. # ifndef conv16be
  154. # define conv16be(num) bswap16(num)
  155. # endif
  156. # ifndef conv32be
  157. # define conv32be(num) bswap32(num)
  158. # endif
  159. # ifndef conv64be
  160. # define conv64be(num) bswap64(num)
  161. # endif
  162. # ifndef conv16le
  163. # define conv16le(num) ((uint16_t)(num))
  164. # endif
  165. # ifndef conv32le
  166. # define conv32le(num) ((uint32_t)(num))
  167. # endif
  168. # ifndef conv64le
  169. # define conv64le(num) ((uint64_t)(num))
  170. # endif
  171. #endif
  172. //////////////////////////////
  173. // Aligned reads and writes //
  174. //////////////////////////////
  175. static inline uint16_t
  176. read16be(const uint8_t *buf)
  177. {
  178. uint16_t num = *(const uint16_t *)buf;
  179. return conv16be(num);
  180. }
  181. static inline uint16_t
  182. read16le(const uint8_t *buf)
  183. {
  184. uint16_t num = *(const uint16_t *)buf;
  185. return conv16le(num);
  186. }
  187. static inline uint32_t
  188. read32be(const uint8_t *buf)
  189. {
  190. uint32_t num = *(const uint32_t *)buf;
  191. return conv32be(num);
  192. }
  193. static inline uint32_t
  194. read32le(const uint8_t *buf)
  195. {
  196. uint32_t num = *(const uint32_t *)buf;
  197. return conv32le(num);
  198. }
  199. static inline uint64_t
  200. read64be(const uint8_t *buf)
  201. {
  202. uint64_t num = *(const uint64_t *)buf;
  203. return conv64be(num);
  204. }
  205. static inline uint64_t
  206. read64le(const uint8_t *buf)
  207. {
  208. uint64_t num = *(const uint64_t *)buf;
  209. return conv64le(num);
  210. }
  211. // NOTE: Possible byte swapping must be done in a macro to allow GCC
  212. // to optimize byte swapping of constants when using glibc's or *BSD's
  213. // byte swapping macros. The actual write is done in an inline function
  214. // to make type checking of the buf pointer possible similarly to readXXYe()
  215. // functions.
  216. #define write16be(buf, num) write16ne((buf), conv16be(num))
  217. #define write16le(buf, num) write16ne((buf), conv16le(num))
  218. #define write32be(buf, num) write32ne((buf), conv32be(num))
  219. #define write32le(buf, num) write32ne((buf), conv32le(num))
  220. #define write64be(buf, num) write64ne((buf), conv64be(num))
  221. #define write64le(buf, num) write64ne((buf), conv64le(num))
  222. static inline void
  223. write16ne(uint8_t *buf, uint16_t num)
  224. {
  225. *(uint16_t *)buf = num;
  226. return;
  227. }
  228. static inline void
  229. write32ne(uint8_t *buf, uint32_t num)
  230. {
  231. *(uint32_t *)buf = num;
  232. return;
  233. }
  234. static inline void
  235. write64ne(uint8_t *buf, uint64_t num)
  236. {
  237. *(uint64_t *)buf = num;
  238. return;
  239. }
  240. ////////////////////////////////
  241. // Unaligned reads and writes //
  242. ////////////////////////////////
  243. // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
  244. // 32-bit unaligned integer loads and stores. It's possible that 64-bit
  245. // unaligned access doesn't work or is slower than byte-by-byte access.
  246. // Since unaligned 64-bit is probably not needed as often as 16-bit or
  247. // 32-bit, we simply don't support 64-bit unaligned access for now.
  248. #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
  249. # define unaligned_read16be read16be
  250. # define unaligned_read16le read16le
  251. # define unaligned_read32be read32be
  252. # define unaligned_read32le read32le
  253. # define unaligned_write16be write16be
  254. # define unaligned_write16le write16le
  255. # define unaligned_write32be write32be
  256. # define unaligned_write32le write32le
  257. #else
  258. static inline uint16_t
  259. unaligned_read16be(const uint8_t *buf)
  260. {
  261. uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
  262. return num;
  263. }
  264. static inline uint16_t
  265. unaligned_read16le(const uint8_t *buf)
  266. {
  267. uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
  268. return num;
  269. }
  270. static inline uint32_t
  271. unaligned_read32be(const uint8_t *buf)
  272. {
  273. uint32_t num = (uint32_t)buf[0] << 24;
  274. num |= (uint32_t)buf[1] << 16;
  275. num |= (uint32_t)buf[2] << 8;
  276. num |= (uint32_t)buf[3];
  277. return num;
  278. }
  279. static inline uint32_t
  280. unaligned_read32le(const uint8_t *buf)
  281. {
  282. uint32_t num = (uint32_t)buf[0];
  283. num |= (uint32_t)buf[1] << 8;
  284. num |= (uint32_t)buf[2] << 16;
  285. num |= (uint32_t)buf[3] << 24;
  286. return num;
  287. }
  288. static inline void
  289. unaligned_write16be(uint8_t *buf, uint16_t num)
  290. {
  291. buf[0] = (uint8_t)(num >> 8);
  292. buf[1] = (uint8_t)num;
  293. return;
  294. }
  295. static inline void
  296. unaligned_write16le(uint8_t *buf, uint16_t num)
  297. {
  298. buf[0] = (uint8_t)num;
  299. buf[1] = (uint8_t)(num >> 8);
  300. return;
  301. }
  302. static inline void
  303. unaligned_write32be(uint8_t *buf, uint32_t num)
  304. {
  305. buf[0] = (uint8_t)(num >> 24);
  306. buf[1] = (uint8_t)(num >> 16);
  307. buf[2] = (uint8_t)(num >> 8);
  308. buf[3] = (uint8_t)num;
  309. return;
  310. }
  311. static inline void
  312. unaligned_write32le(uint8_t *buf, uint32_t num)
  313. {
  314. buf[0] = (uint8_t)num;
  315. buf[1] = (uint8_t)(num >> 8);
  316. buf[2] = (uint8_t)(num >> 16);
  317. buf[3] = (uint8_t)(num >> 24);
  318. return;
  319. }
  320. #endif
  321. static inline uint32_t
  322. bsr32(uint32_t n)
  323. {
  324. // Check for ICC first, since it tends to define __GNUC__ too.
  325. #if defined(__INTEL_COMPILER)
  326. return _bit_scan_reverse(n);
  327. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  328. // GCC >= 3.4 has __builtin_clz(), which gives good results on
  329. // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
  330. // either plain BSR (so the XOR gets optimized away) or LZCNT and
  331. // XOR (if -march indicates that SSE4a instructions are supported).
  332. return __builtin_clz(n) ^ 31U;
  333. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  334. uint32_t i;
  335. __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
  336. return i;
  337. #else
  338. uint32_t i = 31;
  339. if ((n & UINT32_C(0xFFFF0000)) == 0) {
  340. n <<= 16;
  341. i = 15;
  342. }
  343. if ((n & UINT32_C(0xFF000000)) == 0) {
  344. n <<= 8;
  345. i -= 8;
  346. }
  347. if ((n & UINT32_C(0xF0000000)) == 0) {
  348. n <<= 4;
  349. i -= 4;
  350. }
  351. if ((n & UINT32_C(0xC0000000)) == 0) {
  352. n <<= 2;
  353. i -= 2;
  354. }
  355. if ((n & UINT32_C(0x80000000)) == 0)
  356. --i;
  357. return i;
  358. #endif
  359. }
  360. static inline uint32_t
  361. clz32(uint32_t n)
  362. {
  363. #if defined(__INTEL_COMPILER)
  364. return _bit_scan_reverse(n) ^ 31U;
  365. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  366. return __builtin_clz(n);
  367. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  368. uint32_t i;
  369. __asm__("bsrl %1, %0\n\t"
  370. "xorl $31, %0"
  371. : "=r" (i) : "rm" (n));
  372. return i;
  373. #else
  374. uint32_t i = 0;
  375. if ((n & UINT32_C(0xFFFF0000)) == 0) {
  376. n <<= 16;
  377. i = 16;
  378. }
  379. if ((n & UINT32_C(0xFF000000)) == 0) {
  380. n <<= 8;
  381. i += 8;
  382. }
  383. if ((n & UINT32_C(0xF0000000)) == 0) {
  384. n <<= 4;
  385. i += 4;
  386. }
  387. if ((n & UINT32_C(0xC0000000)) == 0) {
  388. n <<= 2;
  389. i += 2;
  390. }
  391. if ((n & UINT32_C(0x80000000)) == 0)
  392. ++i;
  393. return i;
  394. #endif
  395. }
  396. static inline uint32_t
  397. ctz32(uint32_t n)
  398. {
  399. #if defined(__INTEL_COMPILER)
  400. return _bit_scan_forward(n);
  401. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
  402. return __builtin_ctz(n);
  403. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  404. uint32_t i;
  405. __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
  406. return i;
  407. #else
  408. uint32_t i = 0;
  409. if ((n & UINT32_C(0x0000FFFF)) == 0) {
  410. n >>= 16;
  411. i = 16;
  412. }
  413. if ((n & UINT32_C(0x000000FF)) == 0) {
  414. n >>= 8;
  415. i += 8;
  416. }
  417. if ((n & UINT32_C(0x0000000F)) == 0) {
  418. n >>= 4;
  419. i += 4;
  420. }
  421. if ((n & UINT32_C(0x00000003)) == 0) {
  422. n >>= 2;
  423. i += 2;
  424. }
  425. if ((n & UINT32_C(0x00000001)) == 0)
  426. ++i;
  427. return i;
  428. #endif
  429. }
  430. #define bsf32 ctz32
  431. #endif