crc32_arm64.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. // SPDX-License-Identifier: 0BSD
  2. ///////////////////////////////////////////////////////////////////////////////
  3. //
  4. /// \file crc32_arm64.h
  5. /// \brief CRC32 calculation with ARM64 optimization
  6. //
  7. // Authors: Chenxi Mao
  8. // Jia Tan
  9. // Hans Jansen
  10. //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #ifndef LZMA_CRC32_ARM64_H
  13. #define LZMA_CRC32_ARM64_H
  // MSVC always has the CRC intrinsics available when building for ARM64,
  // so there is no need to include any header files.
  16. #ifndef _MSC_VER
  17. # include <arm_acle.h>
  18. #endif
  19. // If both versions are going to be built, we need runtime detection
  20. // to check if the instructions are supported.
  21. #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
  22. # if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
  23. # include <sys/auxv.h>
  24. # elif defined(_WIN32)
  25. # include <processthreadsapi.h>
  26. # elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
  27. # include <sys/sysctl.h>
  28. # endif
  29. #endif
  // crc_attr_target is placed in front of a function definition to enable
  // the "+crc" target feature for that one function. It expands to the
  // target attribute on GCC and Clang, and to nothing everywhere else.
  //
  // EDG-based front ends are excluded explicitly: some of them (such as
  // Nvidia's nvcc) support ARM64 and define __GNUC__ but do not support
  // function attributes.
  //
  // NOTE: Build systems check for this too, keep them in sync with this.
  #if defined(__EDG__) || !(defined(__GNUC__) || defined(__clang__))
  #   define crc_attr_target
  #else
  #   define crc_attr_target __attribute__((__target__("+crc")))
  #endif
  39. crc_attr_target
  40. static uint32_t
  41. crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
  42. {
  43. crc = ~crc;
  44. // Align the input buffer because this was shown to be
  45. // significantly faster than unaligned accesses.
  46. const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);
  47. for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
  48. crc = __crc32b(crc, *buf);
  49. size -= align_amount;
  50. // Process 8 bytes at a time. The end point is determined by
  51. // ignoring the least significant three bits of size to ensure
  52. // we do not process past the bounds of the buffer. This guarantees
  53. // that limit is a multiple of 8 and is strictly less than size.
  54. for (const uint8_t *limit = buf + (size & ~(size_t)7);
  55. buf < limit; buf += 8)
  56. crc = __crc32d(crc, aligned_read64le(buf));
  57. // Process the remaining bytes that are not 8 byte aligned.
  58. for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)
  59. crc = __crc32b(crc, *buf);
  60. return ~crc;
  61. }
  #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
  /// \brief      Detect at runtime if the CPU supports the CRC32 instructions
  ///
  /// Only built when both the generic and the arch-optimized versions are
  /// compiled in. The query method is picked at compile time from what
  /// the platform offers.
  ///
  /// \return     true if the platform reports CRC32 support; false if it
  ///             does not or if the query itself failed
  static inline bool
  is_arch_extension_supported(void)
  {
  #if defined(HAVE_GETAUXVAL)
      // Test the CRC32 bit in the hardware capability word.
      const unsigned long hwcap = getauxval(AT_HWCAP);
      return (hwcap & HWCAP_CRC32) != 0;

  #elif defined(HAVE_ELF_AUX_INFO)
      // hwcap is read only when elf_aux_info() reported success (zero).
      unsigned long hwcap;
      const int err = elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
      return err == 0 && (hwcap & HWCAP_CRC32) != 0;

  #elif defined(_WIN32)
      return IsProcessorFeaturePresent(
              PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;

  #elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
      // sysctlbyname() identifies the CPU feature by a string. The Apple
      // documentation lists "hw.optional.armv8_crc32" here:
      // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619
      int flag = 0;
      size_t flag_size = sizeof(flag);
      const int rc = sysctlbyname("hw.optional.armv8_crc32", &flag,
              &flag_size, NULL, 0);
      return rc == 0 && flag != 0;

  #else
      // Having no runtime detection method must be a compile time error.
      // The checks in crc_common.h should ensure that one is always found
      // whenever this function is built. Returning false here would be
      // possible, but it would waste binary size and runtime because only
      // the generic method could ever be used.
  #   error Runtime detection method unavailable.
  #endif
  }
  #endif
  98. #endif // LZMA_CRC32_ARM64_H