001-lzo-speed.patch 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. --- a/sub-projects/compression/lzo-kmod/lzo1x_compress.c
  2. +++ b/sub-projects/compression/lzo-kmod/lzo1x_compress.c
  3. @@ -62,8 +62,12 @@ _lzo1x_1_do_compress(const unsigned char
  4. goto literal;
  5. try_match:
  6. +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  7. if (get_unaligned((const unsigned short *)m_pos)
  8. == get_unaligned((const unsigned short *)ip)) {
  9. +#else
  10. + if (m_pos[0] == ip[0] && m_pos[1] == ip[1]) {
  11. +#endif
  12. if (likely(m_pos[2] == ip[2]))
  13. goto match;
  14. }
  15. @@ -94,9 +98,14 @@ match:
  16. }
  17. *op++ = tt;
  18. }
  19. - do {
  20. - *op++ = *ii++;
  21. - } while (--t > 0);
  22. + if (t >= 2 * 4) {
  23. + memcpy(op, ii, t);
  24. + op += t;
  25. + ii += t;
  26. + } else
  27. + do {
  28. + *op++ = *ii++;
  29. + } while (--t > 0);
  30. }
  31. ip += 3;
  32. @@ -208,9 +217,14 @@ int lzo1x_1_compress(const unsigned char
  33. *op++ = tt;
  34. }
  35. - do {
  36. - *op++ = *ii++;
  37. - } while (--t > 0);
  38. + if (t >= 2 * 4) {
  39. + memcpy(op, ii, t);
  40. + op += t;
  41. + ii += t;
  42. + } else
  43. + do {
  44. + *op++ = *ii++;
  45. + } while (--t > 0);
  46. }
  47. *op++ = M4_MARKER | 1;
  48. @@ -224,4 +238,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress);
  49. MODULE_LICENSE("GPL");
  50. MODULE_DESCRIPTION("LZO1X-1 Compressor");
  51. -
  52. --- a/sub-projects/compression/lzo-kmod/lzo1x_decompress.c
  53. +++ b/sub-projects/compression/lzo-kmod/lzo1x_decompress.c
  54. @@ -45,10 +45,7 @@ int lzo1x_decompress_safe(const unsigned
  55. goto output_overrun;
  56. if (HAVE_IP(t + 1, ip_end, ip))
  57. goto input_overrun;
  58. - do {
  59. - *op++ = *ip++;
  60. - } while (--t > 0);
  61. - goto first_literal_run;
  62. + goto prep_first_literal_run;
  63. }
  64. while ((ip < ip_end)) {
  65. @@ -71,30 +68,27 @@ int lzo1x_decompress_safe(const unsigned
  66. if (HAVE_IP(t + 4, ip_end, ip))
  67. goto input_overrun;
  68. - COPY4(op, ip);
  69. - op += 4;
  70. - ip += 4;
  71. - if (--t > 0) {
  72. - if (t >= 4) {
  73. - do {
  74. - COPY4(op, ip);
  75. - op += 4;
  76. - ip += 4;
  77. - t -= 4;
  78. - } while (t >= 4);
  79. - if (t > 0) {
  80. - do {
  81. - *op++ = *ip++;
  82. - } while (--t > 0);
  83. - }
  84. - } else {
  85. + t += (4 - 1);
  86. + if (t >= 2 * 4) {
  87. + memcpy(op, ip, t);
  88. + op += t;
  89. + ip += t;
  90. + } else {
  91. + do {
  92. + COPY4(op, ip);
  93. + op += 4;
  94. + ip += 4;
  95. + t -= 4;
  96. + } while (t >= 4);
  97. + if (t > 0) {
  98. +prep_first_literal_run:
  99. do {
  100. *op++ = *ip++;
  101. } while (--t > 0);
  102. }
  103. }
  104. -first_literal_run:
  105. +//first_literal_run:
  106. t = *ip++;
  107. if (t >= 16)
  108. goto match;
  109. @@ -139,8 +133,7 @@ match:
  110. t += 31 + *ip++;
  111. }
  112. m_pos = op - 1;
  113. - m_pos -= le16_to_cpu(get_unaligned(
  114. - (const unsigned short *)ip)) >> 2;
  115. + m_pos -= get_unaligned_le16(ip) >> 2;
  116. ip += 2;
  117. } else if (t >= 16) {
  118. m_pos = op;
  119. @@ -158,8 +151,7 @@ match:
  120. }
  121. t += 7 + *ip++;
  122. }
  123. - m_pos -= le16_to_cpu(get_unaligned(
  124. - (const unsigned short *)ip)) >> 2;
  125. + m_pos -= get_unaligned_le16(ip) >> 2;
  126. ip += 2;
  127. if (m_pos == op)
  128. goto eof_found;
  129. @@ -184,21 +176,33 @@ match:
  130. if (HAVE_OP(t + 3 - 1, op_end, op))
  131. goto output_overrun;
  132. - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
  133. - COPY4(op, m_pos);
  134. - op += 4;
  135. - m_pos += 4;
  136. - t -= 4 - (3 - 1);
  137. - do {
  138. + if (t >= 2 * 4 - (3 - 1)) {
  139. + /*
  140. + * Assume memcpy doesn't copy
  141. + * more than 32 bytes at once
  142. + */
  143. + if ((op - m_pos) >= 32) {
  144. + t += (3 - 1);
  145. + memcpy(op, m_pos, t);
  146. + op += t;
  147. + m_pos += t;
  148. + } else if ((op - m_pos) >= 4) {
  149. COPY4(op, m_pos);
  150. op += 4;
  151. m_pos += 4;
  152. - t -= 4;
  153. - } while (t >= 4);
  154. - if (t > 0)
  155. + t -= 4 - (3 - 1);
  156. do {
  157. - *op++ = *m_pos++;
  158. - } while (--t > 0);
  159. + COPY4(op, m_pos);
  160. + op += 4;
  161. + m_pos += 4;
  162. + t -= 4;
  163. + } while (t >= 4);
  164. + if (t > 0)
  165. + do {
  166. + *op++ = *m_pos++;
  167. + } while (--t > 0);
  168. + } else
  169. + goto copy_match;
  170. } else {
  171. copy_match:
  172. *op++ = *m_pos++;