bio_base64_test.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. /*
  2. * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
  3. *
  4. * Licensed under the Apache License 2.0 (the "License"). You may not use
  5. * this file except in compliance with the License. You can obtain a copy
  6. * in the file LICENSE in the source distribution or at
  7. * https://www.openssl.org/source/license.html
  8. */
  9. #include <stdio.h>
  10. #include <string.h>
  11. #include <openssl/bio.h>
  12. #include <openssl/evp.h>
  13. #include <openssl/rand.h>
  14. #include "testutil.h"
/* 2047 bytes of "#ooooooooo..." + NUL terminator */
/* The content is filled in at run time by setup_tests(). */
static char gunk[2048];

/*
 * Description of one decode test.  The input handed to the base64 BIO is
 * assembled by genb64() as: optional prefix line, encoded payload, optional
 * suffix line.
 */
typedef struct {
    /* Written, followed by a newline, before the payload when non-empty */
    char *prefix;
    /* When non-NULL, written verbatim in place of a generated encoding */
    char *encoded;
    /* Number of raw payload octets, i.e. the expected decoded length */
    unsigned bytes;
    /* Number of extra 'A' code points appended after the encoded payload */
    int trunc;
    /* Written, followed by a newline, after the payload when non-empty */
    char *suffix;
    /*
     * When non-zero the source memory BIO reports EOF_RETURN on empty reads
     * and the input is fed to it in two parts.
     */
    int retry;
    /* When non-zero, BIO_FLAGS_BASE64_NO_NL is set on the base64 BIO */
    int no_nl;
} test_case;
#define BUFMAX 0xa0000 /* Encode at most 640kB. */
#define sEOF "-EOF" /* '-' as in PEM and MIME boundaries */
#define junk "#foo" /* Skipped initial content */
#define EOF_RETURN (-1729) /* Distinct from -1, etc., internal results */
/* Number of base payload lengths, see lengths[] */
#define NLEN 6
/* Number of prefix/suffix variants, see prefixes[] and suffixes[] */
#define NVAR 5
/*
 * Junk suffixed variants don't make sense with padding or truncated groups
 * because we will typically stop with an error before seeing the suffix, but
 * with retriable BIOs may never look at the suffix after detecting padding.
 */
/*
 * Number of padding cases: case 0 adds nothing, cases 1 and 2 add that many
 * payload bytes, cases 3 to 5 append 1 to 3 truncating code points.
 */
#define NPAD 6
/* All NPAD cases for the first NVAR - 1 variants, plus one for the last */
#define NVARPAD (NVAR * NPAD - NPAD + 1)
/* Prefix and suffix of each variant, indexed by variant number */
static char *prefixes[NVAR] = { "", junk, gunk, "", "" };
static char *suffixes[NVAR] = { "", "", "", sEOF, junk };
/* Base raw payload lengths, in octets */
static unsigned lengths[6] = { 0, 3, 48, 192, 768, 1536 };
/* Encoded line lengths to try; the list is terminated by 0 */
static unsigned linelengths[] = {
    4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0
};
/*
 * Whitespace counts to try.  generic_case() stops at the first entry whose
 * double is not below the line length, so the final 0xFFFF always ends the
 * scan for the line lengths listed above.
 */
static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF };
  46. /* Generate `len` random octets */
  47. static unsigned char *genbytes(unsigned len)
  48. {
  49. unsigned char *buf = NULL;
  50. if (len > 0 && len <= BUFMAX && (buf = OPENSSL_malloc(len)) != NULL)
  51. RAND_bytes(buf, len);
  52. return buf;
  53. }
  54. /* Append one base64 codepoint, adding newlines after every `llen` bytes */
  55. static int memout(BIO *mem, char c, int llen, int *pos)
  56. {
  57. if (BIO_write(mem, &c, 1) != 1)
  58. return 0;
  59. if (++*pos == llen) {
  60. *pos = 0;
  61. c = '\n';
  62. if (BIO_write(mem, &c, 1) != 1)
  63. return 0;
  64. }
  65. return 1;
  66. }
  67. /* Encode and append one 6-bit slice, randomly prepending some whitespace */
  68. static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos)
  69. {
  70. if (wscnt > 0
  71. && (test_random() % llen) < wscnt
  72. && memout(mem, ' ', llen, pos) == 0)
  73. return 0;
  74. return memout(mem, c, llen, pos);
  75. }
  76. /*
  77. * Encode an octet string in base64, approximately `llen` bytes per line,
  78. * with up to roughly `wscnt` additional space characters inserted at random
  79. * before some of the base64 code points.
  80. */
  81. static int encode(unsigned const char *buf, unsigned buflen, char *encoded,
  82. int trunc, unsigned llen, unsigned wscnt, BIO *mem)
  83. {
  84. static const unsigned char b64[65] =
  85. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  86. int pos = 0;
  87. char nl = '\n';
  88. /* Use a verbatim encoding when provided */
  89. if (encoded != NULL) {
  90. int elen = strlen(encoded);
  91. return BIO_write(mem, encoded, elen) == elen;
  92. }
  93. /* Encode full 3-octet groups */
  94. while (buflen > 2) {
  95. unsigned long v = buf[0] << 16 | buf[1] << 8 | buf[2];
  96. if (memoutws(mem, b64[v >> 18], wscnt, llen, &pos) == 0
  97. || memoutws(mem, b64[(v >> 12) & 0x3f], wscnt, llen, &pos) == 0
  98. || memoutws(mem, b64[(v >> 6) & 0x3f], wscnt, llen, &pos) == 0
  99. || memoutws(mem, b64[v & 0x3f], wscnt, llen, &pos) == 0)
  100. return 0;
  101. buf += 3;
  102. buflen -= 3;
  103. }
  104. /* Encode and pad final 1 or 2 octet group */
  105. if (buflen == 2) {
  106. unsigned long v = buf[0] << 8 | buf[1];
  107. if (memoutws(mem, b64[(v >> 10) & 0x3f], wscnt, llen, &pos) == 0
  108. || memoutws(mem, b64[(v >> 4) & 0x3f], wscnt, llen, &pos) == 0
  109. || memoutws(mem, b64[(v & 0xf) << 2], wscnt, llen, &pos) == 0
  110. || memoutws(mem, '=', wscnt, llen, &pos) == 0)
  111. return 0;
  112. } else if (buflen == 1) {
  113. unsigned long v = buf[0];
  114. if (memoutws(mem, b64[v >> 2], wscnt, llen, &pos) == 0
  115. || memoutws(mem, b64[(v & 0x3) << 4], wscnt, llen, &pos) == 0
  116. || memoutws(mem, '=', wscnt, llen, &pos) == 0
  117. || memoutws(mem, '=', wscnt, llen, &pos) == 0)
  118. return 0;
  119. }
  120. while (trunc-- > 0)
  121. if (memoutws(mem, 'A', wscnt, llen, &pos) == 0)
  122. return 0;
  123. /* Terminate last line */
  124. if (pos > 0 && BIO_write(mem, &nl, 1) != 1)
  125. return 0;
  126. return 1;
  127. }
  128. static int genb64(char *prefix, char *suffix, unsigned const char *buf,
  129. unsigned buflen, int trunc, char *encoded, unsigned llen,
  130. unsigned wscnt, char **out)
  131. {
  132. int preflen = strlen(prefix);
  133. int sufflen = strlen(suffix);
  134. int outlen;
  135. char newline = '\n';
  136. BUF_MEM *bptr;
  137. BIO *mem = BIO_new(BIO_s_mem());
  138. if (mem == NULL)
  139. return -1;
  140. if ((*prefix && (BIO_write(mem, prefix, preflen) != preflen
  141. || BIO_write(mem, &newline, 1) != 1))
  142. || encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0
  143. || (*suffix && (BIO_write(mem, suffix, sufflen) != sufflen
  144. || BIO_write(mem, &newline, 1) != 1))) {
  145. BIO_free(mem);
  146. return -1;
  147. }
  148. /* Orphan the memory BIO's data buffer */
  149. BIO_get_mem_ptr(mem, &bptr);
  150. *out = bptr->data;
  151. outlen = bptr->length;
  152. bptr->data = NULL;
  153. (void) BIO_set_close(mem, BIO_NOCLOSE);
  154. BIO_free(mem);
  155. BUF_MEM_free(bptr);
  156. return outlen;
  157. }
  158. static int test_bio_base64_run(test_case *t, int llen, int wscnt)
  159. {
  160. unsigned char *raw;
  161. unsigned char *out;
  162. unsigned out_len;
  163. char *encoded = NULL;
  164. int elen;
  165. BIO *bio, *b64;
  166. int n, n1, n2;
  167. int ret;
  168. /*
  169. * Pre-encoded data always encodes NUL octets. If all we care about is the
  170. * length, and not the payload, use random bytes.
  171. */
  172. if (t->encoded != NULL)
  173. raw = OPENSSL_zalloc(t->bytes);
  174. else
  175. raw = genbytes(t->bytes);
  176. if (raw == NULL && t->bytes > 0) {
  177. TEST_error("out of memory");
  178. return -1;
  179. }
  180. out_len = t->bytes + 1024;
  181. out = OPENSSL_malloc(out_len);
  182. if (out == NULL) {
  183. OPENSSL_free(raw);
  184. TEST_error("out of memory");
  185. return -1;
  186. }
  187. elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded,
  188. llen, wscnt, &encoded);
  189. if (elen < 0 || (bio = BIO_new(BIO_s_mem())) == NULL) {
  190. OPENSSL_free(raw);
  191. OPENSSL_free(out);
  192. OPENSSL_free(encoded);
  193. TEST_error("out of memory");
  194. return -1;
  195. }
  196. if (t->retry)
  197. BIO_set_mem_eof_return(bio, EOF_RETURN);
  198. else
  199. BIO_set_mem_eof_return(bio, 0);
  200. /*
  201. * When the input is long enough, and the source bio is retriable, exercise
  202. * retries by writting the input to the underlying BIO in two steps (1024
  203. * bytes, then the rest) and trying to decode some data after each write.
  204. */
  205. n1 = elen;
  206. if (t->retry)
  207. n1 = elen / 2;
  208. if (n1 > 0)
  209. BIO_write(bio, encoded, n1);
  210. b64 = BIO_new(BIO_f_base64());
  211. if (t->no_nl)
  212. BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
  213. BIO_push(b64, bio);
  214. n = BIO_read(b64, out, out_len);
  215. if (n1 < elen) {
  216. /* Append the rest of the input, and read again */
  217. BIO_write(bio, encoded + n1, elen - n1);
  218. if (n > 0) {
  219. n2 = BIO_read(b64, out + n, out_len - n);
  220. if (n2 > 0)
  221. n += n2;
  222. } else if (n == EOF_RETURN) {
  223. n = BIO_read(b64, out, out_len);
  224. }
  225. }
  226. /* Turn retry-related negative results to normal (0) EOF */
  227. if (n < 0 && n == EOF_RETURN)
  228. n = 0;
  229. /* Turn off retries */
  230. if (t->retry)
  231. BIO_set_mem_eof_return(bio, 0);
  232. if (n < (int) out_len)
  233. /* Perform the last read, checking its result */
  234. ret = BIO_read(b64, out + n, out_len - n);
  235. else {
  236. /* Should not happen, given extra space in out_len */
  237. TEST_error("Unexpectedly long decode output");
  238. ret = -1;
  239. }
  240. /*
  241. * Expect an error to be detected with:
  242. *
  243. * - truncated groups,
  244. * - non-base64 suffixes (other than soft EOF) for non-empty or oneline
  245. * input
  246. * - non-base64 prefixes in NO_NL mode
  247. *
  248. * Otherwise, check the decoded content
  249. */
  250. if (t->trunc > 0
  251. || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-')
  252. || (t->no_nl && *t->prefix)) {
  253. if ((ret = ret < 0 ? 0 : -1) != 0)
  254. TEST_error("Final read result was non-negative");
  255. } else if (ret != 0
  256. || n != (int) t->bytes
  257. || (n > 0 && memcmp(raw, out, n) != 0)) {
  258. TEST_error("Failed to decode expected data");
  259. ret = -1;
  260. }
  261. BIO_free_all(b64);
  262. OPENSSL_free(out);
  263. OPENSSL_free(raw);
  264. OPENSSL_free(encoded);
  265. return ret;
  266. }
  267. static int generic_case(test_case *t, int verbose)
  268. {
  269. unsigned *llen;
  270. unsigned *wscnt;
  271. int ok = 1;
  272. for (llen = linelengths; *llen > 0; ++llen) {
  273. for (wscnt = wscnts; *wscnt * 2 < *llen; ++wscnt) {
  274. int extra = t->no_nl ? 64 : 0;
  275. /*
  276. * Use a longer line for NO_NL tests, in particular, eventually
  277. * exceeding 1k bytes.
  278. */
  279. if (test_bio_base64_run(t, *llen + extra, *wscnt) != 0)
  280. ok = 0;
  281. if (verbose) {
  282. fprintf(stderr, "bio_base64_test: ok=%d", ok);
  283. if (*t->prefix)
  284. fprintf(stderr, ", prefix='%s'", t->prefix);
  285. if (t->encoded)
  286. fprintf(stderr, ", data='%s'", t->encoded);
  287. else
  288. fprintf(stderr, ", datalen=%u", t->bytes);
  289. if (t->trunc)
  290. fprintf(stderr, ", trunc=%d", t->trunc);
  291. if (*t->suffix)
  292. fprintf(stderr, ", suffix='%s'", t->suffix);
  293. fprintf(stderr, ", linelen=%u", *llen);
  294. fprintf(stderr, ", wscount=%u", *wscnt);
  295. if (t->retry)
  296. fprintf(stderr, ", retriable");
  297. if (t->no_nl)
  298. fprintf(stderr, ", oneline");
  299. fputc('\n', stderr);
  300. }
  301. /* For verbatim input no effect from varying llen or wscnt */
  302. if (t->encoded)
  303. return ok;
  304. }
  305. /*
  306. * Longer 'llen' has no effect once we're sure to not have multiple
  307. * lines of data
  308. */
  309. if (*llen > t->bytes + (t->bytes >> 1))
  310. break;
  311. }
  312. return ok;
  313. }
  314. static int quotrem(int i, unsigned int m, int *q)
  315. {
  316. *q = i / m;
  317. return i - *q * m;
  318. }
  319. static int test_bio_base64_generated(int idx)
  320. {
  321. test_case t;
  322. int variant;
  323. int lencase;
  324. int padcase;
  325. int q = idx;
  326. lencase = quotrem(q, NLEN, &q);
  327. variant = quotrem(q, NVARPAD, &q);
  328. padcase = quotrem(variant, NPAD, &variant);
  329. t.retry = quotrem(q, 2, &q);
  330. t.no_nl = quotrem(q, 2, &q);
  331. if (q != 0) {
  332. fprintf(stderr, "Test index out of range: %d", idx);
  333. return 0;
  334. }
  335. t.prefix = prefixes[variant];
  336. t.encoded = NULL;
  337. t.bytes = lengths[lencase];
  338. t.trunc = 0;
  339. if (padcase && padcase < 3)
  340. t.bytes += padcase;
  341. else if (padcase >= 3)
  342. t.trunc = padcase - 2;
  343. t.suffix = suffixes[variant];
  344. if (padcase != 0 && (*t.suffix && *t.suffix != '-')) {
  345. TEST_error("Unexpected suffix test after padding");
  346. return 0;
  347. }
  348. return generic_case(&t, 0);
  349. }
  350. static int test_bio_base64_corner_case_bug(int idx)
  351. {
  352. test_case t;
  353. int q = idx;
  354. t.retry = quotrem(q, 2, &q);
  355. t.no_nl = quotrem(q, 2, &q);
  356. if (q != 0) {
  357. fprintf(stderr, "Test index out of range: %d", idx);
  358. return 0;
  359. }
  360. /* 9 bytes of skipped non-base64 input + newline */
  361. t.prefix = "#foo\n#bar";
  362. /* 9 bytes on 2nd and subsequent lines */
  363. t.encoded = "A\nAAA\nAAAA\n";
  364. t.suffix = "";
  365. /* Expected decode length */
  366. t.bytes = 6;
  367. t.trunc = 0; /* ignored */
  368. return generic_case(&t, 0);
  369. }
  370. int setup_tests(void)
  371. {
  372. int numidx;
  373. memset(gunk, 'o', sizeof(gunk));
  374. gunk[0] = '#';
  375. gunk[sizeof(gunk) - 1] = '\0';
  376. /*
  377. * Test 5 variants of prefix or suffix
  378. *
  379. * - both empty
  380. * - short junk prefix
  381. * - long gunk prefix (> internal BIO 1k buffer size),
  382. * - soft EOF suffix
  383. * - junk suffix (expect to detect an error)
  384. *
  385. * For 6 input lengths of randomly generated raw input:
  386. *
  387. * 0, 3, 48, 192, 768 and 1536
  388. *
  389. * corresponding to encoded lengths (plus linebreaks and ignored
  390. * whitespace) of:
  391. *
  392. * 0, 4, 64, 256, 1024 and 2048
  393. *
  394. * Followed by zero, one or two additional bytes that may involve padding,
  395. * or else (truncation) 1, 2 or 3 bytes with missing padding.
  396. * Only the first four variants make sense with padding or truncated
  397. * groups.
  398. *
  399. * With two types of underlying BIO
  400. *
  401. * - Non-retriable underlying BIO
  402. * - Retriable underlying BIO
  403. *
  404. * And with/without the BIO_FLAGS_BASE64_NO_NL flag, where now an error is
  405. * expected with the junk and gunk prefixes, however, but the "soft EOF"
  406. * suffix is still accepted.
  407. *
  408. * Internally, each test may loop over a range of encoded line lengths and
  409. * whitespace average "densities".
  410. */
  411. numidx = NLEN * (NVAR * NPAD - NPAD + 1) * 2 * 2;
  412. ADD_ALL_TESTS(test_bio_base64_generated, numidx);
  413. /*
  414. * Corner case in original code that skips ignored input, when the ignored
  415. * length is one byte longer than the total of the second and later lines
  416. * of valid input in the first 1k bytes of input. No content variants,
  417. * just BIO retry status and oneline flags vary.
  418. */
  419. numidx = 2 * 2;
  420. ADD_ALL_TESTS(test_bio_base64_corner_case_bug, numidx);
  421. return 1;
  422. }