line_reader.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /*-
  2. * Copyright (c) 2008 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer
  10. * in this position and unchanged.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  16. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "lafe_platform.h"
  27. __FBSDID("$FreeBSD$");
  28. #include <errno.h>
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. #include "err.h"
  33. #include "line_reader.h"
  34. #if defined(_WIN32) && !defined(__CYGWIN__)
  35. #define strdup _strdup
  36. #endif
/*
 * Read lines from a file and hand them to the caller one at a time.
 * If nullSeparator is set, lines are terminated with zero bytes;
 * otherwise, they're terminated with newlines (CR or LF).
 *
 * This uses a self-sizing buffer to handle arbitrarily-long lines.
 */
  44. struct lafe_line_reader {
  45. FILE *f;
  46. char *buff, *buff_end, *line_start, *line_end, *p;
  47. char *pathname;
  48. size_t buff_length;
  49. int nullSeparator; /* Lines separated by null, not CR/CRLF/etc. */
  50. int ret;
  51. };
  52. struct lafe_line_reader *
  53. lafe_line_reader(const char *pathname, int nullSeparator)
  54. {
  55. struct lafe_line_reader *lr;
  56. lr = calloc(1, sizeof(*lr));
  57. if (lr == NULL)
  58. lafe_errc(1, ENOMEM, "Can't open %s", pathname);
  59. lr->nullSeparator = nullSeparator;
  60. lr->pathname = strdup(pathname);
  61. if (strcmp(pathname, "-") == 0)
  62. lr->f = stdin;
  63. else
  64. lr->f = fopen(pathname, "r");
  65. if (lr->f == NULL)
  66. lafe_errc(1, errno, "Couldn't open %s", pathname);
  67. lr->buff_length = 8192;
  68. lr->buff = malloc(lr->buff_length);
  69. if (lr->buff == NULL)
  70. lafe_errc(1, ENOMEM, "Can't read %s", pathname);
  71. lr->line_start = lr->line_end = lr->buff_end = lr->buff;
  72. return (lr);
  73. }
  74. const char *
  75. lafe_line_reader_next(struct lafe_line_reader *lr)
  76. {
  77. size_t bytes_wanted, bytes_read, new_buff_size;
  78. char *line_start, *p;
  79. for (;;) {
  80. /* If there's a line in the buffer, return it immediately. */
  81. while (lr->line_end < lr->buff_end) {
  82. if (lr->nullSeparator) {
  83. if (*lr->line_end == '\0') {
  84. line_start = lr->line_start;
  85. lr->line_start = lr->line_end + 1;
  86. lr->line_end = lr->line_start;
  87. return (line_start);
  88. }
  89. } else if (*lr->line_end == '\x0a' || *lr->line_end == '\x0d') {
  90. *lr->line_end = '\0';
  91. line_start = lr->line_start;
  92. lr->line_start = lr->line_end + 1;
  93. lr->line_end = lr->line_start;
  94. if (line_start[0] != '\0')
  95. return (line_start);
  96. }
  97. lr->line_end++;
  98. }
  99. /* If we're at end-of-file, process the final data. */
  100. if (lr->f == NULL) {
  101. /* If there's more text, return one last line. */
  102. if (lr->line_end > lr->line_start) {
  103. *lr->line_end = '\0';
  104. line_start = lr->line_start;
  105. lr->line_start = lr->line_end + 1;
  106. lr->line_end = lr->line_start;
  107. return (line_start);
  108. }
  109. /* Otherwise, we're done. */
  110. return (NULL);
  111. }
  112. /* Buffer only has part of a line. */
  113. if (lr->line_start > lr->buff) {
  114. /* Move a leftover fractional line to the beginning. */
  115. memmove(lr->buff, lr->line_start,
  116. lr->buff_end - lr->line_start);
  117. lr->buff_end -= lr->line_start - lr->buff;
  118. lr->line_end -= lr->line_start - lr->buff;
  119. lr->line_start = lr->buff;
  120. } else {
  121. /* Line is too big; enlarge the buffer. */
  122. new_buff_size = lr->buff_length * 2;
  123. if (new_buff_size <= lr->buff_length)
  124. lafe_errc(1, ENOMEM,
  125. "Line too long in %s", lr->pathname);
  126. lr->buff_length = new_buff_size;
  127. p = realloc(lr->buff, new_buff_size);
  128. if (p == NULL)
  129. lafe_errc(1, ENOMEM,
  130. "Line too long in %s", lr->pathname);
  131. lr->buff_end = p + (lr->buff_end - lr->buff);
  132. lr->line_end = p + (lr->line_end - lr->buff);
  133. lr->line_start = lr->buff = p;
  134. }
  135. /* Get some more data into the buffer. */
  136. bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
  137. bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
  138. lr->buff_end += bytes_read;
  139. if (ferror(lr->f))
  140. lafe_errc(1, errno, "Can't read %s", lr->pathname);
  141. if (feof(lr->f)) {
  142. if (lr->f != stdin)
  143. fclose(lr->f);
  144. lr->f = NULL;
  145. }
  146. }
  147. }
  148. void
  149. lafe_line_reader_free(struct lafe_line_reader *lr)
  150. {
  151. free(lr->buff);
  152. free(lr->pathname);
  153. free(lr);
  154. }