text-lookup.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include "dstr.h"
  18. #include "text-lookup.h"
  19. #include "lexer.h"
  20. #include "platform.h"
  21. /* ------------------------------------------------------------------------- */
  /*
   * A single stored entry: the key it was registered under and the text
   * handed back for that key.  Both strings are released with bfree() by
   * text_leaf_destroy().
   */
  struct text_leaf {
      char *lookup; /* key string for this entry */
      char *value;  /* text returned to callers for this key */
  };
  25. static inline void text_leaf_destroy(struct text_leaf *leaf)
  26. {
  27. if (leaf) {
  28. bfree(leaf->lookup);
  29. bfree(leaf->value);
  30. bfree(leaf);
  31. }
  32. }
  33. /* ------------------------------------------------------------------------- */
  34. struct text_node {
  35. struct dstr str;
  36. struct text_node *first_subnode;
  37. struct text_leaf *leaf;
  38. struct text_node *next;
  39. };
  40. static void text_node_destroy(struct text_node *node)
  41. {
  42. struct text_node *subnode;
  43. if (!node)
  44. return;
  45. subnode = node->first_subnode;
  46. while (subnode) {
  47. struct text_node *destroy_node = subnode;
  48. subnode = subnode->next;
  49. text_node_destroy(destroy_node);
  50. }
  51. dstr_free(&node->str);
  52. if (node->leaf)
  53. text_leaf_destroy(node->leaf);
  54. bfree(node);
  55. }
  56. static struct text_node *text_node_bychar(struct text_node *node, char ch)
  57. {
  58. struct text_node *subnode = node->first_subnode;
  59. while (subnode) {
  60. if (!dstr_is_empty(&subnode->str) &&
  61. subnode->str.array[0] == ch)
  62. return subnode;
  63. subnode = subnode->next;
  64. }
  65. return NULL;
  66. }
  67. static struct text_node *text_node_byname(struct text_node *node,
  68. const char *name)
  69. {
  70. struct text_node *subnode = node->first_subnode;
  71. while (subnode) {
  72. if (astrcmpi_n(subnode->str.array, name, subnode->str.len) == 0)
  73. return subnode;
  74. subnode = subnode->next;
  75. }
  76. return NULL;
  77. }
  78. /* ------------------------------------------------------------------------- */
  79. struct text_lookup {
  80. struct dstr language;
  81. struct text_node *top;
  82. };
  83. static void lookup_createsubnode(const char *lookup_val, struct text_leaf *leaf,
  84. struct text_node *node)
  85. {
  86. struct text_node *new = bzalloc(sizeof(struct text_node));
  87. new->leaf = leaf;
  88. new->next = node->first_subnode;
  89. dstr_copy(&new->str, lookup_val);
  90. node->first_subnode = new;
  91. }
  92. static void lookup_splitnode(const char *lookup_val, size_t len,
  93. struct text_leaf *leaf, struct text_node *node)
  94. {
  95. struct text_node *split = bzalloc(sizeof(struct text_node));
  96. dstr_copy(&split->str, node->str.array + len);
  97. split->leaf = node->leaf;
  98. split->first_subnode = node->first_subnode;
  99. node->first_subnode = split;
  100. dstr_resize(&node->str, len);
  101. if (lookup_val[len] != 0) {
  102. node->leaf = NULL;
  103. lookup_createsubnode(lookup_val + len, leaf, node);
  104. } else {
  105. node->leaf = leaf;
  106. }
  107. }
  108. static inline void lookup_replaceleaf(struct text_node *node,
  109. struct text_leaf *leaf)
  110. {
  111. text_leaf_destroy(node->leaf);
  112. node->leaf = leaf;
  113. }
  114. static void lookup_addstring(const char *lookup_val, struct text_leaf *leaf,
  115. struct text_node *node)
  116. {
  117. struct text_node *child;
  118. /* value already exists, so replace */
  119. if (!lookup_val || !*lookup_val) {
  120. lookup_replaceleaf(node, leaf);
  121. return;
  122. }
  123. child = text_node_bychar(node, *lookup_val);
  124. if (child) {
  125. size_t len;
  126. for (len = 0; len < child->str.len; len++) {
  127. char val1 = child->str.array[len],
  128. val2 = lookup_val[len];
  129. if (val1 != val2)
  130. break;
  131. }
  132. if (len == child->str.len) {
  133. lookup_addstring(lookup_val + len, leaf, child);
  134. return;
  135. } else {
  136. lookup_splitnode(lookup_val, len, leaf, child);
  137. }
  138. } else {
  139. lookup_createsubnode(lookup_val, leaf, node);
  140. }
  141. }
  142. static void lookup_getstringtoken(struct lexer *lex, struct strref *token)
  143. {
  144. const char *temp = lex->offset;
  145. bool was_backslash = false;
  146. while (*temp != 0 && *temp != '\n') {
  147. if (!was_backslash) {
  148. if (*temp == '\\') {
  149. was_backslash = true;
  150. } else if (*temp == '"') {
  151. temp++;
  152. break;
  153. }
  154. } else {
  155. was_backslash = false;
  156. }
  157. ++temp;
  158. }
  159. token->len += (size_t)(temp - lex->offset);
  160. if (*token->array == '"') {
  161. token->array++;
  162. token->len--;
  163. if (*(temp - 1) == '"')
  164. token->len--;
  165. }
  166. lex->offset = temp;
  167. }
  168. static bool lookup_gettoken(struct lexer *lex, struct strref *str)
  169. {
  170. struct base_token temp;
  171. base_token_clear(&temp);
  172. strref_clear(str);
  173. while (lexer_getbasetoken(lex, &temp, PARSE_WHITESPACE)) {
  174. char ch = *temp.text.array;
  175. if (!str->array) {
  176. /* comments are designated with a #, and end at LF */
  177. if (ch == '#') {
  178. while (ch != '\n' && ch != 0)
  179. ch = *(++lex->offset);
  180. } else if (temp.type == BASETOKEN_WHITESPACE) {
  181. strref_copy(str, &temp.text);
  182. break;
  183. } else {
  184. strref_copy(str, &temp.text);
  185. if (ch == '"') {
  186. lookup_getstringtoken(lex, str);
  187. break;
  188. } else if (ch == '=') {
  189. break;
  190. }
  191. }
  192. } else {
  193. if (temp.type == BASETOKEN_WHITESPACE ||
  194. *temp.text.array == '=') {
  195. lex->offset -= temp.text.len;
  196. break;
  197. }
  198. if (ch == '#') {
  199. lex->offset--;
  200. break;
  201. }
  202. str->len += temp.text.len;
  203. }
  204. }
  205. return (str->len != 0);
  206. }
  207. static inline bool lookup_goto_nextline(struct lexer *p)
  208. {
  209. struct strref val;
  210. bool success = true;
  211. strref_clear(&val);
  212. while (true) {
  213. if (!lookup_gettoken(p, &val)) {
  214. success = false;
  215. break;
  216. }
  217. if (*val.array == '\n')
  218. break;
  219. }
  220. return success;
  221. }
  222. static char *convert_string(const char *str, size_t len)
  223. {
  224. struct dstr out;
  225. out.array = bstrdup_n(str, len);
  226. out.capacity = len + 1;
  227. out.len = len;
  228. dstr_replace(&out, "\\n", "\n");
  229. dstr_replace(&out, "\\t", "\t");
  230. dstr_replace(&out, "\\r", "\r");
  231. dstr_replace(&out, "\\\"", "\"");
  232. return out.array;
  233. }
  234. static void lookup_addfiledata(struct text_lookup *lookup,
  235. const char *file_data)
  236. {
  237. struct lexer lex;
  238. struct strref name, value;
  239. lexer_init(&lex);
  240. lexer_start(&lex, file_data);
  241. strref_clear(&name);
  242. strref_clear(&value);
  243. while (lookup_gettoken(&lex, &name)) {
  244. struct text_leaf *leaf;
  245. bool got_eq = false;
  246. if (*name.array == '\n')
  247. continue;
  248. getval:
  249. if (!lookup_gettoken(&lex, &value))
  250. break;
  251. if (*value.array == '\n')
  252. continue;
  253. else if (!got_eq && *value.array == '=') {
  254. got_eq = true;
  255. goto getval;
  256. }
  257. leaf = bmalloc(sizeof(struct text_leaf));
  258. leaf->lookup = bstrdup_n(name.array, name.len);
  259. leaf->value = convert_string(value.array, value.len);
  260. for (size_t i = 0; i < name.len; i++)
  261. leaf->lookup[i] = toupper(leaf->lookup[i]);
  262. lookup_addstring(leaf->lookup, leaf, lookup->top);
  263. if (!lookup_goto_nextline(&lex))
  264. break;
  265. }
  266. lexer_free(&lex);
  267. }
  268. static inline bool lookup_getstring(const char *lookup_val, const char **out,
  269. struct text_node *node)
  270. {
  271. struct text_node *child;
  272. char ch;
  273. if (!node)
  274. return false;
  275. child = text_node_byname(node, lookup_val);
  276. if (!child)
  277. return false;
  278. lookup_val += child->str.len;
  279. ch = *lookup_val;
  280. if (ch)
  281. return lookup_getstring(lookup_val, out, child);
  282. if (!child->leaf)
  283. return false;
  284. *out = child->leaf->value;
  285. return true;
  286. }
  287. /* ------------------------------------------------------------------------- */
  288. lookup_t *text_lookup_create(const char *path)
  289. {
  290. struct text_lookup *lookup = bzalloc(sizeof(struct text_lookup));
  291. if (!text_lookup_add(lookup, path)) {
  292. bfree(lookup);
  293. lookup = NULL;
  294. }
  295. return lookup;
  296. }
  297. bool text_lookup_add(lookup_t *lookup, const char *path)
  298. {
  299. struct dstr file_str;
  300. char *temp = NULL;
  301. FILE *file;
  302. file = os_fopen(path, "rb");
  303. if (!file)
  304. return false;
  305. os_fread_utf8(file, &temp);
  306. dstr_init_move_array(&file_str, temp);
  307. fclose(file);
  308. if (!file_str.array)
  309. return false;
  310. if (!lookup->top)
  311. lookup->top = bzalloc(sizeof(struct text_node));
  312. dstr_replace(&file_str, "\r", " ");
  313. lookup_addfiledata(lookup, file_str.array);
  314. dstr_free(&file_str);
  315. return true;
  316. }
  317. void text_lookup_destroy(lookup_t *lookup)
  318. {
  319. if (lookup) {
  320. dstr_free(&lookup->language);
  321. text_node_destroy(lookup->top);
  322. bfree(lookup);
  323. }
  324. }
  325. bool text_lookup_getstr(lookup_t *lookup, const char *lookup_val,
  326. const char **out)
  327. {
  328. if (lookup)
  329. return lookup_getstring(lookup_val, out, lookup->top);
  330. return false;
  331. }