text-lookup.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include "dstr.h"
  17. #include "text-lookup.h"
  18. #include "lexer.h"
  19. #include "platform.h"
  20. /* ------------------------------------------------------------------------- */
  /* One stored entry: the key it was registered under and its text. */
  struct text_leaf {
      char *lookup; /* key string; released by text_leaf_destroy */
      char *value;  /* text handed back by lookups; released likewise */
  };
  24. static inline void text_leaf_destroy(struct text_leaf *leaf)
  25. {
  26. if (leaf) {
  27. bfree(leaf->lookup);
  28. bfree(leaf->value);
  29. bfree(leaf);
  30. }
  31. }
  32. /* ------------------------------------------------------------------------- */
  33. struct text_node {
  34. struct dstr str;
  35. struct text_node *first_subnode;
  36. struct text_leaf *leaf;
  37. struct text_node *next;
  38. };
  39. static void text_node_destroy(struct text_node *node)
  40. {
  41. struct text_node *subnode;
  42. if (!node)
  43. return;
  44. subnode = node->first_subnode;
  45. while (subnode) {
  46. struct text_node *destroy_node = subnode;
  47. subnode = subnode->next;
  48. text_node_destroy(destroy_node);
  49. }
  50. dstr_free(&node->str);
  51. if (node->leaf)
  52. text_leaf_destroy(node->leaf);
  53. bfree(node);
  54. }
  55. static struct text_node *text_node_bychar(struct text_node *node, char ch)
  56. {
  57. struct text_node *subnode = node->first_subnode;
  58. while (subnode) {
  59. if (!dstr_is_empty(&subnode->str) &&
  60. subnode->str.array[0] == ch)
  61. return subnode;
  62. subnode = subnode->next;
  63. }
  64. return NULL;
  65. }
  66. static struct text_node *text_node_byname(struct text_node *node,
  67. const char *name)
  68. {
  69. struct text_node *subnode = node->first_subnode;
  70. while (subnode) {
  71. if (astrcmpi_n(subnode->str.array, name, subnode->str.len) == 0)
  72. return subnode;
  73. subnode = subnode->next;
  74. }
  75. return NULL;
  76. }
  77. /* ------------------------------------------------------------------------- */
  78. struct text_lookup {
  79. struct dstr language;
  80. struct text_node *top;
  81. };
  82. static void lookup_createsubnode(const char *lookup_val, struct text_leaf *leaf,
  83. struct text_node *node)
  84. {
  85. struct text_node *new = bzalloc(sizeof(struct text_node));
  86. new->leaf = leaf;
  87. new->next = node->first_subnode;
  88. dstr_copy(&new->str, lookup_val);
  89. node->first_subnode = new;
  90. }
  91. static void lookup_splitnode(const char *lookup_val, size_t len,
  92. struct text_leaf *leaf, struct text_node *node)
  93. {
  94. struct text_node *split = bzalloc(sizeof(struct text_node));
  95. dstr_copy(&split->str, node->str.array + len);
  96. split->leaf = node->leaf;
  97. split->first_subnode = node->first_subnode;
  98. node->first_subnode = split;
  99. dstr_resize(&node->str, len);
  100. if (lookup_val[len] != 0) {
  101. node->leaf = NULL;
  102. lookup_createsubnode(lookup_val + len, leaf, node);
  103. } else {
  104. node->leaf = leaf;
  105. }
  106. }
  107. static inline void lookup_replaceleaf(struct text_node *node,
  108. struct text_leaf *leaf)
  109. {
  110. text_leaf_destroy(node->leaf);
  111. node->leaf = leaf;
  112. }
  113. static void lookup_addstring(const char *lookup_val, struct text_leaf *leaf,
  114. struct text_node *node)
  115. {
  116. struct text_node *child;
  117. /* value already exists, so replace */
  118. if (!lookup_val || !*lookup_val) {
  119. lookup_replaceleaf(node, leaf);
  120. return;
  121. }
  122. child = text_node_bychar(node, *lookup_val);
  123. if (child) {
  124. size_t len;
  125. for (len = 0; len < child->str.len; len++) {
  126. char val1 = child->str.array[len],
  127. val2 = lookup_val[len];
  128. if (val1 >= 'A' && val1 <= 'Z')
  129. val1 += 0x20;
  130. if (val2 >= 'A' && val2 <= 'Z')
  131. val2 += 0x20;
  132. if (val1 != val2)
  133. break;
  134. }
  135. if (len == child->str.len) {
  136. lookup_addstring(lookup_val + len, leaf, child);
  137. return;
  138. } else {
  139. lookup_splitnode(lookup_val, len, leaf, child);
  140. }
  141. } else {
  142. lookup_createsubnode(lookup_val, leaf, node);
  143. }
  144. }
  145. static void lookup_getstringtoken(struct lexer *lex, struct strref *token)
  146. {
  147. const char *temp = lex->offset;
  148. bool was_backslash = false;
  149. while (*temp != 0 && *temp != '\n') {
  150. if (!was_backslash) {
  151. if (*temp == '\\') {
  152. was_backslash = true;
  153. } else if (*temp == '"') {
  154. temp++;
  155. break;
  156. }
  157. } else {
  158. was_backslash = false;
  159. }
  160. ++temp;
  161. }
  162. token->len += (size_t)(temp - lex->offset);
  163. if (*token->array == '"') {
  164. token->array++;
  165. token->len--;
  166. if (*(temp - 1) == '"')
  167. token->len--;
  168. }
  169. lex->offset = temp;
  170. }
  171. static bool lookup_gettoken(struct lexer *lex, struct strref *str)
  172. {
  173. struct base_token temp;
  174. base_token_clear(&temp);
  175. strref_clear(str);
  176. while (lexer_getbasetoken(lex, &temp, PARSE_WHITESPACE)) {
  177. char ch = *temp.text.array;
  178. if (!str->array) {
  179. /* comments are designated with a #, and end at LF */
  180. if (ch == '#') {
  181. while (ch != '\n' && ch != 0)
  182. ch = *(++lex->offset);
  183. } else if (temp.type == BASETOKEN_WHITESPACE) {
  184. strref_copy(str, &temp.text);
  185. break;
  186. } else {
  187. strref_copy(str, &temp.text);
  188. if (ch == '"') {
  189. lookup_getstringtoken(lex, str);
  190. break;
  191. } else if (ch == '=') {
  192. break;
  193. }
  194. }
  195. } else {
  196. if (temp.type == BASETOKEN_WHITESPACE ||
  197. *temp.text.array == '=') {
  198. lex->offset -= temp.text.len;
  199. break;
  200. }
  201. if (ch == '#') {
  202. lex->offset--;
  203. break;
  204. }
  205. str->len += temp.text.len;
  206. }
  207. }
  208. return (str->len != 0);
  209. }
  210. static inline bool lookup_goto_nextline(struct lexer *p)
  211. {
  212. struct strref val;
  213. bool success = true;
  214. strref_clear(&val);
  215. while (true) {
  216. if (!lookup_gettoken(p, &val)) {
  217. success = false;
  218. break;
  219. }
  220. if (*val.array == '\n')
  221. break;
  222. }
  223. return success;
  224. }
  225. static char *convert_string(const char *str, size_t len)
  226. {
  227. struct dstr out;
  228. out.array = bstrdup_n(str, len);
  229. out.capacity = len + 1;
  230. out.len = len;
  231. dstr_replace(&out, "\\n", "\n");
  232. dstr_replace(&out, "\\t", "\t");
  233. dstr_replace(&out, "\\r", "\r");
  234. dstr_replace(&out, "\\\"", "\"");
  235. return out.array;
  236. }
  237. static void lookup_addfiledata(struct text_lookup *lookup,
  238. const char *file_data)
  239. {
  240. struct lexer lex;
  241. struct strref name, value;
  242. lexer_init(&lex);
  243. lexer_start(&lex, file_data);
  244. strref_clear(&name);
  245. strref_clear(&value);
  246. while (lookup_gettoken(&lex, &name)) {
  247. struct text_leaf *leaf;
  248. bool got_eq = false;
  249. if (*name.array == '\n')
  250. continue;
  251. getval:
  252. if (!lookup_gettoken(&lex, &value))
  253. break;
  254. if (*value.array == '\n')
  255. continue;
  256. else if (!got_eq && *value.array == '=') {
  257. got_eq = true;
  258. goto getval;
  259. }
  260. leaf = bmalloc(sizeof(struct text_leaf));
  261. leaf->lookup = bstrdup_n(name.array, name.len);
  262. leaf->value = convert_string(value.array, value.len);
  263. lookup_addstring(leaf->lookup, leaf, lookup->top);
  264. if (!lookup_goto_nextline(&lex))
  265. break;
  266. }
  267. lexer_free(&lex);
  268. }
  269. static inline bool lookup_getstring(const char *lookup_val, const char **out,
  270. struct text_node *node)
  271. {
  272. struct text_node *child;
  273. char ch;
  274. if (!node)
  275. return false;
  276. child = text_node_byname(node, lookup_val);
  277. if (!child)
  278. return false;
  279. lookup_val += child->str.len;
  280. ch = *lookup_val;
  281. if (ch)
  282. return lookup_getstring(lookup_val, out, child);
  283. if (!child->leaf)
  284. return false;
  285. *out = child->leaf->value;
  286. return true;
  287. }
  288. /* ------------------------------------------------------------------------- */
  289. lookup_t *text_lookup_create(const char *path)
  290. {
  291. struct text_lookup *lookup = bzalloc(sizeof(struct text_lookup));
  292. if (!text_lookup_add(lookup, path)) {
  293. bfree(lookup);
  294. lookup = NULL;
  295. }
  296. return lookup;
  297. }
  298. bool text_lookup_add(lookup_t *lookup, const char *path)
  299. {
  300. struct dstr file_str;
  301. char *temp = NULL;
  302. FILE *file;
  303. file = os_fopen(path, "rb");
  304. if (!file)
  305. return false;
  306. os_fread_utf8(file, &temp);
  307. dstr_init_move_array(&file_str, temp);
  308. fclose(file);
  309. if (!file_str.array)
  310. return false;
  311. if (!lookup->top)
  312. lookup->top = bzalloc(sizeof(struct text_node));
  313. dstr_replace(&file_str, "\r", " ");
  314. lookup_addfiledata(lookup, file_str.array);
  315. dstr_free(&file_str);
  316. return true;
  317. }
  318. void text_lookup_destroy(lookup_t *lookup)
  319. {
  320. if (lookup) {
  321. dstr_free(&lookup->language);
  322. text_node_destroy(lookup->top);
  323. bfree(lookup);
  324. }
  325. }
  326. bool text_lookup_getstr(lookup_t *lookup, const char *lookup_val,
  327. const char **out)
  328. {
  329. if (lookup)
  330. return lookup_getstring(lookup_val, out, lookup->top);
  331. return false;
  332. }