text-lookup.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include "dstr.h"
  17. #include "text-lookup.h"
  18. #include "lexer.h"
  19. #include "platform.h"
  20. /* ------------------------------------------------------------------------- */
  /* One key/value entry stored in the lookup tree. */
  struct text_leaf {
      char *lookup; /* key string; freed together with the leaf */
      char *value;  /* text stored for that key; freed together with the leaf */
  };
  24. static inline void text_leaf_destroy(struct text_leaf *leaf)
  25. {
  26. bfree(leaf->lookup);
  27. bfree(leaf->value);
  28. bfree(leaf);
  29. }
  30. /* ------------------------------------------------------------------------- */
  31. struct text_node {
  32. struct dstr str;
  33. struct text_node *first_subnode;
  34. struct text_leaf *leaf;
  35. struct text_node *next;
  36. };
  37. static void text_node_destroy(struct text_node *node)
  38. {
  39. struct text_node *subnode;
  40. if (!node)
  41. return;
  42. subnode = node->first_subnode;
  43. while (subnode) {
  44. struct text_node *destroy_node = subnode;
  45. subnode = subnode->next;
  46. text_node_destroy(destroy_node);
  47. }
  48. dstr_free(&node->str);
  49. if (node->leaf)
  50. text_leaf_destroy(node->leaf);
  51. bfree(node);
  52. }
  53. static struct text_node *text_node_bychar(struct text_node *node, char ch)
  54. {
  55. struct text_node *subnode = node->first_subnode;
  56. while (subnode) {
  57. if (!dstr_isempty(&subnode->str) && subnode->str.array[0] == ch)
  58. return subnode;
  59. subnode = subnode->next;
  60. }
  61. return NULL;
  62. }
  63. static struct text_node *text_node_byname(struct text_node *node,
  64. const char *name)
  65. {
  66. struct text_node *subnode = node->first_subnode;
  67. while (subnode) {
  68. if (astrcmpi_n(subnode->str.array, name, subnode->str.len) == 0)
  69. return subnode;
  70. subnode = subnode->next;
  71. }
  72. return NULL;
  73. }
  74. /* ------------------------------------------------------------------------- */
  75. struct text_lookup {
  76. struct dstr language;
  77. struct text_node *top;
  78. };
  79. static void lookup_createsubnode(const char *lookup_val,
  80. struct text_leaf *leaf, struct text_node *node)
  81. {
  82. struct text_node *new = bzalloc(sizeof(struct text_node));
  83. new->leaf = leaf;
  84. new->next = node->first_subnode;
  85. dstr_copy(&new->str, lookup_val);
  86. node->first_subnode = new;
  87. }
  88. static void lookup_splitnode(const char *lookup_val, size_t len,
  89. struct text_leaf *leaf, struct text_node *node)
  90. {
  91. struct text_node *split = bzalloc(sizeof(struct text_node));
  92. dstr_copy(&split->str, node->str.array+len);
  93. split->leaf = node->leaf;
  94. split->first_subnode = node->first_subnode;
  95. node->first_subnode = split;
  96. dstr_resize(&node->str, len);
  97. if (lookup_val[len] != 0) {
  98. node->leaf = NULL;
  99. lookup_createsubnode(lookup_val+len, leaf, node);
  100. } else {
  101. node->leaf = leaf;
  102. }
  103. }
  104. static inline void lookup_replaceleaf(struct text_node *node,
  105. struct text_leaf *leaf)
  106. {
  107. text_leaf_destroy(node->leaf);
  108. node->leaf = leaf;
  109. }
  110. static void lookup_addstring(const char *lookup_val, struct text_leaf *leaf,
  111. struct text_node *node)
  112. {
  113. struct text_node *child;
  114. /* value already exists, so replace */
  115. if (!lookup_val || !*lookup_val) {
  116. lookup_replaceleaf(node, leaf);
  117. return;
  118. }
  119. child = text_node_bychar(node, *lookup_val);
  120. if (child) {
  121. size_t len;
  122. for (len = 0; len < child->str.len; len++) {
  123. char val1 = child->str.array[len],
  124. val2 = lookup_val[len];
  125. if (val1 >= 'A' && val1 <= 'Z')
  126. val1 += 0x20;
  127. if (val2 >= 'A' && val2 <= 'Z')
  128. val2 += 0x20;
  129. if (val1 != val2)
  130. break;
  131. }
  132. if (len == child->str.len) {
  133. lookup_addstring(lookup_val+len, leaf, child);
  134. return;
  135. } else {
  136. lookup_splitnode(lookup_val, len, leaf, child);
  137. }
  138. } else {
  139. lookup_createsubnode(lookup_val, leaf, node);
  140. }
  141. }
  142. static void lookup_getstringtoken(struct lexer *lex, struct strref *token)
  143. {
  144. const char *temp = lex->offset;
  145. bool was_backslash = false;
  146. while (*temp != 0 && *temp != '\n') {
  147. if (!was_backslash) {
  148. if (*temp == '\\') {
  149. was_backslash = true;
  150. } else if (*temp == '"') {
  151. temp++;
  152. break;
  153. }
  154. } else {
  155. was_backslash = false;
  156. }
  157. ++temp;
  158. }
  159. token->len += (size_t)(temp - lex->offset);
  160. if (*token->array == '"') {
  161. token->array++;
  162. token->len--;
  163. if (*(temp-1) == '"')
  164. token->len--;
  165. }
  166. lex->offset = temp;
  167. }
  168. static bool lookup_gettoken(struct lexer *lex, struct strref *str)
  169. {
  170. struct base_token temp;
  171. base_token_clear(&temp);
  172. strref_clear(str);
  173. while (lexer_getbasetoken(lex, &temp, PARSE_WHITESPACE)) {
  174. char ch = *temp.text.array;
  175. if (!str->array) {
  176. /* comments are designated with a #, and end at LF */
  177. if (ch == '#') {
  178. while(ch != '\n' && ch != 0)
  179. ch = *(++lex->offset);
  180. } else if (temp.type == BASETOKEN_WHITESPACE) {
  181. strref_copy(str, &temp.text);
  182. break;
  183. } else {
  184. strref_copy(str, &temp.text);
  185. if (ch == '"') {
  186. lookup_getstringtoken(lex, str);
  187. break;
  188. } else if (ch == '=') {
  189. break;
  190. }
  191. }
  192. } else {
  193. if (temp.type == BASETOKEN_WHITESPACE ||
  194. *temp.text.array == '=') {
  195. lex->offset -= temp.text.len;
  196. break;
  197. }
  198. if (ch == '#') {
  199. lex->offset--;
  200. break;
  201. }
  202. str->len += temp.text.len;
  203. }
  204. }
  205. return (str->len != 0);
  206. }
  207. static inline bool lookup_goto_nextline(struct lexer *p)
  208. {
  209. struct strref val;
  210. bool success = true;
  211. strref_clear(&val);
  212. while (true) {
  213. if (!lookup_gettoken(p, &val)) {
  214. success = false;
  215. break;
  216. }
  217. if (*val.array == '\n')
  218. break;
  219. }
  220. return success;
  221. }
  222. static char *convert_string(const char *str, size_t len)
  223. {
  224. struct dstr out;
  225. out.array = bstrdup_n(str, len);
  226. out.capacity = len+1;
  227. out.len = len;
  228. dstr_replace(&out, "\\n", "\n");
  229. dstr_replace(&out, "\\t", "\t");
  230. dstr_replace(&out, "\\r", "\r");
  231. return out.array;
  232. }
  233. static void lookup_addfiledata(struct text_lookup *lookup,
  234. const char *file_data)
  235. {
  236. struct lexer lex;
  237. struct strref name, value;
  238. lexer_init(&lex);
  239. lexer_start(&lex, file_data);
  240. strref_clear(&name);
  241. strref_clear(&value);
  242. while (lookup_gettoken(&lex, &name)) {
  243. struct text_leaf *leaf;
  244. bool got_eq = false;
  245. if (*name.array == '\n')
  246. continue;
  247. getval:
  248. if (!lookup_gettoken(&lex, &value))
  249. break;
  250. if (*value.array == '\n')
  251. continue;
  252. else if (!got_eq && *value.array == '=') {
  253. got_eq = true;
  254. goto getval;
  255. }
  256. leaf = bmalloc(sizeof(struct text_leaf));
  257. leaf->lookup = bstrdup_n(name.array, name.len);
  258. leaf->value = convert_string(value.array, value.len);
  259. lookup_addstring(leaf->lookup, leaf, lookup->top);
  260. if (!lookup_goto_nextline(&lex))
  261. break;
  262. }
  263. lexer_free(&lex);
  264. }
  265. static inline bool lookup_getstring(const char *lookup_val,
  266. const char **out, struct text_node *node)
  267. {
  268. struct text_node *child;
  269. char ch;
  270. if (!node)
  271. return false;
  272. child = text_node_byname(node, lookup_val);
  273. if (!child)
  274. return false;
  275. lookup_val += child->str.len;
  276. ch = *lookup_val;
  277. if (ch)
  278. return lookup_getstring(lookup_val, out, child);
  279. if (!child->leaf)
  280. return false;
  281. *out = child->leaf->value;
  282. return true;
  283. }
  284. /* ------------------------------------------------------------------------- */
  285. lookup_t text_lookup_create(const char *path)
  286. {
  287. struct text_lookup *lookup = bzalloc(sizeof(struct text_lookup));
  288. if (!text_lookup_add(lookup, path)) {
  289. bfree(lookup);
  290. lookup = NULL;
  291. }
  292. return lookup;
  293. }
  294. bool text_lookup_add(lookup_t lookup, const char *path)
  295. {
  296. struct dstr file_str;
  297. char *temp = NULL;
  298. FILE *file;
  299. file = os_fopen(path, "rb");
  300. if (!file)
  301. return false;
  302. os_fread_utf8(file, &temp);
  303. dstr_init_move_array(&file_str, temp);
  304. fclose(file);
  305. if (!file_str.array)
  306. return false;
  307. if (!lookup->top)
  308. lookup->top = bzalloc(sizeof(struct text_node));
  309. dstr_replace(&file_str, "\r", " ");
  310. lookup_addfiledata(lookup, file_str.array);
  311. dstr_free(&file_str);
  312. return true;
  313. }
  314. void text_lookup_destroy(lookup_t lookup)
  315. {
  316. if (lookup) {
  317. dstr_free(&lookup->language);
  318. text_node_destroy(lookup->top);
  319. bfree(lookup);
  320. }
  321. }
  322. bool text_lookup_getstr(lookup_t lookup, const char *lookup_val,
  323. const char **out)
  324. {
  325. return lookup_getstring(lookup_val, out, lookup->top);
  326. }