text-lookup.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include "dstr.h"
  17. #include "text-lookup.h"
  18. #include "lexer.h"
  19. #include "platform.h"
  20. /* ------------------------------------------------------------------------- */
  /* One stored entry: the key it was registered under and the text for it. */
  struct text_leaf {
      char *lookup; /* key string; released with bfree() when the leaf dies */
      char *value;  /* text handed back to callers for this key */
  };
  24. static inline void text_leaf_destroy(struct text_leaf *leaf)
  25. {
  26. if (leaf) {
  27. bfree(leaf->lookup);
  28. bfree(leaf->value);
  29. bfree(leaf);
  30. }
  31. }
  32. /* ------------------------------------------------------------------------- */
  33. struct text_node {
  34. struct dstr str;
  35. struct text_node *first_subnode;
  36. struct text_leaf *leaf;
  37. struct text_node *next;
  38. };
  39. static void text_node_destroy(struct text_node *node)
  40. {
  41. struct text_node *subnode;
  42. if (!node)
  43. return;
  44. subnode = node->first_subnode;
  45. while (subnode) {
  46. struct text_node *destroy_node = subnode;
  47. subnode = subnode->next;
  48. text_node_destroy(destroy_node);
  49. }
  50. dstr_free(&node->str);
  51. if (node->leaf)
  52. text_leaf_destroy(node->leaf);
  53. bfree(node);
  54. }
  55. static struct text_node *text_node_bychar(struct text_node *node, char ch)
  56. {
  57. struct text_node *subnode = node->first_subnode;
  58. while (subnode) {
  59. if (!dstr_isempty(&subnode->str) && subnode->str.array[0] == ch)
  60. return subnode;
  61. subnode = subnode->next;
  62. }
  63. return NULL;
  64. }
  65. static struct text_node *text_node_byname(struct text_node *node,
  66. const char *name)
  67. {
  68. struct text_node *subnode = node->first_subnode;
  69. while (subnode) {
  70. if (astrcmpi_n(subnode->str.array, name, subnode->str.len) == 0)
  71. return subnode;
  72. subnode = subnode->next;
  73. }
  74. return NULL;
  75. }
  76. /* ------------------------------------------------------------------------- */
  77. struct text_lookup {
  78. struct dstr language;
  79. struct text_node *top;
  80. };
  81. static void lookup_createsubnode(const char *lookup_val,
  82. struct text_leaf *leaf, struct text_node *node)
  83. {
  84. struct text_node *new = bzalloc(sizeof(struct text_node));
  85. new->leaf = leaf;
  86. new->next = node->first_subnode;
  87. dstr_copy(&new->str, lookup_val);
  88. node->first_subnode = new;
  89. }
  90. static void lookup_splitnode(const char *lookup_val, size_t len,
  91. struct text_leaf *leaf, struct text_node *node)
  92. {
  93. struct text_node *split = bzalloc(sizeof(struct text_node));
  94. dstr_copy(&split->str, node->str.array+len);
  95. split->leaf = node->leaf;
  96. split->first_subnode = node->first_subnode;
  97. node->first_subnode = split;
  98. dstr_resize(&node->str, len);
  99. if (lookup_val[len] != 0) {
  100. node->leaf = NULL;
  101. lookup_createsubnode(lookup_val+len, leaf, node);
  102. } else {
  103. node->leaf = leaf;
  104. }
  105. }
  106. static inline void lookup_replaceleaf(struct text_node *node,
  107. struct text_leaf *leaf)
  108. {
  109. text_leaf_destroy(node->leaf);
  110. node->leaf = leaf;
  111. }
  112. static void lookup_addstring(const char *lookup_val, struct text_leaf *leaf,
  113. struct text_node *node)
  114. {
  115. struct text_node *child;
  116. /* value already exists, so replace */
  117. if (!lookup_val || !*lookup_val) {
  118. lookup_replaceleaf(node, leaf);
  119. return;
  120. }
  121. child = text_node_bychar(node, *lookup_val);
  122. if (child) {
  123. size_t len;
  124. for (len = 0; len < child->str.len; len++) {
  125. char val1 = child->str.array[len],
  126. val2 = lookup_val[len];
  127. if (val1 >= 'A' && val1 <= 'Z')
  128. val1 += 0x20;
  129. if (val2 >= 'A' && val2 <= 'Z')
  130. val2 += 0x20;
  131. if (val1 != val2)
  132. break;
  133. }
  134. if (len == child->str.len) {
  135. lookup_addstring(lookup_val+len, leaf, child);
  136. return;
  137. } else {
  138. lookup_splitnode(lookup_val, len, leaf, child);
  139. }
  140. } else {
  141. lookup_createsubnode(lookup_val, leaf, node);
  142. }
  143. }
  144. static void lookup_getstringtoken(struct lexer *lex, struct strref *token)
  145. {
  146. const char *temp = lex->offset;
  147. bool was_backslash = false;
  148. while (*temp != 0 && *temp != '\n') {
  149. if (!was_backslash) {
  150. if (*temp == '\\') {
  151. was_backslash = true;
  152. } else if (*temp == '"') {
  153. temp++;
  154. break;
  155. }
  156. } else {
  157. was_backslash = false;
  158. }
  159. ++temp;
  160. }
  161. token->len += (size_t)(temp - lex->offset);
  162. if (*token->array == '"') {
  163. token->array++;
  164. token->len--;
  165. if (*(temp-1) == '"')
  166. token->len--;
  167. }
  168. lex->offset = temp;
  169. }
  170. static bool lookup_gettoken(struct lexer *lex, struct strref *str)
  171. {
  172. struct base_token temp;
  173. base_token_clear(&temp);
  174. strref_clear(str);
  175. while (lexer_getbasetoken(lex, &temp, PARSE_WHITESPACE)) {
  176. char ch = *temp.text.array;
  177. if (!str->array) {
  178. /* comments are designated with a #, and end at LF */
  179. if (ch == '#') {
  180. while(ch != '\n' && ch != 0)
  181. ch = *(++lex->offset);
  182. } else if (temp.type == BASETOKEN_WHITESPACE) {
  183. strref_copy(str, &temp.text);
  184. break;
  185. } else {
  186. strref_copy(str, &temp.text);
  187. if (ch == '"') {
  188. lookup_getstringtoken(lex, str);
  189. break;
  190. } else if (ch == '=') {
  191. break;
  192. }
  193. }
  194. } else {
  195. if (temp.type == BASETOKEN_WHITESPACE ||
  196. *temp.text.array == '=') {
  197. lex->offset -= temp.text.len;
  198. break;
  199. }
  200. if (ch == '#') {
  201. lex->offset--;
  202. break;
  203. }
  204. str->len += temp.text.len;
  205. }
  206. }
  207. return (str->len != 0);
  208. }
  209. static inline bool lookup_goto_nextline(struct lexer *p)
  210. {
  211. struct strref val;
  212. bool success = true;
  213. strref_clear(&val);
  214. while (true) {
  215. if (!lookup_gettoken(p, &val)) {
  216. success = false;
  217. break;
  218. }
  219. if (*val.array == '\n')
  220. break;
  221. }
  222. return success;
  223. }
  224. static char *convert_string(const char *str, size_t len)
  225. {
  226. struct dstr out;
  227. out.array = bstrdup_n(str, len);
  228. out.capacity = len+1;
  229. out.len = len;
  230. dstr_replace(&out, "\\n", "\n");
  231. dstr_replace(&out, "\\t", "\t");
  232. dstr_replace(&out, "\\r", "\r");
  233. return out.array;
  234. }
  235. static void lookup_addfiledata(struct text_lookup *lookup,
  236. const char *file_data)
  237. {
  238. struct lexer lex;
  239. struct strref name, value;
  240. lexer_init(&lex);
  241. lexer_start(&lex, file_data);
  242. strref_clear(&name);
  243. strref_clear(&value);
  244. while (lookup_gettoken(&lex, &name)) {
  245. struct text_leaf *leaf;
  246. bool got_eq = false;
  247. if (*name.array == '\n')
  248. continue;
  249. getval:
  250. if (!lookup_gettoken(&lex, &value))
  251. break;
  252. if (*value.array == '\n')
  253. continue;
  254. else if (!got_eq && *value.array == '=') {
  255. got_eq = true;
  256. goto getval;
  257. }
  258. leaf = bmalloc(sizeof(struct text_leaf));
  259. leaf->lookup = bstrdup_n(name.array, name.len);
  260. leaf->value = convert_string(value.array, value.len);
  261. lookup_addstring(leaf->lookup, leaf, lookup->top);
  262. if (!lookup_goto_nextline(&lex))
  263. break;
  264. }
  265. lexer_free(&lex);
  266. }
  267. static inline bool lookup_getstring(const char *lookup_val,
  268. const char **out, struct text_node *node)
  269. {
  270. struct text_node *child;
  271. char ch;
  272. if (!node)
  273. return false;
  274. child = text_node_byname(node, lookup_val);
  275. if (!child)
  276. return false;
  277. lookup_val += child->str.len;
  278. ch = *lookup_val;
  279. if (ch)
  280. return lookup_getstring(lookup_val, out, child);
  281. if (!child->leaf)
  282. return false;
  283. *out = child->leaf->value;
  284. return true;
  285. }
  286. /* ------------------------------------------------------------------------- */
  287. lookup_t text_lookup_create(const char *path)
  288. {
  289. struct text_lookup *lookup = bzalloc(sizeof(struct text_lookup));
  290. if (!text_lookup_add(lookup, path)) {
  291. bfree(lookup);
  292. lookup = NULL;
  293. }
  294. return lookup;
  295. }
  296. bool text_lookup_add(lookup_t lookup, const char *path)
  297. {
  298. struct dstr file_str;
  299. char *temp = NULL;
  300. FILE *file;
  301. file = os_fopen(path, "rb");
  302. if (!file)
  303. return false;
  304. os_fread_utf8(file, &temp);
  305. dstr_init_move_array(&file_str, temp);
  306. fclose(file);
  307. if (!file_str.array)
  308. return false;
  309. if (!lookup->top)
  310. lookup->top = bzalloc(sizeof(struct text_node));
  311. dstr_replace(&file_str, "\r", " ");
  312. lookup_addfiledata(lookup, file_str.array);
  313. dstr_free(&file_str);
  314. return true;
  315. }
  316. void text_lookup_destroy(lookup_t lookup)
  317. {
  318. if (lookup) {
  319. dstr_free(&lookup->language);
  320. text_node_destroy(lookup->top);
  321. bfree(lookup);
  322. }
  323. }
  324. bool text_lookup_getstr(lookup_t lookup, const char *lookup_val,
  325. const char **out)
  326. {
  327. return lookup_getstring(lookup_val, out, lookup->top);
  328. }