cgiutil.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /** --- BEGIN COPYRIGHT BLOCK ---
  2. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  3. * Copyright (C) 2005 Red Hat, Inc.
  4. * All rights reserved.
  5. --- END COPYRIGHT BLOCK --- */
  6. /*
  7. * cgiutil.c -- CGI-related utility functions -- HTTP gateway
  8. *
  9. * Note: this code is derived from the extras/changepw.c code that ships
  10. * with the FastTrack 2.0 server
  11. */
  12. #include "dsgw.h"
  13. #include "dbtdsgw.h"
  14. #include <prprf.h>
  15. #include <unicode/ucnv.h>
  16. #include <unicode/ustring.h>
  17. /* globals */
  18. static char **formvars = NULL;
  19. /* functions */
  20. static char **dsgw_string_to_vec(char *in);
  21. static void
  22. dsgw_vec_convert (char** vec)
  23. /* Convert input from the charset named in it (if any) to UTF_8.
  24. Either return s, or free(s) and return the converted string.
  25. */
  26. {
  27. static const char* prefix = "charset=";
  28. const size_t prefix_len = strlen (prefix);
  29. char** v;
  30. if (vec) for (v = vec; *v; ++v) {
  31. if (!strncmp (*v, prefix, prefix_len)) {
  32. char* charset = *v + prefix_len;
  33. UConverter* converter = NULL;
  34. UErrorCode err = U_ZERO_ERROR;
  35. if ( ! is_UTF_8 (charset) && (converter = ucnv_open(charset, &err)) &&
  36. (err == U_ZERO_ERROR) ) {
  37. for (v = vec; *v; ++v) {
  38. char* s = strchr (*v, '=');
  39. if (s != NULL) {
  40. char *t = NULL;
  41. const size_t nlen = (++s) - *v;
  42. const size_t slen = strlen (s);
  43. size_t tlen = 0;
  44. size_t reallen = 0;
  45. int result;
  46. if (ucnv_getMaxCharSize(converter) == 1) {
  47. tlen = slen + 2; /* best case - ascii or other 7/8 bit */
  48. } else { /* assume worst case utf8 - each char is 3 bytes */
  49. tlen = (slen * 3) + 2;
  50. }
  51. do {
  52. char *tptr;
  53. size_t realSlen = 0;
  54. err = U_ZERO_ERROR;
  55. if (t) {
  56. t = dsgw_ch_realloc(t, nlen + tlen);
  57. } else {
  58. t = dsgw_ch_malloc(nlen + tlen);
  59. }
  60. tptr = t + nlen;
  61. /* copy the converted characters into t after the '=', and
  62. leave room for the trailing 0 */
  63. result = dsgw_convert(DSGW_TO_UTF8, converter,
  64. &tptr, (tlen - nlen - 1), &reallen,
  65. s, slen, &realSlen, &err);
  66. tlen += slen; /* if failed, make more room */
  67. } while (result == 0);
  68. if ((result == 1) && (err == U_ZERO_ERROR)) {
  69. memcpy (t, *v, nlen);
  70. t[nlen+reallen] = '\0';
  71. free (*v);
  72. *v = t;
  73. } else {
  74. free (t);
  75. }
  76. ucnv_reset (converter); /* back to initial shift state */
  77. }
  78. }
  79. ucnv_close (converter);
  80. }
  81. if (U_FAILURE(err)) {
  82. dsgw_error(DSGW_ERR_CHARSET_NOT_SUPPORTED, charset, 0, 0, 0);
  83. }
  84. break;
  85. }
  86. }
  87. }
  88. /* Read in the variables from stdin, unescape them, and then put them in
  89. * the static vector.
  90. *
  91. * Return 0 if all goes well; DSGW error code otherwise
  92. */
  93. int
  94. dsgw_post_begin(FILE *in)
  95. {
  96. char *ct, *vars = NULL, *tmp = NULL;
  97. int cl;
  98. if (( ct = getenv( "CONTENT_TYPE" )) == NULL ||
  99. strcasecmp( ct, "application/x-www-form-urlencoded" ) != 0 ||
  100. ( tmp = getenv( "CONTENT_LENGTH" )) == NULL ) {
  101. return( DSGW_ERR_BADFORMDATA );
  102. }
  103. cl = atoi(tmp);
  104. vars = (char *)dsgw_ch_malloc(cl+1);
  105. if ( fread(vars, 1, cl, in) != cl ) {
  106. return( DSGW_ERR_BADFORMDATA );
  107. }
  108. vars[cl] = '\0';
  109. #ifdef DSGW_DEBUG
  110. dsgw_log ("vars=\"%s\"\n", vars);
  111. #endif
  112. formvars = dsgw_string_to_vec (vars);
  113. free( vars );
  114. dsgw_vec_convert (formvars);
  115. #ifdef DSGW_DEBUG
  116. dsgw_logstringarray( "formvars", formvars );
  117. if (0) {
  118. char** var = formvars;
  119. if (var) {
  120. printf ("Content-type: text/html;charset=UTF-8\n\n<HTML><BODY>\n");
  121. for (; *var; ++var) {
  122. printf ("%s<br>\n", *var);
  123. }
  124. printf ("</BODY></HTML>\n");
  125. exit (1);
  126. }
  127. }
  128. #endif
  129. return( 0 );
  130. }
  131. /* Unescape the %xx variables as they're sent in. */
  132. void
  133. dsgw_form_unescape(char *str)
  134. {
  135. register int x = 0, y = 0;
  136. int l = strlen(str);
  137. char digit;
  138. while(x < l) {
  139. if((str[x] == '%') && (x < (l - 2))) {
  140. ++x;
  141. digit = (str[x] >= 'A' ?
  142. ((str[x] & 0xdf) - 'A')+10 : (str[x] - '0'));
  143. digit *= 16;
  144. ++x;
  145. digit += (str[x] >= 'A' ?
  146. ((str[x] & 0xdf) - 'A')+10 : (str[x] - '0'));
  147. str[y] = digit;
  148. }
  149. else if(str[x] == '+') {
  150. str[y] = ' ';
  151. } else {
  152. str[y] = str[x];
  153. }
  154. x++;
  155. y++;
  156. }
  157. str[y] = '\0';
  158. }
  159. /* Return the value of a POSTed variable, or NULL if none was sent. */
  160. char *
  161. dsgw_get_cgi_var(char *varname, int required)
  162. {
  163. register int x = 0;
  164. int len = strlen(varname);
  165. char *ans = NULL;
  166. while(formvars != NULL && formvars[x]) {
  167. /* We want to get rid of the =, so len, len+1 */
  168. if((!strncmp(formvars[x], varname, len)) &&
  169. (*(formvars[x]+len) == '=')) {
  170. ans = dsgw_ch_strdup(formvars[x] + len + 1);
  171. if(!strcmp(ans, "")) {
  172. free(ans);
  173. ans = NULL;
  174. }
  175. break;
  176. } else
  177. x++;
  178. }
  179. if ( required == DSGW_CGIVAR_REQUIRED && ans == NULL ) {
  180. char errbuf[ 256 ];
  181. PR_snprintf( errbuf, 256,
  182. XP_GetClientStr(DBT_missingFormDataElement100s_), varname );
  183. dsgw_error( DSGW_ERR_BADFORMDATA, errbuf, DSGW_ERROPT_EXIT, 0, NULL );
  184. }
  185. return ans;
  186. }
  187. /*
  188. * Return integer equivalent of POSTed value. If no variable POSTed,
  189. * return defval.
  190. */
  191. int
  192. dsgw_get_int_var( char *varname, int required, int defval )
  193. {
  194. char *val;
  195. int rc;
  196. if (( val = dsgw_get_cgi_var( varname, required )) == NULL ) {
  197. rc = defval;
  198. } else {
  199. rc = atoi( val );
  200. free( val );
  201. }
  202. return( rc );
  203. }
  204. /*
  205. * Return non-zero if POSTed variable is "true" or "yes". If !required
  206. * and no variable POSTed, return defval.
  207. */
  208. int
  209. dsgw_get_boolean_var( char *varname, int required, int defval )
  210. {
  211. char *val;
  212. int rc;
  213. if (( val = dsgw_get_cgi_var( varname, required )) == NULL ) {
  214. rc = defval;
  215. } else {
  216. rc = ( strcasecmp( val, "true" ) == 0 ||
  217. strcasecmp( val, "yes" ) == 0 );
  218. free( val );
  219. }
  220. return( rc );
  221. }
  222. /*
  223. * If a CGI variable named "varname_escaped" was POST'd, unescape it and
  224. * return its value.
  225. * Otherwise if "varname" is not NULL and a CGI variable called "varname"
  226. * was POST'd, return its value.
  227. * Otherwise return NULL.
  228. */
  229. char *
  230. dsgw_get_escaped_cgi_var( char *varname_escaped, char *varname, int required )
  231. {
  232. char *val;
  233. if (( val = dsgw_get_cgi_var( varname_escaped,
  234. ( varname == NULL ) ? required: DSGW_CGIVAR_OPTIONAL )) != NULL ) {
  235. dsgw_form_unescape( val );
  236. } else if ( varname != NULL ) {
  237. val = dsgw_get_cgi_var( varname, required );
  238. }
  239. return( val );
  240. }
  241. /* Convert the input from stdin to a usable variable vector. */
  242. static char **
  243. dsgw_string_to_vec(char *in)
  244. {
  245. char **ans;
  246. int vars = 0;
  247. register int x = 0;
  248. char *tmp;
  249. while(in[x])
  250. if(in[x++]=='=')
  251. vars++;
  252. ans = (char **) dsgw_ch_malloc((sizeof(char *)) * (vars+1));
  253. x=0;
  254. /* strtok() is not MT safe, but it is okay to call here because it is used in monothreaded env */
  255. tmp = strtok(in, "&");
  256. ans[x]=dsgw_ch_strdup(tmp);
  257. dsgw_form_unescape(ans[x++]);
  258. while((tmp = strtok(NULL, "&"))) {
  259. if ( strchr( tmp, '=' ) == NULL ) {
  260. break;
  261. }
  262. ans[x] = dsgw_ch_strdup(tmp);
  263. dsgw_form_unescape(ans[x++]);
  264. }
  265. ans[x] = NULL;
  266. return(ans);
  267. }
  268. /*
  269. * Step through all the CGI POSTed variables. A malloc'd copy of the variable
  270. * name is returned and *valuep is set to point to the value (not malloc'd).
  271. * If there are no more variables, NULL is returned.
  272. *
  273. * The first time this is called, *indexp should be zero. On subsequent
  274. * calls, pass the same indexp as on the first call.
  275. */
  276. char *
  277. dsgw_next_cgi_var( int *indexp, char **valuep )
  278. {
  279. char *name;
  280. int namelen;
  281. if ( formvars == NULL || formvars[ *indexp ] == NULL ) {
  282. return( NULL );
  283. }
  284. if (( *valuep = strchr( formvars[ *indexp ], '=' )) == NULL ) {
  285. namelen = strlen( formvars[ *indexp ] );
  286. } else {
  287. namelen = *valuep - formvars[ *indexp ];
  288. ++(*valuep);
  289. }
  290. name = dsgw_ch_malloc( namelen + 1 );
  291. memcpy( name, formvars[ *indexp ], namelen );
  292. name[ namelen ] = '\0';
  293. *indexp += 1;
  294. return( name );
  295. }
  296. /*
  297. * converts a buffer of characters to/from UTF8 from/to a native charset
  298. * the given converter will handle the native charset
  299. * returns 0 if not all of source was converted, 1 if all of source
  300. * was converted, -1 upon error
  301. * all of source will be converted if there is enough room in dest to contain
  302. * the entire conversion, or if dest is null and we are malloc'ing space for dest
  303. */
  304. int
  305. dsgw_convert(
  306. int direction, /* false for native->utf8, true for utf8->native */
  307. UConverter *nativeConv, /* convert from/to native charset */
  308. char **dest, /* *dest is the destination buffer - if *dest == NULL, it will be malloced */
  309. size_t destSize, /* size of dest buffer (ignored if *dest == NULL) */
  310. size_t *nDest, /* number of chars written to dest */
  311. const char *source, /* source buffer to convert - either in native encoding (from) or utf8 (to) */
  312. size_t sourceSize, /* size of source buffer - if 0, assume source is NULL terminated */
  313. size_t *nSource, /* number of chars read from source buffer */
  314. UErrorCode *pErrorCode /* will be reset each time through */
  315. )
  316. {
  317. #define CHUNK_SIZE 1024
  318. UChar pivotBuffer[CHUNK_SIZE];
  319. UChar *pivot, *pivot2;
  320. static UConverter *utf8Converter = NULL;
  321. UConverter *inConverter, *outConverter;
  322. char *myDest;
  323. const char *mySource;
  324. const char *destLimit;
  325. const char *sourceLimit;
  326. int destAlloc = 0; /* set to true if we allocated *dest */
  327. *pErrorCode = U_ZERO_ERROR;
  328. if(sourceSize<0 || source==NULL || nDest==NULL || nSource==NULL)
  329. {
  330. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  331. return -1;
  332. }
  333. *nSource = 0;
  334. *nDest = 0;
  335. /* if source size is 0, assume source is null terminated and use strlen */
  336. if(sourceSize==0) {
  337. sourceSize = strlen(source);
  338. }
  339. /* create the converters */
  340. if (!utf8Converter) {
  341. utf8Converter = ucnv_open(UNICODE_ENCODING_UTF_8, pErrorCode);
  342. if(U_FAILURE(*pErrorCode)) {
  343. return -1;
  344. }
  345. }
  346. /* reset utf8Converter if done or error */
  347. if (direction) {
  348. inConverter = utf8Converter; /* source is utf8 */
  349. outConverter = nativeConv; /* dest is native charset */
  350. } else {
  351. inConverter = nativeConv; /* source is native charset */
  352. outConverter = utf8Converter; /* dest is utf8 */
  353. }
  354. /* if dest is NULL, allocate space for it - may be reallocated later */
  355. if (!*dest) {
  356. /* good approximation of size is n chars in source * max dest char size */
  357. destSize = ucnv_getMaxCharSize(outConverter) * sourceSize;
  358. *dest = dsgw_ch_malloc(destSize);
  359. destAlloc = 1;
  360. }
  361. /* set up the other variables */
  362. mySource = source;
  363. sourceLimit = source + sourceSize;
  364. pivot = pivot2 = pivotBuffer;
  365. myDest = *dest;
  366. destLimit = *dest + destSize;
  367. /*
  368. * loops until the input buffer is completely consumed
  369. * or an error is encountered;
  370. * first we convert from inConverter codepage to Unicode
  371. * then from Unicode to outConverter codepage
  372. */
  373. do {
  374. pivot = pivotBuffer;
  375. ucnv_toUnicode(inConverter,
  376. &pivot, pivotBuffer + CHUNK_SIZE,
  377. &mySource, sourceLimit,
  378. NULL,
  379. TRUE,
  380. pErrorCode);
  381. /* U_BUFFER_OVERFLOW_ERROR only means that the pivot buffer is full */
  382. if(U_SUCCESS(*pErrorCode) || (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)) {
  383. pivot2 = pivotBuffer;
  384. /* convert and write bytes from the pivot buffer to the dest -
  385. if dest is allocated and we run out of space in dest, grow
  386. dest and try again - otherwise, just bail out and let the
  387. caller know that their dest buffer is full and they need
  388. to try again */
  389. do {
  390. *pErrorCode = U_ZERO_ERROR;
  391. ucnv_fromUnicode(outConverter,
  392. &myDest, destLimit,
  393. (const UChar **)&pivot2, pivot,
  394. NULL,
  395. (UBool)(mySource == sourceLimit),
  396. pErrorCode);
  397. /* we overflowed dest and dest is allocated, so let's increase
  398. the dest size */
  399. if ((*pErrorCode == U_BUFFER_OVERFLOW_ERROR) && destAlloc) {
  400. /* figure out where myDest was pointing */
  401. size_t myDestOffset = myDest - *dest;
  402. /* probably don't need this much more room . . . */
  403. destSize += CHUNK_SIZE;
  404. /* realloc *dest for new size */
  405. *dest = dsgw_ch_realloc(*dest, destSize);
  406. /* reset myDest in new *dest */
  407. myDest = *dest + myDestOffset;
  408. /* set new destLimit */
  409. destLimit = *dest + destSize;
  410. } else {
  411. break; /* skip it */
  412. }
  413. } while(*pErrorCode == U_BUFFER_OVERFLOW_ERROR);
  414. /*
  415. * If this overflows the fixed size dest, then we must stop
  416. * converting and return what we already have
  417. * in this case, pErrorCode will be buffer overflow error because
  418. * we have overflowed the dest buffer
  419. * the outer while loop will break because !U_SUCCESS
  420. */
  421. }
  422. } while(U_SUCCESS(*pErrorCode) && source != sourceLimit);
  423. *nSource = mySource - source; /* n chars read from source */
  424. *nDest = myDest - *dest; /* n chars written to dest */
  425. if (U_SUCCESS(*pErrorCode) && source == sourceLimit) {
  426. /* reset internal converter */
  427. ucnv_reset(utf8Converter);
  428. return 1; /* converted entire string */
  429. }
  430. if (source != sourceLimit) {
  431. /* not done with conversion yet */
  432. /* no reset here - preserve state for next call */
  433. return 0;
  434. }
  435. /* error */
  436. ucnv_reset(utf8Converter);
  437. return -1;
  438. }