fileurl.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /** BEGIN COPYRIGHT BLOCK
  2. * This Program is free software; you can redistribute it and/or modify it under
  3. * the terms of the GNU General Public License as published by the Free Software
  4. * Foundation; version 2 of the License.
  5. *
  6. * This Program is distributed in the hope that it will be useful, but WITHOUT
  7. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. *
  10. * You should have received a copy of the GNU General Public License along with
  11. * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
  12. * Place, Suite 330, Boston, MA 02111-1307 USA.
  13. *
  14. * In addition, as a special exception, Red Hat, Inc. gives You the additional
  15. * right to link the code of this Program with code not covered under the GNU
  16. * General Public License ("Non-GPL Code") and to distribute linked combinations
  17. * including the two, subject to the limitations in this paragraph. Non-GPL Code
  18. * permitted under this exception must only link to the code of this Program
  19. * through those well defined interfaces identified in the file named EXCEPTION
  20. * found in the source code files (the "Approved Interfaces"). The files of
  21. * Non-GPL Code may instantiate templates or use macros or inline functions from
  22. * the Approved Interfaces without causing the resulting work to be covered by
  23. * the GNU General Public License. Only Red Hat, Inc. may make changes or
  24. * additions to the list of Approved Interfaces. You must obey the GNU General
  25. * Public License in all respects for all of the Program code and other code used
  26. * in conjunction with the Program except the Non-GPL Code covered by this
  27. * exception. If you modify this file, you may extend this exception to your
  28. * version of the file, but you are not obligated to do so. If you do not wish to
  29. * provide this exception without modification, you must delete this exception
  30. * statement from your version and license this file solely under the GPL without
  31. * exception.
  32. *
  33. *
  34. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  35. * Copyright (C) 2005 Red Hat, Inc.
  36. * All rights reserved.
  37. * END COPYRIGHT BLOCK **/
  38. #ifdef HAVE_CONFIG_H
  39. # include <config.h>
  40. #endif
  41. /*
  42. * LDIF tools fileurl.c -- functions for handling file URLs.
  43. * Used by ldif_parse_line.
  44. */
  45. #include <stdio.h>
  46. #include <stdlib.h>
  47. #include <string.h>
  48. #include <errno.h>
  49. #include "fileurl.h"
  50. #include <ctype.h> /* for isalpha() */
  51. static int str_starts_with( char *s, char *prefix );
  52. static void hex_unescape( char *s );
  53. static int unhex( char c );
  54. static void strcpy_escaped_and_convert( char *s1, char *s2 );
  55. /*
  56. * Convert a file URL to a local path.
  57. *
  58. * If successful, LDIF_FILEURL_SUCCESS is returned and *localpathp is
  59. * set point to an allocated string. If not, an different LDIF_FILEURL_
  60. * error code is returned.
  61. *
  62. * See RFCs 1738 and 2396 for a specification for file URLs... but
  63. * Netscape Navigator seems to be a bit more lenient in what it will
  64. * accept, especially on Windows).
  65. *
  66. * This function parses file URLs of these three forms:
  67. *
  68. * file:///path
  69. * file:/path
  70. * file://localhost/path
  71. * file://host/path (rejected with a ...NONLOCAL error)
  72. *
  73. * On Windows, we convert leading drive letters of the form C| to C:
  74. * and if a drive letter is present we strip off the slash that precedes
  75. * path. Otherwise, the leading slash is returned.
  76. *
  77. */
  78. int
  79. ldif_fileurl2path( char *fileurl, char **localpathp )
  80. {
  81. char *path;
  82. /*
  83. * Make sure this is a file name or URL we can handle.
  84. */
  85. if ( *fileurl == '/' ||
  86. ( isalpha( fileurl[0] ) && ( fileurl[1] == '|' || fileurl[1] == ':' ) ) ) {
  87. path = fileurl;
  88. goto path_ready;
  89. } else if ( !str_starts_with( fileurl, "file:" )) {
  90. return( LDIF_FILEURL_NOTAFILEURL );
  91. }
  92. path = fileurl + 5; /* skip past "file:" scheme prefix */
  93. if ( *path != '/' ) {
  94. return( LDIF_FILEURL_MISSINGPATH );
  95. }
  96. ++path; /* skip past '/' at end of "file:/" */
  97. if ( *path == '/' ) {
  98. ++path; /* remainder is now host/path or /path */
  99. if ( *path != '/' ) {
  100. /*
  101. * Make sure it is for the local host.
  102. */
  103. if ( str_starts_with( path, "localhost/" )) {
  104. path += 9;
  105. } else {
  106. return( LDIF_FILEURL_NONLOCAL );
  107. }
  108. }
  109. } else { /* URL is of the form file:/path */
  110. --path;
  111. }
  112. /*
  113. * The remainder is now of the form /path. On Windows, skip past the
  114. * leading slash if a drive letter is present.
  115. */
  116. #ifdef _WIN32
  117. if ( isalpha( path[1] ) && ( path[2] == '|' || path[2] == ':' )) {
  118. ++path;
  119. }
  120. #endif /* _WIN32 */
  121. path_ready:
  122. /*
  123. * Duplicate the path so we can safely alter it.
  124. * Unescape any %HH sequences.
  125. */
  126. if (( path = strdup( path )) == NULL ) {
  127. return( LDIF_FILEURL_NOMEMORY );
  128. }
  129. hex_unescape( path );
  130. #ifdef _WIN32
  131. /*
  132. * Convert forward slashes to backslashes for Windows. Also,
  133. * if we see a drive letter / vertical bar combination (e.g., c|)
  134. * at the beginning of the path, replace the '|' with a ':'.
  135. */
  136. {
  137. char *p;
  138. for ( p = path; *p != '\0'; ++p ) {
  139. if ( *p == '/' ) {
  140. *p = '\\';
  141. }
  142. }
  143. }
  144. if ( isalpha( path[0] ) && path[1] == '|' ) {
  145. path[1] = ':';
  146. }
  147. #endif /* _WIN32 */
  148. *localpathp = path;
  149. return( LDIF_FILEURL_SUCCESS );
  150. }
  151. /*
  152. * Convert a local path to a file URL.
  153. *
  154. * If successful, LDIF_FILEURL_SUCCESS is returned and *urlp is
  155. * set point to an allocated string. If not, an different LDIF_FILEURL_
  156. * error code is returned. At present, the only possible error is
  157. * LDIF_FILEURL_NOMEMORY.
  158. *
  159. * This function produces file URLs of the form file:path.
  160. *
  161. * On Windows, we convert leading drive letters to C|.
  162. *
  163. */
  164. int
  165. ldif_path2fileurl( char *path, char **urlp )
  166. {
  167. char *p, *url, *prefix ="file:";
  168. if ( NULL == path ) {
  169. path = "/";
  170. }
  171. /*
  172. * Allocate space for the URL, taking into account that path may
  173. * expand during the hex escaping process.
  174. */
  175. if (( url = malloc( strlen( prefix ) + 3 * strlen( path ) + 1 )) == NULL ) {
  176. return( LDIF_FILEURL_NOMEMORY );
  177. }
  178. strcpy( url, prefix );
  179. p = url + strlen( prefix );
  180. #ifdef _WIN32
  181. /*
  182. * On Windows, convert leading drive letters (e.g., C:) to the correct URL
  183. * syntax (e.g., C|).
  184. */
  185. if ( isalpha( path[0] ) && path[1] == ':' ) {
  186. *p++ = path[0];
  187. *p++ = '|';
  188. path += 2;
  189. *p = '\0';
  190. }
  191. #endif /* _WIN32 */
  192. /*
  193. * Append the path, encoding any URL-special characters using the %HH
  194. * convention.
  195. * On Windows, convert backwards slashes in the path to forward ones.
  196. */
  197. strcpy_escaped_and_convert( p, path );
  198. *urlp = url;
  199. return( LDIF_FILEURL_SUCCESS );
  200. }
  201. /*
  202. * Return a non-zero value if the string s begins with prefix and zero if not.
  203. */
  204. static int
  205. str_starts_with( char *s, char *prefix )
  206. {
  207. size_t prefix_len;
  208. if ( s == NULL || prefix == NULL ) {
  209. return( 0 );
  210. }
  211. prefix_len = strlen( prefix );
  212. if ( strlen( s ) < prefix_len ) {
  213. return( 0 );
  214. }
  215. return( strncmp( s, prefix, prefix_len ) == 0 );
  216. }
  217. /*
  218. * Remove URL hex escapes from s... done in place. The basic concept for
  219. * this routine is borrowed from the WWW library HTUnEscape() routine.
  220. *
  221. */
  222. static void
  223. hex_unescape( char *s )
  224. {
  225. char *p;
  226. for ( p = s; *s != '\0'; ++s ) {
  227. if ( *s == '%' ) {
  228. if ( *++s != '\0' ) {
  229. *p = unhex( *s ) << 4;
  230. }
  231. if ( *++s != '\0' ) {
  232. *p++ += unhex( *s );
  233. }
  234. } else {
  235. *p++ = *s;
  236. }
  237. }
  238. *p = '\0';
  239. }
  240. /*
  241. * Return the integer equivalent of one hex digit (in c).
  242. *
  243. */
  244. static int
  245. unhex( char c )
  246. {
  247. return( c >= '0' && c <= '9' ? c - '0'
  248. : c >= 'A' && c <= 'F' ? c - 'A' + 10
  249. : c - 'a' + 10 );
  250. }
  251. #define HREF_CHAR_ACCEPTABLE( c ) (( c >= '-' && c <= '9' ) || \
  252. ( c >= '@' && c <= 'Z' ) || \
  253. ( c == '_' ) || \
  254. ( c >= 'a' && c <= 'z' ))
  255. /*
  256. * Like strcat(), except if any URL-special characters are found in s2
  257. * they are escaped using the %HH convention and backslash characters are
  258. * converted to forward slashes on Windows.
  259. *
  260. * Maximum space needed in s1 is 3 * strlen( s2 ) + 1.
  261. *
  262. */
  263. static void
  264. strcpy_escaped_and_convert( char *s1, char *s2 )
  265. {
  266. char *p, *q;
  267. char *hexdig = "0123456789ABCDEF";
  268. p = s1 + strlen( s1 );
  269. for ( q = s2; *q != '\0'; ++q ) {
  270. #ifdef _WIN32
  271. if ( *q == '\\' ) {
  272. *p++ = '/';
  273. } else
  274. #endif /* _WIN32 */
  275. if ( HREF_CHAR_ACCEPTABLE( *q )) {
  276. *p++ = *q;
  277. } else {
  278. *p++ = '%';
  279. *p++ = hexdig[ 0x0F & ((*(unsigned char*)q) >> 4) ];
  280. *p++ = hexdig[ 0x0F & *q ];
  281. }
  282. }
  283. *p = '\0';
  284. }