uri.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. // Copyright 2023 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package protocol
// This file declares the URI and DocumentURI types and the methods of
// DocumentURI.
//
// For the LSP definition of these types, see
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
  9. import (
  10. "fmt"
  11. "net/url"
  12. "path/filepath"
  13. "strings"
  14. "unicode"
  15. )
// A DocumentURI is the URI of a client editor document.
//
// According to the LSP specification:
//
//	Care should be taken to handle encoding in URIs. For
//	example, some clients (such as VS Code) may encode colons
//	in drive letters while others do not. The URIs below are
//	both valid, but clients and servers should be consistent
//	with the form they use themselves to ensure the other party
//	doesn’t interpret them as distinct URIs. Clients and
//	servers should not assume that each other are encoding the
//	same way (for example a client encoding colons in drive
//	letters cannot assume server responses will have encoded
//	colons). The same applies to casing of drive letters - one
//	party should not assume the other party will return paths
//	with drive letters cased the same as it.
//
//	file:///c:/project/readme.md
//	file:///C%3A/project/readme.md
//
// To keep the form consistent, incoming values are normalized (escapes
// are decoded and re-encoded, and Windows drive letters are
// upper-cased) during JSON unmarshalling;
// see [DocumentURI.UnmarshalText] for details.
type DocumentURI string
// A URI is an arbitrary URL (e.g. https), not necessarily a file.
//
// It is declared as an alias of string, so URI and string values can
// be used interchangeably without conversion.
type URI = string
  41. // UnmarshalText implements decoding of DocumentUri values.
  42. //
  43. // In particular, it implements a systematic correction of various odd
  44. // features of the definition of DocumentUri in the LSP spec that
  45. // appear to be workarounds for bugs in VS Code. For example, it may
  46. // URI-encode the URI itself, so that colon becomes %3A, and it may
  47. // send file://foo.go URIs that have two slashes (not three) and no
  48. // hostname.
  49. //
  50. // We use UnmarshalText, not UnmarshalJSON, because it is called even
  51. // for non-addressable values such as keys and values of map[K]V,
  52. // where there is no pointer of type *K or *V on which to call
  53. // UnmarshalJSON. (See Go issue #28189 for more detail.)
  54. //
  55. // Non-empty DocumentUris are valid "file"-scheme URIs.
  56. // The empty DocumentUri is valid.
  57. func (uri *DocumentURI) UnmarshalText(data []byte) (err error) {
  58. *uri, err = ParseDocumentURI(string(data))
  59. return
  60. }
  61. // Path returns the file path for the given URI.
  62. //
  63. // DocumentUri("").Path() returns the empty string.
  64. //
  65. // Path panics if called on a URI that is not a valid filename.
  66. func (uri DocumentURI) Path() (string, error) {
  67. filename, err := filename(uri)
  68. if err != nil {
  69. // e.g. ParseRequestURI failed.
  70. //
  71. // This can only affect DocumentUris created by
  72. // direct string manipulation; all DocumentUris
  73. // received from the client pass through
  74. // ParseRequestURI, which ensures validity.
  75. return "", fmt.Errorf("invalid URI %q: %w", uri, err)
  76. }
  77. return filepath.FromSlash(filename), nil
  78. }
  79. // Dir returns the URI for the directory containing the receiver.
  80. func (uri DocumentURI) Dir() (DocumentURI, error) {
  81. // XXX: Legacy comment:
  82. // This function could be more efficiently implemented by avoiding any call
  83. // to Path(), but at least consolidates URI manipulation.
  84. path, err := uri.DirPath()
  85. if err != nil {
  86. return "", fmt.Errorf("invalid URI %q: %w", uri, err)
  87. }
  88. return URIFromPath(path), nil
  89. }
  90. // DirPath returns the file path to the directory containing this URI, which
  91. // must be a file URI.
  92. func (uri DocumentURI) DirPath() (string, error) {
  93. path, err := uri.Path()
  94. if err != nil {
  95. return "", err
  96. }
  97. return filepath.Dir(path), nil
  98. }
  99. func filename(uri DocumentURI) (string, error) {
  100. if uri == "" {
  101. return "", nil
  102. }
  103. // This conservative check for the common case
  104. // of a simple non-empty absolute POSIX filename
  105. // avoids the allocation of a net.URL.
  106. if strings.HasPrefix(string(uri), "file:///") {
  107. rest := string(uri)[len("file://"):] // leave one slash
  108. for i := range len(rest) {
  109. b := rest[i]
  110. // Reject these cases:
  111. if b < ' ' || b == 0x7f || // control character
  112. b == '%' || b == '+' || // URI escape
  113. b == ':' || // Windows drive letter
  114. b == '@' || b == '&' || b == '?' { // authority or query
  115. goto slow
  116. }
  117. }
  118. return rest, nil
  119. }
  120. slow:
  121. u, err := url.ParseRequestURI(string(uri))
  122. if err != nil {
  123. return "", err
  124. }
  125. if u.Scheme != fileScheme {
  126. return "", fmt.Errorf("only file URIs are supported, got %q from %q", u.Scheme, uri)
  127. }
  128. // If the URI is a Windows URI, we trim the leading "/" and uppercase
  129. // the drive letter, which will never be case sensitive.
  130. if isWindowsDriveURIPath(u.Path) {
  131. u.Path = strings.ToUpper(string(u.Path[1])) + u.Path[2:]
  132. }
  133. return u.Path, nil
  134. }
  135. // ParseDocumentURI interprets a string as a DocumentUri, applying VS
  136. // Code workarounds; see [DocumentURI.UnmarshalText] for details.
  137. func ParseDocumentURI(s string) (DocumentURI, error) {
  138. if s == "" {
  139. return "", nil
  140. }
  141. if !strings.HasPrefix(s, "file://") {
  142. return "", fmt.Errorf("DocumentUri scheme is not 'file': %s", s)
  143. }
  144. // VS Code sends URLs with only two slashes,
  145. // which are invalid. golang/go#39789.
  146. if !strings.HasPrefix(s, "file:///") {
  147. s = "file:///" + s[len("file://"):]
  148. }
  149. // Even though the input is a URI, it may not be in canonical form. VS Code
  150. // in particular over-escapes :, @, etc. Unescape and re-encode to canonicalize.
  151. path, err := url.PathUnescape(s[len("file://"):])
  152. if err != nil {
  153. return "", err
  154. }
  155. // File URIs from Windows may have lowercase drive letters.
  156. // Since drive letters are guaranteed to be case insensitive,
  157. // we change them to uppercase to remain consistent.
  158. // For example, file:///c:/x/y/z becomes file:///C:/x/y/z.
  159. if isWindowsDriveURIPath(path) {
  160. path = path[:1] + strings.ToUpper(string(path[1])) + path[2:]
  161. }
  162. u := url.URL{Scheme: fileScheme, Path: path}
  163. return DocumentURI(u.String()), nil
  164. }
  165. // URIFromPath returns DocumentUri for the supplied file path.
  166. // Given "", it returns "".
  167. func URIFromPath(path string) DocumentURI {
  168. if path == "" {
  169. return ""
  170. }
  171. if !isWindowsDrivePath(path) {
  172. if abs, err := filepath.Abs(path); err == nil {
  173. path = abs
  174. }
  175. }
  176. // Check the file path again, in case it became absolute.
  177. if isWindowsDrivePath(path) {
  178. path = "/" + strings.ToUpper(string(path[0])) + path[1:]
  179. }
  180. path = filepath.ToSlash(path)
  181. u := url.URL{
  182. Scheme: fileScheme,
  183. Path: path,
  184. }
  185. return DocumentURI(u.String())
  186. }
// fileScheme is the URI scheme used for DocumentURIs: it is the scheme
// required when extracting a file name from a URI and the scheme set
// when a URI is built from a path.
const fileScheme = "file"
  188. // isWindowsDrivePath returns true if the file path is of the form used by
  189. // Windows. We check if the path begins with a drive letter, followed by a ":".
  190. // For example: C:/x/y/z.
  191. func isWindowsDrivePath(path string) bool {
  192. if len(path) < 3 {
  193. return false
  194. }
  195. return unicode.IsLetter(rune(path[0])) && path[1] == ':'
  196. }
  197. // isWindowsDriveURIPath returns true if the file URI is of the format used by
  198. // Windows URIs. The url.Parse package does not specially handle Windows paths
  199. // (see golang/go#6027), so we check if the URI path has a drive prefix (e.g. "/C:").
  200. func isWindowsDriveURIPath(uri string) bool {
  201. if len(uri) < 4 {
  202. return false
  203. }
  204. return uri[0] == '/' && unicode.IsLetter(rune(uri[1])) && uri[2] == ':'
  205. }