uri.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. // Copyright 2023 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package protocol
  5. // This file declares URI, DocumentUri, and its methods.
  6. //
  7. // For the LSP definition of these types, see
  8. // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
  9. import (
  10. "fmt"
  11. "net/url"
  12. "path/filepath"
  13. "strings"
  14. "unicode"
  15. )
  // DocumentUri is the URI of a document open in a client editor.
  //
  // The LSP specification cautions:
  //
  //	Care should be taken to handle encoding in URIs. For
  //	example, some clients (such as VS Code) may encode colons
  //	in drive letters while others do not. The URIs below are
  //	both valid, but clients and servers should be consistent
  //	with the form they use themselves to ensure the other party
  //	doesn’t interpret them as distinct URIs. Clients and
  //	servers should not assume that each other are encoding the
  //	same way (for example a client encoding colons in drive
  //	letters cannot assume server responses will have encoded
  //	colons). The same applies to casing of drive letters - one
  //	party should not assume the other party will return paths
  //	with drive letters cased the same as it.
  //
  //	file:///c:/project/readme.md
  //	file:///C%3A/project/readme.md
  //
  // To absorb these variations, a DocumentUri is rewritten into one
  // canonical form while it is decoded from JSON;
  // see [DocumentUri.UnmarshalText] for details.
  type DocumentUri string
  // URI is an alias for string that holds an arbitrary URL (for example an
  // https URL); it does not have to refer to a file.
  type URI = string
  41. // UnmarshalText implements decoding of DocumentUri values.
  42. //
  43. // In particular, it implements a systematic correction of various odd
  44. // features of the definition of DocumentUri in the LSP spec that
  45. // appear to be workarounds for bugs in VS Code. For example, it may
  46. // URI-encode the URI itself, so that colon becomes %3A, and it may
  47. // send file://foo.go URIs that have two slashes (not three) and no
  48. // hostname.
  49. //
  50. // We use UnmarshalText, not UnmarshalJSON, because it is called even
  51. // for non-addressable values such as keys and values of map[K]V,
  52. // where there is no pointer of type *K or *V on which to call
  53. // UnmarshalJSON. (See Go issue #28189 for more detail.)
  54. //
  55. // Non-empty DocumentUris are valid "file"-scheme URIs.
  56. // The empty DocumentUri is valid.
  57. func (uri *DocumentUri) UnmarshalText(data []byte) (err error) {
  58. *uri, err = ParseDocumentUri(string(data))
  59. return
  60. }
  61. // Path returns the file path for the given URI.
  62. //
  63. // DocumentUri("").Path() returns the empty string.
  64. //
  65. // Path panics if called on a URI that is not a valid filename.
  66. func (uri DocumentUri) Path() string {
  67. filename, err := filename(uri)
  68. if err != nil {
  69. // e.g. ParseRequestURI failed.
  70. //
  71. // This can only affect DocumentUris created by
  72. // direct string manipulation; all DocumentUris
  73. // received from the client pass through
  74. // ParseRequestURI, which ensures validity.
  75. panic(err)
  76. }
  77. return filepath.FromSlash(filename)
  78. }
  79. // Dir returns the URI for the directory containing the receiver.
  80. func (uri DocumentUri) Dir() DocumentUri {
  81. // This function could be more efficiently implemented by avoiding any call
  82. // to Path(), but at least consolidates URI manipulation.
  83. return URIFromPath(uri.DirPath())
  84. }
  85. // DirPath returns the file path to the directory containing this URI, which
  86. // must be a file URI.
  87. func (uri DocumentUri) DirPath() string {
  88. return filepath.Dir(uri.Path())
  89. }
  90. func filename(uri DocumentUri) (string, error) {
  91. if uri == "" {
  92. return "", nil
  93. }
  94. // This conservative check for the common case
  95. // of a simple non-empty absolute POSIX filename
  96. // avoids the allocation of a net.URL.
  97. if strings.HasPrefix(string(uri), "file:///") {
  98. rest := string(uri)[len("file://"):] // leave one slash
  99. for i := range len(rest) {
  100. b := rest[i]
  101. // Reject these cases:
  102. if b < ' ' || b == 0x7f || // control character
  103. b == '%' || b == '+' || // URI escape
  104. b == ':' || // Windows drive letter
  105. b == '@' || b == '&' || b == '?' { // authority or query
  106. goto slow
  107. }
  108. }
  109. return rest, nil
  110. }
  111. slow:
  112. u, err := url.ParseRequestURI(string(uri))
  113. if err != nil {
  114. return "", err
  115. }
  116. if u.Scheme != fileScheme {
  117. return "", fmt.Errorf("only file URIs are supported, got %q from %q", u.Scheme, uri)
  118. }
  119. // If the URI is a Windows URI, we trim the leading "/" and uppercase
  120. // the drive letter, which will never be case sensitive.
  121. if isWindowsDriveURIPath(u.Path) {
  122. u.Path = strings.ToUpper(string(u.Path[1])) + u.Path[2:]
  123. }
  124. return u.Path, nil
  125. }
  126. // ParseDocumentUri interprets a string as a DocumentUri, applying VS
  127. // Code workarounds; see [DocumentUri.UnmarshalText] for details.
  128. func ParseDocumentUri(s string) (DocumentUri, error) {
  129. if s == "" {
  130. return "", nil
  131. }
  132. if !strings.HasPrefix(s, "file://") {
  133. return "", fmt.Errorf("DocumentUri scheme is not 'file': %s", s)
  134. }
  135. // VS Code sends URLs with only two slashes,
  136. // which are invalid. golang/go#39789.
  137. if !strings.HasPrefix(s, "file:///") {
  138. s = "file:///" + s[len("file://"):]
  139. }
  140. // Even though the input is a URI, it may not be in canonical form. VS Code
  141. // in particular over-escapes :, @, etc. Unescape and re-encode to canonicalize.
  142. path, err := url.PathUnescape(s[len("file://"):])
  143. if err != nil {
  144. return "", err
  145. }
  146. // File URIs from Windows may have lowercase drive letters.
  147. // Since drive letters are guaranteed to be case insensitive,
  148. // we change them to uppercase to remain consistent.
  149. // For example, file:///c:/x/y/z becomes file:///C:/x/y/z.
  150. if isWindowsDriveURIPath(path) {
  151. path = path[:1] + strings.ToUpper(string(path[1])) + path[2:]
  152. }
  153. u := url.URL{Scheme: fileScheme, Path: path}
  154. return DocumentUri(u.String()), nil
  155. }
  156. // URIFromPath returns DocumentUri for the supplied file path.
  157. // Given "", it returns "".
  158. func URIFromPath(path string) DocumentUri {
  159. if path == "" {
  160. return ""
  161. }
  162. if !isWindowsDrivePath(path) {
  163. if abs, err := filepath.Abs(path); err == nil {
  164. path = abs
  165. }
  166. }
  167. // Check the file path again, in case it became absolute.
  168. if isWindowsDrivePath(path) {
  169. path = "/" + strings.ToUpper(string(path[0])) + path[1:]
  170. }
  171. path = filepath.ToSlash(path)
  172. u := url.URL{
  173. Scheme: fileScheme,
  174. Path: path,
  175. }
  176. return DocumentUri(u.String())
  177. }
  // fileScheme is the URI scheme used when building and validating
  // DocumentUris.
  const fileScheme = "file"
  // isWindowsDrivePath reports whether the file path begins with a Windows
  // drive prefix: an ASCII letter followed by ":". For example: C:/x/y/z.
  //
  // Paths shorter than three bytes, including a bare "C:", are not
  // treated as drive paths.
  func isWindowsDrivePath(path string) bool {
  	if len(path) < 3 {
  		return false
  	}
  	// path[0] is a single byte, not a decoded rune. Without the ASCII
  	// bound, a byte such as 0xE9 would be read as the Latin-1 letter 'é'
  	// and accepted, and callers that uppercase string(path[0]) would then
  	// rewrite that byte as a multi-byte sequence, corrupting the path.
  	drive := rune(path[0])
  	return drive <= unicode.MaxASCII && unicode.IsLetter(drive) && path[1] == ':'
  }
  // isWindowsDriveURIPath reports whether a URI path has the form used by
  // Windows file URIs: a slash, an ASCII drive letter, and a colon
  // (e.g. "/C:"), followed by at least one more byte. The net/url package
  // does not specially handle Windows paths (see golang/go#6027), so the
  // drive prefix is detected here.
  func isWindowsDriveURIPath(uri string) bool {
  	if len(uri) < 4 {
  		return false
  	}
  	// uri[1] is a single byte, not a decoded rune. Without the ASCII
  	// bound, a byte such as 0xE9 would be read as the Latin-1 letter 'é'
  	// and accepted, and callers that uppercase string(uri[1]) would then
  	// rewrite that byte as a multi-byte sequence, corrupting the path.
  	drive := rune(uri[1])
  	return uri[0] == '/' && drive <= unicode.MaxASCII && unicode.IsLetter(drive) && uri[2] == ':'
  }