stat_cache.go 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. package compositedav
  4. import (
  5. "bytes"
  6. "encoding/xml"
  7. "log"
  8. "net/http"
  9. "net/url"
  10. "sync"
  11. "time"
  12. "github.com/jellydator/ttlcache/v3"
  13. "tailscale.com/drive/driveimpl/shared"
  14. )
  15. var (
  16. notFound = newCacheEntry(http.StatusNotFound, nil)
  17. )
  18. // StatCache provides a cache for directory listings and file metadata.
  19. // Especially when used from the command-line, mapped WebDAV drives can
  20. // generate repetitive requests for the same file metadata. This cache helps
  21. // reduce the number of round-trips to the WebDAV server for such requests.
  22. // This is similar to the DirectoryCacheLifetime setting of Windows' built-in
  23. // SMB client, see
  24. // https://learn.microsoft.com/en-us/previous-versions/windows/it-pro/windows-7/ff686200(v=ws.10)
  25. //
  26. // StatCache is built specifically to cache the results of PROPFIND requests,
  27. // which come back as MultiStatus XML responses. Typical clients will issue two
  28. // kinds of PROPFIND:
  29. //
  30. // The first kind of PROPFIND is a directory listing performed to depth 1. At
  31. // this depth, the resulting XML will contain stats for the requested folder as
  32. // well as for all children of that folder.
  33. //
  34. // The second kind of PROPFIND is a file listing performed to depth 0. At this
  35. // depth, the resulting XML will contain stats only for the requested file.
  36. //
  37. // In order to avoid round-trips, when a PROPFIND at depth 0 is attempted, and
  38. // the requested file is not in the cache, StatCache will check to see if the
  39. // parent folder of that file is cached. If so, StatCache infers the correct
  40. // MultiStatus for the file according to the following logic:
  41. //
  42. // 1. If the parent folder is NotFound (404), treat the file itself as NotFound
  43. // 2. If the parent folder's XML doesn't contain the file, treat it as
  44. // NotFound.
  45. // 3. If the parent folder's XML contains the file, build a MultiStatus for the
  46. // file based on the parent's XML.
  47. //
  48. // To avoid inconsistencies from the perspective of the client, any operations
  49. // that modify the filesystem (e.g. PUT, MKDIR, etc.) should call invalidate()
  50. // to invalidate the cache.
  51. type StatCache struct {
  52. TTL time.Duration
  53. // mu guards the below values.
  54. mu sync.Mutex
  55. cachesByDepthAndPath map[int]*ttlcache.Cache[string, *cacheEntry]
  56. }
  57. // getOr checks the cache for the named value at the given depth. If a cached
  58. // value was found, it returns http.StatusMultiStatus along with the cached
  59. // value. Otherwise, it executes the given function and returns the resulting
  60. // status and value. If the function returned http.StatusMultiStatus, getOr
  61. // caches the resulting value at the given name and depth before returning.
  62. func (c *StatCache) getOr(name string, depth int, or func() (int, []byte)) (int, []byte) {
  63. ce := c.get(name, depth)
  64. if ce == nil {
  65. // Not cached, fetch value.
  66. status, raw := or()
  67. ce = newCacheEntry(status, raw)
  68. if status == http.StatusMultiStatus || status == http.StatusNotFound {
  69. // Got a legit status, cache value
  70. c.set(name, depth, ce)
  71. }
  72. }
  73. return ce.Status, ce.Raw
  74. }
  75. // get retrieves the entry for the named file at the given depth. If no entry
  76. // is found, and depth == 0, get will check to see if the parent path of name
  77. // is present in the cache at depth 1. If so, it will infer that the child does
  78. // not exist and return notFound (404).
  79. func (c *StatCache) get(name string, depth int) *cacheEntry {
  80. if c == nil {
  81. return nil
  82. }
  83. name = shared.Normalize(name)
  84. c.mu.Lock()
  85. defer c.mu.Unlock()
  86. ce := c.tryGetLocked(name, depth)
  87. if ce != nil {
  88. // Cache hit.
  89. return ce
  90. }
  91. if depth > 0 {
  92. // Cache miss.
  93. return nil
  94. }
  95. // At depth 0, if child's parent is in the cache, and the child isn't
  96. // cached, we can infer that the child is notFound.
  97. p := c.tryGetLocked(shared.Parent(name), 1)
  98. if p != nil {
  99. return notFound
  100. }
  101. // No parent in cache, cache miss.
  102. return nil
  103. }
  104. // tryGetLocked requires that c.mu be held.
  105. func (c *StatCache) tryGetLocked(name string, depth int) *cacheEntry {
  106. if c.cachesByDepthAndPath == nil {
  107. return nil
  108. }
  109. cache := c.cachesByDepthAndPath[depth]
  110. if cache == nil {
  111. return nil
  112. }
  113. item := cache.Get(name)
  114. if item == nil {
  115. return nil
  116. }
  117. return item.Value()
  118. }
  119. // set stores the given cacheEntry in the cache at the given name and depth. If
  120. // the depth is 1, set also populates depth 0 entries in the cache for the bare
  121. // name. If status is StatusMultiStatus, set will parse the PROPFIND result and
  122. // store depth 0 entries for all children. If parsing the result fails, nothing
  123. // is cached.
  124. func (c *StatCache) set(name string, depth int, ce *cacheEntry) {
  125. if c == nil {
  126. return
  127. }
  128. name = shared.Normalize(name)
  129. var self *cacheEntry
  130. var children map[string]*cacheEntry
  131. if depth == 1 {
  132. switch ce.Status {
  133. case http.StatusNotFound:
  134. // Record notFound as the self entry.
  135. self = ce
  136. case http.StatusMultiStatus:
  137. // Parse the raw MultiStatus and extract specific responses
  138. // corresponding to the self entry (e.g. the directory, but at depth 0)
  139. // and children (e.g. files within the directory) so that subsequent
  140. // requests for these can be satisfied from the cache.
  141. var ms multiStatus
  142. err := xml.Unmarshal(ce.Raw, &ms)
  143. if err != nil {
  144. // unparseable MultiStatus response, don't cache
  145. log.Printf("statcache.set error: %s", err)
  146. return
  147. }
  148. children = make(map[string]*cacheEntry, len(ms.Responses)-1)
  149. for i := 0; i < len(ms.Responses); i++ {
  150. response := ms.Responses[i]
  151. name, err := url.PathUnescape(response.Href)
  152. if err != nil {
  153. log.Printf("statcache.set child parse error: %s", err)
  154. return
  155. }
  156. name = shared.Normalize(name)
  157. raw := marshalMultiStatus(response)
  158. entry := newCacheEntry(ce.Status, raw)
  159. if i == 0 {
  160. self = entry
  161. } else {
  162. children[name] = entry
  163. }
  164. }
  165. }
  166. }
  167. c.mu.Lock()
  168. defer c.mu.Unlock()
  169. c.setLocked(name, depth, ce)
  170. if self != nil {
  171. c.setLocked(name, 0, self)
  172. }
  173. for childName, child := range children {
  174. c.setLocked(childName, 0, child)
  175. }
  176. }
  177. // setLocked requires that c.mu be held.
  178. func (c *StatCache) setLocked(name string, depth int, ce *cacheEntry) {
  179. if c.cachesByDepthAndPath == nil {
  180. c.cachesByDepthAndPath = make(map[int]*ttlcache.Cache[string, *cacheEntry])
  181. }
  182. cache := c.cachesByDepthAndPath[depth]
  183. if cache == nil {
  184. cache = ttlcache.New(
  185. ttlcache.WithTTL[string, *cacheEntry](c.TTL),
  186. )
  187. go cache.Start()
  188. c.cachesByDepthAndPath[depth] = cache
  189. }
  190. cache.Set(name, ce, ttlcache.DefaultTTL)
  191. }
  192. // invalidate invalidates the entire cache.
  193. func (c *StatCache) invalidate() {
  194. if c == nil {
  195. return
  196. }
  197. c.mu.Lock()
  198. defer c.mu.Unlock()
  199. for _, cache := range c.cachesByDepthAndPath {
  200. cache.DeleteAll()
  201. }
  202. }
  203. func (c *StatCache) stop() {
  204. c.mu.Lock()
  205. defer c.mu.Unlock()
  206. for _, cache := range c.cachesByDepthAndPath {
  207. cache.Stop()
  208. }
  209. }
  210. type cacheEntry struct {
  211. Status int
  212. Raw []byte
  213. }
  214. func newCacheEntry(status int, raw []byte) *cacheEntry {
  215. return &cacheEntry{Status: status, Raw: raw}
  216. }
  217. type propStat struct {
  218. InnerXML []byte `xml:",innerxml"`
  219. }
  220. type response struct {
  221. XMLName xml.Name `xml:"response"`
  222. Href string `xml:"href"`
  223. PropStats []*propStat `xml:"propstat"`
  224. }
  225. type multiStatus struct {
  226. XMLName xml.Name `xml:"multistatus"`
  227. Responses []*response `xml:"response"`
  228. }
  229. // marshalMultiStatus performs custom marshalling of a MultiStatus to preserve
  230. // the original formatting, namespacing, etc. Doing this with Go's XML encoder
  231. // is somewhere between difficult and impossible, which is why we use this more
  232. // manual approach.
  233. func marshalMultiStatus(response *response) []byte {
  234. // TODO(percy): maybe pool these buffers
  235. var buf bytes.Buffer
  236. buf.WriteString(multistatusTemplateStart)
  237. buf.WriteString(response.Href)
  238. buf.WriteString(hrefEnd)
  239. for _, propStat := range response.PropStats {
  240. buf.WriteString(propstatStart)
  241. buf.Write(propStat.InnerXML)
  242. buf.WriteString(propstatEnd)
  243. }
  244. buf.WriteString(multistatusTemplateEnd)
  245. return buf.Bytes()
  246. }
  247. const (
  248. multistatusTemplateStart = `<?xml version="1.0" encoding="UTF-8"?><D:multistatus xmlns:D="DAV:"><D:response><D:href>`
  249. hrefEnd = `</D:href>`
  250. propstatStart = `<D:propstat>`
  251. propstatEnd = `</D:propstat>`
  252. multistatusTemplateEnd = `</D:response></D:multistatus>`
  253. )