walk.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  // Copyright (C) 2014 The Syncthing Authors.
  //
  // This program is free software: you can redistribute it and/or modify it
  // under the terms of the GNU General Public License as published by the Free
  // Software Foundation, either version 3 of the License, or (at your option)
  // any later version.
  //
  // This program is distributed in the hope that it will be useful, but WITHOUT
  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  // more details.
  //
  // You should have received a copy of the GNU General Public License along
  // with this program. If not, see <http://www.gnu.org/licenses/>.
  15. package scanner
  16. import (
  17. "errors"
  18. "os"
  19. "path/filepath"
  20. "runtime"
  21. "strings"
  22. "time"
  23. "github.com/syncthing/syncthing/internal/ignore"
  24. "github.com/syncthing/syncthing/internal/lamport"
  25. "github.com/syncthing/syncthing/internal/protocol"
  26. "github.com/syncthing/syncthing/internal/symlinks"
  27. "golang.org/x/text/unicode/norm"
  28. )
  29. type Walker struct {
  30. // Dir is the base directory for the walk
  31. Dir string
  32. // Limit walking to this path within Dir, or no limit if Sub is blank
  33. Sub string
  34. // BlockSize controls the size of the block used when hashing.
  35. BlockSize int
  36. // If Matcher is not nil, it is used to identify files to ignore which were specified by the user.
  37. Matcher *ignore.Matcher
  38. // If TempNamer is not nil, it is used to ignore tempory files when walking.
  39. TempNamer TempNamer
  40. // Number of hours to keep temporary files for
  41. TempLifetime time.Duration
  42. // If CurrentFiler is not nil, it is queried for the current file before rescanning.
  43. CurrentFiler CurrentFiler
  44. // If IgnorePerms is true, changes to permission bits will not be
  45. // detected. Scanned files will get zero permission bits and the
  46. // NoPermissionBits flag set.
  47. IgnorePerms bool
  48. }
  // TempNamer knows how temporary files are named.
  type TempNamer interface {
  	// TempName returns the temporary name to use for the file referred to
  	// by path.
  	TempName(path string) string

  	// IsTemporary reports whether path is the name of a temporary file.
  	IsTemporary(path string) bool
  }
  55. type CurrentFiler interface {
  56. // CurrentFile returns the file as seen at last scan.
  57. CurrentFile(name string) (protocol.FileInfo, bool)
  58. }
  59. // Walk returns the list of files found in the local folder by scanning the
  60. // file system. Files are blockwise hashed.
  61. func (w *Walker) Walk() (chan protocol.FileInfo, error) {
  62. if debug {
  63. l.Debugln("Walk", w.Dir, w.Sub, w.BlockSize, w.Matcher)
  64. }
  65. err := checkDir(w.Dir)
  66. if err != nil {
  67. return nil, err
  68. }
  69. files := make(chan protocol.FileInfo)
  70. hashedFiles := make(chan protocol.FileInfo)
  71. newParallelHasher(w.Dir, w.BlockSize, runtime.NumCPU(), hashedFiles, files)
  72. go func() {
  73. hashFiles := w.walkAndHashFiles(files)
  74. filepath.Walk(filepath.Join(w.Dir, w.Sub), hashFiles)
  75. close(files)
  76. }()
  77. return hashedFiles, nil
  78. }
  79. func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFunc {
  80. now := time.Now()
  81. return func(p string, info os.FileInfo, err error) error {
  82. if err != nil {
  83. if debug {
  84. l.Debugln("error:", p, info, err)
  85. }
  86. return nil
  87. }
  88. rn, err := filepath.Rel(w.Dir, p)
  89. if err != nil {
  90. if debug {
  91. l.Debugln("rel error:", p, err)
  92. }
  93. return nil
  94. }
  95. if rn == "." {
  96. return nil
  97. }
  98. if w.TempNamer != nil && w.TempNamer.IsTemporary(rn) {
  99. // A temporary file
  100. if debug {
  101. l.Debugln("temporary:", rn)
  102. }
  103. if info.Mode().IsRegular() && info.ModTime().Add(w.TempLifetime).Before(now) {
  104. os.Remove(p)
  105. if debug {
  106. l.Debugln("removing temporary:", rn, info.ModTime())
  107. }
  108. }
  109. return nil
  110. }
  111. if sn := filepath.Base(rn); sn == ".stignore" || sn == ".stfolder" ||
  112. strings.HasPrefix(rn, ".stversions") || (w.Matcher != nil && w.Matcher.Match(rn)) {
  113. // An ignored file
  114. if debug {
  115. l.Debugln("ignored:", rn)
  116. }
  117. if info.IsDir() {
  118. return filepath.SkipDir
  119. }
  120. return nil
  121. }
  122. if (runtime.GOOS == "linux" || runtime.GOOS == "windows") && !norm.NFC.IsNormalString(rn) {
  123. l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rn)
  124. return nil
  125. }
  126. // Index wise symlinks are always files, regardless of what the target
  127. // is, because symlinks carry their target path as their content.
  128. if info.Mode()&os.ModeSymlink == os.ModeSymlink {
  129. var rval error
  130. // If the target is a directory, do NOT descend down there. This
  131. // will cause files to get tracked, and removing the symlink will
  132. // as a result remove files in their real location. But do not
  133. // SkipDir if the target is not a directory, as it will stop
  134. // scanning the current directory.
  135. if info.IsDir() {
  136. rval = filepath.SkipDir
  137. }
  138. // If we don't support symlinks, skip.
  139. if !symlinks.Supported {
  140. return rval
  141. }
  142. // We always rehash symlinks as they have no modtime or
  143. // permissions. We check if they point to the old target by
  144. // checking that their existing blocks match with the blocks in
  145. // the index.
  146. target, flags, err := symlinks.Read(p)
  147. flags = flags & protocol.SymlinkTypeMask
  148. if err != nil {
  149. if debug {
  150. l.Debugln("readlink error:", p, err)
  151. }
  152. return rval
  153. }
  154. blocks, err := Blocks(strings.NewReader(target), w.BlockSize, 0)
  155. if err != nil {
  156. if debug {
  157. l.Debugln("hash link error:", p, err)
  158. }
  159. return rval
  160. }
  161. if w.CurrentFiler != nil {
  162. // A symlink is "unchanged", if
  163. // - it exists
  164. // - it wasn't deleted (because it isn't now)
  165. // - it was a symlink
  166. // - it wasn't invalid
  167. // - the symlink type (file/dir) was the same
  168. // - the block list (i.e. hash of target) was the same
  169. cf, ok := w.CurrentFiler.CurrentFile(rn)
  170. if ok && !cf.IsDeleted() && cf.IsSymlink() && !cf.IsInvalid() && SymlinkTypeEqual(flags, cf.Flags) && BlocksEqual(cf.Blocks, blocks) {
  171. return rval
  172. }
  173. }
  174. f := protocol.FileInfo{
  175. Name: rn,
  176. Version: lamport.Default.Tick(0),
  177. Flags: protocol.FlagSymlink | flags | protocol.FlagNoPermBits | 0666,
  178. Modified: 0,
  179. Blocks: blocks,
  180. }
  181. if debug {
  182. l.Debugln("symlink to hash:", p, f)
  183. }
  184. fchan <- f
  185. return rval
  186. }
  187. if info.Mode().IsDir() {
  188. if w.CurrentFiler != nil {
  189. // A directory is "unchanged", if it
  190. // - exists
  191. // - has the same permissions as previously, unless we are ignoring permissions
  192. // - was not marked deleted (since it apparently exists now)
  193. // - was a directory previously (not a file or something else)
  194. // - was not a symlink (since it's a directory now)
  195. // - was not invalid (since it looks valid now)
  196. cf, ok := w.CurrentFiler.CurrentFile(rn)
  197. permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, uint32(info.Mode()))
  198. if ok && permUnchanged && !cf.IsDeleted() && cf.IsDirectory() && !cf.IsSymlink() && !cf.IsInvalid() {
  199. return nil
  200. }
  201. }
  202. flags := uint32(protocol.FlagDirectory)
  203. if w.IgnorePerms {
  204. flags |= protocol.FlagNoPermBits | 0777
  205. } else {
  206. flags |= uint32(info.Mode() & os.ModePerm)
  207. }
  208. f := protocol.FileInfo{
  209. Name: rn,
  210. Version: lamport.Default.Tick(0),
  211. Flags: flags,
  212. Modified: info.ModTime().Unix(),
  213. }
  214. if debug {
  215. l.Debugln("dir:", p, f)
  216. }
  217. fchan <- f
  218. return nil
  219. }
  220. if info.Mode().IsRegular() {
  221. if w.CurrentFiler != nil {
  222. // A file is "unchanged", if it
  223. // - exists
  224. // - has the same permissions as previously, unless we are ignoring permissions
  225. // - was not marked deleted (since it apparently exists now)
  226. // - had the same modification time as it has now
  227. // - was not a directory previously (since it's a file now)
  228. // - was not a symlink (since it's a file now)
  229. // - was not invalid (since it looks valid now)
  230. // - has the same size as previously
  231. cf, ok := w.CurrentFiler.CurrentFile(rn)
  232. permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, uint32(info.Mode()))
  233. if ok && permUnchanged && !cf.IsDeleted() && cf.Modified == info.ModTime().Unix() && !cf.IsDirectory() &&
  234. !cf.IsSymlink() && !cf.IsInvalid() && cf.Size() == info.Size() {
  235. return nil
  236. }
  237. if debug {
  238. l.Debugln("rescan:", cf, info.ModTime().Unix(), info.Mode()&os.ModePerm)
  239. }
  240. }
  241. var flags = uint32(info.Mode() & os.ModePerm)
  242. if w.IgnorePerms {
  243. flags = protocol.FlagNoPermBits | 0666
  244. }
  245. f := protocol.FileInfo{
  246. Name: rn,
  247. Version: lamport.Default.Tick(0),
  248. Flags: flags,
  249. Modified: info.ModTime().Unix(),
  250. }
  251. if debug {
  252. l.Debugln("to hash:", p, f)
  253. }
  254. fchan <- f
  255. }
  256. return nil
  257. }
  258. }
  259. func checkDir(dir string) error {
  260. if info, err := os.Lstat(dir); err != nil {
  261. return err
  262. } else if !info.IsDir() {
  263. return errors.New(dir + ": not a directory")
  264. } else if debug {
  265. l.Debugln("checkDir", dir, info)
  266. }
  267. return nil
  268. }
  // PermsEqual reports whether a and b carry the same permission bits, as far
  // as the bits are meaningful on the current operating system. Bits outside
  // the permission mask are not compared.
  func PermsEqual(a, b uint32) bool {
  	// By default all permission bits count.
  	mask := uint32(0777)
  	if runtime.GOOS == "windows" {
  		// There is only writeable and read only, represented for user,
  		// group and other equally. We only compare against user.
  		mask = 0600
  	}
  	return a&mask == b&mask
  }
  280. // If the target is missing, Unix never knows what type of symlink it is
  281. // and Windows always knows even if there is no target.
  282. // Which means that without this special check a Unix node would be fighting
  283. // with a Windows node about whether or not the target is known.
  284. // Basically, if you don't know and someone else knows, just accept it.
  285. // The fact that you don't know means you are on Unix, and on Unix you don't
  286. // really care what the target type is. The moment you do know, and if something
  287. // doesn't match, that will propogate throught the cluster.
  288. func SymlinkTypeEqual(disk, index uint32) bool {
  289. if disk&protocol.FlagSymlinkMissingTarget != 0 && index&protocol.FlagSymlinkMissingTarget == 0 {
  290. return true
  291. }
  292. return disk&protocol.SymlinkTypeMask == index&protocol.SymlinkTypeMask
  293. }