walk.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. // Copyright (C) 2014 The Syncthing Authors.
  2. //
  3. // This program is free software: you can redistribute it and/or modify it
  4. // under the terms of the GNU General Public License as published by the Free
  5. // Software Foundation, either version 3 of the License, or (at your option)
  6. // any later version.
  7. //
  8. // This program is distributed in the hope that it will be useful, but WITHOUT
  9. // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. // more details.
  12. //
  13. // You should have received a copy of the GNU General Public License along
  14. // with this program. If not, see <http://www.gnu.org/licenses/>.
  15. package scanner
  16. import (
  17. "errors"
  18. "os"
  19. "path/filepath"
  20. "runtime"
  21. "strings"
  22. "time"
  23. "github.com/syncthing/syncthing/internal/ignore"
  24. "github.com/syncthing/syncthing/internal/lamport"
  25. "github.com/syncthing/syncthing/internal/protocol"
  26. "github.com/syncthing/syncthing/internal/symlinks"
  27. "golang.org/x/text/unicode/norm"
  28. )
  29. type Walker struct {
  30. // Dir is the base directory for the walk
  31. Dir string
  32. // Limit walking to this path within Dir, or no limit if Sub is blank
  33. Sub string
  34. // BlockSize controls the size of the block used when hashing.
  35. BlockSize int
  36. // If Matcher is not nil, it is used to identify files to ignore which were specified by the user.
  37. Matcher *ignore.Matcher
  38. // If TempNamer is not nil, it is used to ignore tempory files when walking.
  39. TempNamer TempNamer
  40. // Number of hours to keep temporary files for
  41. TempLifetime time.Duration
  42. // If CurrentFiler is not nil, it is queried for the current file before rescanning.
  43. CurrentFiler CurrentFiler
  44. // If IgnorePerms is true, changes to permission bits will not be
  45. // detected. Scanned files will get zero permission bits and the
  46. // NoPermissionBits flag set.
  47. IgnorePerms bool
  48. }
  49. type TempNamer interface {
  50. // Temporary returns a temporary name for the filed referred to by filepath.
  51. TempName(path string) string
  52. // IsTemporary returns true if path refers to the name of temporary file.
  53. IsTemporary(path string) bool
  54. }
  55. type CurrentFiler interface {
  56. // CurrentFile returns the file as seen at last scan.
  57. CurrentFile(name string) protocol.FileInfo
  58. }
  59. // Walk returns the list of files found in the local folder by scanning the
  60. // file system. Files are blockwise hashed.
  61. func (w *Walker) Walk() (chan protocol.FileInfo, error) {
  62. if debug {
  63. l.Debugln("Walk", w.Dir, w.Sub, w.BlockSize, w.Matcher)
  64. }
  65. err := checkDir(w.Dir)
  66. if err != nil {
  67. return nil, err
  68. }
  69. files := make(chan protocol.FileInfo)
  70. hashedFiles := make(chan protocol.FileInfo)
  71. newParallelHasher(w.Dir, w.BlockSize, runtime.NumCPU(), hashedFiles, files)
  72. go func() {
  73. hashFiles := w.walkAndHashFiles(files)
  74. filepath.Walk(filepath.Join(w.Dir, w.Sub), hashFiles)
  75. close(files)
  76. }()
  77. return hashedFiles, nil
  78. }
  79. func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFunc {
  80. now := time.Now()
  81. return func(p string, info os.FileInfo, err error) error {
  82. if err != nil {
  83. if debug {
  84. l.Debugln("error:", p, info, err)
  85. }
  86. return nil
  87. }
  88. rn, err := filepath.Rel(w.Dir, p)
  89. if err != nil {
  90. if debug {
  91. l.Debugln("rel error:", p, err)
  92. }
  93. return nil
  94. }
  95. if rn == "." {
  96. return nil
  97. }
  98. if w.TempNamer != nil && w.TempNamer.IsTemporary(rn) {
  99. // A temporary file
  100. if debug {
  101. l.Debugln("temporary:", rn)
  102. }
  103. if info.Mode().IsRegular() && info.ModTime().Add(w.TempLifetime).Before(now) {
  104. os.Remove(p)
  105. if debug {
  106. l.Debugln("removing temporary:", rn, info.ModTime())
  107. }
  108. }
  109. return nil
  110. }
  111. if sn := filepath.Base(rn); sn == ".stignore" || sn == ".stfolder" ||
  112. strings.HasPrefix(rn, ".stversions") || (w.Matcher != nil && w.Matcher.Match(rn)) {
  113. // An ignored file
  114. if debug {
  115. l.Debugln("ignored:", rn)
  116. }
  117. if info.IsDir() {
  118. return filepath.SkipDir
  119. }
  120. return nil
  121. }
  122. if (runtime.GOOS == "linux" || runtime.GOOS == "windows") && !norm.NFC.IsNormalString(rn) {
  123. l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rn)
  124. return nil
  125. }
  126. // Index wise symlinks are always files, regardless of what the target
  127. // is, because symlinks carry their target path as their content.
  128. if info.Mode()&os.ModeSymlink == os.ModeSymlink {
  129. var rval error
  130. // If the target is a directory, do NOT descend down there. This
  131. // will cause files to get tracked, and removing the symlink will
  132. // as a result remove files in their real location. But do not
  133. // SkipDir if the target is not a directory, as it will stop
  134. // scanning the current directory.
  135. if info.IsDir() {
  136. rval = filepath.SkipDir
  137. }
  138. // If we don't support symlinks, skip.
  139. if !symlinks.Supported {
  140. return rval
  141. }
  142. // We always rehash symlinks as they have no modtime or
  143. // permissions. We check if they point to the old target by
  144. // checking that their existing blocks match with the blocks in
  145. // the index.
  146. target, flags, err := symlinks.Read(p)
  147. flags = flags & protocol.SymlinkTypeMask
  148. if err != nil {
  149. if debug {
  150. l.Debugln("readlink error:", p, err)
  151. }
  152. return rval
  153. }
  154. blocks, err := Blocks(strings.NewReader(target), w.BlockSize, 0)
  155. if err != nil {
  156. if debug {
  157. l.Debugln("hash link error:", p, err)
  158. }
  159. return rval
  160. }
  161. if w.CurrentFiler != nil {
  162. // A symlink is "unchanged", if
  163. // - it wasn't deleted (because it isn't now)
  164. // - it was a symlink
  165. // - it wasn't invalid
  166. // - the symlink type (file/dir) was the same
  167. // - the block list (i.e. hash of target) was the same
  168. cf := w.CurrentFiler.CurrentFile(rn)
  169. if !cf.IsDeleted() && cf.IsSymlink() && !cf.IsInvalid() && SymlinkTypeEqual(flags, cf.Flags) && BlocksEqual(cf.Blocks, blocks) {
  170. return rval
  171. }
  172. }
  173. f := protocol.FileInfo{
  174. Name: rn,
  175. Version: lamport.Default.Tick(0),
  176. Flags: protocol.FlagSymlink | flags | protocol.FlagNoPermBits | 0666,
  177. Modified: 0,
  178. Blocks: blocks,
  179. }
  180. if debug {
  181. l.Debugln("symlink to hash:", p, f)
  182. }
  183. fchan <- f
  184. return rval
  185. }
  186. if info.Mode().IsDir() {
  187. if w.CurrentFiler != nil {
  188. // A directory is "unchanged", if it
  189. // - has the same permissions as previously, unless we are ignoring permissions
  190. // - was not marked deleted (since it apparently exists now)
  191. // - was a directory previously (not a file or something else)
  192. // - was not a symlink (since it's a directory now)
  193. // - was not invalid (since it looks valid now)
  194. cf := w.CurrentFiler.CurrentFile(rn)
  195. permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, uint32(info.Mode()))
  196. if permUnchanged && !cf.IsDeleted() && cf.IsDirectory() && !cf.IsSymlink() && !cf.IsInvalid() {
  197. return nil
  198. }
  199. }
  200. flags := uint32(protocol.FlagDirectory)
  201. if w.IgnorePerms {
  202. flags |= protocol.FlagNoPermBits | 0777
  203. } else {
  204. flags |= uint32(info.Mode() & os.ModePerm)
  205. }
  206. f := protocol.FileInfo{
  207. Name: rn,
  208. Version: lamport.Default.Tick(0),
  209. Flags: flags,
  210. Modified: info.ModTime().Unix(),
  211. }
  212. if debug {
  213. l.Debugln("dir:", p, f)
  214. }
  215. fchan <- f
  216. return nil
  217. }
  218. if info.Mode().IsRegular() {
  219. if w.CurrentFiler != nil {
  220. // A file is "unchanged", if it
  221. // - has the same permissions as previously, unless we are ignoring permissions
  222. // - was not marked deleted (since it apparently exists now)
  223. // - had the same modification time as it has now
  224. // - was not a directory previously (since it's a file now)
  225. // - was not a symlink (since it's a file now)
  226. // - was not invalid (since it looks valid now)
  227. // - has the same size as previously
  228. cf := w.CurrentFiler.CurrentFile(rn)
  229. permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, uint32(info.Mode()))
  230. if permUnchanged && !cf.IsDeleted() && cf.Modified == info.ModTime().Unix() && !cf.IsDirectory() &&
  231. !cf.IsSymlink() && !cf.IsInvalid() && cf.Size() == info.Size() {
  232. return nil
  233. }
  234. if debug {
  235. l.Debugln("rescan:", cf, info.ModTime().Unix(), info.Mode()&os.ModePerm)
  236. }
  237. }
  238. var flags = uint32(info.Mode() & os.ModePerm)
  239. if w.IgnorePerms {
  240. flags = protocol.FlagNoPermBits | 0666
  241. }
  242. f := protocol.FileInfo{
  243. Name: rn,
  244. Version: lamport.Default.Tick(0),
  245. Flags: flags,
  246. Modified: info.ModTime().Unix(),
  247. }
  248. if debug {
  249. l.Debugln("to hash:", p, f)
  250. }
  251. fchan <- f
  252. }
  253. return nil
  254. }
  255. }
  256. func checkDir(dir string) error {
  257. if info, err := os.Lstat(dir); err != nil {
  258. return err
  259. } else if !info.IsDir() {
  260. return errors.New(dir + ": not a directory")
  261. } else if debug {
  262. l.Debugln("checkDir", dir, info)
  263. }
  264. return nil
  265. }
  266. func PermsEqual(a, b uint32) bool {
  267. switch runtime.GOOS {
  268. case "windows":
  269. // There is only writeable and read only, represented for user, group
  270. // and other equally. We only compare against user.
  271. return a&0600 == b&0600
  272. default:
  273. // All bits count
  274. return a&0777 == b&0777
  275. }
  276. }
  277. // If the target is missing, Unix never knows what type of symlink it is
  278. // and Windows always knows even if there is no target.
  279. // Which means that without this special check a Unix node would be fighting
  280. // with a Windows node about whether or not the target is known.
  281. // Basically, if you don't know and someone else knows, just accept it.
  282. // The fact that you don't know means you are on Unix, and on Unix you don't
  283. // really care what the target type is. The moment you do know, and if something
  284. // doesn't match, that will propogate throught the cluster.
  285. func SymlinkTypeEqual(disk, index uint32) bool {
  286. if disk&protocol.FlagSymlinkMissingTarget != 0 && index&protocol.FlagSymlinkMissingTarget == 0 {
  287. return true
  288. }
  289. return disk&protocol.SymlinkTypeMask == index&protocol.SymlinkTypeMask
  290. }