walk.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. // Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
  2. // All rights reserved. Use of this source code is governed by an MIT-style
  3. // license that can be found in the LICENSE file.
  4. package scanner
  5. import (
  6. "bufio"
  7. "errors"
  8. "io"
  9. "os"
  10. "path"
  11. "path/filepath"
  12. "regexp"
  13. "runtime"
  14. "strings"
  15. "code.google.com/p/go.text/unicode/norm"
  16. "github.com/syncthing/syncthing/fnmatch"
  17. "github.com/syncthing/syncthing/lamport"
  18. "github.com/syncthing/syncthing/protocol"
  19. )
  20. type Walker struct {
  21. // Dir is the base directory for the walk
  22. Dir string
  23. // Limit walking to this path within Dir, or no limit if Sub is blank
  24. Sub string
  25. // BlockSize controls the size of the block used when hashing.
  26. BlockSize int
  27. // If IgnoreFile is not empty, it is the name used for the file that holds ignore patterns.
  28. IgnoreFile string
  29. // If TempNamer is not nil, it is used to ignore tempory files when walking.
  30. TempNamer TempNamer
  31. // If CurrentFiler is not nil, it is queried for the current file before rescanning.
  32. CurrentFiler CurrentFiler
  33. // If IgnorePerms is true, changes to permission bits will not be
  34. // detected. Scanned files will get zero permission bits and the
  35. // NoPermissionBits flag set.
  36. IgnorePerms bool
  37. }
  // TempNamer maps file names to temporary names and recognises them again.
  type TempNamer interface {
  	// TempName returns a temporary name for the file referred to by path.
  	TempName(path string) string

  	// IsTemporary reports whether path refers to the name of a temporary
  	// file.
  	IsTemporary(path string) bool
  }
  44. type CurrentFiler interface {
  45. // CurrentFile returns the file as seen at last scan.
  46. CurrentFile(name string) protocol.FileInfo
  47. }
  48. // Walk returns the list of files found in the local repository by scanning the
  49. // file system. Files are blockwise hashed.
  50. func (w *Walker) Walk() (chan protocol.FileInfo, error) {
  51. if debug {
  52. l.Debugln("Walk", w.Dir, w.Sub, w.BlockSize, w.IgnoreFile)
  53. }
  54. err := checkDir(w.Dir)
  55. if err != nil {
  56. return nil, err
  57. }
  58. files := make(chan protocol.FileInfo)
  59. hashedFiles := make(chan protocol.FileInfo)
  60. newParallelHasher(w.Dir, w.BlockSize, runtime.NumCPU(), hashedFiles, files)
  61. var ignores []*regexp.Regexp
  62. go func() {
  63. filepath.Walk(w.Dir, w.loadIgnoreFiles(w.Dir, &ignores))
  64. hashFiles := w.walkAndHashFiles(files, ignores)
  65. filepath.Walk(filepath.Join(w.Dir, w.Sub), hashFiles)
  66. close(files)
  67. }()
  68. return hashedFiles, nil
  69. }
  70. // CleanTempFiles removes all files that match the temporary filename pattern.
  71. func (w *Walker) CleanTempFiles() {
  72. filepath.Walk(w.Dir, w.cleanTempFile)
  73. }
  74. func (w *Walker) loadIgnoreFiles(dir string, ignores *[]*regexp.Regexp) filepath.WalkFunc {
  75. return func(p string, info os.FileInfo, err error) error {
  76. if err != nil {
  77. return nil
  78. }
  79. rn, err := filepath.Rel(dir, p)
  80. if err != nil {
  81. return nil
  82. }
  83. if pn, sn := filepath.Split(rn); sn == w.IgnoreFile {
  84. pn := filepath.Clean(pn)
  85. dirIgnores := loadIgnoreFile(p, pn)
  86. *ignores = append(*ignores, dirIgnores...)
  87. }
  88. return nil
  89. }
  90. }
  91. func loadIgnoreFile(ignFile, base string) []*regexp.Regexp {
  92. fd, err := os.Open(ignFile)
  93. if err != nil {
  94. return nil
  95. }
  96. defer fd.Close()
  97. return parseIgnoreFile(fd, base)
  98. }
  99. func parseIgnoreFile(fd io.Reader, base string) []*regexp.Regexp {
  100. var exps []*regexp.Regexp
  101. scanner := bufio.NewScanner(fd)
  102. for scanner.Scan() {
  103. line := strings.TrimSpace(scanner.Text())
  104. if line == "" {
  105. continue
  106. }
  107. if strings.HasPrefix(line, "/") {
  108. // Pattern is rooted in the current dir only
  109. exp, err := fnmatch.Convert(path.Join(base, line[1:]), fnmatch.FNM_PATHNAME)
  110. if err != nil {
  111. l.Warnf("Invalid pattern %q in ignore file", line)
  112. continue
  113. }
  114. exps = append(exps, exp)
  115. } else if strings.HasPrefix(line, "**/") {
  116. // Add the pattern as is, and without **/ so it matches in current dir
  117. exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME)
  118. if err != nil {
  119. l.Warnf("Invalid pattern %q in ignore file", line)
  120. continue
  121. }
  122. exps = append(exps, exp)
  123. exp, err = fnmatch.Convert(path.Join(base, line[3:]), fnmatch.FNM_PATHNAME)
  124. if err != nil {
  125. l.Warnf("Invalid pattern %q in ignore file", line)
  126. continue
  127. }
  128. exps = append(exps, exp)
  129. } else {
  130. // Path name or pattern, add it so it matches files both in
  131. // current directory and subdirs.
  132. exp, err := fnmatch.Convert(path.Join(base, line), fnmatch.FNM_PATHNAME)
  133. if err != nil {
  134. l.Warnf("Invalid pattern %q in ignore file", line)
  135. continue
  136. }
  137. exps = append(exps, exp)
  138. exp, err = fnmatch.Convert(path.Join(base, "**", line), fnmatch.FNM_PATHNAME)
  139. if err != nil {
  140. l.Warnf("Invalid pattern %q in ignore file", line)
  141. continue
  142. }
  143. exps = append(exps, exp)
  144. }
  145. }
  146. return exps
  147. }
  148. func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo, ignores []*regexp.Regexp) filepath.WalkFunc {
  149. return func(p string, info os.FileInfo, err error) error {
  150. if err != nil {
  151. if debug {
  152. l.Debugln("error:", p, info, err)
  153. }
  154. return nil
  155. }
  156. rn, err := filepath.Rel(w.Dir, p)
  157. if err != nil {
  158. if debug {
  159. l.Debugln("rel error:", p, err)
  160. }
  161. return nil
  162. }
  163. if rn == "." {
  164. return nil
  165. }
  166. if w.TempNamer != nil && w.TempNamer.IsTemporary(rn) {
  167. // A temporary file
  168. if debug {
  169. l.Debugln("temporary:", rn)
  170. }
  171. return nil
  172. }
  173. if sn := filepath.Base(rn); sn == w.IgnoreFile || sn == ".stversions" || w.ignoreFile(ignores, rn) {
  174. // An ignored file
  175. if debug {
  176. l.Debugln("ignored:", rn)
  177. }
  178. if info.IsDir() {
  179. return filepath.SkipDir
  180. }
  181. return nil
  182. }
  183. if (runtime.GOOS == "linux" || runtime.GOOS == "windows") && !norm.NFC.IsNormalString(rn) {
  184. l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rn)
  185. return nil
  186. }
  187. if info.Mode().IsDir() {
  188. if w.CurrentFiler != nil {
  189. cf := w.CurrentFiler.CurrentFile(rn)
  190. permUnchanged := w.IgnorePerms || !protocol.HasPermissionBits(cf.Flags) || PermsEqual(cf.Flags, uint32(info.Mode()))
  191. if !protocol.IsDeleted(cf.Flags) && protocol.IsDirectory(cf.Flags) && permUnchanged {
  192. return nil
  193. }
  194. }
  195. var flags uint32 = protocol.FlagDirectory
  196. if w.IgnorePerms {
  197. flags |= protocol.FlagNoPermBits | 0777
  198. } else {
  199. flags |= uint32(info.Mode() & os.ModePerm)
  200. }
  201. f := protocol.FileInfo{
  202. Name: rn,
  203. Version: lamport.Default.Tick(0),
  204. Flags: flags,
  205. Modified: info.ModTime().Unix(),
  206. }
  207. if debug {
  208. l.Debugln("dir:", f)
  209. }
  210. fchan <- f
  211. return nil
  212. }
  213. if info.Mode().IsRegular() {
  214. if w.CurrentFiler != nil {
  215. cf := w.CurrentFiler.CurrentFile(rn)
  216. permUnchanged := w.IgnorePerms || !protocol.HasPermissionBits(cf.Flags) || PermsEqual(cf.Flags, uint32(info.Mode()))
  217. if !protocol.IsDeleted(cf.Flags) && cf.Modified == info.ModTime().Unix() && permUnchanged {
  218. return nil
  219. }
  220. if debug {
  221. l.Debugln("rescan:", cf, info.ModTime().Unix(), info.Mode()&os.ModePerm)
  222. }
  223. }
  224. var flags = uint32(info.Mode() & os.ModePerm)
  225. if w.IgnorePerms {
  226. flags = protocol.FlagNoPermBits | 0666
  227. }
  228. fchan <- protocol.FileInfo{
  229. Name: rn,
  230. Version: lamport.Default.Tick(0),
  231. Flags: flags,
  232. Modified: info.ModTime().Unix(),
  233. }
  234. }
  235. return nil
  236. }
  237. }
  238. func (w *Walker) cleanTempFile(path string, info os.FileInfo, err error) error {
  239. if err != nil {
  240. return err
  241. }
  242. if info.Mode()&os.ModeType == 0 && w.TempNamer.IsTemporary(path) {
  243. os.Remove(path)
  244. }
  245. return nil
  246. }
  247. func (w *Walker) ignoreFile(patterns []*regexp.Regexp, file string) bool {
  248. for _, pattern := range patterns {
  249. if pattern.MatchString(file) {
  250. if debug {
  251. l.Debugf("%q matches %v", file, pattern)
  252. }
  253. return true
  254. }
  255. }
  256. return false
  257. }
  258. func checkDir(dir string) error {
  259. if info, err := os.Lstat(dir); err != nil {
  260. return err
  261. } else if !info.IsDir() {
  262. return errors.New(dir + ": not a directory")
  263. } else if debug {
  264. l.Debugln("checkDir", dir, info)
  265. }
  266. return nil
  267. }
  // PermsEqual reports whether the permission bits of a and b are equal for
  // the purposes of the current operating system. Bits outside the compared
  // mask are ignored.
  func PermsEqual(a, b uint32) bool {
  	// By default all nine permission bits count.
  	mask := uint32(0777)
  	if runtime.GOOS == "windows" {
  		// There is only writeable and read only, represented equally for
  		// user, group and other, so only the user bits are compared.
  		mask = 0600
  	}
  	return a&mask == b&mask
  }