puller.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. // Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
  2. // All rights reserved. Use of this source code is governed by an MIT-style
  3. // license that can be found in the LICENSE file.
  4. package model
  5. import (
  6. "errors"
  7. "fmt"
  8. "os"
  9. "path/filepath"
  10. "sync"
  11. "time"
  12. "github.com/syncthing/syncthing/internal/config"
  13. "github.com/syncthing/syncthing/internal/events"
  14. "github.com/syncthing/syncthing/internal/osutil"
  15. "github.com/syncthing/syncthing/internal/protocol"
  16. "github.com/syncthing/syncthing/internal/scanner"
  17. "github.com/syncthing/syncthing/internal/versioner"
  18. )
  19. // TODO: Stop on errors
  20. const (
  21. copiersPerFolder = 1
  22. pullersPerFolder = 16
  23. finishersPerFolder = 2
  24. pauseIntv = 60 * time.Second
  25. nextPullIntv = 10 * time.Second
  26. checkPullIntv = 1 * time.Second
  27. )
  28. // A pullBlockState is passed to the puller routine for each block that needs
  29. // to be fetched.
  30. type pullBlockState struct {
  31. *sharedPullerState
  32. block protocol.BlockInfo
  33. }
  34. // A copyBlocksState is passed to copy routine if the file has blocks to be
  35. // copied from the original.
  36. type copyBlocksState struct {
  37. *sharedPullerState
  38. blocks []protocol.BlockInfo
  39. }
  40. var (
  41. activity = newDeviceActivity()
  42. errNoDevice = errors.New("no available source device")
  43. )
  44. type Puller struct {
  45. folder string
  46. dir string
  47. scanIntv time.Duration
  48. model *Model
  49. stop chan struct{}
  50. versioner versioner.Versioner
  51. }
  52. // Serve will run scans and pulls. It will return when Stop()ed or on a
  53. // critical error.
  54. func (p *Puller) Serve() {
  55. if debug {
  56. l.Debugln(p, "starting")
  57. defer l.Debugln(p, "exiting")
  58. }
  59. p.stop = make(chan struct{})
  60. pullTimer := time.NewTimer(checkPullIntv)
  61. scanTimer := time.NewTimer(time.Millisecond) // The first scan should be done immediately.
  62. defer func() {
  63. pullTimer.Stop()
  64. scanTimer.Stop()
  65. // TODO: Should there be an actual FolderStopped state?
  66. p.model.setState(p.folder, FolderIdle)
  67. }()
  68. var prevVer uint64
  69. // Clean out old temporaries before we start pulling
  70. p.clean()
  71. // We don't start pulling files until a scan has been completed.
  72. initialScanCompleted := false
  73. loop:
  74. for {
  75. select {
  76. case <-p.stop:
  77. return
  78. // TODO: We could easily add a channel here for notifications from
  79. // Index(), so that we immediately start a pull when new index
  80. // information is available. Before that though, I'd like to build a
  81. // repeatable benchmark of how long it takes to sync a change from
  82. // device A to device B, so we have something to work against.
  83. case <-pullTimer.C:
  84. if !initialScanCompleted {
  85. continue
  86. }
  87. // RemoteLocalVersion() is a fast call, doesn't touch the database.
  88. curVer := p.model.RemoteLocalVersion(p.folder)
  89. if curVer == prevVer {
  90. pullTimer.Reset(checkPullIntv)
  91. continue
  92. }
  93. if debug {
  94. l.Debugln(p, "pulling", prevVer, curVer)
  95. }
  96. p.model.setState(p.folder, FolderSyncing)
  97. tries := 0
  98. for {
  99. tries++
  100. changed := p.pullerIteration(copiersPerFolder, pullersPerFolder, finishersPerFolder)
  101. if debug {
  102. l.Debugln(p, "changed", changed)
  103. }
  104. if changed == 0 {
  105. // No files were changed by the puller, so we are in
  106. // sync. Remember the local version number and
  107. // schedule a resync a little bit into the future.
  108. if lv := p.model.RemoteLocalVersion(p.folder); lv < curVer {
  109. // There's a corner case where the device we needed
  110. // files from disconnected during the puller
  111. // iteration. The files will have been removed from
  112. // the index, so we've concluded that we don't need
  113. // them, but at the same time we have the local
  114. // version that includes those files in curVer. So we
  115. // catch the case that localVersion might have
  116. // decresed here.
  117. l.Debugln(p, "adjusting curVer", lv)
  118. curVer = lv
  119. }
  120. prevVer = curVer
  121. pullTimer.Reset(nextPullIntv)
  122. break
  123. }
  124. if tries > 10 {
  125. // We've tried a bunch of times to get in sync, but
  126. // we're not making it. Probably there are write
  127. // errors preventing us. Flag this with a warning and
  128. // wait a bit longer before retrying.
  129. l.Warnf("Folder %q isn't making progress - check logs for possible root cause. Pausing puller for %v.", p.folder, pauseIntv)
  130. pullTimer.Reset(pauseIntv)
  131. break
  132. }
  133. }
  134. p.model.setState(p.folder, FolderIdle)
  135. // The reason for running the scanner from within the puller is that
  136. // this is the easiest way to make sure we are not doing both at the
  137. // same time.
  138. case <-scanTimer.C:
  139. if debug {
  140. l.Debugln(p, "rescan")
  141. }
  142. p.model.setState(p.folder, FolderScanning)
  143. if err := p.model.ScanFolder(p.folder); err != nil {
  144. invalidateFolder(p.model.cfg, p.folder, err)
  145. break loop
  146. }
  147. p.model.setState(p.folder, FolderIdle)
  148. scanTimer.Reset(p.scanIntv)
  149. if !initialScanCompleted {
  150. l.Infoln("Completed initial scan (rw) of folder", p.folder)
  151. initialScanCompleted = true
  152. }
  153. }
  154. }
  155. }
  156. func (p *Puller) Stop() {
  157. close(p.stop)
  158. }
  159. func (p *Puller) String() string {
  160. return fmt.Sprintf("puller/%s@%p", p.folder, p)
  161. }
  162. // pullerIteration runs a single puller iteration for the given folder and
  163. // returns the number items that should have been synced (even those that
  164. // might have failed). One puller iteration handles all files currently
  165. // flagged as needed in the folder. The specified number of copier, puller and
  166. // finisher routines are used. It's seldom efficient to use more than one
  167. // copier routine, while multiple pullers are essential and multiple finishers
  168. // may be useful (they are primarily CPU bound due to hashing).
  169. func (p *Puller) pullerIteration(ncopiers, npullers, nfinishers int) int {
  170. pullChan := make(chan pullBlockState)
  171. copyChan := make(chan copyBlocksState)
  172. finisherChan := make(chan *sharedPullerState)
  173. var wg sync.WaitGroup
  174. var doneWg sync.WaitGroup
  175. for i := 0; i < ncopiers; i++ {
  176. wg.Add(1)
  177. go func() {
  178. // copierRoutine finishes when copyChan is closed
  179. p.copierRoutine(copyChan, finisherChan)
  180. wg.Done()
  181. }()
  182. }
  183. for i := 0; i < npullers; i++ {
  184. wg.Add(1)
  185. go func() {
  186. // pullerRoutine finishes when pullChan is closed
  187. p.pullerRoutine(pullChan, finisherChan)
  188. wg.Done()
  189. }()
  190. }
  191. for i := 0; i < nfinishers; i++ {
  192. doneWg.Add(1)
  193. // finisherRoutine finishes when finisherChan is closed
  194. go func() {
  195. p.finisherRoutine(finisherChan)
  196. doneWg.Done()
  197. }()
  198. }
  199. p.model.fmut.RLock()
  200. files := p.model.folderFiles[p.folder]
  201. p.model.fmut.RUnlock()
  202. // !!!
  203. // WithNeed takes a database snapshot (by necessity). By the time we've
  204. // handled a bunch of files it might have become out of date and we might
  205. // be attempting to sync with an old version of a file...
  206. // !!!
  207. changed := 0
  208. files.WithNeed(protocol.LocalDeviceID, func(intf protocol.FileIntf) bool {
  209. // Needed items are delivered sorted lexicographically. This isn't
  210. // really optimal from a performance point of view - it would be
  211. // better if files were handled in random order, to spread the load
  212. // over the cluster. But it means that we can be sure that we fully
  213. // handle directories before the files that go inside them, which is
  214. // nice.
  215. file := intf.(protocol.FileInfo)
  216. events.Default.Log(events.ItemStarted, map[string]string{
  217. "folder": p.folder,
  218. "item": file.Name,
  219. })
  220. if debug {
  221. l.Debugln(p, "handling", file.Name)
  222. }
  223. switch {
  224. case protocol.IsDirectory(file.Flags) && protocol.IsDeleted(file.Flags):
  225. // A deleted directory
  226. p.deleteDir(file)
  227. case protocol.IsDirectory(file.Flags):
  228. // A new or changed directory
  229. p.handleDir(file)
  230. case protocol.IsDeleted(file.Flags):
  231. // A deleted file
  232. p.deleteFile(file)
  233. default:
  234. // A new or changed file. This is the only case where we do stuff
  235. // in the background; the other three are done synchronously.
  236. p.handleFile(file, copyChan, pullChan)
  237. }
  238. changed++
  239. return true
  240. })
  241. // Signal copy and puller routines that we are done with the in data for
  242. // this iteration
  243. close(copyChan)
  244. close(pullChan)
  245. // Wait for them to finish, then signal the finisher chan that there will
  246. // be no more input.
  247. wg.Wait()
  248. close(finisherChan)
  249. // Wait for the finisherChan to finish.
  250. doneWg.Wait()
  251. return changed
  252. }
  253. // handleDir creates or updates the given directory
  254. func (p *Puller) handleDir(file protocol.FileInfo) {
  255. realName := filepath.Join(p.dir, file.Name)
  256. mode := os.FileMode(file.Flags & 0777)
  257. if debug {
  258. curFile := p.model.CurrentFolderFile(p.folder, file.Name)
  259. l.Debugf("need dir\n\t%v\n\t%v", file, curFile)
  260. }
  261. if info, err := os.Stat(realName); err != nil {
  262. if os.IsNotExist(err) {
  263. // The directory doesn't exist, so we create it with the right
  264. // mode bits from the start.
  265. mkdir := func(path string) error {
  266. // We declare a function that acts on only the path name, so
  267. // we can pass it to InWritableDir. We use a regular Mkdir and
  268. // not MkdirAll because the parent should already exist.
  269. return os.Mkdir(path, mode)
  270. }
  271. if err = osutil.InWritableDir(mkdir, realName); err == nil {
  272. p.model.updateLocal(p.folder, file)
  273. } else {
  274. l.Infof("Puller (folder %q, file %q): %v", p.folder, file.Name, err)
  275. }
  276. return
  277. }
  278. // Weird error when stat()'ing the dir. Probably won't work to do
  279. // anything else with it if we can't even stat() it.
  280. l.Infof("Puller (folder %q, file %q): %v", p.folder, file.Name, err)
  281. return
  282. } else if !info.IsDir() {
  283. l.Infof("Puller (folder %q, file %q): should be dir, but is not", p.folder, file.Name)
  284. return
  285. }
  286. // The directory already exists, so we just correct the mode bits. (We
  287. // don't handle modification times on directories, because that sucks...)
  288. // It's OK to change mode bits on stuff within non-writable directories.
  289. if err := os.Chmod(realName, mode); err == nil {
  290. p.model.updateLocal(p.folder, file)
  291. } else {
  292. l.Infof("Puller (folder %q, file %q): %v", p.folder, file.Name, err)
  293. }
  294. }
  295. // deleteDir attempts to delete the given directory
  296. func (p *Puller) deleteDir(file protocol.FileInfo) {
  297. realName := filepath.Join(p.dir, file.Name)
  298. err := osutil.InWritableDir(os.Remove, realName)
  299. if err == nil || os.IsNotExist(err) {
  300. p.model.updateLocal(p.folder, file)
  301. }
  302. }
  303. // deleteFile attempts to delete the given file
  304. func (p *Puller) deleteFile(file protocol.FileInfo) {
  305. realName := filepath.Join(p.dir, file.Name)
  306. var err error
  307. if p.versioner != nil {
  308. err = osutil.InWritableDir(p.versioner.Archive, realName)
  309. } else {
  310. err = osutil.InWritableDir(os.Remove, realName)
  311. }
  312. if err != nil {
  313. l.Infof("Puller (folder %q, file %q): delete: %v", p.folder, file.Name, err)
  314. } else {
  315. p.model.updateLocal(p.folder, file)
  316. }
  317. }
  318. // handleFile queues the copies and pulls as necessary for a single new or
  319. // changed file.
  320. func (p *Puller) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, pullChan chan<- pullBlockState) {
  321. curFile := p.model.CurrentFolderFile(p.folder, file.Name)
  322. copyBlocks, pullBlocks := scanner.BlockDiff(curFile.Blocks, file.Blocks)
  323. if len(copyBlocks) == len(curFile.Blocks) && len(pullBlocks) == 0 {
  324. // We are supposed to copy the entire file, and then fetch nothing. We
  325. // are only updating metadata, so we don't actually *need* to make the
  326. // copy.
  327. if debug {
  328. l.Debugln(p, "taking shortcut on", file.Name)
  329. }
  330. p.shortcutFile(file)
  331. return
  332. }
  333. // Figure out the absolute filenames we need once and for all
  334. tempName := filepath.Join(p.dir, defTempNamer.TempName(file.Name))
  335. realName := filepath.Join(p.dir, file.Name)
  336. s := sharedPullerState{
  337. file: file,
  338. folder: p.folder,
  339. tempName: tempName,
  340. realName: realName,
  341. pullNeeded: len(pullBlocks),
  342. }
  343. if len(copyBlocks) > 0 {
  344. s.copyNeeded = 1
  345. }
  346. if debug {
  347. l.Debugf("%v need file %s; copy %d, pull %d", p, file.Name, len(copyBlocks), len(pullBlocks))
  348. }
  349. if len(copyBlocks) > 0 {
  350. cs := copyBlocksState{
  351. sharedPullerState: &s,
  352. blocks: copyBlocks,
  353. }
  354. copyChan <- cs
  355. }
  356. if len(pullBlocks) > 0 {
  357. for _, block := range pullBlocks {
  358. ps := pullBlockState{
  359. sharedPullerState: &s,
  360. block: block,
  361. }
  362. pullChan <- ps
  363. }
  364. }
  365. }
  366. // shortcutFile sets file mode and modification time, when that's the only
  367. // thing that has changed.
  368. func (p *Puller) shortcutFile(file protocol.FileInfo) {
  369. realName := filepath.Join(p.dir, file.Name)
  370. err := os.Chmod(realName, os.FileMode(file.Flags&0777))
  371. if err != nil {
  372. l.Infof("Puller (folder %q, file %q): shortcut: %v", p.folder, file.Name, err)
  373. return
  374. }
  375. t := time.Unix(file.Modified, 0)
  376. err = os.Chtimes(realName, t, t)
  377. if err != nil {
  378. l.Infof("Puller (folder %q, file %q): shortcut: %v", p.folder, file.Name, err)
  379. return
  380. }
  381. p.model.updateLocal(p.folder, file)
  382. }
  383. // copierRoutine reads pullerStates until the in channel closes and performs
  384. // the relevant copy.
  385. func (p *Puller) copierRoutine(in <-chan copyBlocksState, out chan<- *sharedPullerState) {
  386. buf := make([]byte, scanner.StandardBlockSize)
  387. nextFile:
  388. for state := range in {
  389. dstFd, err := state.tempFile()
  390. if err != nil {
  391. // Nothing more to do for this failed file (the error was logged
  392. // when it happened)
  393. continue nextFile
  394. }
  395. srcFd, err := state.sourceFile()
  396. if err != nil {
  397. // As above
  398. continue nextFile
  399. }
  400. for _, block := range state.blocks {
  401. buf = buf[:int(block.Size)]
  402. _, err = srcFd.ReadAt(buf, block.Offset)
  403. if err != nil {
  404. state.earlyClose("src read", err)
  405. srcFd.Close()
  406. continue nextFile
  407. }
  408. _, err = dstFd.WriteAt(buf, block.Offset)
  409. if err != nil {
  410. state.earlyClose("dst write", err)
  411. srcFd.Close()
  412. continue nextFile
  413. }
  414. }
  415. srcFd.Close()
  416. state.copyDone()
  417. out <- state.sharedPullerState
  418. }
  419. }
  420. func (p *Puller) pullerRoutine(in <-chan pullBlockState, out chan<- *sharedPullerState) {
  421. nextBlock:
  422. for state := range in {
  423. if state.failed() != nil {
  424. continue nextBlock
  425. }
  426. // Select the least busy device to pull the block frop.model. If we found no
  427. // feasible device at all, fail the block (and in the long run, the
  428. // file).
  429. potentialDevices := p.model.availability(p.folder, state.file.Name)
  430. selected := activity.leastBusy(potentialDevices)
  431. if selected == (protocol.DeviceID{}) {
  432. state.earlyClose("pull", errNoDevice)
  433. continue nextBlock
  434. }
  435. // Get an fd to the temporary file. Tehcnically we don't need it until
  436. // after fetching the block, but if we run into an error here there is
  437. // no point in issuing the request to the network.
  438. fd, err := state.tempFile()
  439. if err != nil {
  440. continue nextBlock
  441. }
  442. // Fetch the block, while marking the selected device as in use so that
  443. // leastBusy can select another device when someone else asks.
  444. activity.using(selected)
  445. buf, err := p.model.requestGlobal(selected, p.folder, state.file.Name, state.block.Offset, int(state.block.Size), state.block.Hash)
  446. activity.done(selected)
  447. if err != nil {
  448. state.earlyClose("pull", err)
  449. continue nextBlock
  450. }
  451. // Save the block data we got from the cluster
  452. _, err = fd.WriteAt(buf, state.block.Offset)
  453. if err != nil {
  454. state.earlyClose("save", err)
  455. continue nextBlock
  456. }
  457. state.pullDone()
  458. out <- state.sharedPullerState
  459. }
  460. }
  461. func (p *Puller) finisherRoutine(in <-chan *sharedPullerState) {
  462. for state := range in {
  463. if closed, err := state.finalClose(); closed {
  464. if debug {
  465. l.Debugln(p, "closing", state.file.Name)
  466. }
  467. if err != nil {
  468. l.Warnln("puller: final:", err)
  469. continue
  470. }
  471. // Verify the file against expected hashes
  472. fd, err := os.Open(state.tempName)
  473. if err != nil {
  474. l.Warnln("puller: final:", err)
  475. continue
  476. }
  477. err = scanner.Verify(fd, scanner.StandardBlockSize, state.file.Blocks)
  478. fd.Close()
  479. if err != nil {
  480. l.Warnln("puller: final:", state.file.Name, err)
  481. continue
  482. }
  483. // Set the correct permission bits on the new file
  484. err = os.Chmod(state.tempName, os.FileMode(state.file.Flags&0777))
  485. if err != nil {
  486. os.Remove(state.tempName)
  487. l.Warnln("puller: final:", err)
  488. continue
  489. }
  490. // Set the correct timestamp on the new file
  491. t := time.Unix(state.file.Modified, 0)
  492. err = os.Chtimes(state.tempName, t, t)
  493. if err != nil {
  494. os.Remove(state.tempName)
  495. l.Warnln("puller: final:", err)
  496. continue
  497. }
  498. // If we should use versioning, let the versioner archive the old
  499. // file before we replace it. Archiving a non-existent file is not
  500. // an error.
  501. if p.versioner != nil {
  502. err = p.versioner.Archive(state.realName)
  503. if err != nil {
  504. os.Remove(state.tempName)
  505. l.Warnln("puller: final:", err)
  506. continue
  507. }
  508. }
  509. // Replace the original file with the new one
  510. err = osutil.Rename(state.tempName, state.realName)
  511. if err != nil {
  512. os.Remove(state.tempName)
  513. l.Warnln("puller: final:", err)
  514. continue
  515. }
  516. // Record the updated file in the index
  517. p.model.updateLocal(p.folder, state.file)
  518. }
  519. }
  520. }
  521. // clean deletes orphaned temporary files
  522. func (p *Puller) clean() {
  523. filepath.Walk(p.dir, func(path string, info os.FileInfo, err error) error {
  524. if err != nil {
  525. return err
  526. }
  527. if info.Mode().IsRegular() && defTempNamer.IsTemporary(path) {
  528. os.Remove(path)
  529. }
  530. return nil
  531. })
  532. }
  533. func invalidateFolder(cfg *config.Configuration, folderID string, err error) {
  534. for i := range cfg.Folders {
  535. folder := &cfg.Folders[i]
  536. if folder.ID == folderID {
  537. folder.Invalid = err.Error()
  538. return
  539. }
  540. }
  541. }