2
0

diff.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. // Copyright 2013 Google Inc. All rights reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package diff implements a linewise diff algorithm.
  15. package diff
  16. import (
  17. "bytes"
  18. "fmt"
  19. "strings"
  20. )
  21. // Chunk represents a piece of the diff. A chunk will not have both added and
  22. // deleted lines. Equal lines are always after any added or deleted lines.
  23. // A Chunk may or may not have any lines in it, especially for the first or last
  24. // chunk in a computation.
  25. type Chunk struct {
  26. Added []string
  27. Deleted []string
  28. Equal []string
  29. }
  30. func (c *Chunk) empty() bool {
  31. return len(c.Added) == 0 && len(c.Deleted) == 0 && len(c.Equal) == 0
  32. }
  33. // Diff returns a string containing a line-by-line unified diff of the linewise
  34. // changes required to make A into B. Each line is prefixed with '+', '-', or
  35. // ' ' to indicate if it should be added, removed, or is correct respectively.
  36. func Diff(A, B string) string {
  37. aLines := strings.Split(A, "\n")
  38. bLines := strings.Split(B, "\n")
  39. chunks := DiffChunks(aLines, bLines)
  40. buf := new(bytes.Buffer)
  41. for _, c := range chunks {
  42. for _, line := range c.Added {
  43. fmt.Fprintf(buf, "+%s\n", line)
  44. }
  45. for _, line := range c.Deleted {
  46. fmt.Fprintf(buf, "-%s\n", line)
  47. }
  48. for _, line := range c.Equal {
  49. fmt.Fprintf(buf, " %s\n", line)
  50. }
  51. }
  52. return strings.TrimRight(buf.String(), "\n")
  53. }
  54. // DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm
  55. // to compute the edits required from A to B and returns the
  56. // edit chunks.
  57. func DiffChunks(a, b []string) []Chunk {
  58. // algorithm: http://www.xmailserver.org/diff2.pdf
  59. // We'll need these quantities a lot.
  60. alen, blen := len(a), len(b) // M, N
  61. // At most, it will require len(a) deletions and len(b) additions
  62. // to transform a into b.
  63. maxPath := alen + blen // MAX
  64. if maxPath == 0 {
  65. // degenerate case: two empty lists are the same
  66. return nil
  67. }
  68. // Store the endpoint of the path for diagonals.
  69. // We store only the a index, because the b index on any diagonal
  70. // (which we know during the loop below) is aidx-diag.
  71. // endpoint[maxPath] represents the 0 diagonal.
  72. //
  73. // Stated differently:
  74. // endpoint[d] contains the aidx of a furthest reaching path in diagonal d
  75. endpoint := make([]int, 2*maxPath+1) // V
  76. saved := make([][]int, 0, 8) // Vs
  77. save := func() {
  78. dup := make([]int, len(endpoint))
  79. copy(dup, endpoint)
  80. saved = append(saved, dup)
  81. }
  82. var editDistance int // D
  83. dLoop:
  84. for editDistance = 0; editDistance <= maxPath; editDistance++ {
  85. // The 0 diag(onal) represents equality of a and b. Each diagonal to
  86. // the left is numbered one lower, to the right is one higher, from
  87. // -alen to +blen. Negative diagonals favor differences from a,
  88. // positive diagonals favor differences from b. The edit distance to a
  89. // diagonal d cannot be shorter than d itself.
  90. //
  91. // The iterations of this loop cover either odds or evens, but not both,
  92. // If odd indices are inputs, even indices are outputs and vice versa.
  93. for diag := -editDistance; diag <= editDistance; diag += 2 { // k
  94. var aidx int // x
  95. switch {
  96. case diag == -editDistance:
  97. // This is a new diagonal; copy from previous iter
  98. aidx = endpoint[maxPath-editDistance+1] + 0
  99. case diag == editDistance:
  100. // This is a new diagonal; copy from previous iter
  101. aidx = endpoint[maxPath+editDistance-1] + 1
  102. case endpoint[maxPath+diag+1] > endpoint[maxPath+diag-1]:
  103. // diagonal d+1 was farther along, so use that
  104. aidx = endpoint[maxPath+diag+1] + 0
  105. default:
  106. // diagonal d-1 was farther (or the same), so use that
  107. aidx = endpoint[maxPath+diag-1] + 1
  108. }
  109. // On diagonal d, we can compute bidx from aidx.
  110. bidx := aidx - diag // y
  111. // See how far we can go on this diagonal before we find a difference.
  112. for aidx < alen && bidx < blen && a[aidx] == b[bidx] {
  113. aidx++
  114. bidx++
  115. }
  116. // Store the end of the current edit chain.
  117. endpoint[maxPath+diag] = aidx
  118. // If we've found the end of both inputs, we're done!
  119. if aidx >= alen && bidx >= blen {
  120. save() // save the final path
  121. break dLoop
  122. }
  123. }
  124. save() // save the current path
  125. }
  126. if editDistance == 0 {
  127. return nil
  128. }
  129. chunks := make([]Chunk, editDistance+1)
  130. x, y := alen, blen
  131. for d := editDistance; d > 0; d-- {
  132. endpoint := saved[d]
  133. diag := x - y
  134. insert := diag == -d || (diag != d && endpoint[maxPath+diag-1] < endpoint[maxPath+diag+1])
  135. x1 := endpoint[maxPath+diag]
  136. var x0, xM, kk int
  137. if insert {
  138. kk = diag + 1
  139. x0 = endpoint[maxPath+kk]
  140. xM = x0
  141. } else {
  142. kk = diag - 1
  143. x0 = endpoint[maxPath+kk]
  144. xM = x0 + 1
  145. }
  146. y0 := x0 - kk
  147. var c Chunk
  148. if insert {
  149. c.Added = b[y0:][:1]
  150. } else {
  151. c.Deleted = a[x0:][:1]
  152. }
  153. if xM < x1 {
  154. c.Equal = a[xM:][:x1-xM]
  155. }
  156. x, y = x0, y0
  157. chunks[d] = c
  158. }
  159. if x > 0 {
  160. chunks[0].Equal = a[:x]
  161. }
  162. if chunks[0].empty() {
  163. chunks = chunks[1:]
  164. }
  165. if len(chunks) == 0 {
  166. return nil
  167. }
  168. return chunks
  169. }