| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- // Copyright 2013 Google Inc. All rights reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // Package diff implements a linewise diff algorithm.
- package diff
- import (
- "bytes"
- "fmt"
- "strings"
- )
- // Chunk represents a piece of the diff. A chunk will not have both added and
- // deleted lines. Equal lines are always after any added or deleted lines.
- // A Chunk may or may not have any lines in it, especially for the first or last
- // chunk in a computation.
- type Chunk struct {
- Added []string
- Deleted []string
- Equal []string
- }
- func (c *Chunk) empty() bool {
- return len(c.Added) == 0 && len(c.Deleted) == 0 && len(c.Equal) == 0
- }
- // Diff returns a string containing a line-by-line unified diff of the linewise
- // changes required to make A into B. Each line is prefixed with '+', '-', or
- // ' ' to indicate if it should be added, removed, or is correct respectively.
- func Diff(A, B string) string {
- aLines := strings.Split(A, "\n")
- bLines := strings.Split(B, "\n")
- chunks := DiffChunks(aLines, bLines)
- buf := new(bytes.Buffer)
- for _, c := range chunks {
- for _, line := range c.Added {
- fmt.Fprintf(buf, "+%s\n", line)
- }
- for _, line := range c.Deleted {
- fmt.Fprintf(buf, "-%s\n", line)
- }
- for _, line := range c.Equal {
- fmt.Fprintf(buf, " %s\n", line)
- }
- }
- return strings.TrimRight(buf.String(), "\n")
- }
- // DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm
- // to compute the edits required from A to B and returns the
- // edit chunks.
- func DiffChunks(a, b []string) []Chunk {
- // algorithm: http://www.xmailserver.org/diff2.pdf
- // We'll need these quantities a lot.
- alen, blen := len(a), len(b) // M, N
- // At most, it will require len(a) deletions and len(b) additions
- // to transform a into b.
- maxPath := alen + blen // MAX
- if maxPath == 0 {
- // degenerate case: two empty lists are the same
- return nil
- }
- // Store the endpoint of the path for diagonals.
- // We store only the a index, because the b index on any diagonal
- // (which we know during the loop below) is aidx-diag.
- // endpoint[maxPath] represents the 0 diagonal.
- //
- // Stated differently:
- // endpoint[d] contains the aidx of a furthest reaching path in diagonal d
- endpoint := make([]int, 2*maxPath+1) // V
- saved := make([][]int, 0, 8) // Vs
- save := func() {
- dup := make([]int, len(endpoint))
- copy(dup, endpoint)
- saved = append(saved, dup)
- }
- var editDistance int // D
- dLoop:
- for editDistance = 0; editDistance <= maxPath; editDistance++ {
- // The 0 diag(onal) represents equality of a and b. Each diagonal to
- // the left is numbered one lower, to the right is one higher, from
- // -alen to +blen. Negative diagonals favor differences from a,
- // positive diagonals favor differences from b. The edit distance to a
- // diagonal d cannot be shorter than d itself.
- //
- // The iterations of this loop cover either odds or evens, but not both,
- // If odd indices are inputs, even indices are outputs and vice versa.
- for diag := -editDistance; diag <= editDistance; diag += 2 { // k
- var aidx int // x
- switch {
- case diag == -editDistance:
- // This is a new diagonal; copy from previous iter
- aidx = endpoint[maxPath-editDistance+1] + 0
- case diag == editDistance:
- // This is a new diagonal; copy from previous iter
- aidx = endpoint[maxPath+editDistance-1] + 1
- case endpoint[maxPath+diag+1] > endpoint[maxPath+diag-1]:
- // diagonal d+1 was farther along, so use that
- aidx = endpoint[maxPath+diag+1] + 0
- default:
- // diagonal d-1 was farther (or the same), so use that
- aidx = endpoint[maxPath+diag-1] + 1
- }
- // On diagonal d, we can compute bidx from aidx.
- bidx := aidx - diag // y
- // See how far we can go on this diagonal before we find a difference.
- for aidx < alen && bidx < blen && a[aidx] == b[bidx] {
- aidx++
- bidx++
- }
- // Store the end of the current edit chain.
- endpoint[maxPath+diag] = aidx
- // If we've found the end of both inputs, we're done!
- if aidx >= alen && bidx >= blen {
- save() // save the final path
- break dLoop
- }
- }
- save() // save the current path
- }
- if editDistance == 0 {
- return nil
- }
- chunks := make([]Chunk, editDistance+1)
- x, y := alen, blen
- for d := editDistance; d > 0; d-- {
- endpoint := saved[d]
- diag := x - y
- insert := diag == -d || (diag != d && endpoint[maxPath+diag-1] < endpoint[maxPath+diag+1])
- x1 := endpoint[maxPath+diag]
- var x0, xM, kk int
- if insert {
- kk = diag + 1
- x0 = endpoint[maxPath+kk]
- xM = x0
- } else {
- kk = diag - 1
- x0 = endpoint[maxPath+kk]
- xM = x0 + 1
- }
- y0 := x0 - kk
- var c Chunk
- if insert {
- c.Added = b[y0:][:1]
- } else {
- c.Deleted = a[x0:][:1]
- }
- if xM < x1 {
- c.Equal = a[xM:][:x1-xM]
- }
- x, y = x0, y0
- chunks[d] = c
- }
- if x > 0 {
- chunks[0].Equal = a[:x]
- }
- if chunks[0].empty() {
- chunks = chunks[1:]
- }
- if len(chunks) == 0 {
- return nil
- }
- return chunks
- }
|