diff-pr.sh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. #!/usr/bin/env bash
  2. set -Eeuo pipefail
  3. shopt -s dotglob
  4. # make sure we can GTFO
  5. trap 'echo >&2 Ctrl+C captured, exiting; exit 1' SIGINT
  6. # if bashbrew is missing, bail early with a sane error
  7. bashbrew --version > /dev/null
  8. usage() {
  9. cat <<-EOUSAGE
  10. usage: $0 [PR number] [repo[:tag]]
  11. ie: $0 1024
  12. $0 9001 debian php django
  13. EOUSAGE
  14. }
  15. # TODO flags parsing
  16. allFiles=
  17. listTarballContents=1
  18. findCopies='20%'
  19. uninterestingTarballContent=(
  20. # "config_diff_2017_01_07.log"
  21. 'var/log/YaST2/'
  22. # "ks-script-mqmz_080.log"
  23. # "ks-script-ycfq606i.log"
  24. 'var/log/anaconda/'
  25. # "2016-12-20/"
  26. 'var/lib/yum/history/'
  27. 'var/lib/dnf/history/'
  28. # "a/f8c032d2be757e1a70f00336b55c434219fee230-acl-2.2.51-12.el7-x86_64/var_uuid"
  29. 'var/lib/yum/yumdb/'
  30. 'var/lib/dnf/yumdb/'
  31. # "b42ff584.0"
  32. 'etc/pki/tls/rootcerts/'
  33. # "09/401f736622f2c9258d14388ebd47900bbab126"
  34. 'usr/lib/.build-id/'
  35. )
  36. # prints "$2$1$3$1...$N"
  37. join() {
  38. local sep="$1"; shift
  39. local out; printf -v out "${sep//%/%%}%s" "$@"
  40. echo "${out#$sep}"
  41. }
  42. uninterestingTarballGrep="^([.]?/)?($(join '|' "${uninterestingTarballContent[@]}"))"
  43. if [ "$#" -eq 0 ]; then
  44. usage >&2
  45. exit 1
  46. fi
  47. pull="$1" # PR number
  48. shift
  49. diffDir="$(readlink -f "$BASH_SOURCE")"
  50. diffDir="$(dirname "$diffDir")"
  51. tempDir="$(mktemp -d)"
  52. trap "rm -rf '$tempDir'" EXIT
  53. cd "$tempDir"
  54. git clone --quiet \
  55. https://github.com/docker-library/official-images.git \
  56. oi
  57. if [ "$pull" != '0' ]; then
  58. git -C oi fetch --quiet \
  59. origin "pull/$pull/merge":refs/heads/pull
  60. else
  61. git -C oi fetch --quiet --update-shallow \
  62. "$diffDir" HEAD:refs/heads/pull
  63. fi
  64. externalPins=
  65. if [ "$#" -eq 0 ]; then
  66. externalPins="$(git -C oi/.external-pins diff --no-renames --name-only HEAD...pull -- '*/**')"
  67. images="$(git -C oi/library diff --no-renames --name-only HEAD...pull -- .)"
  68. if [ -z "$images" ] && [ -z "$externalPins" ]; then
  69. exit 0
  70. fi
  71. images="$(xargs -rn1 basename <<<"$images")"
  72. set -- $images
  73. fi
  74. export BASHBREW_LIBRARY="$PWD/oi/library"
  75. : "${BASHBREW_ARCH:=amd64}" # TODO something smarter with arches
  76. export BASHBREW_ARCH
  77. # TODO something less hacky than "git archive" hackery, like a "bashbrew archive" or "bashbrew context" or something
  78. template='
  79. tempDir="$(mktemp -d)"
  80. {{- "\n" -}}
  81. {{- range $.Entries -}}
  82. {{- $arch := .HasArchitecture arch | ternary arch (.Architectures | first) -}}
  83. {{- /* cannot replace ArchDockerFroms with bashbrew fetch or the arch selector logic has to be duplicated 🥹*/ -}}
  84. {{- $froms := $.ArchDockerFroms $arch . -}}
  85. {{- $outDir := join "_" $.RepoName (.Tags | last) -}}
  86. git -C "{{ gitCache }}" archive --format=tar
  87. {{- " " -}}
  88. {{- "--prefix=" -}}
  89. {{- $outDir -}}
  90. {{- "/" -}}
  91. {{- " " -}}
  92. {{- .ArchGitCommit $arch -}}
  93. {{- ":" -}}
  94. {{- $dir := .ArchDirectory $arch -}}
  95. {{- (eq $dir ".") | ternary "" $dir -}}
  96. {{- "\n" -}}
  97. mkdir -p "$tempDir/{{- $outDir -}}" && echo "{{- .ArchBuilder $arch -}}" > "$tempDir/{{- $outDir -}}/.bashbrew-builder" && echo "{{- .ArchFile $arch -}}" > "$tempDir/{{- $outDir -}}/.bashbrew-file"
  98. {{- "\n" -}}
  99. {{- end -}}
  100. tar -cC "$tempDir" . && rm -rf "$tempDir"
  101. '
  102. _tar-t() {
  103. tar -t "$@" \
  104. | grep -vE "$uninterestingTarballGrep" \
  105. | sed -e 's!^[.]/!!' \
  106. -r \
  107. -e 's!([/.-]|^)((lib)?(c?python|py)-?)[0-9]+([.][0-9]+)?([/.-]|$)!\1\2XXX\6!g' \
  108. | sort
  109. }
  110. _jq() {
  111. if [ "$#" -eq 0 ]; then
  112. set -- '.'
  113. fi
  114. jq --tab -S "$@"
  115. }
  116. copy-tar() {
  117. local src="$1"; shift
  118. local dst="$1"; shift
  119. if [ -n "$allFiles" ]; then
  120. mkdir -p "$dst"
  121. cp -al "$src"/*/ "$dst/"
  122. return
  123. fi
  124. local d indexes=() dockerfiles=()
  125. for d in "$src"/*/.bashbrew-file; do
  126. [ -f "$d" ] || continue
  127. local bf; bf="$(< "$d")"
  128. local dDir; dDir="$(dirname "$d")"
  129. local builder; builder="$(< "$dDir/.bashbrew-builder")"
  130. if [ "$builder" = 'oci-import' ]; then
  131. indexes+=( "$dDir/$bf" )
  132. else
  133. dockerfiles+=( "$dDir/$bf" )
  134. if [ "$bf" = 'Dockerfile' ]; then
  135. # if "Dockerfile.builder" exists, let's check that too (busybox, hello-world)
  136. if [ -f "$dDir/$bf.builder" ]; then
  137. dockerfiles+=( "$dDir/$bf.builder" )
  138. fi
  139. fi
  140. fi
  141. rm "$d" "$dDir/.bashbrew-builder" # remove the ".bashbrew-*" files we created
  142. done
  143. # now that we're done with our globbing needs, let's disable globbing so it doesn't give us wrong answers
  144. local -
  145. set -o noglob
  146. for i in "${indexes[@]}"; do
  147. local iName; iName="$(basename "$i")"
  148. local iDir; iDir="$(dirname "$i")"
  149. local iDirName; iDirName="$(basename "$iDir")"
  150. local iDst="$dst/$iDirName"
  151. mkdir -p "$iDst"
  152. _jq . "$i" > "$iDst/$iName"
  153. local digest
  154. digest="$(jq -r --arg name "$iName" '
  155. if $name == "index.json" then
  156. .manifests[0].digest
  157. else
  158. .digest
  159. end
  160. ' "$i")"
  161. local blob="blobs/${digest//://}"
  162. local blobDir; blobDir="$(dirname "$blob")"
  163. local manifest="$iDir/$blob"
  164. mkdir -p "$iDst/$blobDir"
  165. _jq . "$manifest" > "$iDst/$blob"
  166. local configDigest; configDigest="$(jq -r '.config.digest' "$manifest")"
  167. local blob="blobs/${configDigest//://}"
  168. local blobDir; blobDir="$(dirname "$blob")"
  169. local config="$iDir/$blob"
  170. mkdir -p "$iDst/$blobDir"
  171. _jq . "$config" > "$iDst/$blob"
  172. local layers
  173. layers="$(jq -r '[ .layers[].digest | @sh ] | join(" ")' "$manifest")"
  174. eval "layers=( $layers )"
  175. local layerDigest
  176. for layerDigest in "${layers[@]}"; do
  177. local blob="blobs/${layerDigest//://}"
  178. local blobDir; blobDir="$(dirname "$blob")"
  179. local layer="$iDir/$blob"
  180. mkdir -p "$iDst/$blobDir"
  181. _tar-t -f "$layer" > "$iDst/$blob 'tar -t'"
  182. done
  183. done
  184. for d in "${dockerfiles[@]}"; do
  185. local dDir; dDir="$(dirname "$d")"
  186. local dDirName; dDirName="$(basename "$dDir")"
  187. # TODO choke on "syntax" parser directive
  188. # TODO handle "escape" parser directive reasonably
  189. local flatDockerfile; flatDockerfile="$(
  190. gawk '
  191. BEGIN { line = "" }
  192. /^[[:space:]]*#/ {
  193. gsub(/^[[:space:]]+/, "")
  194. print
  195. next
  196. }
  197. {
  198. if (match($0, /^(.*)(\\[[:space:]]*)$/, m)) {
  199. line = line m[1]
  200. next
  201. }
  202. print line $0
  203. line = ""
  204. }
  205. ' "$d"
  206. )"
  207. local IFS=$'\n'
  208. local copyAddContext; copyAddContext="$(awk '
  209. toupper($1) == "COPY" || toupper($1) == "ADD" {
  210. for (i = 2; i < NF; i++) {
  211. if ($i ~ /^--from=/) {
  212. next
  213. }
  214. # COPY and ADD options
  215. if ($i ~ /^--(chown|chmod|link|parents|exclude)=/) {
  216. continue
  217. }
  218. # additional ADD options
  219. if ($i ~ /^--(keep-git-dir|checksum)=/) {
  220. continue
  221. }
  222. for ( ; i < NF; i++) {
  223. print $i
  224. }
  225. }
  226. }
  227. ' <<<"$flatDockerfile")"
  228. local dBase; dBase="$(basename "$d")"
  229. local files=(
  230. "$dBase"
  231. $copyAddContext
  232. # some extra files which are likely interesting if they exist, but no big loss if they do not
  233. ' .dockerignore' # will be used automatically by "docker build"
  234. ' *.manifest' # debian/ubuntu "package versions" list
  235. ' *.ks' # fedora "kickstart" (rootfs build script)
  236. ' build*.txt' # ubuntu "build-info.txt", debian "build-command.txt"
  237. # usefulness yet to be proven:
  238. #' *.log'
  239. #' {MD5,SHA1,SHA256}SUMS'
  240. #' *.{md5,sha1,sha256}'
  241. # (the space prefix is removed below and is used to ignore non-matching globs so that bad "Dockerfile" entries appropriately lead to failure)
  242. )
  243. unset IFS
  244. mkdir -p "$dst/$dDirName"
  245. local f origF failureMatters
  246. for origF in "${files[@]}"; do
  247. f="${origF# }" # trim off leading space (indicates we don't care about failure)
  248. [ "$f" = "$origF" ] && failureMatters=1 || failureMatters=
  249. local globbed
  250. # "find: warning: -path ./xxx/ will not match anything because it ends with /."
  251. local findGlobbedPath="${f%/}"
  252. findGlobbedPath="${findGlobbedPath#./}"
  253. local globbedStr; globbedStr="$(cd "$dDir" && find -path "./$findGlobbedPath")"
  254. local -a globbed=( $globbedStr )
  255. if [ "${#globbed[@]}" -eq 0 ]; then
  256. globbed=( "$f" )
  257. fi
  258. local g
  259. for g in "${globbed[@]}"; do
  260. local srcG="$dDir/$g" dstG="$dst/$dDirName/$g"
  261. if [ -z "$failureMatters" ] && [ ! -e "$srcG" ]; then
  262. continue
  263. fi
  264. local gDir; gDir="$(dirname "$dstG")"
  265. mkdir -p "$gDir"
  266. cp -alT "$srcG" "$dstG"
  267. if [ -n "$listTarballContents" ]; then
  268. case "$g" in
  269. *.tar.* | *.tgz)
  270. if [ -s "$dstG" ]; then
  271. _tar-t -f "$dstG" > "$dstG 'tar -t'"
  272. fi
  273. ;;
  274. esac
  275. fi
  276. done
  277. done
  278. done
  279. }
  280. # a "bashbrew cat" template that gives us the last / "least specific" tags for the arguments
  281. # (in other words, this is "bashbrew list --uniq" but last instead of first)
  282. templateLastTags='
  283. {{- range .TagEntries -}}
  284. {{- $.RepoName -}}
  285. {{- ":" -}}
  286. {{- .Tags | last -}}
  287. {{- "\n" -}}
  288. {{- end -}}
  289. '
  290. _metadata-files() {
  291. if [ "$#" -gt 0 ]; then
  292. bashbrew list "$@" 2>>temp/_bashbrew.err | sort -uV > temp/_bashbrew-list || :
  293. bashbrew cat --format '{{ range .Entries }}{{ range .Architectures }}{{ . }}{{ "\n" }}{{ end }}{{ end }}' "$@" 2>>temp/_bashbrew.err | sort -u > temp/_bashbrew-arches || :
  294. "$diffDir/_bashbrew-cat-sorted.sh" "$@" 2>>temp/_bashbrew.err > temp/_bashbrew-cat || :
  295. # piping "bashbrew list" first so that .TagEntries is filled up (keeping "templateLastTags" simpler)
  296. # sorting that by version number so it's ~stable
  297. # then doing --build-order on that, which is a "stable sort"
  298. # then redoing that list back into "templateLastTags" so we get the tags we want listed (not the tags "--uniq" chooses)
  299. bashbrew list --uniq "$@" \
  300. | xargs -r bashbrew cat --format "$templateLastTags" \
  301. | sort -V \
  302. | xargs -r bashbrew list --uniq --build-order 2>>temp/_bashbrew.err \
  303. | xargs -r bashbrew cat --format "$templateLastTags" 2>>temp/_bashbrew.err \
  304. > temp/_bashbrew-list-build-order || :
  305. # oci images can't be fetched with ArchDockerFroms
  306. # todo: use each first arch instead of current arch
  307. bashbrew fetch --arch-filter "$@"
  308. script="$(bashbrew cat --format "$template" "$@")"
  309. mkdir tar
  310. ( eval "$script" | tar -xiC tar )
  311. copy-tar tar temp
  312. rm -rf tar
  313. # TODO we should *also* validate that our lists ended up non-empty 😬
  314. cat >&2 temp/_bashbrew.err
  315. fi
  316. if [ -n "$externalPins" ] && command -v crane &> /dev/null; then
  317. local file
  318. for file in $externalPins; do
  319. [ -e "oi/$file" ] || continue
  320. local pin digest dir
  321. pin="$("$diffDir/.external-pins/tag.sh" "$file")"
  322. digest="$(< "oi/$file")"
  323. dir="temp/$file"
  324. mkdir -p "$dir"
  325. bashbrew remote arches --json "$pin@$digest" | _jq > "$dir/bashbrew.json"
  326. local manifests manifest
  327. manifests="$(jq -r '
  328. [ (
  329. .arches
  330. | if has(env.BASHBREW_ARCH) then
  331. .[env.BASHBREW_ARCH]
  332. else
  333. .[keys_unsorted | first]
  334. end
  335. )[].digest | @sh ]
  336. | join(" ")
  337. ' "$dir/bashbrew.json")"
  338. eval "manifests=( $manifests )"
  339. for manifest in "${manifests[@]}"; do
  340. crane manifest "$pin@$manifest" | _jq > "$dir/manifest-${manifest//:/_}.json"
  341. local config
  342. config="$(jq -r '.config.digest' "$dir/manifest-${manifest//:/_}.json")"
  343. crane blob "$pin@$config" | _jq > "$dir/manifest-${manifest//:/_}-config.json"
  344. done
  345. done
  346. fi
  347. }
  348. mkdir temp
  349. git -C temp init --quiet
  350. git -C temp config user.name 'Bogus'
  351. git -C temp config user.email 'bogus@bogus'
  352. # handle "new-image" PRs gracefully
  353. for img; do touch "$BASHBREW_LIBRARY/$img"; [ -s "$BASHBREW_LIBRARY/$img" ] || echo 'Maintainers: New Image! :D (@docker-library-bot)' > "$BASHBREW_LIBRARY/$img"; done
  354. _metadata-files "$@"
  355. git -C temp add . || :
  356. git -C temp commit --quiet --allow-empty -m 'initial' || :
  357. git -C oi clean --quiet --force
  358. git -C oi checkout --quiet pull
  359. # handle "deleted-image" PRs gracefully :(
  360. for img; do touch "$BASHBREW_LIBRARY/$img"; [ -s "$BASHBREW_LIBRARY/$img" ] || echo 'Maintainers: Deleted Image D: (@docker-library-bot)' > "$BASHBREW_LIBRARY/$img"; done
  361. git -C temp rm --quiet -rf . || :
  362. _metadata-files "$@"
  363. git -C temp add .
  364. git -C temp diff \
  365. --find-copies-harder \
  366. --find-copies="$findCopies" \
  367. --find-renames="$findCopies" \
  368. --ignore-blank-lines \
  369. --ignore-space-at-eol \
  370. --ignore-space-change \
  371. --irreversible-delete \
  372. --minimal \
  373. --staged