#!/usr/bin/env bash
#
# Regenerates the table of contents of a Markdown file in place.
#
# Headings are collected with jq (fenced code blocks are skipped) and the
# resulting list is spliced between a pair of "<!-- AUTOGENERATED TOC -->"
# marker lines with gawk.
#
# Requires: bash, jq, gawk.

# -E: ERR traps are inherited by functions; -e: abort on unhandled failures;
# -u: unset variables are errors; pipefail: a pipeline fails if any stage fails.
set -Eeuo pipefail

# Name this script was invoked as; interpolated into the usage text.
self="$(basename "$0")"
 
#######################################
# Print the help text.
# Globals:   self (read) - script name shown in the synopsis and example
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr when reporting
#            a usage error)
#######################################
usage() {
	# "<<-" strips leading tabs only, so the here-doc body and its terminator
	# must stay tab-indented; the spaces before "eg:" are part of the output.
	cat <<-EOU
		usage: $self path/to/markdown.md
		   eg: $self README.md
		WARNING: this will *always* clobber any path/to/markdown.md.{toc,bak} while processing; use with caution!
	EOU
}
 
# --- argument handling -------------------------------------------------------

# The single positional argument: the Markdown file to update in place.
markdown="${1:-}"

# Require an argument that names an existing, non-empty file.
if [[ $# -lt 1 || ! -s "$markdown" ]]; then
	usage >&2
	exit 1
fi

# Intermediate files live next to the target; remove them on every exit path
# (success or failure) so an aborted run leaves nothing behind.  The trap is
# installed only after validation so an empty argument can never expand to
# bare ".toc"/".bak" paths.
cleanup() {
	rm -f -- "$markdown.toc" "$markdown.bak"
}
trap cleanup EXIT

# --- step 1: build the TOC text ----------------------------------------------

# see https://gist.github.com/tianon/75e267d9137b1c2978031b66b3a98987 for an insane test case for this (with several rough edges)
#
# The jq program folds over the raw input lines, carrying this state:
#   .toc     accumulated TOC text (one tab-indented numbered entry per heading)
#   .ignore  true while inside a fenced code block (toggled by lines starting with three backticks)
#   .levels  per-depth counters; deeper counters reset when a shallower heading appears
#   .seen    anchors already emitted, used to de-duplicate with -1, -2, ... suffixes
jq --raw-input --null-input --raw-output '
	reduce inputs as $line ({ toc: "" };
		if $line | test("^```") then
			.ignore |= not
		else . end
		| if .ignore then . else
			(
				$line
				| capture("^(?<hash>#+)[[:space:]]*(?<heading>.*?)[[:space:]]*$")
				// null
			) as $cap
			| if $cap then
				($cap.hash | length) as $level
				| .levels[$level] += 1
				| .levels |= (.[range($level+1; length)] = 0)
				| (
					$cap.heading
					| ascii_downcase
					# https://github.com/thlorenz/anchor-markdown-header/blob/6b9bc1c902e48942666859fb6f795d91cbfd48e7/anchor-markdown-header.js#L33-L48
					| gsub(" "; "-")
					# escape codes (commented out because this is not something GitHub strips, although it *does* strip % which is not included below, so that is added here)
					#| gsub("%[abcdef0-9]{2}"; ""; "i")
					| gsub("%"; "")
					# single chars that are removed
					| gsub("[\\\\/?!:\\[\\]`.,()*\"'"'"';{}+=<>~$|#@&–—]"; "")
					# CJK punctuations that are removed
					# NOTE: these must remain the fullwidth/CJK code points; the ASCII look-alikes are
					# already handled by the previous gsub, and an ASCII hyphen or bracket inside this
					# class would be parsed as a range operator / nested class instead of a literal
					| gsub("[。？！，、；：“”【】（）〔〕［］﹃﹄“ ”‘’﹁﹂—…－～《》〈〉「」]"; "")
					# Strip emojis (*technically* this is way too aggressive and will strip out *all* UTF-8, but 🤷)
					| (split("") | map(select(utf8bytelength == 1)) | join(""))
					# TODO Strip embedded markdown formatting
				) as $anchor
				# handle repetition (same end anchor)
				| (
					(.seen // []) as $seen
					| first(
						# this 1000 limits how many repeated headings we can have, but 1000 of the exact same header text seems pretty generous 🙊
						$anchor + (range(1000) | if . > 0 then "-\(.)" else "" end)
						| select(IN($seen[]) | not)
					)
					// error("repetition level too deep on #\($anchor) (\($line)) at line \(input_line_number)")
				) as $finalAnchor
				| .toc += "\("\t" * ($level-1) // "")\(.levels[$level]).\t[\($cap.heading)](#\($finalAnchor))\n"
				| .seen += [ $finalAnchor ]
			else . end
		end
	)
	| .toc
' "$markdown" > "$markdown.toc" || exit

# --- step 2: splice the TOC between the marker lines --------------------------

# Everything between a pair of marker lines is replaced by: marker, blank line,
# TOC text, marker.  Lines outside marker pairs are copied through unchanged.
# The TOC file is read with getline rather than by shelling out, so the path is
# never interpreted by a shell.
gawk -v tocFile="$markdown.toc" '
	/^<!-- AUTOGENERATED TOC -->$/ {
		inToc = !inToc
		seenToc = 1
		if (inToc) {
			print
			print ""
			while ((status = (getline tocLine < tocFile)) > 0) {
				print tocLine
			}
			close(tocFile)
			if (status < 0) {
				print "error: unable to read " tocFile > "/dev/stderr"
				failed = 1
				exit 1
			}
			# no need for another newline because tocFile should already end with one
			print
		}
		next
	}
	!inToc { print }
	END {
		if (failed) exit 1
		# an opening marker without a closing one would drop the rest of the document
		if (inToc) {
			print "error: unterminated <!-- AUTOGENERATED TOC --> marker; refusing to truncate the file" > "/dev/stderr"
			exit 1
		}
		if (!seenToc) {
			print "warning: no <!-- AUTOGENERATED TOC --> marker found; no TOC was inserted" > "/dev/stderr"
		}
	}
' "$markdown" > "$markdown.bak" || exit

# --- step 3: publish -----------------------------------------------------------

# Replace the original only after both steps succeeded; the EXIT trap removes
# the remaining intermediate file(s).
mv -f -- "$markdown.bak" "$markdown"