folding.go 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. // Copyright (C) 2017 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package fs
  7. import (
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. "golang.org/x/text/unicode/norm"
  12. )
  13. // UnicodeLowercaseNormalized returns the Unicode lower case variant of s,
  14. // having also normalized it to normalization form C.
  15. func UnicodeLowercaseNormalized(s string) string {
  16. if isASCII, isLower := isASCII(s); isASCII {
  17. if isLower {
  18. return s
  19. }
  20. return toLowerASCII(s)
  21. }
  22. return toLowerUnicode(s)
  23. }
  // isASCII scans s bytewise. The first result reports whether every byte of
  // s is at most unicode.MaxASCII. The second result reports whether no byte
  // in 'A'..'Z' was encountered.
  //
  // The scan stops at the first non-ASCII byte, so in that case the second
  // result only covers the bytes preceding it.
  func isASCII(s string) (bool, bool) {
  	noUpper := true
  	for i := 0; i < len(s); i++ {
  		switch c := s[i]; {
  		case c > unicode.MaxASCII:
  			return false, noUpper
  		case c >= 'A' && c <= 'Z':
  			noUpper = false
  		}
  	}
  	return true, noUpper
  }
  // toLowerASCII returns a copy of s in which every byte in 'A'..'Z' has been
  // replaced by its lower case counterpart. All other bytes are copied
  // through untouched.
  func toLowerASCII(s string) string {
  	var out strings.Builder
  	out.Grow(len(s))

  	// start marks the beginning of the run of bytes that has not been
  	// copied to out yet.
  	start := 0
  	for i := 0; i < len(s); i++ {
  		c := s[i]
  		if 'A' <= c && c <= 'Z' {
  			// Flush the unchanged run in one go, then emit the folded byte.
  			out.WriteString(s[start:i])
  			out.WriteByte(c + ('a' - 'A'))
  			start = i + 1
  		}
  	}
  	out.WriteString(s[start:])
  	return out.String()
  }
  57. func toLowerUnicode(s string) string {
  58. i := firstCaseChange(s)
  59. if i == -1 {
  60. return norm.NFC.String(s)
  61. }
  62. var rs strings.Builder
  63. // WriteRune always reserves utf8.UTFMax bytes for non-ASCII runes,
  64. // even if it doesn't need all that space. Overallocate now to prevent
  65. // it from ever triggering a reallocation.
  66. rs.Grow(utf8.UTFMax - 1 + len(s))
  67. rs.WriteString(s[:i])
  68. for _, r := range s[i:] {
  69. if r <= unicode.MaxLatin1 && r != 'µ' {
  70. rs.WriteRune(unicode.ToLower(r))
  71. } else {
  72. rs.WriteRune(unicode.To(unicode.LowerCase, unicode.To(unicode.UpperCase, r)))
  73. }
  74. }
  75. return norm.NFC.String(rs.String())
  76. }
  // firstCaseChange returns the byte index of the first rune r in s such that
  // lower(upper(r)) != r, or -1 if s contains no such rune.
  func firstCaseChange(s string) int {
  	for pos, r := range s {
  		switch {
  		case r > unicode.MaxASCII:
  			if folded := unicode.To(unicode.LowerCase, unicode.To(unicode.UpperCase, r)); folded != r {
  				return pos
  			}
  		case 'A' <= r && r <= 'Z':
  			// Within ASCII only the upper case letters change.
  			return pos
  		}
  	}
  	return -1
  }
  91. }