strutil.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645
  1. // Copyright (c) 2014 The sortutil Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package strutil collects utils supplemental to the standard strings package.
  5. package strutil
  6. import (
  7. "bytes"
  8. "encoding/base32"
  9. "encoding/base64"
  10. "fmt"
  11. "io"
  12. "reflect"
  13. "sort"
  14. "strconv"
  15. "strings"
  16. "sync"
  17. )
  18. // Base32ExtDecode decodes base32 extended (RFC 4648) text to binary data.
  19. func Base32ExtDecode(text []byte) (data []byte, err error) {
  20. n := base32.HexEncoding.DecodedLen(len(text))
  21. data = make([]byte, n)
  22. decoder := base32.NewDecoder(base32.HexEncoding, bytes.NewBuffer(text))
  23. if n, err = decoder.Read(data); err != nil {
  24. n = 0
  25. }
  26. data = data[:n]
  27. return
  28. }
  29. // Base32ExtEncode encodes binary data to base32 extended (RFC 4648) encoded text.
  30. func Base32ExtEncode(data []byte) (text []byte) {
  31. n := base32.HexEncoding.EncodedLen(len(data))
  32. buf := bytes.NewBuffer(make([]byte, 0, n))
  33. encoder := base32.NewEncoder(base32.HexEncoding, buf)
  34. encoder.Write(data)
  35. encoder.Close()
  36. if buf.Len() != n {
  37. panic("internal error")
  38. }
  39. return buf.Bytes()
  40. }
  41. // Base64Decode decodes base64 text to binary data.
  42. func Base64Decode(text []byte) (data []byte, err error) {
  43. n := base64.StdEncoding.DecodedLen(len(text))
  44. data = make([]byte, n)
  45. decoder := base64.NewDecoder(base64.StdEncoding, bytes.NewBuffer(text))
  46. if n, err = decoder.Read(data); err != nil {
  47. n = 0
  48. }
  49. data = data[:n]
  50. return
  51. }
  52. // Base64Encode encodes binary data to base64 encoded text.
  53. func Base64Encode(data []byte) (text []byte) {
  54. n := base64.StdEncoding.EncodedLen(len(data))
  55. buf := bytes.NewBuffer(make([]byte, 0, n))
  56. encoder := base64.NewEncoder(base64.StdEncoding, buf)
  57. encoder.Write(data)
  58. encoder.Close()
  59. if buf.Len() != n {
  60. panic("internal error")
  61. }
  62. return buf.Bytes()
  63. }
  64. // Formatter is an io.Writer extended by a fmt.Printf like function Format
  65. type Formatter interface {
  66. io.Writer
  67. Format(format string, args ...interface{}) (n int, errno error)
  68. }
  69. type indentFormatter struct {
  70. io.Writer
  71. indent []byte
  72. indentLevel int
  73. state int
  74. }
  75. const (
  76. st0 = iota
  77. stBOL
  78. stPERC
  79. stBOLPERC
  80. )
  81. // IndentFormatter returns a new Formatter which interprets %i and %u in the
  82. // Format() format string as indent and undent commands. The commands can
  83. // nest. The Formatter writes to io.Writer 'w' and inserts one 'indent'
  84. // string per current indent level value.
  85. // Behaviour of commands reaching negative indent levels is undefined.
  86. // IndentFormatter(os.Stdout, "\t").Format("abc%d%%e%i\nx\ny\n%uz\n", 3)
  87. // output:
  88. // abc3%e
  89. // x
  90. // y
  91. // z
  92. // The Go quoted string literal form of the above is:
  93. // "abc%%e\n\tx\n\tx\nz\n"
  94. // The commands can be scattered between separate invocations of Format(),
  95. // i.e. the formatter keeps track of the indent level and knows if it is
  96. // positioned on start of a line and should emit indentation(s).
  97. // The same output as above can be produced by e.g.:
  98. // f := IndentFormatter(os.Stdout, " ")
  99. // f.Format("abc%d%%e%i\nx\n", 3)
  100. // f.Format("y\n%uz\n")
  101. func IndentFormatter(w io.Writer, indent string) Formatter {
  102. return &indentFormatter{w, []byte(indent), 0, stBOL}
  103. }
  104. func (f *indentFormatter) format(flat bool, format string, args ...interface{}) (n int, errno error) {
  105. buf := []byte{}
  106. for i := 0; i < len(format); i++ {
  107. c := format[i]
  108. switch f.state {
  109. case st0:
  110. switch c {
  111. case '\n':
  112. cc := c
  113. if flat && f.indentLevel != 0 {
  114. cc = ' '
  115. }
  116. buf = append(buf, cc)
  117. f.state = stBOL
  118. case '%':
  119. f.state = stPERC
  120. default:
  121. buf = append(buf, c)
  122. }
  123. case stBOL:
  124. switch c {
  125. case '\n':
  126. cc := c
  127. if flat && f.indentLevel != 0 {
  128. cc = ' '
  129. }
  130. buf = append(buf, cc)
  131. case '%':
  132. f.state = stBOLPERC
  133. default:
  134. if !flat {
  135. for i := 0; i < f.indentLevel; i++ {
  136. buf = append(buf, f.indent...)
  137. }
  138. }
  139. buf = append(buf, c)
  140. f.state = st0
  141. }
  142. case stBOLPERC:
  143. switch c {
  144. case 'i':
  145. f.indentLevel++
  146. f.state = stBOL
  147. case 'u':
  148. f.indentLevel--
  149. f.state = stBOL
  150. default:
  151. if !flat {
  152. for i := 0; i < f.indentLevel; i++ {
  153. buf = append(buf, f.indent...)
  154. }
  155. }
  156. buf = append(buf, '%', c)
  157. f.state = st0
  158. }
  159. case stPERC:
  160. switch c {
  161. case 'i':
  162. f.indentLevel++
  163. f.state = st0
  164. case 'u':
  165. f.indentLevel--
  166. f.state = st0
  167. default:
  168. buf = append(buf, '%', c)
  169. f.state = st0
  170. }
  171. default:
  172. panic("unexpected state")
  173. }
  174. }
  175. switch f.state {
  176. case stPERC, stBOLPERC:
  177. buf = append(buf, '%')
  178. }
  179. return f.Write([]byte(fmt.Sprintf(string(buf), args...)))
  180. }
  181. func (f *indentFormatter) Format(format string, args ...interface{}) (n int, errno error) {
  182. return f.format(false, format, args...)
  183. }
  184. type flatFormatter indentFormatter
  185. // FlatFormatter returns a newly created Formatter with the same functionality as the one returned
  186. // by IndentFormatter except it allows a newline in the 'format' string argument of Format
  187. // to pass through iff indent level is currently zero.
  188. //
  189. // If indent level is non-zero then such new lines are changed to a space character.
  190. // There is no indent string, the %i and %u format verbs are used solely to determine the indent level.
  191. //
  192. // The FlatFormatter is intended for flattening of normally nested structure textual representation to
  193. // a one top level structure per line form.
  194. // FlatFormatter(os.Stdout, " ").Format("abc%d%%e%i\nx\ny\n%uz\n", 3)
  195. // output in the form of a Go quoted string literal:
  196. // "abc3%%e x y z\n"
  197. func FlatFormatter(w io.Writer) Formatter {
  198. return (*flatFormatter)(IndentFormatter(w, "").(*indentFormatter))
  199. }
  200. func (f *flatFormatter) Format(format string, args ...interface{}) (n int, errno error) {
  201. return (*indentFormatter)(f).format(true, format, args...)
  202. }
  203. // Pool handles aligning of strings having equal values to the same string instance.
  204. // Intended use is to conserve some memory e.g. where a large number of identically valued strings
  205. // with non identical backing arrays may exists in several semantically distinct instances of some structs.
  206. // Pool is *not* concurrent access safe. It doesn't handle common prefix/suffix aligning,
  207. // e.g. having s1 == "abc" and s2 == "bc", s2 is not automatically aligned as s1[1:].
  208. type Pool struct {
  209. pool map[string]string
  210. }
  211. // NewPool returns a newly created Pool.
  212. func NewPool() *Pool {
  213. return &Pool{map[string]string{}}
  214. }
  215. // Align returns a string with the same value as its argument. It guarantees that
  216. // all aligned strings share a single instance in memory.
  217. func (p *Pool) Align(s string) string {
  218. if a, ok := p.pool[s]; ok {
  219. return a
  220. }
  221. s = StrPack(s)
  222. p.pool[s] = s
  223. return s
  224. }
  225. // Count returns the number of items in the pool.
  226. func (p *Pool) Count() int {
  227. return len(p.pool)
  228. }
  229. // GoPool is a concurrent access safe version of Pool.
  230. type GoPool struct {
  231. pool map[string]string
  232. rwm *sync.RWMutex
  233. }
  234. // NewGoPool returns a newly created GoPool.
  235. func NewGoPool() (p *GoPool) {
  236. return &GoPool{map[string]string{}, &sync.RWMutex{}}
  237. }
  238. // Align returns a string with the same value as its argument. It guarantees that
  239. // all aligned strings share a single instance in memory.
  240. func (p *GoPool) Align(s string) (y string) {
  241. if s != "" {
  242. p.rwm.RLock() // R++
  243. if a, ok := p.pool[s]; ok { // found
  244. p.rwm.RUnlock() // R--
  245. return a
  246. }
  247. p.rwm.RUnlock() // R--
  248. // not found but with a race condition, retry within a write lock
  249. p.rwm.Lock() // W++
  250. defer p.rwm.Unlock() // W--
  251. if a, ok := p.pool[s]; ok { // done in a race
  252. return a
  253. }
  254. // we won
  255. s = StrPack(s)
  256. p.pool[s] = s
  257. return s
  258. }
  259. return
  260. }
  261. // Count returns the number of items in the pool.
  262. func (p *GoPool) Count() int {
  263. return len(p.pool)
  264. }
  265. // Dict is a string <-> id bijection. Dict is *not* concurrent access safe for assigning new ids
  266. // to strings not yet contained in the bijection.
  267. // Id for an empty string is guaranteed to be 0,
  268. // thus Id for any non empty string is guaranteed to be non zero.
  269. type Dict struct {
  270. si map[string]int
  271. is []string
  272. }
  273. // NewDict returns a newly created Dict.
  274. func NewDict() (d *Dict) {
  275. d = &Dict{map[string]int{}, []string{}}
  276. d.Id("")
  277. return
  278. }
  279. // Count returns the number of items in the dict.
  280. func (d *Dict) Count() int {
  281. return len(d.is)
  282. }
  283. // Id maps string s to its numeric identificator.
  284. func (d *Dict) Id(s string) (y int) {
  285. if y, ok := d.si[s]; ok {
  286. return y
  287. }
  288. s = StrPack(s)
  289. y = len(d.is)
  290. d.si[s] = y
  291. d.is = append(d.is, s)
  292. return
  293. }
  294. // S maps an id to its string value and ok == true. Id values not contained in the bijection
  295. // return "", false.
  296. func (d *Dict) S(id int) (s string, ok bool) {
  297. if id >= len(d.is) {
  298. return "", false
  299. }
  300. return d.is[id], true
  301. }
  302. // GoDict is a concurrent access safe version of Dict.
  303. type GoDict struct {
  304. si map[string]int
  305. is []string
  306. rwm *sync.RWMutex
  307. }
  308. // NewGoDict returns a newly created GoDict.
  309. func NewGoDict() (d *GoDict) {
  310. d = &GoDict{map[string]int{}, []string{}, &sync.RWMutex{}}
  311. d.Id("")
  312. return
  313. }
  314. // Count returns the number of items in the dict.
  315. func (d *GoDict) Count() int {
  316. return len(d.is)
  317. }
  318. // Id maps string s to its numeric identificator. The implementation honors getting
  319. // an existing id at the cost of assigning a new one.
  320. func (d *GoDict) Id(s string) (y int) {
  321. d.rwm.RLock() // R++
  322. if y, ok := d.si[s]; ok { // found
  323. d.rwm.RUnlock() // R--
  324. return y
  325. }
  326. d.rwm.RUnlock() // R--
  327. // not found but with a race condition
  328. d.rwm.Lock() // W++ recheck with write lock
  329. defer d.rwm.Unlock() // W--
  330. if y, ok := d.si[s]; ok { // some other goroutine won already
  331. return y
  332. }
  333. // a race free not found state => insert the string
  334. s = StrPack(s)
  335. y = len(d.is)
  336. d.si[s] = y
  337. d.is = append(d.is, s)
  338. return
  339. }
  340. // S maps an id to its string value and ok == true. Id values not contained in the bijection
  341. // return "", false.
  342. func (d *GoDict) S(id int) (s string, ok bool) {
  343. d.rwm.RLock() // R++
  344. defer d.rwm.RUnlock() // R--
  345. if id >= len(d.is) {
  346. return "", false
  347. }
  348. return d.is[id], true
  349. }
  350. // StrPack returns a new instance of s which is tightly packed in memory.
  351. // It is intended for avoiding the situation where having a live reference
  352. // to a string slice over an unreferenced biger underlying string keeps the biger one
  353. // in memory anyway - it can't be GCed.
  354. func StrPack(s string) string {
  355. return string([]byte(s))
  356. }
  357. // JoinFields returns strings in flds joined by sep. Flds may contain arbitrary
  358. // bytes, including the sep as they are safely escaped. JoinFields panics if
  359. // sep is the backslash character or if len(sep) != 1.
  360. func JoinFields(flds []string, sep string) string {
  361. if len(sep) != 1 || sep == "\\" {
  362. panic("invalid separator")
  363. }
  364. a := make([]string, len(flds))
  365. for i, v := range flds {
  366. v = strings.Replace(v, "\\", "\\0", -1)
  367. a[i] = strings.Replace(v, sep, "\\1", -1)
  368. }
  369. return strings.Join(a, sep)
  370. }
  371. // SplitFields splits s, which must be produced by JoinFields using the same
  372. // sep, into flds. SplitFields panics if sep is the backslash character or if
  373. // len(sep) != 1.
  374. func SplitFields(s, sep string) (flds []string) {
  375. if len(sep) != 1 || sep == "\\" {
  376. panic("invalid separator")
  377. }
  378. a := strings.Split(s, sep)
  379. r := make([]string, len(a))
  380. for i, v := range a {
  381. v = strings.Replace(v, "\\1", sep, -1)
  382. r[i] = strings.Replace(v, "\\0", "\\", -1)
  383. }
  384. return r
  385. }
// PrettyPrintHooks allow to customize the result of PrettyPrint for the types
// used as keys of the map. A value whose dynamic type is found in the map is
// rendered by calling the associated function instead of the default
// rendering. The function receives the Formatter to write to, the value
// itself and the prefix and suffix to be emitted around it.
type PrettyPrintHooks map[reflect.Type]func(f Formatter, v interface{}, prefix, suffix string)
  389. // PrettyString returns the output of PrettyPrint as a string.
  390. func PrettyString(v interface{}, prefix, suffix string, hooks PrettyPrintHooks) string {
  391. var b bytes.Buffer
  392. PrettyPrint(&b, v, prefix, suffix, hooks)
  393. return b.String()
  394. }
  395. // PrettyPrint pretty prints v to w. Zero values and unexported struct fields
  396. // are omitted.
  397. func PrettyPrint(w io.Writer, v interface{}, prefix, suffix string, hooks PrettyPrintHooks) {
  398. if v == nil {
  399. return
  400. }
  401. f := IndentFormatter(w, "· ")
  402. defer func() {
  403. if e := recover(); e != nil {
  404. f.Format("\npanic: %v", e)
  405. }
  406. }()
  407. prettyPrint(nil, f, prefix, suffix, v, hooks)
  408. }
  409. func prettyPrint(protect map[interface{}]struct{}, sf Formatter, prefix, suffix string, v interface{}, hooks PrettyPrintHooks) {
  410. if v == nil {
  411. return
  412. }
  413. rt := reflect.TypeOf(v)
  414. if handler := hooks[rt]; handler != nil {
  415. handler(sf, v, prefix, suffix)
  416. return
  417. }
  418. rv := reflect.ValueOf(v)
  419. switch rt.Kind() {
  420. case reflect.Slice:
  421. if rv.Len() == 0 {
  422. return
  423. }
  424. sf.Format("%s[]%T{ // len %d%i\n", prefix, rv.Index(0).Interface(), rv.Len())
  425. for i := 0; i < rv.Len(); i++ {
  426. prettyPrint(protect, sf, fmt.Sprintf("%d: ", i), ",\n", rv.Index(i).Interface(), hooks)
  427. }
  428. suffix = strings.Replace(suffix, "%", "%%", -1)
  429. sf.Format("%u}" + suffix)
  430. case reflect.Array:
  431. if reflect.Zero(rt).Interface() == rv.Interface() {
  432. return
  433. }
  434. sf.Format("%s[%d]%T{%i\n", prefix, rv.Len(), rv.Index(0).Interface())
  435. for i := 0; i < rv.Len(); i++ {
  436. prettyPrint(protect, sf, fmt.Sprintf("%d: ", i), ",\n", rv.Index(i).Interface(), hooks)
  437. }
  438. suffix = strings.Replace(suffix, "%", "%%", -1)
  439. sf.Format("%u}" + suffix)
  440. case reflect.Struct:
  441. if rt.NumField() == 0 {
  442. return
  443. }
  444. if reflect.DeepEqual(reflect.Zero(rt).Interface(), rv.Interface()) {
  445. return
  446. }
  447. sf.Format("%s%T{%i\n", prefix, v)
  448. for i := 0; i < rt.NumField(); i++ {
  449. f := rv.Field(i)
  450. if !f.CanInterface() {
  451. continue
  452. }
  453. prettyPrint(protect, sf, fmt.Sprintf("%s: ", rt.Field(i).Name), ",\n", f.Interface(), hooks)
  454. }
  455. suffix = strings.Replace(suffix, "%", "%%", -1)
  456. sf.Format("%u}" + suffix)
  457. case reflect.Ptr:
  458. if rv.IsNil() {
  459. return
  460. }
  461. rvi := rv.Interface()
  462. if _, ok := protect[rvi]; ok {
  463. suffix = strings.Replace(suffix, "%", "%%", -1)
  464. sf.Format("%s&%T{ /* recursive/repetitive pointee not shown */ }"+suffix, prefix, rv.Elem().Interface())
  465. return
  466. }
  467. if protect == nil {
  468. protect = map[interface{}]struct{}{}
  469. }
  470. protect[rvi] = struct{}{}
  471. prettyPrint(protect, sf, prefix+"&", suffix, rv.Elem().Interface(), hooks)
  472. case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int8:
  473. if v := rv.Int(); v != 0 {
  474. suffix = strings.Replace(suffix, "%", "%%", -1)
  475. sf.Format("%s%v"+suffix, prefix, v)
  476. }
  477. case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint8:
  478. if v := rv.Uint(); v != 0 {
  479. suffix = strings.Replace(suffix, "%", "%%", -1)
  480. sf.Format("%s%v"+suffix, prefix, v)
  481. }
  482. case reflect.Float32, reflect.Float64:
  483. if v := rv.Float(); v != 0 {
  484. suffix = strings.Replace(suffix, "%", "%%", -1)
  485. sf.Format("%s%v"+suffix, prefix, v)
  486. }
  487. case reflect.Complex64, reflect.Complex128:
  488. if v := rv.Complex(); v != 0 {
  489. suffix = strings.Replace(suffix, "%", "%%", -1)
  490. sf.Format("%s%v"+suffix, prefix, v)
  491. }
  492. case reflect.Uintptr:
  493. if v := rv.Uint(); v != 0 {
  494. suffix = strings.Replace(suffix, "%", "%%", -1)
  495. sf.Format("%s%v"+suffix, prefix, v)
  496. }
  497. case reflect.UnsafePointer:
  498. s := fmt.Sprintf("%p", rv.Interface())
  499. if s == "0x0" {
  500. return
  501. }
  502. suffix = strings.Replace(suffix, "%", "%%", -1)
  503. sf.Format("%s%s"+suffix, prefix, s)
  504. case reflect.Bool:
  505. if v := rv.Bool(); v {
  506. suffix = strings.Replace(suffix, "%", "%%", -1)
  507. sf.Format("%s%v"+suffix, prefix, rv.Bool())
  508. }
  509. case reflect.String:
  510. s := rv.Interface().(string)
  511. if s == "" {
  512. return
  513. }
  514. suffix = strings.Replace(suffix, "%", "%%", -1)
  515. sf.Format("%s%q"+suffix, prefix, s)
  516. case reflect.Chan:
  517. if reflect.Zero(rt).Interface() == rv.Interface() {
  518. return
  519. }
  520. c := rv.Cap()
  521. s := ""
  522. if c != 0 {
  523. s = fmt.Sprintf("// capacity: %d", c)
  524. }
  525. suffix = strings.Replace(suffix, "%", "%%", -1)
  526. sf.Format("%s%s %s%s"+suffix, prefix, rt.ChanDir(), rt.Elem().Name(), s)
  527. case reflect.Func:
  528. if rv.IsNil() {
  529. return
  530. }
  531. var in, out []string
  532. for i := 0; i < rt.NumIn(); i++ {
  533. x := reflect.Zero(rt.In(i))
  534. in = append(in, fmt.Sprintf("%T", x.Interface()))
  535. }
  536. if rt.IsVariadic() {
  537. i := len(in) - 1
  538. in[i] = "..." + in[i][2:]
  539. }
  540. for i := 0; i < rt.NumOut(); i++ {
  541. out = append(out, rt.Out(i).Name())
  542. }
  543. s := "(" + strings.Join(in, ", ") + ")"
  544. t := strings.Join(out, ", ")
  545. if len(out) > 1 {
  546. t = "(" + t + ")"
  547. }
  548. if t != "" {
  549. t = " " + t
  550. }
  551. suffix = strings.Replace(suffix, "%", "%%", -1)
  552. sf.Format("%sfunc%s%s { ... }"+suffix, prefix, s, t)
  553. case reflect.Map:
  554. keys := rv.MapKeys()
  555. if len(keys) == 0 {
  556. return
  557. }
  558. var buf bytes.Buffer
  559. nf := IndentFormatter(&buf, "· ")
  560. var skeys []string
  561. for i, k := range keys {
  562. prettyPrint(protect, nf, "", "", k.Interface(), hooks)
  563. skeys = append(skeys, fmt.Sprintf("%s%10d", buf.Bytes(), i))
  564. buf.Reset()
  565. }
  566. sort.Strings(skeys)
  567. sf.Format("%s%T{%i\n", prefix, v)
  568. for _, k := range skeys {
  569. si := strings.TrimSpace(k[len(k)-10:])
  570. k = k[:len(k)-10]
  571. n, _ := strconv.ParseUint(si, 10, 64)
  572. mv := rv.MapIndex(keys[n])
  573. prettyPrint(protect, sf, fmt.Sprintf("%s: ", k), ",\n", mv.Interface(), hooks)
  574. }
  575. suffix = strings.Replace(suffix, "%", "%%", -1)
  576. sf.Format("%u}" + suffix)
  577. }
  578. }