speech.ts 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. import { createSignal, onCleanup } from "solid-js"
  // Minimal structural types for the parts of the speech recognition API used
  // in this file, declared locally to avoid relying on non-standard DOM typings.

  /** A single recognition result: its first alternative plus a finality flag. */
  type RecognitionResult = {
    /** First alternative of the result; only its transcript is read. */
    0: { transcript: string }
    /** True once the recognizer reports this result as final. */
    isFinal: boolean
  }

  /** Payload passed to `Recognition.onresult`. */
  type RecognitionEvent = {
    /**
     * Results carried by the event. Typed as `ArrayLike` because only `length`
     * and numeric indexing are required; the browser hands over a list object
     * rather than a real array, so array methods must not be assumed.
     */
    results: ArrayLike<RecognitionResult>
    /** Index reported by the recognizer alongside the results. */
    resultIndex: number
  }

  /** The subset of a speech recognition instance that this file configures and drives. */
  interface Recognition {
    continuous: boolean
    interimResults: boolean
    lang: string
    start: () => void
    stop: () => void
    onresult: ((e: RecognitionEvent) => void) | null
    onerror: ((e: { error: string }) => void) | null
    onend: (() => void) | null
    onstart: (() => void) | null
  }
  /** Milliseconds passed to the timer that commits an interim hypothesis when no newer result arrives. */
  const COMMIT_DELAY = 250
  /**
   * Joins a new text segment onto already accumulated text.
   *
   * @param base Text accumulated so far; returned unchanged when `addition` is blank.
   * @param addition Segment to append; surrounding whitespace is stripped first.
   * @returns `base` followed by the trimmed segment. A single space is inserted
   *   between them unless `base` already ends in whitespace or the segment
   *   starts with one of `, . ; ! ?`.
   */
  const appendSegment = (base: string, addition: string): string => {
    const trimmed = addition.trim()
    if (!trimmed) return base
    if (!base) return trimmed

    const baseEndsWithText = /\S$/.test(base)
    const startsWithPunctuation = /^[,.;!?]/.test(trimmed)
    const separator = baseEndsWithText && !startsWithPunctuation ? " " : ""
    return `${base}${separator}${trimmed}`
  }
  /**
   * Returns the part of `hypothesis` that extends `committed`, compared word by word.
   *
   * Both strings are trimmed and split on runs of whitespace. The hypothesis
   * must begin with every committed word (exact string match); otherwise it is
   * treated as unrelated and nothing is returned.
   *
   * @param committed Text that has already been accepted.
   * @param hypothesis Full transcript currently proposed by the recognizer.
   * @returns The remaining words joined by single spaces, or `""` when the
   *   hypothesis is blank, adds nothing new, or does not start with the
   *   committed words.
   */
  const extractSuffix = (committed: string, hypothesis: string): string => {
    const cleanHypothesis = hypothesis.trim()
    if (!cleanHypothesis) return ""

    const cleanCommitted = committed.trim()
    const baseTokens = cleanCommitted ? cleanCommitted.split(/\s+/) : []
    const hypothesisTokens = cleanHypothesis.split(/\s+/)

    // Every committed word has to reappear, in order, at the start of the hypothesis.
    const extendsCommitted =
      baseTokens.length <= hypothesisTokens.length &&
      baseTokens.every((token, position) => token === hypothesisTokens[position])
    if (!extendsCommitted) return ""

    return hypothesisTokens.slice(baseTokens.length).join(" ")
  }
  /**
   * Creates a speech-to-text controller on top of the browser's
   * `SpeechRecognition` / `webkitSpeechRecognition` constructor.
   *
   * Text is exposed in two layers: `committed` accumulates every accepted
   * segment, while `interim` holds the not-yet-accepted tail of the current
   * hypothesis. A pending hypothesis is committed when the recognizer reports a
   * final result, when no further result arrives for `COMMIT_DELAY`
   * milliseconds, or when recognition is stopped.
   *
   * The recognizer runs in single-utterance mode and is re-armed after each
   * session ends for as long as `start()` has been called without a matching
   * `stop()` or a fatal error.
   *
   * @param opts.lang Recognition language; falls back to `navigator.language`, then `"en-US"`.
   * @param opts.onFinal Called with each newly committed segment (trimmed).
   * @param opts.onInterim Called with the committed text plus the interim tail,
   *   or with `""` whenever the interim text is cleared.
   * @returns `isSupported()`, the reactive accessors `isRecording`, `committed`
   *   and `interim`, and the `start` / `stop` controls.
   */
  export function createSpeechRecognition(opts?: {
    lang?: string
    onFinal?: (text: string) => void
    onInterim?: (text: string) => void
  }) {
    // Pause before re-arming the recognizer after a session ends.
    const restartDelay = 150

    const Ctor: (new () => Recognition) | undefined =
      typeof window !== "undefined"
        ? (window as any).webkitSpeechRecognition || (window as any).SpeechRecognition
        : undefined
    const hasSupport = Boolean(Ctor)

    const [isRecording, setIsRecording] = createSignal(false)
    const [committed, setCommitted] = createSignal("")
    const [interim, setInterim] = createSignal("")

    let recognition: Recognition | undefined
    let shouldContinue = false // set by start(); cleared by stop(), disposal or a fatal error
    let committedText = "" // everything committed since this controller was created
    let sessionCommitted = "" // text committed since the recognizer last started
    let pendingHypothesis = "" // latest hypothesis that has not been committed yet
    let lastInterimSuffix = "" // interim tail currently published
    let shrinkCandidate: string | undefined // shorter tail seen once, waiting to be repeated
    let commitTimer: number | undefined
    let restartTimer: number | undefined

    const cancelPendingCommit = () => {
      if (commitTimer === undefined) return
      clearTimeout(commitTimer)
      commitTimer = undefined
    }

    const cancelRestart = () => {
      if (restartTimer === undefined) return
      clearTimeout(restartTimer)
      restartTimer = undefined
    }

    /**
     * Re-arms the recognizer after `restartDelay`. Both the error and the end
     * handler may ask for a restart, so requests are collapsed into one timer,
     * and the flag is re-checked when it fires so that a stop() issued in the
     * meantime is honoured.
     */
    const scheduleRestart = () => {
      if (restartTimer !== undefined) return
      restartTimer = window.setTimeout(() => {
        restartTimer = undefined
        if (!shouldContinue) return
        try {
          recognition?.start()
        } catch {
          // Best effort: start() throws when the recognizer is already running.
        }
      }, restartDelay)
    }

    /** Clears the published interim tail and the shrink-detection bookkeeping. */
    const clearInterim = (notify = true) => {
      lastInterimSuffix = ""
      shrinkCandidate = undefined
      setInterim("")
      if (notify) opts?.onInterim?.("")
    }

    /** Appends a segment to the committed text and reports it through `onFinal`. */
    const commitSegment = (segment: string) => {
      const nextCommitted = appendSegment(committedText, segment)
      if (nextCommitted === committedText) return
      committedText = nextCommitted
      setCommitted(committedText)
      opts?.onFinal?.(segment.trim())
    }

    /** Commits whatever the pending hypothesis adds beyond this session's committed text. */
    const promotePending = () => {
      if (!pendingHypothesis) return
      const suffix = extractSuffix(sessionCommitted, pendingHypothesis)
      pendingHypothesis = ""
      if (!suffix) return
      sessionCommitted = appendSegment(sessionCommitted, suffix)
      commitSegment(suffix)
      clearInterim()
    }

    /** Publishes an interim tail and arms the timer that commits it if nothing newer arrives. */
    const applyInterim = (suffix: string, hypothesis: string) => {
      cancelPendingCommit()
      pendingHypothesis = hypothesis
      lastInterimSuffix = suffix
      shrinkCandidate = undefined
      setInterim(suffix)
      opts?.onInterim?.(suffix ? appendSegment(committedText, suffix) : "")
      if (!suffix) return
      commitTimer = window.setTimeout(() => {
        commitTimer = undefined
        if (pendingHypothesis === hypothesis) promotePending()
      }, COMMIT_DELAY)
    }

    if (Ctor) {
      const instance = new Ctor()
      recognition = instance
      instance.continuous = false
      instance.interimResults = true
      instance.lang = opts?.lang || (typeof navigator !== "undefined" ? navigator.language : "en-US")

      instance.onresult = (event: RecognitionEvent) => {
        if (!event.results.length) return

        let aggregatedFinal = ""
        let latestHypothesis = ""
        for (let i = 0; i < event.results.length; i += 1) {
          const result = event.results[i]
          const transcript = (result[0]?.transcript || "").trim()
          if (!transcript) continue
          if (result.isFinal) {
            aggregatedFinal = appendSegment(aggregatedFinal, transcript)
          } else {
            latestHypothesis = transcript
          }
        }

        cancelPendingCommit()

        if (aggregatedFinal) {
          // Final text wins: commit whatever it adds and drop the interim state.
          const finalSuffix = extractSuffix(sessionCommitted, aggregatedFinal)
          if (finalSuffix) {
            sessionCommitted = appendSegment(sessionCommitted, finalSuffix)
            commitSegment(finalSuffix)
          }
          pendingHypothesis = ""
          clearInterim()
          return
        }

        if (!latestHypothesis) {
          applyInterim("", "")
          return
        }

        const suffix = extractSuffix(sessionCommitted, latestHypothesis)

        // A tail shorter than the published one (including an empty tail) is
        // only accepted once it has been seen twice in a row. The first
        // sighting is remembered without touching the published text and
        // without re-arming the commit timer.
        const shrinks = lastInterimSuffix !== "" && suffix.length < lastInterimSuffix.length
        if (shrinks && shrinkCandidate !== suffix) {
          shrinkCandidate = suffix
          pendingHypothesis = latestHypothesis
          return
        }
        applyInterim(suffix, latestHypothesis)
      }

      instance.onerror = (e: { error: string }) => {
        cancelPendingCommit()
        if (e.error === "no-speech" && shouldContinue) {
          // Silence is not fatal while listening: reset the tail and re-arm.
          clearInterim()
          scheduleRestart()
          return
        }
        lastInterimSuffix = ""
        shrinkCandidate = undefined
        shouldContinue = false
        setIsRecording(false)
      }

      instance.onstart = () => {
        sessionCommitted = ""
        pendingHypothesis = ""
        cancelPendingCommit()
        clearInterim()
        setIsRecording(true)
      }

      instance.onend = () => {
        cancelPendingCommit()
        lastInterimSuffix = ""
        shrinkCandidate = undefined
        setIsRecording(false)
        if (shouldContinue) scheduleRestart()
      }
    }

    const start = () => {
      if (!recognition) return
      shouldContinue = true
      sessionCommitted = ""
      pendingHypothesis = ""
      cancelPendingCommit()
      clearInterim(false)
      try {
        recognition.start()
      } catch {
        // Best effort: start() throws when the recognizer is already running.
      }
    }

    /** Shared shutdown path: commit pending text, clear timers and state, stop the recognizer. */
    const halt = () => {
      shouldContinue = false
      cancelRestart()
      promotePending()
      cancelPendingCommit()
      clearInterim()
      try {
        recognition?.stop()
      } catch {
        // Best effort: nothing more to do if the recognizer refuses to stop.
      }
    }

    const stop = () => {
      if (!recognition) return
      halt()
    }

    onCleanup(halt)

    return {
      isSupported: () => hasSupport,
      isRecording,
      committed,
      interim,
      start,
      stop,
    }
  }