|
|
@@ -1,4 +1,4 @@
|
|
|
-import { normalizeString } from "../text-normalization"
|
|
|
+import { normalizeString, unescapeHtmlEntities } from "../text-normalization"
|
|
|
|
|
|
describe("Text normalization utilities", () => {
|
|
|
describe("normalizeString", () => {
|
|
|
@@ -30,4 +30,50 @@ describe("Text normalization utilities", () => {
|
|
|
expect(normalizeString(input)).toBe('Let\'s test this-with some "fancy" punctuation... and spaces')
|
|
|
})
|
|
|
})
|
|
|
+
|
|
|
+ describe("unescapeHtmlEntities", () => {
|
|
|
+ test("unescapes basic HTML entities", () => {
|
|
|
+ expect(unescapeHtmlEntities("&lt;div&gt;Hello&lt;/div&gt;")).toBe("<div>Hello</div>")
|
|
|
+ })
|
|
|
+
|
|
|
+ test("unescapes ampersand entity", () => {
|
|
|
+ expect(unescapeHtmlEntities("This &amp; that")).toBe("This & that")
|
|
|
+ })
|
|
|
+
|
|
|
+ test("unescapes quote entities", () => {
|
|
|
+ expect(unescapeHtmlEntities("&quot;quoted&quot; and &#39;single-quoted&#39;")).toBe(
|
|
|
+ "\"quoted\" and 'single-quoted'",
|
|
|
+ )
|
|
|
+ })
|
|
|
+
|
|
|
+ test("unescapes apostrophe entity", () => {
|
|
|
+ expect(unescapeHtmlEntities("Don&apos;t worry")).toBe("Don't worry")
|
|
|
+ })
|
|
|
+
|
|
|
+ test("handles mixed content with multiple entity types", () => {
|
|
|
+ expect(
|
|
|
+ unescapeHtmlEntities(
|
|
|
+ "&lt;a href=&quot;https://example.com?param1=value&amp;param2=value&quot;&gt;Link&lt;/a&gt;",
|
|
|
+ ),
|
|
|
+ ).toBe('<a href="https://example.com?param1=value&param2=value">Link</a>')
|
|
|
+ })
|
|
|
+
|
|
|
+ test("handles mixed content with apostrophe entities", () => {
|
|
|
+ expect(
|
|
|
+ unescapeHtmlEntities(
|
|
|
+ "&lt;div&gt;Don&apos;t forget that Tom&amp;Jerry&apos;s show is at 3 o&apos;clock&lt;/div&gt;",
|
|
|
+ ),
|
|
|
+ ).toBe("<div>Don't forget that Tom&Jerry's show is at 3 o'clock</div>")
|
|
|
+ })
|
|
|
+
|
|
|
+ test("returns original string when no entities are present", () => {
|
|
|
+ const original = "Plain text without entities"
|
|
|
+ expect(unescapeHtmlEntities(original)).toBe(original)
|
|
|
+ })
|
|
|
+
|
|
|
+ test("handles empty or undefined input", () => {
|
|
|
+ expect(unescapeHtmlEntities("")).toBe("")
|
|
|
+ expect(unescapeHtmlEntities(undefined as unknown as string)).toBe(undefined)
|
|
|
+ })
|
|
|
+ })
|
|
|
})
|