|
|
@@ -0,0 +1,125 @@
|
|
|
+#pragma once
|
|
|
+
|
|
|
#include <algorithm>
#include <cwctype>
#include <regex>
#include <string>
#include <unordered_map>
|
|
|
+
|
|
|
// Replaces every non-overlapping occurrence of `from` in `input` with `to`,
// modifying `input` in place.
//
// After each substitution the search resumes just past the inserted `to`, so
// text introduced by a replacement is never re-examined (replacing "a" with
// "aa" therefore terminates).
//
// An empty `from` leaves `input` untouched: an empty needle matches at every
// position, so substituting it would never finish.
//
// Declared `inline` because the definition lives in a header; without it,
// including the header from more than one translation unit breaks the
// one-definition rule at link time.
inline void replace_all(std::wstring& input, const std::wstring& from, const std::wstring& to)
{
    if (from.empty())
        return;

    for (std::wstring::size_type at = input.find(from);
         at != std::wstring::npos;
         at = input.find(from, at + to.size()))
    {
        input.replace(at, from.size(), to);
    }
}
|
|
|
+
|
|
|
// Returns a copy of `s` with leading and trailing whitespace removed.
//
// Whitespace is classified with std::iswspace, the wide-character
// counterpart of isspace. The narrow isspace must not be used here: its
// argument has to be representable as unsigned char (or be EOF), which a
// wchar_t outside that range is not. Classification follows the current
// C locale.
//
// An empty or all-whitespace input yields an empty string.
//
// Declared `inline` because the definition lives in a header.
inline std::wstring trim(const std::wstring &s)
{
    const auto is_space = [](wchar_t ch) {
        return std::iswspace(static_cast<std::wint_t>(ch)) != 0;
    };

    // First character that should be kept (or end() if there is none).
    const auto first = std::find_if_not(s.begin(), s.end(), is_space);

    // Walk backwards, but never past `first`, so an all-whitespace string
    // collapses to the empty range [first, first).
    const auto last = std::find_if_not(s.rbegin(),
                                       std::wstring::const_reverse_iterator(first),
                                       is_space).base();

    return std::wstring(first, last);
}
|
|
|
+
|
|
|
// SLUGIFY
//
// Converts `input` into a URL-friendly slug:
//   1. characters listed in the transliteration table below (accented Latin,
//      Greek, Cyrillic, currency signs, a few symbols) are replaced by their
//      ASCII spelling;
//   2. ASCII upper-case letters are lowered;
//   3. everything that is not a-z, 0-9, '-' or whitespace is removed;
//   4. whitespace runs are collapsed and leading/trailing whitespace dropped;
//   5. every run of whitespace and/or hyphens becomes a single '-'.
//
// Example: L"  Über Straße! " -> L"uber-strasse".
//
// Hyphens that were present in the input are kept, so an input that itself
// starts or ends with '-' still produces a slug that does.
//
// Declared `inline` because the definition lives in a header.
inline std::wstring slugify(std::wstring input)
{
    // Built once on first use instead of on every call. Keys are single
    // wide characters and no replacement text contains a key, so one
    // left-to-right pass gives the same result as replacing the entries one
    // after another. Some characters are listed under more than one language
    // heading; only the first entry for a key is stored, and the repeats
    // carry the same replacement.
    static const std::unordered_map<wchar_t, const wchar_t*> charMap{
        // latin
        {L'À', L"A"}, {L'Á', L"A"}, {L'Â', L"A"}, {L'Ã', L"A"}, {L'Ä', L"A"}, {L'Å', L"A"}, {L'Æ', L"AE"},
        {L'Ç', L"C"}, {L'È', L"E"}, {L'É', L"E"}, {L'Ê', L"E"}, {L'Ë', L"E"}, {L'Ì', L"I"}, {L'Í', L"I"},
        {L'Î', L"I"}, {L'Ï', L"I"}, {L'Ð', L"D"}, {L'Ñ', L"N"}, {L'Ò', L"O"}, {L'Ó', L"O"}, {L'Ô', L"O"},
        {L'Õ', L"O"}, {L'Ö', L"O"}, {L'Ő', L"O"}, {L'Ø', L"O"}, {L'Ù', L"U"}, {L'Ú', L"U"}, {L'Û', L"U"},
        {L'Ü', L"U"}, {L'Ű', L"U"}, {L'Ý', L"Y"}, {L'Þ', L"TH"}, {L'ß', L"ss"}, {L'à', L"a"}, {L'á', L"a"},
        {L'â', L"a"}, {L'ã', L"a"}, {L'ä', L"a"}, {L'å', L"a"}, {L'æ', L"ae"}, {L'ç', L"c"}, {L'è', L"e"},
        {L'é', L"e"}, {L'ê', L"e"}, {L'ë', L"e"}, {L'ì', L"i"}, {L'í', L"i"}, {L'î', L"i"}, {L'ï', L"i"},
        {L'ð', L"d"}, {L'ñ', L"n"}, {L'ò', L"o"}, {L'ó', L"o"}, {L'ô', L"o"}, {L'õ', L"o"}, {L'ö', L"o"},
        {L'ő', L"o"}, {L'ø', L"o"}, {L'ù', L"u"}, {L'ú', L"u"}, {L'û', L"u"}, {L'ü', L"u"}, {L'ű', L"u"},
        {L'ý', L"y"}, {L'þ', L"th"}, {L'ÿ', L"y"}, {L'ẞ', L"SS"},
        // greek
        {L'α', L"a"}, {L'β', L"b"}, {L'γ', L"g"}, {L'δ', L"d"}, {L'ε', L"e"}, {L'ζ', L"z"}, {L'η', L"h"}, {L'θ', L"8"},
        {L'ι', L"i"}, {L'κ', L"k"}, {L'λ', L"l"}, {L'μ', L"m"}, {L'ν', L"n"}, {L'ξ', L"3"}, {L'ο', L"o"}, {L'π', L"p"},
        {L'ρ', L"r"}, {L'σ', L"s"}, {L'τ', L"t"}, {L'υ', L"y"}, {L'φ', L"f"}, {L'χ', L"x"}, {L'ψ', L"ps"}, {L'ω', L"w"},
        {L'ά', L"a"}, {L'έ', L"e"}, {L'ί', L"i"}, {L'ό', L"o"}, {L'ύ', L"y"}, {L'ή', L"h"}, {L'ώ', L"w"}, {L'ς', L"s"},
        {L'ϊ', L"i"}, {L'ΰ', L"y"}, {L'ϋ', L"y"}, {L'ΐ', L"i"},
        {L'Α', L"A"}, {L'Β', L"B"}, {L'Γ', L"G"}, {L'Δ', L"D"}, {L'Ε', L"E"}, {L'Ζ', L"Z"}, {L'Η', L"H"}, {L'Θ', L"8"},
        {L'Ι', L"I"}, {L'Κ', L"K"}, {L'Λ', L"L"}, {L'Μ', L"M"}, {L'Ν', L"N"}, {L'Ξ', L"3"}, {L'Ο', L"O"}, {L'Π', L"P"},
        {L'Ρ', L"R"}, {L'Σ', L"S"}, {L'Τ', L"T"}, {L'Υ', L"Y"}, {L'Φ', L"F"}, {L'Χ', L"X"}, {L'Ψ', L"PS"}, {L'Ω', L"W"},
        {L'Ά', L"A"}, {L'Έ', L"E"}, {L'Ί', L"I"}, {L'Ό', L"O"}, {L'Ύ', L"Y"}, {L'Ή', L"H"}, {L'Ώ', L"W"}, {L'Ϊ', L"I"},
        {L'Ϋ', L"Y"},
        // turkish
        {L'ş', L"s"}, {L'Ş', L"S"}, {L'ı', L"i"}, {L'İ', L"I"}, {L'ç', L"c"}, {L'Ç', L"C"}, {L'ü', L"u"}, {L'Ü', L"U"},
        {L'ö', L"o"}, {L'Ö', L"O"}, {L'ğ', L"g"}, {L'Ğ', L"G"},
        // russian
        {L'а', L"a"}, {L'б', L"b"}, {L'в', L"v"}, {L'г', L"g"}, {L'д', L"d"}, {L'е', L"e"}, {L'ё', L"yo"}, {L'ж', L"zh"},
        {L'з', L"z"}, {L'и', L"i"}, {L'й', L"j"}, {L'к', L"k"}, {L'л', L"l"}, {L'м', L"m"}, {L'н', L"n"}, {L'о', L"o"},
        {L'п', L"p"}, {L'р', L"r"}, {L'с', L"s"}, {L'т', L"t"}, {L'у', L"u"}, {L'ф', L"f"}, {L'х', L"h"}, {L'ц', L"c"},
        {L'ч', L"ch"}, {L'ш', L"sh"}, {L'щ', L"sh"}, {L'ъ', L"u"}, {L'ы', L"y"}, {L'ь', L""}, {L'э', L"e"}, {L'ю', L"yu"},
        {L'я', L"ya"},
        {L'А', L"A"}, {L'Б', L"B"}, {L'В', L"V"}, {L'Г', L"G"}, {L'Д', L"D"}, {L'Е', L"E"}, {L'Ё', L"Yo"}, {L'Ж', L"Zh"},
        {L'З', L"Z"}, {L'И', L"I"}, {L'Й', L"J"}, {L'К', L"K"}, {L'Л', L"L"}, {L'М', L"M"}, {L'Н', L"N"}, {L'О', L"O"},
        {L'П', L"P"}, {L'Р', L"R"}, {L'С', L"S"}, {L'Т', L"T"}, {L'У', L"U"}, {L'Ф', L"F"}, {L'Х', L"H"}, {L'Ц', L"C"},
        {L'Ч', L"Ch"}, {L'Ш', L"Sh"}, {L'Щ', L"Sh"}, {L'Ъ', L"U"}, {L'Ы', L"Y"}, {L'Ь', L""}, {L'Э', L"E"}, {L'Ю', L"Yu"},
        {L'Я', L"Ya"},
        // ukrainian
        {L'Є', L"Ye"}, {L'І', L"I"}, {L'Ї', L"Yi"}, {L'Ґ', L"G"}, {L'є', L"ye"}, {L'і', L"i"}, {L'ї', L"yi"}, {L'ґ', L"g"},
        // czech
        {L'č', L"c"}, {L'ď', L"d"}, {L'ě', L"e"}, {L'ň', L"n"}, {L'ř', L"r"}, {L'š', L"s"}, {L'ť', L"t"}, {L'ů', L"u"},
        {L'ž', L"z"}, {L'Č', L"C"}, {L'Ď', L"D"}, {L'Ě', L"E"}, {L'Ň', L"N"}, {L'Ř', L"R"}, {L'Š', L"S"}, {L'Ť', L"T"},
        {L'Ů', L"U"}, {L'Ž', L"Z"},
        // polish
        {L'ą', L"a"}, {L'ć', L"c"}, {L'ę', L"e"}, {L'ł', L"l"}, {L'ń', L"n"}, {L'ó', L"o"}, {L'ś', L"s"}, {L'ź', L"z"},
        {L'ż', L"z"}, {L'Ą', L"A"}, {L'Ć', L"C"}, {L'Ę', L"E"}, {L'Ł', L"L"}, {L'Ń', L"N"}, {L'Ś', L"S"},
        {L'Ź', L"Z"}, {L'Ż', L"Z"},
        // latvian
        {L'ā', L"a"}, {L'č', L"c"}, {L'ē', L"e"}, {L'ģ', L"g"}, {L'ī', L"i"}, {L'ķ', L"k"}, {L'ļ', L"l"}, {L'ņ', L"n"},
        {L'š', L"s"}, {L'ū', L"u"}, {L'ž', L"z"}, {L'Ā', L"A"}, {L'Č', L"C"}, {L'Ē', L"E"}, {L'Ģ', L"G"}, {L'Ī', L"I"},
        {L'Ķ', L"K"}, {L'Ļ', L"L"}, {L'Ņ', L"N"}, {L'Š', L"S"}, {L'Ū', L"U"}, {L'Ž', L"Z"},
        // currency
        {L'€', L"euro"}, {L'₢', L"cruzeiro"}, {L'₣', L"french franc"}, {L'£', L"pound"},
        {L'₤', L"lira"}, {L'₥', L"mill"}, {L'₦', L"naira"}, {L'₧', L"peseta"}, {L'₨', L"rupee"},
        {L'₩', L"won"}, {L'₪', L"new shequel"}, {L'₫', L"dong"}, {L'₭', L"kip"}, {L'₮', L"tugrik"},
        {L'₯', L"drachma"}, {L'₰', L"penny"}, {L'₱', L"peso"}, {L'₲', L"guarani"}, {L'₳', L"austral"},
        {L'₴', L"hryvnia"}, {L'₵', L"cedi"}, {L'¢', L"cent"}, {L'¥', L"yen"}, {L'元', L"yuan"},
        {L'円', L"yen"}, {L'﷼', L"rial"}, {L'₠', L"ecu"}, {L'¤', L"currency"}, {L'฿', L"baht"}, {L'$', L"dollar"},
        // symbols
        {L'©', L"(c)"}, {L'œ', L"oe"}, {L'Œ', L"OE"}, {L'∑', L"sum"}, {L'®', L"(r)"}, {L'†', L"+"},
        {L'“', L"\""}, {L'∂', L"d"}, {L'ƒ', L"f"}, {L'™', L"tm"},
        {L'℠', L"sm"}, {L'…', L"..."}, {L'˚', L"o"}, {L'º', L"o"}, {L'ª', L"a"}, {L'•', L"*"},
        {L'∆', L"delta"}, {L'∞', L"infinity"}, {L'♥', L"love"}, {L'&', L"and"}, {L'|', L"or"},
        {L'<', L"less"}, {L'>', L"greater"}
    };

    // Compiled once; building a std::wregex is far more expensive than using it.
    // The '-' sits last in the first class so it is unambiguously a literal.
    static const std::wregex invalidChars(L"[^a-z0-9\\s-]");
    static const std::wregex whitespaceRun(L"\\s+");
    static const std::wregex separatorRun(L"[-\\s]+");

    // remove accents / transliterate, one pass over the input
    std::wstring ascii;
    ascii.reserve(input.size());
    for (const wchar_t ch : input)
    {
        const auto hit = charMap.find(ch);
        if (hit != charMap.end())
            ascii += hit->second;
        else
            ascii += ch;
    }

    // make lower case: only A-Z matters because every other non-[a-z0-9]
    // character is stripped in the next step, and the narrow ::tolower is
    // not defined for wide characters outside the unsigned char range.
    std::transform(ascii.begin(), ascii.end(), ascii.begin(), [](wchar_t ch) {
        return (ch >= L'A' && ch <= L'Z') ? static_cast<wchar_t>(ch - L'A' + L'a') : ch;
    });

    // invalid characters
    input = std::regex_replace(ascii, invalidChars, L"");

    // convert multiple spaces into one
    input = std::regex_replace(input, whitespaceRun, L" ");

    // trim: every whitespace run is now exactly one L' ', so stripping that
    // character from both ends removes all leading/trailing whitespace.
    const auto first = input.find_first_not_of(L' ');
    if (first == std::wstring::npos)
        return std::wstring();
    const auto last = input.find_last_not_of(L' ');
    input = input.substr(first, last - first + 1);

    // replace spaces with hyphens
    return std::regex_replace(input, separatorRun, L"-");
}
|