1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
/**
* Created by dabolfathi on 4/26/17.
*/
/**
 * Joins the usable entries of a list into a single string, inserting the
 * separator between each pair of entries that are kept.
 *
 * Entries that are `null` or `undefined` are skipped entirely, so they never
 * produce a missing, doubled, leading, or trailing separator.
 *
 * @param strings The list of strings to be joined.
 * @param separator The separator to place between kept entries.
 * @returns {string|null} The joined string ("" when nothing is kept), or
 * `null` when either `strings` or `separator` is null/undefined.
 */
export function join(strings, separator) {
    if (strings == null || separator == null) {
        return null;
    }
    // Filter first so separator placement depends only on the entries that
    // are actually emitted, not on their positions in the original list.
    return strings.filter((element) => element != null).join(separator);
}
/**
 * Produces a normalized form of a string for multilingual search matching.
 *
 * Processing order: lowercase the input, delete a fixed set of symbol
 * characters, fold each run of Latin-range characters down to ASCII letters
 * and digits, drop anything that is not a letter, number, whitespace, or
 * underscore, collapse whitespace runs to one space, and trim the ends.
 * Letters outside the Latin ranges (e.g. CJK, Arabic, Cyrillic) are not
 * passed through the Latin folding step.
 *
 * @param input The input string to normalize
 * @returns The normalized string, or "" when the input is falsy
 */
export function normalizeForSearch(input) {
    if (!input) {
        return "";
    }
    // Final clean-up shared by the main path and the fallback path:
    // keep letters (\p{L}), numbers (\p{N}), whitespace and underscores,
    // then squeeze whitespace runs into single spaces and trim.
    const stripAndCollapse = (text) => {
        const lettersAndNumbersOnly = text.replace(/[^\p{L}\p{N}\s_]/gu, "");
        const singleSpaced = lettersAndNumbersOnly.replace(/\s+/g, " ");
        return singleSpaced.trim();
    };
    try {
        // Characters deleted outright:
        // \u2122 trade mark sign, \u2120 service mark,
        // \u03a9 / \u03c9 Greek capital / small omega,
        // \u00a9 copyright sign, \u00ae registered sign,
        // \u30fc katakana-hiragana prolonged sound mark
        const droppedSymbols = /[\u2122\u2120\u03a9\u00a9\u00ae\u30fc\u03c9]/g;
        // A "Latin run": basic A-Za-z, \u00A0-\u00FF (Latin-1 Supplement from
        // the no-break space upward), Latin Extended-A (\u0100-\u017F, which
        // includes dotless i \u0131), Latin Extended-B (\u0180-\u024F),
        // Latin Extended Additional (\u1E00-\u1EFF), and any character with
        // the Unicode Diacritic property.
        const latinRun = /[A-Za-z\u00A0-\u00FF\u0100-\u017F\u0180-\u024F\u1E00-\u1EFF\p{Diacritic}]+/gu;
        // Reduce one Latin run to plain ASCII letters/digits.
        const foldLatinRun = (latinText) => {
            const decomposed = latinText.normalize("NFKD");
            const withoutMarks = decomposed.replace(/\p{Diacritic}/gu, "");
            // Dotless i has no decomposition, so map it by hand.
            const dottedI = withoutMarks.replace(/\u0131/g, "i");
            // Whatever is still outside A-Z, a-z, 0-9 after folding is dropped.
            return dottedI.replace(/[^A-Za-z0-9]/g, "");
        };
        const lowered = input.toLowerCase(); // case insensitivity
        const withoutSymbols = lowered.replace(droppedSymbols, "");
        const latinFolded = withoutSymbols.replace(latinRun, foldLatinRun);
        return stripAndCollapse(latinFolded);
    }
    catch (error) {
        // Fallback: skip symbol removal and Latin folding; only lowercase
        // and apply the shared clean-up.
        return stripAndCollapse(input.toLowerCase());
    }
}
/**
 * Reports whether the input string contains the given search term once the
 * input has been passed through normalizeForSearch.
 *
 * Only `input` is normalized here; `normalizedTerm` is used exactly as given.
 *
 * @param input The raw string to search within
 * @param normalizedTerm The term to look for in the normalized input
 * @returns true when the normalized input includes normalizedTerm
 */
export function containsSearchTerm(input, normalizedTerm) {
    return normalizeForSearch(input).includes(normalizedTerm);
}
/**
 * Surrounds a string with bidirectional isolate control characters so that
 * its text direction is isolated from the text around it. Intended for
 * content such as display names that may mix left-to-right and
 * right-to-left text.
 *
 * @param text The string to wrap with bidi isolates
 * @returns The text prefixed with First Strong Isolate (U+2068) and suffixed
 * with Pop Directional Isolate (U+2069); falsy input is returned unchanged
 */
export function withBidiIsolates(text) {
    const FIRST_STRONG_ISOLATE = "\u2068";
    const POP_DIRECTIONAL_ISOLATE = "\u2069";
    // Empty/null/undefined input is handed back as-is rather than wrapped.
    return text ? FIRST_STRONG_ISOLATE + text + POP_DIRECTIONAL_ISOLATE : text;
}
//# sourceMappingURL=string-util.js.map
|