summaryrefslogtreecommitdiff
path: root/node_modules/@jet-app/app-store/tmp/src/foundation/util/string-util.js
blob: affeba3097056d80619ba00e1dc97ad980103b20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/**
 * Created by dabolfathi on 4/26/17.
 */
/**
 * Returns a single string made by joining the entries of a list with the
 * provided separator.
 *
 * Entries that are `null` or `undefined` are skipped entirely: they contribute
 * neither text nor a separator, so the result never has a leading, trailing,
 * or doubled separator caused by a missing entry.
 *
 * @param strings The list of strings to be joined.
 * @param separator The separator to place between consecutive kept entries.
 * @returns {string|null} The joined string; "" when the list is empty or holds
 * only skipped entries; `null` when `strings` or `separator` is null/undefined.
 */
export function join(strings, separator) {
    if (strings == null || separator == null) {
        return null;
    }
    // Filter first: Array.prototype.join would otherwise render null/undefined
    // as empty strings and still emit a separator around them.
    return strings.filter((element) => element != null).join(separator);
}
/**
 * Generate a normalized string for multilingual search matching.
 *
 * Steps, in order:
 *  1. Lower-case the input.
 *  2. Drop a fixed set of symbols (trade mark, service mark, copyright,
 *     registered, Greek omega, Katakana-Hiragana prolonged sound mark).
 *  3. For each run of Latin-range characters, decompose with NFKD, strip
 *     diacritics, map dotless i to "i", and keep only ASCII letters/digits.
 *     Characters outside those runs (CJK, Arabic, Cyrillic, ...) are not
 *     decomposed.
 *  4. Remove everything that is not a letter, number, whitespace or underscore.
 *  5. Collapse whitespace runs to a single space and trim.
 *
 * @param input The input string to normalize
 * @returns A normalized string suitable for search matching; "" for falsy input
 */
export function normalizeForSearch(input) {
    if (!input) {
        return "";
    }
    // Final pass shared by the main path and the fallback. Preserves:
    // - Letters from all writing systems (\p{L})
    // - Numbers (\p{N})
    // - Whitespace (\s)
    // - Underscores (_)
    // then normalizes whitespace runs to single spaces and trims the ends.
    const stripAndCollapse = (text) => text
        .replace(/[^\p{L}\p{N}\s_]/gu, "")
        .replace(/\s+/g, " ")
        .trim();
    try {
        // Remove special characters
        // \u2122 -> (Trade Mark Sign)
        // \u2120 -> (Service Mark)
        // \u03a9 -> (Greek Capital Letter Omega)
        // \u00a9 -> (Copyright Sign)
        // \u00ae -> (Registered Sign)
        // \u30fc -> (Katakana-Hiragana Prolonged Sound Mark)
        // \u03c9 -> (Greek Small Letter Omega)
        const removedSpecialUnicodesRegex = /[\u2122\u2120\u03a9\u00a9\u00ae\u30fc\u03c9]/g;
        // Runs of characters that get Latin folding:
        // A-Za-z          - Basic Latin letters
        // \u00A1-\u00FF   - Latin-1 Supplement, deliberately starting after
        //                   U+00A0 (NO-BREAK SPACE) so that NBSP is handled as
        //                   whitespace by the final pass instead of being
        //                   deleted here, which would glue adjacent words.
        // \u0100-\u017F   - Latin Extended-A (includes dotless i \u0131)
        // \u0180-\u024F   - Latin Extended-B
        // \u1E00-\u1EFF   - Latin Extended Additional
        // \p{Diacritic}   - diacritic marks (e.g. combining marks produced by
        //                   lower-casing)
        const latinRunRegex = /[A-Za-z\u00A1-\u00FF\u0100-\u017F\u0180-\u024F\u1E00-\u1EFF\p{Diacritic}]+/gu;
        const folded = input
            .toLowerCase() // Case insensitivity
            .replace(removedSpecialUnicodesRegex, "")
            // Apply normalization only to Latin runs to preserve CJK integrity
            .replace(latinRunRegex, (latinText) => latinText
                .normalize("NFKD")
                .replace(/\p{Diacritic}/gu, "")
                // Dotless i has no decomposition, so map it explicitly
                .replace(/\u0131/g, "i")
                // Within a Latin run, anything that did not reduce to an
                // ASCII letter or digit is dropped
                .replace(/[^A-Za-z0-9]/g, ""));
        return stripAndCollapse(folded);
    }
    catch (error) {
        // Best-effort fallback: skip the Latin folding and apply only
        // lower-casing plus the final strip/collapse/trim pass.
        return stripAndCollapse(input.toLowerCase());
    }
}
/**
 * Checks whether a string contains a search term, comparing on the normalized
 * form of the string (see normalizeForSearch) so that case, diacritics and
 * special characters in the input do not affect the match.
 *
 * @param input The raw string to search in; it is normalized before comparing.
 * @param normalizedTerm The term to look for. It is used as-is, so the caller
 * is expected to pass it already normalized.
 * @returns {boolean} True when the normalized input includes the term.
 */
export function containsSearchTerm(input, normalizedTerm) {
    return normalizeForSearch(input).includes(normalizedTerm);
}
/**
 * Wraps a string in bidirectional isolate characters so that its text
 * direction is resolved independently of the surrounding text. This is useful
 * for user-generated content, such as display names, that may mix
 * left-to-right and right-to-left text.
 *
 * @param text The string to wrap with bidi isolates
 * @returns The string prefixed with First Strong Isolate (U+2068) and suffixed
 * with Pop Directional Isolate (U+2069); falsy input (e.g. "", null,
 * undefined) is returned unchanged.
 */
export function withBidiIsolates(text) {
    const firstStrongIsolate = "\u2068";
    const popDirectionalIsolate = "\u2069";
    return text ? firstStrongIsolate + text + popDirectionalIsolate : text;
}
//# sourceMappingURL=string-util.js.map