cosmopolitan/libc/str/iswseparator.c
Jōshin e16a7d8f3b
flip et / noet in modelines
`et` means `expandtab`.

```sh
rg 'vi: .* :vi' -l -0 | \
  xargs -0 sed -i '' 's/vi: \(.*\) et\(.*\)  :vi/vi: \1 xoet\2:vi/'
rg 'vi: .*  :vi' -l -0 | \
  xargs -0 sed -i '' 's/vi: \(.*\)noet\(.*\):vi/vi: \1et\2  :vi/'
rg 'vi: .*  :vi' -l -0 | \
  xargs -0 sed -i '' 's/vi: \(.*\)xoet\(.*\):vi/vi: \1noet\2:vi/'
```
2023-12-07 22:17:11 -05:00

424 lines
20 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/str/str.h"
static const unsigned short kCodes[][2] = {
{0x00aa, 0x00aa}, /* 1x English */
{0x00b2, 0x00b3}, /* 2x English Arabic */
{0x00b5, 0x00b5}, /* 1x Greek */
{0x00b9, 0x00ba}, /* 2x English Arabic */
{0x00bc, 0x00be}, /* 3x Vulgar English Arabic */
{0x00c0, 0x00d6}, /* 23x Watin */
{0x00d8, 0x00f6}, /* 31x Watin */
{0x0100, 0x02c1}, /* 450x Watin-AB,IPA,Spacemod */
{0x02c6, 0x02d1}, /* 12x Spacemod */
{0x02e0, 0x02e4}, /* 5x Spacemod */
{0x02ec, 0x02ec}, /* 1x Spacemod */
{0x02ee, 0x02ee}, /* 1x Spacemod */
{0x0370, 0x0374}, /* 5x Greek */
{0x0376, 0x0377}, /* 2x Greek */
{0x037a, 0x037d}, /* 4x Greek */
{0x037f, 0x037f}, /* 1x Greek */
{0x0386, 0x0386}, /* 1x Greek */
{0x0388, 0x038a}, /* 3x Greek */
{0x038c, 0x038c}, /* 1x Greek */
{0x038e, 0x03a1}, /* 20x Greek */
{0x03a3, 0x03f5}, /* 83x Greek */
{0x03f7, 0x0481}, /* 139x Greek */
{0x048a, 0x052f}, /* 166x Cyrillic */
{0x0531, 0x0556}, /* 38x Armenian */
{0x0560, 0x0588}, /* 41x Armenian */
{0x05d0, 0x05ea}, /* 27x Hebrew */
{0x0620, 0x064a}, /* 43x Arabic */
{0x0660, 0x0669}, /* 10x Arabic */
{0x0671, 0x06d3}, /* 99x Arabic */
{0x06ee, 0x06fc}, /* 15x Arabic */
{0x0712, 0x072f}, /* 30x Syriac */
{0x074d, 0x07a5}, /* 89x Syriac,Arabic2,Thaana */
{0x07c0, 0x07ea}, /* 43x NKo */
{0x0800, 0x0815}, /* 22x Samaritan */
{0x0840, 0x0858}, /* 25x Mandaic */
{0x0904, 0x0939}, /* 54x Devanagari */
{0x0993, 0x09a8}, /* 22x Bengali */
{0x09e6, 0x09f1}, /* 12x Bengali */
{0x0a13, 0x0a28}, /* 22x Gurmukhi */
{0x0a66, 0x0a6f}, /* 10x Gurmukhi */
{0x0a93, 0x0aa8}, /* 22x Gujarati */
{0x0b13, 0x0b28}, /* 22x Oriya */
{0x0c92, 0x0ca8}, /* 23x Kannada */
{0x0caa, 0x0cb3}, /* 10x Kannada */
{0x0ce6, 0x0cef}, /* 10x Kannada */
{0x0d12, 0x0d3a}, /* 41x Malayalam */
{0x0d85, 0x0d96}, /* 18x Sinhala */
{0x0d9a, 0x0db1}, /* 24x Sinhala */
{0x0de6, 0x0def}, /* 10x Sinhala */
{0x0e01, 0x0e30}, /* 48x Thai */
{0x0e8c, 0x0ea3}, /* 24x Lao */
{0x0f20, 0x0f33}, /* 20x Tibetan */
{0x0f49, 0x0f6c}, /* 36x Tibetan */
{0x109e, 0x10c5}, /* 40x Myanmar,Georgian */
{0x10d0, 0x10fa}, /* 43x Georgian */
{0x10fc, 0x1248}, /* 333x Georgian,Hangul,Ethiopic */
{0x13a0, 0x13f5}, /* 86x Cherokee */
{0x1401, 0x166d}, /* 621x Aboriginal */
{0x16a0, 0x16ea}, /* 75x Runic */
{0x1700, 0x170c}, /* 13x Tagalog */
{0x1780, 0x17b3}, /* 52x Khmer */
{0x1820, 0x1878}, /* 89x Mongolian */
{0x1a00, 0x1a16}, /* 23x Buginese */
{0x1a20, 0x1a54}, /* 53x Tai Tham */
{0x1a80, 0x1a89}, /* 10x Tai Tham */
{0x1a90, 0x1a99}, /* 10x Tai Tham */
{0x1b05, 0x1b33}, /* 47x Balinese */
{0x1b50, 0x1b59}, /* 10x Balinese */
{0x1b83, 0x1ba0}, /* 30x Sundanese */
{0x1bae, 0x1be5}, /* 56x Sundanese */
{0x1c90, 0x1cba}, /* 43x Georgian2 */
{0x1cbd, 0x1cbf}, /* 3x Georgian2 */
{0x1e00, 0x1f15}, /* 278x Watin-C,Greek2 */
{0x2070, 0x2071}, /* 2x Supersub */
{0x2074, 0x2079}, /* 6x Supersub */
{0x207f, 0x2089}, /* 11x Supersub */
{0x2090, 0x209c}, /* 13x Supersub */
{0x2100, 0x2117}, /* 24x Letterlike */
{0x2119, 0x213f}, /* 39x Letterlike */
{0x2145, 0x214a}, /* 6x Letterlike */
{0x214c, 0x218b}, /* 64x Letterlike,Numbery */
{0x21af, 0x21cd}, /* 31x Arrows */
{0x21d5, 0x21f3}, /* 31x Arrows */
{0x230c, 0x231f}, /* 20x Technical */
{0x232b, 0x237b}, /* 81x Technical */
{0x237d, 0x239a}, /* 30x Technical */
{0x23b4, 0x23db}, /* 40x Technical */
{0x23e2, 0x2426}, /* 69x Technical,ControlPictures */
{0x2460, 0x25b6}, /* 343x Enclosed,Boxes,Blocks,Shapes */
{0x25c2, 0x25f7}, /* 54x Shapes */
{0x2600, 0x266e}, /* 111x Symbols */
{0x2670, 0x2767}, /* 248x Symbols,Dingbats */
{0x2776, 0x27bf}, /* 74x Dingbats */
{0x2800, 0x28ff}, /* 256x Braille */
{0x2c00, 0x2c2e}, /* 47x Glagolitic */
{0x2c30, 0x2c5e}, /* 47x Glagolitic */
{0x2c60, 0x2ce4}, /* 133x Watin-D */
{0x2d00, 0x2d25}, /* 38x Georgian2 */
{0x2d30, 0x2d67}, /* 56x Tifinagh */
{0x2d80, 0x2d96}, /* 23x Ethiopic2 */
{0x2e2f, 0x2e2f}, /* 1x Punctuation2 */
{0x3005, 0x3007}, /* 3x CJK Symbols & Punctuation */
{0x3021, 0x3029}, /* 9x CJK Symbols & Punctuation */
{0x3031, 0x3035}, /* 5x CJK Symbols & Punctuation */
{0x3038, 0x303c}, /* 5x CJK Symbols & Punctuation */
{0x3041, 0x3096}, /* 86x Hiragana */
{0x30a1, 0x30fa}, /* 90x Katakana */
{0x3105, 0x312f}, /* 43x Bopomofo */
{0x3131, 0x318e}, /* 94x Hangul Compatibility Jamo */
{0x31a0, 0x31ba}, /* 27x Bopomofo Extended */
{0x31f0, 0x31ff}, /* 16x Katakana Phonetic Extensions */
{0x3220, 0x3229}, /* 10x Enclosed CJK Letters & Months */
{0x3248, 0x324f}, /* 8x Enclosed CJK Letters & Months */
{0x3251, 0x325f}, /* 15x Enclosed CJK Letters & Months */
{0x3280, 0x3289}, /* 10x Enclosed CJK Letters & Months */
{0x32b1, 0x32bf}, /* 15x Enclosed CJK Letters & Months */
{0x3400, 0x4db5}, /* 6582x CJK Unified Ideographs Extension A */
{0x4dc0, 0x9fef}, /* 21040x Yijing Hexagram, CJK Unified Ideographs */
{0xa000, 0xa48c}, /* 1165x Yi Syllables */
{0xa4d0, 0xa4fd}, /* 46x Lisu */
{0xa500, 0xa60c}, /* 269x Vai */
{0xa610, 0xa62b}, /* 28x Vai */
{0xa6a0, 0xa6ef}, /* 80x Bamum */
{0xa80c, 0xa822}, /* 23x Syloti Nagri */
{0xa840, 0xa873}, /* 52x Phags-pa */
{0xa882, 0xa8b3}, /* 50x Saurashtra */
{0xa8d0, 0xa8d9}, /* 10x Saurashtra */
{0xa900, 0xa925}, /* 38x Kayah Li */
{0xa930, 0xa946}, /* 23x Rejang */
{0xa960, 0xa97c}, /* 29x Hangul Jamo Extended-A */
{0xa984, 0xa9b2}, /* 47x Javanese */
{0xa9cf, 0xa9d9}, /* 11x Javanese */
{0xaa00, 0xaa28}, /* 41x Cham */
{0xaa50, 0xaa59}, /* 10x Cham */
{0xabf0, 0xabf9}, /* 10x Meetei Mayek */
{0xac00, 0xd7a3}, /* 11172x Hangul Syllables */
{0xf900, 0xfa6d}, /* 366x CJK Compatibility Ideographs */
{0xfa70, 0xfad9}, /* 106x CJK Compatibility Ideographs */
{0xfb1f, 0xfb28}, /* 10x Alphabetic Presentation Forms */
{0xfb2a, 0xfb36}, /* 13x Alphabetic Presentation Forms */
{0xfb46, 0xfbb1}, /* 108x Alphabetic Presentation Forms */
{0xfbd3, 0xfd3d}, /* 363x Arabic Presentation Forms-A */
{0xfe76, 0xfefc}, /* 135x Arabic Presentation Forms-B */
{0xff10, 0xff19}, /* 10x Dubs */
{0xff21, 0xff3a}, /* 26x Dubs */
{0xff41, 0xff5a}, /* 26x Dubs */
{0xff66, 0xffbe}, /* 89x Dubs */
{0xffc2, 0xffc7}, /* 6x Dubs */
{0xffca, 0xffcf}, /* 6x Dubs */
{0xffd2, 0xffd7}, /* 6x Dubs */
{0xffda, 0xffdc}, /* 3x Dubs */
};
static const unsigned kAstralCodes[][2] = {
{0x10107, 0x10133}, /* 45x Aegean */
{0x10140, 0x10178}, /* 57x Ancient Greek Numbers */
{0x1018a, 0x1018b}, /* 2x Ancient Greek Numbers */
{0x10280, 0x1029c}, /* 29x Lycian */
{0x102a0, 0x102d0}, /* 49x Carian */
{0x102e1, 0x102fb}, /* 27x Coptic Epact Numbers */
{0x10300, 0x10323}, /* 36x Old Italic */
{0x1032d, 0x1034a}, /* 30x Old Italic, Gothic */
{0x10350, 0x10375}, /* 38x Old Permic */
{0x10380, 0x1039d}, /* 30x Ugaritic */
{0x103a0, 0x103c3}, /* 36x Old Persian */
{0x103c8, 0x103cf}, /* 8x Old Persian */
{0x103d1, 0x103d5}, /* 5x Old Persian */
{0x10400, 0x1049d}, /* 158x Deseret, Shavian, Osmanya */
{0x104b0, 0x104d3}, /* 36x Osage */
{0x104d8, 0x104fb}, /* 36x Osage */
{0x10500, 0x10527}, /* 40x Elbasan */
{0x10530, 0x10563}, /* 52x Caucasian Albanian */
{0x10600, 0x10736}, /* 311x Linear A */
{0x10800, 0x10805}, /* 6x Cypriot Syllabary */
{0x1080a, 0x10835}, /* 44x Cypriot Syllabary */
{0x10837, 0x10838}, /* 2x Cypriot Syllabary */
{0x1083f, 0x1089e}, /* 86x Cypriot,ImperialAramaic,Palmyrene,Nabataean */
{0x108e0, 0x108f2}, /* 19x Hatran */
{0x108f4, 0x108f5}, /* 2x Hatran */
{0x108fb, 0x1091b}, /* 33x Hatran */
{0x10920, 0x10939}, /* 26x Lydian */
{0x10980, 0x109b7}, /* 56x Meroitic Hieromarks */
{0x109bc, 0x109cf}, /* 20x Meroitic Cursive */
{0x109d2, 0x10a00}, /* 47x Meroitic Cursive */
{0x10a10, 0x10a13}, /* 4x Kharoshthi */
{0x10a15, 0x10a17}, /* 3x Kharoshthi */
{0x10a19, 0x10a35}, /* 29x Kharoshthi */
{0x10a40, 0x10a48}, /* 9x Kharoshthi */
{0x10a60, 0x10a7e}, /* 31x Old South Arabian */
{0x10a80, 0x10a9f}, /* 32x Old North Arabian */
{0x10ac0, 0x10ac7}, /* 8x Manichaean */
{0x10ac9, 0x10ae4}, /* 28x Manichaean */
{0x10aeb, 0x10aef}, /* 5x Manichaean */
{0x10b00, 0x10b35}, /* 54x Avestan */
{0x10b40, 0x10b55}, /* 22x Inscriptional Parthian */
{0x10b58, 0x10b72}, /* 27x Inscriptional Parthian and Pahlavi */
{0x10b78, 0x10b91}, /* 26x Inscriptional Pahlavi, Psalter Pahlavi */
{0x10c00, 0x10c48}, /* 73x Old Turkic */
{0x10c80, 0x10cb2}, /* 51x Old Hungarian */
{0x10cc0, 0x10cf2}, /* 51x Old Hungarian */
{0x10cfa, 0x10d23}, /* 42x Old Hungarian, Hanifi Rohingya */
{0x10d30, 0x10d39}, /* 10x Hanifi Rohingya */
{0x10e60, 0x10e7e}, /* 31x Rumi Numeral Symbols */
{0x10f00, 0x10f27}, /* 40x Old Sogdian */
{0x10f30, 0x10f45}, /* 22x Sogdian */
{0x10f51, 0x10f54}, /* 4x Sogdian */
{0x10fe0, 0x10ff6}, /* 23x Elymaic */
{0x11003, 0x11037}, /* 53x Brahmi */
{0x11052, 0x1106f}, /* 30x Brahmi */
{0x11083, 0x110af}, /* 45x Kaithi */
{0x110d0, 0x110e8}, /* 25x Sora Sompeng */
{0x110f0, 0x110f9}, /* 10x Sora Sompeng */
{0x11103, 0x11126}, /* 36x Chakma */
{0x11136, 0x1113f}, /* 10x Chakma */
{0x11144, 0x11144}, /* 1x Chakma */
{0x11150, 0x11172}, /* 35x Mahajani */
{0x11176, 0x11176}, /* 1x Mahajani */
{0x11183, 0x111b2}, /* 48x Sharada */
{0x111c1, 0x111c4}, /* 4x Sharada */
{0x111d0, 0x111da}, /* 11x Sharada */
{0x111dc, 0x111dc}, /* 1x Sharada */
{0x111e1, 0x111f4}, /* 20x Sinhala Archaic Numbers */
{0x11200, 0x11211}, /* 18x Khojki */
{0x11213, 0x1122b}, /* 25x Khojki */
{0x11280, 0x11286}, /* 7x Multani */
{0x11288, 0x11288}, /* 1x Multani */
{0x1128a, 0x1128d}, /* 4x Multani */
{0x1128f, 0x1129d}, /* 15x Multani */
{0x1129f, 0x112a8}, /* 10x Multani */
{0x112b0, 0x112de}, /* 47x Khudawadi */
{0x112f0, 0x112f9}, /* 10x Khudawadi */
{0x11305, 0x1130c}, /* 8x Grantha */
{0x1130f, 0x11310}, /* 2x Grantha */
{0x11313, 0x11328}, /* 22x Grantha */
{0x1132a, 0x11330}, /* 7x Grantha */
{0x11332, 0x11333}, /* 2x Grantha */
{0x11335, 0x11339}, /* 5x Grantha */
{0x1133d, 0x1133d}, /* 1x Grantha */
{0x11350, 0x11350}, /* 1x Grantha */
{0x1135d, 0x11361}, /* 5x Grantha */
{0x11400, 0x11434}, /* 53x Newa */
{0x11447, 0x1144a}, /* 4x Newa */
{0x11450, 0x11459}, /* 10x Newa */
{0x1145f, 0x1145f}, /* 1x Newa */
{0x11480, 0x114af}, /* 48x Tirhuta */
{0x114c4, 0x114c5}, /* 2x Tirhuta */
{0x114c7, 0x114c7}, /* 1x Tirhuta */
{0x114d0, 0x114d9}, /* 10x Tirhuta */
{0x11580, 0x115ae}, /* 47x Siddham */
{0x115d8, 0x115db}, /* 4x Siddham */
{0x11600, 0x1162f}, /* 48x Modi */
{0x11644, 0x11644}, /* 1x Modi */
{0x11650, 0x11659}, /* 10x Modi */
{0x11680, 0x116aa}, /* 43x Takri */
{0x116b8, 0x116b8}, /* 1x Takri */
{0x116c0, 0x116c9}, /* 10x Takri */
{0x11700, 0x1171a}, /* 27x Ahom */
{0x11730, 0x1173b}, /* 12x Ahom */
{0x11800, 0x1182b}, /* 44x Dogra */
{0x118a0, 0x118f2}, /* 83x Warang Citi */
{0x118ff, 0x118ff}, /* 1x Warang Citi */
{0x119a0, 0x119a7}, /* 8x Nandinagari */
{0x119aa, 0x119d0}, /* 39x Nandinagari */
{0x119e1, 0x119e1}, /* 1x Nandinagari */
{0x119e3, 0x119e3}, /* 1x Nandinagari */
{0x11a00, 0x11a00}, /* 1x Zanabazar Square */
{0x11a0b, 0x11a32}, /* 40x Zanabazar Square */
{0x11a3a, 0x11a3a}, /* 1x Zanabazar Square */
{0x11a50, 0x11a50}, /* 1x Soyombo */
{0x11a5c, 0x11a89}, /* 46x Soyombo */
{0x11a9d, 0x11a9d}, /* 1x Soyombo */
{0x11ac0, 0x11af8}, /* 57x Pau Cin Hau */
{0x11c00, 0x11c08}, /* 9x Bhaiksuki */
{0x11c0a, 0x11c2e}, /* 37x Bhaiksuki */
{0x11c40, 0x11c40}, /* 1x Bhaiksuki */
{0x11c50, 0x11c6c}, /* 29x Bhaiksuki */
{0x11c72, 0x11c8f}, /* 30x Marchen */
{0x11d00, 0x11d06}, /* 7x Masaram Gondi */
{0x11d08, 0x11d09}, /* 2x Masaram Gondi */
{0x11d0b, 0x11d30}, /* 38x Masaram Gondi */
{0x11d46, 0x11d46}, /* 1x Masaram Gondi */
{0x11d50, 0x11d59}, /* 10x Masaram Gondi */
{0x11d60, 0x11d65}, /* 6x Gunjala Gondi */
{0x11d67, 0x11d68}, /* 2x Gunjala Gondi */
{0x11d6a, 0x11d89}, /* 32x Gunjala Gondi */
{0x11d98, 0x11d98}, /* 1x Gunjala Gondi */
{0x11da0, 0x11da9}, /* 10x Gunjala Gondi */
{0x11ee0, 0x11ef2}, /* 19x Makasar */
{0x11fc0, 0x11fd4}, /* 21x Tamil Supplement */
{0x12000, 0x12399}, /* 922x Cuneiform */
{0x12400, 0x1246e}, /* 111x Cuneiform Numbers & Punctuation */
{0x12480, 0x12543}, /* 196x Early Dynastic Cuneiform */
{0x13000, 0x1342e}, /* 1071x Egyptian Hieromarks */
{0x14400, 0x14646}, /* 583x Anatolian Hieromarks */
{0x16800, 0x16a38}, /* 569x Bamum Supplement */
{0x16a40, 0x16a5e}, /* 31x Mro */
{0x16a60, 0x16a69}, /* 10x Mro */
{0x16ad0, 0x16aed}, /* 30x Bassa Vah */
{0x16b00, 0x16b2f}, /* 48x Pahawh Hmong */
{0x16b40, 0x16b43}, /* 4x Pahawh Hmong */
{0x16b50, 0x16b59}, /* 10x Pahawh Hmong */
{0x16b5b, 0x16b61}, /* 7x Pahawh Hmong */
{0x16b63, 0x16b77}, /* 21x Pahawh Hmong */
{0x16b7d, 0x16b8f}, /* 19x Pahawh Hmong */
{0x16e40, 0x16e96}, /* 87x Medefaidrin */
{0x16f00, 0x16f4a}, /* 75x Miao */
{0x16f50, 0x16f50}, /* 1x Miao */
{0x16f93, 0x16f9f}, /* 13x Miao */
{0x16fe0, 0x16fe1}, /* 2x Ideographic Symbols & Punctuation */
{0x16fe3, 0x16fe3}, /* 1x Ideographic Symbols & Punctuation */
{0x17000, 0x187f7}, /* 6136x Tangut */
{0x18800, 0x18af2}, /* 755x Tangut Components */
{0x1b000, 0x1b11e}, /* 287x Kana Supplement */
{0x1b150, 0x1b152}, /* 3x Small Kana Extension */
{0x1b164, 0x1b167}, /* 4x Small Kana Extension */
{0x1b170, 0x1b2fb}, /* 396x Nushu */
{0x1bc00, 0x1bc6a}, /* 107x Duployan */
{0x1bc70, 0x1bc7c}, /* 13x Duployan */
{0x1bc80, 0x1bc88}, /* 9x Duployan */
{0x1bc90, 0x1bc99}, /* 10x Duployan */
{0x1d2e0, 0x1d2f3}, /* 20x Mayan Numerals */
{0x1d360, 0x1d378}, /* 25x Counting Rod Numerals */
{0x1d400, 0x1d454}, /* 85x 𝐀..𝑔 Math */
{0x1d456, 0x1d49c}, /* 71x 𝑖..𝒜 Math */
{0x1d49e, 0x1d49f}, /* 2x 𝒞..𝒟 Math */
{0x1d4a2, 0x1d4a2}, /* 1x 𝒢..𝒢 Math */
{0x1d4a5, 0x1d4a6}, /* 2x 𝒥..𝒦 Math */
{0x1d4a9, 0x1d4ac}, /* 4x 𝒩..𝒬 Math */
{0x1d4ae, 0x1d4b9}, /* 12x 𝒮..𝒹 Math */
{0x1d4bb, 0x1d4bb}, /* 1x 𝒻..𝒻 Math */
{0x1d4bd, 0x1d4c3}, /* 7x 𝒽..𝓃 Math */
{0x1d4c5, 0x1d505}, /* 65x 𝓅..𝔅 Math */
{0x1d507, 0x1d50a}, /* 4x 𝔇..𝔊 Math */
{0x1d50d, 0x1d514}, /* 8x 𝔍..𝔔 Math */
{0x1d516, 0x1d51c}, /* 7x 𝔖..𝔜 Math */
{0x1d51e, 0x1d539}, /* 28x 𝔞..𝔹 Math */
{0x1d53b, 0x1d53e}, /* 4x 𝔻..𝔾 Math */
{0x1d540, 0x1d544}, /* 5x 𝕀..𝕄 Math */
{0x1d546, 0x1d546}, /* 1x 𝕆..𝕆 Math */
{0x1d54a, 0x1d550}, /* 7x 𝕊..𝕐 Math */
{0x1d552, 0x1d6a5}, /* 340x 𝕒..𝚥 Math */
{0x1d6a8, 0x1d6c0}, /* 25x 𝚨..𝛀 Math */
{0x1d6c2, 0x1d6da}, /* 25x 𝛂..𝛚 Math */
{0x1d6dc, 0x1d6fa}, /* 31x 𝛜..𝛺 Math */
{0x1d6fc, 0x1d714}, /* 25x 𝛼..𝜔 Math */
{0x1d716, 0x1d734}, /* 31x 𝜖..𝜴 Math */
{0x1d736, 0x1d74e}, /* 25x 𝜶..𝝎 Math */
{0x1d750, 0x1d76e}, /* 31x 𝝐..𝝮 Math */
{0x1d770, 0x1d788}, /* 25x 𝝰..𝞈 Math */
{0x1d78a, 0x1d7a8}, /* 31x 𝞊..𝞨 Math */
{0x1d7aa, 0x1d7c2}, /* 25x 𝞪..𝟂 Math */
{0x1d7c4, 0x1d7cb}, /* 8x 𝟄..𝟋 Math */
{0x1d7ce, 0x1d9ff}, /* 562x Math, Sutton SignWriting */
{0x1f100, 0x1f10c}, /* 13x Enclosed Alphanumeric Supplement */
{0x20000, 0x2a6d6}, /* 42711x CJK Unified Ideographs Extension B */
{0x2a700, 0x2b734}, /* 4149x CJK Unified Ideographs Extension C */
{0x2b740, 0x2b81d}, /* 222x CJK Unified Ideographs Extension D */
{0x2b820, 0x2cea1}, /* 5762x CJK Unified Ideographs Extension E */
{0x2ceb0, 0x2ebe0}, /* 7473x CJK Unified Ideographs Extension F */
{0x2f800, 0x2fa1d}, /* 542x CJK Compatibility Ideographs Supplement */
};
/**
* Returns nonzero if 𝑐 isn't alphanumeric.
*
* Line reading interfaces generally define this operation as UNICODE
* characters that aren't in the letter category (Lu, Ll, Lt, Lm, Lo)
* and aren't in the number categorie (Nd, Nl, No). We also add a few
* other things like blocks and emoji (So).
*/
int iswseparator(wint_t c) {
int m, l, r, n;
if (c < 0200) {
return !(('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
('a' <= c && c <= 'z'));
}
if (c <= 0xffff) {
l = 0;
r = n = sizeof(kCodes) / sizeof(kCodes[0]);
while (l < r) {
m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2)
if (kCodes[m][1] < c) {
l = m + 1;
} else {
r = m;
}
}
return !(l < n && kCodes[l][0] <= c && c <= kCodes[l][1]);
} else {
l = 0;
r = n = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]);
while (l < r) {
m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2)
if (kAstralCodes[m][1] < c) {
l = m + 1;
} else {
r = m;
}
}
return !(l < n && kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]);
}
}