unicode

const (
    MaxRune         = '\U0010FFFF' // 最大的合法unicode码值
    ReplacementChar = '\uFFFD'     // 表示不合法的unicode码值
    MaxASCII        = '\u007F'     // 最大的ASCII值
    MaxLatin1       = '\u00FF'     // 最大的Latin-1值
)

const (
    UpperCase = iota
    LowerCase
    TitleCase
    MaxCase
)

以上常量用作CaseRange类型里Delta数组字段的下标，用于大小写码值映射。

const (
    UpperLower = MaxRune + 1 // 不能是合法的delta值
)

如果一个CaseRange类型的Delta字段使用了UpperLower，就表示该CaseRange表示的序列格式为：Upper Lower Upper Lower。

const Version = "6.3.0"

Version是本包采用的unicode版本，Variables栏中各字符集表格的数据均来源于该版本。

Variables ¶

var (
    Cc     = _Cc // Cc is the set of Unicode characters in category Cc.
    Cf     = _Cf // Cf is the set of Unicode characters in category Cf.
    Co     = _Co // Co is the set of Unicode characters in category Co.
    Cs     = _Cs // Cs is the set of Unicode characters in category Cs.
    Digit  = _Nd // Digit is the set of Unicode characters with the "decimal digit" property.
    Nd     = _Nd // Nd is the set of Unicode characters in category Nd.
    Letter = _L  // Letter/L is the set of Unicode letters, category L.
    L      = _L
    Lm     = _Lm // Lm is the set of Unicode characters in category Lm.
    Lo     = _Lo // Lo is the set of Unicode characters in category Lo.
    Lower  = _Ll // Lower is the set of Unicode lower case letters.
    Ll     = _Ll // Ll is the set of Unicode characters in category Ll.
    Mark   = _M  // Mark/M is the set of Unicode mark characters, category M.
    M      = _M
    Mc     = _Mc // Mc is the set of Unicode characters in category Mc.
    Me     = _Me // Me is the set of Unicode characters in category Me.
    Mn     = _Mn // Mn is the set of Unicode characters in category Mn.
    Nl     = _Nl // Nl is the set of Unicode characters in category Nl.
    No     = _No // No is the set of Unicode characters in category No.
    Number = _N  // Number/N is the set of Unicode number characters, category N.
    N      = _N
    Other  = _C // Other/C is the set of Unicode control and special characters, category C.
    C      = _C
    Pc     = _Pc // Pc is the set of Unicode characters in category Pc.
    Pd     = _Pd // Pd is the set of Unicode characters in category Pd.
    Pe     = _Pe // Pe is the set of Unicode characters in category Pe.
    Pf     = _Pf // Pf is the set of Unicode characters in category Pf.
    Pi     = _Pi // Pi is the set of Unicode characters in category Pi.
    Po     = _Po // Po is the set of Unicode characters in category Po.
    Ps     = _Ps // Ps is the set of Unicode characters in category Ps.
    Punct  = _P  // Punct/P is the set of Unicode punctuation characters, category P.
    P      = _P
    Sc     = _Sc // Sc is the set of Unicode characters in category Sc.
    Sk     = _Sk // Sk is the set of Unicode characters in category Sk.
    Sm     = _Sm // Sm is the set of Unicode characters in category Sm.
    So     = _So // So is the set of Unicode characters in category So.
    Space  = _Z  // Space/Z is the set of Unicode space characters, category Z.
    Z      = _Z
    Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S.
    S      = _S
    Title  = _Lt // Title is the set of Unicode title case letters.
    Lt     = _Lt // Lt is the set of Unicode characters in category Lt.
    Upper  = _Lu // Upper is the set of Unicode upper case letters.
    Lu     = _Lu // Lu is the set of Unicode characters in category Lu.
    Zl     = _Zl // Zl is the set of Unicode characters in category Zl.
    Zp     = _Zp // Zp is the set of Unicode characters in category Zp.
    Zs     = _Zs // Zs is the set of Unicode characters in category Zs.
)

这些变量的类型是*RangeTable。

var (
    Arabic                 = _Arabic                 // Arabic is the set of Unicode characters in script Arabic.
    Armenian               = _Armenian               // Armenian is the set of Unicode characters in script Armenian.
    Avestan                = _Avestan                // Avestan is the set of Unicode characters in script Avestan.
    Balinese               = _Balinese               // Balinese is the set of Unicode characters in script Balinese.
    Bamum                  = _Bamum                  // Bamum is the set of Unicode characters in script Bamum.
    Batak                  = _Batak                  // Batak is the set of Unicode characters in script Batak.
    Bengali                = _Bengali                // Bengali is the set of Unicode characters in script Bengali.
    Bopomofo               = _Bopomofo               // Bopomofo is the set of Unicode characters in script Bopomofo.
    Brahmi                 = _Brahmi                 // Brahmi is the set of Unicode characters in script Brahmi.
    Braille                = _Braille                // Braille is the set of Unicode characters in script Braille.
    Buginese               = _Buginese               // Buginese is the set of Unicode characters in script Buginese.
    Buhid                  = _Buhid                  // Buhid is the set of Unicode characters in script Buhid.
    Canadian_Aboriginal    = _Canadian_Aboriginal    // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal.
    Carian                 = _Carian                 // Carian is the set of Unicode characters in script Carian.
    Chakma                 = _Chakma                 // Chakma is the set of Unicode characters in script Chakma.
    Cham                   = _Cham                   // Cham is the set of Unicode characters in script Cham.
    Cherokee               = _Cherokee               // Cherokee is the set of Unicode characters in script Cherokee.
    Common                 = _Common                 // Common is the set of Unicode characters in script Common.
    Coptic                 = _Coptic                 // Coptic is the set of Unicode characters in script Coptic.
    Cuneiform              = _Cuneiform              // Cuneiform is the set of Unicode characters in script Cuneiform.
    Cypriot                = _Cypriot                // Cypriot is the set of Unicode characters in script Cypriot.
    Cyrillic               = _Cyrillic               // Cyrillic is the set of Unicode characters in script Cyrillic.
    Deseret                = _Deseret                // Deseret is the set of Unicode characters in script Deseret.
    Devanagari             = _Devanagari             // Devanagari is the set of Unicode characters in script Devanagari.
    Egyptian_Hieroglyphs   = _Egyptian_Hieroglyphs   // Egyptian_Hieroglyphs is the set of Unicode characters in script Egyptian_Hieroglyphs.
    Ethiopic               = _Ethiopic               // Ethiopic is the set of Unicode characters in script Ethiopic.
    Georgian               = _Georgian               // Georgian is the set of Unicode characters in script Georgian.
    Glagolitic             = _Glagolitic             // Glagolitic is the set of Unicode characters in script Glagolitic.
    Gothic                 = _Gothic                 // Gothic is the set of Unicode characters in script Gothic.
    Greek                  = _Greek                  // Greek is the set of Unicode characters in script Greek.
    Gujarati               = _Gujarati               // Gujarati is the set of Unicode characters in script Gujarati.
    Gurmukhi               = _Gurmukhi               // Gurmukhi is the set of Unicode characters in script Gurmukhi.
    Han                    = _Han                    // Han is the set of Unicode characters in script Han.
    Hangul                 = _Hangul                 // Hangul is the set of Unicode characters in script Hangul.
    Hanunoo                = _Hanunoo                // Hanunoo is the set of Unicode characters in script Hanunoo.
    Hebrew                 = _Hebrew                 // Hebrew is the set of Unicode characters in script Hebrew.
    Hiragana               = _Hiragana               // Hiragana is the set of Unicode characters in script Hiragana.
    Imperial_Aramaic       = _Imperial_Aramaic       // Imperial_Aramaic is the set of Unicode characters in script Imperial_Aramaic.
    Inherited              = _Inherited              // Inherited is the set of Unicode characters in script Inherited.
    Inscriptional_Pahlavi  = _Inscriptional_Pahlavi  // Inscriptional_Pahlavi is the set of Unicode characters in script Inscriptional_Pahlavi.
    Inscriptional_Parthian = _Inscriptional_Parthian // Inscriptional_Parthian is the set of Unicode characters in script Inscriptional_Parthian.
    Javanese               = _Javanese               // Javanese is the set of Unicode characters in script Javanese.
    Kaithi                 = _Kaithi                 // Kaithi is the set of Unicode characters in script Kaithi.
    Kannada                = _Kannada                // Kannada is the set of Unicode characters in script Kannada.
    Katakana               = _Katakana               // Katakana is the set of Unicode characters in script Katakana.
    Kayah_Li               = _Kayah_Li               // Kayah_Li is the set of Unicode characters in script Kayah_Li.
    Kharoshthi             = _Kharoshthi             // Kharoshthi is the set of Unicode characters in script Kharoshthi.
    Khmer                  = _Khmer                  // Khmer is the set of Unicode characters in script Khmer.
    Lao                    = _Lao                    // Lao is the set of Unicode characters in script Lao.
    Latin                  = _Latin                  // Latin is the set of Unicode characters in script Latin.
    Lepcha                 = _Lepcha                 // Lepcha is the set of Unicode characters in script Lepcha.
    Limbu                  = _Limbu                  // Limbu is the set of Unicode characters in script Limbu.
    Linear_B               = _Linear_B               // Linear_B is the set of Unicode characters in script Linear_B.
    Lisu                   = _Lisu                   // Lisu is the set of Unicode characters in script Lisu.
    Lycian                 = _Lycian                 // Lycian is the set of Unicode characters in script Lycian.
    Lydian                 = _Lydian                 // Lydian is the set of Unicode characters in script Lydian.
    Malayalam              = _Malayalam              // Malayalam is the set of Unicode characters in script Malayalam.
    Mandaic                = _Mandaic                // Mandaic is the set of Unicode characters in script Mandaic.
    Meetei_Mayek           = _Meetei_Mayek           // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek.
    Meroitic_Cursive       = _Meroitic_Cursive       // Meroitic_Cursive is the set of Unicode characters in script Meroitic_Cursive.
    Meroitic_Hieroglyphs   = _Meroitic_Hieroglyphs   // Meroitic_Hieroglyphs is the set of Unicode characters in script Meroitic_Hieroglyphs.
    Miao                   = _Miao                   // Miao is the set of Unicode characters in script Miao.
    Mongolian              = _Mongolian              // Mongolian is the set of Unicode characters in script Mongolian.
    Myanmar                = _Myanmar                // Myanmar is the set of Unicode characters in script Myanmar.
    New_Tai_Lue            = _New_Tai_Lue            // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue.
    Nko                    = _Nko                    // Nko is the set of Unicode characters in script Nko.
    Ogham                  = _Ogham                  // Ogham is the set of Unicode characters in script Ogham.
    Ol_Chiki               = _Ol_Chiki               // Ol_Chiki is the set of Unicode characters in script Ol_Chiki.
    Old_Italic             = _Old_Italic             // Old_Italic is the set of Unicode characters in script Old_Italic.
    Old_Persian            = _Old_Persian            // Old_Persian is the set of Unicode characters in script Old_Persian.
    Old_South_Arabian      = _Old_South_Arabian      // Old_South_Arabian is the set of Unicode characters in script Old_South_Arabian.
    Old_Turkic             = _Old_Turkic             // Old_Turkic is the set of Unicode characters in script Old_Turkic.
    Oriya                  = _Oriya                  // Oriya is the set of Unicode characters in script Oriya.
    Osmanya                = _Osmanya                // Osmanya is the set of Unicode characters in script Osmanya.
    Phags_Pa               = _Phags_Pa               // Phags_Pa is the set of Unicode characters in script Phags_Pa.
    Phoenician             = _Phoenician             // Phoenician is the set of Unicode characters in script Phoenician.
    Rejang                 = _Rejang                 // Rejang is the set of Unicode characters in script Rejang.
    Runic                  = _Runic                  // Runic is the set of Unicode characters in script Runic.
    Samaritan              = _Samaritan              // Samaritan is the set of Unicode characters in script Samaritan.
    Saurashtra             = _Saurashtra             // Saurashtra is the set of Unicode characters in script Saurashtra.
    Sharada                = _Sharada                // Sharada is the set of Unicode characters in script Sharada.
    Shavian                = _Shavian                // Shavian is the set of Unicode characters in script Shavian.
    Sinhala                = _Sinhala                // Sinhala is the set of Unicode characters in script Sinhala.
    Sora_Sompeng           = _Sora_Sompeng           // Sora_Sompeng is the set of Unicode characters in script Sora_Sompeng.
    Sundanese              = _Sundanese              // Sundanese is the set of Unicode characters in script Sundanese.
    Syloti_Nagri           = _Syloti_Nagri           // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri.
    Syriac                 = _Syriac                 // Syriac is the set of Unicode characters in script Syriac.
    Tagalog                = _Tagalog                // Tagalog is the set of Unicode characters in script Tagalog.
    Tagbanwa               = _Tagbanwa               // Tagbanwa is the set of Unicode characters in script Tagbanwa.
    Tai_Le                 = _Tai_Le                 // Tai_Le is the set of Unicode characters in script Tai_Le.
    Tai_Tham               = _Tai_Tham               // Tai_Tham is the set of Unicode characters in script Tai_Tham.
    Tai_Viet               = _Tai_Viet               // Tai_Viet is the set of Unicode characters in script Tai_Viet.
    Takri                  = _Takri                  // Takri is the set of Unicode characters in script Takri.
    Tamil                  = _Tamil                  // Tamil is the set of Unicode characters in script Tamil.
    Telugu                 = _Telugu                 // Telugu is the set of Unicode characters in script Telugu.
    Thaana                 = _Thaana                 // Thaana is the set of Unicode characters in script Thaana.
    Thai                   = _Thai                   // Thai is the set of Unicode characters in script Thai.
    Tibetan                = _Tibetan                // Tibetan is the set of Unicode characters in script Tibetan.
    Tifinagh               = _Tifinagh               // Tifinagh is the set of Unicode characters in script Tifinagh.
    Ugaritic               = _Ugaritic               // Ugaritic is the set of Unicode characters in script Ugaritic.
    Vai                    = _Vai                    // Vai is the set of Unicode characters in script Vai.
    Yi                     = _Yi                     // Yi is the set of Unicode characters in script Yi.
)

这些变量的类型也是*RangeTable。

var (
    ASCII_Hex_Digit                    = _ASCII_Hex_Digit                    // ASCII_Hex_Digit is the set of Unicode characters with property ASCII_Hex_Digit.
    Bidi_Control                       = _Bidi_Control                       // Bidi_Control is the set of Unicode characters with property Bidi_Control.
    Dash                               = _Dash                               // Dash is the set of Unicode characters with property Dash.
    Deprecated                         = _Deprecated                         // Deprecated is the set of Unicode characters with property Deprecated.
    Diacritic                          = _Diacritic                          // Diacritic is the set of Unicode characters with property Diacritic.
    Extender                           = _Extender                           // Extender is the set of Unicode characters with property Extender.
    Hex_Digit                          = _Hex_Digit                          // Hex_Digit is the set of Unicode characters with property Hex_Digit.
    Hyphen                             = _Hyphen                             // Hyphen is the set of Unicode characters with property Hyphen.
    IDS_Binary_Operator                = _IDS_Binary_Operator                // IDS_Binary_Operator is the set of Unicode characters with property IDS_Binary_Operator.
    IDS_Trinary_Operator               = _IDS_Trinary_Operator               // IDS_Trinary_Operator is the set of Unicode characters with property IDS_Trinary_Operator.
    Ideographic                        = _Ideographic                        // Ideographic is the set of Unicode characters with property Ideographic.
    Join_Control                       = _Join_Control                       // Join_Control is the set of Unicode characters with property Join_Control.
    Logical_Order_Exception            = _Logical_Order_Exception            // Logical_Order_Exception is the set of Unicode characters with property Logical_Order_Exception.
    Noncharacter_Code_Point            = _Noncharacter_Code_Point            // Noncharacter_Code_Point is the set of Unicode characters with property Noncharacter_Code_Point.
    Other_Alphabetic                   = _Other_Alphabetic                   // Other_Alphabetic is the set of Unicode characters with property Other_Alphabetic.
    Other_Default_Ignorable_Code_Point = _Other_Default_Ignorable_Code_Point // Other_Default_Ignorable_Code_Point is the set of Unicode characters with property Other_Default_Ignorable_Code_Point.
    Other_Grapheme_Extend              = _Other_Grapheme_Extend              // Other_Grapheme_Extend is the set of Unicode characters with property Other_Grapheme_Extend.
    Other_ID_Continue                  = _Other_ID_Continue                  // Other_ID_Continue is the set of Unicode characters with property Other_ID_Continue.
    Other_ID_Start                     = _Other_ID_Start                     // Other_ID_Start is the set of Unicode characters with property Other_ID_Start.
    Other_Lowercase                    = _Other_Lowercase                    // Other_Lowercase is the set of Unicode characters with property Other_Lowercase.
    Other_Math                         = _Other_Math                         // Other_Math is the set of Unicode characters with property Other_Math.
    Other_Uppercase                    = _Other_Uppercase                    // Other_Uppercase is the set of Unicode characters with property Other_Uppercase.
    Pattern_Syntax                     = _Pattern_Syntax                     // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax.
    Pattern_White_Space                = _Pattern_White_Space                // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space.
    Quotation_Mark                     = _Quotation_Mark                     // Quotation_Mark is the set of Unicode characters with property Quotation_Mark.
    Radical                            = _Radical                            // Radical is the set of Unicode characters with property Radical.
    STerm                              = _STerm                              // STerm is the set of Unicode characters with property STerm.
    Soft_Dotted                        = _Soft_Dotted                        // Soft_Dotted is the set of Unicode characters with property Soft_Dotted.
    Terminal_Punctuation               = _Terminal_Punctuation               // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation.
    Unified_Ideograph                  = _Unified_Ideograph                  // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph.
    Variation_Selector                 = _Variation_Selector                 // Variation_Selector is the set of Unicode characters with property Variation_Selector.
    White_Space                        = _White_Space                        // White_Space is the set of Unicode characters with property White_Space.
)

这些变量的类型还是*RangeTable。

var CaseRanges = _CaseRanges

CaseRanges是描述具有非自映射的所有字母的大小写映射的表格。

var Categories = map[string]*RangeTable{
    "C":  C,
    "Cc": Cc,
    "Cf": Cf,
    "Co": Co,
    "Cs": Cs,
    "L":  L,
    "Ll": Ll,
    "Lm": Lm,
    "Lo": Lo,
    "Lt": Lt,
    "Lu": Lu,
    "M":  M,
    "Mc": Mc,
    "Me": Me,
    "Mn": Mn,
    "N":  N,
    "Nd": Nd,
    "Nl": Nl,
    "No": No,
    "P":  P,
    "Pc": Pc,
    "Pd": Pd,
    "Pe": Pe,
    "Pf": Pf,
    "Pi": Pi,
    "Po": Po,
    "Ps": Ps,
    "S":  S,
    "Sc": Sc,
    "Sk": Sk,
    "Sm": Sm,
    "So": So,
    "Z":  Z,
    "Zl": Zl,
    "Zp": Zp,
    "Zs": Zs,
}

Categories是一组Unicode类别表。

var FoldCategory = map[string]*RangeTable{
    "Common":    foldCommon,
    "Greek":     foldGreek,
    "Inherited": foldInherited,
    "L":         foldL,
    "Ll":        foldLl,
    "Lt":        foldLt,
    "Lu":        foldLu,
    "M":         foldM,
    "Mn":        foldMn,
}

FoldCategory将类别名称映射到类别外的代码点表，这些代码点在简单大小写折叠的情况下等同于类别内的代码点。如果没有类别名称的条目，则不存在这样的点。

var FoldScript = map[string]*RangeTable{}

FoldScript将脚本名称映射到脚本外的代码点表，这些代码点在简单大小写折叠的情况下等同于脚本内的代码点。如果没有某个脚本名称的条目，则不存在这样的代码点。

var GraphicRanges = []*RangeTable{
    L, M, N, P, S, Zs,
}

GraphicRanges根据Unicode定义了一组图形字符。

var PrintRanges = []*RangeTable{
    L, M, N, P, S,
}

PrintRanges根据Go定义了一组可打印的字符。ASCII空格（U+0020）单独处理。

var Properties = map[string]*RangeTable{
    "ASCII_Hex_Digit":                    ASCII_Hex_Digit,
    "Bidi_Control":                       Bidi_Control,
    "Dash":                               Dash,
    "Deprecated":                         Deprecated,
    "Diacritic":                          Diacritic,
    "Extender":                           Extender,
    "Hex_Digit":                          Hex_Digit,
    "Hyphen":                             Hyphen,
    "IDS_Binary_Operator":                IDS_Binary_Operator,
    "IDS_Trinary_Operator":               IDS_Trinary_Operator,
    "Ideographic":                        Ideographic,
    "Join_Control":                       Join_Control,
    "Logical_Order_Exception":            Logical_Order_Exception,
    "Noncharacter_Code_Point":            Noncharacter_Code_Point,
    "Other_Alphabetic":                   Other_Alphabetic,
    "Other_Default_Ignorable_Code_Point": Other_Default_Ignorable_Code_Point,
    "Other_Grapheme_Extend":              Other_Grapheme_Extend,
    "Other_ID_Continue":                  Other_ID_Continue,
    "Other_ID_Start":                     Other_ID_Start,
    "Other_Lowercase":                    Other_Lowercase,
    "Other_Math":                         Other_Math,
    "Other_Uppercase":                    Other_Uppercase,
    "Pattern_Syntax":                     Pattern_Syntax,
    "Pattern_White_Space":                Pattern_White_Space,
    "Quotation_Mark":                     Quotation_Mark,
    "Radical":                            Radical,
    "STerm":                              STerm,
    "Soft_Dotted":                        Soft_Dotted,
    "Terminal_Punctuation":               Terminal_Punctuation,
    "Unified_Ideograph":                  Unified_Ideograph,
    "Variation_Selector":                 Variation_Selector,
    "White_Space":                        White_Space,
}

Properties是Unicode属性表的集合。

var Scripts = map[string]*RangeTable{
    "Arabic":                 Arabic,
    "Armenian":               Armenian,
    "Avestan":                Avestan,
    "Balinese":               Balinese,
    "Bamum":                  Bamum,
    "Batak":                  Batak,
    "Bengali":                Bengali,
    "Bopomofo":               Bopomofo,
    "Brahmi":                 Brahmi,
    "Braille":                Braille,
    "Buginese":               Buginese,
    "Buhid":                  Buhid,
    "Canadian_Aboriginal":    Canadian_Aboriginal,
    "Carian":                 Carian,
    "Chakma":                 Chakma,
    "Cham":                   Cham,
    "Cherokee":               Cherokee,
    "Common":                 Common,
    "Coptic":                 Coptic,
    "Cuneiform":              Cuneiform,
    "Cypriot":                Cypriot,
    "Cyrillic":               Cyrillic,
    "Deseret":                Deseret,
    "Devanagari":             Devanagari,
    "Egyptian_Hieroglyphs":   Egyptian_Hieroglyphs,
    "Ethiopic":               Ethiopic,
    "Georgian":               Georgian,
    "Glagolitic":             Glagolitic,
    "Gothic":                 Gothic,
    "Greek":                  Greek,
    "Gujarati":               Gujarati,
    "Gurmukhi":               Gurmukhi,
    "Han":                    Han,
    "Hangul":                 Hangul,
    "Hanunoo":                Hanunoo,
    "Hebrew":                 Hebrew,
    "Hiragana":               Hiragana,
    "Imperial_Aramaic":       Imperial_Aramaic,
    "Inherited":              Inherited,
    "Inscriptional_Pahlavi":  Inscriptional_Pahlavi,
    "Inscriptional_Parthian": Inscriptional_Parthian,
    "Javanese":               Javanese,
    "Kaithi":                 Kaithi,
    "Kannada":                Kannada,
    "Katakana":               Katakana,
    "Kayah_Li":               Kayah_Li,
    "Kharoshthi":             Kharoshthi,
    "Khmer":                  Khmer,
    "Lao":                    Lao,
    "Latin":                  Latin,
    "Lepcha":                 Lepcha,
    "Limbu":                  Limbu,
    "Linear_B":               Linear_B,
    "Lisu":                   Lisu,
    "Lycian":                 Lycian,
    "Lydian":                 Lydian,
    "Malayalam":              Malayalam,
    "Mandaic":                Mandaic,
    "Meetei_Mayek":           Meetei_Mayek,
    "Meroitic_Cursive":       Meroitic_Cursive,
    "Meroitic_Hieroglyphs":   Meroitic_Hieroglyphs,
    "Miao":                   Miao,
    "Mongolian":              Mongolian,
    "Myanmar":                Myanmar,
    "New_Tai_Lue":            New_Tai_Lue,
    "Nko":                    Nko,
    "Ogham":                  Ogham,
    "Ol_Chiki":               Ol_Chiki,
    "Old_Italic":             Old_Italic,
    "Old_Persian":            Old_Persian,
    "Old_South_Arabian":      Old_South_Arabian,
    "Old_Turkic":             Old_Turkic,
    "Oriya":                  Oriya,
    "Osmanya":                Osmanya,
    "Phags_Pa":               Phags_Pa,
    "Phoenician":             Phoenician,
    "Rejang":                 Rejang,
    "Runic":                  Runic,
    "Samaritan":              Samaritan,
    "Saurashtra":             Saurashtra,
    "Sharada":                Sharada,
    "Shavian":                Shavian,
    "Sinhala":                Sinhala,
    "Sora_Sompeng":           Sora_Sompeng,
    "Sundanese":              Sundanese,
    "Syloti_Nagri":           Syloti_Nagri,
    "Syriac":                 Syriac,
    "Tagalog":                Tagalog,
    "Tagbanwa":               Tagbanwa,
    "Tai_Le":                 Tai_Le,
    "Tai_Tham":               Tai_Tham,
    "Tai_Viet":               Tai_Viet,
    "Takri":                  Takri,
    "Tamil":                  Tamil,
    "Telugu":                 Telugu,
    "Thaana":                 Thaana,
    "Thai":                   Thai,
    "Tibetan":                Tibetan,
    "Tifinagh":               Tifinagh,
    "Ugaritic":               Ugaritic,
    "Vai":                    Vai,
    "Yi":                     Yi,
}

Scripts是一组Unicode脚本表。

type CaseRange ¶

type CaseRange struct {
    Lo    uint32
    Hi    uint32
    Delta d // d为[MaxCase]rune的命名类型
}

代表简单的unicode码值的一一映射。范围为[Lo, Hi]，步长为1。

该范围内的每个值+Delta[UpperCase]表示对应的大写字母；

该范围内的每个值+Delta[LowerCase]表示对应的小写字母；

该范围内的每个值+Delta[TitleCase]表示对应的标题字母。

Delta数组里的值可为负数或零。如果Delta数组是：

{UpperLower, UpperLower, UpperLower}

表示[Lo, Hi]范围的字符序列是交替的、对应的大写字母小写字母对。否则常数UpperLower不能用在Delta数组里。

type Range16 ¶

type Range16 struct {
    Lo     uint16
    Hi     uint16
    Stride uint16
}

代表一系列16位unicode码值，范围为Lo到Hi（包含Lo和Hi），步长为Stride。

type Range32 ¶

type Range32 struct {
    Lo     uint32
    Hi     uint32
    Stride uint32
}

代表一系列32位unicode码值，范围为Lo到Hi（包含Lo和Hi），步长为Stride；Lo和Hi必须大于等于1<<16。

type RangeTable ¶

type RangeTable struct {
    R16         []Range16
    R32         []Range32  // R32不能包含低于0x10000（即1<<16）的值
    LatinOffset int // R16字段中Hi <= MaxLatin1的成员数
}

通过列出集合中码值的范围，定义了一个unicode码值的集合。出于节省空间，范围保存在两个切片里，分别保存16位字符的范围和32位字符的范围。R16和R32必须是有序排列的，且互不重叠的。

type SpecialCase ¶

type SpecialCase []CaseRange

SpecialCase代表特定语言的字符映射，如土耳其语。本类型的方法（通过覆盖）定制了标准映射。

var AzeriCase SpecialCase = _TurkishCase

var TurkishCase SpecialCase = _TurkishCase

func (SpecialCase) ToLower ¶

func (special SpecialCase) ToLower(r rune) rune

按特定映射，返回对应的小写字母。

func (SpecialCase) ToUpper ¶

func (special SpecialCase) ToUpper(r rune) rune

按特定映射，返回对应的大写字母。

func (SpecialCase) ToTitle ¶

func (special SpecialCase) ToTitle(r rune) rune

按特定映射，返回对应的标题字母。

func Is ¶

func Is(rangeTab *RangeTable, r rune) bool

函数报告r是否在rangeTab指定的字符范围内。

func In ¶

func In(r rune, ranges ...*RangeTable) bool

函数报告r是否是给出的ranges字母集中的某个成员。

func IsOneOf ¶

func IsOneOf(ranges []*RangeTable, r rune) bool

函数报告r是否在ranges某个成员指定的字符范围内。本函数的功能类似In，应优先使用In函数。

func IsSpace ¶

func IsSpace(r rune) bool

IsSpace报告一个字符是否是空白字符。在Latin-1字符空间中，空白字符为：

'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).

其它的空白字符定义请参见类别Z和属性Pattern_White_Space。

Example

fmt.Println(unicode.IsSpace(' ')) // true
fmt.Println(unicode.IsSpace('\n')) // true
fmt.Println(unicode.IsSpace('\t')) // true

func IsDigit ¶

func IsDigit(r rune) bool

IsDigit报告字符r是否是十进制数字字符。

Example

fmt.Println(unicode.IsDigit('0')) // true
fmt.Println(unicode.IsDigit('9')) // true
fmt.Println(unicode.IsDigit('A')) // false
fmt.Println(unicode.IsDigit('f')) // false

func IsNumber ¶

func IsNumber(r rune) bool

IsNumber报告一个字符是否是数字字符，参见类别N。

Example

fmt.Println(unicode.IsNumber('0')) // true
fmt.Println(unicode.IsNumber('9')) // true
fmt.Println(unicode.IsNumber('A')) // false
fmt.Println(unicode.IsNumber('f')) // false

func IsLetter ¶

func IsLetter(r rune) bool

IsLetter报告一个字符是否是字母，参见类别L。

Example

fmt.Println(unicode.IsLetter('0')) // false
fmt.Println(unicode.IsLetter('9')) // false
fmt.Println(unicode.IsLetter('A')) // true
fmt.Println(unicode.IsLetter('z')) // true
fmt.Println(unicode.IsLetter('😘')) // false

func IsGraphic ¶

func IsGraphic(r rune) bool

报告一个字符是否是unicode图形。包括字母、标记、数字、符号、标点、空白，参见L、M、N、P、S、Zs。

Example

fmt.Println(unicode.IsGraphic('0')) // true
fmt.Println(unicode.IsGraphic('9')) // true
fmt.Println(unicode.IsGraphic('A')) // true
fmt.Println(unicode.IsGraphic('z')) // true
fmt.Println(unicode.IsGraphic('😘')) // true
fmt.Println(unicode.IsGraphic('尶')) // true

func IsMark ¶

func IsMark(r rune) bool

IsMark报告一个字符是否是标记字符，参见类别M。

func IsPrint ¶

func IsPrint(r rune) bool

IsPrint报告一个字符是否是go的可打印字符。本函数基本和IsGraphic一致，区别在于唯一被视为可打印的空白字符是ASCII空格U+0020（其它空白字符会返回假）。

func IsControl ¶

func IsControl(r rune) bool

IsControl报告一个字符是否是控制字符。类别C（Other）还包含更多的码值，如代理字符；要检测这些码值请使用Is(C, r)。

func IsPunct ¶

func IsPunct(r rune) bool

IsPunct报告一个字符是否是unicode标点字符，参见类别P。

func IsSymbol ¶

func IsSymbol(r rune) bool

IsSymbol报告一个字符是否是unicode符号字符。

func IsLower ¶

func IsLower(r rune) bool

返回字符是否是小写字母。

func IsUpper ¶

func IsUpper(r rune) bool

返回字符是否是大写字母。

func IsTitle ¶

func IsTitle(r rune) bool

返回字符是否是标题字母。

func To ¶

func To(_case int, r rune) rune

返回_case指定的对应类型的字母：UpperCase、LowerCase、TitleCase。

func ToLower ¶

func ToLower(r rune) rune

返回对应的小写字母。

func ToUpper ¶

func ToUpper(r rune) rune

返回对应的大写字母。

func ToTitle ¶

func ToTitle(r rune) rune

返回对应的标题字母。

func SimpleFold ¶

func SimpleFold(r rune) rune

SimpleFold函数迭代在unicode标准字符映射中互相对应的unicode码值。在与r对应的码值中（包括r自身），会返回最小的那个大于r的字符（如果有）；否则返回映射中最小的字符。

举例：

SimpleFold('A') = 'a'
SimpleFold('a') = 'A'
SimpleFold('K') = 'k'
SimpleFold('k') = '\u212A' (Kelvin symbol, K)
SimpleFold('\u212A') = 'K'
SimpleFold('1') = '1'

Bugs ¶

☞ 没有全字符（包含多个rune的字符）折叠的机制。¶