0001: /*
0002: * Copyright 2002-2006 Sun Microsystems, Inc. All Rights Reserved.
0003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004: *
0005: * This code is free software; you can redistribute it and/or modify it
0006: * under the terms of the GNU General Public License version 2 only, as
0007: * published by the Free Software Foundation. Sun designates this
0008: * particular file as subject to the "Classpath" exception as provided
0009: * by Sun in the LICENSE file that accompanied this code.
0010: *
0011: * This code is distributed in the hope that it will be useful, but WITHOUT
0012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014: * version 2 for more details (a copy is included in the LICENSE file that
0015: * accompanied this code).
0016: *
0017: * You should have received a copy of the GNU General Public License version
0018: * 2 along with this work; if not, write to the Free Software Foundation,
0019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020: *
0021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022: * CA 95054 USA or visit www.sun.com if you need additional information or
0023: * have any questions.
0024: */
0025:
0026: package java.lang;
0027:
0028: import java.util.Map;
0029: import java.util.HashMap;
0030: import java.util.Locale;
0031:
0032: /**
0033: * The <code>Character</code> class wraps a value of the primitive
0034: * type <code>char</code> in an object. An object of type
0035: * <code>Character</code> contains a single field whose type is
0036: * <code>char</code>.
0037: * <p>
0038: * In addition, this class provides several methods for determining
0039: * a character's category (lowercase letter, digit, etc.) and for converting
0040: * characters from uppercase to lowercase and vice versa.
0041: * <p>
0042: * Character information is based on the Unicode Standard, version 4.0.
0043: * <p>
0044: * The methods and data of class <code>Character</code> are defined by
0045: * the information in the <i>UnicodeData</i> file that is part of the
0046: * Unicode Character Database maintained by the Unicode
0047: * Consortium. This file specifies various properties including name
0048: * and general category for every defined Unicode code point or
0049: * character range.
0050: * <p>
0051: * The file and its description are available from the Unicode Consortium at:
0052: * <ul>
0053: * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
0054: * </ul>
0055: *
0056: * <h4><a name="unicode">Unicode Character Representations</a></h4>
0057: *
0058: * <p>The <code>char</code> data type (and therefore the value that a
0059: * <code>Character</code> object encapsulates) are based on the
0060: * original Unicode specification, which defined characters as
0061: * fixed-width 16-bit entities. The Unicode standard has since been
0062: * changed to allow for characters whose representation requires more
0063: * than 16 bits. The range of legal <em>code point</em>s is now
0064: * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
0065: * (Refer to the <a
0066: * href="http://www.unicode.org/reports/tr27/#notation"><i>
0067: * definition</i></a> of the U+<i>n</i> notation in the Unicode
0068: * standard.)
0069: *
0070: * <p>The set of characters from U+0000 to U+FFFF is sometimes
0071: * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
0072: * name="supplementary">Characters</a> whose code points are greater
0073: * than U+FFFF are called <em>supplementary character</em>s. The Java
0074: * 2 platform uses the UTF-16 representation in <code>char</code>
0075: * arrays and in the <code>String</code> and <code>StringBuffer</code>
0076: * classes. In this representation, supplementary characters are
0077: * represented as a pair of <code>char</code> values, the first from
0078: * the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
0079: * second from the <em>low-surrogates</em> range
0080: * (\uDC00-\uDFFF).
0081: *
0082: * <p>A <code>char</code> value, therefore, represents Basic
0083: * Multilingual Plane (BMP) code points, including the surrogate
0084: * code points, or code units of the UTF-16 encoding. An
0085: * <code>int</code> value represents all Unicode code points,
0086: * including supplementary code points. The lower (least significant)
0087: * 21 bits of <code>int</code> are used to represent Unicode code
0088: * points and the upper (most significant) 11 bits must be zero.
0089: * Unless otherwise specified, the behavior with respect to
0090: * supplementary characters and surrogate <code>char</code> values is
0091: * as follows:
0092: *
0093: * <ul>
0094: * <li>The methods that only accept a <code>char</code> value cannot support
0095: * supplementary characters. They treat <code>char</code> values from the
0096: * surrogate ranges as undefined characters. For example,
0097: * <code>Character.isLetter('\uD840')</code> returns <code>false</code>, even though
0098: * this specific value if followed by any low-surrogate value in a string
0099: * would represent a letter.
0100: *
0101: * <li>The methods that accept an <code>int</code> value support all
0102: * Unicode characters, including supplementary characters. For
0103: * example, <code>Character.isLetter(0x2F81A)</code> returns
0104: * <code>true</code> because the code point value represents a letter
0105: * (a CJK ideograph).
0106: * </ul>
0107: *
0108: * <p>In the Java SE API documentation, <em>Unicode code point</em> is
0109: * used for character values in the range between U+0000 and U+10FFFF,
0110: * and <em>Unicode code unit</em> is used for 16-bit
0111: * <code>char</code> values that are code units of the <em>UTF-16</em>
0112: * encoding. For more information on Unicode terminology, refer to the
0113: * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
0114: *
0115: * @author Lee Boynton
0116: * @author Guy Steele
0117: * @author Akira Tanaka
0118: * @since 1.0
0119: */
0120: public final class Character extends Object implements
0121: java.io.Serializable, Comparable<Character> {
0122: /**
0123: * The minimum radix available for conversion to and from strings.
0124: * The constant value of this field is the smallest value permitted
0125: * for the radix argument in radix-conversion methods such as the
0126: * <code>digit</code> method, the <code>forDigit</code>
0127: * method, and the <code>toString</code> method of class
0128: * <code>Integer</code>.
0129: *
0130: * @see java.lang.Character#digit(char, int)
0131: * @see java.lang.Character#forDigit(int, int)
0132: * @see java.lang.Integer#toString(int, int)
0133: * @see java.lang.Integer#valueOf(java.lang.String)
0134: */
0135: public static final int MIN_RADIX = 2;
0136:
0137: /**
0138: * The maximum radix available for conversion to and from strings.
0139: * The constant value of this field is the largest value permitted
0140: * for the radix argument in radix-conversion methods such as the
0141: * <code>digit</code> method, the <code>forDigit</code>
0142: * method, and the <code>toString</code> method of class
0143: * <code>Integer</code>.
0144: *
0145: * @see java.lang.Character#digit(char, int)
0146: * @see java.lang.Character#forDigit(int, int)
0147: * @see java.lang.Integer#toString(int, int)
0148: * @see java.lang.Integer#valueOf(java.lang.String)
0149: */
0150: public static final int MAX_RADIX = 36;
0151:
0152: /**
0153: * The constant value of this field is the smallest value of type
0154: * <code>char</code>, <code>'\u0000'</code>.
0155: *
0156: * @since 1.0.2
0157: */
0158: public static final char MIN_VALUE = '\u0000';
0159:
0160: /**
0161: * The constant value of this field is the largest value of type
0162: * <code>char</code>, <code>'\uFFFF'</code>.
0163: *
0164: * @since 1.0.2
0165: */
0166: public static final char MAX_VALUE = '\uffff';
0167:
0168: /**
0169: * The <code>Class</code> instance representing the primitive type
0170: * <code>char</code>.
0171: *
0172: * @since 1.1
0173: */
0174: public static final Class<Character> TYPE = Class
0175: .getPrimitiveClass("char");
0176:
0177: /*
0178: * Normative general types
0179: */
0180:
0181: /*
0182: * General character types
0183: */
0184:
0185: /**
0186: * General category "Cn" in the Unicode specification.
0187: * @since 1.1
0188: */
0189: public static final byte UNASSIGNED = 0;
0190:
0191: /**
0192: * General category "Lu" in the Unicode specification.
0193: * @since 1.1
0194: */
0195: public static final byte UPPERCASE_LETTER = 1;
0196:
0197: /**
0198: * General category "Ll" in the Unicode specification.
0199: * @since 1.1
0200: */
0201: public static final byte LOWERCASE_LETTER = 2;
0202:
0203: /**
0204: * General category "Lt" in the Unicode specification.
0205: * @since 1.1
0206: */
0207: public static final byte TITLECASE_LETTER = 3;
0208:
0209: /**
0210: * General category "Lm" in the Unicode specification.
0211: * @since 1.1
0212: */
0213: public static final byte MODIFIER_LETTER = 4;
0214:
0215: /**
0216: * General category "Lo" in the Unicode specification.
0217: * @since 1.1
0218: */
0219: public static final byte OTHER_LETTER = 5;
0220:
0221: /**
0222: * General category "Mn" in the Unicode specification.
0223: * @since 1.1
0224: */
0225: public static final byte NON_SPACING_MARK = 6;
0226:
0227: /**
0228: * General category "Me" in the Unicode specification.
0229: * @since 1.1
0230: */
0231: public static final byte ENCLOSING_MARK = 7;
0232:
0233: /**
0234: * General category "Mc" in the Unicode specification.
0235: * @since 1.1
0236: */
0237: public static final byte COMBINING_SPACING_MARK = 8;
0238:
0239: /**
0240: * General category "Nd" in the Unicode specification.
0241: * @since 1.1
0242: */
0243: public static final byte DECIMAL_DIGIT_NUMBER = 9;
0244:
0245: /**
0246: * General category "Nl" in the Unicode specification.
0247: * @since 1.1
0248: */
0249: public static final byte LETTER_NUMBER = 10;
0250:
0251: /**
0252: * General category "No" in the Unicode specification.
0253: * @since 1.1
0254: */
0255: public static final byte OTHER_NUMBER = 11;
0256:
0257: /**
0258: * General category "Zs" in the Unicode specification.
0259: * @since 1.1
0260: */
0261: public static final byte SPACE_SEPARATOR = 12;
0262:
0263: /**
0264: * General category "Zl" in the Unicode specification.
0265: * @since 1.1
0266: */
0267: public static final byte LINE_SEPARATOR = 13;
0268:
0269: /**
0270: * General category "Zp" in the Unicode specification.
0271: * @since 1.1
0272: */
0273: public static final byte PARAGRAPH_SEPARATOR = 14;
0274:
0275: /**
0276: * General category "Cc" in the Unicode specification.
0277: * @since 1.1
0278: */
0279: public static final byte CONTROL = 15;
0280:
0281: /**
0282: * General category "Cf" in the Unicode specification.
0283: * @since 1.1
0284: */
0285: public static final byte FORMAT = 16;
0286:
0287: /**
0288: * General category "Co" in the Unicode specification.
0289: * @since 1.1
0290: */
0291: public static final byte PRIVATE_USE = 18;
0292:
0293: /**
0294: * General category "Cs" in the Unicode specification.
0295: * @since 1.1
0296: */
0297: public static final byte SURROGATE = 19;
0298:
0299: /**
0300: * General category "Pd" in the Unicode specification.
0301: * @since 1.1
0302: */
0303: public static final byte DASH_PUNCTUATION = 20;
0304:
0305: /**
0306: * General category "Ps" in the Unicode specification.
0307: * @since 1.1
0308: */
0309: public static final byte START_PUNCTUATION = 21;
0310:
0311: /**
0312: * General category "Pe" in the Unicode specification.
0313: * @since 1.1
0314: */
0315: public static final byte END_PUNCTUATION = 22;
0316:
0317: /**
0318: * General category "Pc" in the Unicode specification.
0319: * @since 1.1
0320: */
0321: public static final byte CONNECTOR_PUNCTUATION = 23;
0322:
0323: /**
0324: * General category "Po" in the Unicode specification.
0325: * @since 1.1
0326: */
0327: public static final byte OTHER_PUNCTUATION = 24;
0328:
0329: /**
0330: * General category "Sm" in the Unicode specification.
0331: * @since 1.1
0332: */
0333: public static final byte MATH_SYMBOL = 25;
0334:
0335: /**
0336: * General category "Sc" in the Unicode specification.
0337: * @since 1.1
0338: */
0339: public static final byte CURRENCY_SYMBOL = 26;
0340:
0341: /**
0342: * General category "Sk" in the Unicode specification.
0343: * @since 1.1
0344: */
0345: public static final byte MODIFIER_SYMBOL = 27;
0346:
0347: /**
0348: * General category "So" in the Unicode specification.
0349: * @since 1.1
0350: */
0351: public static final byte OTHER_SYMBOL = 28;
0352:
0353: /**
0354: * General category "Pi" in the Unicode specification.
0355: * @since 1.4
0356: */
0357: public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
0358:
0359: /**
0360: * General category "Pf" in the Unicode specification.
0361: * @since 1.4
0362: */
0363: public static final byte FINAL_QUOTE_PUNCTUATION = 30;
0364:
0365: /**
0366: * Error flag. Use int (code point) to avoid confusion with U+FFFF.
0367: */
0368: static final int ERROR = 0xFFFFFFFF;
0369:
0370: /**
0371: * Undefined bidirectional character type. Undefined <code>char</code>
0372: * values have undefined directionality in the Unicode specification.
0373: * @since 1.4
0374: */
0375: public static final byte DIRECTIONALITY_UNDEFINED = -1;
0376:
0377: /**
0378: * Strong bidirectional character type "L" in the Unicode specification.
0379: * @since 1.4
0380: */
0381: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
0382:
0383: /**
0384: * Strong bidirectional character type "R" in the Unicode specification.
0385: * @since 1.4
0386: */
0387: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
0388:
0389: /**
0390: * Strong bidirectional character type "AL" in the Unicode specification.
0391: * @since 1.4
0392: */
0393: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
0394:
0395: /**
0396: * Weak bidirectional character type "EN" in the Unicode specification.
0397: * @since 1.4
0398: */
0399: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
0400:
0401: /**
0402: * Weak bidirectional character type "ES" in the Unicode specification.
0403: * @since 1.4
0404: */
0405: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
0406:
0407: /**
0408: * Weak bidirectional character type "ET" in the Unicode specification.
0409: * @since 1.4
0410: */
0411: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
0412:
0413: /**
0414: * Weak bidirectional character type "AN" in the Unicode specification.
0415: * @since 1.4
0416: */
0417: public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
0418:
0419: /**
0420: * Weak bidirectional character type "CS" in the Unicode specification.
0421: * @since 1.4
0422: */
0423: public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
0424:
0425: /**
0426: * Weak bidirectional character type "NSM" in the Unicode specification.
0427: * @since 1.4
0428: */
0429: public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
0430:
0431: /**
0432: * Weak bidirectional character type "BN" in the Unicode specification.
0433: * @since 1.4
0434: */
0435: public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
0436:
0437: /**
0438: * Neutral bidirectional character type "B" in the Unicode specification.
0439: * @since 1.4
0440: */
0441: public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
0442:
0443: /**
0444: * Neutral bidirectional character type "S" in the Unicode specification.
0445: * @since 1.4
0446: */
0447: public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
0448:
0449: /**
0450: * Neutral bidirectional character type "WS" in the Unicode specification.
0451: * @since 1.4
0452: */
0453: public static final byte DIRECTIONALITY_WHITESPACE = 12;
0454:
0455: /**
0456: * Neutral bidirectional character type "ON" in the Unicode specification.
0457: * @since 1.4
0458: */
0459: public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
0460:
0461: /**
0462: * Strong bidirectional character type "LRE" in the Unicode specification.
0463: * @since 1.4
0464: */
0465: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
0466:
0467: /**
0468: * Strong bidirectional character type "LRO" in the Unicode specification.
0469: * @since 1.4
0470: */
0471: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
0472:
0473: /**
0474: * Strong bidirectional character type "RLE" in the Unicode specification.
0475: * @since 1.4
0476: */
0477: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
0478:
0479: /**
0480: * Strong bidirectional character type "RLO" in the Unicode specification.
0481: * @since 1.4
0482: */
0483: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
0484:
0485: /**
0486: * Weak bidirectional character type "PDF" in the Unicode specification.
0487: * @since 1.4
0488: */
0489: public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
0490:
0491: /**
0492: * The minimum value of a Unicode high-surrogate code unit in the
0493: * UTF-16 encoding. A high-surrogate is also known as a
0494: * <i>leading-surrogate</i>.
0495: *
0496: * @since 1.5
0497: */
0498: public static final char MIN_HIGH_SURROGATE = '\uD800';
0499:
0500: /**
0501: * The maximum value of a Unicode high-surrogate code unit in the
0502: * UTF-16 encoding. A high-surrogate is also known as a
0503: * <i>leading-surrogate</i>.
0504: *
0505: * @since 1.5
0506: */
0507: public static final char MAX_HIGH_SURROGATE = '\uDBFF';
0508:
0509: /**
0510: * The minimum value of a Unicode low-surrogate code unit in the
0511: * UTF-16 encoding. A low-surrogate is also known as a
0512: * <i>trailing-surrogate</i>.
0513: *
0514: * @since 1.5
0515: */
0516: public static final char MIN_LOW_SURROGATE = '\uDC00';
0517:
0518: /**
0519: * The maximum value of a Unicode low-surrogate code unit in the
0520: * UTF-16 encoding. A low-surrogate is also known as a
0521: * <i>trailing-surrogate</i>.
0522: *
0523: * @since 1.5
0524: */
0525: public static final char MAX_LOW_SURROGATE = '\uDFFF';
0526:
0527: /**
0528: * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
0529: *
0530: * @since 1.5
0531: */
0532: public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
0533:
0534: /**
0535: * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
0536: *
0537: * @since 1.5
0538: */
0539: public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
0540:
0541: /**
0542: * The minimum value of a supplementary code point.
0543: *
0544: * @since 1.5
0545: */
0546: public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
0547:
0548: /**
0549: * The minimum value of a Unicode code point.
0550: *
0551: * @since 1.5
0552: */
0553: public static final int MIN_CODE_POINT = 0x000000;
0554:
0555: /**
0556: * The maximum value of a Unicode code point.
0557: *
0558: * @since 1.5
0559: */
0560: public static final int MAX_CODE_POINT = 0x10ffff;
0561:
0562: /**
0563: * Instances of this class represent particular subsets of the Unicode
0564: * character set. The only family of subsets defined in the
0565: * <code>Character</code> class is <code>{@link Character.UnicodeBlock
0566: * UnicodeBlock}</code>. Other portions of the Java API may define other
0567: * subsets for their own purposes.
0568: *
0569: * @since 1.2
0570: */
0571: public static class Subset {
0572:
0573: private String name;
0574:
0575: /**
0576: * Constructs a new <code>Subset</code> instance.
0577: *
0578: * @exception NullPointerException if name is <code>null</code>
0579: * @param name The name of this subset
0580: */
0581: protected Subset(String name) {
0582: if (name == null) {
0583: throw new NullPointerException("name");
0584: }
0585: this .name = name;
0586: }
0587:
0588: /**
0589: * Compares two <code>Subset</code> objects for equality.
0590: * This method returns <code>true</code> if and only if
0591: * <code>this</code> and the argument refer to the same
0592: * object; since this method is <code>final</code>, this
0593: * guarantee holds for all subclasses.
0594: */
0595: public final boolean equals(Object obj) {
0596: return (this == obj);
0597: }
0598:
0599: /**
0600: * Returns the standard hash code as defined by the
0601: * <code>{@link Object#hashCode}</code> method. This method
0602: * is <code>final</code> in order to ensure that the
0603: * <code>equals</code> and <code>hashCode</code> methods will
0604: * be consistent in all subclasses.
0605: */
0606: public final int hashCode() {
0607: return super .hashCode();
0608: }
0609:
0610: /**
0611: * Returns the name of this subset.
0612: */
0613: public final String toString() {
0614: return name;
0615: }
0616: }
0617:
0618: /**
0619: * A family of character subsets representing the character blocks in the
0620: * Unicode specification. Character blocks generally define characters
0621: * used for a specific script or purpose. A character is contained by
0622: * at most one Unicode block.
0623: *
0624: * @since 1.2
0625: */
0626: public static final class UnicodeBlock extends Subset {
0627:
0628: private static Map map = new HashMap();
0629:
0630: /**
0631: * Create a UnicodeBlock with the given identifier name.
0632: * This name must be the same as the block identifier.
0633: */
private UnicodeBlock(String idName) {
    super(idName);
    // Register this block under its upper-cased identifier. The explicit
    // Locale.US keeps the key independent of the default locale's case rules.
    map.put(idName.toUpperCase(Locale.US), this);
}
0638:
0639: /**
0640: * Create a UnicodeBlock with the given identifier name and
0641: * alias name.
0642: */
private UnicodeBlock(String idName, String alias) {
    // Delegate so the identifier itself is registered first.
    this(idName);
    // The alias is stored with the same upper-casing rule as the identifier,
    // and maps to this same block instance.
    map.put(alias.toUpperCase(Locale.US), this);
}
0647:
0648: /**
0649: * Create a UnicodeBlock with the given identifier name and
0650: * alias names.
0651: */
private UnicodeBlock(String idName, String[] aliasName) {
    // Delegate so the identifier itself is registered first.
    this(idName);
    // A null alias array simply means "no aliases"; each supplied alias is
    // registered upper-cased (Locale.US) and maps to this same block.
    if (aliasName != null) {
        for (String alias : aliasName) {
            map.put(alias.toUpperCase(Locale.US), this);
        }
    }
}
0660:
0661: /**
0662: * Constant for the "Basic Latin" Unicode character block.
0663: * @since 1.2
0664: */
0665: public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock(
0666: "BASIC_LATIN", new String[] { "Basic Latin",
0667: "BasicLatin" });
0668:
0669: /**
0670: * Constant for the "Latin-1 Supplement" Unicode character block.
0671: * @since 1.2
0672: */
0673: public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock(
0674: "LATIN_1_SUPPLEMENT", new String[] {
0675: "Latin-1 Supplement", "Latin-1Supplement" });
0676:
0677: /**
0678: * Constant for the "Latin Extended-A" Unicode character block.
0679: * @since 1.2
0680: */
0681: public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock(
0682: "LATIN_EXTENDED_A", new String[] { "Latin Extended-A",
0683: "LatinExtended-A" });
0684:
0685: /**
0686: * Constant for the "Latin Extended-B" Unicode character block.
0687: * @since 1.2
0688: */
0689: public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock(
0690: "LATIN_EXTENDED_B", new String[] { "Latin Extended-B",
0691: "LatinExtended-B" });
0692:
0693: /**
0694: * Constant for the "IPA Extensions" Unicode character block.
0695: * @since 1.2
0696: */
0697: public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock(
0698: "IPA_EXTENSIONS", new String[] { "IPA Extensions",
0699: "IPAExtensions" });
0700:
0701: /**
0702: * Constant for the "Spacing Modifier Letters" Unicode character block.
0703: * @since 1.2
0704: */
0705: public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock(
0706: "SPACING_MODIFIER_LETTERS", new String[] {
0707: "Spacing Modifier Letters",
0708: "SpacingModifierLetters" });
0709:
0710: /**
0711: * Constant for the "Combining Diacritical Marks" Unicode character block.
0712: * @since 1.2
0713: */
0714: public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock(
0715: "COMBINING_DIACRITICAL_MARKS", new String[] {
0716: "Combining Diacritical Marks",
0717: "CombiningDiacriticalMarks" });
0718:
0719: /**
0720: * Constant for the "Greek and Coptic" Unicode character block.
0721: * <p>
0722: * This block was previously known as the "Greek" block.
0723: *
0724: * @since 1.2
0725: */
0726: public static final UnicodeBlock GREEK = new UnicodeBlock(
0727: "GREEK", new String[] { "Greek and Coptic",
0728: "GreekandCoptic" });
0729:
0730: /**
0731: * Constant for the "Cyrillic" Unicode character block.
0732: * @since 1.2
0733: */
0734: public static final UnicodeBlock CYRILLIC = new UnicodeBlock(
0735: "CYRILLIC");
0736:
0737: /**
0738: * Constant for the "Armenian" Unicode character block.
0739: * @since 1.2
0740: */
0741: public static final UnicodeBlock ARMENIAN = new UnicodeBlock(
0742: "ARMENIAN");
0743:
0744: /**
0745: * Constant for the "Hebrew" Unicode character block.
0746: * @since 1.2
0747: */
0748: public static final UnicodeBlock HEBREW = new UnicodeBlock(
0749: "HEBREW");
0750:
0751: /**
0752: * Constant for the "Arabic" Unicode character block.
0753: * @since 1.2
0754: */
0755: public static final UnicodeBlock ARABIC = new UnicodeBlock(
0756: "ARABIC");
0757:
0758: /**
0759: * Constant for the "Devanagari" Unicode character block.
0760: * @since 1.2
0761: */
0762: public static final UnicodeBlock DEVANAGARI = new UnicodeBlock(
0763: "DEVANAGARI");
0764:
0765: /**
0766: * Constant for the "Bengali" Unicode character block.
0767: * @since 1.2
0768: */
0769: public static final UnicodeBlock BENGALI = new UnicodeBlock(
0770: "BENGALI");
0771:
0772: /**
0773: * Constant for the "Gurmukhi" Unicode character block.
0774: * @since 1.2
0775: */
0776: public static final UnicodeBlock GURMUKHI = new UnicodeBlock(
0777: "GURMUKHI");
0778:
0779: /**
0780: * Constant for the "Gujarati" Unicode character block.
0781: * @since 1.2
0782: */
0783: public static final UnicodeBlock GUJARATI = new UnicodeBlock(
0784: "GUJARATI");
0785:
0786: /**
0787: * Constant for the "Oriya" Unicode character block.
0788: * @since 1.2
0789: */
0790: public static final UnicodeBlock ORIYA = new UnicodeBlock(
0791: "ORIYA");
0792:
0793: /**
0794: * Constant for the "Tamil" Unicode character block.
0795: * @since 1.2
0796: */
0797: public static final UnicodeBlock TAMIL = new UnicodeBlock(
0798: "TAMIL");
0799:
0800: /**
0801: * Constant for the "Telugu" Unicode character block.
0802: * @since 1.2
0803: */
0804: public static final UnicodeBlock TELUGU = new UnicodeBlock(
0805: "TELUGU");
0806:
0807: /**
0808: * Constant for the "Kannada" Unicode character block.
0809: * @since 1.2
0810: */
0811: public static final UnicodeBlock KANNADA = new UnicodeBlock(
0812: "KANNADA");
0813:
0814: /**
0815: * Constant for the "Malayalam" Unicode character block.
0816: * @since 1.2
0817: */
0818: public static final UnicodeBlock MALAYALAM = new UnicodeBlock(
0819: "MALAYALAM");
0820:
0821: /**
0822: * Constant for the "Thai" Unicode character block.
0823: * @since 1.2
0824: */
0825: public static final UnicodeBlock THAI = new UnicodeBlock("THAI");
0826:
0827: /**
0828: * Constant for the "Lao" Unicode character block.
0829: * @since 1.2
0830: */
0831: public static final UnicodeBlock LAO = new UnicodeBlock("LAO");
0832:
0833: /**
0834: * Constant for the "Tibetan" Unicode character block.
0835: * @since 1.2
0836: */
0837: public static final UnicodeBlock TIBETAN = new UnicodeBlock(
0838: "TIBETAN");
0839:
0840: /**
0841: * Constant for the "Georgian" Unicode character block.
0842: * @since 1.2
0843: */
0844: public static final UnicodeBlock GEORGIAN = new UnicodeBlock(
0845: "GEORGIAN");
0846:
0847: /**
0848: * Constant for the "Hangul Jamo" Unicode character block.
0849: * @since 1.2
0850: */
0851: public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock(
0852: "HANGUL_JAMO", new String[] { "Hangul Jamo",
0853: "HangulJamo" });
0854:
0855: /**
0856: * Constant for the "Latin Extended Additional" Unicode character block.
0857: * @since 1.2
0858: */
0859: public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock(
0860: "LATIN_EXTENDED_ADDITIONAL", new String[] {
0861: "Latin Extended Additional",
0862: "LatinExtendedAdditional" });
0863:
0864: /**
0865: * Constant for the "Greek Extended" Unicode character block.
0866: * @since 1.2
0867: */
0868: public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock(
0869: "GREEK_EXTENDED", new String[] { "Greek Extended",
0870: "GreekExtended" });
0871:
0872: /**
0873: * Constant for the "General Punctuation" Unicode character block.
0874: * @since 1.2
0875: */
0876: public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock(
0877: "GENERAL_PUNCTUATION", new String[] {
0878: "General Punctuation", "GeneralPunctuation" });
0879:
0880: /**
0881: * Constant for the "Superscripts and Subscripts" Unicode character block.
0882: * @since 1.2
0883: */
0884: public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock(
0885: "SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {
0886: "Superscripts and Subscripts",
0887: "SuperscriptsandSubscripts" });
0888:
0889: /**
0890: * Constant for the "Currency Symbols" Unicode character block.
0891: * @since 1.2
0892: */
0893: public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock(
0894: "CURRENCY_SYMBOLS", new String[] { "Currency Symbols",
0895: "CurrencySymbols" });
0896:
0897: /**
0898: * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
0899: * <p>
0900: * This block was previously known as "Combining Marks for Symbols".
0901: * @since 1.2
0902: */
0903: public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock(
0904: "COMBINING_MARKS_FOR_SYMBOLS", new String[] {
0905: "Combining Diacritical Marks for Symbols",
0906: "CombiningDiacriticalMarksforSymbols",
0907: "Combining Marks for Symbols",
0908: "CombiningMarksforSymbols" });
0909:
0910: /**
0911: * Constant for the "Letterlike Symbols" Unicode character block.
0912: * @since 1.2
0913: */
0914: public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock(
0915: "LETTERLIKE_SYMBOLS", new String[] {
0916: "Letterlike Symbols", "LetterlikeSymbols" });
0917:
0918: /**
0919: * Constant for the "Number Forms" Unicode character block.
0920: * @since 1.2
0921: */
0922: public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock(
0923: "NUMBER_FORMS", new String[] { "Number Forms",
0924: "NumberForms" });
0925:
0926: /**
0927: * Constant for the "Arrows" Unicode character block.
0928: * @since 1.2
0929: */
0930: public static final UnicodeBlock ARROWS = new UnicodeBlock(
0931: "ARROWS");
0932:
0933: /**
0934: * Constant for the "Mathematical Operators" Unicode character block.
0935: * @since 1.2
0936: */
0937: public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock(
0938: "MATHEMATICAL_OPERATORS", new String[] {
0939: "Mathematical Operators",
0940: "MathematicalOperators" });
0941:
0942: /**
0943: * Constant for the "Miscellaneous Technical" Unicode character block.
0944: * @since 1.2
0945: */
0946: public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock(
0947: "MISCELLANEOUS_TECHNICAL", new String[] {
0948: "Miscellaneous Technical",
0949: "MiscellaneousTechnical" });
0950:
0951: /**
0952: * Constant for the "Control Pictures" Unicode character block.
0953: * @since 1.2
0954: */
0955: public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock(
0956: "CONTROL_PICTURES", new String[] { "Control Pictures",
0957: "ControlPictures" });
0958:
0959: /**
0960: * Constant for the "Optical Character Recognition" Unicode character block.
0961: * @since 1.2
0962: */
0963: public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock(
0964: "OPTICAL_CHARACTER_RECOGNITION", new String[] {
0965: "Optical Character Recognition",
0966: "OpticalCharacterRecognition" });
0967:
0968: /**
0969: * Constant for the "Enclosed Alphanumerics" Unicode character block, which starts at U+2460.
0970: * @since 1.2
0971: */
0972: public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock(
0973: "ENCLOSED_ALPHANUMERICS", new String[] {
0974: "Enclosed Alphanumerics",
0975: "EnclosedAlphanumerics" });
0976:
0977: /**
0978: * Constant for the "Box Drawing" Unicode character block, which starts at U+2500.
0979: * @since 1.2
0980: */
0981: public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock(
0982: "BOX_DRAWING", new String[] { "Box Drawing",
0983: "BoxDrawing" });
0984:
0985: /**
0986: * Constant for the "Block Elements" Unicode character block, which starts at U+2580.
0987: * @since 1.2
0988: */
0989: public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock(
0990: "BLOCK_ELEMENTS", new String[] { "Block Elements",
0991: "BlockElements" });
0992:
0993: /**
0994: * Constant for the "Geometric Shapes" Unicode character block, which starts at U+25A0.
0995: * @since 1.2
0996: */
0997: public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock(
0998: "GEOMETRIC_SHAPES", new String[] { "Geometric Shapes",
0999: "GeometricShapes" });
1000:
1001: /**
1002: * Constant for the "Miscellaneous Symbols" Unicode character block, which starts at U+2600.
1003: * @since 1.2
1004: */
1005: public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock(
1006: "MISCELLANEOUS_SYMBOLS",
1007: new String[] { "Miscellaneous Symbols",
1008: "MiscellaneousSymbols" });
1009:
1010: /**
1011: * Constant for the "Dingbats" Unicode character block, which starts at U+2700.
1012: * @since 1.2
1013: */
1014: public static final UnicodeBlock DINGBATS = new UnicodeBlock(
1015: "DINGBATS");
1016:
1017: /**
1018: * Constant for the "CJK Symbols and Punctuation" Unicode character block, which starts at U+3000.
1019: * @since 1.2
1020: */
1021: public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock(
1022: "CJK_SYMBOLS_AND_PUNCTUATION", new String[] {
1023: "CJK Symbols and Punctuation",
1024: "CJKSymbolsandPunctuation" });
1025:
1026: /**
1027: * Constant for the "Hiragana" Unicode character block, which starts at U+3040.
1028: * @since 1.2
1029: */
1030: public static final UnicodeBlock HIRAGANA = new UnicodeBlock(
1031: "HIRAGANA");
1032:
1033: /**
1034: * Constant for the "Katakana" Unicode character block, which starts at U+30A0.
1035: * @since 1.2
1036: */
1037: public static final UnicodeBlock KATAKANA = new UnicodeBlock(
1038: "KATAKANA");
1039:
1040: /**
1041: * Constant for the "Bopomofo" Unicode character block, which starts at U+3100.
1042: * @since 1.2
1043: */
1044: public static final UnicodeBlock BOPOMOFO = new UnicodeBlock(
1045: "BOPOMOFO");
1046:
1047: /**
1048: * Constant for the "Hangul Compatibility Jamo" Unicode character block, which starts at U+3130.
1049: * @since 1.2
1050: */
1051: public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock(
1052: "HANGUL_COMPATIBILITY_JAMO", new String[] {
1053: "Hangul Compatibility Jamo",
1054: "HangulCompatibilityJamo" });
1055:
1056: /**
1057: * Constant for the "Kanbun" Unicode character block, which starts at U+3190.
1058: * @since 1.2
1059: */
1060: public static final UnicodeBlock KANBUN = new UnicodeBlock(
1061: "KANBUN");
1062:
1063: /**
1064: * Constant for the "Enclosed CJK Letters and Months" Unicode character block, which starts at U+3200.
1065: * @since 1.2
1066: */
1067: public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock(
1068: "ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {
1069: "Enclosed CJK Letters and Months",
1070: "EnclosedCJKLettersandMonths" });
1071:
1072: /**
1073: * Constant for the "CJK Compatibility" Unicode character block, which starts at U+3300.
1074: * @since 1.2
1075: */
1076: public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock(
1077: "CJK_COMPATIBILITY", new String[] {
1078: "CJK Compatibility", "CJKCompatibility" });
1079:
1080: /**
1081: * Constant for the "CJK Unified Ideographs" Unicode character block, which starts at U+4E00.
1082: * @since 1.2
1083: */
1084: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock(
1085: "CJK_UNIFIED_IDEOGRAPHS", new String[] {
1086: "CJK Unified Ideographs",
1087: "CJKUnifiedIdeographs" });
1088:
1089: /**
1090: * Constant for the "Hangul Syllables" Unicode character block, which starts at U+AC00.
1091: * @since 1.2
1092: */
1093: public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock(
1094: "HANGUL_SYLLABLES", new String[] { "Hangul Syllables",
1095: "HangulSyllables" });
1096:
1097: /**
1098: * Constant for the "Private Use Area" Unicode character block, which starts at U+E000.
1099: * @since 1.2
1100: */
1101: public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock(
1102: "PRIVATE_USE_AREA", new String[] { "Private Use Area",
1103: "PrivateUseArea" });
1104:
1105: /**
1106: * Constant for the "CJK Compatibility Ideographs" Unicode character block, which starts at U+F900.
1107: * @since 1.2
1108: */
1109: public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock(
1110: "CJK_COMPATIBILITY_IDEOGRAPHS", new String[] {
1111: "CJK Compatibility Ideographs",
1112: "CJKCompatibilityIdeographs" });
1113:
1114: /**
1115: * Constant for the "Alphabetic Presentation Forms" Unicode character block, which starts at U+FB00.
1116: * @since 1.2
1117: */
1118: public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock(
1119: "ALPHABETIC_PRESENTATION_FORMS", new String[] {
1120: "Alphabetic Presentation Forms",
1121: "AlphabeticPresentationForms" });
1122:
1123: /**
1124: * Constant for the "Arabic Presentation Forms-A" Unicode character block, which starts at U+FB50.
1125: * @since 1.2
1126: */
1127: public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock(
1128: "ARABIC_PRESENTATION_FORMS_A", new String[] {
1129: "Arabic Presentation Forms-A",
1130: "ArabicPresentationForms-A" });
1131:
1132: /**
1133: * Constant for the "Combining Half Marks" Unicode character block, which starts at U+FE20.
1134: * @since 1.2
1135: */
1136: public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock(
1137: "COMBINING_HALF_MARKS", new String[] {
1138: "Combining Half Marks", "CombiningHalfMarks" });
1139:
1140: /**
1141: * Constant for the "CJK Compatibility Forms" Unicode character block, which starts at U+FE30.
1142: * @since 1.2
1143: */
1144: public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock(
1145: "CJK_COMPATIBILITY_FORMS", new String[] {
1146: "CJK Compatibility Forms",
1147: "CJKCompatibilityForms" });
1148:
1149: /**
1150: * Constant for the "Small Form Variants" Unicode character block, which starts at U+FE50.
1151: * @since 1.2
1152: */
1153: public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock(
1154: "SMALL_FORM_VARIANTS", new String[] {
1155: "Small Form Variants", "SmallFormVariants" });
1156:
1157: /**
1158: * Constant for the "Arabic Presentation Forms-B" Unicode character block, which starts at U+FE70.
1159: * @since 1.2
1160: */
1161: public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock(
1162: "ARABIC_PRESENTATION_FORMS_B", new String[] {
1163: "Arabic Presentation Forms-B",
1164: "ArabicPresentationForms-B" });
1165:
1166: /**
1167: * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block, which starts at U+FF00.
1168: * @since 1.2
1169: */
1170: public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock(
1171: "HALFWIDTH_AND_FULLWIDTH_FORMS", new String[] {
1172: "Halfwidth and Fullwidth Forms",
1173: "HalfwidthandFullwidthForms" });
1174:
1175: /**
1176: * Constant for the "Specials" Unicode character block, which starts at U+FFF0.
1177: * @since 1.2
1178: */
1179: public static final UnicodeBlock SPECIALS = new UnicodeBlock(
1180: "SPECIALS");
1181:
1182: /**
1183: * Legacy constant for the surrogates area.
1184: * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1185: * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1186: * These new constants match the block definitions of the Unicode
1187: * Standard. The {@link #of(char)} and {@link #of(int)} methods
1188: * return the new constants, not SURROGATES_AREA.
1189: */
1190: @Deprecated
1191: public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock(
1192: "SURROGATES_AREA");
1193:
1194: /**
1195: * Constant for the "Syriac" Unicode character block, which starts at U+0700.
1196: * @since 1.4
1197: */
1198: public static final UnicodeBlock SYRIAC = new UnicodeBlock(
1199: "SYRIAC");
1200:
1201: /**
1202: * Constant for the "Thaana" Unicode character block, which starts at U+0780.
1203: * @since 1.4
1204: */
1205: public static final UnicodeBlock THAANA = new UnicodeBlock(
1206: "THAANA");
1207:
1208: /**
1209: * Constant for the "Sinhala" Unicode character block, which starts at U+0D80.
1210: * @since 1.4
1211: */
1212: public static final UnicodeBlock SINHALA = new UnicodeBlock(
1213: "SINHALA");
1214:
1215: /**
1216: * Constant for the "Myanmar" Unicode character block, which starts at U+1000.
1217: * @since 1.4
1218: */
1219: public static final UnicodeBlock MYANMAR = new UnicodeBlock(
1220: "MYANMAR");
1221:
1222: /**
1223: * Constant for the "Ethiopic" Unicode character block, which starts at U+1200.
1224: * @since 1.4
1225: */
1226: public static final UnicodeBlock ETHIOPIC = new UnicodeBlock(
1227: "ETHIOPIC");
1228:
1229: /**
1230: * Constant for the "Cherokee" Unicode character block, which starts at U+13A0.
1231: * @since 1.4
1232: */
1233: public static final UnicodeBlock CHEROKEE = new UnicodeBlock(
1234: "CHEROKEE");
1235:
1236: /**
1237: * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block, which starts at U+1400.
1238: * @since 1.4
1239: */
1240: public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock(
1241: "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", new String[] {
1242: "Unified Canadian Aboriginal Syllabics",
1243: "UnifiedCanadianAboriginalSyllabics" });
1244:
1245: /**
1246: * Constant for the "Ogham" Unicode character block, which starts at U+1680.
1247: * @since 1.4
1248: */
1249: public static final UnicodeBlock OGHAM = new UnicodeBlock(
1250: "OGHAM");
1251:
1252: /**
1253: * Constant for the "Runic" Unicode character block, which starts at U+16A0.
1254: * @since 1.4
1255: */
1256: public static final UnicodeBlock RUNIC = new UnicodeBlock(
1257: "RUNIC");
1258:
1259: /**
1260: * Constant for the "Khmer" Unicode character block, which starts at U+1780.
1261: * @since 1.4
1262: */
1263: public static final UnicodeBlock KHMER = new UnicodeBlock(
1264: "KHMER");
1265:
1266: /**
1267: * Constant for the "Mongolian" Unicode character block, which starts at U+1800.
1268: * @since 1.4
1269: */
1270: public static final UnicodeBlock MONGOLIAN = new UnicodeBlock(
1271: "MONGOLIAN");
1272:
1273: /**
1274: * Constant for the "Braille Patterns" Unicode character block, which starts at U+2800.
1275: * @since 1.4
1276: */
1277: public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock(
1278: "BRAILLE_PATTERNS", new String[] { "Braille Patterns",
1279: "BraillePatterns" });
1280:
1281: /**
1282: * Constant for the "CJK Radicals Supplement" Unicode character block, which starts at U+2E80.
1283: * @since 1.4
1284: */
1285: public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock(
1286: "CJK_RADICALS_SUPPLEMENT", new String[] {
1287: "CJK Radicals Supplement",
1288: "CJKRadicalsSupplement" });
1289:
1290: /**
1291: * Constant for the "Kangxi Radicals" Unicode character block, which starts at U+2F00.
1292: * @since 1.4
1293: */
1294: public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock(
1295: "KANGXI_RADICALS", new String[] { "Kangxi Radicals",
1296: "KangxiRadicals" });
1297:
1298: /**
1299: * Constant for the "Ideographic Description Characters" Unicode character block, which starts at U+2FF0.
1300: * @since 1.4
1301: */
1302: public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock(
1303: "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {
1304: "Ideographic Description Characters",
1305: "IdeographicDescriptionCharacters" });
1306:
1307: /**
1308: * Constant for the "Bopomofo Extended" Unicode character block, which starts at U+31A0.
1309: * @since 1.4
1310: */
1311: public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock(
1312: "BOPOMOFO_EXTENDED", new String[] {
1313: "Bopomofo Extended", "BopomofoExtended" });
1314:
1315: /**
1316: * Constant for the "CJK Unified Ideographs Extension A" Unicode character block, which starts at U+3400.
1317: * @since 1.4
1318: */
1319: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock(
1320: "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {
1321: "CJK Unified Ideographs Extension A",
1322: "CJKUnifiedIdeographsExtensionA" });
1323:
1324: /**
1325: * Constant for the "Yi Syllables" Unicode character block, which starts at U+A000.
1326: * @since 1.4
1327: */
1328: public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock(
1329: "YI_SYLLABLES", new String[] { "Yi Syllables",
1330: "YiSyllables" });
1331:
1332: /**
1333: * Constant for the "Yi Radicals" Unicode character block, which starts at U+A490.
1334: * @since 1.4
1335: */
1336: public static final UnicodeBlock YI_RADICALS = new UnicodeBlock(
1337: "YI_RADICALS", new String[] { "Yi Radicals",
1338: "YiRadicals" });
1339:
1340: /**
1341: * Constant for the "Cyrillic Supplementary" Unicode character block, which starts at U+0500.
1342: * @since 1.5
1343: */
1344: public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock(
1345: "CYRILLIC_SUPPLEMENTARY", new String[] {
1346: "Cyrillic Supplementary",
1347: "CyrillicSupplementary" });
1348:
1349: /**
1350: * Constant for the "Tagalog" Unicode character block, which starts at U+1700.
1351: * @since 1.5
1352: */
1353: public static final UnicodeBlock TAGALOG = new UnicodeBlock(
1354: "TAGALOG");
1355:
1356: /**
1357: * Constant for the "Hanunoo" Unicode character block, which starts at U+1720.
1358: * @since 1.5
1359: */
1360: public static final UnicodeBlock HANUNOO = new UnicodeBlock(
1361: "HANUNOO");
1362:
1363: /**
1364: * Constant for the "Buhid" Unicode character block, which starts at U+1740.
1365: * @since 1.5
1366: */
1367: public static final UnicodeBlock BUHID = new UnicodeBlock(
1368: "BUHID");
1369:
1370: /**
1371: * Constant for the "Tagbanwa" Unicode character block, which starts at U+1760.
1372: * @since 1.5
1373: */
1374: public static final UnicodeBlock TAGBANWA = new UnicodeBlock(
1375: "TAGBANWA");
1376:
1377: /**
1378: * Constant for the "Limbu" Unicode character block, which starts at U+1900.
1379: * @since 1.5
1380: */
1381: public static final UnicodeBlock LIMBU = new UnicodeBlock(
1382: "LIMBU");
1383:
1384: /**
1385: * Constant for the "Tai Le" Unicode character block, which starts at U+1950.
1386: * @since 1.5
1387: */
1388: public static final UnicodeBlock TAI_LE = new UnicodeBlock(
1389: "TAI_LE", new String[] { "Tai Le", "TaiLe" });
1390:
1391: /**
1392: * Constant for the "Khmer Symbols" Unicode character block, which starts at U+19E0.
1393: * @since 1.5
1394: */
1395: public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock(
1396: "KHMER_SYMBOLS", new String[] { "Khmer Symbols",
1397: "KhmerSymbols" });
1398:
1399: /**
1400: * Constant for the "Phonetic Extensions" Unicode character block, which starts at U+1D00.
1401: * @since 1.5
1402: */
1403: public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock(
1404: "PHONETIC_EXTENSIONS", new String[] {
1405: "Phonetic Extensions", "PhoneticExtensions" });
1406:
1407: /**
1408: * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block, which starts at U+27C0.
1409: * @since 1.5
1410: */
1411: public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock(
1412: "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", new String[] {
1413: "Miscellaneous Mathematical Symbols-A",
1414: "MiscellaneousMathematicalSymbols-A" });
1415:
1416: /**
1417: * Constant for the "Supplemental Arrows-A" Unicode character block, which starts at U+27F0.
1418: * @since 1.5
1419: */
1420: public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock(
1421: "SUPPLEMENTAL_ARROWS_A",
1422: new String[] { "Supplemental Arrows-A",
1423: "SupplementalArrows-A" });
1424:
1425: /**
1426: * Constant for the "Supplemental Arrows-B" Unicode character block, which starts at U+2900.
1427: * @since 1.5
1428: */
1429: public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock(
1430: "SUPPLEMENTAL_ARROWS_B",
1431: new String[] { "Supplemental Arrows-B",
1432: "SupplementalArrows-B" });
1433:
1434: /**
1435: * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block, which starts at U+2980.
1436: * @since 1.5
1437: */
1438: public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock(
1439: "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", new String[] {
1440: "Miscellaneous Mathematical Symbols-B",
1441: "MiscellaneousMathematicalSymbols-B" });
1442:
1443: /**
1444: * Constant for the "Supplemental Mathematical Operators" Unicode character block, which starts at U+2A00.
1445: * @since 1.5
1446: */
1447: public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock(
1448: "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", new String[] {
1449: "Supplemental Mathematical Operators",
1450: "SupplementalMathematicalOperators" });
1451:
1452: /**
1453: * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block, which starts at U+2B00.
1454: * @since 1.5
1455: */
1456: public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock(
1457: "MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {
1458: "Miscellaneous Symbols and Arrows",
1459: "MiscellaneousSymbolsandArrows" });
1460:
1461: /**
1462: * Constant for the "Katakana Phonetic Extensions" Unicode character block, which starts at U+31F0.
1463: * @since 1.5
1464: */
1465: public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock(
1466: "KATAKANA_PHONETIC_EXTENSIONS", new String[] {
1467: "Katakana Phonetic Extensions",
1468: "KatakanaPhoneticExtensions" });
1469:
1470: /**
1471: * Constant for the "Yijing Hexagram Symbols" Unicode character block, which starts at U+4DC0.
1472: * @since 1.5
1473: */
1474: public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock(
1475: "YIJING_HEXAGRAM_SYMBOLS", new String[] {
1476: "Yijing Hexagram Symbols",
1477: "YijingHexagramSymbols" });
1478:
1479: /**
1480: * Constant for the "Variation Selectors" Unicode character block, which starts at U+FE00.
1481: * @since 1.5
1482: */
1483: public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock(
1484: "VARIATION_SELECTORS", new String[] {
1485: "Variation Selectors", "VariationSelectors" });
1486:
1487: /**
1488: * Constant for the "Linear B Syllabary" Unicode character block, which starts at U+10000.
1489: * @since 1.5
1490: */
1491: public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock(
1492: "LINEAR_B_SYLLABARY", new String[] {
1493: "Linear B Syllabary", "LinearBSyllabary" });
1494:
1495: /**
1496: * Constant for the "Linear B Ideograms" Unicode character block, which starts at U+10080.
1497: * @since 1.5
1498: */
1499: public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock(
1500: "LINEAR_B_IDEOGRAMS", new String[] {
1501: "Linear B Ideograms", "LinearBIdeograms" });
1502:
1503: /**
1504: * Constant for the "Aegean Numbers" Unicode character block, which starts at U+10100.
1505: * @since 1.5
1506: */
1507: public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock(
1508: "AEGEAN_NUMBERS", new String[] { "Aegean Numbers",
1509: "AegeanNumbers" });
1510:
1511: /**
1512: * Constant for the "Old Italic" Unicode character block, which starts at U+10300.
1513: * @since 1.5
1514: */
1515: public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock(
1516: "OLD_ITALIC",
1517: new String[] { "Old Italic", "OldItalic" });
1518:
1519: /**
1520: * Constant for the "Gothic" Unicode character block, which starts at U+10330.
1521: * @since 1.5
1522: */
1523: public static final UnicodeBlock GOTHIC = new UnicodeBlock(
1524: "GOTHIC");
1525:
1526: /**
1527: * Constant for the "Ugaritic" Unicode character block, which starts at U+10380.
1528: * @since 1.5
1529: */
1530: public static final UnicodeBlock UGARITIC = new UnicodeBlock(
1531: "UGARITIC");
1532:
1533: /**
1534: * Constant for the "Deseret" Unicode character block, which starts at U+10400.
1535: * @since 1.5
1536: */
1537: public static final UnicodeBlock DESERET = new UnicodeBlock(
1538: "DESERET");
1539:
1540: /**
1541: * Constant for the "Shavian" Unicode character block, which starts at U+10450.
1542: * @since 1.5
1543: */
1544: public static final UnicodeBlock SHAVIAN = new UnicodeBlock(
1545: "SHAVIAN");
1546:
1547: /**
1548: * Constant for the "Osmanya" Unicode character block, which starts at U+10480.
1549: * @since 1.5
1550: */
1551: public static final UnicodeBlock OSMANYA = new UnicodeBlock(
1552: "OSMANYA");
1553:
1554: /**
1555: * Constant for the "Cypriot Syllabary" Unicode character block, which starts at U+10800.
1556: * @since 1.5
1557: */
1558: public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock(
1559: "CYPRIOT_SYLLABARY", new String[] {
1560: "Cypriot Syllabary", "CypriotSyllabary" });
1561:
1562: /**
1563: * Constant for the "Byzantine Musical Symbols" Unicode character block, which starts at U+1D000.
1564: * @since 1.5
1565: */
1566: public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock(
1567: "BYZANTINE_MUSICAL_SYMBOLS", new String[] {
1568: "Byzantine Musical Symbols",
1569: "ByzantineMusicalSymbols" });
1570:
1571: /**
1572: * Constant for the "Musical Symbols" Unicode character block, which starts at U+1D100.
1573: * @since 1.5
1574: */
1575: public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock(
1576: "MUSICAL_SYMBOLS", new String[] { "Musical Symbols",
1577: "MusicalSymbols" });
1578:
1579: /**
1580: * Constant for the "Tai Xuan Jing Symbols" Unicode character block, which starts at U+1D300.
1581: * @since 1.5
1582: */
1583: public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock(
1584: "TAI_XUAN_JING_SYMBOLS", new String[] {
1585: "Tai Xuan Jing Symbols", "TaiXuanJingSymbols" });
1586:
1587: /**
1588: * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block, which starts at U+1D400.
1589: * @since 1.5
1590: */
1591: public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock(
1592: "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", new String[] {
1593: "Mathematical Alphanumeric Symbols",
1594: "MathematicalAlphanumericSymbols" });
1595:
1596: /**
1597: * Constant for the "CJK Unified Ideographs Extension B" Unicode character block, which starts at U+20000.
1598: * @since 1.5
1599: */
1600: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock(
1601: "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", new String[] {
1602: "CJK Unified Ideographs Extension B",
1603: "CJKUnifiedIdeographsExtensionB" });
1604:
1605: /**
1606: * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block, which starts at U+2F800.
1607: * @since 1.5
1608: */
1609: public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock(
1610: "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1611: new String[] {
1612: "CJK Compatibility Ideographs Supplement",
1613: "CJKCompatibilityIdeographsSupplement" });
1614:
1615: /**
1616: * Constant for the "Tags" Unicode character block, which starts at U+E0000.
1617: * @since 1.5
1618: */
1619: public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1620:
1621: /**
1622: * Constant for the "Variation Selectors Supplement" Unicode character block, which starts at U+E0100.
1623: * @since 1.5
1624: */
1625: public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock(
1626: "VARIATION_SELECTORS_SUPPLEMENT", new String[] {
1627: "Variation Selectors Supplement",
1628: "VariationSelectorsSupplement" });
1629:
1630: /**
1631: * Constant for the "Supplementary Private Use Area-A" Unicode character block, which starts at U+F0000.
1632: * @since 1.5
1633: */
1634: public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock(
1635: "SUPPLEMENTARY_PRIVATE_USE_AREA_A", new String[] {
1636: "Supplementary Private Use Area-A",
1637: "SupplementaryPrivateUseArea-A" });
1638:
1639: /**
1640: * Constant for the "Supplementary Private Use Area-B" Unicode character block, which starts at U+100000.
1641: * @since 1.5
1642: */
1643: public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock(
1644: "SUPPLEMENTARY_PRIVATE_USE_AREA_B", new String[] {
1645: "Supplementary Private Use Area-B",
1646: "SupplementaryPrivateUseArea-B" });
1647:
1648: /**
1649: * Constant for the "High Surrogates" Unicode character block.
1650: * This block represents code point values in the high surrogate
1651: * range: 0xD800 through 0xDB7F.
1652: *
1653: * @since 1.5
1654: */
1655: public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock(
1656: "HIGH_SURROGATES", new String[] { "High Surrogates",
1657: "HighSurrogates" });
1658:
1659: /**
1660: * Constant for the "High Private Use Surrogates" Unicode character block.
1661: * This block represents code point values in the high surrogate
1662: * range: 0xDB80 through 0xDBFF.
1663: *
1664: * @since 1.5
1665: */
1666: public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock(
1667: "HIGH_PRIVATE_USE_SURROGATES", new String[] {
1668: "High Private Use Surrogates",
1669: "HighPrivateUseSurrogates" });
1670:
1671: /**
1672: * Constant for the "Low Surrogates" Unicode character block.
1673: * This block represents code point values in the low surrogate
1674: * range: 0xDC00 through 0xDFFF.
1675: *
1676: * @since 1.5
1677: */
1678: public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock(
1679: "LOW_SURROGATES", new String[] { "Low Surrogates",
1680: "LowSurrogates" });
1681:
1682: /* Start code points in ascending order; each trailing comment names the block (or unassigned gap) that begins there. */ private static final int blockStarts[] = { 0x0000, // Basic Latin
1683: 0x0080, // Latin-1 Supplement
1684: 0x0100, // Latin Extended-A
1685: 0x0180, // Latin Extended-B
1686: 0x0250, // IPA Extensions
1687: 0x02B0, // Spacing Modifier Letters
1688: 0x0300, // Combining Diacritical Marks
1689: 0x0370, // Greek and Coptic
1690: 0x0400, // Cyrillic
1691: 0x0500, // Cyrillic Supplementary
1692: 0x0530, // Armenian
1693: 0x0590, // Hebrew
1694: 0x0600, // Arabic
1695: 0x0700, // Syriac
1696: 0x0750, // unassigned
1697: 0x0780, // Thaana
1698: 0x07C0, // unassigned
1699: 0x0900, // Devanagari
1700: 0x0980, // Bengali
1701: 0x0A00, // Gurmukhi
1702: 0x0A80, // Gujarati
1703: 0x0B00, // Oriya
1704: 0x0B80, // Tamil
1705: 0x0C00, // Telugu
1706: 0x0C80, // Kannada
1707: 0x0D00, // Malayalam
1708: 0x0D80, // Sinhala
1709: 0x0E00, // Thai
1710: 0x0E80, // Lao
1711: 0x0F00, // Tibetan
1712: 0x1000, // Myanmar
1713: 0x10A0, // Georgian
1714: 0x1100, // Hangul Jamo
1715: 0x1200, // Ethiopic
1716: 0x1380, // unassigned
1717: 0x13A0, // Cherokee
1718: 0x1400, // Unified Canadian Aboriginal Syllabics
1719: 0x1680, // Ogham
1720: 0x16A0, // Runic
1721: 0x1700, // Tagalog
1722: 0x1720, // Hanunoo
1723: 0x1740, // Buhid
1724: 0x1760, // Tagbanwa
1725: 0x1780, // Khmer
1726: 0x1800, // Mongolian
1727: 0x18B0, // unassigned
1728: 0x1900, // Limbu
1729: 0x1950, // Tai Le
1730: 0x1980, // unassigned
1731: 0x19E0, // Khmer Symbols
1732: 0x1A00, // unassigned
1733: 0x1D00, // Phonetic Extensions
1734: 0x1D80, // unassigned
1735: 0x1E00, // Latin Extended Additional
1736: 0x1F00, // Greek Extended
1737: 0x2000, // General Punctuation
1738: 0x2070, // Superscripts and Subscripts
1739: 0x20A0, // Currency Symbols
1740: 0x20D0, // Combining Diacritical Marks for Symbols
1741: 0x2100, // Letterlike Symbols
1742: 0x2150, // Number Forms
1743: 0x2190, // Arrows
1744: 0x2200, // Mathematical Operators
1745: 0x2300, // Miscellaneous Technical
1746: 0x2400, // Control Pictures
1747: 0x2440, // Optical Character Recognition
1748: 0x2460, // Enclosed Alphanumerics
1749: 0x2500, // Box Drawing
1750: 0x2580, // Block Elements
1751: 0x25A0, // Geometric Shapes
1752: 0x2600, // Miscellaneous Symbols
1753: 0x2700, // Dingbats
1754: 0x27C0, // Miscellaneous Mathematical Symbols-A
1755: 0x27F0, // Supplemental Arrows-A
1756: 0x2800, // Braille Patterns
1757: 0x2900, // Supplemental Arrows-B
1758: 0x2980, // Miscellaneous Mathematical Symbols-B
1759: 0x2A00, // Supplemental Mathematical Operators
1760: 0x2B00, // Miscellaneous Symbols and Arrows
1761: 0x2C00, // unassigned
1762: 0x2E80, // CJK Radicals Supplement
1763: 0x2F00, // Kangxi Radicals
1764: 0x2FE0, // unassigned
1765: 0x2FF0, // Ideographic Description Characters
1766: 0x3000, // CJK Symbols and Punctuation
1767: 0x3040, // Hiragana
1768: 0x30A0, // Katakana
1769: 0x3100, // Bopomofo
1770: 0x3130, // Hangul Compatibility Jamo
1771: 0x3190, // Kanbun
1772: 0x31A0, // Bopomofo Extended
1773: 0x31C0, // unassigned
1774: 0x31F0, // Katakana Phonetic Extensions
1775: 0x3200, // Enclosed CJK Letters and Months
1776: 0x3300, // CJK Compatibility
1777: 0x3400, // CJK Unified Ideographs Extension A
1778: 0x4DC0, // Yijing Hexagram Symbols
1779: 0x4E00, // CJK Unified Ideographs
1780: 0xA000, // Yi Syllables
1781: 0xA490, // Yi Radicals
1782: 0xA4D0, // unassigned
1783: 0xAC00, // Hangul Syllables
1784: 0xD7B0, // unassigned
1785: 0xD800, // High Surrogates
1786: 0xDB80, // High Private Use Surrogates
1787: 0xDC00, // Low Surrogates
1788: 0xE000, // Private Use Area
1789: 0xF900, // CJK Compatibility Ideographs
1790: 0xFB00, // Alphabetic Presentation Forms
1791: 0xFB50, // Arabic Presentation Forms-A
1792: 0xFE00, // Variation Selectors
1793: 0xFE10, // unassigned
1794: 0xFE20, // Combining Half Marks
1795: 0xFE30, // CJK Compatibility Forms
1796: 0xFE50, // Small Form Variants
1797: 0xFE70, // Arabic Presentation Forms-B
1798: 0xFF00, // Halfwidth and Fullwidth Forms
1799: 0xFFF0, // Specials
1800: 0x10000, // Linear B Syllabary
1801: 0x10080, // Linear B Ideograms
1802: 0x10100, // Aegean Numbers
1803: 0x10140, // unassigned
1804: 0x10300, // Old Italic
1805: 0x10330, // Gothic
1806: 0x10350, // unassigned
1807: 0x10380, // Ugaritic
1808: 0x103A0, // unassigned
1809: 0x10400, // Deseret
1810: 0x10450, // Shavian
1811: 0x10480, // Osmanya
1812: 0x104B0, // unassigned
1813: 0x10800, // Cypriot Syllabary
1814: 0x10840, // unassigned
1815: 0x1D000, // Byzantine Musical Symbols
1816: 0x1D100, // Musical Symbols
1817: 0x1D200, // unassigned
1818: 0x1D300, // Tai Xuan Jing Symbols
1819: 0x1D360, // unassigned
1820: 0x1D400, // Mathematical Alphanumeric Symbols
1821: 0x1D800, // unassigned
1822: 0x20000, // CJK Unified Ideographs Extension B
1823: 0x2A6E0, // unassigned
1824: 0x2F800, // CJK Compatibility Ideographs Supplement
1825: 0x2FA20, // unassigned
1826: 0xE0000, // Tags
1827: 0xE0080, // unassigned
1828: 0xE0100, // Variation Selectors Supplement
1829: 0xE01F0, // unassigned
1830: 0xF0000, // Supplementary Private Use Area-A
1831: 0x100000, // Supplementary Private Use Area-B
1832: }; // 150 entries, in the same order as blocks[]; every "unassigned" row lines up with a null slot there
1833:
1834: /* Block constant for each blockStarts entry, in the same order; null sits where blockStarts is commented "unassigned". */ private static final UnicodeBlock[] blocks = { BASIC_LATIN,
1835: LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, LATIN_EXTENDED_B,
1836: IPA_EXTENSIONS, SPACING_MODIFIER_LETTERS,
1837: COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC,
1838: CYRILLIC_SUPPLEMENTARY, ARMENIAN, HEBREW, ARABIC,
1839: SYRIAC, null, THAANA, null, DEVANAGARI, BENGALI,
1840: GURMUKHI, GUJARATI, ORIYA, TAMIL, TELUGU, KANNADA,
1841: MALAYALAM, SINHALA, THAI, LAO, TIBETAN, MYANMAR,
1842: GEORGIAN, HANGUL_JAMO, ETHIOPIC, null, CHEROKEE,
1843: UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC,
1844: TAGALOG, HANUNOO, BUHID, TAGBANWA, KHMER, MONGOLIAN,
1845: null, LIMBU, TAI_LE, null, KHMER_SYMBOLS, null,
1846: PHONETIC_EXTENSIONS, null, LATIN_EXTENDED_ADDITIONAL,
1847: GREEK_EXTENDED, GENERAL_PUNCTUATION,
1848: SUPERSCRIPTS_AND_SUBSCRIPTS, CURRENCY_SYMBOLS,
1849: COMBINING_MARKS_FOR_SYMBOLS, LETTERLIKE_SYMBOLS,
1850: NUMBER_FORMS, ARROWS, MATHEMATICAL_OPERATORS,
1851: MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES,
1852: OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS,
1853: BOX_DRAWING, BLOCK_ELEMENTS, GEOMETRIC_SHAPES,
1854: MISCELLANEOUS_SYMBOLS, DINGBATS,
1855: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1856: SUPPLEMENTAL_ARROWS_A, BRAILLE_PATTERNS,
1857: SUPPLEMENTAL_ARROWS_B,
1858: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1859: SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1860: MISCELLANEOUS_SYMBOLS_AND_ARROWS, null,
1861: CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, null,
1862: IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1863: CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA,
1864: BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, KANBUN,
1865: BOPOMOFO_EXTENDED, null, KATAKANA_PHONETIC_EXTENSIONS,
1866: ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY,
1867: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1868: YIJING_HEXAGRAM_SYMBOLS, CJK_UNIFIED_IDEOGRAPHS,
1869: YI_SYLLABLES, YI_RADICALS, null, HANGUL_SYLLABLES,
1870: null, HIGH_SURROGATES, HIGH_PRIVATE_USE_SURROGATES,
1871: LOW_SURROGATES, PRIVATE_USE_AREA,
1872: CJK_COMPATIBILITY_IDEOGRAPHS,
1873: ALPHABETIC_PRESENTATION_FORMS,
1874: ARABIC_PRESENTATION_FORMS_A, VARIATION_SELECTORS, null,
1875: COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS,
1876: SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B,
1877: HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS,
1878: LINEAR_B_SYLLABARY, LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS,
1879: null, OLD_ITALIC, GOTHIC, null, UGARITIC, null,
1880: DESERET, SHAVIAN, OSMANYA, null, CYPRIOT_SYLLABARY,
1881: null, BYZANTINE_MUSICAL_SYMBOLS, MUSICAL_SYMBOLS, null,
1882: TAI_XUAN_JING_SYMBOLS, null,
1883: MATHEMATICAL_ALPHANUMERIC_SYMBOLS, null,
1884: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, null,
1885: CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, null, TAGS,
1886: null, VARIATION_SELECTORS_SUPPLEMENT, null,
1887: SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1888: SUPPLEMENTARY_PRIVATE_USE_AREA_B }; // 150 entries; the deprecated SURROGATES_AREA is not among them
1889:
1890: /**
1891: * Returns the object representing the Unicode block containing the
1892: * given character, or <code>null</code> if the character is not a
1893: * member of a defined block.
1894: *
1895: * <p><b>Note:</b> This method cannot handle <a
1896: * href="Character.html#supplementary"> supplementary
1897: * characters</a>. To support all Unicode characters,
1898: * including supplementary characters, use the {@link
1899: * #of(int)} method.
1900: *
1901: * @param c The character in question
1902: * @return The <code>UnicodeBlock</code> instance representing the
1903: * Unicode block of which this character is a member, or
1904: * <code>null</code> if the character is not a member of any
1905: * Unicode block
1906: */
1907: public static UnicodeBlock of(char c) {
1908: return of((int) c);
1909: }
1910:
1911: /**
1912: * Returns the object representing the Unicode block
1913: * containing the given character (Unicode code point), or
1914: * <code>null</code> if the character is not a member of a
1915: * defined block.
1916: *
1917: * @param codePoint the character (Unicode code point) in question.
1918: * @return The <code>UnicodeBlock</code> instance representing the
1919: * Unicode block of which this character is a member, or
1920: * <code>null</code> if the character is not a member of any
1921: * Unicode block
1922: * @exception IllegalArgumentException if the specified
1923: * <code>codePoint</code> is an invalid Unicode code point.
1924: * @see Character#isValidCodePoint(int)
1925: * @since 1.5
1926: */
1927: public static UnicodeBlock of(int codePoint) {
1928: if (!isValidCodePoint(codePoint)) {
1929: throw new IllegalArgumentException();
1930: }
1931:
1932: int top, bottom, current;
1933: bottom = 0;
1934: top = blockStarts.length;
1935: current = top / 2;
1936:
1937: // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
1938: while (top - bottom > 1) {
1939: if (codePoint >= blockStarts[current]) {
1940: bottom = current;
1941: } else {
1942: top = current;
1943: }
1944: current = (top + bottom) / 2;
1945: }
1946: return blocks[current];
1947: }
1948:
1949: /**
1950: * Returns the UnicodeBlock with the given name. Block
1951: * names are determined by The Unicode Standard. The file
1952: * Blocks-&lt;version&gt;.txt defines blocks for a particular
1953: * version of the standard. The {@link Character} class specifies
1954: * the version of the standard that it supports.
1955: * <p>
1956: * This method accepts block names in the following forms:
1957: * <ol>
1958: * <li> Canonical block names as defined by the Unicode Standard.
1959: * For example, the standard defines a "Basic Latin" block. Therefore, this
1960: * method accepts "Basic Latin" as a valid block name. The documentation of
1961: * each UnicodeBlock provides the canonical name.
1962: * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
1963: * is a valid block name for the "Basic Latin" block.
1964: * <li>The text representation of each constant UnicodeBlock identifier.
1965: * For example, this method will return the {@link #BASIC_LATIN} block if
1966: * provided with the "BASIC_LATIN" name. This form replaces all spaces and
1967: * hyphens in the canonical name with underscores.
1968: * </ol>
1969: * Finally, character case is ignored for all of the valid block name forms.
1970: * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
1971: * The en_US locale's case mapping rules are used to provide case-insensitive
1972: * string comparisons for block name validation.
1973: * <p>
1974: * If the Unicode Standard changes block names, both the previous and
1975: * current names will be accepted.
1976: *
1977: * @param blockName A <code>UnicodeBlock</code> name.
1978: * @return The <code>UnicodeBlock</code> instance identified
1979: * by <code>blockName</code>
1980: * @throws IllegalArgumentException if <code>blockName</code> is an
1981: * invalid name
1982: * @throws NullPointerException if <code>blockName</code> is null
1983: * @since 1.5
1984: */
1985: public static final UnicodeBlock forName(String blockName) {
1986: UnicodeBlock block = (UnicodeBlock) map.get(blockName
1987: .toUpperCase(Locale.US));
1988: if (block == null) {
1989: throw new IllegalArgumentException();
1990: }
1991: return block;
1992: }
1993: }
1994:
1995: /**
1996: * The value of the <code>Character</code>.
1997: *
1998: * @serial
1999: */
2000: private final char value;
2001:
2002: /** use serialVersionUID from JDK 1.0.2 for interoperability */
2003: private static final long serialVersionUID = 3786198910865385080L;
2004:
2005: /**
2006: * Constructs a newly allocated <code>Character</code> object that
2007: * represents the specified <code>char</code> value.
2008: *
2009: * @param value the value to be represented by the
2010: * <code>Character</code> object.
2011: */
public Character(char value) {
    // The wrapped primitive is stored once; the field is final.
    this.value = value;
}
2015:
2016: private static class CharacterCache {
2017: private CharacterCache() {
2018: }
2019:
2020: static final Character cache[] = new Character[127 + 1];
2021:
2022: static {
2023: for (int i = 0; i < cache.length; i++)
2024: cache[i] = new Character((char) i);
2025: }
2026: }
2027:
2028: /**
2029: * Returns a <tt>Character</tt> instance representing the specified
2030: * <tt>char</tt> value.
2031: * If a new <tt>Character</tt> instance is not required, this method
2032: * should generally be used in preference to the constructor
2033: * {@link #Character(char)}, as this method is likely to yield
2034: * significantly better space and time performance by caching
2035: * frequently requested values.
2036: *
2037: * @param c a char value.
2038: * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2039: * @since 1.5
2040: */
2041: public static Character valueOf(char c) {
2042: if (c <= 127) { // must cache
2043: return CharacterCache.cache[(int) c];
2044: }
2045: return new Character(c);
2046: }
2047:
2048: /**
2049: * Returns the value of this <code>Character</code> object.
2050: * @return the primitive <code>char</code> value represented by
2051: * this object.
2052: */
2053: public char charValue() {
2054: return value;
2055: }
2056:
2057: /**
2058: * Returns a hash code for this <code>Character</code>.
2059: * @return a hash code value for this object.
2060: */
2061: public int hashCode() {
2062: return (int) value;
2063: }
2064:
2065: /**
2066: * Compares this object against the specified object.
2067: * The result is <code>true</code> if and only if the argument is not
2068: * <code>null</code> and is a <code>Character</code> object that
2069: * represents the same <code>char</code> value as this object.
2070: *
2071: * @param obj the object to compare with.
2072: * @return <code>true</code> if the objects are the same;
2073: * <code>false</code> otherwise.
2074: */
2075: public boolean equals(Object obj) {
2076: if (obj instanceof Character) {
2077: return value == ((Character) obj).charValue();
2078: }
2079: return false;
2080: }
2081:
2082: /**
2083: * Returns a <code>String</code> object representing this
2084: * <code>Character</code>'s value. The result is a string of
2085: * length 1 whose sole component is the primitive
2086: * <code>char</code> value represented by this
2087: * <code>Character</code> object.
2088: *
2089: * @return a string representation of this object.
2090: */
2091: public String toString() {
2092: char buf[] = { value };
2093: return String.valueOf(buf);
2094: }
2095:
2096: /**
2097: * Returns a <code>String</code> object representing the
2098: * specified <code>char</code>. The result is a string of length
2099: * 1 consisting solely of the specified <code>char</code>.
2100: *
2101: * @param c the <code>char</code> to be converted
2102: * @return the string representation of the specified <code>char</code>
2103: * @since 1.4
2104: */
2105: public static String toString(char c) {
2106: return String.valueOf(c);
2107: }
2108:
2109: /**
2110: * Determines whether the specified code point is a valid Unicode
2111: * code point value in the range of <code>0x0000</code> to
2112: * <code>0x10FFFF</code> inclusive. This method is equivalent to
2113: * the expression:
2114: *
2115: * <blockquote><pre>
2116: * codePoint &gt;= 0x0000 &amp;&amp; codePoint &lt;= 0x10FFFF
2117: * </pre></blockquote>
2118: *
2119: * @param codePoint the Unicode code point to be tested
2120: * @return <code>true</code> if the specified code point value
2121: * is a valid code point value;
2122: * <code>false</code> otherwise.
2123: * @since 1.5
2124: */
2125: public static boolean isValidCodePoint(int codePoint) {
2126: return codePoint >= MIN_CODE_POINT
2127: && codePoint <= MAX_CODE_POINT;
2128: }
2129:
2130: /**
2131: * Determines whether the specified character (Unicode code point)
2132: * is in the supplementary character range. The method call is
2133: * equivalent to the expression:
2134: * <blockquote><pre>
2135: * codePoint &gt;= 0x10000 &amp;&amp; codePoint &lt;= 0x10FFFF
2136: * </pre></blockquote>
2137: *
2138: * @param codePoint the character (Unicode code point) to be tested
2139: * @return <code>true</code> if the specified character is in the Unicode
2140: * supplementary character range; <code>false</code> otherwise.
2141: * @since 1.5
2142: */
2143: public static boolean isSupplementaryCodePoint(int codePoint) {
2144: return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2145: && codePoint <= MAX_CODE_POINT;
2146: }
2147:
2148: /**
2149: * Determines if the given <code>char</code> value is a
2150: * high-surrogate code unit (also known as <i>leading-surrogate
2151: * code unit</i>). Such values do not represent characters by
2152: * themselves, but are used in the representation of <a
2153: * href="#supplementary">supplementary characters</a> in the
2154: * UTF-16 encoding.
2155: *
2156: * <p>This method returns <code>true</code> if and only if
2157: * <blockquote><pre>ch &gt;= '&#92;uD800' &amp;&amp; ch &lt;= '&#92;uDBFF'
2158: * </pre></blockquote>
2159: * is <code>true</code>.
2160: *
2161: * @param ch the <code>char</code> value to be tested.
2162: * @return <code>true</code> if the <code>char</code> value
2163: * is between '&#92;uD800' and '&#92;uDBFF' inclusive;
2164: * <code>false</code> otherwise.
2165: * @see java.lang.Character#isLowSurrogate(char)
2166: * @see Character.UnicodeBlock#of(int)
2167: * @since 1.5
2168: */
2169: public static boolean isHighSurrogate(char ch) {
2170: return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2171: }
2172:
2173: /**
2174: * Determines if the given <code>char</code> value is a
2175: * low-surrogate code unit (also known as <i>trailing-surrogate code
2176: * unit</i>). Such values do not represent characters by themselves,
2177: * but are used in the representation of <a
2178: * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2179: *
2180: * <p> This method returns <code>true</code> if and only if
2181: * <blockquote><pre>ch &gt;= '&#92;uDC00' &amp;&amp; ch &lt;= '&#92;uDFFF'
2182: * </pre></blockquote> is <code>true</code>.
2183: *
2184: * @param ch the <code>char</code> value to be tested.
2185: * @return <code>true</code> if the <code>char</code> value
2186: * is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
2187: * <code>false</code> otherwise.
2188: * @see java.lang.Character#isHighSurrogate(char)
2189: * @since 1.5
2190: */
2191: public static boolean isLowSurrogate(char ch) {
2192: return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2193: }
2194:
2195: /**
2196: * Determines whether the specified pair of <code>char</code>
2197: * values is a valid surrogate pair. This method is equivalent to
2198: * the expression:
2199: * <blockquote><pre>
2200: * isHighSurrogate(high) &amp;&amp; isLowSurrogate(low)
2201: * </pre></blockquote>
2202: *
2203: * @param high the high-surrogate code value to be tested
2204: * @param low the low-surrogate code value to be tested
2205: * @return <code>true</code> if the specified high and
2206: * low-surrogate code values represent a valid surrogate pair;
2207: * <code>false</code> otherwise.
2208: * @since 1.5
2209: */
2210: public static boolean isSurrogatePair(char high, char low) {
2211: return isHighSurrogate(high) && isLowSurrogate(low);
2212: }
2213:
2214: /**
2215: * Determines the number of <code>char</code> values needed to
2216: * represent the specified character (Unicode code point). If the
2217: * specified character is equal to or greater than 0x10000, then
2218: * the method returns 2. Otherwise, the method returns 1.
2219: *
2220: * <p>This method doesn't validate the specified character to be a
2221: * valid Unicode code point. The caller must validate the
2222: * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2223: * if necessary.
2224: *
2225: * @param codePoint the character (Unicode code point) to be tested.
2226: * @return 2 if the character is a valid supplementary character; 1 otherwise.
2227: * @see #isSupplementaryCodePoint(int)
2228: * @since 1.5
2229: */
2230: public static int charCount(int codePoint) {
2231: return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
2232: }
2233:
2234: /**
2235: * Converts the specified surrogate pair to its supplementary code
2236: * point value. This method does not validate the specified
2237: * surrogate pair. The caller must validate it using {@link
2238: * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2239: *
2240: * @param high the high-surrogate code unit
2241: * @param low the low-surrogate code unit
2242: * @return the supplementary code point composed from the
2243: * specified surrogate pair.
2244: * @since 1.5
2245: */
2246: public static int toCodePoint(char high, char low) {
2247: return ((high - MIN_HIGH_SURROGATE) << 10)
2248: + (low - MIN_LOW_SURROGATE)
2249: + MIN_SUPPLEMENTARY_CODE_POINT;
2250: }
2251:
2252: /**
2253: * Returns the code point at the given index of the
2254: * <code>CharSequence</code>. If the <code>char</code> value at
2255: * the given index in the <code>CharSequence</code> is in the
2256: * high-surrogate range, the following index is less than the
2257: * length of the <code>CharSequence</code>, and the
2258: * <code>char</code> value at the following index is in the
2259: * low-surrogate range, then the supplementary code point
2260: * corresponding to this surrogate pair is returned. Otherwise,
2261: * the <code>char</code> value at the given index is returned.
2262: *
2263: * @param seq a sequence of <code>char</code> values (Unicode code
2264: * units)
2265: * @param index the index to the <code>char</code> values (Unicode
2266: * code units) in <code>seq</code> to be converted
2267: * @return the Unicode code point at the given index
2268: * @exception NullPointerException if <code>seq</code> is null.
2269: * @exception IndexOutOfBoundsException if the value
2270: * <code>index</code> is negative or not less than
2271: * {@link CharSequence#length() seq.length()}.
2272: * @since 1.5
2273: */
2274: public static int codePointAt(CharSequence seq, int index) {
2275: char c1 = seq.charAt(index++);
2276: if (isHighSurrogate(c1)) {
2277: if (index < seq.length()) {
2278: char c2 = seq.charAt(index);
2279: if (isLowSurrogate(c2)) {
2280: return toCodePoint(c1, c2);
2281: }
2282: }
2283: }
2284: return c1;
2285: }
2286:
2287: /**
2288: * Returns the code point at the given index of the
2289: * <code>char</code> array. If the <code>char</code> value at
2290: * the given index in the <code>char</code> array is in the
2291: * high-surrogate range, the following index is less than the
2292: * length of the <code>char</code> array, and the
2293: * <code>char</code> value at the following index is in the
2294: * low-surrogate range, then the supplementary code point
2295: * corresponding to this surrogate pair is returned. Otherwise,
2296: * the <code>char</code> value at the given index is returned.
2297: *
2298: * @param a the <code>char</code> array
2299: * @param index the index to the <code>char</code> values (Unicode
2300: * code units) in the <code>char</code> array to be converted
2301: * @return the Unicode code point at the given index
2302: * @exception NullPointerException if <code>a</code> is null.
2303: * @exception IndexOutOfBoundsException if the value
2304: * <code>index</code> is negative or not less than
2305: * the length of the <code>char</code> array.
2306: * @since 1.5
2307: */
2308: public static int codePointAt(char[] a, int index) {
2309: return codePointAtImpl(a, index, a.length);
2310: }
2311:
2312: /**
2313: * Returns the code point at the given index of the
2314: * <code>char</code> array, where only array elements with
2315: * <code>index</code> less than <code>limit</code> can be used. If
2316: * the <code>char</code> value at the given index in the
2317: * <code>char</code> array is in the high-surrogate range, the
2318: * following index is less than the <code>limit</code>, and the
2319: * <code>char</code> value at the following index is in the
2320: * low-surrogate range, then the supplementary code point
2321: * corresponding to this surrogate pair is returned. Otherwise,
2322: * the <code>char</code> value at the given index is returned.
2323: *
2324: * @param a the <code>char</code> array
2325: * @param index the index to the <code>char</code> values (Unicode
2326: * code units) in the <code>char</code> array to be converted
2327: * @param limit the index after the last array element that can be used in the
2328: * <code>char</code> array
2329: * @return the Unicode code point at the given index
2330: * @exception NullPointerException if <code>a</code> is null.
2331: * @exception IndexOutOfBoundsException if the <code>index</code>
2332: * argument is negative or not less than the <code>limit</code>
2333: * argument, or if the <code>limit</code> argument is negative or
2334: * greater than the length of the <code>char</code> array.
2335: * @since 1.5
2336: */
2337: public static int codePointAt(char[] a, int index, int limit) {
2338: if (index >= limit || limit < 0 || limit > a.length) {
2339: throw new IndexOutOfBoundsException();
2340: }
2341: return codePointAtImpl(a, index, limit);
2342: }
2343:
2344: static int codePointAtImpl(char[] a, int index, int limit) {
2345: char c1 = a[index++];
2346: if (isHighSurrogate(c1)) {
2347: if (index < limit) {
2348: char c2 = a[index];
2349: if (isLowSurrogate(c2)) {
2350: return toCodePoint(c1, c2);
2351: }
2352: }
2353: }
2354: return c1;
2355: }
2356:
2357: /**
2358: * Returns the code point preceding the given index of the
2359: * <code>CharSequence</code>. If the <code>char</code> value at
2360: * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2361: * the low-surrogate range, <code>(index - 2)</code> is not
2362: * negative, and the <code>char</code> value at <code>(index -
2363: * 2)</code> in the <code>CharSequence</code> is in the
2364: * high-surrogate range, then the supplementary code point
2365: * corresponding to this surrogate pair is returned. Otherwise,
2366: * the <code>char</code> value at <code>(index - 1)</code> is
2367: * returned.
2368: *
2369: * @param seq the <code>CharSequence</code> instance
2370: * @param index the index following the code point that should be returned
2371: * @return the Unicode code point value before the given index.
2372: * @exception NullPointerException if <code>seq</code> is null.
2373: * @exception IndexOutOfBoundsException if the <code>index</code>
2374: * argument is less than 1 or greater than {@link
2375: * CharSequence#length() seq.length()}.
2376: * @since 1.5
2377: */
2378: public static int codePointBefore(CharSequence seq, int index) {
2379: char c2 = seq.charAt(--index);
2380: if (isLowSurrogate(c2)) {
2381: if (index > 0) {
2382: char c1 = seq.charAt(--index);
2383: if (isHighSurrogate(c1)) {
2384: return toCodePoint(c1, c2);
2385: }
2386: }
2387: }
2388: return c2;
2389: }
2390:
2391: /**
2392: * Returns the code point preceding the given index of the
2393: * <code>char</code> array. If the <code>char</code> value at
2394: * <code>(index - 1)</code> in the <code>char</code> array is in
2395: * the low-surrogate range, <code>(index - 2)</code> is not
2396: * negative, and the <code>char</code> value at <code>(index -
2397: * 2)</code> in the <code>char</code> array is in the
2398: * high-surrogate range, then the supplementary code point
2399: * corresponding to this surrogate pair is returned. Otherwise,
2400: * the <code>char</code> value at <code>(index - 1)</code> is
2401: * returned.
2402: *
2403: * @param a the <code>char</code> array
2404: * @param index the index following the code point that should be returned
2405: * @return the Unicode code point value before the given index.
2406: * @exception NullPointerException if <code>a</code> is null.
2407: * @exception IndexOutOfBoundsException if the <code>index</code>
2408: * argument is less than 1 or greater than the length of the
2409: * <code>char</code> array
2410: * @since 1.5
2411: */
2412: public static int codePointBefore(char[] a, int index) {
2413: return codePointBeforeImpl(a, index, 0);
2414: }
2415:
2416: /**
2417: * Returns the code point preceding the given index of the
2418: * <code>char</code> array, where only array elements with
2419: * <code>index</code> greater than or equal to <code>start</code>
2420: * can be used. If the <code>char</code> value at <code>(index -
2421: * 1)</code> in the <code>char</code> array is in the
2422: * low-surrogate range, <code>(index - 2)</code> is not less than
2423: * <code>start</code>, and the <code>char</code> value at
2424: * <code>(index - 2)</code> in the <code>char</code> array is in
2425: * the high-surrogate range, then the supplementary code point
2426: * corresponding to this surrogate pair is returned. Otherwise,
2427: * the <code>char</code> value at <code>(index - 1)</code> is
2428: * returned.
2429: *
2430: * @param a the <code>char</code> array
2431: * @param index the index following the code point that should be returned
2432: * @param start the index of the first array element in the
2433: * <code>char</code> array
2434: * @return the Unicode code point value before the given index.
2435: * @exception NullPointerException if <code>a</code> is null.
2436: * @exception IndexOutOfBoundsException if the <code>index</code>
2437: * argument is not greater than the <code>start</code> argument or
2438: * is greater than the length of the <code>char</code> array, or
2439: * if the <code>start</code> argument is negative or not less than
2440: * the length of the <code>char</code> array.
2441: * @since 1.5
2442: */
2443: public static int codePointBefore(char[] a, int index, int start) {
2444: if (index <= start || start < 0 || start >= a.length) {
2445: throw new IndexOutOfBoundsException();
2446: }
2447: return codePointBeforeImpl(a, index, start);
2448: }
2449:
2450: static int codePointBeforeImpl(char[] a, int index, int start) {
2451: char c2 = a[--index];
2452: if (isLowSurrogate(c2)) {
2453: if (index > start) {
2454: char c1 = a[--index];
2455: if (isHighSurrogate(c1)) {
2456: return toCodePoint(c1, c2);
2457: }
2458: }
2459: }
2460: return c2;
2461: }
2462:
2463: /**
2464: * Converts the specified character (Unicode code point) to its
2465: * UTF-16 representation. If the specified code point is a BMP
2466: * (Basic Multilingual Plane or Plane 0) value, the same value is
2467: * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
2468: * specified code point is a supplementary character, its
2469: * surrogate values are stored in <code>dst[dstIndex]</code>
2470: * (high-surrogate) and <code>dst[dstIndex+1]</code>
2471: * (low-surrogate), and 2 is returned.
2472: *
2473: * @param codePoint the character (Unicode code point) to be converted.
2474: * @param dst an array of <code>char</code> in which the
2475: * <code>codePoint</code>'s UTF-16 value is stored.
2476: * @param dstIndex the start index into the <code>dst</code>
2477: * array where the converted value is stored.
2478: * @return 1 if the code point is a BMP code point, 2 if the
2479: * code point is a supplementary code point.
2480: * @exception IllegalArgumentException if the specified
2481: * <code>codePoint</code> is not a valid Unicode code point.
2482: * @exception NullPointerException if the specified <code>dst</code> is null.
2483: * @exception IndexOutOfBoundsException if <code>dstIndex</code>
2484: * is negative or not less than <code>dst.length</code>, or if
2485: * <code>dst</code> at <code>dstIndex</code> doesn't have enough
2486: * array element(s) to store the resulting <code>char</code>
2487: * value(s). (If <code>dstIndex</code> is equal to
2488: * <code>dst.length-1</code> and the specified
2489: * <code>codePoint</code> is a supplementary character, the
2490: * high-surrogate value is not stored in
2491: * <code>dst[dstIndex]</code>.)
2492: * @since 1.5
2493: */
2494: public static int toChars(int codePoint, char[] dst, int dstIndex) {
2495: if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2496: throw new IllegalArgumentException();
2497: }
2498: if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2499: dst[dstIndex] = (char) codePoint;
2500: return 1;
2501: }
2502: toSurrogates(codePoint, dst, dstIndex);
2503: return 2;
2504: }
2505:
2506: /**
2507: * Converts the specified character (Unicode code point) to its
2508: * UTF-16 representation stored in a <code>char</code> array. If
2509: * the specified code point is a BMP (Basic Multilingual Plane or
2510: * Plane 0) value, the resulting <code>char</code> array has
2511: * the same value as <code>codePoint</code>. If the specified code
2512: * point is a supplementary code point, the resulting
2513: * <code>char</code> array has the corresponding surrogate pair.
2514: *
2515: * @param codePoint a Unicode code point
2516: * @return a <code>char</code> array having
2517: * <code>codePoint</code>'s UTF-16 representation.
2518: * @exception IllegalArgumentException if the specified
2519: * <code>codePoint</code> is not a valid Unicode code point.
2520: * @since 1.5
2521: */
2522: public static char[] toChars(int codePoint) {
2523: if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2524: throw new IllegalArgumentException();
2525: }
2526: if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2527: return new char[] { (char) codePoint };
2528: }
2529: char[] result = new char[2];
2530: toSurrogates(codePoint, result, 0);
2531: return result;
2532: }
2533:
2534: static void toSurrogates(int codePoint, char[] dst, int index) {
2535: int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
2536: dst[index + 1] = (char) ((offset & 0x3ff) + MIN_LOW_SURROGATE);
2537: dst[index] = (char) ((offset >>> 10) + MIN_HIGH_SURROGATE);
2538: }
2539:
2540: /**
2541: * Returns the number of Unicode code points in the text range of
2542: * the specified char sequence. The text range begins at the
2543: * specified <code>beginIndex</code> and extends to the
2544: * <code>char</code> at index <code>endIndex - 1</code>. Thus the
2545: * length (in <code>char</code>s) of the text range is
2546: * <code>endIndex-beginIndex</code>. Unpaired surrogates within
2547: * the text range count as one code point each.
2548: *
2549: * @param seq the char sequence
2550: * @param beginIndex the index to the first <code>char</code> of
2551: * the text range.
2552: * @param endIndex the index after the last <code>char</code> of
2553: * the text range.
2554: * @return the number of Unicode code points in the specified text
2555: * range
2556: * @exception NullPointerException if <code>seq</code> is null.
2557: * @exception IndexOutOfBoundsException if the
2558: * <code>beginIndex</code> is negative, or <code>endIndex</code>
2559: * is larger than the length of the given sequence, or
2560: * <code>beginIndex</code> is larger than <code>endIndex</code>.
2561: * @since 1.5
2562: */
2563: public static int codePointCount(CharSequence seq, int beginIndex,
2564: int endIndex) {
2565: int length = seq.length();
2566: if (beginIndex < 0 || endIndex > length
2567: || beginIndex > endIndex) {
2568: throw new IndexOutOfBoundsException();
2569: }
2570: int n = 0;
2571: for (int i = beginIndex; i < endIndex;) {
2572: n++;
2573: if (isHighSurrogate(seq.charAt(i++))) {
2574: if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
2575: i++;
2576: }
2577: }
2578: }
2579: return n;
2580: }
2581:
2582: /**
2583: * Returns the number of Unicode code points in a subarray of the
2584: * <code>char</code> array argument. The <code>offset</code>
2585: * argument is the index of the first <code>char</code> of the
2586: * subarray and the <code>count</code> argument specifies the
2587: * length of the subarray in <code>char</code>s. Unpaired
2588: * surrogates within the subarray count as one code point each.
2589: *
2590: * @param a the <code>char</code> array
2591: * @param offset the index of the first <code>char</code> in the
2592: * given <code>char</code> array
2593: * @param count the length of the subarray in <code>char</code>s
2594: * @return the number of Unicode code points in the specified subarray
2595: * @exception NullPointerException if <code>a</code> is null.
2596: * @exception IndexOutOfBoundsException if <code>offset</code> or
2597: * <code>count</code> is negative, or if <code>offset +
2598: * count</code> is larger than the length of the given array.
2599: * @since 1.5
2600: */
2601: public static int codePointCount(char[] a, int offset, int count) {
2602: if (count > a.length - offset || offset < 0 || count < 0) {
2603: throw new IndexOutOfBoundsException();
2604: }
2605: return codePointCountImpl(a, offset, count);
2606: }
2607:
2608: static int codePointCountImpl(char[] a, int offset, int count) {
2609: int endIndex = offset + count;
2610: int n = 0;
2611: for (int i = offset; i < endIndex;) {
2612: n++;
2613: if (isHighSurrogate(a[i++])) {
2614: if (i < endIndex && isLowSurrogate(a[i])) {
2615: i++;
2616: }
2617: }
2618: }
2619: return n;
2620: }
2621:
2622: /**
2623: * Returns the index within the given char sequence that is offset
2624: * from the given <code>index</code> by <code>codePointOffset</code>
2625: * code points. Unpaired surrogates within the text range given by
2626: * <code>index</code> and <code>codePointOffset</code> count as
2627: * one code point each.
2628: *
2629: * @param seq the char sequence
2630: * @param index the index to be offset
2631: * @param codePointOffset the offset in code points
2632: * @return the index within the char sequence
2633: * @exception NullPointerException if <code>seq</code> is null.
2634: * @exception IndexOutOfBoundsException if <code>index</code>
2635: * is negative or larger than the length of the char sequence,
2636: * or if <code>codePointOffset</code> is positive and the
2637: * subsequence starting with <code>index</code> has fewer than
2638: * <code>codePointOffset</code> code points, or if
2639: * <code>codePointOffset</code> is negative and the subsequence
2640: * before <code>index</code> has fewer than the absolute value
2641: * of <code>codePointOffset</code> code points.
2642: * @since 1.5
2643: */
2644: public static int offsetByCodePoints(CharSequence seq, int index,
2645: int codePointOffset) {
2646: int length = seq.length();
2647: if (index < 0 || index > length) {
2648: throw new IndexOutOfBoundsException();
2649: }
2650:
2651: int x = index;
2652: if (codePointOffset >= 0) {
2653: int i;
2654: for (i = 0; x < length && i < codePointOffset; i++) {
2655: if (isHighSurrogate(seq.charAt(x++))) {
2656: if (x < length && isLowSurrogate(seq.charAt(x))) {
2657: x++;
2658: }
2659: }
2660: }
2661: if (i < codePointOffset) {
2662: throw new IndexOutOfBoundsException();
2663: }
2664: } else {
2665: int i;
2666: for (i = codePointOffset; x > 0 && i < 0; i++) {
2667: if (isLowSurrogate(seq.charAt(--x))) {
2668: if (x > 0 && isHighSurrogate(seq.charAt(x - 1))) {
2669: x--;
2670: }
2671: }
2672: }
2673: if (i < 0) {
2674: throw new IndexOutOfBoundsException();
2675: }
2676: }
2677: return x;
2678: }
2679:
2680: /**
2681: * Returns the index within the given <code>char</code> subarray
2682: * that is offset from the given <code>index</code> by
2683: * <code>codePointOffset</code> code points. The
2684: * <code>start</code> and <code>count</code> arguments specify a
2685: * subarray of the <code>char</code> array. Unpaired surrogates
2686: * within the text range given by <code>index</code> and
2687: * <code>codePointOffset</code> count as one code point each.
2688: *
2689: * @param a the <code>char</code> array
2690: * @param start the index of the first <code>char</code> of the
2691: * subarray
2692: * @param count the length of the subarray in <code>char</code>s
2693: * @param index the index to be offset
2694: * @param codePointOffset the offset in code points
2695: * @return the index within the subarray
2696: * @exception NullPointerException if <code>a</code> is null.
2697: * @exception IndexOutOfBoundsException
2698: * if <code>start</code> or <code>count</code> is negative,
2699: * or if <code>start + count</code> is larger than the length of
2700: * the given array,
2701: * or if <code>index</code> is less than <code>start</code> or
 * larger than <code>start + count</code>,
2703: * or if <code>codePointOffset</code> is positive and the text range
2704: * starting with <code>index</code> and ending with <code>start
2705: * + count - 1</code> has fewer than <code>codePointOffset</code> code
2706: * points,
2707: * or if <code>codePointOffset</code> is negative and the text range
2708: * starting with <code>start</code> and ending with <code>index
2709: * - 1</code> has fewer than the absolute value of
2710: * <code>codePointOffset</code> code points.
2711: * @since 1.5
2712: */
2713: public static int offsetByCodePoints(char[] a, int start,
2714: int count, int index, int codePointOffset) {
2715: if (count > a.length - start || start < 0 || count < 0
2716: || index < start || index > start + count) {
2717: throw new IndexOutOfBoundsException();
2718: }
2719: return offsetByCodePointsImpl(a, start, count, index,
2720: codePointOffset);
2721: }
2722:
2723: static int offsetByCodePointsImpl(char[] a, int start, int count,
2724: int index, int codePointOffset) {
2725: int x = index;
2726: if (codePointOffset >= 0) {
2727: int limit = start + count;
2728: int i;
2729: for (i = 0; x < limit && i < codePointOffset; i++) {
2730: if (isHighSurrogate(a[x++])) {
2731: if (x < limit && isLowSurrogate(a[x])) {
2732: x++;
2733: }
2734: }
2735: }
2736: if (i < codePointOffset) {
2737: throw new IndexOutOfBoundsException();
2738: }
2739: } else {
2740: int i;
2741: for (i = codePointOffset; x > start && i < 0; i++) {
2742: if (isLowSurrogate(a[--x])) {
2743: if (x > start && isHighSurrogate(a[x - 1])) {
2744: x--;
2745: }
2746: }
2747: }
2748: if (i < 0) {
2749: throw new IndexOutOfBoundsException();
2750: }
2751: }
2752: return x;
2753: }
2754:
2755: /**
2756: * Determines if the specified character is a lowercase character.
2757: * <p>
2758: * A character is lowercase if its general category type, provided
2759: * by <code>Character.getType(ch)</code>, is
2760: * <code>LOWERCASE_LETTER</code>.
2761: * <p>
2762: * The following are examples of lowercase characters:
2763: * <p><blockquote><pre>
2764: * a b c d e f g h i j k l m n o p q r s t u v w x y z
2765: * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
2766: * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
2767: * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
2768: * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
2769: * </pre></blockquote>
2770: * <p> Many other Unicode characters are lowercase too.
2771: *
2772: * <p><b>Note:</b> This method cannot handle <a
2773: * href="#supplementary"> supplementary characters</a>. To support
2774: * all Unicode characters, including supplementary characters, use
2775: * the {@link #isLowerCase(int)} method.
2776: *
2777: * @param ch the character to be tested.
2778: * @return <code>true</code> if the character is lowercase;
2779: * <code>false</code> otherwise.
 * @see java.lang.Character#isUpperCase(char)
2781: * @see java.lang.Character#isTitleCase(char)
2782: * @see java.lang.Character#toLowerCase(char)
2783: * @see java.lang.Character#getType(char)
2784: */
2785: public static boolean isLowerCase(char ch) {
2786: return isLowerCase((int) ch);
2787: }
2788:
2789: /**
2790: * Determines if the specified character (Unicode code point) is a
2791: * lowercase character.
2792: * <p>
2793: * A character is lowercase if its general category type, provided
2794: * by {@link Character#getType getType(codePoint)}, is
2795: * <code>LOWERCASE_LETTER</code>.
2796: * <p>
2797: * The following are examples of lowercase characters:
2798: * <p><blockquote><pre>
2799: * a b c d e f g h i j k l m n o p q r s t u v w x y z
2800: * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
2801: * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
2802: * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
2803: * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
2804: * </pre></blockquote>
2805: * <p> Many other Unicode characters are lowercase too.
2806: *
2807: * @param codePoint the character (Unicode code point) to be tested.
2808: * @return <code>true</code> if the character is lowercase;
2809: * <code>false</code> otherwise.
 * @see java.lang.Character#isUpperCase(int)
2811: * @see java.lang.Character#isTitleCase(int)
2812: * @see java.lang.Character#toLowerCase(int)
2813: * @see java.lang.Character#getType(int)
2814: * @since 1.5
2815: */
2816: public static boolean isLowerCase(int codePoint) {
2817: return getType(codePoint) == Character.LOWERCASE_LETTER;
2818: }
2819:
2820: /**
2821: * Determines if the specified character is an uppercase character.
2822: * <p>
2823: * A character is uppercase if its general category type, provided by
2824: * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
2825: * <p>
2826: * The following are examples of uppercase characters:
2827: * <p><blockquote><pre>
2828: * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2829: * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
2830: * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
2831: * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
2832: * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
2833: * </pre></blockquote>
 * <p> Many other Unicode characters are uppercase too.
2835: *
2836: * <p><b>Note:</b> This method cannot handle <a
2837: * href="#supplementary"> supplementary characters</a>. To support
2838: * all Unicode characters, including supplementary characters, use
2839: * the {@link #isUpperCase(int)} method.
2840: *
2841: * @param ch the character to be tested.
2842: * @return <code>true</code> if the character is uppercase;
2843: * <code>false</code> otherwise.
2844: * @see java.lang.Character#isLowerCase(char)
2845: * @see java.lang.Character#isTitleCase(char)
2846: * @see java.lang.Character#toUpperCase(char)
2847: * @see java.lang.Character#getType(char)
2848: * @since 1.0
2849: */
2850: public static boolean isUpperCase(char ch) {
2851: return isUpperCase((int) ch);
2852: }
2853:
2854: /**
2855: * Determines if the specified character (Unicode code point) is an uppercase character.
2856: * <p>
2857: * A character is uppercase if its general category type, provided by
2858: * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
2859: * <p>
2860: * The following are examples of uppercase characters:
2861: * <p><blockquote><pre>
2862: * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2863: * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
2864: * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
2865: * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
2866: * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
2867: * </pre></blockquote>
 * <p> Many other Unicode characters are uppercase too.
2869: *
2870: * @param codePoint the character (Unicode code point) to be tested.
2871: * @return <code>true</code> if the character is uppercase;
2872: * <code>false</code> otherwise.
2873: * @see java.lang.Character#isLowerCase(int)
2874: * @see java.lang.Character#isTitleCase(int)
2875: * @see java.lang.Character#toUpperCase(int)
2876: * @see java.lang.Character#getType(int)
2877: * @since 1.5
2878: */
2879: public static boolean isUpperCase(int codePoint) {
2880: return getType(codePoint) == Character.UPPERCASE_LETTER;
2881: }
2882:
2883: /**
2884: * Determines if the specified character is a titlecase character.
2885: * <p>
2886: * A character is a titlecase character if its general
2887: * category type, provided by <code>Character.getType(ch)</code>,
2888: * is <code>TITLECASE_LETTER</code>.
2889: * <p>
2890: * Some characters look like pairs of Latin letters. For example, there
2891: * is an uppercase letter that looks like "LJ" and has a corresponding
2892: * lowercase letter that looks like "lj". A third form, which looks like "Lj",
2893: * is the appropriate form to use when rendering a word in lowercase
2894: * with initial capitals, as for a book title.
2895: * <p>
2896: * These are some of the Unicode characters for which this method returns
2897: * <code>true</code>:
2898: * <ul>
2899: * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
2900: * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
2901: * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
2902: * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
2903: * </ul>
 * <p> Many other Unicode characters are titlecase too.
2905: *
2906: * <p><b>Note:</b> This method cannot handle <a
2907: * href="#supplementary"> supplementary characters</a>. To support
2908: * all Unicode characters, including supplementary characters, use
2909: * the {@link #isTitleCase(int)} method.
2910: *
2911: * @param ch the character to be tested.
2912: * @return <code>true</code> if the character is titlecase;
2913: * <code>false</code> otherwise.
2914: * @see java.lang.Character#isLowerCase(char)
2915: * @see java.lang.Character#isUpperCase(char)
2916: * @see java.lang.Character#toTitleCase(char)
2917: * @see java.lang.Character#getType(char)
2918: * @since 1.0.2
2919: */
2920: public static boolean isTitleCase(char ch) {
2921: return isTitleCase((int) ch);
2922: }
2923:
2924: /**
2925: * Determines if the specified character (Unicode code point) is a titlecase character.
2926: * <p>
2927: * A character is a titlecase character if its general
2928: * category type, provided by {@link Character#getType(int) getType(codePoint)},
2929: * is <code>TITLECASE_LETTER</code>.
2930: * <p>
2931: * Some characters look like pairs of Latin letters. For example, there
2932: * is an uppercase letter that looks like "LJ" and has a corresponding
2933: * lowercase letter that looks like "lj". A third form, which looks like "Lj",
2934: * is the appropriate form to use when rendering a word in lowercase
2935: * with initial capitals, as for a book title.
2936: * <p>
2937: * These are some of the Unicode characters for which this method returns
2938: * <code>true</code>:
2939: * <ul>
2940: * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
2941: * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
2942: * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
2943: * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
2944: * </ul>
 * <p> Many other Unicode characters are titlecase too.
2946: *
2947: * @param codePoint the character (Unicode code point) to be tested.
2948: * @return <code>true</code> if the character is titlecase;
2949: * <code>false</code> otherwise.
2950: * @see java.lang.Character#isLowerCase(int)
2951: * @see java.lang.Character#isUpperCase(int)
2952: * @see java.lang.Character#toTitleCase(int)
2953: * @see java.lang.Character#getType(int)
2954: * @since 1.5
2955: */
2956: public static boolean isTitleCase(int codePoint) {
2957: return getType(codePoint) == Character.TITLECASE_LETTER;
2958: }
2959:
2960: /**
2961: * Determines if the specified character is a digit.
2962: * <p>
2963: * A character is a digit if its general category type, provided
2964: * by <code>Character.getType(ch)</code>, is
2965: * <code>DECIMAL_DIGIT_NUMBER</code>.
2966: * <p>
2967: * Some Unicode character ranges that contain digits:
2968: * <ul>
2969: * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
2970: * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
2971: * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
2972: * Arabic-Indic digits
2973: * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
2974: * Extended Arabic-Indic digits
2975: * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
2976: * Devanagari digits
2977: * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
2978: * Fullwidth digits
2979: * </ul>
2980: *
2981: * Many other character ranges contain digits as well.
2982: *
2983: * <p><b>Note:</b> This method cannot handle <a
2984: * href="#supplementary"> supplementary characters</a>. To support
2985: * all Unicode characters, including supplementary characters, use
2986: * the {@link #isDigit(int)} method.
2987: *
2988: * @param ch the character to be tested.
2989: * @return <code>true</code> if the character is a digit;
2990: * <code>false</code> otherwise.
2991: * @see java.lang.Character#digit(char, int)
2992: * @see java.lang.Character#forDigit(int, int)
2993: * @see java.lang.Character#getType(char)
2994: */
2995: public static boolean isDigit(char ch) {
2996: return isDigit((int) ch);
2997: }
2998:
2999: /**
3000: * Determines if the specified character (Unicode code point) is a digit.
3001: * <p>
3002: * A character is a digit if its general category type, provided
3003: * by {@link Character#getType(int) getType(codePoint)}, is
3004: * <code>DECIMAL_DIGIT_NUMBER</code>.
3005: * <p>
3006: * Some Unicode character ranges that contain digits:
3007: * <ul>
3008: * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
3009: * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3010: * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
3011: * Arabic-Indic digits
3012: * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
3013: * Extended Arabic-Indic digits
3014: * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
3015: * Devanagari digits
3016: * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
3017: * Fullwidth digits
3018: * </ul>
3019: *
3020: * Many other character ranges contain digits as well.
3021: *
3022: * @param codePoint the character (Unicode code point) to be tested.
3023: * @return <code>true</code> if the character is a digit;
3024: * <code>false</code> otherwise.
3025: * @see java.lang.Character#forDigit(int, int)
3026: * @see java.lang.Character#getType(int)
3027: * @since 1.5
3028: */
3029: public static boolean isDigit(int codePoint) {
3030: return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
3031: }
3032:
3033: /**
3034: * Determines if a character is defined in Unicode.
3035: * <p>
3036: * A character is defined if at least one of the following is true:
3037: * <ul>
3038: * <li>It has an entry in the UnicodeData file.
3039: * <li>It has a value in a range defined by the UnicodeData file.
3040: * </ul>
3041: *
3042: * <p><b>Note:</b> This method cannot handle <a
3043: * href="#supplementary"> supplementary characters</a>. To support
3044: * all Unicode characters, including supplementary characters, use
3045: * the {@link #isDefined(int)} method.
3046: *
3047: * @param ch the character to be tested
3048: * @return <code>true</code> if the character has a defined meaning
3049: * in Unicode; <code>false</code> otherwise.
3050: * @see java.lang.Character#isDigit(char)
3051: * @see java.lang.Character#isLetter(char)
3052: * @see java.lang.Character#isLetterOrDigit(char)
3053: * @see java.lang.Character#isLowerCase(char)
3054: * @see java.lang.Character#isTitleCase(char)
3055: * @see java.lang.Character#isUpperCase(char)
3056: * @since 1.0.2
3057: */
3058: public static boolean isDefined(char ch) {
3059: return isDefined((int) ch);
3060: }
3061:
3062: /**
3063: * Determines if a character (Unicode code point) is defined in Unicode.
3064: * <p>
3065: * A character is defined if at least one of the following is true:
3066: * <ul>
3067: * <li>It has an entry in the UnicodeData file.
3068: * <li>It has a value in a range defined by the UnicodeData file.
3069: * </ul>
3070: *
3071: * @param codePoint the character (Unicode code point) to be tested.
3072: * @return <code>true</code> if the character has a defined meaning
3073: * in Unicode; <code>false</code> otherwise.
3074: * @see java.lang.Character#isDigit(int)
3075: * @see java.lang.Character#isLetter(int)
3076: * @see java.lang.Character#isLetterOrDigit(int)
3077: * @see java.lang.Character#isLowerCase(int)
3078: * @see java.lang.Character#isTitleCase(int)
3079: * @see java.lang.Character#isUpperCase(int)
3080: * @since 1.5
3081: */
3082: public static boolean isDefined(int codePoint) {
3083: return getType(codePoint) != Character.UNASSIGNED;
3084: }
3085:
3086: /**
3087: * Determines if the specified character is a letter.
3088: * <p>
3089: * A character is considered to be a letter if its general
3090: * category type, provided by <code>Character.getType(ch)</code>,
3091: * is any of the following:
3092: * <ul>
3093: * <li> <code>UPPERCASE_LETTER</code>
3094: * <li> <code>LOWERCASE_LETTER</code>
3095: * <li> <code>TITLECASE_LETTER</code>
3096: * <li> <code>MODIFIER_LETTER</code>
3097: * <li> <code>OTHER_LETTER</code>
3098: * </ul>
3099: *
3100: * Not all letters have case. Many characters are
3101: * letters but are neither uppercase nor lowercase nor titlecase.
3102: *
3103: * <p><b>Note:</b> This method cannot handle <a
3104: * href="#supplementary"> supplementary characters</a>. To support
3105: * all Unicode characters, including supplementary characters, use
3106: * the {@link #isLetter(int)} method.
3107: *
3108: * @param ch the character to be tested.
3109: * @return <code>true</code> if the character is a letter;
3110: * <code>false</code> otherwise.
3111: * @see java.lang.Character#isDigit(char)
3112: * @see java.lang.Character#isJavaIdentifierStart(char)
3113: * @see java.lang.Character#isJavaLetter(char)
3114: * @see java.lang.Character#isJavaLetterOrDigit(char)
3115: * @see java.lang.Character#isLetterOrDigit(char)
3116: * @see java.lang.Character#isLowerCase(char)
3117: * @see java.lang.Character#isTitleCase(char)
3118: * @see java.lang.Character#isUnicodeIdentifierStart(char)
3119: * @see java.lang.Character#isUpperCase(char)
3120: */
3121: public static boolean isLetter(char ch) {
3122: return isLetter((int) ch);
3123: }
3124:
3125: /**
3126: * Determines if the specified character (Unicode code point) is a letter.
3127: * <p>
3128: * A character is considered to be a letter if its general
3129: * category type, provided by {@link Character#getType(int) getType(codePoint)},
3130: * is any of the following:
3131: * <ul>
3132: * <li> <code>UPPERCASE_LETTER</code>
3133: * <li> <code>LOWERCASE_LETTER</code>
3134: * <li> <code>TITLECASE_LETTER</code>
3135: * <li> <code>MODIFIER_LETTER</code>
3136: * <li> <code>OTHER_LETTER</code>
3137: * </ul>
3138: *
3139: * Not all letters have case. Many characters are
3140: * letters but are neither uppercase nor lowercase nor titlecase.
3141: *
3142: * @param codePoint the character (Unicode code point) to be tested.
3143: * @return <code>true</code> if the character is a letter;
3144: * <code>false</code> otherwise.
3145: * @see java.lang.Character#isDigit(int)
3146: * @see java.lang.Character#isJavaIdentifierStart(int)
3147: * @see java.lang.Character#isLetterOrDigit(int)
3148: * @see java.lang.Character#isLowerCase(int)
3149: * @see java.lang.Character#isTitleCase(int)
3150: * @see java.lang.Character#isUnicodeIdentifierStart(int)
3151: * @see java.lang.Character#isUpperCase(int)
3152: * @since 1.5
3153: */
3154: public static boolean isLetter(int codePoint) {
3155: return ((((1 << Character.UPPERCASE_LETTER)
3156: | (1 << Character.LOWERCASE_LETTER)
3157: | (1 << Character.TITLECASE_LETTER)
3158: | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) != 0;
3159: }
3160:
3161: /**
3162: * Determines if the specified character is a letter or digit.
3163: * <p>
3164: * A character is considered to be a letter or digit if either
3165: * <code>Character.isLetter(char ch)</code> or
3166: * <code>Character.isDigit(char ch)</code> returns
3167: * <code>true</code> for the character.
3168: *
3169: * <p><b>Note:</b> This method cannot handle <a
3170: * href="#supplementary"> supplementary characters</a>. To support
3171: * all Unicode characters, including supplementary characters, use
3172: * the {@link #isLetterOrDigit(int)} method.
3173: *
3174: * @param ch the character to be tested.
3175: * @return <code>true</code> if the character is a letter or digit;
3176: * <code>false</code> otherwise.
3177: * @see java.lang.Character#isDigit(char)
3178: * @see java.lang.Character#isJavaIdentifierPart(char)
3179: * @see java.lang.Character#isJavaLetter(char)
3180: * @see java.lang.Character#isJavaLetterOrDigit(char)
3181: * @see java.lang.Character#isLetter(char)
3182: * @see java.lang.Character#isUnicodeIdentifierPart(char)
3183: * @since 1.0.2
3184: */
3185: public static boolean isLetterOrDigit(char ch) {
3186: return isLetterOrDigit((int) ch);
3187: }
3188:
3189: /**
3190: * Determines if the specified character (Unicode code point) is a letter or digit.
3191: * <p>
3192: * A character is considered to be a letter or digit if either
3193: * {@link #isLetter(int) isLetter(codePoint)} or
3194: * {@link #isDigit(int) isDigit(codePoint)} returns
3195: * <code>true</code> for the character.
3196: *
3197: * @param codePoint the character (Unicode code point) to be tested.
3198: * @return <code>true</code> if the character is a letter or digit;
3199: * <code>false</code> otherwise.
3200: * @see java.lang.Character#isDigit(int)
3201: * @see java.lang.Character#isJavaIdentifierPart(int)
3202: * @see java.lang.Character#isLetter(int)
3203: * @see java.lang.Character#isUnicodeIdentifierPart(int)
3204: * @since 1.5
3205: */
3206: public static boolean isLetterOrDigit(int codePoint) {
3207: return ((((1 << Character.UPPERCASE_LETTER)
3208: | (1 << Character.LOWERCASE_LETTER)
3209: | (1 << Character.TITLECASE_LETTER)
3210: | (1 << Character.MODIFIER_LETTER)
3211: | (1 << Character.OTHER_LETTER) | (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) != 0;
3212: }
3213:
3214: /**
3215: * Determines if the specified character is permissible as the first
3216: * character in a Java identifier.
3217: * <p>
3218: * A character may start a Java identifier if and only if
3219: * one of the following is true:
3220: * <ul>
3221: * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3222: * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3223: * <li> ch is a currency symbol (such as "$")
3224: * <li> ch is a connecting punctuation character (such as "_").
3225: * </ul>
3226: *
3227: * @param ch the character to be tested.
3228: * @return <code>true</code> if the character may start a Java
3229: * identifier; <code>false</code> otherwise.
3230: * @see java.lang.Character#isJavaLetterOrDigit(char)
3231: * @see java.lang.Character#isJavaIdentifierStart(char)
3232: * @see java.lang.Character#isJavaIdentifierPart(char)
3233: * @see java.lang.Character#isLetter(char)
3234: * @see java.lang.Character#isLetterOrDigit(char)
3235: * @see java.lang.Character#isUnicodeIdentifierStart(char)
 * @since 1.0.2
3237: * @deprecated Replaced by isJavaIdentifierStart(char).
3238: */
3239: @Deprecated
3240: public static boolean isJavaLetter(char ch) {
3241: return isJavaIdentifierStart(ch);
3242: }
3243:
3244: /**
3245: * Determines if the specified character may be part of a Java
3246: * identifier as other than the first character.
3247: * <p>
3248: * A character may be part of a Java identifier if and only if any
3249: * of the following are true:
3250: * <ul>
3251: * <li> it is a letter
3252: * <li> it is a currency symbol (such as <code>'$'</code>)
3253: * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3254: * <li> it is a digit
3255: * <li> it is a numeric letter (such as a Roman numeral character)
3256: * <li> it is a combining mark
3257: * <li> it is a non-spacing mark
3258: * <li> <code>isIdentifierIgnorable</code> returns
3259: * <code>true</code> for the character.
3260: * </ul>
3261: *
3262: * @param ch the character to be tested.
3263: * @return <code>true</code> if the character may be part of a
3264: * Java identifier; <code>false</code> otherwise.
3265: * @see java.lang.Character#isJavaLetter(char)
3266: * @see java.lang.Character#isJavaIdentifierStart(char)
3267: * @see java.lang.Character#isJavaIdentifierPart(char)
3268: * @see java.lang.Character#isLetter(char)
3269: * @see java.lang.Character#isLetterOrDigit(char)
3270: * @see java.lang.Character#isUnicodeIdentifierPart(char)
3271: * @see java.lang.Character#isIdentifierIgnorable(char)
 * @since 1.0.2
3273: * @deprecated Replaced by isJavaIdentifierPart(char).
3274: */
3275: @Deprecated
3276: public static boolean isJavaLetterOrDigit(char ch) {
3277: return isJavaIdentifierPart(ch);
3278: }
3279:
3280: /**
3281: * Determines if the specified character is
3282: * permissible as the first character in a Java identifier.
3283: * <p>
3284: * A character may start a Java identifier if and only if
3285: * one of the following conditions is true:
3286: * <ul>
3287: * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3288: * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3289: * <li> ch is a currency symbol (such as "$")
3290: * <li> ch is a connecting punctuation character (such as "_").
3291: * </ul>
3292: *
3293: * <p><b>Note:</b> This method cannot handle <a
3294: * href="#supplementary"> supplementary characters</a>. To support
3295: * all Unicode characters, including supplementary characters, use
3296: * the {@link #isJavaIdentifierStart(int)} method.
3297: *
3298: * @param ch the character to be tested.
3299: * @return <code>true</code> if the character may start a Java identifier;
3300: * <code>false</code> otherwise.
3301: * @see java.lang.Character#isJavaIdentifierPart(char)
3302: * @see java.lang.Character#isLetter(char)
3303: * @see java.lang.Character#isUnicodeIdentifierStart(char)
3304: * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3305: * @since 1.1
3306: */
3307: public static boolean isJavaIdentifierStart(char ch) {
3308: return isJavaIdentifierStart((int) ch);
3309: }
3310:
3311: /**
3312: * Determines if the character (Unicode code point) is
3313: * permissible as the first character in a Java identifier.
3314: * <p>
3315: * A character may start a Java identifier if and only if
3316: * one of the following conditions is true:
3317: * <ul>
3318: * <li> {@link #isLetter(int) isLetter(codePoint)}
3319: * returns <code>true</code>
3320: * <li> {@link #getType(int) getType(codePoint)}
3321: * returns <code>LETTER_NUMBER</code>
3322: * <li> the referenced character is a currency symbol (such as "$")
3323: * <li> the referenced character is a connecting punctuation character
3324: * (such as "_").
3325: * </ul>
3326: *
3327: * @param codePoint the character (Unicode code point) to be tested.
3328: * @return <code>true</code> if the character may start a Java identifier;
3329: * <code>false</code> otherwise.
3330: * @see java.lang.Character#isJavaIdentifierPart(int)
3331: * @see java.lang.Character#isLetter(int)
3332: * @see java.lang.Character#isUnicodeIdentifierStart(int)
3333: * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3334: * @since 1.5
3335: */
3336: public static boolean isJavaIdentifierStart(int codePoint) {
3337: return CharacterData.of(codePoint).isJavaIdentifierStart(
3338: codePoint);
3339: }
3340:
3341: /**
3342: * Determines if the specified character may be part of a Java
3343: * identifier as other than the first character.
3344: * <p>
3345: * A character may be part of a Java identifier if any of the following
3346: * are true:
3347: * <ul>
3348: * <li> it is a letter
3349: * <li> it is a currency symbol (such as <code>'$'</code>)
3350: * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3351: * <li> it is a digit
3352: * <li> it is a numeric letter (such as a Roman numeral character)
3353: * <li> it is a combining mark
3354: * <li> it is a non-spacing mark
3355: * <li> <code>isIdentifierIgnorable</code> returns
3356: * <code>true</code> for the character
3357: * </ul>
3358: *
3359: * <p><b>Note:</b> This method cannot handle <a
3360: * href="#supplementary"> supplementary characters</a>. To support
3361: * all Unicode characters, including supplementary characters, use
3362: * the {@link #isJavaIdentifierPart(int)} method.
3363: *
3364: * @param ch the character to be tested.
3365: * @return <code>true</code> if the character may be part of a
3366: * Java identifier; <code>false</code> otherwise.
3367: * @see java.lang.Character#isIdentifierIgnorable(char)
3368: * @see java.lang.Character#isJavaIdentifierStart(char)
3369: * @see java.lang.Character#isLetterOrDigit(char)
3370: * @see java.lang.Character#isUnicodeIdentifierPart(char)
3371: * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3372: * @since 1.1
3373: */
3374: public static boolean isJavaIdentifierPart(char ch) {
3375: return isJavaIdentifierPart((int) ch);
3376: }
3377:
3378: /**
3379: * Determines if the character (Unicode code point) may be part of a Java
3380: * identifier as other than the first character.
3381: * <p>
3382: * A character may be part of a Java identifier if any of the following
3383: * are true:
3384: * <ul>
3385: * <li> it is a letter
3386: * <li> it is a currency symbol (such as <code>'$'</code>)
3387: * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3388: * <li> it is a digit
3389: * <li> it is a numeric letter (such as a Roman numeral character)
3390: * <li> it is a combining mark
3391: * <li> it is a non-spacing mark
3392: * <li> {@link #isIdentifierIgnorable(int)
3393: * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3394: * the character
3395: * </ul>
3396: *
3397: * @param codePoint the character (Unicode code point) to be tested.
3398: * @return <code>true</code> if the character may be part of a
3399: * Java identifier; <code>false</code> otherwise.
3400: * @see java.lang.Character#isIdentifierIgnorable(int)
3401: * @see java.lang.Character#isJavaIdentifierStart(int)
3402: * @see java.lang.Character#isLetterOrDigit(int)
3403: * @see java.lang.Character#isUnicodeIdentifierPart(int)
3404: * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3405: * @since 1.5
3406: */
3407: public static boolean isJavaIdentifierPart(int codePoint) {
3408: return CharacterData.of(codePoint).isJavaIdentifierPart(
3409: codePoint);
3410: }
3411:
3412: /**
3413: * Determines if the specified character is permissible as the
3414: * first character in a Unicode identifier.
3415: * <p>
3416: * A character may start a Unicode identifier if and only if
3417: * one of the following conditions is true:
3418: * <ul>
3419: * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3420: * <li> {@link #getType(char) getType(ch)} returns
3421: * <code>LETTER_NUMBER</code>.
3422: * </ul>
3423: *
3424: * <p><b>Note:</b> This method cannot handle <a
3425: * href="#supplementary"> supplementary characters</a>. To support
3426: * all Unicode characters, including supplementary characters, use
3427: * the {@link #isUnicodeIdentifierStart(int)} method.
3428: *
3429: * @param ch the character to be tested.
3430: * @return <code>true</code> if the character may start a Unicode
3431: * identifier; <code>false</code> otherwise.
3432: * @see java.lang.Character#isJavaIdentifierStart(char)
3433: * @see java.lang.Character#isLetter(char)
3434: * @see java.lang.Character#isUnicodeIdentifierPart(char)
3435: * @since 1.1
3436: */
3437: public static boolean isUnicodeIdentifierStart(char ch) {
3438: return isUnicodeIdentifierStart((int) ch);
3439: }
3440:
3441: /**
3442: * Determines if the specified character (Unicode code point) is permissible as the
3443: * first character in a Unicode identifier.
3444: * <p>
3445: * A character may start a Unicode identifier if and only if
3446: * one of the following conditions is true:
3447: * <ul>
3448: * <li> {@link #isLetter(int) isLetter(codePoint)}
3449: * returns <code>true</code>
3450: * <li> {@link #getType(int) getType(codePoint)}
3451: * returns <code>LETTER_NUMBER</code>.
3452: * </ul>
3453: * @param codePoint the character (Unicode code point) to be tested.
3454: * @return <code>true</code> if the character may start a Unicode
3455: * identifier; <code>false</code> otherwise.
3456: * @see java.lang.Character#isJavaIdentifierStart(int)
3457: * @see java.lang.Character#isLetter(int)
3458: * @see java.lang.Character#isUnicodeIdentifierPart(int)
3459: * @since 1.5
3460: */
3461: public static boolean isUnicodeIdentifierStart(int codePoint) {
3462: return CharacterData.of(codePoint).isUnicodeIdentifierStart(
3463: codePoint);
3464: }
3465:
3466: /**
3467: * Determines if the specified character may be part of a Unicode
3468: * identifier as other than the first character.
3469: * <p>
3470: * A character may be part of a Unicode identifier if and only if
3471: * one of the following statements is true:
3472: * <ul>
3473: * <li> it is a letter
3474: * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3475: * <li> it is a digit
3476: * <li> it is a numeric letter (such as a Roman numeral character)
3477: * <li> it is a combining mark
3478: * <li> it is a non-spacing mark
3479: * <li> <code>isIdentifierIgnorable</code> returns
3480: * <code>true</code> for this character.
3481: * </ul>
3482: *
3483: * <p><b>Note:</b> This method cannot handle <a
3484: * href="#supplementary"> supplementary characters</a>. To support
3485: * all Unicode characters, including supplementary characters, use
3486: * the {@link #isUnicodeIdentifierPart(int)} method.
3487: *
3488: * @param ch the character to be tested.
3489: * @return <code>true</code> if the character may be part of a
3490: * Unicode identifier; <code>false</code> otherwise.
3491: * @see java.lang.Character#isIdentifierIgnorable(char)
3492: * @see java.lang.Character#isJavaIdentifierPart(char)
3493: * @see java.lang.Character#isLetterOrDigit(char)
3494: * @see java.lang.Character#isUnicodeIdentifierStart(char)
3495: * @since 1.1
3496: */
3497: public static boolean isUnicodeIdentifierPart(char ch) {
3498: return isUnicodeIdentifierPart((int) ch);
3499: }
3500:
3501: /**
3502: * Determines if the specified character (Unicode code point) may be part of a Unicode
3503: * identifier as other than the first character.
3504: * <p>
3505: * A character may be part of a Unicode identifier if and only if
3506: * one of the following statements is true:
3507: * <ul>
3508: * <li> it is a letter
3509: * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3510: * <li> it is a digit
3511: * <li> it is a numeric letter (such as a Roman numeral character)
3512: * <li> it is a combining mark
3513: * <li> it is a non-spacing mark
3514: * <li> <code>isIdentifierIgnorable</code> returns
3515: * <code>true</code> for this character.
3516: * </ul>
3517: * @param codePoint the character (Unicode code point) to be tested.
3518: * @return <code>true</code> if the character may be part of a
3519: * Unicode identifier; <code>false</code> otherwise.
3520: * @see java.lang.Character#isIdentifierIgnorable(int)
3521: * @see java.lang.Character#isJavaIdentifierPart(int)
3522: * @see java.lang.Character#isLetterOrDigit(int)
3523: * @see java.lang.Character#isUnicodeIdentifierStart(int)
3524: * @since 1.5
3525: */
3526: public static boolean isUnicodeIdentifierPart(int codePoint) {
3527: return CharacterData.of(codePoint).isUnicodeIdentifierPart(
3528: codePoint);
3529: }
3530:
3531: /**
3532: * Determines if the specified character should be regarded as
3533: * an ignorable character in a Java identifier or a Unicode identifier.
3534: * <p>
3535: * The following Unicode characters are ignorable in a Java identifier
3536: * or a Unicode identifier:
3537: * <ul>
3538: * <li>ISO control characters that are not whitespace
3539: * <ul>
3540: * <li><code>'\u0000'</code> through <code>'\u0008'</code>
3541: * <li><code>'\u000E'</code> through <code>'\u001B'</code>
3542: * <li><code>'\u007F'</code> through <code>'\u009F'</code>
3543: * </ul>
3544: *
3545: * <li>all characters that have the <code>FORMAT</code> general
3546: * category value
3547: * </ul>
3548: *
3549: * <p><b>Note:</b> This method cannot handle <a
3550: * href="#supplementary"> supplementary characters</a>. To support
3551: * all Unicode characters, including supplementary characters, use
3552: * the {@link #isIdentifierIgnorable(int)} method.
3553: *
3554: * @param ch the character to be tested.
3555: * @return <code>true</code> if the character is an ignorable control
3556: * character that may be part of a Java or Unicode identifier;
3557: * <code>false</code> otherwise.
3558: * @see java.lang.Character#isJavaIdentifierPart(char)
3559: * @see java.lang.Character#isUnicodeIdentifierPart(char)
3560: * @since 1.1
3561: */
3562: public static boolean isIdentifierIgnorable(char ch) {
3563: return isIdentifierIgnorable((int) ch);
3564: }
3565:
3566: /**
3567: * Determines if the specified character (Unicode code point) should be regarded as
3568: * an ignorable character in a Java identifier or a Unicode identifier.
3569: * <p>
3570: * The following Unicode characters are ignorable in a Java identifier
3571: * or a Unicode identifier:
3572: * <ul>
3573: * <li>ISO control characters that are not whitespace
3574: * <ul>
3575: * <li><code>'\u0000'</code> through <code>'\u0008'</code>
3576: * <li><code>'\u000E'</code> through <code>'\u001B'</code>
3577: * <li><code>'\u007F'</code> through <code>'\u009F'</code>
3578: * </ul>
3579: *
3580: * <li>all characters that have the <code>FORMAT</code> general
3581: * category value
3582: * </ul>
3583: *
3584: * @param codePoint the character (Unicode code point) to be tested.
3585: * @return <code>true</code> if the character is an ignorable control
3586: * character that may be part of a Java or Unicode identifier;
3587: * <code>false</code> otherwise.
3588: * @see java.lang.Character#isJavaIdentifierPart(int)
3589: * @see java.lang.Character#isUnicodeIdentifierPart(int)
3590: * @since 1.5
3591: */
3592: public static boolean isIdentifierIgnorable(int codePoint) {
3593: return CharacterData.of(codePoint).isIdentifierIgnorable(
3594: codePoint);
3595: }
3596:
3597: /**
3598: * Converts the character argument to lowercase using case
3599: * mapping information from the UnicodeData file.
3600: * <p>
3601: * Note that
3602: * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
3603: * does not always return <code>true</code> for some ranges of
3604: * characters, particularly those that are symbols or ideographs.
3605: *
3606: * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
3607: * characters to lowercase. <code>String</code> case mapping methods
3608: * have several benefits over <code>Character</code> case mapping methods.
3609: * <code>String</code> case mapping methods can perform locale-sensitive
3610: * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3611: * the <code>Character</code> case mapping methods cannot.
3612: *
3613: * <p><b>Note:</b> This method cannot handle <a
3614: * href="#supplementary"> supplementary characters</a>. To support
3615: * all Unicode characters, including supplementary characters, use
3616: * the {@link #toLowerCase(int)} method.
3617: *
3618: * @param ch the character to be converted.
3619: * @return the lowercase equivalent of the character, if any;
3620: * otherwise, the character itself.
3621: * @see java.lang.Character#isLowerCase(char)
3622: * @see java.lang.String#toLowerCase()
3623: */
3624: public static char toLowerCase(char ch) {
3625: return (char) toLowerCase((int) ch);
3626: }
3627:
3628: /**
3629: * Converts the character (Unicode code point) argument to
3630: * lowercase using case mapping information from the UnicodeData
3631: * file.
3632: *
3633: * <p> Note that
3634: * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
3635: * does not always return <code>true</code> for some ranges of
3636: * characters, particularly those that are symbols or ideographs.
3637: *
3638: * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
3639: * characters to lowercase. <code>String</code> case mapping methods
3640: * have several benefits over <code>Character</code> case mapping methods.
3641: * <code>String</code> case mapping methods can perform locale-sensitive
3642: * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3643: * the <code>Character</code> case mapping methods cannot.
3644: *
3645: * @param codePoint the character (Unicode code point) to be converted.
3646: * @return the lowercase equivalent of the character (Unicode code
3647: * point), if any; otherwise, the character itself.
3648: * @see java.lang.Character#isLowerCase(int)
3649: * @see java.lang.String#toLowerCase()
3650: *
3651: * @since 1.5
3652: */
3653: public static int toLowerCase(int codePoint) {
3654: return CharacterData.of(codePoint).toLowerCase(codePoint);
3655: }
3656:
3657: /**
3658: * Converts the character argument to uppercase using case mapping
3659: * information from the UnicodeData file.
3660: * <p>
3661: * Note that
3662: * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
3663: * does not always return <code>true</code> for some ranges of
3664: * characters, particularly those that are symbols or ideographs.
3665: *
3666: * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
3667: * characters to uppercase. <code>String</code> case mapping methods
3668: * have several benefits over <code>Character</code> case mapping methods.
3669: * <code>String</code> case mapping methods can perform locale-sensitive
3670: * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3671: * the <code>Character</code> case mapping methods cannot.
3672: *
3673: * <p><b>Note:</b> This method cannot handle <a
3674: * href="#supplementary"> supplementary characters</a>. To support
3675: * all Unicode characters, including supplementary characters, use
3676: * the {@link #toUpperCase(int)} method.
3677: *
3678: * @param ch the character to be converted.
3679: * @return the uppercase equivalent of the character, if any;
3680: * otherwise, the character itself.
3681: * @see java.lang.Character#isUpperCase(char)
3682: * @see java.lang.String#toUpperCase()
3683: */
3684: public static char toUpperCase(char ch) {
3685: return (char) toUpperCase((int) ch);
3686: }
3687:
3688: /**
3689: * Converts the character (Unicode code point) argument to
3690: * uppercase using case mapping information from the UnicodeData
3691: * file.
3692: *
3693: * <p>Note that
3694: * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
3695: * does not always return <code>true</code> for some ranges of
3696: * characters, particularly those that are symbols or ideographs.
3697: *
3698: * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
3699: * characters to uppercase. <code>String</code> case mapping methods
3700: * have several benefits over <code>Character</code> case mapping methods.
3701: * <code>String</code> case mapping methods can perform locale-sensitive
3702: * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3703: * the <code>Character</code> case mapping methods cannot.
3704: *
3705: * @param codePoint the character (Unicode code point) to be converted.
3706: * @return the uppercase equivalent of the character, if any;
3707: * otherwise, the character itself.
3708: * @see java.lang.Character#isUpperCase(int)
3709: * @see java.lang.String#toUpperCase()
3710: *
3711: * @since 1.5
3712: */
3713: public static int toUpperCase(int codePoint) {
3714: return CharacterData.of(codePoint).toUpperCase(codePoint);
3715: }
3716:
3717: /**
3718: * Converts the character argument to titlecase using case mapping
3719: * information from the UnicodeData file. If a character has no
3720: * explicit titlecase mapping and is not itself a titlecase char
3721: * according to UnicodeData, then the uppercase mapping is
3722: * returned as an equivalent titlecase mapping. If the
3723: * <code>char</code> argument is already a titlecase
3724: * <code>char</code>, the same <code>char</code> value will be
3725: * returned.
3726: * <p>
3727: * Note that
3728: * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
3729: * does not always return <code>true</code> for some ranges of
3730: * characters.
3731: *
3732: * <p><b>Note:</b> This method cannot handle <a
3733: * href="#supplementary"> supplementary characters</a>. To support
3734: * all Unicode characters, including supplementary characters, use
3735: * the {@link #toTitleCase(int)} method.
3736: *
3737: * @param ch the character to be converted.
3738: * @return the titlecase equivalent of the character, if any;
3739: * otherwise, the character itself.
3740: * @see java.lang.Character#isTitleCase(char)
3741: * @see java.lang.Character#toLowerCase(char)
3742: * @see java.lang.Character#toUpperCase(char)
3743: * @since 1.0.2
3744: */
3745: public static char toTitleCase(char ch) {
3746: return (char) toTitleCase((int) ch);
3747: }
3748:
3749: /**
3750: * Converts the character (Unicode code point) argument to titlecase using case mapping
3751: * information from the UnicodeData file. If a character has no
3752: * explicit titlecase mapping and is not itself a titlecase char
3753: * according to UnicodeData, then the uppercase mapping is
3754: * returned as an equivalent titlecase mapping. If the
3755: * character argument is already a titlecase
3756: * character, the same character value will be
3757: * returned.
3758: *
3759: * <p>Note that
3760: * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
3761: * does not always return <code>true</code> for some ranges of
3762: * characters.
3763: *
3764: * @param codePoint the character (Unicode code point) to be converted.
3765: * @return the titlecase equivalent of the character, if any;
3766: * otherwise, the character itself.
3767: * @see java.lang.Character#isTitleCase(int)
3768: * @see java.lang.Character#toLowerCase(int)
3769: * @see java.lang.Character#toUpperCase(int)
3770: * @since 1.5
3771: */
3772: public static int toTitleCase(int codePoint) {
3773: return CharacterData.of(codePoint).toTitleCase(codePoint);
3774: }
3775:
3776: /**
3777: * Returns the numeric value of the character <code>ch</code> in the
3778: * specified radix.
3779: * <p>
3780: * If the radix is not in the range <code>MIN_RADIX</code> <=
3781: * <code>radix</code> <= <code>MAX_RADIX</code> or if the
3782: * value of <code>ch</code> is not a valid digit in the specified
3783: * radix, <code>-1</code> is returned. A character is a valid digit
3784: * if at least one of the following is true:
3785: * <ul>
3786: * <li>The method <code>isDigit</code> is <code>true</code> of the character
3787: * and the Unicode decimal digit value of the character (or its
3788: * single-character decomposition) is less than the specified radix.
3789: * In this case the decimal digit value is returned.
3790: * <li>The character is one of the uppercase Latin letters
3791: * <code>'A'</code> through <code>'Z'</code> and its code is less than
3792: * <code>radix + 'A' - 10</code>.
3793: * In this case, <code>ch - 'A' + 10</code>
3794: * is returned.
3795: * <li>The character is one of the lowercase Latin letters
3796: * <code>'a'</code> through <code>'z'</code> and its code is less than
3797: * <code>radix + 'a' - 10</code>.
3798: * In this case, <code>ch - 'a' + 10</code>
3799: * is returned.
3800: * </ul>
3801: *
3802: * <p><b>Note:</b> This method cannot handle <a
3803: * href="#supplementary"> supplementary characters</a>. To support
3804: * all Unicode characters, including supplementary characters, use
3805: * the {@link #digit(int, int)} method.
3806: *
3807: * @param ch the character to be converted.
3808: * @param radix the radix.
3809: * @return the numeric value represented by the character in the
3810: * specified radix.
3811: * @see java.lang.Character#forDigit(int, int)
3812: * @see java.lang.Character#isDigit(char)
3813: */
3814: public static int digit(char ch, int radix) {
3815: return digit((int) ch, radix);
3816: }
3817:
3818: /**
3819: * Returns the numeric value of the specified character (Unicode
3820: * code point) in the specified radix.
3821: *
3822: * <p>If the radix is not in the range <code>MIN_RADIX</code> <=
3823: * <code>radix</code> <= <code>MAX_RADIX</code> or if the
3824: * character is not a valid digit in the specified
3825: * radix, <code>-1</code> is returned. A character is a valid digit
3826: * if at least one of the following is true:
3827: * <ul>
3828: * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
3829: * and the Unicode decimal digit value of the character (or its
3830: * single-character decomposition) is less than the specified radix.
3831: * In this case the decimal digit value is returned.
3832: * <li>The character is one of the uppercase Latin letters
3833: * <code>'A'</code> through <code>'Z'</code> and its code is less than
3834: * <code>radix + 'A' - 10</code>.
3835: * In this case, <code>ch - 'A' + 10</code>
3836: * is returned.
3837: * <li>The character is one of the lowercase Latin letters
3838: * <code>'a'</code> through <code>'z'</code> and its code is less than
3839: * <code>radix + 'a' - 10</code>.
3840: * In this case, <code>ch - 'a' + 10</code>
3841: * is returned.
3842: * </ul>
3843: *
3844: * @param codePoint the character (Unicode code point) to be converted.
3845: * @param radix the radix.
3846: * @return the numeric value represented by the character in the
3847: * specified radix.
3848: * @see java.lang.Character#forDigit(int, int)
3849: * @see java.lang.Character#isDigit(int)
3850: * @since 1.5
3851: */
3852: public static int digit(int codePoint, int radix) {
3853: return CharacterData.of(codePoint).digit(codePoint, radix);
3854: }
3855:
3856: /**
3857: * Returns the <code>int</code> value that the specified Unicode
3858: * character represents. For example, the character
3859: * <code>'\u216C'</code> (the roman numeral fifty) will return
3860: * an int with a value of 50.
3861: * <p>
3862: * The letters A-Z in their uppercase (<code>'\u0041'</code> through
3863: * <code>'\u005A'</code>), lowercase
3864: * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
3865: * full width variant (<code>'\uFF21'</code> through
3866: * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
3867: * <code>'\uFF5A'</code>) forms have numeric values from 10
3868: * through 35. This is independent of the Unicode specification,
3869: * which does not assign numeric values to these <code>char</code>
3870: * values.
3871: * <p>
3872: * If the character does not have a numeric value, then -1 is returned.
3873: * If the character has a numeric value that cannot be represented as a
3874: * nonnegative integer (for example, a fractional value), then -2
3875: * is returned.
3876: *
3877: * <p><b>Note:</b> This method cannot handle <a
3878: * href="#supplementary"> supplementary characters</a>. To support
3879: * all Unicode characters, including supplementary characters, use
3880: * the {@link #getNumericValue(int)} method.
3881: *
3882: * @param ch the character to be converted.
3883: * @return the numeric value of the character, as a nonnegative <code>int</code>
3884: * value; -2 if the character has a numeric value that is not a
3885: * nonnegative integer; -1 if the character has no numeric value.
3886: * @see java.lang.Character#forDigit(int, int)
3887: * @see java.lang.Character#isDigit(char)
3888: * @since 1.1
3889: */
3890: public static int getNumericValue(char ch) {
3891: return getNumericValue((int) ch);
3892: }
3893:
3894: /**
3895: * Returns the <code>int</code> value that the specified
3896: * character (Unicode code point) represents. For example, the character
3897: * <code>'\u216C'</code> (the Roman numeral fifty) will return
3898: * an <code>int</code> with a value of 50.
3899: * <p>
3900: * The letters A-Z in their uppercase (<code>'\u0041'</code> through
3901: * <code>'\u005A'</code>), lowercase
3902: * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
3903: * full width variant (<code>'\uFF21'</code> through
3904: * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
3905: * <code>'\uFF5A'</code>) forms have numeric values from 10
3906: * through 35. This is independent of the Unicode specification,
3907: * which does not assign numeric values to these <code>char</code>
3908: * values.
3909: * <p>
3910: * If the character does not have a numeric value, then -1 is returned.
3911: * If the character has a numeric value that cannot be represented as a
3912: * nonnegative integer (for example, a fractional value), then -2
3913: * is returned.
3914: *
3915: * @param codePoint the character (Unicode code point) to be converted.
3916: * @return the numeric value of the character, as a nonnegative <code>int</code>
3917: * value; -2 if the character has a numeric value that is not a
3918: * nonnegative integer; -1 if the character has no numeric value.
3919: * @see java.lang.Character#forDigit(int, int)
3920: * @see java.lang.Character#isDigit(int)
3921: * @since 1.5
3922: */
3923: public static int getNumericValue(int codePoint) {
3924: return CharacterData.of(codePoint).getNumericValue(codePoint);
3925: }
3926:
3927: /**
3928: * Determines if the specified character is ISO-LATIN-1 white space.
3929: * This method returns <code>true</code> for the following five
3930: * characters only:
3931: * <table>
3932: * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td>
3933: * <td><code>HORIZONTAL TABULATION</code></td></tr>
3934: * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td>
3935: * <td><code>NEW LINE</code></td></tr>
3936: * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td>
3937: * <td><code>FORM FEED</code></td></tr>
3938: * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td>
3939: * <td><code>CARRIAGE RETURN</code></td></tr>
3940: * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td>
3941: * <td><code>SPACE</code></td></tr>
3942: * </table>
3943: *
3944: * @param ch the character to be tested.
3945: * @return <code>true</code> if the character is ISO-LATIN-1 white
3946: * space; <code>false</code> otherwise.
3947: * @see java.lang.Character#isSpaceChar(char)
3948: * @see java.lang.Character#isWhitespace(char)
3949: * @deprecated Replaced by isWhitespace(char).
3950: */
3951: @Deprecated
3952: public static boolean isSpace(char ch) {
3953: return (ch <= 0x0020)
3954: && (((((1L << 0x0009) | (1L << 0x000A) | (1L << 0x000C)
3955: | (1L << 0x000D) | (1L << 0x0020)) >> ch) & 1L) != 0);
3956: }
3957:
3958: /**
3959: * Determines if the specified character is a Unicode space character.
3960: * A character is considered to be a space character if and only if
3961: * it is specified to be a space character by the Unicode standard. This
3962: * method returns true if the character's general category type is any of
3963: * the following:
3964: * <ul>
3965: * <li> <code>SPACE_SEPARATOR</code>
3966: * <li> <code>LINE_SEPARATOR</code>
3967: * <li> <code>PARAGRAPH_SEPARATOR</code>
3968: * </ul>
3969: *
3970: * <p><b>Note:</b> This method cannot handle <a
3971: * href="#supplementary"> supplementary characters</a>. To support
3972: * all Unicode characters, including supplementary characters, use
3973: * the {@link #isSpaceChar(int)} method.
3974: *
3975: * @param ch the character to be tested.
3976: * @return <code>true</code> if the character is a space character;
3977: * <code>false</code> otherwise.
3978: * @see java.lang.Character#isWhitespace(char)
3979: * @since 1.1
3980: */
3981: public static boolean isSpaceChar(char ch) {
3982: return isSpaceChar((int) ch);
3983: }
3984:
3985: /**
3986: * Determines if the specified character (Unicode code point) is a
3987: * Unicode space character. A character is considered to be a
3988: * space character if and only if it is specified to be a space
3989: * character by the Unicode standard. This method returns true if
3990: * the character's general category type is any of the following:
3991: *
3992: * <ul>
3993: * <li> {@link #SPACE_SEPARATOR}
3994: * <li> {@link #LINE_SEPARATOR}
3995: * <li> {@link #PARAGRAPH_SEPARATOR}
3996: * </ul>
3997: *
3998: * @param codePoint the character (Unicode code point) to be tested.
3999: * @return <code>true</code> if the character is a space character;
4000: * <code>false</code> otherwise.
4001: * @see java.lang.Character#isWhitespace(int)
4002: * @since 1.5
4003: */
4004: public static boolean isSpaceChar(int codePoint) {
4005: return ((((1 << Character.SPACE_SEPARATOR)
4006: | (1 << Character.LINE_SEPARATOR) | (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) != 0;
4007: }
4008:
4009: /**
4010: * Determines if the specified character is white space according to Java.
4011: * A character is a Java whitespace character if and only if it satisfies
4012: * one of the following criteria:
4013: * <ul>
4014: * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4015: * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
4016: * but is not also a non-breaking space (<code>'\u00A0'</code>,
4017: * <code>'\u2007'</code>, <code>'\u202F'</code>).
4018: * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
4019: * <li> It is <code>'\u000A'</code>, LINE FEED.
4020: * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
4021: * <li> It is <code>'\u000C'</code>, FORM FEED.
4022: * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
4023: * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
4024: * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
4025: * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
4026: * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
4027: * </ul>
4028: *
4029: * <p><b>Note:</b> This method cannot handle <a
4030: * href="#supplementary"> supplementary characters</a>. To support
4031: * all Unicode characters, including supplementary characters, use
4032: * the {@link #isWhitespace(int)} method.
4033: *
4034: * @param ch the character to be tested.
4035: * @return <code>true</code> if the character is a Java whitespace
4036: * character; <code>false</code> otherwise.
4037: * @see java.lang.Character#isSpaceChar(char)
4038: * @since 1.1
4039: */
4040: public static boolean isWhitespace(char ch) {
4041: return isWhitespace((int) ch);
4042: }
4043:
4044: /**
4045: * Determines if the specified character (Unicode code point) is
4046: * white space according to Java. A character is a Java
4047: * whitespace character if and only if it satisfies one of the
4048: * following criteria:
4049: * <ul>
4050: * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4051: * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
4052: * but is not also a non-breaking space (<code>'\u00A0'</code>,
4053: * <code>'\u2007'</code>, <code>'\u202F'</code>).
4054: * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
4055: * <li> It is <code>'\u000A'</code>, LINE FEED.
4056: * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
4057: * <li> It is <code>'\u000C'</code>, FORM FEED.
4058: * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
4059: * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
4060: * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
4061: * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
4062: * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
4063: * </ul>
4064: * <p>
4065: *
4066: * @param codePoint the character (Unicode code point) to be tested.
4067: * @return <code>true</code> if the character is a Java whitespace
4068: * character; <code>false</code> otherwise.
4069: * @see java.lang.Character#isSpaceChar(int)
4070: * @since 1.5
4071: */
4072: public static boolean isWhitespace(int codePoint) {
4073: return CharacterData.of(codePoint).isWhitespace(codePoint);
4074: }
4075:
4076: /**
4077: * Determines if the specified character is an ISO control
4078: * character. A character is considered to be an ISO control
4079: * character if its code is in the range <code>'\u0000'</code>
4080: * through <code>'\u001F'</code> or in the range
4081: * <code>'\u007F'</code> through <code>'\u009F'</code>.
4082: *
4083: * <p><b>Note:</b> This method cannot handle <a
4084: * href="#supplementary"> supplementary characters</a>. To support
4085: * all Unicode characters, including supplementary characters, use
4086: * the {@link #isISOControl(int)} method.
4087: *
4088: * @param ch the character to be tested.
4089: * @return <code>true</code> if the character is an ISO control character;
4090: * <code>false</code> otherwise.
4091: *
4092: * @see java.lang.Character#isSpaceChar(char)
4093: * @see java.lang.Character#isWhitespace(char)
4094: * @since 1.1
4095: */
4096: public static boolean isISOControl(char ch) {
4097: return isISOControl((int) ch);
4098: }
4099:
4100: /**
4101: * Determines if the referenced character (Unicode code point) is an ISO control
4102: * character. A character is considered to be an ISO control
4103: * character if its code is in the range <code>'\u0000'</code>
4104: * through <code>'\u001F'</code> or in the range
4105: * <code>'\u007F'</code> through <code>'\u009F'</code>.
4106: *
4107: * @param codePoint the character (Unicode code point) to be tested.
4108: * @return <code>true</code> if the character is an ISO control character;
4109: * <code>false</code> otherwise.
4110: * @see java.lang.Character#isSpaceChar(int)
4111: * @see java.lang.Character#isWhitespace(int)
4112: * @since 1.5
4113: */
4114: public static boolean isISOControl(int codePoint) {
4115: return (codePoint >= 0x0000 && codePoint <= 0x001F)
4116: || (codePoint >= 0x007F && codePoint <= 0x009F);
4117: }
4118:
4119: /**
4120: * Returns a value indicating a character's general category.
4121: *
4122: * <p><b>Note:</b> This method cannot handle <a
4123: * href="#supplementary"> supplementary characters</a>. To support
4124: * all Unicode characters, including supplementary characters, use
4125: * the {@link #getType(int)} method.
4126: *
4127: * @param ch the character to be tested.
4128: * @return a value of type <code>int</code> representing the
4129: * character's general category.
4130: * @see java.lang.Character#COMBINING_SPACING_MARK
4131: * @see java.lang.Character#CONNECTOR_PUNCTUATION
4132: * @see java.lang.Character#CONTROL
4133: * @see java.lang.Character#CURRENCY_SYMBOL
4134: * @see java.lang.Character#DASH_PUNCTUATION
4135: * @see java.lang.Character#DECIMAL_DIGIT_NUMBER
4136: * @see java.lang.Character#ENCLOSING_MARK
4137: * @see java.lang.Character#END_PUNCTUATION
4138: * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION
4139: * @see java.lang.Character#FORMAT
4140: * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4141: * @see java.lang.Character#LETTER_NUMBER
4142: * @see java.lang.Character#LINE_SEPARATOR
4143: * @see java.lang.Character#LOWERCASE_LETTER
4144: * @see java.lang.Character#MATH_SYMBOL
4145: * @see java.lang.Character#MODIFIER_LETTER
4146: * @see java.lang.Character#MODIFIER_SYMBOL
4147: * @see java.lang.Character#NON_SPACING_MARK
4148: * @see java.lang.Character#OTHER_LETTER
4149: * @see java.lang.Character#OTHER_NUMBER
4150: * @see java.lang.Character#OTHER_PUNCTUATION
4151: * @see java.lang.Character#OTHER_SYMBOL
4152: * @see java.lang.Character#PARAGRAPH_SEPARATOR
4153: * @see java.lang.Character#PRIVATE_USE
4154: * @see java.lang.Character#SPACE_SEPARATOR
4155: * @see java.lang.Character#START_PUNCTUATION
4156: * @see java.lang.Character#SURROGATE
4157: * @see java.lang.Character#TITLECASE_LETTER
4158: * @see java.lang.Character#UNASSIGNED
4159: * @see java.lang.Character#UPPERCASE_LETTER
4160: * @since 1.1
4161: */
4162: public static int getType(char ch) {
4163: return getType((int) ch);
4164: }
4165:
4166: /**
4167: * Returns a value indicating a character's general category.
4168: *
4169: * @param codePoint the character (Unicode code point) to be tested.
4170: * @return a value of type <code>int</code> representing the
4171: * character's general category.
4172: * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
4173: * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
4174: * @see Character#CONTROL CONTROL
4175: * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
4176: * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
4177: * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
4178: * @see Character#ENCLOSING_MARK ENCLOSING_MARK
4179: * @see Character#END_PUNCTUATION END_PUNCTUATION
4180: * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
4181: * @see Character#FORMAT FORMAT
4182: * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
4183: * @see Character#LETTER_NUMBER LETTER_NUMBER
4184: * @see Character#LINE_SEPARATOR LINE_SEPARATOR
4185: * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
4186: * @see Character#MATH_SYMBOL MATH_SYMBOL
4187: * @see Character#MODIFIER_LETTER MODIFIER_LETTER
4188: * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
4189: * @see Character#NON_SPACING_MARK NON_SPACING_MARK
4190: * @see Character#OTHER_LETTER OTHER_LETTER
4191: * @see Character#OTHER_NUMBER OTHER_NUMBER
4192: * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
4193: * @see Character#OTHER_SYMBOL OTHER_SYMBOL
4194: * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
4195: * @see Character#PRIVATE_USE PRIVATE_USE
4196: * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
4197: * @see Character#START_PUNCTUATION START_PUNCTUATION
4198: * @see Character#SURROGATE SURROGATE
4199: * @see Character#TITLECASE_LETTER TITLECASE_LETTER
4200: * @see Character#UNASSIGNED UNASSIGNED
4201: * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
4202: * @since 1.5
4203: */
4204: public static int getType(int codePoint) {
4205: return CharacterData.of(codePoint).getType(codePoint);
4206: }
4207:
4208: /**
4209: * Determines the character representation for a specific digit in
4210: * the specified radix. If the value of <code>radix</code> is not a
4211: * valid radix, or the value of <code>digit</code> is not a valid
4212: * digit in the specified radix, the null character
4213: * (<code>'\u0000'</code>) is returned.
4214: * <p>
4215: * The <code>radix</code> argument is valid if it is greater than or
4216: * equal to <code>MIN_RADIX</code> and less than or equal to
4217: * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
4218: * <code>0 <=digit < radix</code>.
4219: * <p>
4220: * If the digit is less than 10, then
4221: * <code>'0' + digit</code> is returned. Otherwise, the value
4222: * <code>'a' + digit - 10</code> is returned.
4223: *
4224: * @param digit the number to convert to a character.
4225: * @param radix the radix.
4226: * @return the <code>char</code> representation of the specified digit
4227: * in the specified radix.
4228: * @see java.lang.Character#MIN_RADIX
4229: * @see java.lang.Character#MAX_RADIX
4230: * @see java.lang.Character#digit(char, int)
4231: */
4232: public static char forDigit(int digit, int radix) {
4233: if ((digit >= radix) || (digit < 0)) {
4234: return '\0';
4235: }
4236: if ((radix < Character.MIN_RADIX)
4237: || (radix > Character.MAX_RADIX)) {
4238: return '\0';
4239: }
4240: if (digit < 10) {
4241: return (char) ('0' + digit);
4242: }
4243: return (char) ('a' - 10 + digit);
4244: }
4245:
4246: /**
4247: * Returns the Unicode directionality property for the given
4248: * character. Character directionality is used to calculate the
4249: * visual ordering of text. The directionality value of undefined
4250: * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
4251: *
4252: * <p><b>Note:</b> This method cannot handle <a
4253: * href="#supplementary"> supplementary characters</a>. To support
4254: * all Unicode characters, including supplementary characters, use
4255: * the {@link #getDirectionality(int)} method.
4256: *
4257: * @param ch <code>char</code> for which the directionality property
4258: * is requested.
4259: * @return the directionality property of the <code>char</code> value.
4260: *
4261: * @see Character#DIRECTIONALITY_UNDEFINED
4262: * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
4263: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
4264: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4265: * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
4266: * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4267: * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4268: * @see Character#DIRECTIONALITY_ARABIC_NUMBER
4269: * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4270: * @see Character#DIRECTIONALITY_NONSPACING_MARK
4271: * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
4272: * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
4273: * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
4274: * @see Character#DIRECTIONALITY_WHITESPACE
4275: * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
4276: * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4277: * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4278: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4279: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4280: * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4281: * @since 1.4
4282: */
4283: public static byte getDirectionality(char ch) {
4284: return getDirectionality((int) ch);
4285: }
4286:
4287: /**
4288: * Returns the Unicode directionality property for the given
4289: * character (Unicode code point). Character directionality is
4290: * used to calculate the visual ordering of text. The
4291: * directionality value of undefined character is {@link
4292: * #DIRECTIONALITY_UNDEFINED}.
4293: *
4294: * @param codePoint the character (Unicode code point) for which
4295: * the directionality property is requested.
4296: * @return the directionality property of the character.
4297: *
4298: * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
4299: * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
4300: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
4301: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4302: * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
4303: * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4304: * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4305: * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
4306: * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4307: * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
4308: * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
4309: * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
4310: * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
4311: * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
4312: * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
4313: * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4314: * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4315: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4316: * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4317: * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4318: * @since 1.5
4319: */
4320: public static byte getDirectionality(int codePoint) {
4321: return CharacterData.of(codePoint).getDirectionality(codePoint);
4322: }
4323:
4324: /**
4325: * Determines whether the character is mirrored according to the
4326: * Unicode specification. Mirrored characters should have their
4327: * glyphs horizontally mirrored when displayed in text that is
4328: * right-to-left. For example, <code>'\u0028'</code> LEFT
4329: * PARENTHESIS is semantically defined to be an <i>opening
4330: * parenthesis</i>. This will appear as a "(" in text that is
4331: * left-to-right but as a ")" in text that is right-to-left.
4332: *
4333: * <p><b>Note:</b> This method cannot handle <a
4334: * href="#supplementary"> supplementary characters</a>. To support
4335: * all Unicode characters, including supplementary characters, use
4336: * the {@link #isMirrored(int)} method.
4337: *
4338: * @param ch <code>char</code> for which the mirrored property is requested
4339: * @return <code>true</code> if the char is mirrored, <code>false</code>
4340: * if the <code>char</code> is not mirrored or is not defined.
4341: * @since 1.4
4342: */
4343: public static boolean isMirrored(char ch) {
4344: return isMirrored((int) ch);
4345: }
4346:
4347: /**
4348: * Determines whether the specified character (Unicode code point)
4349: * is mirrored according to the Unicode specification. Mirrored
4350: * characters should have their glyphs horizontally mirrored when
4351: * displayed in text that is right-to-left. For example,
4352: * <code>'\u0028'</code> LEFT PARENTHESIS is semantically
4353: * defined to be an <i>opening parenthesis</i>. This will appear
4354: * as a "(" in text that is left-to-right but as a ")" in text
4355: * that is right-to-left.
4356: *
4357: * @param codePoint the character (Unicode code point) to be tested.
4358: * @return <code>true</code> if the character is mirrored, <code>false</code>
4359: * if the character is not mirrored or is not defined.
4360: * @since 1.5
4361: */
4362: public static boolean isMirrored(int codePoint) {
4363: return CharacterData.of(codePoint).isMirrored(codePoint);
4364: }
4365:
4366: /**
4367: * Compares two <code>Character</code> objects numerically.
4368: *
4369: * @param anotherCharacter the <code>Character</code> to be compared.
4370:
4371: * @return the value <code>0</code> if the argument <code>Character</code>
4372: * is equal to this <code>Character</code>; a value less than
4373: * <code>0</code> if this <code>Character</code> is numerically less
4374: * than the <code>Character</code> argument; and a value greater than
4375: * <code>0</code> if this <code>Character</code> is numerically greater
4376: * than the <code>Character</code> argument (unsigned comparison).
4377: * Note that this is strictly a numerical comparison; it is not
4378: * locale-dependent.
4379: * @since 1.2
4380: */
4381: public int compareTo(Character anotherCharacter) {
4382: return this .value - anotherCharacter.value;
4383: }
4384:
4385: /**
4386: * Converts the character (Unicode code point) argument to uppercase using
4387: * information from the UnicodeData file.
4388: * <p>
4389: *
4390: * @param codePoint the character (Unicode code point) to be converted.
4391: * @return either the uppercase equivalent of the character, if
4392: * any, or an error flag (<code>Character.ERROR</code>)
4393: * that indicates that a 1:M <code>char</code> mapping exists.
4394: * @see java.lang.Character#isLowerCase(char)
4395: * @see java.lang.Character#isUpperCase(char)
4396: * @see java.lang.Character#toLowerCase(char)
4397: * @see java.lang.Character#toTitleCase(char)
4398: * @since 1.4
4399: */
4400: static int toUpperCaseEx(int codePoint) {
4401: assert isValidCodePoint(codePoint);
4402: return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
4403: }
4404:
4405: /**
4406: * Converts the character (Unicode code point) argument to uppercase using case
4407: * mapping information from the SpecialCasing file in the Unicode
4408: * specification. If a character has no explicit uppercase
4409: * mapping, then the <code>char</code> itself is returned in the
4410: * <code>char[]</code>.
4411: *
4412: * @param codePoint the character (Unicode code point) to be converted.
4413: * @return a <code>char[]</code> with the uppercased character.
4414: * @since 1.4
4415: */
4416: static char[] toUpperCaseCharArray(int codePoint) {
4417: // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
4418: assert isValidCodePoint(codePoint)
4419: && !isSupplementaryCodePoint(codePoint);
4420: return CharacterData.of(codePoint).toUpperCaseCharArray(
4421: codePoint);
4422: }
4423:
/**
 * The number of bits used to represent a <tt>char</tt> value in unsigned
 * binary form. The value is 16, i.e. two 8-bit bytes.
 *
 * @since 1.5
 */
public static final int SIZE = 16;
4431:
4432: /**
4433: * Returns the value obtained by reversing the order of the bytes in the
4434: * specified <tt>char</tt> value.
4435: *
4436: * @return the value obtained by reversing (or, equivalently, swapping)
4437: * the bytes in the specified <tt>char</tt> value.
4438: * @since 1.5
4439: */
4440: public static char reverseBytes(char ch) {
4441: return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
4442: }
4443: }
|