001: /*
002: * Copyright 1995-2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package java.io;
027:
028: /**
029: * The <code>DataInput</code> interface provides
030: * for reading bytes from a binary stream and
031: * reconstructing from them data in any of
032: * the Java primitive types. There is also
033: * a
034: * facility for reconstructing a <code>String</code>
035: * from data in
036: * <a href="#modified-utf-8">modified UTF-8</a>
037: * format.
038: * <p>
039: * It is generally true of all the reading
040: * routines in this interface that if end of
041: * file is reached before the desired number
042: * of bytes has been read, an <code>EOFException</code>
043: * (which is a kind of <code>IOException</code>)
044: * is thrown. If any byte cannot be read for
045: * any reason other than end of file, an <code>IOException</code>
046: * other than <code>EOFException</code> is
047: * thrown. In particular, an <code>IOException</code>
048: * may be thrown if the input stream has been
049: * closed.
050: *
051: * <h4><a name="modified-utf-8">Modified UTF-8</a></h4>
052: * <p>
053: * Implementations of the DataInput and DataOutput interfaces represent
054: * Unicode strings in a format that is a slight modification of UTF-8.
055: * (For information regarding the standard UTF-8 format, see section
056: * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
057: * 4.0</i>).
058: * Note that in the following tables, the most significant bit appears in the
059: * far left-hand column.
060: * <p>
061: * All characters in the range <code>'&#92;u0001'</code> to
062: * <code>'&#92;u007F'</code> are represented by a single byte:
063: *
064: * <blockquote>
065: * <table border="1" cellspacing="0" cellpadding="8" width="50%"
066: * summary="Bit values and bytes">
067: * <tr>
068: * <td></td>
069: * <th id="bit">Bit Values</th>
070: * </tr>
071: * <tr>
072: * <th id="byte1">Byte 1</th>
073: * <td>
074: * <table border="1" cellspacing="0" width="100%">
075: * <tr>
076: * <td width="12%"><center>0</center>
077: * <td colspan="7"><center>bits 6-0</center>
078: * </tr>
079: * </table>
080: * </td>
081: * </tr>
082: * </table>
083: * </blockquote>
084: *
085: * <p>
086: * The null character <code>'&#92;u0000'</code> and characters in the
087: * range <code>'&#92;u0080'</code> to <code>'&#92;u07FF'</code> are
088: * represented by a pair of bytes:
089: *
090: * <blockquote>
091: * <table border="1" cellspacing="0" cellpadding="8" width="50%"
092: * summary="Bit values and bytes">
093: * <tr>
094: * <td></td>
095: * <th id="bit">Bit Values</th>
096: * </tr>
097: * <tr>
098: * <th id="byte1">Byte 1</th>
099: * <td>
100: * <table border="1" cellspacing="0" width="100%">
101: * <tr>
102: * <td width="12%"><center>1</center>
103: * <td width="13%"><center>1</center>
104: * <td width="12%"><center>0</center>
105: * <td colspan="5"><center>bits 10-6</center>
106: * </tr>
107: * </table>
108: * </td>
109: * </tr>
110: * <tr>
111: * <th id="byte2">Byte 2</th>
112: * <td>
113: * <table border="1" cellspacing="0" width="100%">
114: * <tr>
115: * <td width="12%"><center>1</center>
116: * <td width="13%"><center>0</center>
117: * <td colspan="6"><center>bits 5-0</center>
118: * </tr>
119: * </table>
120: * </td>
121: * </tr>
122: * </table>
123: * </blockquote>
124: *
125: * <br>
126: * <code>char</code> values in the range <code>'&#92;u0800'</code> to
127: * <code>'&#92;uFFFF'</code> are represented by three bytes:
128: *
129: * <blockquote>
130: * <table border="1" cellspacing="0" cellpadding="8" width="50%"
131: * summary="Bit values and bytes">
132: * <tr>
133: * <td></td>
134: * <th id="bit">Bit Values</th>
135: * </tr>
136: * <tr>
137: * <th id="byte1">Byte 1</th>
138: * <td>
139: * <table border="1" cellspacing="0" width="100%">
140: * <tr>
141: * <td width="12%"><center>1</center>
142: * <td width="13%"><center>1</center>
143: * <td width="12%"><center>1</center>
144: * <td width="13%"><center>0</center>
145: * <td colspan="4"><center>bits 15-12</center>
146: * </tr>
147: * </table>
148: * </td>
149: * </tr>
150: * <tr>
151: * <th id="byte2">Byte 2</th>
152: * <td>
153: * <table border="1" cellspacing="0" width="100%">
154: * <tr>
155: * <td width="12%"><center>1</center>
156: * <td width="13%"><center>0</center>
157: * <td colspan="6"><center>bits 11-6</center>
158: * </tr>
159: * </table>
160: * </td>
161: * </tr>
162: * <tr>
163: * <th id="byte3">Byte 3</th>
164: * <td>
165: * <table border="1" cellspacing="0" width="100%">
166: * <tr>
167: * <td width="12%"><center>1</center>
168: * <td width="13%"><center>0</center>
169: * <td colspan="6"><center>bits 5-0</center>
170: * </tr>
171: * </table>
172: * </td>
173: * </tr>
174: * </table>
175: * </blockquote>
176: *
177: * <p>
178: * The differences between this format and the
179: * standard UTF-8 format are the following:
180: * <ul>
181: * <li>The null byte <code>'&#92;u0000'</code> is encoded in 2-byte format
182: * rather than 1-byte, so that the encoded strings never have
183: * embedded nulls.
184: * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
185: * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
186: * are represented in the form of surrogate pairs.
187: * </ul>
188: * @author Frank Yellin
189: * @version 1.31, 05/05/07
190: * @see java.io.DataInputStream
191: * @see java.io.DataOutput
192: * @since JDK1.0
193: */
public interface DataInput {
    /**
     * Reads some bytes from an input stream and stores them into the buffer
     * array <code>b</code>. The number of bytes read is equal to the length
     * of <code>b</code>.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li><code>b.length</code> bytes of input data are available, in which
     * case a normal return is made.
     *
     * <li>End of file is detected, in which case an
     * <code>EOFException</code> is thrown.
     *
     * <li>An I/O error occurs, in which case an <code>IOException</code>
     * other than <code>EOFException</code> is thrown.
     * </ul>
     * <p>
     * If <code>b</code> is <code>null</code>, a
     * <code>NullPointerException</code> is thrown.
     * If <code>b.length</code> is zero, then no bytes are read. Otherwise,
     * the first byte read is stored into element <code>b[0]</code>, the
     * next one into <code>b[1]</code>, and so on.
     * If an exception is thrown from this method, then it may be that some
     * but not all bytes of <code>b</code> have been updated with data from
     * the input stream.
     *
     * @param      b   the buffer into which the data is read.
     * @exception  NullPointerException  if <code>b</code> is
     *               <code>null</code>.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b) throws IOException;

    /**
     * Reads <code>len</code> bytes from an input stream.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li><code>len</code> bytes of input data are available, in which case
     * a normal return is made.
     *
     * <li>End of file is detected, in which case an
     * <code>EOFException</code> is thrown.
     *
     * <li>An I/O error occurs, in which case an <code>IOException</code>
     * other than <code>EOFException</code> is thrown.
     * </ul>
     * <p>
     * If <code>b</code> is <code>null</code>, a
     * <code>NullPointerException</code> is thrown.
     * If <code>off</code> is negative, or <code>len</code> is negative, or
     * <code>off+len</code> is greater than the length of the array
     * <code>b</code>, then an <code>IndexOutOfBoundsException</code> is
     * thrown.
     * If <code>len</code> is zero, then no bytes are read. Otherwise, the
     * first byte read is stored into element <code>b[off]</code>, the next
     * one into <code>b[off+1]</code>, and so on. The number of bytes read
     * is, at most, equal to <code>len</code>.
     *
     * @param      b     the buffer into which the data is read.
     * @param      off   an int specifying the index in <code>b</code> at
     *                   which the first byte read is stored.
     * @param      len   an int specifying the number of bytes to read.
     * @exception  NullPointerException  if <code>b</code> is
     *               <code>null</code>.
     * @exception  IndexOutOfBoundsException  if <code>off</code> is
     *               negative, <code>len</code> is negative, or
     *               <code>off+len</code> is greater than the length of
     *               <code>b</code>.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b, int off, int len) throws IOException;

    /**
     * Makes an attempt to skip over <code>n</code> bytes of data from the
     * input stream, discarding the skipped bytes. However, it may skip over
     * some smaller number of bytes, possibly zero. This may result from any
     * of a number of conditions; reaching end of file before <code>n</code>
     * bytes have been skipped is only one possibility.
     * This method never throws an <code>EOFException</code>.
     * The actual number of bytes skipped is returned.
     *
     * @param      n   the number of bytes to be skipped.
     * @return     the number of bytes actually skipped.
     * @exception  IOException   if an I/O error occurs.
     */
    int skipBytes(int n) throws IOException;

    /**
     * Reads one input byte and returns <code>true</code> if that byte is
     * nonzero, <code>false</code> if that byte is zero.
     * This method is suitable for reading the byte written by the
     * <code>writeBoolean</code> method of interface <code>DataOutput</code>.
     *
     * @return     the <code>boolean</code> value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    boolean readBoolean() throws IOException;

    /**
     * Reads and returns one input byte.
     * The byte is treated as a signed value in the range <code>-128</code>
     * through <code>127</code>, inclusive.
     * This method is suitable for reading the byte written by the
     * <code>writeByte</code> method of interface <code>DataOutput</code>.
     *
     * @return     the 8-bit value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    byte readByte() throws IOException;

    /**
     * Reads one input byte, zero-extends it to type <code>int</code>, and
     * returns the result, which is therefore in the range <code>0</code>
     * through <code>255</code>.
     * This method is suitable for reading the byte written by the
     * <code>writeByte</code> method of interface <code>DataOutput</code>
     * if the argument to <code>writeByte</code> was intended to be a value
     * in the range <code>0</code> through <code>255</code>.
     *
     * @return     the unsigned 8-bit value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    int readUnsignedByte() throws IOException;

    /**
     * Reads two input bytes and returns a <code>short</code> value. Let
     * <code>a</code> be the first byte read and <code>b</code> be the
     * second byte. The value returned is:
     * <pre><code>(short)((a &lt;&lt; 8) | (b &amp; 0xff))
     * </code></pre>
     * This method is suitable for reading the bytes written by the
     * <code>writeShort</code> method of interface <code>DataOutput</code>.
     *
     * @return     the 16-bit value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    short readShort() throws IOException;

    /**
     * Reads two input bytes and returns an <code>int</code> value in the
     * range <code>0</code> through <code>65535</code>. Let <code>a</code>
     * be the first byte read and <code>b</code> be the second byte. The
     * value returned is:
     * <pre><code>(((a &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
     * </code></pre>
     * This method is suitable for reading the bytes written by the
     * <code>writeShort</code> method of interface <code>DataOutput</code>
     * if the argument to <code>writeShort</code> was intended to be a value
     * in the range <code>0</code> through <code>65535</code>.
     *
     * @return     the unsigned 16-bit value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    int readUnsignedShort() throws IOException;

    /**
     * Reads two input bytes and returns a <code>char</code> value.
     * Let <code>a</code> be the first byte read and <code>b</code> be the
     * second byte. The value returned is:
     * <pre><code>(char)((a &lt;&lt; 8) | (b &amp; 0xff))
     * </code></pre>
     * This method is suitable for reading bytes written by the
     * <code>writeChar</code> method of interface <code>DataOutput</code>.
     *
     * @return     the <code>char</code> value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    char readChar() throws IOException;

    /**
     * Reads four input bytes and returns an <code>int</code> value. Let
     * <code>a-d</code> be the first through fourth bytes read. The value
     * returned is:
     * <pre><code>
     * (((a &amp; 0xff) &lt;&lt; 24) | ((b &amp; 0xff) &lt;&lt; 16) |
     *  ((c &amp; 0xff) &lt;&lt; 8) | (d &amp; 0xff))
     * </code></pre>
     * This method is suitable for reading bytes written by the
     * <code>writeInt</code> method of interface <code>DataOutput</code>.
     *
     * @return     the <code>int</code> value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    int readInt() throws IOException;

    /**
     * Reads eight input bytes and returns a <code>long</code> value. Let
     * <code>a-h</code> be the first through eighth bytes read.
     * The value returned is:
     * <pre><code>
     * (((long)(a &amp; 0xff) &lt;&lt; 56) |
     *  ((long)(b &amp; 0xff) &lt;&lt; 48) |
     *  ((long)(c &amp; 0xff) &lt;&lt; 40) |
     *  ((long)(d &amp; 0xff) &lt;&lt; 32) |
     *  ((long)(e &amp; 0xff) &lt;&lt; 24) |
     *  ((long)(f &amp; 0xff) &lt;&lt; 16) |
     *  ((long)(g &amp; 0xff) &lt;&lt;  8) |
     *  ((long)(h &amp; 0xff)))
     * </code></pre>
     * <p>
     * This method is suitable for reading bytes written by the
     * <code>writeLong</code> method of interface <code>DataOutput</code>.
     *
     * @return     the <code>long</code> value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    long readLong() throws IOException;

    /**
     * Reads four input bytes and returns a <code>float</code> value. It
     * does this by first constructing an <code>int</code> value in exactly
     * the manner of the <code>readInt</code> method, then converting this
     * <code>int</code> value to a <code>float</code> in exactly the manner
     * of the method <code>Float.intBitsToFloat</code>.
     * This method is suitable for reading bytes written by the
     * <code>writeFloat</code> method of interface <code>DataOutput</code>.
     *
     * @return     the <code>float</code> value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    float readFloat() throws IOException;

    /**
     * Reads eight input bytes and returns a <code>double</code> value. It
     * does this by first constructing a <code>long</code> value in exactly
     * the manner of the <code>readLong</code> method, then converting this
     * <code>long</code> value to a <code>double</code> in exactly the
     * manner of the method <code>Double.longBitsToDouble</code>.
     * This method is suitable for reading bytes written by the
     * <code>writeDouble</code> method of interface <code>DataOutput</code>.
     *
     * @return     the <code>double</code> value read.
     * @exception  EOFException  if this stream reaches the end before
     *               reading all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    double readDouble() throws IOException;

    /**
     * Reads the next line of text from the input stream.
     * It reads successive bytes, converting each byte separately into a
     * character, until it encounters a line terminator or end of file; the
     * characters read are then returned as a <code>String</code>. Note
     * that because this method processes bytes, it does not support input
     * of the full Unicode character set.
     * <p>
     * If end of file is encountered before even one byte can be read, then
     * <code>null</code> is returned. Otherwise, each byte that is read is
     * converted to type <code>char</code> by zero-extension. If the
     * character <code>'\n'</code> is encountered, it is discarded and
     * reading ceases. If the character <code>'\r'</code> is encountered,
     * it is discarded and, if the following byte converts to the character
     * <code>'\n'</code>, then that is discarded also; reading then ceases.
     * If end of file is encountered before either of the characters
     * <code>'\n'</code> and <code>'\r'</code> is encountered, reading
     * ceases. Once reading has ceased, a <code>String</code> is returned
     * that contains all the characters read and not discarded, taken in
     * order. Note that every character in this string will have a value
     * less than <code>&#92;u0100</code>, that is, <code>(char)256</code>.
     *
     * @return     the next line of text from the input stream, or
     *             <code>null</code> if the end of file is encountered
     *             before a byte can be read.
     * @exception  IOException  if an I/O error occurs.
     */
    String readLine() throws IOException;

    /**
     * Reads in a string that has been encoded using a
     * <a href="#modified-utf-8">modified UTF-8</a> format.
     * The general contract of <code>readUTF</code> is that it reads a
     * representation of a Unicode character string encoded in modified
     * UTF-8 format; this string of characters is then returned as a
     * <code>String</code>.
     * <p>
     * First, two bytes are read and used to construct an unsigned 16-bit
     * integer in exactly the manner of the <code>readUnsignedShort</code>
     * method. This integer value is called the <i>UTF length</i> and
     * specifies the number of additional bytes to be read. These bytes are
     * then converted to characters by considering them in groups. The
     * length of each group is computed from the value of the first byte of
     * the group. The byte following a group, if any, is the first byte of
     * the next group.
     * <p>
     * If the first byte of a group matches the bit pattern
     * <code>0xxxxxxx</code> (where <code>x</code> means "may be
     * <code>0</code> or <code>1</code>"), then the group consists of just
     * that byte. The byte is zero-extended to form a character.
     * <p>
     * If the first byte of a group matches the bit pattern
     * <code>110xxxxx</code>, then the group consists of that byte
     * <code>a</code> and a second byte <code>b</code>. If there is no byte
     * <code>b</code> (because byte <code>a</code> was the last of the
     * bytes to be read), or if byte <code>b</code> does not match the bit
     * pattern <code>10xxxxxx</code>, then a
     * <code>UTFDataFormatException</code> is thrown. Otherwise, the group
     * is converted to the character:
     * <pre><code>(char)(((a &amp; 0x1F) &lt;&lt; 6) | (b &amp; 0x3F))
     * </code></pre>
     * If the first byte of a group matches the bit pattern
     * <code>1110xxxx</code>, then the group consists of that byte
     * <code>a</code> and two more bytes <code>b</code> and <code>c</code>.
     * If there is no byte <code>c</code> (because byte <code>a</code> was
     * one of the last two of the bytes to be read), or either byte
     * <code>b</code> or byte <code>c</code> does not match the bit pattern
     * <code>10xxxxxx</code>, then a <code>UTFDataFormatException</code> is
     * thrown. Otherwise, the group is converted to the character:
     * <pre><code>
     * (char)(((a &amp; 0x0F) &lt;&lt; 12) | ((b &amp; 0x3F) &lt;&lt; 6) | (c &amp; 0x3F))
     * </code></pre>
     * If the first byte of a group matches the pattern
     * <code>1111xxxx</code> or the pattern <code>10xxxxxx</code>, then a
     * <code>UTFDataFormatException</code> is thrown.
     * <p>
     * If end of file is encountered at any time during this entire
     * process, then an <code>EOFException</code> is thrown.
     * <p>
     * After every group has been converted to a character by this process,
     * the characters are gathered, in the same order in which their
     * corresponding groups were read from the input stream, to form a
     * <code>String</code>, which is returned.
     * <p>
     * The <code>writeUTF</code> method of interface <code>DataOutput</code>
     * may be used to write data that is suitable for reading by this
     * method.
     *
     * @return     a Unicode string.
     * @exception  EOFException            if this stream reaches the end
     *               before reading all the bytes.
     * @exception  IOException             if an I/O error occurs.
     * @exception  UTFDataFormatException  if the bytes do not represent a
     *               valid modified UTF-8 encoding of a string.
     */
    String readUTF() throws IOException;
}
|