1. /*
  2. * @(#)URI.java 1.33 03/01/23
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.net;
  8. import java.io.IOException;
  9. import java.io.InvalidObjectException;
  10. import java.io.ObjectInputStream;
  11. import java.io.ObjectOutputStream;
  12. import java.io.Serializable;
  13. import java.nio.ByteBuffer;
  14. import java.nio.CharBuffer;
  15. import java.nio.charset.CharsetDecoder;
  16. import java.nio.charset.CharsetEncoder;
  17. import java.nio.charset.CoderResult;
  18. import java.nio.charset.CodingErrorAction;
  19. import java.nio.charset.CharacterCodingException;
  20. import sun.nio.cs.ThreadLocalCoders;
  21. import sun.text.Normalizer;
  22. import java.lang.Character; // for javadoc
  23. import java.lang.NullPointerException; // for javadoc
  24. /**
  25. * Represents a Uniform Resource Identifier (URI) reference.
  26. *
  27. * <p> An instance of this class represents a URI reference as defined by <a
  28. * href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
  29. * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
  30. * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
  31. * Literal IPv6 Addresses in URLs</i></a> and with the minor deviations noted
  32. * below. This class provides constructors for creating URI instances from
  33. * their components or by parsing their string forms, methods for accessing the
  34. * various components of an instance, and methods for normalizing, resolving,
  35. * and relativizing URI instances. Instances of this class are immutable.
  36. *
  37. *
  38. * <h4> URI syntax and components </h4>
  39. *
  40. * At the highest level a URI reference (hereinafter simply "URI") in string
  41. * form has the syntax
  42. *
  43. * <blockquote>
  44. * [<i>scheme</i><tt><b>:</b></tt>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>]
  45. * </blockquote>
  46. *
  47. * where square brackets [...] delineate optional components and the characters
  48. * <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves.
  49. *
  50. * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
  51. * said to be <i>relative</i>. URIs are also classified according to whether
  52. * they are <i>opaque</i> or <i>hierarchical</i>.
  53. *
  54. * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
  55. * not begin with a slash character (<tt>'/'</tt>). Opaque URIs are not
  56. * subject to further parsing. Some examples of opaque URIs are:
  57. *
  58. * <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
  59. * <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr>
  60. * <tr><td><tt>news:comp.lang.java</tt></td></tr>
  61. * <tr><td><tt>urn:isbn:096139210x</tt></td></tr>
  62. * </table></blockquote>
  63. *
  64. * <p> A <i>hierarchical</i> URI is either an absolute URI whose
  65. * scheme-specific part begins with a slash character, or a relative URI, that
  66. * is, a URI that does not specify a scheme. Some examples of hierarchical
  67. * URIs are:
  68. *
  69. * <blockquote>
  70. * <tt>http://java.sun.com/j2se/1.3/</tt><br>
  71. * <tt>docs/guide/collections/designfaq.html#28</tt><br>
  72. * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br>
  73. * <tt>file:///~/calendar</tt>
  74. * </blockquote>
  75. *
  76. * <p> A hierarchical URI is subject to further parsing according to the syntax
  77. *
  78. * <blockquote>
  79. * [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>]
  80. * </blockquote>
  81. *
  82. * where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>,
  83. * <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves. The
  84. * scheme-specific part of a hierarchical URI consists of the characters
  85. * between the scheme and fragment components.
  86. *
  87. * <p> The authority component of a hierarchical URI is, if specified, either
  88. * <i>server-based</i> or <i>registry-based</i>. A server-based authority
  89. * parses according to the familiar syntax
  90. *
  91. * <blockquote>
  92. * [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>]
  93. * </blockquote>
  94. *
  95. * where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for
  96. * themselves. Nearly all URI schemes currently in use are server-based. An
  97. * authority component that does not parse in this way is considered to be
  98. * registry-based.
  99. *
  100. * <p> The path component of a hierarchical URI is itself said to be absolute
  101. * if it begins with a slash character (<tt>'/'</tt>); otherwise it is
  102. * relative. The path of a hierarchical URI that is either absolute or
  103. * specifies an authority is always absolute.
  104. *
  105. * <p> All told, then, a URI instance has the following nine components:
  106. *
  107. * <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
  108. * <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
  109. * <tr><td>scheme</td><td><tt>String</tt></td></tr>
  110. * <tr><td>scheme-specific-part    </td><td><tt>String</tt></td></tr>
  111. * <tr><td>authority</td><td><tt>String</tt></td></tr>
  112. * <tr><td>user-info</td><td><tt>String</tt></td></tr>
  113. * <tr><td>host</td><td><tt>String</tt></td></tr>
  114. * <tr><td>port</td><td><tt>int</tt></td></tr>
  115. * <tr><td>path</td><td><tt>String</tt></td></tr>
  116. * <tr><td>query</td><td><tt>String</tt></td></tr>
  117. * <tr><td>fragment</td><td><tt>String</tt></td></tr>
  118. * </table></blockquote>
  119. *
  120. * In a given instance any particular component is either <i>undefined</i> or
  121. * <i>defined</i> with a distinct value. Undefined string components are
  122. * represented by <tt>null</tt>, while undefined integer components are
  123. * represented by <tt>-1</tt>. A string component may be defined to have the
  124. * empty string as its value; this is not equivalent to that component being
  125. * undefined.
  126. *
  127. * <p> Whether a particular component is or is not defined in an instance
  128. * depends upon the type of the URI being represented. An absolute URI has a
  129. * scheme component. An opaque URI has a scheme, a scheme-specific part, and
  130. * possibly a fragment, but has no other components. A hierarchical URI always
  131. * has a path (though it may be empty) and a scheme-specific-part (which at
  132. * least contains the path), and may have any of the other components. If the
  133. * authority component is present and is server-based then the host component
  134. * will be defined and the user-information and port components may be defined.
  135. *
  136. *
  137. * <h4> Operations on URI instances </h4>
  138. *
  139. * The key operations supported by this class are those of
  140. * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
  141. *
  142. * <p> <i>Normalization</i> is the process of removing unnecessary <tt>"."</tt>
  143. * and <tt>".."</tt> segments from the path component of a hierarchical URI.
  144. * Each <tt>"."</tt> segment is simply removed. A <tt>".."</tt> segment is
  145. * removed only if it is preceded by a non-<tt>".."</tt> segment.
  146. * Normalization has no effect upon opaque URIs.
  147. *
  148. * <p> <i>Resolution</i> is the process of resolving one URI against another,
  149. * <i>base</i> URI. The resulting URI is constructed from components of both
  150. * URIs in the manner specified by RFC 2396, taking components from the
  151. * base URI for those not specified in the original. For hierarchical URIs,
  152. * the path of the original is resolved against the path of the base and then
  153. * normalized. The result, for example, of resolving
  154. *
  155. * <blockquote>
  156. * <tt>docs/guide/collections/designfaq.html#28          </tt>(1)
  157. * </blockquote>
  158. *
  159. * against the base URI <tt>http://java.sun.com/j2se/1.3/</tt> is the result
  160. * URI
  161. *
  162. * <blockquote>
  163. * <tt>http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28</tt>
  164. * </blockquote>
  165. *
  166. * Resolving the relative URI
  167. *
  168. * <blockquote>
  169. * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java    </tt>(2)
  170. * </blockquote>
  171. *
  172. * against this result yields, in turn,
  173. *
  174. * <blockquote>
  175. * <tt>http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java</tt>
  176. * </blockquote>
  177. *
  178. * Resolution of both absolute and relative URIs, and of both absolute and
  179. * relative paths in the case of hierarchical URIs, is supported. Resolving
  180. * the URI <tt>file:///~/calendar</tt> against any other URI simply yields the
  181. * original URI, since it is absolute. Resolving the relative URI (2) above
  182. * against the relative base URI (1) yields the normalized, but still relative,
  183. * URI
  184. *
  185. * <blockquote>
  186. * <tt>demo/jfc/SwingSet2/src/SwingSet2.java</tt>
  187. * </blockquote>
  188. *
  189. * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
  190. * two normalized URIs <i>u</i> and <i>v</i>,
  191. *
  192. * <blockquote>
  193. * <i>u</i><tt>.relativize(</tt><i>u</i><tt>.resolve(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  and<br>
  194. * <i>u</i><tt>.resolve(</tt><i>u</i><tt>.relativize(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  .<br>
  195. * </blockquote>
  196. *
  197. * This operation is often useful when constructing a document containing URIs
  198. * that must be made relative to the base URI of the document wherever
  199. * possible. For example, relativizing the URI
  200. *
  201. * <blockquote>
  202. * <tt>http://java.sun.com/j2se/1.3/docs/guide/index.html</tt>
  203. * </blockquote>
  204. *
  205. * against the base URI
  206. *
  207. * <blockquote>
  208. * <tt>http://java.sun.com/j2se/1.3</tt>
  209. * </blockquote>
  210. *
  211. * yields the relative URI <tt>docs/guide/index.html</tt>.
  212. *
  213. *
  214. * <h4> Character categories </h4>
  215. *
  216. * RFC 2396 specifies precisely which characters are permitted in the
  217. * various components of a URI reference. The following categories, most of
  218. * which are taken from that specification, are used below to describe these
  219. * constraints:
  220. *
  221. * <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
  222. * <tr><th valign=top><i>alpha</i></th>
  223. * <td>The US-ASCII alphabetic characters,
  224. * <tt>'A'</tt> through <tt>'Z'</tt>
  225. * and <tt>'a'</tt> through <tt>'z'</tt></td></tr>
  226. * <tr><th valign=top><i>digit</i></th>
  227. * <td>The US-ASCII decimal digit characters,
  228. * <tt>'0'</tt> through <tt>'9'</tt></td></tr>
  229. * <tr><th valign=top><i>alphanum</i></th>
  230. * <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
  231. * <tr><th valign=top><i>unreserved</i>    </th>
  232. * <td>All <i>alphanum</i> characters together with those in the string
  233. * <tt>"_-!.~'()*"</tt></td></tr>
  234. * <tr><th valign=top><i>punct</i></th>
  235. * <td>The characters in the string <tt>",;:$&+="</tt></td></tr>
  236. * <tr><th valign=top><i>reserved</i></th>
  237. * <td>All <i>punct</i> characters together with those in the string
  238. * <tt>"?/[]@"</tt></td></tr>
  239. * <tr><th valign=top><i>escaped</i></th>
  240. * <td>Escaped octets, that is, triplets consisting of the percent
  241. * character (<tt>'%'</tt>) followed by two hexadecimal digits
  242. * (<tt>'0'</tt>-<tt>'9'</tt>, <tt>'A'</tt>-<tt>'F'</tt>, and
  243. * <tt>'a'</tt>-<tt>'f'</tt>)</td></tr>
  244. * <tr><th valign=top><i>other</i></th>
  245. * <td>The Unicode characters that are not in the US-ASCII character set,
  246. * are not control characters (according to the {@link
  247. * java.lang.Character#isISOControl(char) Character.isISOControl}
  248. * method), and are not space characters (according to the {@link
  249. * java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
  250. * method)  (<i><b>Deviation from RFC 2396</b>, which is
  251. * limited to US-ASCII</i>)</td></tr>
  252. * </table></blockquote>
  253. *
  254. * <p><a name="legal-chars"></a> The set of all legal URI characters consists of
  255. * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
  256. * characters.
  257. *
  258. *
  259. * <h4> Escaped octets, quotation, encoding, and decoding </h4>
  260. *
  261. * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
  262. * fragment components. Escaping serves two purposes in URIs:
  263. *
  264. * <ul>
  265. *
  266. * <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
  267. * conform strictly to RFC 2396 by not containing any <i>other</i>
  268. * characters. </p></li>
  269. *
  270. * <li><p> To <i>quote</i> characters that are otherwise illegal in a
  271. * component. The user-info, path, query, and fragment components differ
  272. * slightly in terms of which characters are considered legal and illegal.
  273. * </p></li>
  274. *
  275. * </ul>
  276. *
  277. * These purposes are served in this class by three related operations:
  278. *
  279. * <ul>
  280. *
  281. * <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
  282. * with the sequence of escaped octets that represent that character in the
  283. * UTF-8 character set. The Euro currency symbol (<tt>'&#92;u20AC'</tt>),
  284. * for example, is encoded as <tt>"%E2%82%AC"</tt>. <i>(<b>Deviation from
  285. * RFC 2396</b>, which does not specify any particular character
  286. * set.)</i> </p></li>
  287. *
  288. * <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
  289. * encoding it. The space character, for example, is quoted by replacing it
  290. * with <tt>"%20"</tt>. UTF-8 contains US-ASCII, hence for US-ASCII
  291. * characters this transformation has exactly the effect required by
  292. * RFC 2396. </p></li>
  293. *
  294. * <li><p><a name="decode"></a> A sequence of escaped octets is <i>decoded</i> by
  295. * replacing it with the sequence of characters that it represents in the
  296. * UTF-8 character set. UTF-8 contains US-ASCII, hence decoding has the
  297. * effect of de-quoting any quoted US-ASCII characters as well as that of
  298. * decoding any encoded non-US-ASCII characters. If a <a
  299. * href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
  300. * when decoding the escaped octets then the erroneous octets are replaced by
  301. * <tt>'&#92;uFFFD'</tt>, the Unicode replacement character. </p></li>
  302. *
  303. * </ul>
  304. *
  305. * These operations are exposed in the constructors and methods of this class
  306. * as follows:
  307. *
  308. * <ul>
  309. *
  310. * <li><p> The {@link #URI(java.lang.String) </code>single-argument
  311. * constructor<code>} requires any illegal characters in its argument to be
  312. * quoted and preserves any escaped octets and <i>other</i> characters that
  313. * are present. </p></li>
  314. *
  315. * <li><p> The {@link
  316. * #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
  317. * </code>multi-argument constructors<code>} quote illegal characters as
  318. * required by the components in which they appear. The percent character
  319. * (<tt>'%'</tt>) is always quoted by these constructors. Any <i>other</i>
  320. * characters are preserved. </p></li>
  321. *
  322. * <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
  323. * getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
  324. * getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
  325. * #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
  326. * values of their corresponding components in raw form, without interpreting
  327. * any escaped octets. The strings returned by these methods may contain
  328. * both escaped octets and <i>other</i> characters, and will not contain any
  329. * illegal characters. </p></li>
  330. *
  331. * <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
  332. * getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
  333. * getFragment}, {@link #getAuthority() getAuthority}, and {@link
  334. * #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
  335. * octets in their corresponding components. The strings returned by these
  336. * methods may contain both <i>other</i> characters and illegal characters,
  337. * and will not contain any escaped octets. </p></li>
  338. *
  339. * <li><p> The {@link #toString() toString} method returns a URI string with
  340. * all necessary quotation but which may contain <i>other</i> characters.
  341. * </p></li>
  342. *
  343. * <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
  344. * quoted and encoded URI string that does not contain any <i>other</i>
  345. * characters. </p></li>
  346. *
  347. * </ul>
  348. *
  349. *
  350. * <h4> Identities </h4>
  351. *
  352. * For any URI <i>u</i>, it is always the case that
  353. *
  354. * <blockquote>
  355. * <tt>new URI(</tt><i>u</i><tt>.toString()).equals(</tt><i>u</i><tt>)</tt> .
  356. * </blockquote>
  357. *
  358. * For any URI <i>u</i> that does not contain redundant syntax such as two
  359. * slashes before an empty authority (as in <tt>file:///tmp/</tt> ) or a
  360. * colon following a host name but no port (as in
  361. * <tt>http://java.sun.com:</tt> ), and that does not encode characters
  362. * except those that must be quoted, the following identities also hold:
  363. *
  364. * <blockquote>
  365. * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
  366. *         </tt><i>u</i><tt>.getSchemeSpecificPart(),<br>
  367. *         </tt><i>u</i><tt>.getFragment())<br>
  368. * .equals(</tt><i>u</i><tt>)</tt>
  369. * </blockquote>
  370. *
  371. * in all cases,
  372. *
  373. * <blockquote>
  374. * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
  375. *         </tt><i>u</i><tt>.getAuthority(),<br>
  376. *         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
  377. *         </tt><i>u</i><tt>.getFragment())<br>
  378. * .equals(</tt><i>u</i><tt>)</tt>
  379. * </blockquote>
  380. *
  381. * if <i>u</i> is hierarchical, and
  382. *
  383. * <blockquote>
  384. * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
  385. *         </tt><i>u</i><tt>.getUserInfo(), </tt><i>u</i><tt>.getHost(), </tt><i>u</i><tt>.getPort(),<br>
  386. *         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
  387. *         </tt><i>u</i><tt>.getFragment())<br>
  388. * .equals(</tt><i>u</i><tt>)</tt>
  389. * </blockquote>
  390. *
  391. * if <i>u</i> is hierarchical and has either no authority or a server-based
  392. * authority.
  393. *
  394. *
  395. * <h4> URIs, URLs, and URNs </h4>
  396. *
  397. * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
  398. * resource <i>locator</i>. Hence every URL is a URI, abstractly speaking, but
  399. * not every URI is a URL. This is because there is another subcategory of
  400. * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
  401. * specify how to locate them. The <tt>mailto</tt>, <tt>news</tt>, and
  402. * <tt>isbn</tt> URIs shown above are examples of URNs.
  403. *
  404. * <p> The conceptual distinction between URIs and URLs is reflected in the
  405. * differences between this class and the {@link URL} class.
  406. *
  407. * <p> An instance of this class represents a URI reference in the syntactic
  408. * sense defined by RFC 2396. A URI may be either absolute or relative.
  409. * A URI string is parsed according to the generic syntax without regard to the
  410. * scheme, if any, that it specifies. No lookup of the host, if any, is
  411. * performed, and no scheme-dependent stream handler is constructed. Equality,
  412. * hashing, and comparison are defined strictly in terms of the character
  413. * content of the instance. In other words, a URI instance is little more than
  414. * a structured string that supports the syntactic, scheme-independent
  415. * operations of comparison, normalization, resolution, and relativization.
  416. *
  417. * <p> An instance of the {@link URL} class, by contrast, represents the
  418. * syntactic components of a URL together with some of the information required
  419. * to access the resource that it describes. A URL must be absolute, that is,
  420. * it must always specify a scheme. A URL string is parsed according to its
  421. * scheme. A stream handler is always established for a URL, and in fact it is
  422. * impossible to create a URL instance for a scheme for which no handler is
  423. * available. Equality and hashing depend upon both the scheme and the
  424. * Internet address of the host, if any; comparison is not defined. In other
  425. * words, a URL is a structured string that supports the syntactic operation of
  426. * resolution as well as the network I/O operations of looking up the host and
  427. * opening a connection to the specified resource.
  428. *
  429. *
  430. * @version 1.33, 03/01/23
  431. * @author Mark Reinhold
  432. * @since 1.4
  433. *
  434. * @see <a href="http://ietf.org/rfc/rfc2279.txt"><i>RFC 2279: UTF-8, a
  435. * transformation format of ISO 10646</i></a>, <br><a
  436. * href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6 Addressing
  437. * Architecture</i></a>, <br><a
  438. * href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
  439. * Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
  440. * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
  441. * Literal IPv6 Addresses in URLs</i></a>, <br><a
  442. * href="URISyntaxException.html">URISyntaxException</a>
  443. */
  444. public final class URI
  445. implements Comparable, Serializable
  446. {
  447. // Note: Comments containing the word "ASSERT" indicate places where a
  448. // throw of an InternalError should be replaced by an appropriate assertion
  449. // statement once asserts are enabled in the build.
  450. static final long serialVersionUID = -6052424284110960213L; // Explicit version id for the Serializable form
  451. // -- Properties and components of this instance --
  452. // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
  453. private transient String scheme; // null ==> relative URI
  454. private transient String fragment; // null ==> undefined (no fragment)
  455. // Hierarchical URI components: [//<authority>]<path>[?<query>]
  456. private transient String authority; // Registry- or server-based; null ==> undefined
  457. // Server-based authority: [<userInfo>@]<host>[:<port>]
  458. private transient String userInfo; // null ==> undefined
  459. private transient String host; // null ==> registry-based
  460. private transient int port = -1; // -1 ==> undefined
  461. // Remaining components of hierarchical URIs
  462. private transient String path; // null ==> opaque
  463. private transient String query; // null ==> undefined
  464. // The remaining fields may be computed on demand
  465. private volatile transient String schemeSpecificPart; // null until computed or set
  466. private volatile transient int hash; // Zero ==> undefined
  467. private volatile transient String decodedUserInfo = null; // decoded* fields: presumably caches of the decoded component strings, null until computed -- confirm against the getters
  468. private volatile transient String decodedAuthority = null;
  469. private volatile transient String decodedPath = null;
  470. private volatile transient String decodedQuery = null;
  471. private volatile transient String decodedFragment = null;
  472. private volatile transient String decodedSchemeSpecificPart = null;
  473. /**
  474. * The string form of this URI.
  475. *
  476. * @serial
  477. */
  478. private volatile String string; // The only serializable field (every other instance field is transient)
  479. // -- Constructors and factories --
  480. private URI() { } // Used internally; the empty body leaves every component at its undefined default (null, or -1 for port)
  481. /**
  482. * Constructs a URI by parsing the given string.
  483. *
  484. * <p> This constructor parses the given string exactly as specified by the
  485. * grammar in <a
  486. * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
  487. * Appendix A, <b><i>except for the following deviations:</i></b> </p>
  488. *
  489. * <ul type=disc>
  490. *
  491. * <li><p> An empty authority component is permitted as long as it is
  492. * followed by a non-empty path, a query component, or a fragment
  493. * component. This allows the parsing of URIs such as
  494. * <tt>"file:///foo/bar"</tt>, which seems to be the intent of
  495. * RFC 2396 although the grammar does not permit it. If the
  496. * authority component is empty then the user-information, host, and port
  497. * components are undefined. </p></li>
  498. *
  499. * <li><p> Empty relative paths are permitted; this seems to be the
  500. * intent of RFC 2396 although the grammar does not permit it. The
  501. * primary consequence of this deviation is that a standalone fragment
  502. * such as <tt>"#foo"</tt> parses as a relative URI with an empty path
  503. * and the given fragment, and can be usefully <a
  504. * href="#resolve-frag">resolved</a> against a base URI. </p></li>
  505. *
  506. * <li><p> IPv4 addresses in host components are parsed rigorously, as
  507. * specified by <a
  508. * href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>: Each
  509. * element of a dotted-quad address must contain no more than three
  510. * decimal digits. Each element is further constrained to have a value
  511. * no greater than 255. </p></li>
  512. *
  513. * <li> <p> Hostnames in host components that comprise only a single
  514. * domain label are permitted to start with an <i>alphanum</i>
  515. * character. This seems to be the intent of <a
  516. * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
  517. * section 3.2.2 although the grammar does not permit it. The
  518. * consequence of this deviation is that the authority component of a
  519. * hierarchical URI such as <tt>s://123</tt>, will parse as a server-based
  520. * authority. </p></li>
  521. *
  522. * <li><p> IPv6 addresses are permitted for the host component. An IPv6
  523. * address must be enclosed in square brackets (<tt>'['</tt> and
  524. * <tt>']'</tt>) as specified by <a
  525. * href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>. The
  526. * IPv6 address itself must parse according to <a
  527. * href="http://www.ietf.org/rfc/rfc2373.txt">RFC 2373</a>. IPv6
  528. * addresses are further constrained to describe no more than sixteen
  529. * bytes of address information, a constraint implicit in RFC 2373
  530. * but not expressible in the grammar. </p></li>
  531. *
  532. * <li><p> Characters in the <i>other</i> category are permitted wherever
  533. * RFC 2396 permits <i>escaped</i> octets, that is, in the
  534. * user-information, path, query, and fragment components, as well as in
  535. * the authority component if the authority is registry-based. This
  536. * allows URIs to contain Unicode characters beyond those in the US-ASCII
  537. * character set. </p></li>
  538. *
  539. * </ul>
  540. *
  541. * @param str The string to be parsed into a URI
  542. *
  543. * @throws NullPointerException
  544. * If <tt>str</tt> is <tt>null</tt>
  545. *
  546. * @throws URISyntaxException
  547. * If the given string violates RFC 2396, as augmented
  548. * by the above deviations
  549. */
  550. public URI(String str) throws URISyntaxException {
  551. new Parser(str).parse(false); // Parser is declared outside this view; NOTE(review): false presumably means "do not require a server-based authority" -- confirm
  552. }
  553. /**
  554. * Constructs a hierarchical URI from the given components.
  555. *
  556. * <p> If a scheme is given then the path, if also given, must either be
  557. * empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
  558. * component of the new URI may be left undefined by passing <tt>null</tt>
  559. * for the corresponding parameter or, in the case of the <tt>port</tt>
  560. * parameter, by passing <tt>-1</tt>.
  561. *
  562. * <p> This constructor first builds a URI string from the given components
  563. * according to the rules specified in <a
  564. * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
  565. * section 5.2, step 7: </p>
  566. *
  567. * <ol>
  568. *
  569. * <li><p> Initially, the result string is empty. </p></li>
  570. *
  571. * <li><p> If a scheme is given then it is appended to the result,
  572. * followed by a colon character (<tt>':'</tt>). </p></li>
  573. *
  574. * <li><p> If user information, a host, or a port are given then the
  575. * string <tt>"//"</tt> is appended. </p></li>
  576. *
  577. * <li><p> If user information is given then it is appended, followed by
  578. * a commercial-at character (<tt>'@'</tt>). Any character not in the
  579. * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
  580. * categories is <a href="#quote">quoted</a>. </p></li>
  581. *
  582. * <li><p> If a host is given then it is appended. If the host is a
  583. * literal IPv6 address but is not enclosed in square brackets
  584. * (<tt>'['</tt> and <tt>']'</tt>) then the square brackets are added.
  585. * </p></li>
  586. *
  587. * <li><p> If a port number is given then a colon character
  588. * (<tt>':'</tt>) is appended, followed by the port number in decimal.
  589. * </p></li>
  590. *
  591. * <li><p> If a path is given then it is appended. Any character not in
  592. * the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
  593. * categories, and not equal to the slash character (<tt>'/'</tt>) or the
  594. * commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
  595. *
  596. * <li><p> If a query is given then a question-mark character
  597. * (<tt>'?'</tt>) is appended, followed by the query. Any character that
  598. * is not a <a href="#legal-chars">legal URI character</a> is quoted.
  599. * </p></li>
  600. *
  601. * <li><p> Finally, if a fragment is given then a hash character
  602. * (<tt>'#'</tt>) is appended, followed by the fragment. Any character
  603. * that is not a legal URI character is quoted. </p></li>
  604. *
  605. * </ol>
  606. *
  607. * <p> The resulting URI string is then parsed as if by invoking the {@link
  608. * #URI(String)} constructor and then invoking the {@link
  609. * #parseServerAuthority()} method upon the result; this may cause a {@link
  610. * URISyntaxException} to be thrown. </p>
  611. *
  612. * @param scheme Scheme name
  613. * @param userInfo User name and authorization information
  614. * @param host Host name
  615. * @param port Port number
  616. * @param path Path
  617. * @param query Query
  618. * @param fragment Fragment
  619. *
  620. * @throws URISyntaxException
  621. * If both a scheme and a path are given but the path is relative,
  622. * if the URI string constructed from the given components violates
  623. * RFC 2396, or if the authority component of the string is
  624. * present but cannot be parsed as a server-based authority
  625. */
  626. public URI(String scheme,
  627. String userInfo, String host, int port,
  628. String path, String query, String fragment)
  629. throws URISyntaxException
  630. {
  631. String s = toString(scheme, null, // Assemble the URI string from the given components
  632. null, userInfo, host, port, // NOTE(review): the two null arguments are presumably the opaque part and the authority -- confirm against toString
  633. path, query, fragment);
  634. checkPath(s, scheme, path); // NOTE(review): presumably rejects a relative path when a scheme is given, as the Javadoc requires -- confirm
  635. new Parser(s).parse(true); // NOTE(review): true presumably requires the authority to parse as server-based, per the Javadoc -- confirm
  636. }
  637. /**
  638. * Constructs a hierarchical URI from the given components.
  639. *
  640. * <p> If a scheme is given then the path, if also given, must either be
  641. * empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
  642. * component of the new URI may be left undefined by passing <tt>null</tt>
  643. * for the corresponding parameter.
  644. *
  645. * <p> This constructor first builds a URI string from the given components
  646. * according to the rules specified in <a
  647. * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
  648. * section 5.2, step 7: </p>
  649. *
  650. * <ol>
  651. *
  652. * <li><p> Initially, the result string is empty. </p></li>
  653. *
  654. * <li><p> If a scheme is given then it is appended to the result,
  655. * followed by a colon character (<tt>':'</tt>). </p></li>
  656. *
  657. * <li><p> If an authority is given then the string <tt>"//"</tt> is
  658. * appended, followed by the authority. If the authority contains a
  659. * literal IPv6 address then the address must be enclosed in square
  660. * brackets (<tt>'['</tt> and <tt>']'</tt>). Any character not in the
  661. * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
  662. * categories, and not equal to the commercial-at character
  663. * (<tt>'@'</tt>), is <a href="#quote">quoted</a>. </p></li>
  664. *
  665. * <li><p> If a path is given then it is appended. Any character not in
  666. * the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
  667. * categories, and not equal to the slash character (<tt>'/'</tt>) or the
  668. * commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
  669. *
  670. * <li><p> If a query is given then a question-mark character
  671. * (<tt>'?'</tt>) is appended, followed by the query. Any character that
  672. * is not a <a href="#legal-chars">legal URI character</a> is quoted.
  673. * </p></li>
  674. *
  675. * <li><p> Finally, if a fragment is given then a hash character
  676. * (<tt>'#'</tt>) is appended, followed by the fragment. Any character
  677. * that is not a legal URI character is quoted. </p></li>
  678. *
  679. * </ol>
  680. *
  681. * <p> The resulting URI string is then parsed as if by invoking the {@link
  682. * #URI(String)} constructor and then invoking the {@link
  683. * #parseServerAuthority()} method upon the result; this may cause a {@link
  684. * URISyntaxException} to be thrown. </p>
  685. *
  686. * @param scheme Scheme name
  687. * @param authority Authority
  688. * @param path Path
  689. * @param query Query
  690. * @param fragment Fragment
  691. *
  692. * @throws URISyntaxException
  693. * If both a scheme and a path are given but the path is relative,
  694. * if the URI string constructed from the given components violates
  695. * RFC 2396, or if the authority component of the string is
  696. * present but cannot be parsed as a server-based authority
  697. */
  698. public URI(String scheme,
  699. String authority,
  700. String path, String query, String fragment)
  701. throws URISyntaxException
  702. {
  703. String s = toString(scheme, null,
  704. authority, null, null, -1,
  705. path, query, fragment);
  706. checkPath(s, scheme, path);
  707. new Parser(s).parse(false);
  708. }
  709. /**
  710. * Constructs a hierarchical URI from the given components.
  711. *
  712. * <p> A component may be left undefined by passing <tt>null</tt>.
  713. *
  714. * <p> This convenience constructor works as if by invoking the
  715. * seven-argument constructor as follows:
  716. *
  717. * <blockquote><tt>
  718. * new {@link #URI(String, String, String, int, String, String, String)
  719. * URI}(scheme, null, host, -1, path, null, fragment);
  720. * </tt></blockquote>
  721. *
  722. * @param scheme Scheme name
  723. * @param host Host name
  724. * @param path Path
  725. * @param fragment Fragment
  726. *
  727. * @throws URISyntaxException
  728. * If the URI string constructed from the given components
  729. * violates RFC 2396
  730. */
  public URI(String scheme, String host, String path, String fragment)
      throws URISyntaxException
  {
      // Forward to the seven-argument constructor with the components this
      // overload does not accept left undefined.
      this(scheme,
           null,        // userInfo: undefined
           host,
           -1,          // port: undefined
           path,
           null,        // query: undefined
           fragment);
  }
  736. /**
  737. * Constructs a URI from the given components.
  738. *
  739. * <p> A component may be left undefined by passing <tt>null</tt>.
  740. *
  741. * <p> This constructor first builds a URI in string form using the given
  742. * components as follows: </p>
  743. *
  744. * <ol>
  745. *
  746. * <li><p> Initially, the result string is empty. </p></li>
  747. *
  748. * <li><p> If a scheme is given then it is appended to the result,
  749. * followed by a colon character (<tt>':'</tt>). </p></li>
  750. *
  751. * <li><p> If a scheme-specific part is given then it is appended. Any
  752. * character that is not a <a href="#legal-chars">legal URI character</a>
  753. * is <a href="#quote">quoted</a>. </p></li>
  754. *
  755. * <li><p> Finally, if a fragment is given then a hash character
  756. * (<tt>'#'</tt>) is appended to the string, followed by the fragment.
  757. * Any character that is not a legal URI character is quoted. </p></li>
  758. *
  759. * </ol>
  760. *
  761. * <p> The resulting URI string is then parsed in order to create the new
  762. * URI instance as if by invoking the {@link #URI(String)} constructor;
  763. * this may cause a {@link URISyntaxException} to be thrown. </p>
  764. *
  765. * @param scheme Scheme name
  766. * @param ssp Scheme-specific part
  767. * @param fragment Fragment
  768. *
  769. * @throws URISyntaxException
  770. * If the URI string constructed from the given components
  771. * violates RFC 2396
  772. */
  773. public URI(String scheme, String ssp, String fragment)
  774. throws URISyntaxException
  775. {
  776. new Parser(toString(scheme, ssp,
  777. null, null, null, -1,
  778. null, null, fragment))
  779. .parse(false);
  780. }
  781. /**
  782. * Creates a URI by parsing the given string.
  783. *
  784. * <p> This convenience factory method works as if by invoking the {@link
  785. * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
  786. * constructor is caught and wrapped in a new {@link
  787. * IllegalArgumentException} object, which is then thrown.
  788. *
  789. * <p> This method is provided for use in situations where it is known that
  790. * the given string is a legal URI, for example for URI constants declared
  791. * within a program, and so it would be considered a programming error
  792. * for the string not to parse as such. The constructors, which throw
  793. * {@link URISyntaxException} directly, should be used in situations where a
  794. * URI is being constructed from user input or from some other source that
  795. * may be prone to errors. </p>
  796. *
  797. * @param str The string to be parsed into a URI
  798. * @return The new URI
  799. *
  800. * @throws NullPointerException
  801. * If <tt>str</tt> is <tt>null</tt>
  802. *
  803. * @throws IllegalArgumentException
  804. * If the given string violates RFC 2396
  805. */
  806. public static URI create(String str) {
  807. try {
  808. return new URI(str);
  809. } catch (URISyntaxException x) {
  810. IllegalArgumentException y = new IllegalArgumentException();
  811. y.initCause(x);
  812. throw y;
  813. }
  814. }
  815. // -- Operations --
  816. /**
  817. * Attempts to parse this URI's authority component, if defined, into
  818. * user-information, host, and port components.
  819. *
  820. * <p> If this URI's authority component has already been recognized as
  821. * being server-based then it will already have been parsed into
  822. * user-information, host, and port components. In this case, or if this
  823. * URI has no authority component, this method simply returns this URI.
  824. *
  825. * <p> Otherwise this method attempts once more to parse the authority
  826. * component into user-information, host, and port components, and throws
  827. * an exception describing why the authority component could not be parsed
  828. * in that way.
  829. *
  830. * <p> This method is provided because the generic URI syntax specified in
  831. * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
  832. * cannot always distinguish a malformed server-based authority from a
  833. * legitimate registry-based authority. It must therefore treat some
  834. * instances of the former as instances of the latter. The authority
  835. * component in the URI string <tt>"//foo:bar"</tt>, for example, is not a
  836. * legal server-based authority but it is legal as a registry-based
  837. * authority.
  838. *
  839. * <p> In many common situations, for example when working with URIs that are
  840. * known to be either URNs or URLs, the hierarchical URIs being used will
  841. * always be server-based. They therefore must either be parsed as such or
  842. * treated as an error. In these cases a statement such as
  843. *
  844. * <blockquote>
  845. * <tt>URI </tt><i>u</i><tt> = new URI(str).parseServerAuthority();</tt>
  846. * </blockquote>
  847. *
  848. * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
  849. * it has an authority component, has a server-based authority with proper
  850. * user-information, host, and port components. Invoking this method also
  851. * ensures that if the authority could not be parsed in that way then an
  852. * appropriate diagnostic message can be issued based upon the exception
  853. * that is thrown. </p>
  854. *
  855. * @return A URI whose authority field has been parsed
  856. * as a server-based authority
  857. *
  858. * @throws URISyntaxException
  859. * If the authority component of this URI is defined
  860. * but cannot be parsed as a server-based authority
  861. * according to RFC 2396
  862. */
  863. public URI parseServerAuthority()
  864. throws URISyntaxException
  865. {
  866. // We could be clever and cache the error message and index from the
  867. // exception thrown during the original parse, but that would require
  868. // either more fields or a more-obscure representation.
  869. if ((host != null) || (authority == null))
  870. return this;
  871. defineString();
  872. new Parser(string).parse(true);
  873. return this;
  874. }
  875. /**
  876. * Normalizes this URI's path.
  877. *
  878. * <p> If this URI is opaque, or if its path is already in normal form,
  879. * then this URI is returned. Otherwise a new URI is constructed that is
  880. * identical to this URI except that its path is computed by normalizing
  881. * this URI's path in a manner consistent with <a
  882. * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
  883. * section 5.2, step 6, sub-steps c through f; that is:
  884. * </p>
  885. *
  886. * <ol>
  887. *
  888. * <li><p> All <tt>"."</tt> segments are removed. </p></li>
  889. *
  890. * <li><p> If a <tt>".."</tt> segment is preceded by a non-<tt>".."</tt>
  891. * segment then both of these segments are removed. This step is
  892. * repeated until it is no longer applicable. </p></li>
  893. *
  894. * <li><p> If the path is relative, and if its first segment contains a
  895. * colon character (<tt>':'</tt>), then a <tt>"."</tt> segment is
  896. * prepended. This prevents a relative URI with a path such as
  897. * <tt>"a:b/c/d"</tt> from later being re-parsed as an opaque URI with a
  898. * scheme of <tt>"a"</tt> and a scheme-specific part of <tt>"b/c/d"</tt>.
  899. * <b><i>(Deviation from RFC 2396)</i></b> </p></li>
  900. *
  901. * </ol>
  902. *
  903. * <p> A normalized path will begin with one or more <tt>".."</tt> segments
  904. * if there were insufficient non-<tt>".."</tt> segments preceding them to
  905. * allow their removal. A normalized path will begin with a <tt>"."</tt>
  906. * segment if one was inserted by step 3 above. Otherwise, a normalized
  907. * path will not contain any <tt>"."</tt> or <tt>".."</tt> segments. </p>
  908. *
  909. * @return A URI equivalent to this URI,
  910. * but whose path is in normal form
  911. */
  912. public URI normalize() {
  913. return normalize(this);
  914. }
  915. /**
  916. * Resolves the given URI against this URI.
  917. *
  918. * <p> If the given URI is already absolute, or if this URI is opaque, then
  919. * the given URI is returned.
  920. *
  921. * <p><a name="resolve-frag"></a> If the given URI's fragment component is
  922. * defined, its path component is empty, and its scheme, authority, and
  923. * query components are undefined, then a URI with the given fragment but
  924. * with all other components equal to those of this URI is returned. This
  925. * allows a URI representing a standalone fragment reference, such as
  926. * <tt>"#foo"</tt>, to be usefully resolved against a base URI.
  927. *
  928. * <p> Otherwise this method constructs a new hierarchical URI in a manner
  929. * consistent with <a
  930. * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
  931. * section 5.2; that is: </p>
  932. *
  933. * <ol>
  934. *
  935. * <li><p> A new URI is constructed with this URI's scheme and the given
  936. * URI's query and fragment components. </p></li>
  937. *
  938. * <li><p> If the given URI has an authority component then the new URI's
  939. * authority and path are taken from the given URI. </p></li>
  940. *
  941. * <li><p> Otherwise the new URI's authority component is copied from
  942. * this URI, and its path is computed as follows: </p></li>
  943. *
  944. * <ol type=a>
  945. *
  946. * <li><p> If the given URI's path is absolute then the new URI's path
  947. * is taken from the given URI. </p></li>
  948. *
  949. * <li><p> Otherwise the given URI's path is relative, and so the new
  950. * URI's path is computed by resolving the path of the given URI
  951. * against the path of this URI. This is done by concatenating all but
  952. * the last segment of this URI's path, if any, with the given URI's
  953. * path and then normalizing the result as if by invoking the {@link
  954. * #normalize() normalize} method. </p></li>
  955. *
  956. * </ol>
  957. *
  958. * </ol>
  959. *
  960. * <p> The result of this method is absolute if, and only if, either this
  961. * URI is absolute or the given URI is absolute. </p>
  962. *
  963. * @param uri The URI to be resolved against this URI
  964. * @return The resulting URI
  965. *
  966. * @throws NullPointerException
  967. * If <tt>uri</tt> is <tt>null</tt>
  968. */
  969. public URI resolve(URI uri) {
  970. return resolve(this, uri);
  971. }
  972. /**
  973. * Constructs a new URI by parsing the given string and then resolving it
  974. * against this URI.
  975. *
  976. * <p> This convenience method works as if invoking it were equivalent to
  977. * evaluating the expression <tt>{@link #resolve(java.net.URI)
  978. * resolve}(URI.{@link #create(String) create}(str))</tt>. </p>
  979. *
  980. * @param str The string to be parsed into a URI
  981. * @return The resulting URI
  982. *
  983. * @throws NullPointerException
  984. * If <tt>str</tt> is <tt>null</tt>
  985. *
  986. * @throws IllegalArgumentException
  987. * If the given string violates RFC 2396
  988. */
  989. public URI resolve(String str) {
  990. return resolve(URI.create(str));
  991. }
  992. /**
  993. * Relativizes the given URI against this URI.
  994. *
  995. * <p> The relativization of the given URI against this URI is computed as
  996. * follows: </p>
  997. *
  998. * <ol>
  999. *
  1000. * <li><p> If either this URI or the given URI is opaque, or if the
  1001. * scheme and authority components of the two URIs are not identical, or
  1002. * if the path of this URI is not a prefix of the path of the given URI,
  1003. * then the given URI is returned. </p></li>
  1004. *
  1005. * <li><p> Otherwise a new relative hierarchical URI is constructed with
  1006. * query and fragment components taken from the given URI and with a path
  1007. * component computed by removing this URI's path from the beginning of
  1008. * the given URI's path. </p></li>
  1009. *
  1010. * </ol>
  1011. *
  1012. * @param uri The URI to be relativized against this URI
  1013. * @return The resulting URI
  1014. *
  1015. * @throws NullPointerException
  1016. * If <tt>uri</tt> is <tt>null</tt>
  1017. */
  1018. public URI relativize(URI uri) {
  1019. return relativize(this, uri);
  1020. }
  1021. /**
  1022. * Constructs a URL from this URI.
  1023. *
  1024. * <p> This convenience method works as if invoking it were equivalent to
  1025. * evaluating the expression <tt>new URL(this.toString())</tt> after
  1026. * first checking that this URI is absolute. </p>
  1027. *
  1028. * @return A URL constructed from this URI
  1029. *
  1030. * @throws IllegalArgumentException
  1031. * If this URI is not absolute
  1032. *
  1033. * @throws MalformedURLException
  1034. * If a protocol handler for the URL could not be found,
  1035. * or if some other error occurred while constructing the URL
  1036. */
  1037. public URL toURL()
  1038. throws MalformedURLException {
  1039. if (!isAbsolute())
  1040. throw new IllegalArgumentException("URI is not absolute");
  1041. return new URL(toString());
  1042. }
  1043. // -- Component access methods --
  1044. /**
  1045. * Returns the scheme component of this URI.
  1046. *
  1047. * <p> The scheme component of a URI, if defined, only contains characters
  1048. * in the <i>alphanum</i> category and in the string <tt>"-.+"</tt>. A
  1049. * scheme always starts with an <i>alpha</i> character. </p>
  1050. *
  1051. * <p> The scheme component of a URI cannot contain escaped octets, hence this
  1052. * method does not perform any decoding. </p>
  1053. *
  1054. * @return The scheme component of this URI,
  1055. * or <tt>null</tt> if the scheme is undefined
  1056. */
  1057. public String getScheme() {
  1058. return scheme;
  1059. }
  1060. /**
  1061. * Tells whether or not this URI is absolute.
  1062. *
  1063. * <p> A URI is absolute if, and only if, it has a scheme component. </p>
  1064. *
  1065. * @return <tt>true</tt> if, and only if, this URI is absolute
  1066. */
  1067. public boolean isAbsolute() {
  1068. return scheme != null;
  1069. }
  1070. /**
  1071. * Tells whether or not this URI is opaque.
  1072. *
  1073. * <p> A URI is opaque if, and only if, it is absolute and its
  1074. * scheme-specific part does not begin with a slash character ('/').
  1075. * An opaque URI has a scheme, a scheme-specific part, and possibly
  1076. * a fragment; all other components are undefined. </p>
  1077. *
  1078. * @return <tt>true</tt> if, and only if, this URI is opaque
  1079. */
  1080. public boolean isOpaque() {
  1081. return path == null;
  1082. }
  1083. /**
  1084. * Returns the raw scheme-specific part of this URI. The scheme-specific
  1085. * part is never undefined, though it may be empty.
  1086. *
  1087. * <p> The scheme-specific part of a URI only contains legal URI
  1088. * characters. </p>
  1089. *
  1090. * @return The raw scheme-specific part of this URI
  1091. * (never <tt>null</tt>)
  1092. */
  1093. public String getRawSchemeSpecificPart() {
  1094. defineSchemeSpecificPart();
  1095. return schemeSpecificPart;
  1096. }
  1097. /**
  1098. * Returns the decoded scheme-specific part of this URI.
  1099. *
  1100. * <p> The string returned by this method is equal to that returned by the
  1101. * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
  1102. * except that all sequences of escaped octets are <a
  1103. * href="#decode">decoded</a>. </p>
  1104. *
  1105. * @return The decoded scheme-specific part of this URI
  1106. * (never <tt>null</tt>)
  1107. */
  1108. public String getSchemeSpecificPart() {
  1109. if (decodedSchemeSpecificPart == null)
  1110. decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
  1111. return decodedSchemeSpecificPart;
  1112. }
  1113. /**
  1114. * Returns the raw authority component of this URI.
  1115. *
  1116. * <p> The authority component of a URI, if defined, only contains the
  1117. * commercial-at character (<tt>'@'</tt>) and characters in the
  1118. * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
  1119. * categories. If the authority is server-based then it is further
  1120. * constrained to have valid user-information, host, and port
  1121. * components. </p>
  1122. *
  1123. * @return The raw authority component of this URI,
  1124. * or <tt>null</tt> if the authority is undefined
  1125. */
  1126. public String getRawAuthority() {
  1127. return authority;
  1128. }
  1129. /**
  1130. * Returns the decoded authority component of this URI.
  1131. *
  1132. * <p> The string returned by this method is equal to that returned by the
  1133. * {@link #getRawAuthority() getRawAuthority} method except that all
  1134. * sequences of escaped octets are <a href="#decode">decoded</a>. </p>
  1135. *
  1136. * @return The decoded authority component of this URI,
  1137. * or <tt>null</tt> if the authority is undefined
  1138. */
  1139. public String getAuthority() {
  1140. if (decodedAuthority == null)
  1141. decodedAuthority = decode(authority);
  1142. return decodedAuthority;
  1143. }
  1144. /**
  1145. * Returns the raw user-information component of this URI.
  1146. *
  1147. * <p> The user-information component of a URI, if defined, only contains
  1148. * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
  1149. * <i>other</i> categories. </p>
  1150. *
  1151. * @return The raw user-information component of this URI,
  1152. * or <tt>null</tt> if the user information is undefined
  1153. */
  1154. public String getRawUserInfo() {
  1155. return userInfo;
  1156. }
  1157. /**
  1158. * Returns the decoded user-information component of this URI.
  1159. *
  1160. * <p> The string returned by this method is equal to that returned by the
  1161. * {@link #getRawUserInfo() getRawUserInfo} method except that all
  1162. * sequences of escaped octets are <a href="#decode">decoded</a>. </p>
  1163. *
  1164. * @return The decoded user-information component of this URI,
  1165. * or <tt>null</tt> if the user information is undefined
  1166. */
  1167. public String getUserInfo() {
  1168. if ((decodedUserInfo == null) && (userInfo != null))
  1169. decodedUserInfo = decode(userInfo);
  1170. return decodedUserInfo;
  1171. }
  1172. /**
  1173. * Returns the host component of this URI.
  1174. *
  1175. * <p> The host component of a URI, if defined, will have one of the
  1176. * following forms: </p>
  1177. *
  1178. * <ul type=disc>
  1179. *
  1180. * <li><p> A domain name consisting of one or more <i>labels</i>
  1181. * separated by period characters (<tt>'.'</tt>), optionally followed by
  1182. * a period character. Each label consists of <i>alphanum</i> characters
  1183. * as well as hyphen characters (<tt>'-'</tt>), though hyphens never
  1184. * occur as the first or last characters in a label. The rightmost
  1185. * label of a domain name consisting of two or more labels, begins
  1186. * with an <i>alpha</i> character. </p></li>
  1187. *
  1188. * <li><p> A dotted-quad IPv4 address of the form
  1189. * <i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+</tt>,
  1190. * where no <i>digit</i> sequence is longer than three characters and no
  1191. * sequence has a value larger than 255. </p></li>
  1192. *
  1193. * <li><p> An IPv6 address enclosed in square brackets (<tt>'['</tt> and
  1194. * <tt>']'</tt>) and consisting of hexadecimal digits, colon characters
  1195. * (<tt>':'</tt>), and possibly an embedded IPv4 address. The full
  1196. * syntax of IPv6 addresses is specified in <a
  1197. * href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6
  1198. * Addressing Architecture</i></a>. </p></li>
  1199. *
  1200. * </ul>
  1201. *
  1202. * <p> The host component of a URI cannot contain escaped octets, hence this
  1203. * method does not perform any decoding. </p>
  1204. *
  1205. * @return The host component of this URI,
  1206. * or <tt>null</tt> if the host is undefined
  1207. */
  1208. public String getHost() {
  1209. return host;
  1210. }
  1211. /**
  1212. * Returns the port number of this URI.
  1213. *
  1214. * <p> The port component of a URI, if defined, is a non-negative
  1215. * integer. </p>
  1216. *
  1217. * @return The port component of this URI,
  1218. * or <tt>-1</tt> if the port is undefined
  1219. */
  1220. public int getPort() {
  1221. return port;
  1222. }
  1223. /**
  1224. * Returns the raw path component of this URI.
  1225. *
  1226. * <p> The path component of a URI, if defined, only contains the slash
  1227. * character (<tt>'/'</tt>), the commercial-at character (<tt>'@'</tt>),
  1228. * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
  1229. * and <i>other</i> categories. </p>
  1230. *
  1231. * @return The path component of this URI,
  1232. * or <tt>null</tt> if the path is undefined
  1233. */
  1234. public String getRawPath() {
  1235. return path;
  1236. }
  1237. /**
  1238. * Returns the decoded path component of this URI.
  1239. *
  1240. * <p> The string returned by this method is equal to that returned by the
  1241. * {@link #getRawPath() getRawPath} method except that all sequences of
  1242. * escaped octets are <a href="#decode">decoded</a>. </p>
  1243. *
  1244. * @return The decoded path component of this URI,
  1245. * or <tt>null</tt> if the path is undefined
  1246. */
  1247. public String getPath() {
  1248. if ((decodedPath == null) && (path != null))
  1249. decodedPath = decode(path);
  1250. return decodedPath;
  1251. }
  1252. /**
  1253. * Returns the raw query component of this URI.
  1254. *
  1255. * <p> The query component of a URI, if defined, only contains legal URI
  1256. * characters. </p>
  1257. *
  1258. * @return The raw query component of this URI,
  1259. * or <tt>null</tt> if the query is undefined
  1260. */
  1261. public String getRawQuery() {
  1262. return query;
  1263. }
  1264. /**
  1265. * Returns the decoded query component of this URI.
  1266. *
  1267. * <p> The string returned by this method is equal to that returned by the