merge in nyc-release history after reset to nyc-dev
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java
index 4ec595d..71deb6c 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java
@@ -196,11 +196,20 @@
assertEquals(HttpUrl.parse("http://user@host/path"), HttpUrl.parse("http://user@host/path"));
}
+ /** Given multiple '@' characters, the last one is the delimiter. */
@Test public void authorityWithMultipleAtSigns() throws Exception {
- assertEquals(HttpUrl.parse("http://foo%40bar@baz/path"),
- HttpUrl.parse("http://foo@bar@baz/path"));
- assertEquals(HttpUrl.parse("http://foo:pass1%40bar%3Apass2@baz/path"),
- HttpUrl.parse("http://foo:pass1@bar:pass2@baz/path"));
+ HttpUrl httpUrl = HttpUrl.parse("http://foo@bar@baz/path");
+ assertEquals("foo@bar", httpUrl.username());
+ assertEquals("", httpUrl.password());
+ assertEquals(HttpUrl.parse("http://foo%40bar@baz/path"), httpUrl);
+ }
+
+ /** Given multiple ':' characters, the first one is the delimiter. */
+ @Test public void authorityWithMultipleColons() throws Exception {
+ HttpUrl httpUrl = HttpUrl.parse("http://foo:pass1@bar:pass2@baz/path");
+ assertEquals("foo", httpUrl.username());
+ assertEquals("pass1@bar:pass2", httpUrl.password());
+ assertEquals(HttpUrl.parse("http://foo:pass1%40bar%3Apass2@baz/path"), httpUrl);
}
@Test public void usernameAndPassword() throws Exception {
@@ -457,8 +466,40 @@
new UrlComponentEncodingTester()
.override(Encoding.IDENTITY, ' ', '"', '#', '<', '>', '?', '`')
.skipForUri('%', ' ', '"', '#', '<', '>', '\\', '^', '`', '{', '|', '}')
+ .identityForNonAscii()
.test(Component.FRAGMENT);
- // TODO(jwilson): don't percent-encode non-ASCII characters. (But do encode control characters!)
+ }
+
+ @Test public void fragmentNonAscii() throws Exception {
+ HttpUrl url = HttpUrl.parse("http://host/#Σ");
+ assertEquals("http://host/#Σ", url.toString());
+ assertEquals("Σ", url.fragment());
+ assertEquals("Σ", url.encodedFragment());
+ assertEquals("http://host/#Σ", url.uri().toString());
+ }
+
+ @Test public void fragmentNonAsciiThatOffendsJavaNetUri() throws Exception {
+ HttpUrl url = HttpUrl.parse("http://host/#\u0080");
+ assertEquals("http://host/#\u0080", url.toString());
+ assertEquals("\u0080", url.fragment());
+ assertEquals("\u0080", url.encodedFragment());
+ assertEquals(new URI("http://host/#"), url.uri()); // Control characters may be stripped!
+ }
+
+ @Test public void fragmentPercentEncodedNonAscii() throws Exception {
+ HttpUrl url = HttpUrl.parse("http://host/#%C2%80");
+ assertEquals("http://host/#%C2%80", url.toString());
+ assertEquals("\u0080", url.fragment());
+ assertEquals("%C2%80", url.encodedFragment());
+ assertEquals("http://host/#%C2%80", url.uri().toString());
+ }
+
+ @Test public void fragmentPercentEncodedPartialCodePoint() throws Exception {
+ HttpUrl url = HttpUrl.parse("http://host/#%80");
+ assertEquals("http://host/#%80", url.toString());
+ assertEquals("\ufffd", url.fragment()); // Unicode replacement character.
+ assertEquals("%80", url.encodedFragment());
+ assertEquals("http://host/#%80", url.uri().toString());
}
@Test public void relativePath() throws Exception {
@@ -928,14 +969,128 @@
assertEquals("http://host/?d=abc!@[]%5E%60%7B%7D%7C%5C", uri.toString());
}
- @Test public void toUriForbiddenCharacter() throws Exception {
- HttpUrl httpUrl = HttpUrl.parse("http://host/a[b");
- try {
- httpUrl.uri();
- fail();
- } catch (IllegalStateException expected) {
- assertEquals("not valid as a java.net.URI: http://host/a[b", expected.getMessage());
- }
+ @Test public void toUriWithUsernameNoPassword() throws Exception {
+ HttpUrl httpUrl = new HttpUrl.Builder()
+ .scheme("http")
+ .username("user")
+ .host("host")
+ .build();
+ assertEquals("http://user@host/", httpUrl.toString());
+ assertEquals("http://user@host/", httpUrl.uri().toString());
+ }
+
+ @Test public void toUriUsernameSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .username("=[]:;\"~|?#@^/$%*")
+ .build();
+ assertEquals("http://%3D%5B%5D%3A%3B%22~%7C%3F%23%40%5E%2F$%25*@host/", url.toString());
+ assertEquals("http://%3D%5B%5D%3A%3B%22~%7C%3F%23%40%5E%2F$%25*@host/", url.uri().toString());
+ }
+
+ @Test public void toUriPasswordSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .username("user")
+ .password("=[]:;\"~|?#@^/$%*")
+ .build();
+ assertEquals("http://user:%3D%5B%5D%3A%3B%22~%7C%3F%23%40%5E%2F$%25*@host/", url.toString());
+ assertEquals("http://user:%3D%5B%5D%3A%3B%22~%7C%3F%23%40%5E%2F$%25*@host/",
+ url.uri().toString());
+ }
+
+ @Test public void toUriPathSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .addPathSegment("=[]:;\"~|?#@^/$%*")
+ .build();
+ assertEquals("http://host/=[]:;%22~%7C%3F%23@%5E%2F$%25*", url.toString());
+ assertEquals("http://host/=%5B%5D:;%22~%7C%3F%23@%5E%2F$%25*", url.uri().toString());
+ }
+
+ @Test public void toUriQueryParameterNameSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .addQueryParameter("=[]:;\"~|?#@^/$%*", "a")
+ .build();
+ assertEquals("http://host/?%3D[]:;%22~|?%23@^/$%25*=a", url.toString());
+ assertEquals("http://host/?%3D[]:;%22~%7C?%23@%5E/$%25*=a", url.uri().toString());
+ }
+
+ @Test public void toUriQueryParameterValueSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .addQueryParameter("a", "=[]:;\"~|?#@^/$%*")
+ .build();
+ assertEquals("http://host/?a=%3D[]:;%22~|?%23@^/$%25*", url.toString());
+ assertEquals("http://host/?a=%3D[]:;%22~%7C?%23@%5E/$%25*", url.uri().toString());
+ }
+
+ @Test public void toUriQueryValueSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .query("=[]:;\"~|?#@^/$%*")
+ .build();
+ assertEquals("http://host/?=[]:;%22~|?%23@^/$%25*", url.toString());
+ assertEquals("http://host/?=[]:;%22~%7C?%23@%5E/$%25*", url.uri().toString());
+ }
+
+ @Test public void toUriFragmentSpecialCharacters() throws Exception {
+ HttpUrl url = new HttpUrl.Builder()
+ .scheme("http")
+ .host("host")
+ .fragment("=[]:;\"~|?#@^/$%*")
+ .build();
+ assertEquals("http://host/#=[]:;\"~|?#@^/$%25*", url.toString());
+ assertEquals("http://host/#=[]:;%22~%7C?%23@%5E/$%25*", url.uri().toString());
+ }
+
+ @Test public void toUriWithControlCharacters() throws Exception {
+ // Percent-encoded in the path.
+ assertEquals(new URI("http://host/a%00b"), HttpUrl.parse("http://host/a\u0000b").uri());
+ assertEquals(new URI("http://host/a%C2%80b"), HttpUrl.parse("http://host/a\u0080b").uri());
+ assertEquals(new URI("http://host/a%C2%9Fb"), HttpUrl.parse("http://host/a\u009fb").uri());
+ // Percent-encoded in the query.
+ assertEquals(new URI("http://host/?a%00b"), HttpUrl.parse("http://host/?a\u0000b").uri());
+ assertEquals(new URI("http://host/?a%C2%80b"), HttpUrl.parse("http://host/?a\u0080b").uri());
+ assertEquals(new URI("http://host/?a%C2%9Fb"), HttpUrl.parse("http://host/?a\u009fb").uri());
+ // Percent-encoded (ASCII) or stripped (non-ASCII) from the fragment.
+ assertEquals(new URI("http://host/#a%00b"), HttpUrl.parse("http://host/#a\u0000b").uri());
+ assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u0080b").uri());
+ assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u009fb").uri());
+ }
+
+ @Test public void toUriWithSpaceCharacters() throws Exception {
+ // Percent-encoded in the path.
+ assertEquals(new URI("http://host/a%0Bb"), HttpUrl.parse("http://host/a\u000bb").uri());
+ assertEquals(new URI("http://host/a%20b"), HttpUrl.parse("http://host/a b").uri());
+ assertEquals(new URI("http://host/a%E2%80%89b"), HttpUrl.parse("http://host/a\u2009b").uri());
+ assertEquals(new URI("http://host/a%E3%80%80b"), HttpUrl.parse("http://host/a\u3000b").uri());
+ // Percent-encoded in the query.
+ assertEquals(new URI("http://host/?a%0Bb"), HttpUrl.parse("http://host/?a\u000bb").uri());
+ assertEquals(new URI("http://host/?a%20b"), HttpUrl.parse("http://host/?a b").uri());
+ assertEquals(new URI("http://host/?a%E2%80%89b"), HttpUrl.parse("http://host/?a\u2009b").uri());
+ assertEquals(new URI("http://host/?a%E3%80%80b"), HttpUrl.parse("http://host/?a\u3000b").uri());
+ // Percent-encoded (ASCII) or stripped (non-ASCII) from the fragment.
+ assertEquals(new URI("http://host/#a%0Bb"), HttpUrl.parse("http://host/#a\u000bb").uri());
+ assertEquals(new URI("http://host/#a%20b"), HttpUrl.parse("http://host/#a b").uri());
+ assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u2009b").uri());
+ assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u3000b").uri());
+ }
+
+ @Test public void toUriWithNonHexPercentEscape() throws Exception {
+ assertEquals(new URI("http://host/%25xx"), HttpUrl.parse("http://host/%xx").uri());
+ }
+
+ @Test public void toUriWithTruncatedPercentEscape() throws Exception {
+ assertEquals(new URI("http://host/%25a"), HttpUrl.parse("http://host/%a").uri());
+ assertEquals(new URI("http://host/%25"), HttpUrl.parse("http://host/%").uri());
}
@Test public void fromJavaNetUrl() throws Exception {
@@ -1169,4 +1324,24 @@
assertEquals(urlString, url.newBuilder().build().toString());
assertEquals("http://%6d%6D:%6d%6D@host/%6d%6D?%6d%6D", url.resolve("").toString());
}
+
+ @Test public void clearFragment() throws Exception {
+ HttpUrl url = HttpUrl.parse("http://host/#fragment")
+ .newBuilder()
+ .fragment(null)
+ .build();
+ assertEquals("http://host/", url.toString());
+ assertEquals(null, url.fragment());
+ assertEquals(null, url.encodedFragment());
+ }
+
+ @Test public void clearEncodedFragment() throws Exception {
+ HttpUrl url = HttpUrl.parse("http://host/#fragment")
+ .newBuilder()
+ .encodedFragment(null)
+ .build();
+ assertEquals("http://host/", url.toString());
+ assertEquals(null, url.fragment());
+ assertEquals(null, url.encodedFragment());
+ }
}
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/URLConnectionTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/URLConnectionTest.java
index 79a6cf2..59cbc54 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/URLConnectionTest.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/URLConnectionTest.java
@@ -2429,7 +2429,7 @@
}
@Test public void malformedUrlThrowsUnknownHostException() throws IOException {
- connection = client.open(new URL("http:///foo.html"));
+ connection = client.open(new URL("http://./foo.html"));
try {
connection.connect();
fail();
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java b/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java
index 199279f..22502eb 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java
@@ -27,6 +27,10 @@
/** Tests how each code point is encoded and decoded in the context of each URL component. */
class UrlComponentEncodingTester {
+ private static final int UNICODE_2 = 0x07ff; // Arbitrary code point that's 2 bytes in UTF-8.
+ private static final int UNICODE_3 = 0xffff; // Arbitrary code point that's 3 bytes in UTF-8.
+ private static final int UNICODE_4 = 0x10ffff; // Arbitrary code point that's 4 bytes in UTF-8.
+
/**
* The default encode set for the ASCII range. The specific rules vary per-component: for example,
* '?' may be identity-encoded in a fragment, but must be percent-encoded in a path.
@@ -164,11 +168,14 @@
map.put((int) '}', Encoding.IDENTITY);
map.put((int) '~', Encoding.IDENTITY);
map.put( 0x7f, Encoding.PERCENT); // Delete
+ map.put( UNICODE_2, Encoding.PERCENT);
+ map.put( UNICODE_3, Encoding.PERCENT);
+ map.put( UNICODE_4, Encoding.PERCENT);
defaultEncodings = Collections.unmodifiableMap(map);
}
private final Map<Integer, Encoding> encodings;
- private final StringBuilder skipForUri = new StringBuilder();
+ private final StringBuilder uriEscapedCodePoints = new StringBuilder();
public UrlComponentEncodingTester() {
this.encodings = new LinkedHashMap<>(defaultEncodings);
@@ -181,12 +188,19 @@
return this;
}
+ public UrlComponentEncodingTester identityForNonAscii() {
+ encodings.put(UNICODE_2, Encoding.IDENTITY);
+ encodings.put(UNICODE_3, Encoding.IDENTITY);
+ encodings.put(UNICODE_4, Encoding.IDENTITY);
+ return this;
+ }
+
/**
* Configure a character to be skipped but only for conversion to and from {@code java.net.URI}.
* That class is more strict than the others.
*/
public UrlComponentEncodingTester skipForUri(int... codePoints) {
- skipForUri.append(new String(codePoints, 0, codePoints.length));
+ uriEscapedCodePoints.append(new String(codePoints, 0, codePoints.length));
return this;
}
@@ -202,9 +216,10 @@
testToUrl(codePoint, encoding, component);
testFromUrl(codePoint, encoding, component);
- if (skipForUri.indexOf(Encoding.IDENTITY.encode(codePoint)) == -1) {
- testToUri(codePoint, encoding, component);
- testFromUri(codePoint, encoding, component);
+ if (codePoint != '%') {
+ boolean uriEscaped = uriEscapedCodePoints.indexOf(
+ Encoding.IDENTITY.encode(codePoint)) != -1;
+ testUri(codePoint, encoding, component, uriEscaped);
}
}
return this;
@@ -261,21 +276,29 @@
}
}
- private void testToUri(int codePoint, Encoding encoding, Component component) {
+ private void testUri(
+ int codePoint, Encoding encoding, Component component, boolean uriEscaped) {
+ String string = new String(new int[] { codePoint }, 0, 1);
String encoded = encoding.encode(codePoint);
HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded));
URI uri = httpUrl.uri();
- if (!uri.toString().equals(uri.toString())) {
- fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
- }
- }
-
- private void testFromUri(int codePoint, Encoding encoding, Component component) {
- String encoded = encoding.encode(codePoint);
- HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded));
- HttpUrl toAndFromUri = HttpUrl.get(httpUrl.uri());
- if (!toAndFromUri.equals(httpUrl)) {
- fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
+ HttpUrl toAndFromUri = HttpUrl.get(uri);
+ if (uriEscaped) {
+ // The URI has more escaping than the HttpUrl. Check that the decoded values still match.
+ if (uri.toString().equals(httpUrl.toString())) {
+ fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
+ }
+ if (!component.get(toAndFromUri).equals(string)) {
+ fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
+ }
+ } else {
+ // Check that the URI and HttpUrl have the exact same escaping.
+ if (!toAndFromUri.equals(httpUrl)) {
+ fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
+ }
+ if (!uri.toString().equals(httpUrl.toString())) {
+ fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
+ }
}
}
@@ -358,10 +381,11 @@
return query.substring(1, query.length() - 1);
}
@Override public void set(HttpUrl.Builder builder, String value) {
- builder.query(value);
+ builder.query("a" + value + "z");
}
@Override public String get(HttpUrl url) {
- return url.query();
+ String query = url.query();
+ return query.substring(1, query.length() - 1);
}
},
FRAGMENT {
@@ -373,10 +397,11 @@
return fragment.substring(1, fragment.length() - 1);
}
@Override public void set(HttpUrl.Builder builder, String value) {
- builder.fragment(value);
+ builder.fragment("a" + value + "z");
}
@Override public String get(HttpUrl url) {
- return url.fragment();
+ String fragment = url.fragment();
+ return fragment.substring(1, fragment.length() - 1);
}
};
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java
index e45761c..2c619c2 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java
@@ -57,8 +57,6 @@
"Parsing: <http://f:00000000000000/c> against <http://example.org/foo/bar>",
"Parsing: <http://f:\n/c> against <http://example.org/foo/bar>",
"Parsing: <http://f:999999/c> against <http://example.org/foo/bar>",
- "Parsing: <#β> against <http://example.org/foo/bar>",
- "Parsing: <http://www.google.com/foo?bar=baz# »> against <about:blank>",
"Parsing: <http://192.0x00A80001> against <about:blank>",
// This test fails on Java 7 but passes on Java 8. See HttpUrlTest.hostWithTrailingDot().
"Parsing: <http://%30%78%63%30%2e%30%32%35%30.01%2e> against <http://other.com/>",
diff --git a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java
index 6f4b93c..96f6917 100644
--- a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java
+++ b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java
@@ -33,10 +33,10 @@
content.writeByte('&');
}
HttpUrl.canonicalize(content, name, 0, name.length(),
- HttpUrl.FORM_ENCODE_SET, false, true);
+ HttpUrl.FORM_ENCODE_SET, false, false, true, true);
content.writeByte('=');
HttpUrl.canonicalize(content, value, 0, value.length(),
- HttpUrl.FORM_ENCODE_SET, false, true);
+ HttpUrl.FORM_ENCODE_SET, false, false, true, true);
return this;
}
@@ -46,10 +46,10 @@
content.writeByte('&');
}
HttpUrl.canonicalize(content, name, 0, name.length(),
- HttpUrl.FORM_ENCODE_SET, true, true);
+ HttpUrl.FORM_ENCODE_SET, true, false, true, true);
content.writeByte('=');
HttpUrl.canonicalize(content, value, 0, value.length(),
- HttpUrl.FORM_ENCODE_SET, true, true);
+ HttpUrl.FORM_ENCODE_SET, true, false, true, true);
return this;
}
diff --git a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java
index 0919b91..beabeca 100644
--- a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java
+++ b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java
@@ -258,11 +258,13 @@
static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
+ static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
static final String QUERY_ENCODE_SET = " \"'<>#";
static final String QUERY_COMPONENT_ENCODE_SET = " \"'<>#&=";
- static final String CONVERT_TO_URI_ENCODE_SET = "^`{}|\\";
+ static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
static final String FRAGMENT_ENCODE_SET = "";
+ static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";
/** Either "http" or "https". */
private final String scheme;
@@ -325,17 +327,29 @@
}
/**
- * Attempt to convert this URL to a {@link URI java.net.URI}. This method throws an unchecked
- * {@link IllegalStateException} if the URL it holds isn't valid by URI's overly-stringent
- * standard. For example, URI rejects paths containing the '[' character. Consult that class for
- * the exact rules of what URLs are permitted.
+ * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this
+ * class, the returned URI may be semantically different from this URL:
+ * <ul>
+ * <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped.
+ * <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}.
+ * <li>Non-ASCII whitespace and control characters in the fragment will be stripped.
+ * </ul>
+ *
+ * <p>These differences may have a significant consequence when the URI is interpreted by a
+ * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided.
*/
public URI uri() {
+ String uri = newBuilder().reencodeForUri().toString();
try {
- String uriSafeUrl = canonicalize(url, CONVERT_TO_URI_ENCODE_SET, true, false);
- return new URI(uriSafeUrl);
+ return new URI(uri);
} catch (URISyntaxException e) {
- throw new IllegalStateException("not valid as a java.net.URI: " + url);
+ // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.
+ try {
+ String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", "");
+ return URI.create(stripped);
+ } catch (Exception e1) {
+ throw new RuntimeException(e); // Unexpected!
+ }
}
}
@@ -587,12 +601,8 @@
result.encodedUsername = encodedUsername();
result.encodedPassword = encodedPassword();
result.host = host;
- // If we're set to a default port, unset it, in case of a scheme change.
- if (port == defaultPort(scheme)) {
- result.port = -1;
- } else {
- result.port = port;
- }
+ // If we're set to a default port, unset it in case of a scheme change.
+ result.port = port != defaultPort(scheme) ? port : -1;
result.encodedPathSegments.clear();
result.encodedPathSegments.addAll(encodedPathSegments());
result.encodedQuery(encodedQuery());
@@ -686,25 +696,27 @@
public Builder username(String username) {
if (username == null) throw new IllegalArgumentException("username == null");
- this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false);
+ this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true);
return this;
}
public Builder encodedUsername(String encodedUsername) {
if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null");
- this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false);
+ this.encodedUsername = canonicalize(
+ encodedUsername, USERNAME_ENCODE_SET, true, false, false, true);
return this;
}
public Builder password(String password) {
if (password == null) throw new IllegalArgumentException("password == null");
- this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false);
+ this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true);
return this;
}
public Builder encodedPassword(String encodedPassword) {
if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null");
- this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false);
+ this.encodedPassword = canonicalize(
+ encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true);
return this;
}
@@ -747,7 +759,7 @@
public Builder setPathSegment(int index, String pathSegment) {
if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
String canonicalPathSegment = canonicalize(
- pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false);
+ pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true);
if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
throw new IllegalArgumentException("unexpected path segment: " + pathSegment);
}
@@ -760,7 +772,7 @@
throw new IllegalArgumentException("encodedPathSegment == null");
}
String canonicalPathSegment = canonicalize(encodedPathSegment,
- 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false);
+ 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true);
encodedPathSegments.set(index, canonicalPathSegment);
if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);
@@ -787,14 +799,16 @@
public Builder query(String query) {
this.encodedQueryNamesAndValues = query != null
- ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true))
+ ? queryStringToNamesAndValues(canonicalize(
+ query, QUERY_ENCODE_SET, false, false, true, true))
: null;
return this;
}
public Builder encodedQuery(String encodedQuery) {
this.encodedQueryNamesAndValues = encodedQuery != null
- ? queryStringToNamesAndValues(canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true))
+ ? queryStringToNamesAndValues(
+ canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true))
: null;
return this;
}
@@ -803,9 +817,10 @@
public Builder addQueryParameter(String name, String value) {
if (name == null) throw new IllegalArgumentException("name == null");
if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
- encodedQueryNamesAndValues.add(canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true));
+ encodedQueryNamesAndValues.add(
+ canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true));
encodedQueryNamesAndValues.add(value != null
- ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true)
+ ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)
: null);
return this;
}
@@ -815,9 +830,9 @@
if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
encodedQueryNamesAndValues.add(
- canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true));
+ canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
encodedQueryNamesAndValues.add(encodedValue != null
- ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true)
+ ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)
: null);
return this;
}
@@ -837,7 +852,8 @@
public Builder removeAllQueryParameters(String name) {
if (name == null) throw new IllegalArgumentException("name == null");
if (encodedQueryNamesAndValues == null) return this;
- String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true);
+ String nameToRemove = canonicalize(
+ name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true);
removeAllCanonicalQueryParameters(nameToRemove);
return this;
}
@@ -846,7 +862,7 @@
if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
if (encodedQueryNamesAndValues == null) return this;
removeAllCanonicalQueryParameters(
- canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true));
+ canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
return this;
}
@@ -864,14 +880,42 @@
}
public Builder fragment(String fragment) {
- if (fragment == null) throw new IllegalArgumentException("fragment == null");
- this.encodedFragment = canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false);
+ this.encodedFragment = fragment != null
+ ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false)
+ : null;
return this;
}
public Builder encodedFragment(String encodedFragment) {
- if (encodedFragment == null) throw new IllegalArgumentException("encodedFragment == null");
- this.encodedFragment = canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false);
+ this.encodedFragment = encodedFragment != null
+ ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false)
+ : null;
+ return this;
+ }
+
+ /**
+ * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is
+ * particularly strict for certain components.
+ */
+ Builder reencodeForUri() {
+ for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {
+ String pathSegment = encodedPathSegments.get(i);
+ encodedPathSegments.set(i,
+ canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true));
+ }
+ if (encodedQueryNamesAndValues != null) {
+ for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {
+ String component = encodedQueryNamesAndValues.get(i);
+ if (component != null) {
+ encodedQueryNamesAndValues.set(i,
+ canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true));
+ }
+ }
+ }
+ if (encodedFragment != null) {
+ encodedFragment = canonicalize(
+ encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false);
+ }
return this;
}
@@ -983,19 +1027,19 @@
int passwordColonOffset = delimiterOffset(
input, pos, componentDelimiterOffset, ":");
String canonicalUsername = canonicalize(
- input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false);
+ input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true);
this.encodedUsername = hasUsername
? this.encodedUsername + "%40" + canonicalUsername
: canonicalUsername;
if (passwordColonOffset != componentDelimiterOffset) {
hasPassword = true;
this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
- componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false);
+ componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
}
hasUsername = true;
} else {
- this.encodedPassword = this.encodedPassword + "%40" + canonicalize(
- input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false);
+ this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,
+ componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
}
pos = componentDelimiterOffset + 1;
break;
@@ -1042,14 +1086,14 @@
if (pos < limit && input.charAt(pos) == '?') {
int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#");
this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
- input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true));
+ input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true));
pos = queryDelimiterOffset;
}
// Fragment.
if (pos < limit && input.charAt(pos) == '#') {
this.encodedFragment = canonicalize(
- input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false);
+ input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false);
}
return ParseResult.SUCCESS;
@@ -1086,7 +1130,7 @@
private void push(String input, int pos, int limit, boolean addTrailingSlash,
boolean alreadyEncoded) {
String segment = canonicalize(
- input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false);
+ input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true);
if (isDot(segment)) {
return; // Skip '.' path segments.
}
@@ -1371,8 +1415,6 @@
String result = IDN.toASCII(input).toLowerCase(Locale.US);
if (result.isEmpty()) return null;
- if (result == null) return null;
-
// Confirm that the IDN ToASCII result doesn't contain any illegal characters.
if (containsInvalidHostnameAsciiCodes(result)) {
return null;
@@ -1439,7 +1481,7 @@
private static int parsePort(String input, int pos, int limit) {
try {
// Canonicalize the port string to skip '\n' etc.
- String portString = canonicalize(input, pos, limit, "", false, false);
+ String portString = canonicalize(input, pos, limit, "", false, false, false, true);
int i = Integer.parseInt(portString);
if (i > 0 && i <= 65535) return i;
return -1;
@@ -1508,6 +1550,13 @@
}
}
+ static boolean percentEncoded(String encoded, int pos, int limit) {
+ return pos + 2 < limit
+ && encoded.charAt(pos) == '%'
+ && decodeHexDigit(encoded.charAt(pos + 1)) != -1
+ && decodeHexDigit(encoded.charAt(pos + 2)) != -1;
+ }
+
static int decodeHexDigit(char c) {
if (c >= '0' && c <= '9') return c - '0';
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
@@ -1527,22 +1576,26 @@
* </ul>
*
* @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
+ * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
* @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded
+ * @param asciiOnly true to encode all non-ASCII codepoints.
*/
static String canonicalize(String input, int pos, int limit, String encodeSet,
- boolean alreadyEncoded, boolean plusIsSpace) {
+ boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
int codePoint;
for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
codePoint = input.codePointAt(i);
if (codePoint < 0x20
- || codePoint >= 0x7f
+ || codePoint == 0x7f
+ || codePoint >= 0x80 && asciiOnly
|| encodeSet.indexOf(codePoint) != -1
- || (codePoint == '%' && !alreadyEncoded)
- || (codePoint == '+' && plusIsSpace)) {
+ || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))
+ || codePoint == '+' && plusIsSpace) {
// Slow path: the character at i requires encoding!
Buffer out = new Buffer();
out.writeUtf8(input, pos, i);
- canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace);
+ canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,
+ asciiOnly);
return out.readUtf8();
}
}
@@ -1551,8 +1604,8 @@
return input.substring(pos, limit);
}
- static void canonicalize(Buffer out, String input, int pos, int limit,
- String encodeSet, boolean alreadyEncoded, boolean plusIsSpace) {
+ static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,
+ boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
Buffer utf8Buffer = null; // Lazily allocated.
int codePoint;
for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
@@ -1564,9 +1617,10 @@
// Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
out.writeUtf8(alreadyEncoded ? "+" : "%2B");
} else if (codePoint < 0x20
- || codePoint >= 0x7f
+ || codePoint == 0x7f
+ || codePoint >= 0x80 && asciiOnly
|| encodeSet.indexOf(codePoint) != -1
- || (codePoint == '%' && !alreadyEncoded)) {
+ || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {
// Percent encode this character.
if (utf8Buffer == null) {
utf8Buffer = new Buffer();
@@ -1585,9 +1639,9 @@
}
}
- static String canonicalize(
- String input, String encodeSet, boolean alreadyEncoded, boolean plusIsSpace) {
+ static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,
+ boolean plusIsSpace, boolean asciiOnly) {
return canonicalize(
- input, 0, input.length(), encodeSet, alreadyEncoded, plusIsSpace);
+ input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly);
}
}