blob: 681b24248373df9ad19f5c217a9321546f1046bc [file] [log] [blame]
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Michael Kolb8233fac2010-10-26 16:08:53 -070016
John Reckfb3017f2010-10-26 19:01:24 -070017package com.android.browser;
18
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
25
Michael Kolb8233fac2010-10-26 16:08:53 -070026/**
27 * Utility methods for Url manipulation
28 */
John Reckfb3017f2010-10-26 19:01:24 -070029public class UrlUtils {
30
Michael Kolb8233fac2010-10-26 16:08:53 -070031 static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
32 "(?i)" + // switch on case insensitive matching
33 "(" + // begin group for schema
34 "(?:http|https|file):\\/\\/" +
Patrick Scottb92bbb42011-01-05 11:38:58 -050035 "|(?:inline|data|about|javascript):" +
Michael Kolb8233fac2010-10-26 16:08:53 -070036 ")" +
37 "(.*)" );
38
39 // Google search
40 private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
41 private final static String QUERY_PLACE_HOLDER = "%s";
42
John Reck434e9f82011-08-10 18:16:52 -070043 // Regular expression to strip http://, optionally www., and optionally
44 // the trailing slash
45 private static final Pattern STRIP_URL_PATTERN =
46 Pattern.compile("^http://(?:www\\.)?(.*?)/?$");
John Reckfb3017f2010-10-26 19:01:24 -070047
48 private UrlUtils() { /* cannot be instantiated */ }
49
50 /**
John Reck434e9f82011-08-10 18:16:52 -070051 * Strips the provided url of preceding "http://", "www.", and any trailing "/". Does not
John Reckfb3017f2010-10-26 19:01:24 -070052 * strip "https://". If the provided string cannot be stripped, the original string
53 * is returned.
54 *
55 * TODO: Put this in TextUtils to be used by other packages doing something similar.
56 *
57 * @param url a url to strip, like "http://www.google.com/"
John Reck434e9f82011-08-10 18:16:52 -070058 * @return a stripped url like "google.com", or the original string if it could
John Reckfb3017f2010-10-26 19:01:24 -070059 * not be stripped
60 */
John Reck2cb9dc22011-05-11 16:12:27 -070061 public static String stripUrl(String url) {
John Reckfb3017f2010-10-26 19:01:24 -070062 if (url == null) return null;
63 Matcher m = STRIP_URL_PATTERN.matcher(url);
John Reck434e9f82011-08-10 18:16:52 -070064 if (m.matches()) {
65 return m.group(1);
John Reckfb3017f2010-10-26 19:01:24 -070066 } else {
67 return url;
68 }
69 }
Michael Kolb8233fac2010-10-26 16:08:53 -070070
71 protected static String smartUrlFilter(Uri inUri) {
72 if (inUri != null) {
73 return smartUrlFilter(inUri.toString());
74 }
75 return null;
76 }
77
78 /**
79 * Attempts to determine whether user input is a URL or search
80 * terms. Anything with a space is passed to search.
81 *
82 * Converts to lowercase any mistakenly uppercased schema (i.e.,
83 * "Http://" converts to "http://"
84 *
85 * @return Original or modified URL
86 *
87 */
John Reck961d35d2011-06-23 09:45:54 -070088 public static String smartUrlFilter(String url) {
John Reck71e51422011-07-01 16:49:28 -070089 return smartUrlFilter(url, true);
90 }
Michael Kolb8233fac2010-10-26 16:08:53 -070091
John Reck71e51422011-07-01 16:49:28 -070092 /**
93 * Attempts to determine whether user input is a URL or search
94 * terms. Anything with a space is passed to search if canBeSearch is true.
95 *
96 * Converts to lowercase any mistakenly uppercased schema (i.e.,
97 * "Http://" converts to "http://"
98 *
99 * @param canBeSearch If true, will return a search url if it isn't a valid
100 * URL. If false, invalid URLs will return null
101 * @return Original or modified URL
102 *
103 */
104 public static String smartUrlFilter(String url, boolean canBeSearch) {
Michael Kolb8233fac2010-10-26 16:08:53 -0700105 String inUrl = url.trim();
106 boolean hasSpace = inUrl.indexOf(' ') != -1;
107
108 Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
109 if (matcher.matches()) {
110 // force scheme to lowercase
111 String scheme = matcher.group(1);
112 String lcScheme = scheme.toLowerCase();
113 if (!lcScheme.equals(scheme)) {
114 inUrl = lcScheme + matcher.group(2);
115 }
John Reck71e51422011-07-01 16:49:28 -0700116 if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
Michael Kolb8233fac2010-10-26 16:08:53 -0700117 inUrl = inUrl.replace(" ", "%20");
118 }
119 return inUrl;
120 }
121 if (!hasSpace) {
122 if (Patterns.WEB_URL.matcher(inUrl).matches()) {
123 return URLUtil.guessUrl(inUrl);
124 }
125 }
John Reck71e51422011-07-01 16:49:28 -0700126 if (canBeSearch) {
127 return URLUtil.composeSearchUrl(inUrl,
128 QUICKSEARCH_G, QUERY_PLACE_HOLDER);
129 }
130 return null;
Michael Kolb8233fac2010-10-26 16:08:53 -0700131 }
132
133 /* package */ static String fixUrl(String inUrl) {
134 // FIXME: Converting the url to lower case
135 // duplicates functionality in smartUrlFilter().
136 // However, changing all current callers of fixUrl to
137 // call smartUrlFilter in addition may have unwanted
138 // consequences, and is deferred for now.
139 int colon = inUrl.indexOf(':');
140 boolean allLower = true;
141 for (int index = 0; index < colon; index++) {
142 char ch = inUrl.charAt(index);
143 if (!Character.isLetter(ch)) {
144 break;
145 }
146 allLower &= Character.isLowerCase(ch);
147 if (index == colon - 1 && !allLower) {
148 inUrl = inUrl.substring(0, colon).toLowerCase()
149 + inUrl.substring(colon);
150 }
151 }
152 if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
153 return inUrl;
154 if (inUrl.startsWith("http:") ||
155 inUrl.startsWith("https:")) {
156 if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
157 inUrl = inUrl.replaceFirst("/", "//");
158 } else inUrl = inUrl.replaceFirst(":", "://");
159 }
160 return inUrl;
161 }
162
John Reck324d4402011-01-11 16:56:42 -0800163 // Returns the filtered URL. Cannot return null, but can return an empty string
164 /* package */ static String filteredUrl(String inUrl) {
165 if (inUrl == null) {
166 return "";
167 }
John Recke44e5622011-01-27 17:47:46 -0800168 if (inUrl.startsWith("content:")
169 || inUrl.startsWith("browser:")) {
John Reck324d4402011-01-11 16:56:42 -0800170 return "";
171 }
172 return inUrl;
173 }
174
John Reckfb3017f2010-10-26 19:01:24 -0700175}