blob: c922e55f54d5e0007ca39088d61bce50dbc99e14 [file] [log] [blame]
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Michael Kolb8233fac2010-10-26 16:08:53 -070016
John Reckfb3017f2010-10-26 19:01:24 -070017package com.android.browser;
18
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
25
Michael Kolb8233fac2010-10-26 16:08:53 -070026/**
27 * Utility methods for Url manipulation
28 */
John Reckfb3017f2010-10-26 19:01:24 -070029public class UrlUtils {
30
Michael Kolb8233fac2010-10-26 16:08:53 -070031 static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
32 "(?i)" + // switch on case insensitive matching
33 "(" + // begin group for schema
34 "(?:http|https|file):\\/\\/" +
Patrick Scottb92bbb42011-01-05 11:38:58 -050035 "|(?:inline|data|about|javascript):" +
Michael Kolb8233fac2010-10-26 16:08:53 -070036 ")" +
37 "(.*)" );
38
39 // Google search
40 private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
41 private final static String QUERY_PLACE_HOLDER = "%s";
42
John Reckfb3017f2010-10-26 19:01:24 -070043 // Regular expression which matches http://, followed by some stuff, followed by
44 // optionally a trailing slash, all matched as separate groups.
45 private static final Pattern STRIP_URL_PATTERN = Pattern.compile("^(http://)(.*?)(/$)?");
46
47 private UrlUtils() { /* cannot be instantiated */ }
48
49 /**
50 * Strips the provided url of preceding "http://" and any trailing "/". Does not
51 * strip "https://". If the provided string cannot be stripped, the original string
52 * is returned.
53 *
54 * TODO: Put this in TextUtils to be used by other packages doing something similar.
55 *
56 * @param url a url to strip, like "http://www.google.com/"
57 * @return a stripped url like "www.google.com", or the original string if it could
58 * not be stripped
59 */
John Reck2cb9dc22011-05-11 16:12:27 -070060 public static String stripUrl(String url) {
John Reckfb3017f2010-10-26 19:01:24 -070061 if (url == null) return null;
62 Matcher m = STRIP_URL_PATTERN.matcher(url);
63 if (m.matches() && m.groupCount() == 3) {
64 return m.group(2);
65 } else {
66 return url;
67 }
68 }
Michael Kolb8233fac2010-10-26 16:08:53 -070069
70 protected static String smartUrlFilter(Uri inUri) {
71 if (inUri != null) {
72 return smartUrlFilter(inUri.toString());
73 }
74 return null;
75 }
76
77 /**
78 * Attempts to determine whether user input is a URL or search
79 * terms. Anything with a space is passed to search.
80 *
81 * Converts to lowercase any mistakenly uppercased schema (i.e.,
82 * "Http://" converts to "http://"
83 *
84 * @return Original or modified URL
85 *
86 */
John Reck961d35d2011-06-23 09:45:54 -070087 public static String smartUrlFilter(String url) {
John Reck71e51422011-07-01 16:49:28 -070088 return smartUrlFilter(url, true);
89 }
Michael Kolb8233fac2010-10-26 16:08:53 -070090
John Reck71e51422011-07-01 16:49:28 -070091 /**
92 * Attempts to determine whether user input is a URL or search
93 * terms. Anything with a space is passed to search if canBeSearch is true.
94 *
95 * Converts to lowercase any mistakenly uppercased schema (i.e.,
96 * "Http://" converts to "http://"
97 *
98 * @param canBeSearch If true, will return a search url if it isn't a valid
99 * URL. If false, invalid URLs will return null
100 * @return Original or modified URL
101 *
102 */
103 public static String smartUrlFilter(String url, boolean canBeSearch) {
Michael Kolb8233fac2010-10-26 16:08:53 -0700104 String inUrl = url.trim();
105 boolean hasSpace = inUrl.indexOf(' ') != -1;
106
107 Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
108 if (matcher.matches()) {
109 // force scheme to lowercase
110 String scheme = matcher.group(1);
111 String lcScheme = scheme.toLowerCase();
112 if (!lcScheme.equals(scheme)) {
113 inUrl = lcScheme + matcher.group(2);
114 }
John Reck71e51422011-07-01 16:49:28 -0700115 if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
Michael Kolb8233fac2010-10-26 16:08:53 -0700116 inUrl = inUrl.replace(" ", "%20");
117 }
118 return inUrl;
119 }
120 if (!hasSpace) {
121 if (Patterns.WEB_URL.matcher(inUrl).matches()) {
122 return URLUtil.guessUrl(inUrl);
123 }
124 }
John Reck71e51422011-07-01 16:49:28 -0700125 if (canBeSearch) {
126 return URLUtil.composeSearchUrl(inUrl,
127 QUICKSEARCH_G, QUERY_PLACE_HOLDER);
128 }
129 return null;
Michael Kolb8233fac2010-10-26 16:08:53 -0700130 }
131
132 /* package */ static String fixUrl(String inUrl) {
133 // FIXME: Converting the url to lower case
134 // duplicates functionality in smartUrlFilter().
135 // However, changing all current callers of fixUrl to
136 // call smartUrlFilter in addition may have unwanted
137 // consequences, and is deferred for now.
138 int colon = inUrl.indexOf(':');
139 boolean allLower = true;
140 for (int index = 0; index < colon; index++) {
141 char ch = inUrl.charAt(index);
142 if (!Character.isLetter(ch)) {
143 break;
144 }
145 allLower &= Character.isLowerCase(ch);
146 if (index == colon - 1 && !allLower) {
147 inUrl = inUrl.substring(0, colon).toLowerCase()
148 + inUrl.substring(colon);
149 }
150 }
151 if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
152 return inUrl;
153 if (inUrl.startsWith("http:") ||
154 inUrl.startsWith("https:")) {
155 if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
156 inUrl = inUrl.replaceFirst("/", "//");
157 } else inUrl = inUrl.replaceFirst(":", "://");
158 }
159 return inUrl;
160 }
161
John Reck324d4402011-01-11 16:56:42 -0800162 // Returns the filtered URL. Cannot return null, but can return an empty string
163 /* package */ static String filteredUrl(String inUrl) {
164 if (inUrl == null) {
165 return "";
166 }
John Recke44e5622011-01-27 17:47:46 -0800167 if (inUrl.startsWith("content:")
168 || inUrl.startsWith("browser:")) {
John Reck324d4402011-01-11 16:56:42 -0800169 return "";
170 }
171 return inUrl;
172 }
173
John Reckfb3017f2010-10-26 19:01:24 -0700174}