John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 16 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 17 | package com.android.browser; |
| 18 | |
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
| 25 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 26 | /** |
| 27 | * Utility methods for Url manipulation |
| 28 | */ |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 29 | public class UrlUtils { |
| 30 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 31 | static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile( |
| 32 | "(?i)" + // switch on case insensitive matching |
| 33 | "(" + // begin group for schema |
| 34 | "(?:http|https|file):\\/\\/" + |
Patrick Scott | b92bbb4 | 2011-01-05 11:38:58 -0500 | [diff] [blame] | 35 | "|(?:inline|data|about|javascript):" + |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 36 | ")" + |
| 37 | "(.*)" ); |
| 38 | |
| 39 | // Google search |
| 40 | private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s"; |
| 41 | private final static String QUERY_PLACE_HOLDER = "%s"; |
| 42 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 43 | // Regular expression which matches http://, followed by some stuff, followed by |
| 44 | // optionally a trailing slash, all matched as separate groups. |
| 45 | private static final Pattern STRIP_URL_PATTERN = Pattern.compile("^(http://)(.*?)(/$)?"); |
| 46 | |
| 47 | private UrlUtils() { /* cannot be instantiated */ } |
| 48 | |
| 49 | /** |
| 50 | * Strips the provided url of preceding "http://" and any trailing "/". Does not |
| 51 | * strip "https://". If the provided string cannot be stripped, the original string |
| 52 | * is returned. |
| 53 | * |
| 54 | * TODO: Put this in TextUtils to be used by other packages doing something similar. |
| 55 | * |
| 56 | * @param url a url to strip, like "http://www.google.com/" |
| 57 | * @return a stripped url like "www.google.com", or the original string if it could |
| 58 | * not be stripped |
| 59 | */ |
John Reck | 2cb9dc2 | 2011-05-11 16:12:27 -0700 | [diff] [blame] | 60 | public static String stripUrl(String url) { |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 61 | if (url == null) return null; |
| 62 | Matcher m = STRIP_URL_PATTERN.matcher(url); |
| 63 | if (m.matches() && m.groupCount() == 3) { |
| 64 | return m.group(2); |
| 65 | } else { |
| 66 | return url; |
| 67 | } |
| 68 | } |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 69 | |
| 70 | protected static String smartUrlFilter(Uri inUri) { |
| 71 | if (inUri != null) { |
| 72 | return smartUrlFilter(inUri.toString()); |
| 73 | } |
| 74 | return null; |
| 75 | } |
| 76 | |
| 77 | /** |
| 78 | * Attempts to determine whether user input is a URL or search |
| 79 | * terms. Anything with a space is passed to search. |
| 80 | * |
| 81 | * Converts to lowercase any mistakenly uppercased schema (i.e., |
| 82 | * "Http://" converts to "http://" |
| 83 | * |
| 84 | * @return Original or modified URL |
| 85 | * |
| 86 | */ |
John Reck | 961d35d | 2011-06-23 09:45:54 -0700 | [diff] [blame] | 87 | public static String smartUrlFilter(String url) { |
John Reck | 71e5142 | 2011-07-01 16:49:28 -0700 | [diff] [blame] | 88 | return smartUrlFilter(url, true); |
| 89 | } |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 90 | |
John Reck | 71e5142 | 2011-07-01 16:49:28 -0700 | [diff] [blame] | 91 | /** |
| 92 | * Attempts to determine whether user input is a URL or search |
| 93 | * terms. Anything with a space is passed to search if canBeSearch is true. |
| 94 | * |
| 95 | * Converts to lowercase any mistakenly uppercased schema (i.e., |
| 96 | * "Http://" converts to "http://" |
| 97 | * |
| 98 | * @param canBeSearch If true, will return a search url if it isn't a valid |
| 99 | * URL. If false, invalid URLs will return null |
| 100 | * @return Original or modified URL |
| 101 | * |
| 102 | */ |
| 103 | public static String smartUrlFilter(String url, boolean canBeSearch) { |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 104 | String inUrl = url.trim(); |
| 105 | boolean hasSpace = inUrl.indexOf(' ') != -1; |
| 106 | |
| 107 | Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl); |
| 108 | if (matcher.matches()) { |
| 109 | // force scheme to lowercase |
| 110 | String scheme = matcher.group(1); |
| 111 | String lcScheme = scheme.toLowerCase(); |
| 112 | if (!lcScheme.equals(scheme)) { |
| 113 | inUrl = lcScheme + matcher.group(2); |
| 114 | } |
John Reck | 71e5142 | 2011-07-01 16:49:28 -0700 | [diff] [blame] | 115 | if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) { |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 116 | inUrl = inUrl.replace(" ", "%20"); |
| 117 | } |
| 118 | return inUrl; |
| 119 | } |
| 120 | if (!hasSpace) { |
| 121 | if (Patterns.WEB_URL.matcher(inUrl).matches()) { |
| 122 | return URLUtil.guessUrl(inUrl); |
| 123 | } |
| 124 | } |
John Reck | 71e5142 | 2011-07-01 16:49:28 -0700 | [diff] [blame] | 125 | if (canBeSearch) { |
| 126 | return URLUtil.composeSearchUrl(inUrl, |
| 127 | QUICKSEARCH_G, QUERY_PLACE_HOLDER); |
| 128 | } |
| 129 | return null; |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 130 | } |
| 131 | |
| 132 | /* package */ static String fixUrl(String inUrl) { |
| 133 | // FIXME: Converting the url to lower case |
| 134 | // duplicates functionality in smartUrlFilter(). |
| 135 | // However, changing all current callers of fixUrl to |
| 136 | // call smartUrlFilter in addition may have unwanted |
| 137 | // consequences, and is deferred for now. |
| 138 | int colon = inUrl.indexOf(':'); |
| 139 | boolean allLower = true; |
| 140 | for (int index = 0; index < colon; index++) { |
| 141 | char ch = inUrl.charAt(index); |
| 142 | if (!Character.isLetter(ch)) { |
| 143 | break; |
| 144 | } |
| 145 | allLower &= Character.isLowerCase(ch); |
| 146 | if (index == colon - 1 && !allLower) { |
| 147 | inUrl = inUrl.substring(0, colon).toLowerCase() |
| 148 | + inUrl.substring(colon); |
| 149 | } |
| 150 | } |
| 151 | if (inUrl.startsWith("http://") || inUrl.startsWith("https://")) |
| 152 | return inUrl; |
| 153 | if (inUrl.startsWith("http:") || |
| 154 | inUrl.startsWith("https:")) { |
| 155 | if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) { |
| 156 | inUrl = inUrl.replaceFirst("/", "//"); |
| 157 | } else inUrl = inUrl.replaceFirst(":", "://"); |
| 158 | } |
| 159 | return inUrl; |
| 160 | } |
| 161 | |
John Reck | 324d440 | 2011-01-11 16:56:42 -0800 | [diff] [blame] | 162 | // Returns the filtered URL. Cannot return null, but can return an empty string |
| 163 | /* package */ static String filteredUrl(String inUrl) { |
| 164 | if (inUrl == null) { |
| 165 | return ""; |
| 166 | } |
John Reck | e44e562 | 2011-01-27 17:47:46 -0800 | [diff] [blame] | 167 | if (inUrl.startsWith("content:") |
| 168 | || inUrl.startsWith("browser:")) { |
John Reck | 324d440 | 2011-01-11 16:56:42 -0800 | [diff] [blame] | 169 | return ""; |
| 170 | } |
| 171 | return inUrl; |
| 172 | } |
| 173 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 174 | } |