John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 16 | |
Bijan Amirzada | 41242f2 | 2014-03-21 12:12:18 -0700 | [diff] [blame] | 17 | package com.android.browser; |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 18 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 19 | import android.net.Uri; |
| 20 | import android.util.Patterns; |
| 21 | import android.webkit.URLUtil; |
| 22 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 23 | import java.util.regex.Matcher; |
| 24 | import java.util.regex.Pattern; |
| 25 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 26 | /** |
| 27 | * Utility methods for Url manipulation |
| 28 | */ |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 29 | public class UrlUtils { |
| 30 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 31 | static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile( |
| 32 | "(?i)" + // switch on case insensitive matching |
| 33 | "(" + // begin group for schema |
| 34 | "(?:http|https|file):\\/\\/" + |
Patrick Scott | b92bbb4 | 2011-01-05 11:38:58 -0500 | [diff] [blame] | 35 | "|(?:inline|data|about|javascript):" + |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 36 | ")" + |
| 37 | "(.*)" ); |
| 38 | |
| 39 | // Google search |
| 40 | private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s"; |
| 41 | private final static String QUERY_PLACE_HOLDER = "%s"; |
| 42 | |
John Reck | fe5b94d | 2011-09-06 10:21:29 -0700 | [diff] [blame] | 43 | // Regular expression to strip http:// and optionally |
John Reck | 434e9f8 | 2011-08-10 18:16:52 -0700 | [diff] [blame] | 44 | // the trailing slash |
| 45 | private static final Pattern STRIP_URL_PATTERN = |
John Reck | fe5b94d | 2011-09-06 10:21:29 -0700 | [diff] [blame] | 46 | Pattern.compile("^http://(.*?)/?$"); |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 47 | |
/** Not instantiable: this class only provides static members. */
private UrlUtils() {
    // cannot be instantiated
}
| 49 | |
| 50 | /** |
John Reck | fe5b94d | 2011-09-06 10:21:29 -0700 | [diff] [blame] | 51 | * Strips the provided url of preceding "http://" and any trailing "/". Does not |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 52 | * strip "https://". If the provided string cannot be stripped, the original string |
| 53 | * is returned. |
| 54 | * |
| 55 | * TODO: Put this in TextUtils to be used by other packages doing something similar. |
| 56 | * |
| 57 | * @param url a url to strip, like "http://www.google.com/" |
John Reck | fe5b94d | 2011-09-06 10:21:29 -0700 | [diff] [blame] | 58 | * @return a stripped url like "www.google.com", or the original string if it could |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 59 | * not be stripped |
| 60 | */ |
John Reck | 2cb9dc2 | 2011-05-11 16:12:27 -0700 | [diff] [blame] | 61 | public static String stripUrl(String url) { |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 62 | if (url == null) return null; |
| 63 | Matcher m = STRIP_URL_PATTERN.matcher(url); |
John Reck | 434e9f8 | 2011-08-10 18:16:52 -0700 | [diff] [blame] | 64 | if (m.matches()) { |
| 65 | return m.group(1); |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 66 | } else { |
| 67 | return url; |
| 68 | } |
| 69 | } |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 70 | |
| 71 | protected static String smartUrlFilter(Uri inUri) { |
| 72 | if (inUri != null) { |
| 73 | return smartUrlFilter(inUri.toString()); |
| 74 | } |
| 75 | return null; |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * Attempts to determine whether user input is a URL or search |
| 80 | * terms. Anything with a space is passed to search. |
| 81 | * |
| 82 | * Converts to lowercase any mistakenly uppercased schema (i.e., |
| 83 | * "Http://" converts to "http://" |
| 84 | * |
| 85 | * @return Original or modified URL |
| 86 | * |
| 87 | */ |
John Reck | 961d35d | 2011-06-23 09:45:54 -0700 | [diff] [blame] | 88 | public static String smartUrlFilter(String url) { |
John Reck | 71e5142 | 2011-07-01 16:49:28 -0700 | [diff] [blame] | 89 | return smartUrlFilter(url, true); |
| 90 | } |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 91 | |
/**
 * Attempts to determine whether user input is a URL or search terms.
 *
 * The input is trimmed and then classified in this order:
 *
 * 1. Input starting with an accepted scheme (see ACCEPTED_URI_SCHEMA) is
 *    returned as a URL with the scheme forced to lowercase (i.e., "Http://"
 *    converts to "http://"). Spaces are replaced by "%20" only when the input
 *    also matches Patterns.WEB_URL; otherwise it is returned with its spaces.
 * 2. Input without a space that matches Patterns.WEB_URL is passed through
 *    URLUtil.guessUrl.
 * 3. Anything else becomes a search URL if canBeSearch is true.
 *
 * @param url the text entered by the user; may be null
 * @param canBeSearch If true, will return a search url if it isn't a valid
 *                    URL. If false, invalid URLs will return null
 * @return Original or modified URL; null if {@code url} is null, or if the
 *         input is not a URL and {@code canBeSearch} is false
 */
public static String smartUrlFilter(String url, boolean canBeSearch) {
    // Mirror the Uri overload: null in, null out, instead of failing in trim().
    if (url == null) {
        return null;
    }
    String inUrl = url.trim();
    boolean hasSpace = inUrl.indexOf(' ') != -1;

    Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
    if (matcher.matches()) {
        // Force the scheme to lowercase. Locale.ROOT keeps the mapping
        // independent of the device locale; with the default locale a Turkish
        // device would turn "FILE://" into "f\u0131le://" (dotless i).
        String scheme = matcher.group(1);
        String lcScheme = scheme.toLowerCase(java.util.Locale.ROOT);
        if (!lcScheme.equals(scheme)) {
            inUrl = lcScheme + matcher.group(2);
        }
        if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
            inUrl = inUrl.replace(" ", "%20");
        }
        return inUrl;
    }
    if (!hasSpace) {
        if (Patterns.WEB_URL.matcher(inUrl).matches()) {
            return URLUtil.guessUrl(inUrl);
        }
    }
    if (canBeSearch) {
        return URLUtil.composeSearchUrl(inUrl,
                QUICKSEARCH_G, QUERY_PLACE_HOLDER);
    }
    return null;
}
| 132 | |
kaiyiz | 6e5b3e0 | 2013-08-19 20:02:01 +0800 | [diff] [blame] | 133 | public static String fixUrl(String inUrl) { |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame] | 134 | // FIXME: Converting the url to lower case |
| 135 | // duplicates functionality in smartUrlFilter(). |
| 136 | // However, changing all current callers of fixUrl to |
| 137 | // call smartUrlFilter in addition may have unwanted |
| 138 | // consequences, and is deferred for now. |
| 139 | int colon = inUrl.indexOf(':'); |
| 140 | boolean allLower = true; |
| 141 | for (int index = 0; index < colon; index++) { |
| 142 | char ch = inUrl.charAt(index); |
| 143 | if (!Character.isLetter(ch)) { |
| 144 | break; |
| 145 | } |
| 146 | allLower &= Character.isLowerCase(ch); |
| 147 | if (index == colon - 1 && !allLower) { |
| 148 | inUrl = inUrl.substring(0, colon).toLowerCase() |
| 149 | + inUrl.substring(colon); |
| 150 | } |
| 151 | } |
| 152 | if (inUrl.startsWith("http://") || inUrl.startsWith("https://")) |
| 153 | return inUrl; |
| 154 | if (inUrl.startsWith("http:") || |
| 155 | inUrl.startsWith("https:")) { |
| 156 | if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) { |
| 157 | inUrl = inUrl.replaceFirst("/", "//"); |
| 158 | } else inUrl = inUrl.replaceFirst(":", "://"); |
| 159 | } |
| 160 | return inUrl; |
| 161 | } |
| 162 | |
John Reck | 324d440 | 2011-01-11 16:56:42 -0800 | [diff] [blame] | 163 | // Returns the filtered URL. Cannot return null, but can return an empty string |
| 164 | /* package */ static String filteredUrl(String inUrl) { |
| 165 | if (inUrl == null) { |
| 166 | return ""; |
| 167 | } |
John Reck | e44e562 | 2011-01-27 17:47:46 -0800 | [diff] [blame] | 168 | if (inUrl.startsWith("content:") |
| 169 | || inUrl.startsWith("browser:")) { |
John Reck | 324d440 | 2011-01-11 16:56:42 -0800 | [diff] [blame] | 170 | return ""; |
| 171 | } |
| 172 | return inUrl; |
| 173 | } |
| 174 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 175 | } |