John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame^] | 16 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 17 | package com.android.browser; |
| 18 | |
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
| 25 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame^] | 26 | /** |
| 27 | * Utility methods for Url manipulation |
| 28 | */ |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 29 | public class UrlUtils { |
| 30 | |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame^] | 31 | static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile( |
| 32 | "(?i)" + // switch on case insensitive matching |
| 33 | "(" + // begin group for schema |
| 34 | "(?:http|https|file):\\/\\/" + |
| 35 | "|(?:inline|data|about|content|javascript):" + |
| 36 | ")" + |
| 37 | "(.*)" ); |
| 38 | |
| 39 | // Google search |
| 40 | private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s"; |
| 41 | private final static String QUERY_PLACE_HOLDER = "%s"; |
| 42 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 43 | // Regular expression which matches http://, followed by some stuff, followed by |
| 44 | // optionally a trailing slash, all matched as separate groups. |
| 45 | private static final Pattern STRIP_URL_PATTERN = Pattern.compile("^(http://)(.*?)(/$)?"); |
| 46 | |
| 47 | private UrlUtils() { /* cannot be instantiated */ } |
| 48 | |
| 49 | /** |
| 50 | * Strips the provided url of preceding "http://" and any trailing "/". Does not |
| 51 | * strip "https://". If the provided string cannot be stripped, the original string |
| 52 | * is returned. |
| 53 | * |
| 54 | * TODO: Put this in TextUtils to be used by other packages doing something similar. |
| 55 | * |
| 56 | * @param url a url to strip, like "http://www.google.com/" |
| 57 | * @return a stripped url like "www.google.com", or the original string if it could |
| 58 | * not be stripped |
| 59 | */ |
| 60 | /* package */ static String stripUrl(String url) { |
| 61 | if (url == null) return null; |
| 62 | Matcher m = STRIP_URL_PATTERN.matcher(url); |
| 63 | if (m.matches() && m.groupCount() == 3) { |
| 64 | return m.group(2); |
| 65 | } else { |
| 66 | return url; |
| 67 | } |
| 68 | } |
Michael Kolb | 8233fac | 2010-10-26 16:08:53 -0700 | [diff] [blame^] | 69 | |
| 70 | protected static String smartUrlFilter(Uri inUri) { |
| 71 | if (inUri != null) { |
| 72 | return smartUrlFilter(inUri.toString()); |
| 73 | } |
| 74 | return null; |
| 75 | } |
| 76 | |
| 77 | /** |
| 78 | * Attempts to determine whether user input is a URL or search |
| 79 | * terms. Anything with a space is passed to search. |
| 80 | * |
| 81 | * Converts to lowercase any mistakenly uppercased schema (i.e., |
| 82 | * "Http://" converts to "http://" |
| 83 | * |
| 84 | * @return Original or modified URL |
| 85 | * |
| 86 | */ |
| 87 | protected static String smartUrlFilter(String url) { |
| 88 | |
| 89 | String inUrl = url.trim(); |
| 90 | boolean hasSpace = inUrl.indexOf(' ') != -1; |
| 91 | |
| 92 | Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl); |
| 93 | if (matcher.matches()) { |
| 94 | // force scheme to lowercase |
| 95 | String scheme = matcher.group(1); |
| 96 | String lcScheme = scheme.toLowerCase(); |
| 97 | if (!lcScheme.equals(scheme)) { |
| 98 | inUrl = lcScheme + matcher.group(2); |
| 99 | } |
| 100 | if (hasSpace) { |
| 101 | inUrl = inUrl.replace(" ", "%20"); |
| 102 | } |
| 103 | return inUrl; |
| 104 | } |
| 105 | if (!hasSpace) { |
| 106 | if (Patterns.WEB_URL.matcher(inUrl).matches()) { |
| 107 | return URLUtil.guessUrl(inUrl); |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | // FIXME: Is this the correct place to add to searches? |
| 112 | // what if someone else calls this function? |
| 113 | |
| 114 | // Browser.addSearchUrl(mBrowser.getContentResolver(), inUrl); |
| 115 | return URLUtil.composeSearchUrl(inUrl, QUICKSEARCH_G, QUERY_PLACE_HOLDER); |
| 116 | } |
| 117 | |
| 118 | /* package */ static String fixUrl(String inUrl) { |
| 119 | // FIXME: Converting the url to lower case |
| 120 | // duplicates functionality in smartUrlFilter(). |
| 121 | // However, changing all current callers of fixUrl to |
| 122 | // call smartUrlFilter in addition may have unwanted |
| 123 | // consequences, and is deferred for now. |
| 124 | int colon = inUrl.indexOf(':'); |
| 125 | boolean allLower = true; |
| 126 | for (int index = 0; index < colon; index++) { |
| 127 | char ch = inUrl.charAt(index); |
| 128 | if (!Character.isLetter(ch)) { |
| 129 | break; |
| 130 | } |
| 131 | allLower &= Character.isLowerCase(ch); |
| 132 | if (index == colon - 1 && !allLower) { |
| 133 | inUrl = inUrl.substring(0, colon).toLowerCase() |
| 134 | + inUrl.substring(colon); |
| 135 | } |
| 136 | } |
| 137 | if (inUrl.startsWith("http://") || inUrl.startsWith("https://")) |
| 138 | return inUrl; |
| 139 | if (inUrl.startsWith("http:") || |
| 140 | inUrl.startsWith("https:")) { |
| 141 | if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) { |
| 142 | inUrl = inUrl.replaceFirst("/", "//"); |
| 143 | } else inUrl = inUrl.replaceFirst(":", "://"); |
| 144 | } |
| 145 | return inUrl; |
| 146 | } |
| 147 | |
John Reck | fb3017f | 2010-10-26 19:01:24 -0700 | [diff] [blame] | 148 | } |