Package us.codecraft.webmagic.utils
Class UrlUtils
- java.lang.Object
-
- us.codecraft.webmagic.utils.UrlUtils
-
public class UrlUtils extends java.lang.Object
URL and HTML utilities.
- Since:
- 0.1.0
- Author:
- code4crafter@gmail.com
-
-
Constructor Summary
Constructors
Constructor Description
UrlUtils()
-
Method Summary
All Methods | Static Methods | Concrete Methods | Deprecated Methods
Modifier and Type | Method | Description
static java.lang.String
canonicalizeUrl(java.lang.String url, java.lang.String refer)
canonicalizeUrl
Borrowed from Jsoup.
static java.util.List<Request>
convertToRequests(java.util.Collection<java.lang.String> urls)
static java.util.List<java.lang.String>
convertToUrls(java.util.Collection<Request> requests)
static java.lang.String
encodeIllegalCharacterInUrl(java.lang.String url)
Deprecated.
static java.lang.String
fixIllegalCharacterInUrl(java.lang.String url)
static java.lang.String
getCharset(java.lang.String contentType)
static java.lang.String
getDomain(java.lang.String url)
static java.lang.String
getHost(java.lang.String url)
static java.lang.String
removePort(java.lang.String domain)
static java.lang.String
removeProtocol(java.lang.String url)
-
-
-
Method Detail
-
canonicalizeUrl
public static java.lang.String canonicalizeUrl(java.lang.String url, java.lang.String refer)
canonicalizeUrl
Borrowed from Jsoup.
- Parameters:
url - url
refer - refer
- Returns:
- canonicalizeUrl
-
encodeIllegalCharacterInUrl
public static java.lang.String encodeIllegalCharacterInUrl(java.lang.String url)
Deprecated.
- Parameters:
url - url
- Returns:
- new url
-
fixIllegalCharacterInUrl
public static java.lang.String fixIllegalCharacterInUrl(java.lang.String url)
-
getHost
public static java.lang.String getHost(java.lang.String url)
-
removeProtocol
public static java.lang.String removeProtocol(java.lang.String url)
-
getDomain
public static java.lang.String getDomain(java.lang.String url)
-
removePort
public static java.lang.String removePort(java.lang.String domain)
-
convertToRequests
public static java.util.List<Request> convertToRequests(java.util.Collection<java.lang.String> urls)
-
convertToUrls
public static java.util.List<java.lang.String> convertToUrls(java.util.Collection<Request> requests)
-
getCharset
public static java.lang.String getCharset(java.lang.String contentType)
-
-