Module | FeedTools::UriHelper |
In: |
lib/feed_tools/helpers/uri_helper.rb
|
Generic url processing methods needed in numerous places throughout FeedTools
Converts a url into a tag uri
# File lib/feed_tools/helpers/uri_helper.rb, line 176
#
# Converts a url into a tag uri of the form
# "tag:<host>,<YYYY-MM-DD>:<rest of the url after the host>".
#
# url::  the String url to convert; it is passed through normalize_url first.
# date:: the Time whose year-month-day is embedded in the tag uri.
#
# Returns the tag uri as a String.
#
# Raises ArgumentError if url is not a String, if date is not a Time, if the
# normalized url is not accepted by FeedTools::UriHelper.is_uri?, or if no
# host can be located in the normalized url.
def self.build_tag_uri(url, date)
  unless url.kind_of? String
    raise ArgumentError, "Expected String, got #{url.class.name}"
  end
  unless date.kind_of? Time
    raise ArgumentError, "Expected Time, got #{date.class.name}"
  end
  tag_uri = normalize_url(url)
  unless FeedTools::UriHelper.is_uri?(tag_uri)
    raise ArgumentError, "Must supply a valid URL."
  end
  host = URI.parse(tag_uri).host
  # A tag uri needs an authority; without a host there is nothing to build
  # from, so fail with the same error used for other unusable urls instead
  # of letting String#index blow up on nil.
  if host.nil?
    raise ArgumentError, "Must supply a valid URL."
  end
  # Work on copies so the string handed back by normalize_url is untouched.
  # Drop the scheme prefix, then turn fragment markers into path separators.
  remainder = tag_uri.gsub(/^(http|ftp|file):\/*/, "").gsub(/#/, "/")
  host_index = remainder.index(host)
  if host_index.nil?
    raise ArgumentError, "Must supply a valid URL."
  end
  # Everything after the host becomes the "specific" part of the tag uri.
  specific = remainder[(host_index + host.size)..-1]
  return "tag:#{host},#{date.strftime('%Y-%m-%d')}:" + "#{specific}"
end
Converts a url into a urn:uuid: uri
# File lib/feed_tools/helpers/uri_helper.rb, line 196
#
# Converts a url into a urn:uuid: uri.
#
# url:: the String url to convert; it is passed through normalize_url and
#       the result is fed to UUID.sha1_create together with
#       UUID_URL_NAMESPACE.
#
# Returns the String form of the generated UUID's uri.
#
# Raises ArgumentError if url is not a String.
def self.build_urn_uri(url)
  raise ArgumentError, "Expected String, got #{url.class.name}" unless url.kind_of?(String)

  canonical_url = normalize_url(url)
  # Loaded here rather than at file load time, so uuidtools is only needed
  # once a urn uri is actually requested.
  require 'uuidtools'
  generated_uuid = UUID.sha1_create(UUID_URL_NAMESPACE, canonical_url)
  generated_uuid.to_uri.to_s
end
Returns true if the idn module can be used.
# File lib/feed_tools/helpers/uri_helper.rb, line 33
#
# Returns true if the idn module can be used.
#
# The answer is worked out once and remembered in @idn_enabled: the idn
# library is required and a known internationalized host name is converted
# as a sanity check. A missing library is remembered as false, so later
# calls do not retry the failing require.
#
# Setting FeedTools.configurations[:idn_enabled] to false always wins and
# makes this method return false without probing for the library.
def self.idn_enabled?
  # This is an override variable to keep idn from being used even if it
  # is available.
  return false if FeedTools.configurations[:idn_enabled] == false

  if @idn_enabled.nil?
    begin
      require 'idn'
      # Only trust the library if it produces the expected punycode.
      @idn_enabled = (IDN::Idna.toASCII('http://www.詹姆斯.com/') ==
        "http://www.xn--8ws00zhy3a.com/")
    rescue LoadError
      # idn not installed, disable features that rely on idn.
      @idn_enabled = false
    end
  end
  return @idn_enabled
end
Returns true if the parameter appears to be a valid uri
# File lib/feed_tools/helpers/uri_helper.rb, line 206
#
# Returns true if the parameter appears to be a valid uri.
#
# url:: the value to test. nil is rejected immediately.
#
# A value counts as a uri when URI.parse accepts it and the parsed result
# has a non-blank scheme. A URI::InvalidURIError from the parser yields
# false instead of propagating.
def self.is_uri?(url)
  return false if url.nil?

  parsed = begin
    URI.parse(url)
  rescue URI::InvalidURIError
    return false
  end
  # Scheme-less strings parse fine but are not usable as uris.
  !parsed.scheme.blank?
end
Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls and makes every effort to figure out what it was supposed to be. Also translates from the feed: and rss: pseudo-protocols to the http: protocol.
# File lib/feed_tools/helpers/uri_helper.rb, line 61
#
# Attempts to ensure that the passed url is valid and sane. Accepts very,
# very ugly urls and makes every effort to figure out what it was supposed
# to be. Also translates from the feed: and rss: pseudo-protocols to the
# http: protocol.
#
# url:: the url to clean up. Non-String values are converted with to_s.
#
# Returns:
# * nil when url is nil (or when the cleaned-up url is a bare "http://"),
# * "" when url is blank,
# * "#" when the url contains "javascript:",
# * otherwise the normalized url as a String.
#
# The final FeedTools::URI.parse call is not guarded, so whatever it raises
# for an unparseable result propagates to the caller.
def self.normalize_url(url)
  return nil if url.nil?
  url = url.to_s unless url.kind_of?(String)
  return "" if url.blank?
  normalized_url = url.strip

  # Best effort: treat the input as a filesystem path first. Failures are
  # ignored on purpose, but process-level exceptions (Interrupt, SystemExit,
  # NoMemoryError, ...) are allowed to propagate.
  begin
    normalized_url =
      FeedTools::URI.convert_path(normalized_url.strip).normalize.to_s
  rescue StandardError, ScriptError
  end

  # Next try a straight parse; fall back to unescaping the raw input, and
  # finally to the stripped raw input itself.
  begin
    begin
      normalized_url =
        FeedTools::URI.parse(normalized_url.strip).normalize.to_s
    rescue StandardError, ScriptError
      normalized_url = CGI.unescape(url.strip)
    end
  rescue StandardError, ScriptError
    normalized_url = url.strip
  end

  # if a url begins with the '/' character, it only makes sense that they
  # meant to be using a file:// url.  Fix it for them.
  if normalized_url.length > 0 && normalized_url[0..0] == "/"
    normalized_url = "file://" + normalized_url
  end

  # if a url begins with a drive letter followed by a colon, we're looking at
  # a file:// url.  Fix it for them.
  if normalized_url.length > 0 &&
      normalized_url.scan(/^[a-zA-Z]:[\\\/]/).size > 0
    normalized_url = "file:///" + normalized_url
  end

  # if a url contains javascript:, it's quite possibly an attempt at
  # doing something malicious.  Let's keep that from getting anywhere,
  # shall we?  Note that the match is deliberately not anchored: the
  # marker is rejected anywhere in the url, not only at the start.
  if normalized_url.downcase =~ /javascript:/
    return "#"
  end

  # deal with all of the many ugly possibilities involved in the rss:
  # and feed: pseudo-protocols (incidentally, whose crazy idea was this
  # mess?)
  normalized_url.gsub!(/^htp:\/*/i, "http://")
  normalized_url.gsub!(/^http:\/*(feed:\/*)?/i, "http://")
  normalized_url.gsub!(/^http:\/*(rss:\/*)?/i, "http://")
  normalized_url.gsub!(/^feed:\/*(http:\/*)?/i, "http://")
  normalized_url.gsub!(/^rss:\/*(http:\/*)?/i, "http://")
  normalized_url.gsub!(/^file:\/*/i, "file:///")
  normalized_url.gsub!(/^https:\/*/i, "https://")
  normalized_url.gsub!(/^mms:\/*/i, "http://")
  # fix (very) bad urls (usually of the user-entered sort)
  normalized_url.gsub!(/^http:\/*(http:\/*)*/i, "http://")
  normalized_url.gsub!(/^http:\/*$/i, "")

  if (normalized_url =~ /^file:/i) == 0
    # Adjust windows-style urls
    normalized_url.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
    normalized_url.gsub!(/\\/, '/')
  else
    # Something like "example.com/feed" has no scheme but looks like a host
    # name, so assume http.
    if FeedTools::URI.parse(normalized_url).scheme.nil? &&
        normalized_url =~ /\./
      normalized_url = "http://" + normalized_url
    end
    if normalized_url == "http://"
      return nil
    end
  end
  # A scheme followed directly by a fragment or query is really just the
  # fragment or query on its own.
  if normalized_url =~ /^https?:\/\/#/i
    normalized_url.gsub!(/^https?:\/\/#/i, "#")
  end
  if normalized_url =~ /^https?:\/\/\?/i
    normalized_url.gsub!(/^https?:\/\/\?/i, "?")
  end

  normalized_url =
    FeedTools::URI.parse(normalized_url.strip).normalize.to_s
  return normalized_url
end
Resolves a relative uri
# File lib/feed_tools/helpers/uri_helper.rb, line 151
#
# Resolves a relative uri.
#
# relative_uri::     the uri to resolve.
# base_uri_sources:: candidate base uris. nil entries and entries with the
#                    "file" scheme are skipped; the base is then picked
#                    from the remaining entries by
#                    FeedTools::XmlHelper.select_not_blank. The array
#                    passed in is not modified.
#
# Returns relative_uri unchanged when base_uri_sources is blank or when
# anything goes wrong while resolving; nil when relative_uri is nil and
# base uris were supplied; otherwise the resolved uri passed through
# FeedTools::UriHelper.normalize_url.
def self.resolve_relative_uri(relative_uri, base_uri_sources=[])
  return relative_uri if base_uri_sources.blank?
  return nil if relative_uri.nil?
  begin
    # Massive HACK to get around file protocol URIs being used to
    # resolve relative URIs on feeds in the local file system.
    # Better to leave these URIs unresolved and hope some other
    # tool resolves them correctly.
    #
    # Filter into a new array: the caller's array must not be altered,
    # least of all left half-filtered when a base uri fails to parse.
    usable_sources = base_uri_sources.reject do |base_uri|
      base_uri.nil? ||
        FeedTools::URI.parse(base_uri).scheme == "file"
    end
    base_uri = FeedTools::URI.parse(
      FeedTools::XmlHelper.select_not_blank(usable_sources))
    resolved_uri = base_uri
    # An empty relative uri resolves to the base uri itself.
    if relative_uri.to_s != ''
      resolved_uri = base_uri + relative_uri.to_s
    end
    return FeedTools::UriHelper.normalize_url(resolved_uri.to_s)
  rescue
    # Resolution is best effort; hand back what we were given.
    return relative_uri
  end
end