Module FeedTools::UriHelper
In: lib/feed_tools/helpers/uri_helper.rb
ActiveRecord::Base DatabaseFeedCache StandardError FeedAccessError FeedItem Feed URI Cloud Link Author Image Enclosure TextInput Category lib/feed_tools/feed_item.rb lib/feed_tools/feed.rb lib/feed_tools.rb lib/feed_tools/vendor/uri.rb lib/feed_tools/database_feed_cache.rb lib/feed_tools/feed_structures.rb FeedToolsHelper FeedItemHelper HtmlHelper FeedHelper DebugHelper XmlHelper UriHelper RetrievalHelper GenericHelper FEED_TOOLS_VERSION FeedTools dot/m_79_0.png

Generic url processing methods needed in numerous places throughout FeedTools

Methods

Public Class methods

Converts a url into a tag uri

[Source]

     # File lib/feed_tools/helpers/uri_helper.rb, line 176
176:     def self.build_tag_uri(url, date)
177:       unless url.kind_of? String
178:         raise ArgumentError, "Expected String, got #{url.class.name}"
179:       end
180:       unless date.kind_of? Time
181:         raise ArgumentError, "Expected Time, got #{date.class.name}"
182:       end
183:       tag_uri = normalize_url(url)
184:       unless FeedTools::UriHelper.is_uri?(tag_uri)
185:         raise ArgumentError, "Must supply a valid URL."
186:       end
187:       host = URI.parse(tag_uri).host
188:       tag_uri.gsub!(/^(http|ftp|file):\/*/, "")
189:       tag_uri.gsub!(/#/, "/")
190:       tag_uri = "tag:#{host},#{date.strftime('%Y-%m-%d')}:" +
191:         "#{tag_uri[(tag_uri.index(host) + host.size)..-1]}"
192:       return tag_uri
193:     end

Converts a url into a urn:uuid: uri

[Source]

     # File lib/feed_tools/helpers/uri_helper.rb, line 196
196:     def self.build_urn_uri(url)
197:       unless url.kind_of? String
198:         raise ArgumentError, "Expected String, got #{url.class.name}"
199:       end
200:       normalized_url = normalize_url(url)
201:       require 'uuidtools'
202:       return UUID.sha1_create(UUID_URL_NAMESPACE, normalized_url).to_uri.to_s
203:     end

Returns true if the idn module can be used.

[Source]

    # File lib/feed_tools/helpers/uri_helper.rb, line 33
33:     def self.idn_enabled?
34:       # This is an override variable to keep idn from being used even if it
35:       # is available.
36:       if FeedTools.configurations[:idn_enabled] == false
37:         return false
38:       end
39:       if @idn_enabled.nil? || @idn_enabled == false
40:         @idn_enabled = false
41:         begin
42:           require 'idn'
43:           if IDN::Idna.toASCII('http://www.詹姆斯.com/') ==
44:             "http://www.xn--8ws00zhy3a.com/"
45:             @idn_enabled = true
46:           else
47:             @idn_enabled = false
48:           end
49:         rescue LoadError
50:           # Tidy not installed, disable features that rely on tidy.
51:           @idn_enabled = false
52:         end
53:       end
54:       return @idn_enabled
55:     end

Returns true if the parameter appears to be a valid uri

[Source]

     # File lib/feed_tools/helpers/uri_helper.rb, line 206
206:     def self.is_uri?(url)
207:       return false if url.nil?
208:       begin
209:         uri = URI.parse(url)
210:         if uri.scheme.blank?
211:           return false
212:         end
213:       rescue URI::InvalidURIError
214:         return false
215:       end
216:       return true
217:     end

Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls and makes every effort to figure out what they were supposed to be. Also translates from the feed: and rss: pseudo-protocols to the http: protocol.

[Source]

     # File lib/feed_tools/helpers/uri_helper.rb, line 61
 61:     def self.normalize_url(url)
 62:       if url.nil?
 63:         return nil
 64:       end
 65:       if !url.kind_of?(String)
 66:         url = url.to_s
 67:       end
 68:       if url.blank?
 69:         return ""
 70:       end
 71:       normalized_url = url.strip
 72: 
 73:       begin
 74:         normalized_url =
 75:           FeedTools::URI.convert_path(normalized_url.strip).normalize.to_s
 76:       rescue Exception
 77:       end
 78:       
 79:       begin
 80:         begin
 81:           normalized_url =
 82:             FeedTools::URI.parse(normalized_url.strip).normalize.to_s
 83:         rescue Exception
 84:           normalized_url = CGI.unescape(url.strip)
 85:         end
 86:       rescue Exception
 87:         normalized_url = url.strip
 88:       end
 89: 
 90:       # if a url begins with the '/' character, it only makes sense that they
 91:       # meant to be using a file:// url.  Fix it for them.
 92:       if normalized_url.length > 0 && normalized_url[0..0] == "/"
 93:         normalized_url = "file://" + normalized_url
 94:       end
 95: 
 96:       # if a url begins with a drive letter followed by a colon, we're looking at
 97:       # a file:// url.  Fix it for them.
 98:       if normalized_url.length > 0 &&
 99:           normalized_url.scan(/^[a-zA-Z]:[\\\/]/).size > 0
100:         normalized_url = "file:///" + normalized_url
101:       end
102: 
103:       # if a url begins with javascript:, it's quite possibly an attempt at
104:       # doing something malicious.  Let's keep that from getting anywhere,
105:       # shall we?
106:       if (normalized_url.downcase =~ /javascript:/) != nil
107:         return "#"
108:       end
109: 
110:       # deal with all of the many ugly possibilities involved in the rss:
111:       # and feed: pseudo-protocols (incidentally, whose crazy idea was this
112:       # mess?)
113:       normalized_url.gsub!(/^htp:\/*/i, "http://")
114:       normalized_url.gsub!(/^http:\/*(feed:\/*)?/i, "http://")
115:       normalized_url.gsub!(/^http:\/*(rss:\/*)?/i, "http://")
116:       normalized_url.gsub!(/^feed:\/*(http:\/*)?/i, "http://")
117:       normalized_url.gsub!(/^rss:\/*(http:\/*)?/i, "http://")
118:       normalized_url.gsub!(/^file:\/*/i, "file:///")
119:       normalized_url.gsub!(/^https:\/*/i, "https://")
120:       normalized_url.gsub!(/^mms:\/*/i, "http://")
121:       # fix (very) bad urls (usually of the user-entered sort)
122:       normalized_url.gsub!(/^http:\/*(http:\/*)*/i, "http://")
123:       normalized_url.gsub!(/^http:\/*$/i, "")
124: 
125:       if (normalized_url =~ /^file:/i) == 0
126:         # Adjust windows-style urls
127:         normalized_url.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
128:         normalized_url.gsub!(/\\/, '/')
129:       else
130:         if FeedTools::URI.parse(normalized_url).scheme == nil &&
131:             normalized_url =~ /\./ &&
132:           normalized_url = "http://" + normalized_url
133:         end
134:         if normalized_url == "http://"
135:           return nil
136:         end
137:       end
138:       if normalized_url =~ /^https?:\/\/#/i
139:         normalized_url.gsub!(/^https?:\/\/#/i, "#")
140:       end
141:       if normalized_url =~ /^https?:\/\/\?/i
142:         normalized_url.gsub!(/^https?:\/\/\?/i, "?")
143:       end
144: 
145:       normalized_url =
146:         FeedTools::URI.parse(normalized_url.strip).normalize.to_s
147:       return normalized_url
148:     end

Resolves a relative uri

[Source]

     # File lib/feed_tools/helpers/uri_helper.rb, line 151
151:     def self.resolve_relative_uri(relative_uri, base_uri_sources=[])
152:       return relative_uri if base_uri_sources.blank?
153:       return nil if relative_uri.nil?
154:       begin
155:         # Massive HACK to get around file protocol URIs being used to
156:         # resolve relative URIs on feeds in the local file system.
157:         # Better to leave these URIs unresolved and hope some other
158:         # tool resolves them correctly.
159:         base_uri_sources.reject! do |base_uri|
160:           base_uri == nil ||
161:             FeedTools::URI.parse(base_uri).scheme == "file"
162:         end
163:         base_uri = FeedTools::URI.parse(
164:           FeedTools::XmlHelper.select_not_blank(base_uri_sources))
165:         resolved_uri = base_uri
166:         if relative_uri.to_s != ''
167:           resolved_uri = base_uri + relative_uri.to_s
168:         end
169:         return FeedTools::UriHelper.normalize_url(resolved_uri.to_s)
170:       rescue
171:         return relative_uri
172:       end
173:     end

[Validate]