Module FeedTools::RetrievalHelper
In: lib/feed_tools/helpers/retrieval_helper.rb
ActiveRecord::Base DatabaseFeedCache StandardError FeedAccessError FeedItem Feed URI Cloud Link Author Image Enclosure TextInput Category lib/feed_tools/feed_item.rb lib/feed_tools/feed.rb lib/feed_tools.rb lib/feed_tools/vendor/uri.rb lib/feed_tools/database_feed_cache.rb lib/feed_tools/feed_structures.rb FeedToolsHelper FeedItemHelper DebugHelper HtmlHelper FeedHelper XmlHelper RetrievalHelper UriHelper GenericHelper FEED_TOOLS_VERSION FeedTools dot/m_79_0.png

Methods for pulling remote data

Methods

Constants

ACCEPT_HEADER = "application/atom+xml,application/rdf+xml," + "application/rss+xml,application/x-netcdf,application/xml;" + "q=0.9,text/xml;q=0.2,*/*;q=0.1"   Stolen from the Universal Feed Parser

Public Class methods

Makes an HTTP GET request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.

[Source]

     # File lib/feed_tools/helpers/retrieval_helper.rb, line 250
250:     def self.http_get(url, options={}, &block)
           # Thin wrapper: delegates to http_request with :get as the
           # operation. url, options and the block are passed through
           # unchanged, and http_request's return value is returned as-is.
251:       return FeedTools::RetrievalHelper.http_request(
252:         :get, url, options, &block)
253:     end

Makes an HTTP POST request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.

[Source]

     # File lib/feed_tools/helpers/retrieval_helper.rb, line 258
258:     def self.http_post(url, options={}, &block)
           # Thin wrapper: delegates to http_request with :post as the
           # operation. url, options and the block are passed through
           # unchanged, and http_request's return value is returned as-is.
259:       return FeedTools::RetrievalHelper.http_request(
260:         :post, url, options, &block)
261:     end

Makes an HTTP request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.

[Source]

     # File lib/feed_tools/helpers/retrieval_helper.rb, line 43
 43:     def self.http_request(http_operation, url, options={}, &block)
           # Sends +http_operation+ (the callers in this module pass :get,
           # :post and :head) to +url+ and returns the response object,
           # following redirects by calling itself recursively.
           #
           # Recognised +options+ keys (defaults are in the hash below):
           #   :feed_object      - optional object that supplies http_headers
           #                       and configurations, and whose cache_object
           #                       and href are updated after 301 redirects
           #   :form_data        - Hash sent as the body of a :post request
           #   :request_headers  - Hash of request headers
           #   :follow_redirects - defaulted to true, but never read in this
           #                       method (NOTE(review): confirm whether it
           #                       was meant to gate redirect following)
           #   :redirect_limit   - number of redirects still allowed
           #   :response_chain   - Array of [url, response] pairs for the
           #                       redirects encountered so far
           #
           # The optional block is called with (redirected_location, response)
           # for each redirect. If it returns false or nil the redirect is not
           # followed and the redirect response itself is returned.
           #
           # Raises FeedAccessError when :redirect_limit is 0, when a redirect
           # carries no location header, when a redirect loop is detected, or
           # when one of the socket/timeout/network errors rescued at the end
           # of the method occurs.
           #
           # The returned response (when not nil) gains a response_chain
           # reader exposing options[:response_chain].
 44:       response = nil
 45:       
 46:       options = {
 47:         :feed_object => nil,
 48:         :form_data => nil,
 49:         :request_headers => {},
 50:         :follow_redirects => true,
 51:         :redirect_limit => 10,
 52:         :response_chain => []
 53:       }.merge(options)
 54:       
           # Every recursive call below passes :redirect_limit - 1, so 0 means
           # the redirect budget has been used up.
 55:       if options[:redirect_limit] == 0
 56:         raise FeedAccessError, 'Redirect too deep'
 57:       end
 58:       
           # Sanitise caller-supplied values that have the wrong type.
           # NOTE(review): blank? is not core Ruby; presumably it comes from
           # ActiveSupport or a FeedTools core extension -- confirm.
 59:       if options[:response_chain].blank? ||
 60:           !options[:response_chain].kind_of?(Array)
 61:         options[:response_chain] = []
 62:       end
 63:       
 64:       if !options[:request_headers].kind_of?(Hash)
 65:         options[:request_headers] = {}
 66:       end
 67:       if !options[:form_data].kind_of?(Hash)
 68:         options[:form_data] = nil
 69:       end
 70: 
           # Only when the caller supplied no headers: derive conditional
           # request headers (If-None-Match / If-Modified-Since) from the
           # feed object's stored 'etag' and 'last-modified' headers, and use
           # the feed object's own configured user agent if it has one.
 71:       if options[:request_headers].blank? && options[:feed_object] != nil
 72:         options[:request_headers] = {}
 73:         unless options[:feed_object].http_headers.nil?
 74:           unless options[:feed_object].http_headers['etag'].nil?
 75:             options[:request_headers]["If-None-Match"] =
 76:               options[:feed_object].http_headers['etag']
 77:           end
 78:           unless options[:feed_object].http_headers['last-modified'].nil?
 79:             options[:request_headers]["If-Modified-Since"] =
 80:               options[:feed_object].http_headers['last-modified']
 81:           end
 82:         end
 83:         unless options[:feed_object].configurations[:user_agent].nil?
 84:           options[:request_headers]["User-Agent"] =
 85:             options[:feed_object].configurations[:user_agent]
 86:         end
 87:       end
           # Fall back to the module-wide Accept header and the global
           # FeedTools user agent when neither was set above or by the caller.
 88:       if options[:request_headers]["Accept"].nil?
 89:         options[:request_headers]["Accept"] =
 90:           FeedTools::RetrievalHelper::ACCEPT_HEADER
 91:       end
 92:       if options[:request_headers]["User-Agent"].nil?
 93:         options[:request_headers]["User-Agent"] =
 94:           FeedTools.configurations[:user_agent]
 95:       end
 96:       
 97:       uri = nil
 98:       begin
 99:         uri = URI.parse(url)
100:       rescue URI::InvalidURIError
101:         # Uh, maybe try to fix it?
             # A second InvalidURIError raised here is not rescued and
             # propagates to the caller.
102:         uri = URI.parse(FeedTools::UriHelper.normalize_url(url))
103:       end
104:       
105:       begin
106:         proxy_address = nil
107:         proxy_port = nil
108:         proxy_user = nil
109:         proxy_password = nil
110:         
111:         auth_user = nil
112:         auth_password = nil
113:         auth_scheme = nil
114:         
             # Proxy and authentication settings are read only from the feed
             # object's configurations; without a feed object they stay nil.
115:         if options[:feed_object] != nil
116:           proxy_address =
117:             options[:feed_object].configurations[:proxy_address] || nil
               # NOTE(review): to_i never returns nil, so the `|| nil` here
               # is dead code and an unset proxy port becomes 0.
118:           proxy_port =
119:             options[:feed_object].configurations[:proxy_port].to_i || nil
120:           proxy_user =
121:             options[:feed_object].configurations[:proxy_user] || nil
122:           proxy_password =
123:             options[:feed_object].configurations[:proxy_password] || nil
124: 
125:           auth_user =
126:             options[:feed_object].configurations[:auth_user] || nil
127:           auth_password =
128:             options[:feed_object].configurations[:auth_password] || nil
129:           auth_scheme =
130:             options[:feed_object].configurations[:auth_scheme] || nil
131:         end        
132:         
             # Basic auth is applied when a user is configured and the scheme
             # is either unset or :basic. pack('m') base64-encodes the
             # "user:password" string; the line breaks it inserts are removed
             # so the header value stays on one line.
133:         if (auth_user &&
134:             (auth_scheme == nil || auth_scheme.to_s.to_sym == :basic))
135:           options[:request_headers]["Authorization"] =
136:             "Basic " + [
137:               "#{auth_user}:#{auth_password}"
138:             ].pack('m').delete("\r\n")
139:         end
140:         
141:         # No need to check for nil
             # NOTE(review): use_ssl is never set on this connection, so
             # https URLs are presumably not handled correctly -- confirm.
142:         http = Net::HTTP::Proxy(
143:           proxy_address, proxy_port, proxy_user, proxy_password).new(
144:             uri.host, (uri.port or 80))
145: 
             # The feed object's :http_timeout wins over the global one. Only
             # open_timeout is set here; read_timeout is left untouched.
146:         if options[:feed_object] != nil &&
147:             options[:feed_object].configurations[:http_timeout] != nil
148:           http.open_timeout = 
149:             options[:feed_object].configurations[:http_timeout].to_f
150:         elsif FeedTools.configurations[:http_timeout] != nil
151:           http.open_timeout = FeedTools.configurations[:http_timeout].to_f
152:         end
             # A timeout of 0 is replaced by nil rather than passed through.
153:         if http.open_timeout != nil && http.open_timeout == 0
154:           http.open_timeout = nil
155:         end
156:         
             # NOTE(review): uri.path looks like it can be "" for a URL with
             # no path component (e.g. "http://host") -- confirm Net::HTTP
             # accepts an empty request path.
157:         path = uri.path 
158:         path += ('?' + uri.query) if uri.query
159:         
             # The argument list is (path, headers), plus the form data for
             # :post (an empty Hash when none was supplied).
160:         request_params = [path, options[:request_headers]]
161:         if http_operation == :post
162:           options[:form_data] = {} if options[:form_data].blank?
163:           request_params << options[:form_data]
164:         end
165:         Thread.pass
             # http_operation is used directly as the name of the method
             # invoked on the Net::HTTP object.
166:         response = http.send(http_operation, *request_params)
167:         Thread.pass
168:         
169:         case response
170:         when Net::HTTPSuccess
171:           if options[:feed_object] != nil
172:             # We've reached the final destination, process all previous
173:             # redirections, and see if we need to update the url.
                 # Each entry is a [url, response] pair, so .last is the
                 # redirect response. Only the leading run of 301 responses
                 # updates the feed's href.
174:             for redirected_response in options[:response_chain]
175:               if redirected_response.last.code.to_i == 301
176:                 # Reset the cache object or we may get duplicate entries
177: 
178:                 # TODO: verify this line is necessary!
179: #=============================================================================
180:                 options[:feed_object].cache_object = nil
181:                 
182:                 options[:feed_object].href =
183:                   redirected_response.last['location']
184:               else
185:                 # Jump out as soon as we hit anything that isn't a
186:                 # permanently moved redirection.
187:                 break
188:               end
189:             end
190:           end
191:         when Net::HTTPNotModified
192:           # Do nothing, we just don't want it processed as a redirection
193:         when Net::HTTPRedirection
194:           if response['location'].nil?
195:             raise FeedAccessError,
196:               "No location to redirect to supplied for " + response.code
197:           end
               # Record the URL just requested together with its redirect
               # response. The array is appended to in place, and because
               # options.merge below is a shallow copy the recursive call
               # works on this same array.
198:           options[:response_chain] << [url, response]
199: 
               # The location header is resolved against the current URI
               # before it is used as the next request URL.
200:           redirected_location = response['location']
201:           redirected_location = FeedTools::UriHelper.resolve_relative_uri(
202:             redirected_location, [uri.to_s])
203:           
               # assoc matches on the first element of each pair, i.e. a URL
               # that has already been requested in this chain.
204:           if options[:response_chain].assoc(redirected_location) != nil
205:             raise FeedAccessError,
206:               "Redirection loop detected: #{redirected_location}"
207:           end
208:           
209:           # Let the block handle redirects
210:           follow_redirect = true
211:           if block != nil
212:             follow_redirect = block.call(redirected_location, response)
213:           end
214:           
               # When the redirect is not followed, response stays the
               # redirect response and is what gets returned.
215:           if follow_redirect
216:             response = FeedTools::RetrievalHelper.http_request(
217:               http_operation,
218:               redirected_location, 
219:               options.merge(
220:                 {:redirect_limit => (options[:redirect_limit] - 1)}),
221:               &block)
222:           end
223:         end
           # Low-level network failures are translated into FeedAccessError;
           # exceptions of any other class propagate unchanged.
224:       rescue SocketError
225:         raise FeedAccessError, 'Socket error prevented feed retrieval'
226:       rescue Timeout::Error, Errno::ETIMEDOUT
227:         raise FeedAccessError, 'Timeout while attempting to retrieve feed'
228:       rescue Errno::ENETUNREACH
229:         raise FeedAccessError, 'Network was unreachable'
230:       rescue Errno::ECONNRESET
231:         raise FeedAccessError, 'Connection was reset by peer'
232:       end
233:       
           # Define a response_chain reader on this one response object (via
           # its singleton class) and store the chain in the instance
           # variable that the reader returns.
234:       if response != nil
235:         class << response
236:           def response_chain
237:             return @response_chain
238:           end
239:         end
240:         response.instance_variable_set("@response_chain",
241:           options[:response_chain])
242:       end
243:       
244:       return response
245:     end

Public Instance methods

Makes an HTTP HEAD request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.

[Source]

     # File lib/feed_tools/helpers/retrieval_helper.rb, line 266
266:     def http_head(url, options={}, &block)
           # Thin wrapper: delegates to http_request with :head as the
           # operation. url, options and the block are passed through
           # unchanged, and http_request's return value is returned as-is.
           # NOTE(review): unlike http_get and http_post this is defined
           # without `self.`, so it is an instance method rather than a
           # module-level one -- confirm that this is intentional.
267:       return FeedTools::RetrievalHelper.http_request(
268:         :head, url, options, &block)
269:     end

[Validate]