This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.
require 'rubygems'
require 'mechanize'
agent = Mechanize.new
agent.get('http://google.com/').class #=> Mechanize::Page
pretty_inspect | -> | inspect |
mech | [RW] |
# File lib/mechanize/page.rb, line 26
#
# Builds a page from a fetched response.
#
# uri, response, body and code are forwarded unchanged to the superclass
# initializer. mech is stored in @mech unless @mech is already set once the
# superclass initializer has run.
#
# Raises Mechanize::ContentTypeError when the response's content-type header
# does not start with text/html or application/xhtml+xml.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  @encoding = nil

  # Use each_header when the response offers it, otherwise fall back to each.
  iterator = response.respond_to?(:each_header) ? :each_header : :each
  response.send(iterator) do |_header, value|
    next unless value =~ /charset/i
    # Match the parameter name case-insensitively, consistent with the check
    # above, so that "Charset=UTF-8" is honoured as well.
    encoding = value[/charset=([^; ]+)/i, 1]
    @encoding = encoding unless encoding == 'none'
  end

  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later
  body.force_encoding('ASCII-8BIT') if body && body.respond_to?(:force_encoding)

  @encoding ||= Util.detect_charset(body)

  super(uri, response, body, code)
  @mech ||= mech

  # A charset declared in a <meta> tag clears the encoding found so far
  # (presumably so the parser takes it from the document -- confirm).
  @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

  # The alternatives are grouped so the anchor applies to both of them;
  # ungrouped, the second alternative would match anywhere in the header.
  raise Mechanize::ContentTypeError.new(response['content-type']) unless
    response['content-type'] =~ %r{\A(?:text/html|application/xhtml\+xml)}i
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end
Return a list of all base tags
# File lib/mechanize/page.rb, line 243
#
# Wraps every node matched by search('base') in a Base object.
# The resulting array is cached in @bases after the first call.
def bases
  return @bases if @bases

  base_nodes = search('base')
  @bases = base_nodes.map do |base_node|
    Base.new(base_node, @mech, self)
  end
end
Get the content type
# File lib/mechanize/page.rb, line 92
#
# Returns the value stored under 'content-type' in the response.
def content_type
  header_name = 'content-type'
  response[header_name]
end
# File lib/mechanize/page.rb, line 72
#
# Returns the encoding reported by the parsed document, or nil when the
# parser does not respond to encoding (including when there is no parser).
def encoding
  document = parser
  return nil unless document.respond_to?(:encoding)

  document.encoding
end
# File lib/mechanize/page.rb, line 58
#
# Stores the encoding to use when the body is parsed and returns it.
#
# If a document has already been parsed and reports an encoding that differs
# (ignoring case) from the new one, the cached parser is dropped so that the
# next call to parser re-parses the body. A cached parser that does not
# respond to encoding is left untouched instead of raising NoMethodError.
def encoding=(encoding)
  @encoding = encoding

  if @parser && @parser.respond_to?(:encoding)
    parser_encoding = @parser.encoding
    if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
      # lazy reinitialize the parser with the new encoding
      @parser = nil
    end
  end

  encoding
end
Return a list of all form tags
# File lib/mechanize/page.rb, line 217
#
# Wraps every node matched by search('form') in a Form object. A form whose
# action is not set gets @uri.to_s as its action. The array is cached in
# @forms after the first call.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    Form.new(form_node, @mech, self).tap do |form|
      form.action = @uri.to_s unless form.action
    end
  end
end
Return a list of all frame tags
# File lib/mechanize/page.rb, line 250
#
# Wraps every node matched by search('frame') in a Frame object.
# The resulting array is cached in @frames after the first call.
def frames
  return @frames if @frames

  frame_nodes = search('frame')
  @frames = frame_nodes.map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end
Return a list of all iframe tags
# File lib/mechanize/page.rb, line 257
#
# Wraps every node matched by search('iframe') in a Frame object.
# The resulting array is cached in @iframes after the first call.
def iframes
  return @iframes if @iframes

  iframe_nodes = search('iframe')
  @iframes = iframe_nodes.map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end
# File lib/mechanize/page.rb, line 269
#
# Returns the url of every image on the page with duplicates removed.
# The array is cached in @image_urls after the first call.
def image_urls
  return @image_urls if @image_urls

  urls = images.map { |image| image.url }
  @image_urls = urls.uniq
end
Return a list of all img tags
# File lib/mechanize/page.rb, line 264
#
# Wraps every node matched by search('img') in an Image object.
# The resulting array is cached in @images after the first call.
def images
  return @images if @images

  image_nodes = search('img')
  @images = image_nodes.map do |image_node|
    Image.new(image_node, self)
  end
end
Return a list of all label tags
# File lib/mechanize/page.rb, line 275
#
# Wraps every node matched by search('label') in a Label object.
# The resulting array is cached in @labels after the first call.
def labels
  return @labels if @labels

  label_nodes = search('label')
  @labels = label_nodes.map do |label_node|
    Label.new(label_node, self)
  end
end
# File lib/mechanize/page.rb, line 280
#
# Returns a hash of the page's labels keyed by the value of each label
# node's 'for' attribute. Labels whose #for is nil or false are left out.
# The hash is cached in @labels_hash after the first call.
def labels_hash
  @labels_hash ||= labels.inject({}) do |by_target, label|
    by_target[label.node['for']] = label if label.for
    by_target
  end
end
Return a list of all links on the page (a and area tags)
# File lib/mechanize/page.rb, line 207
#
# Wraps every 'a' node and then every 'area' node found by search in a Link
# object. The flattened array is cached in @links after the first call.
def links
  return @links if @links

  link_tags = %w{ a area }
  links_per_tag = link_tags.map do |tag_name|
    search(tag_name).map { |link_node| Link.new(link_node, @mech, self) }
  end
  @links = links_per_tag.flatten
end
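Return a list of all meta refresh tags (meta elements directly inside head that have a content attribute and an http-equiv attribute equal to 'refresh', ignoring case)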
# File lib/mechanize/page.rb, line 227
#
# Collects the meta refresh tags of the page. Only 'head > meta' nodes that
# carry both an http-equiv and a content attribute are considered, and only
# those whose http-equiv equals 'refresh' (ignoring case) are handed to
# Meta.parse. Inside the parse block the node gets 'delay' and 'href'
# attributes and is wrapped in a Meta object. nil results are dropped and
# the array is cached in @meta after the first call.
def meta
  return @meta if @meta

  candidates = search('head > meta').map do |node|
    equiv = node['http-equiv']
    content = equiv && node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    Meta.parse(content, uri) do |delay, href|
      node['delay'] = delay
      node['href'] = href
      Meta.new(node, @mech, self)
    end
  end
  @meta = candidates.compact
end
# File lib/mechanize/page.rb, line 76
#
# Returns the parsed document, parsing html_body on first use when both body
# and response are present. When mech.html_parser is Nokogiri::HTML the
# stored @encoding is passed along; any other parser receives the HTML body
# only. The result is cached in @parser.
def parser
  return @parser if @parser
  return @parser unless body && response

  html_parser = mech.html_parser
  @parser =
    if html_parser == Nokogiri::HTML
      html_parser.parse(html_body, nil, @encoding)
    else
      html_parser.parse(html_body)
    end
end
# File lib/mechanize/page.rb, line 52
#
# Returns the inner text of the nodes matched by search('title'), or nil
# when there is no parser or the text is empty. A non-empty title is cached
# in @title; the lookup runs a single search per call instead of two.
def title
  @title ||= begin
    # Skip the search entirely when no parser is available.
    text = search('title').inner_text if parser
    text unless text.nil? || text.empty?
  end
end