Class | HTML5::XMLParser |
In: |
lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb
|
Parent: | HTMLParser |
A liberal XML parser built on top of HTMLParser.
# File lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb, line 22
#
# Sets the parser up through the parent class initializer and then replaces
# the :initial entry of @phases with an XmlRootPhase built from this parser
# and its @tree.
#
# options - passed through unchanged to the parent initializer
#           (defaults to an empty Hash).
#
# NOTE(review): @phases and @tree are presumably populated by the parent
# initializer -- confirm against HTMLParser.
def initialize(options = {})
  super(options)
  @phases[:initial] = XmlRootPhase.new(self, @tree)
end
# File lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb, line 27
#
# Normalizes a token in place and returns it.
#
# token - Hash with a :type key and a :data key; :name is also read when
#         the type is :EmptyTag.
#
# What happens per token type:
#
#   :StartTag, :EmptyTag - :data (a list of [name, value] pairs) is turned
#                          into a Hash, keeping the first value seen for a
#                          duplicated name. An :EmptyTag is additionally
#                          handed to @phase.processStartTag and then
#                          rewritten into an :EndTag with no attributes.
#   :Characters          - when the tokenizer's content model flag is
#                          :CDATA, the &lt; &gt; and &amp; entities are
#                          decoded.
#   :EndTag              - "attributes-in-end-tag" is reported through
#                          parse_error when the tag carries attributes.
#   :Comment             - a comment of the form "[CDATA[...]]" is turned
#                          into a :Characters token holding the inner text.
#
# Any other token type is returned untouched.
def normalize_token(token)
  case token[:type]
  when :StartTag, :EmptyTag
    # We need to remove the duplicate attributes and convert attributes
    # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}.
    # Reversing first means the earliest pair is written last and wins.
    token[:data] = Hash[*token[:data].reverse.flatten]

    # For EmptyTags, process both a Start and an End tag
    if token[:type] == :EmptyTag
      # Put the content model flag back after the start tag is processed,
      # so processing the start tag cannot leave a different flag behind.
      save = @tokenizer.content_model_flag
      @phase.processStartTag(token[:name], token[:data])
      @tokenizer.content_model_flag = save
      token[:data] = {}
      token[:type] = :EndTag
    end

  when :Characters
    # Un-escape character data while the content model flag is :CDATA
    # (e.g. style, script). &amp; is decoded last so that "&amp;lt;"
    # yields "&lt;" rather than "<".
    if @tokenizer.content_model_flag == :CDATA
      token[:data] = token[:data].
        gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
    end

  when :EndTag
    # An empty Array/Hash is truthy in Ruby, so test for emptiness
    # explicitly; only real attributes are an error.
    attributes = token[:data]
    if attributes && !attributes.empty?
      parse_error("attributes-in-end-tag")
    end

  when :Comment
    # Rescue CDATA from the comments
    if token[:data][0..6] == "[CDATA[" && token[:data][-2..-1] == "]]"
      token[:type] = :Characters
      token[:data] = token[:data][7...-2]
    end
  end

  token
end