Module HTML5::Sniffer
In: lib/feed_tools/vendor/html5/lib/html5/sniffer.rb
Phase XmlElementPhase InTablePhase RootElementPhase InHeadPhase AfterHeadPhase AfterFramesetPhase XmlRootPhase InitialPhase InTableBodyPhase InFramesetPhase InColumnGroupPhase InCaptionPhase TrailingEndPhase InSelectPhase BeforeHeadPhase InCellPhase InBodyPhase AfterBodyPhase InRowPhase Exception SerializeError EOF AssertionError ParseError HTMLSanitizer HTMLTokenizer XhmlRootPhase XMLParser XHTMLParser HTMLParser String EncodingBytes HTMLSerializer XHTMLSerializer TreeWalkers::Base NonRecursiveTreeWalker TreeWalker TreeWalker Base TreeWalker Element DocumentFragment Node CommentNode DocumentType TextNode Document Base::Node Node Node Base::TreeBuilder TreeBuilder TreeBuilder TreeBuilder Element DocumentFragment CommentNode DocumentType TextNode Document Element DocumentFragment CommentNode DocumentType TextNode Document Enumerable TestData Base OptionalTagFilter InjectMetaCharset WhitespaceFilter HTMLSanitizeFilter HTMLSanitizeModule SimpleDelegator HTMLInputStream EncodingParser ContentAttrParser Node TreeBuilder lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb lib/feed_tools/vendor/html5/lib/html5/constants.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb 
lib/feed_tools/vendor/html5/lib/html5/inputstream.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb Hpricot TokenConstructor lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb REXML lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb SimpleTree TreeWalkers HTMLSanitizeModule lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb Hpricot lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb REXML lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb Base lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb SimpleTree TreeBuilders lib/feed_tools/vendor/html5/tests/preamble.rb TestSupport Sniffer lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb lib/feed_tools/vendor/html5/lib/html5/filters/base.rb lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb Filters HTML5 dot/m_66_0.png

Methods

Public Instance methods

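Documentation for html_or_feed(str): "4.7.4" — apparently the section number of the HTML5 draft specification whose feed-or-HTML content sniffing steps this method follows (the source comments refer to that algorithm's "steps 1, 2").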

[Source]

    # File lib/feed_tools/vendor/html5/lib/html5/sniffer.rb, line 4
    #
    # Sniffs the beginning of a document and decides whether it looks like a
    # syndication feed or like HTML (the documentation page for this method
    # references the algorithm as "4.7.4").
    #
    # Only the first 512 bytes are examined. Leading whitespace, comments
    # (<!-- ... -->), markup declarations (<! ... >) and processing
    # instructions (<? ... ?>) are skipped; the first other tag decides the
    # result.
    #
    # The scan is byte-oriented: bytes are read with getbyte/byteslice because
    # String#[] with an Integer index returns a one-character String on
    # Ruby 1.9+, which never equals an Integer byte value.
    #
    # str:: the document (or its beginning); nil is treated as an empty string.
    #
    # Returns "application/rss+xml" when the first tag starts with "rss",
    # "application/atom+xml" when it starts with "feed", and "text/html" in
    # every other case.
    #
    # Raises NotImplementedError when the first tag starts with "rdf:RDF",
    # because RDF-based feed detection is not implemented.
    def html_or_feed(str)
      # Steps 1, 2: work on a binary-tagged copy of the first 512 bytes so that
      # offsets are byte offsets regardless of the caller's string encoding.
      s = str.to_s.byteslice(0, 512).force_encoding(Encoding::BINARY)
      limit = s.bytesize

      # A UTF-8 byte order mark in front of the markup must not hide the root tag.
      has_bom = s.getbyte(0) == 0xEF && s.getbyte(1) == 0xBB && s.getbyte(2) == 0xBF
      pos = has_bom ? 3 : 0

      while pos < limit
        case s.getbyte(pos)
        when 0x09, 0x20, 0x0A, 0x0D # tab, space, LF, CR
          pos += 1
        when 0x3C # "<"
          pos += 1
          if s.byteslice(pos, 3) == "!--" # comment: skip past the closing "-->"
            close = s.index("-->", pos + 3)
            pos = close ? close + 3 : limit
          elsif s.getbyte(pos) == 0x21 # "!": markup declaration, skip past ">"
            close = s.index(">", pos + 1)
            pos = close ? close + 1 : limit
          elsif s.getbyte(pos) == 0x3F # "?": processing instruction, skip past "?>"
            close = s.index("?>", pos)
            pos = close ? close + 2 : limit
          elsif s.byteslice(pos, 3) == "rss"
            return "application/rss+xml"
          elsif s.byteslice(pos, 4) == "feed"
            return "application/atom+xml"
          elsif s.byteslice(pos, 7) == "rdf:RDF"
            raise NotImplementedError, "sniffing of rdf:RDF documents is not implemented"
          else
            # Any other tag means this is not a recognised feed.
            return "text/html"
          end
        else
          # Non-whitespace content before any tag: not a feed.
          break
        end
      end

      "text/html"
    end

[Validate]