Class HTML5::Filters::OptionalTagFilter
In: lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb
Parent: Base
Phase XmlElementPhase InTablePhase RootElementPhase InHeadPhase AfterHeadPhase AfterFramesetPhase XmlRootPhase InTableBodyPhase InFramesetPhase InColumnGroupPhase InitialPhase InCaptionPhase TrailingEndPhase InSelectPhase BeforeHeadPhase AfterBodyPhase InCellPhase InBodyPhase InRowPhase XhmlRootPhase Exception SerializeError EOF AssertionError ParseError HTMLSanitizer HTMLTokenizer XMLParser XHTMLParser HTMLParser String EncodingBytes HTMLSerializer XHTMLSerializer TreeWalkers::Base NonRecursiveTreeWalker TreeWalker TreeWalker Base TreeWalker Element DocumentFragment Node CommentNode DocumentType TextNode Document Base::Node Node Node Base::TreeBuilder TreeBuilder TreeBuilder TreeBuilder Element DocumentFragment CommentNode DocumentType TextNode Document Element DocumentFragment CommentNode DocumentType TextNode Document Base InjectMetaCharset OptionalTagFilter WhitespaceFilter HTMLSanitizeFilter HTMLSanitizeModule Enumerable TestData SimpleDelegator HTMLInputStream EncodingParser ContentAttrParser Node TreeBuilder lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb lib/feed_tools/vendor/html5/lib/html5/constants.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb 
lib/feed_tools/vendor/html5/lib/html5/inputstream.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb Hpricot TokenConstructor lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb REXML lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb SimpleTree TreeWalkers HTMLSanitizeModule lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb Hpricot lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb Base lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb REXML lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb SimpleTree TreeBuilders lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb lib/feed_tools/vendor/html5/lib/html5/filters/base.rb lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb Filters Sniffer lib/feed_tools/vendor/html5/tests/preamble.rb TestSupport HTML5 dot/m_75_0.png

Methods

Public Instance methods

[Source]

    # File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 18
# Yields the tokens produced by +slider+, dropping tags that may be omitted.
#
# For every (previous, token, nexttok) window:
# * a :StartTag is dropped when its :data is empty (presumably "no
#   attributes" -- confirm against the tokenizer) and is_optional_start
#   accepts it;
# * an :EndTag is dropped when is_optional_end accepts it;
# * any other token type is always passed through.
def each
  slider do |previous, token, nexttok|
    omit =
      case token[:type]
      when :StartTag
        # Short-circuits: is_optional_start is only consulted for tags
        # whose :data is empty.
        token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
      when :EndTag
        is_optional_end(token[:name], nexttok)
      else
        false
      end

    yield token unless omit
  end
end

[Source]

     # File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 90
 # Decides whether the end tag of +tagname+ may be left out of the output.
 #
 # tagname:: name of the element whose end tag is being considered.
 # nexttok:: the token that follows the end tag in the stream, or nil when
 #           the end tag is the last token.
 #
 # Returns true when the end tag may be omitted, false otherwise (including
 # for every tag name not listed below).
 def is_optional_end(tagname, nexttok)
   type = nexttok ? nexttok[:type] : nil
   followed_by_element = type == :StartTag
   # "No more content in the parent element": the next token closes the
   # parent, or the stream is finished.
   parent_finished = type == :EndTag || type.nil?

   case tagname
   when 'html', 'head', 'body'
     # The end tag may be omitted if the element is not immediately
     # followed by a space character or a comment.
     ![:Comment, :SpaceCharacters].include?(type)
   when 'li', 'optgroup', 'option', 'tr'
     # The end tag may be omitted if the element is immediately followed
     # by another element of the same name, or if there is no more
     # content in the parent element.
     followed_by_element ? nexttok[:name] == tagname : parent_finished
   when 'dt', 'dd'
     # A dt element's end tag may be omitted if the dt element is
     # immediately followed by another dt element or a dd element.
     # A dd element's end tag may be omitted if the dd element is
     # immediately followed by another dd element or a dt element,
     # or if there is no more content in the parent element.
     if followed_by_element
       %w(dt dd).include?(nexttok[:name])
     else
       tagname == 'dd' && parent_finished
     end
   when 'p'
     # A p element's end tag may be omitted if the p element is
     # immediately followed by an address, blockquote, dl, fieldset,
     # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
     # or ul element, or if there is no more content in the parent
     # element.
     if followed_by_element
       %w(address blockquote dl fieldset form h1 h2 h3 h4 h5
          h6 hr menu ol p pre table ul).include?(nexttok[:name])
     else
       parent_finished
     end
   when 'colgroup'
     # A colgroup element's end tag may be omitted if the colgroup
     # element is not immediately followed by a space character or
     # a comment.
     if [:Comment, :SpaceCharacters].include?(type)
       false
     elsif followed_by_element
       # The end tag is also kept when another colgroup follows
       # immediately: is_optional_start may drop that colgroup's start
       # tag and does not inspect the preceding token for colgroup.
       nexttok[:name] != 'colgroup'
     else
       true
     end
   when 'thead', 'tbody'
     # A thead element's end tag may be omitted if the thead element
     # is immediately followed by a tbody or tfoot element.
     # A tbody element's end tag may be omitted if the tbody element
     # is immediately followed by a tbody or tfoot element, or if
     # there is no more content in the parent element.
     # Note: the end tag IS omitted when a tbody follows;
     # is_optional_start compensates by keeping the start tag of a tbody
     # that comes right after a thead/tbody/tfoot end tag.
     if followed_by_element
       %w(tbody tfoot).include?(nexttok[:name])
     else
       tagname == 'tbody' && parent_finished
     end
   when 'tfoot'
     # A tfoot element's end tag may be omitted if the tfoot element
     # is immediately followed by a tbody element, or if there is no
     # more content in the parent element.
     # Note: as for thead/tbody, the end tag IS omitted before a tbody;
     # is_optional_start then keeps that tbody's start tag.
     followed_by_element ? nexttok[:name] == 'tbody' : parent_finished
   when 'td', 'th'
     # A td or th element's end tag may be omitted if the element is
     # immediately followed by a td or th element, or if there is
     # no more content in the parent element.
     followed_by_element ? %w(td th).include?(nexttok[:name]) : parent_finished
   else
     false
   end
 end

[Source]

    # File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 31
# Decides whether the start tag of +tagname+ may be left out of the output.
#
# tagname::  name of the element whose start tag is being considered.
# previous:: the token that precedes the start tag, or nil at the beginning
#            of the stream. Only consulted for tbody.
# nexttok::  the token that follows the start tag, or nil when the start
#            tag is the last token.
#
# Returns true when the start tag may be omitted, false otherwise (including
# for every tag name not listed below). Whether the tag carries attributes
# is not checked here.
def is_optional_start(tagname, previous, nexttok)
  type = nexttok ? nexttok[:type] : nil

  case tagname
  when 'html'
    # An html element's start tag may be omitted if the first thing
    # inside the html element is not a space character or a comment.
    ![:Comment, :SpaceCharacters].include?(type)
  when 'head'
    # A head element's start tag may be omitted if the first thing
    # inside the head element is an element.
    type == :StartTag
  when 'body'
    # A body element's start tag may be omitted if the first thing
    # inside the body element is not a space character or a comment,
    # except if the first thing inside the body element is a script
    # or style element and the node immediately preceding the body
    # element is a head element whose end tag has been omitted.
    if [:Comment, :SpaceCharacters].include?(type)
      false
    elsif type == :StartTag
      # XXX: we do not look at the preceding event, so we never omit
      # the body element's start tag if it's followed by a script or
      # a style element.
      !%w[script style].include?(nexttok[:name])
    else
      true
    end
  when 'colgroup'
    # A colgroup element's start tag may be omitted if the first thing
    # inside the colgroup element is a col element, and if the element
    # is not immediately preceded by another colgroup element whose
    # end tag has been omitted.
    # XXX: we do not look at the preceding event, so instead we never
    # omit the colgroup element's end tag when it is immediately
    # followed by another colgroup element. See is_optional_end.
    type == :StartTag && nexttok[:name] == "col"
  when 'tbody'
    # A tbody element's start tag may be omitted if the first thing
    # inside the tbody element is a tr element, and if the element is
    # not immediately preceded by a tbody, thead, or tfoot element
    # whose end tag has been omitted.
    return false unless type == :StartTag

    # Any tbody/thead/tfoot end tag directly before this start tag is
    # treated as possibly omitted, so the start tag is kept in that case.
    if previous && previous[:type] == :EndTag &&
       %w(tbody thead tfoot).include?(previous[:name])
      false
    else
      nexttok[:name] == 'tr'
    end
  else
    false
  end
end

[Source]

    # File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 8
 # Walks the wrapped token stream (<tt>__getobj__</tt>) with a three-token
 # sliding window.
 #
 # Yields +previous+, +current+, +following+ once per token of the stream:
 # +previous+ is nil for the first token and +following+ is nil for the
 # last one. Nothing is yielded for an empty stream, so the block never
 # receives a nil +current+ token.
 def slider
   previous1 = previous2 = nil
   __getobj__.each do |token|
     # The window lags one token behind so that the follower is known.
     yield previous2, previous1, token unless previous1.nil?
     previous2 = previous1
     previous1 = token
   end
   # Flush the last token; skipped when the stream produced no tokens.
   yield previous2, previous1, nil unless previous1.nil?
 end

[Validate]