class Ronn::RoffFilter

Filter for converting HTML to ROFF

Constants

HTML_ROFF_ENTITIES

Public Class Methods

new(html_fragment, name, section, tagline, manual = nil, version = nil, date = nil) click to toggle source

Convert Ronn HTML to roff. The html input is an HTML fragment, not a complete document.

   # File lib/ronn/roff.rb
# Build the roff output for one manual page.
#
# Resets the output buffer, writes the title heading, parses +html_fragment+
# as an HTML fragment, strips and normalizes it, then runs the block filter
# over the result and finishes the output with a newline.
def initialize(html_fragment, name, section, tagline, manual = nil,
               version = nil, date = nil)
  @buf = []
  title_heading(name, section, tagline, manual, version, date)

  fragment = Nokogiri::HTML.fragment(html_fragment)
  remove_extraneous_elements!(fragment)
  normalize_whitespace!(fragment)
  block_filter(fragment)

  write("\n")
end

Public Instance Methods

to_s() click to toggle source
   # File lib/ronn/roff.rb
# Return the buffered output as a single String, with spaces and tabs at the
# end of each line removed.
def to_s
  trailing_blanks = /[ \t]+$/
  rendered = @buf.join
  rendered.gsub(trailing_blanks, '')
end

Protected Instance Methods

block_filter(node) click to toggle source
    # File lib/ronn/roff.rb
 # Emit roff for +node+ as block-level content, recursing through children.
 #
 # +node+ may be nil (ignored), an Array or Nokogiri::XML::NodeSet (each
 # member is filtered in turn), a document or fragment (its children are
 # filtered), a text node (handed to inline_filter), or an element, which is
 # dispatched on its tag name. DTD and comment nodes are skipped.
 #
 # Raises RuntimeError for an <li> whose parent is neither <ol> nor <ul>, for
 # a <th> that reaches this method directly, and for any other node type.
 def block_filter(node)
   return if node.nil?

   if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
     node.each { |ch| block_filter(ch) }

   elsif node.document? || node.fragment?
     block_filter(node.children)

   elsif node.text?
     # This hack is necessary to support mixed-child-type dd's
     inline_filter(node)

   elsif node.elem?
     case node.name
     when 'html', 'body', 'div'
       # Pure containers: only their children produce output.
       block_filter(node.children)
     when 'h1'
       # discard
       nop
     when 'h2'
       macro 'SH', quote(escape(node.inner_html))
     when 'h3', 'h4', 'h5', 'h6'
       # Ronn discourages use of headings below h3, but if they are used,
       # we should make them legible instead of ignoring them.
       macro 'SS', quote(escape(node.inner_html))

     when 'p'
       prev = previous(node)
       if prev && %w[dd li blockquote].include?(node.parent.name)
         macro 'IP'
       elsif prev && !%w[h1 h2 h3].include?(prev.name)
         macro 'P'
       elsif node.previous&.text?
         macro 'IP'
       end
       inline_filter(node.children)

     when 'blockquote'
       prev = previous(node)
       indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
       macro 'IP', %w["" 4] if indent
       block_filter(node.children)
       macro 'IP', %w["" 0] if indent

     when 'pre'
       prev = previous(node)
       indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
       macro 'IP', %w["" 4] if indent
       macro 'nf'
       # HACK: strip an initial \n to avoid extra spacing.
       # An empty <pre> has no first child, so guard against nil here.
       first_child = node.children.first
       if first_child&.text?
         text = first_child.to_s
         first_child.replace(text[1..]) if text.start_with?("\n")
       end
       inline_filter(node.children)
       macro 'fi'
       macro 'IP', %w["" 0] if indent

     when 'dl'
       macro 'TP'
       block_filter(node.children)
     when 'dt'
       prev = previous(node)
       # The first <dt> reuses the TP emitted for the enclosing <dl>.
       macro 'TP' unless prev.nil?
       inline_filter(node.children)
       write "\n"
     when 'dd'
       if node.at('p')
         block_filter(node.children)
       else
         inline_filter(node.children)
       end
       write "\n"

     when 'ol', 'ul'
       block_filter(node.children)
       macro 'IP', %w["" 0]
     when 'li'
       case node.parent.name
       when 'ol'
         macro 'IP', %W["#{node.parent.children.index(node) + 1}." 4]
       when 'ul'
         macro 'IP', ['"\(bu"', '4']
       else
         raise "List element found as a child of non-list parent element: #{node.inspect}"
       end
       if node.at('p,ol,ul,dl,div')
         block_filter(node.children)
       else
         inline_filter(node.children)
       end
       write "\n"

     when 'span', 'code', 'b', 'strong', 'kbd', 'samp', 'var', 'em', 'i',
          'u', 'br', 'a'
       inline_filter(node)

     when 'table'
       macro 'TS'
       write "allbox;\n"
       block_filter(node.children)
       macro 'TE'
     when 'thead'
       # Convert to format section and first row
       tr = node.children[0]
       header_contents = []
       cell_formats = []
       alignments = { 'text-align:right;' => 'r', 'text-align:center;' => 'c' }
       tr.children.each do |th|
         # Left alignment is both the explicit and the fallback format.
         cell_formats << alignments.fetch(th['style'], 'l')
         header_contents << th.inner_html
       end
       write cell_formats.join(' ') + ".\n"
       write header_contents.join("\t") + "\n"
     when 'th'
       raise 'internal error: unexpected <th> element'
     when 'tbody'
       # Let the 'tr' handle it
       block_filter(node.children)
     when 'tr'
       # Convert to a table data row
       node.children.each do |child|
         block_filter(child)
         write "\t"
       end
       write "\n"
     when 'td'
       inline_filter(node.children)

     else
       warn 'unrecognized block tag: %p', node.name
     end

   elsif node.is_a?(Nokogiri::XML::DTD) || node.is_a?(Nokogiri::XML::Comment)
     # Ignore
     nop
   else
     raise "unexpected node: #{node.inspect}"
   end
 end
comment(text) click to toggle source
    # File lib/ronn/roff.rb
# Write +text+ on its own line, prefixed with the roff comment marker.
def comment(text)
  writeln ".\\\" #{text}"
end
escape(text) click to toggle source
    # File lib/ronn/roff.rb
# Escape HTML text for roff output.
#
# Decodes numeric character references, rewrites backslashes, ellipses, dots
# and hyphens into their roff escapes, applies every HTML_ROFF_ENTITIES
# substitution, and finally turns <tt>&amp;</tt> into a literal ampersand.
#
# Returns <tt>text.to_s</tt> unchanged when +text+ is nil or empty; otherwise
# returns a new String (the argument is not modified).
def escape(text)
  return text.to_s if text.nil? || text.empty?

  # Integer#chr without an encoding raises RangeError above 255 and yields
  # binary bytes for 128..255, so decode explicitly as UTF-8. References that
  # are not valid code points are left exactly as written.
  decode = lambda do |codepoint, original|
    codepoint.chr(Encoding::UTF_8)
  rescue RangeError
    original
  end

  ent = HTML_ROFF_ENTITIES
  text = text.dup
  text.gsub!(/&#x([0-9A-Fa-f]+);/) { |m| decode.call(Regexp.last_match(1).to_i(16), m) } # hex entities
  text.gsub!(/&#(\d+);/) { |m| decode.call(Regexp.last_match(1).to_i, m) }               # dec entities
  text.gsub!('\\', '\e')                                # backslash
  text.gsub!('...', '\|.\|.\|.')                        # ellipses
  text.gsub!(/[.-]/) { |m| "\\#{m}" }                   # control chars
  ent.each do |key, val|
    text.gsub!(key, val)
  end
  text.gsub!('&amp;', '&')                              # amps
  text
end
inline_filter(node) click to toggle source
    # File lib/ronn/roff.rb
# Emit roff for +node+ as inline content.
#
# +node+ may be nil/false (ignored), an Array or Nokogiri::XML::NodeSet (each
# member is filtered in turn), a text node (escaped and written), a comment
# (ignored), or an inline element, which is dispatched on its tag name.
# Unrecognized elements produce a warning and no output.
#
# Raises RuntimeError for any other node type.
def inline_filter(node)
  return unless node # is an empty node

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    node.each { |ch| inline_filter(ch) }

  elsif node.text?
    text = node.to_html.dup
    write escape(text)

  elsif node.comment?
    # ignore HTML comments

  elsif node.elem?
    # Writes the children between a font-selection escape and a reset to roman.
    in_font = lambda do |font|
      write font
      inline_filter(node.children)
      write '\fR'
    end

    case node.name
    when 'span'
      inline_filter(node.children)

    when 'code'
      # Inside <pre> the surrounding block handles presentation; do not embolden.
      if child_of?(node, 'pre')
        inline_filter(node.children)
      else
        in_font.call('\fB')
      end

    when 'b', 'strong', 'kbd', 'samp'
      in_font.call('\fB')

    when 'var', 'em', 'i', 'u'
      in_font.call('\fI')

    when 'br'
      macro 'br'

    when 'a'
      if node.classes.include?('man-ref')
        inline_filter(node.children)
      elsif node.has_attribute?('data-bare-link')
        in_font.call('\fI')
      else
        inline_filter(node.children)
        # An anchor without an href (e.g. a named anchor) has no target to
        # print; previously this dereferenced a nil attribute and raised.
        href = node['href']
        unless href.nil?
          write ' '
          write '\fI'
          write escape(href)
          write '\fR'
        end
      end

    when 'sup'
      # This superscript equivalent is a big ugly hack.
      write '^('
      inline_filter(node.children)
      write ')'

    else
      warn 'unrecognized inline tag: %p', node.name
    end

  else
    raise "unexpected node: #{node.inspect}"
  end
end
macro(name, value = nil) click to toggle source
    # File lib/ronn/roff.rb
# Write a roff macro request on its own line: a dot, then +name+, then
# +value+ when given. Array values are joined with single spaces.
def macro(name, value = nil)
  maybe_new_line
  words = [name, value].compact
  writeln('.' + words.join(' '))
end
maybe_new_line() click to toggle source
    # File lib/ronn/roff.rb
# Write a newline unless the buffer is empty or its last chunk already ends
# with one.
def maybe_new_line
  tail = @buf.last
  return unless tail

  write("\n") unless tail[-1] == "\n"
end
nop() click to toggle source
    # File lib/ronn/roff.rb
# Explicit no-op, used to mark branches that intentionally do nothing.
# Always returns nil.
def nop
  nil
end
normalize_whitespace!(node) click to toggle source
   # File lib/ronn/roff.rb
# Normalize whitespace in the text nodes beneath +node+, modifying the tree
# in place.
#
# Runs of newlines and spaces collapse to one space. Text at the start or end
# of its parent, or next to a block element or <br>, is trimmed on that side.
# Text nodes left empty are removed. The contents of <pre> are not touched.
def normalize_whitespace!(node)
  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    # Walk a copy of the list, since text nodes may be removed along the way.
    node.to_a.dup.each { |child| normalize_whitespace!(child) }
  elsif node.text?
    # True when there is no sibling, or the sibling is a block element or <br>.
    at_edge = lambda do |sibling|
      sibling.nil? || block_element?(sibling.name) || sibling.name == 'br'
    end
    before = node.previous
    after = node.next
    squeezed = node.content.gsub(/[\n ]+/m, ' ')
    squeezed.lstrip! if at_edge.call(before)
    squeezed.rstrip! if at_edge.call(after)
    if squeezed.empty?
      node.remove
    else
      node.content = squeezed
    end
  elsif node.elem?
    # Preformatted text keeps its whitespace: do not descend into <pre>.
    unless node.name == 'pre'
      kids = node.children
      normalize_whitespace!(kids) if kids
    end
  elsif node.document? || node.fragment?
    normalize_whitespace!(node.children)
  elsif node.is_a?(Nokogiri::XML::DTD) || node.is_a?(Nokogiri::XML::Comment)
    # ignore
    nop
  else
    warn 'unexpected node during whitespace normalization: %p', node
  end
end
previous(node) click to toggle source
   # File lib/ronn/roff.rb
# Return the nearest preceding sibling of +node+ that is an element, skipping
# any non-element siblings. Returns nil when there is none, or when +node+
# does not respond to +previous+.
def previous(node)
  return nil unless node.respond_to?(:previous)

  sibling = node.previous
  sibling = sibling.previous while !sibling.nil? && !sibling.elem?
  sibling
end
quote(text) click to toggle source
    # File lib/ronn/roff.rb
# Wrap +text+ in double quotes for use as a single roff macro argument.
#
# Embedded double quotes are written as the <tt>\(dq</tt> special character.
# They must not be written as <tt>\"</tt>: roff treats that sequence as the
# start of a comment, which would cut the argument short.
def quote(text)
  # Block form so the replacement is inserted literally, with no
  # back-reference interpretation of the backslash.
  escaped = text.gsub('"') { '\(dq' }
  %("#{escaped}")
end
remove_extraneous_elements!(doc) click to toggle source
   # File lib/ronn/roff.rb
# Detach every comment node found while traversing +doc+.
#
# Comments are collected first and unlinked afterwards so the tree is not
# modified while it is being traversed. Deleting a node from the NodeSet
# returned by +children+ only changes that temporary set, not the document,
# so each comment is unlinked from the tree directly instead.
def remove_extraneous_elements!(doc)
  comments = []
  doc.traverse { |node| comments << node if node.comment? }
  comments.each(&:unlink)
end
title_heading(name, section, _tagline, manual, version, date) click to toggle source
   # File lib/ronn/roff.rb
# Write the generator comments and, when +name+ is given, the TH title macro.
#
# The TH arguments are the upcased and escaped name, the section, the date as
# "Month Year", the version and, only when +manual+ is given, the manual
# name. A nil +date+ leaves the date argument empty instead of raising.
# +_tagline+ is accepted but not used.
def title_heading(name, section, _tagline, manual, version, date)
  comment "generated with Ronn-NG/v#{Ronn.version}"
  comment "http://github.com/apjanke/ronn-ng/tree/#{Ronn.revision}"
  return if name.nil?

  month_year = date ? date.strftime('%B %Y') : ''
  fields = [escape(name.upcase), section, month_year, version]
  fields << manual if manual
  macro 'TH', fields.map { |field| %("#{field}") }.join(' ')
end
warn(text, *args) click to toggle source
    # File lib/ronn/roff.rb
# Print a warning through Kernel.warn.
#
# +text+ is a format string (prefixed with "warn: ") and +args+ are its
# format arguments. The arguments are splatted into +format+ so that each
# directive receives its own value rather than the whole argument Array.
def warn(text, *args)
  Kernel.warn format("warn: #{text}", *args)
end
write(text) click to toggle source

Write text to the output buffer.

    # File lib/ronn/roff.rb
# Append +text+ to the output buffer. Nil and empty strings are ignored.
#
# A roff line must not start with "." or "'", since both introduce requests.
# A zero-width <tt>\&</tt> is therefore inserted before an escaped dot
# (<tt>\.</tt>) or an apostrophe that follows a newline, either inside
# +text+ or at its start when the buffer currently ends with a newline.
def write(text)
  return if text.nil? || text.empty?

  # Block form inserts the replacement literally. In a replacement String,
  # \& and \' are back-references (whole match and post-match), which made
  # the apostrophe case duplicate the remainder of the text.
  text = text.gsub("\n\\.") { "\n\\&\\." }
  text = text.gsub("\n'") { "\n\\&'" }

  buf_ends_in_newline = @buf.last && @buf.last[-1] == "\n"
  @buf << '\&' if text[0, 2] == '\.' && buf_ends_in_newline
  @buf << '\&' if text[0, 1] == "'" && buf_ends_in_newline
  @buf << text
end
writeln(text) click to toggle source

Write text to the output buffer on a new line.

    # File lib/ronn/roff.rb
# Write +text+ on a line of its own: start a new line if the buffer does not
# already end with one, then write +text+ followed by a newline.
def writeln(text)
  maybe_new_line
  [text, "\n"].each { |chunk| write(chunk) }
end