[1fa2099] | 1 | require 'fileutils'
|
---|
| 2 | require 'rexml/parsers/pullparser'
|
---|
| 3 |
|
---|
module DocBook

  # Converts a DocBook XML file into an .epub archive.
  #
  # The work is done by external programs (xmllint, xsltproc, zip) that must
  # be on the PATH. Intermediate files are staged below +output_dir+ and are
  # removed again once render_to_file finishes, whether or not it succeeded.
  class Epub
    CHECKER = "epubcheck"
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    CALLOUT_PATH = File.join('images', 'callouts')
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path of the DocBook XML source; must exist.
    # output_dir::          staging directory for the unzipped epub content.
    # css_file::            optional CSS file copied into the epub.
    # customization_layer:: optional XSLT used instead of STYLESHEET.
    # embedded_fonts::      font file path(s) to embed; nil, a single path
    #                       or an Array of paths are all accepted.
    #
    # Raises ArgumentError when +docbook_file+ does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      # Array() turns nil into [] and wraps a lone path, so later code can
      # always call Array methods on @embedded_fonts.
      @embedded_fonts = Array(embedded_fonts)
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and zips the result into +output_file+.
    #
    # Raises RuntimeError when one of the external commands fails. Every file
    # registered for deletion so far is removed even in that case.
    def render_to_file(output_file, verbose=false)
      render_to_epub(output_file, verbose)
      bundle_epub(output_file, verbose)
    ensure
      cleanup_files(@to_delete)
    end

    # Runs CHECKER on +file+. Returns false when the checker exits with
    # status 0, otherwise the checker's combined stdout/stderr output (a
    # truthy String).
    def self.invalid?(file)
      # Obnoxiously, we can't just check for a non-zero output...
      cmd = %Q(#{CHECKER} "#{file}")
      output = `#{cmd} 2>&1`
      return false if $?.success?

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Collapses the source document and runs the XSLT processor inside the
    # staging directory. The generated META-INF and OEBPS entries are
    # registered for deletion.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook()

      # The command below starts with `cd`, so the staging directory has to
      # exist before it runs.
      FileUtils.mkdir_p(@output_dir)

      options = [
        "--stringparam chunk.quietly " + (verbose ? '0' : '1'),
        "--stringparam callout.graphics.path #{CALLOUT_PATH}/",
        "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}",
        "--stringparam callout.graphics.extension #{CALLOUT_EXT}",
        "--stringparam base.dir #{OEBPS_DIR}/",
        embedded_fonts_option,
        "--stringparam epub.metainf.dir #{META_DIR}/",
        "--stringparam epub.oebps.dir #{OEBPS_DIR}/",
        (@css_file ? "--stringparam html.stylesheet #{File.basename(@css_file)}" : nil),
      ].compact.join(" ")

      # Double-quote stylesheet & file to help Windows cmd.exe
      db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{options} "#{@stylesheet}" "#{@collapsed_docbook_file}")
      STDERR.puts db2epub_cmd if $DEBUG
      success = system(db2epub_cmd)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success

      # Nested arrays are fine here: cleanup_files flattens the list.
      @to_delete << Dir["#{@meta_dir}/*"]
      @to_delete << Dir["#{@oebps_dir}/*"]
    end

    # Returns the xsltproc option naming the embedded fonts, or nil when no
    # fonts were given.
    def embedded_fonts_option
      return nil if @embedded_fonts.empty?

      font_names = @embedded_fonts.map {|f| File.basename(f)}.join(',')
      "--stringparam epub.embedded.fonts \"#{font_names}\""
    end

    # Writes the mimetype file, copies images, CSS, fonts and callout
    # graphics into the staging directory and zips everything into
    # +output_file+.
    def bundle_epub(output_file, verbose)
      quiet = verbose ? "" : "-q"
      mimetype_filename = write_mimetype()
      meta = File.basename(@meta_dir)
      oebps = File.basename(@oebps_dir)
      copy_images()
      copy_csses()
      copy_fonts()
      copy_callouts()

      # zip -X -r ../book.epub mimetype META-INF OEBPS
      # Double-quote the paths to help Windows cmd.exe
      zip_cmd = %Q(cd "#{@output_dir}" && #{ZIPPER} #{quiet} -X -r "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}")
      STDERR.puts zip_cmd if $DEBUG
      success = system(zip_cmd)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Returns the path of the collapsed copy, which is written next to the
    # source document as ".collapsed.<basename>".
    def collapse_docbook
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      # Register the file before running anything, so a failure between the
      # two xmllint passes does not leave it behind.
      @to_delete << collapsed_file

      # Double-quote the paths to help Windows cmd.exe
      entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}")
      entity_success = system(entity_collapse_command)
      raise "Could not collapse named entities in #{@docbook_file}" unless entity_success

      xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}")
      xinclude_success = system(xinclude_collapse_command)
      raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success

      collapsed_file
    end

    # Copies the stock callout graphics into the staging directory when the
    # document uses callouts. Returns the source paths that were copied.
    def copy_callouts
      new_callout_images = []
      return new_callout_images unless has_callouts?

      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each {|img|
        img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
        new_callout_images << img
      }
      new_callout_images
    end

    # Copies every embedded font into the OEBPS directory and registers the
    # copies for deletion. Returns the source font paths.
    def copy_fonts
      new_fonts = []
      @embedded_fonts.each {|font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.mkdir_p(@oebps_dir)
        FileUtils.cp(font_file, font_new_filename)
        @to_delete << font_new_filename
        new_fonts << font_file
      }
      new_fonts
    end

    # Copies the CSS file, if one was given, into the OEBPS directory and
    # registers the copy for deletion. Returns the source CSS paths.
    def copy_csses
      return [] unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.mkdir_p(@oebps_dir)
      FileUtils.cp(@css_file, css_new_filename)
      @to_delete << css_new_filename
      [@css_file]
    end

    # Copies every referenced image with a known extension into the OEBPS
    # directory, keeping its relative path. Image paths are resolved against
    # the directory of the source document. Returns the source image paths.
    def copy_images
      source_dir = File.expand_path(File.dirname(@docbook_file))
      new_images = []
      get_image_refs().each {|img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(source_dir, img)

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        STDERR.puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      }
      new_images
    end

    # Writes the "mimetype" file into the staging directory and returns its
    # basename. The staging directory must already exist.
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") {|f| f.print MIMETYPE}
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in +file_list+ (nested arrays allowed).
    # Paths that no longer exist are ignored.
    def cleanup_files(file_list)
      file_list.flatten.each {|f|
        # Yikes
        FileUtils.rm_r(f, :force => true )
      }
    end

    # Returns an Array of all of the (image) @filerefs in a document.
    # Elements without a fileref attribute are skipped.
    def get_image_refs
      image_refs = []
      each_start_element {|name, attributes|
        image_refs << attributes['fileref'] if name == "imagedata" || name == "graphic"
      }
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element {|name, _attributes|
        return true if name == "calloutlist" || name == "co"
      }
      false
    end

    # Yields the name and attribute Hash of every start tag in the collapsed
    # document. The block form of File.open closes the file handle even when
    # the caller returns early or parsing raises.
    def each_start_element
      File.open(@collapsed_docbook_file) {|io|
        parser = REXML::Parsers::PullParser.new(io)
        while parser.has_next?
          el = parser.pull
          yield el[0], el[1] if el.start_element?
        end
      }
    end
  end
end
|
---|