tDiaryのデータをHTML化する - 2008-12-05 - ククログ

ククログ

株式会社クリアコード > ククログ > tDiaryのデータをHTML化する

tDiaryのデータをHTML化する

tDiaryをローカルなネットワークに配置して、tDiaryが表示する内容を静的なHTMLとして公開したい場合はよくありますよね。ククログもそんなよくある使い方の1つです。

tDiaryには静的なHTMLを生成するためのsqueezeプラグインがありますが、squeezeプラグインが出力するHTMLは以下の点でCGIで表示される内容と異なります。

  • 各日付のページしか生成しない
    • 最新の日記n件ページや月別ページやカテゴリページは生成しない
  • リンクがCGI用のリンクのままで、次の日記のページに移動するリンクが壊れている
  • テーマファイルや画像はコピーしてくれないので、生成したHTMLの入ったディレクトリ以下だけでは完結しない

ただし、これはsqueezeプラグインが検索エンジンへの入力データとしてのHTML生成を目的としているためです。よくある使い方では、生成されたHTMLはCGIで出力されているように表示できることが目的なので、上記のようなミスマッチが発生します。

そこで、ククログではhtml-archiver.rbという静的なHTMLを生成するスクリプトを使っています。html-archiver.rbは最後の方に載せています。

html-archiver.rbの使い方

html-archiver.rbを使うと、CGIで出力されている内容と同じように表示されるHTMLが生成されます。生成例は今見ているこのページです。

使い方はこうなります。

% ruby html-archiver.rb --tdiary tdiayr.rbのあるディレクトリ --conf tdiary.confのあるディレクトリ 出力先ディレクトリ

例えば、以下のような場合を考えます。

  • tdiary.rbは~tdiary/work/ruby/tdiary/core/にある
  • tdiary.confは~tdiary/public_html/にある
  • HTMLは~tdiary/public_html/html/以下に出力する

この場合はこのようなコマンドになります。

% ruby html-archiver.rb --tdiary ~tdiary/work/ruby/tdiary/core/ --conf ~tdiary/public_html/ ~tdiary/public_html/html/

機能

  • 日付ページの生成:
  • 最新n件ページの生成:
  • 月別ページの生成:
  • RSS 1.0の生成:
  • テーマファイルのコピー
  • 画像のコピー

制限

  • ツッコミが生成されるかどうかは試していない
  • カテゴリ一覧ページがきちんと生成されるかは(最近は)試していない
  • タブインデント(tDiary本体のコーディングスタイルに合わせているため)
  • 思ったほど使う場面が少ないかもしれない(もしかしたら、tDiaryが表示する内容を静的なHTMLとして公開することがそんなにないかもしれない)

ライセンス

GPL3あるいは3以降の新しいバージョンのGPL

html-archiver.rb

#!/usr/bin/env ruby
# -*- coding: utf-8; ruby-indent-level: 3; tab-width: 3; indent-tabs-mode: t -*-

require 'uri'
require 'cgi'
require 'fileutils'
require 'pathname'
require 'optparse'
require 'ostruct'
require 'enumerator'
require 'rss'

options = OpenStruct.new
options.tdiary_path = "./"
options.conf_dir = "./"
opts = OptionParser.new do |opts|
	opts.banner += " OUTPUT_DIR"

	opts.on("-t", "--tdiary=TDIARY_DIRECTORY",
			  "a directory that has tdiary.rb") do |path|
		options.tdiary_path = path
	end

	opts.on("-c", "--conf=TDIARY_CONF", "a path of tdiary.conf") do |conf|
		options.conf_dir = conf
	end
end
opts.parse!

output_dir = ARGV.shift

Dir.chdir(options.conf_dir) do
	$LOAD_PATH.unshift(File.expand_path(options.tdiary_path))
	require "tdiary"
end

module HTMLArchiver
	class CGI < ::CGI
		def referer
			nil
		end

		private
		def env_table
			{"REQUEST_METHOD" => "GET", "QUERY_STRING" => ""}
		end
	end

	module Image
		def init_image_dir
			@image_dest_dir = @dest + "images"
		end
	end

	module Base
		include Image

		def initialize(rhtml, dest, conf)
			@ignore_parser_cache = true

			cgi = CGI.new
			setup_cgi(cgi, conf)
			@dest = dest
			init_image_dir
			super(cgi, rhtml, conf)
		end

		def eval_rhtml(*args)
			link_detect_re = /(<(?:a|link)\b.*?\bhref|<img\b.*?\bsrc)="(.*?)"/
			super.gsub(link_detect_re) do |link_attribute|
				prefix = $1
				link = $2
				uri = URI(link)
				if uri.absolute? or link[0] == ?/
					link_attribute
				else
					%Q[#{prefix}="#{relative_path}#{link}"]
				end
			end
		end

		def save
			return unless can_save?
			filename = output_filename
			if !filename.exist? or filename.mtime != last_modified
				filename.open('w') {|f| f.print(eval_rhtml)}
				filename.utime(last_modified, last_modified)
			end
		end

		protected
		def output_component_name
			dir = @dest + output_component_dir
			name = output_component_base
			FileUtils.mkdir_p(dir.to_s, :mode => 0755)
			filename = dir + "#{name}.html"
			[dir, name, filename]
		end

		def mode
			self.class.to_s.split(/::/).last.downcase
		end

		def cookie_name; ''; end
		def cookie_mail; ''; end

		def load_plugins
			result = super
			@plugin.instance_eval(<<-EOS, __FILE__, __LINE__ + 1)
				def anchor( s )
					case s
					when /\\A(\\d+)#?([pct]\\d*)?\\z/
						day = $1
						anchor = $2
						if /\\A(\\d{4})(\\d{2})(\\d{2})?\\z/ =~ day
							day = [$1, $2, $3].compact
							day = day.collect {|component| component.to_i.to_s}
							day = day.join("/")
						end
						if anchor then
							"\#{day}.html#\#{anchor}"
						else
							"\#{day}.html"
						end
					when /\\A(\\d{8})-\\d+\\z/
						@conf['latest.path'][$1]
					else
						""
					end
				end

				def category_anchor(category)
					href = "category/\#{u category}.html"
					if @category_icon[category] and !@conf.mobile_agent?
						%Q|<a href="\#{href}"><img class="category" src="\#{h @category_icon_url}\#{h @category_icon[category]}" alt="\#{h category}"></a>|
					else
						%Q|[<a href="\#{href}">\#{h category}</a>]|
					end
				end

				def navi_admin
					""
				end

				@image_dir = #{@image_dest_dir.to_s.dump}
				@image_url = "#{@conf.base_url}#{@image_dest_dir.basename}"
			EOS
			result
		end

		private
		def setup_cgi(cgi, conf)
		end
	end

	class Day < TDiary::TDiaryDay
		include Base

		def initialize(diary, dest, conf)
			@target_date = diary.date
			@target_diaries = {@target_date.strftime("%Y%m%d") => diary}
			super("day.rhtml", dest, conf)
		end

		def can_save?
			not @diary.nil?
		end

		def output_filename
			dir, name, filename = output_component_name
			filename
		end

		def [](date)
			@target_diaries[date.strftime("%Y%m%d")] or super
		end

		def relative_path
			"../../"
		end

		private
		def output_component_dir
			Pathname(@target_date.strftime("%Y")) + @target_date.month.to_s
		end

		def output_component_base
			@target_date.day.to_s
		end

		def setup_cgi(cgi, conf)
			super
			cgi.params["date"] = [@target_date.strftime("%Y%m%d")]
		end
	end

	class Month < TDiary::TDiaryMonth
		include Base
		def initialize(date, dest, conf)
			@target_date = date
			super("month.rhtml", dest, conf)
		end

		def can_save?
			not @diary.nil?
		end

		def output_filename
			dir, name, filename = output_component_name
			filename
		end

		def relative_path
			"../"
		end

		private
		def output_component_dir
			@target_date.strftime("%Y")
		end

		def output_component_base
			@target_date.month.to_s
		end

		private
		def setup_cgi(cgi, conf)
			super
			cgi.params["date"] = [@target_date.strftime("%Y%m")]
		end
	end

	class Category < TDiary::TDiaryView
		include Base

		def initialize(category, diaries, dest, conf)
			@category = category
			diaries = diaries.reject {|date, diary| !diary.visible?}
			_, diary = diaries.sort_by {|date, diary| diary.last_modified}.last
			@target_date = diary.date
			super("latest.rhtml", dest, conf)
			@diaries = diaries
			@diary = diary
		end

		def can_save?
			not @diary.nil?
		end

		def output_filename
			category_dir = @dest + "category"
			category_dir.mkpath
			category_dir + "#{@category}.html"
		end

		def relative_path
			"../"
		end

		def latest(limit=5)
			@diaries.keys.sort.reverse_each do |date|
				diary = @diaries[date]
				yield(diary)
			end
		end

		protected
		def setup_cgi(cgi, conf)
			super
			cgi.params["date"] = [@target_date.strftime("%Y%m")]
		end
	end

	class Latest < TDiary::TDiaryLatest
		include Base

		def initialize(date, index, dest, conf)
			@target_date = date
			@index = index
			super("latest.rhtml", dest, conf)
		end

		def relative_path
			if @index.zero?
				""
			else
				"../"
			end
		end

		def can_save?
			true
		end

		def output_filename
			if @index.zero?
				@dest + "index.html"
			else
				latest_dir = @dest + "latest"
				FileUtils.mkdir_p(latest_dir.to_s, :mode => 0755)
				latest_dir + "#{@index}.html"
			end
		end

		protected
		def setup_cgi(cgi, conf)
			super
			return if @index.zero?
			date = @target_date.strftime("%Y%m%d") + "-#{conf.latest_limit}"
			cgi.params["date"] = [date]
		end
	end

	class RSS < TDiary::TDiaryLatest
		include Base

		def initialize(dest, conf)
			super("latest.rhtml", dest, conf)
		end

		def mode
			"latest"
		end

		def relative_path
			""
		end

		def can_save?
			true
		end

		def output_filename
			@dest + output_base_name
		end

		def output_base_name
			"index.rdf"
		end

		def do_eval_rhtml(prefix)
			load_plugins
			make_rss
		end

		private
		def make_rss
			base_uri = @conf['html_archiver.base_url'] || @conf.base_url
			rss_uri = base_uri + output_base_name

			@conf.options['apply_plugin'] = true
			feed = ::RSS::Maker.make("1.0") do |maker|
				setup_channel(maker.channel, rss_uri, base_uri)
				setup_image(maker.image, base_uri)

				@diaries.keys.sort.reverse[0, 15].each do |date|
					diary = @diaries[date]

					maker.items.new_item do |item|
						setup_item(item, diary, base_uri)
					end
				end
			end

			feed.to_s
		end

		def setup_channel(channel, rss_uri, base_uri)
			channel.about = rss_uri
			channel.link = base_uri
			channel.title = @conf.html_title
			channel.description = @conf.description
			channel.dc_creator = @conf.author_name
			channel.dc_rights = @conf.copyright
		end

		def setup_image(image, base_uri)
			return if @conf.banner.nil?
			return if @conf.banner.empty?

			if /^http/ =~ @conf.banner
				rdf_image = @conf.banner
			else
				rdf_image = base_uri + @conf.banner
			end

			maker.image.url = rdf_image
			maker.image.title = @conf.html_title
			maker.link = base_uri
		end

		def setup_item(item, diary, base_uri)
			section = nil
			diary.each_section do |_section|
				section = _section
				break if section
			end
			return if section.nil?

			item.link = base_uri + @plugin.anchor(diary.date.strftime("%Y%m%d"))
			item.dc_date = diary.last_modified
			@plugin.instance_variable_set("@makerss_in_feed", true)
			subtitle = section.subtitle_to_html
			body_enter = @plugin.send(:body_enter_proc, diary.date)
			body = @plugin.send(:apply_plugin, section.body_to_html)
			body_leave = @plugin.send(:body_leave_proc, diary.date)
			@plugin.instance_variable_set("@makerss_in_feed", false)

			subtitle = @plugin.send(:apply_plugin, subtitle, true).strip
			subtitle.sub!(/^(\[([^\]]+)\])+ */, '')
			description = @plugin.send(:remove_tag, body).strip
			subtitle = @conf.shorten(description, 20) if subtitle.empty?
			item.title = subtitle
			item.description = description
			item.content_encoded = body
			item.dc_creator = @conf.author_name
			section.categories.each do |category|
				item.dc_subjects.new_subject do |subject|
					subject.content = category
				end
			end
		end
	end

	class Main < TDiary::TDiaryBase
		include Image

		def initialize(cgi, dest, conf, src=nil)
			super(cgi, nil, conf)
			calendar
			@dest = dest
			@src = src || './'
			init_image_dir
		end

		def run
			@date = Time.now
			load_plugins
			copy_images

			all_days = archive_days
			archive_categories
			archive_latest(all_days)

 			make_rss
			copy_theme
		end

		private
		def copy_images
			image_src_dir = @plugin.instance_variable_get("@image_dir")
			image_src_dir = Pathname(image_src_dir)
			unless image_src_dir.absolute?
				image_src_dir = Pathname(@src) + image_src_dir
			end
			@image_dest_dir.rmtree if @image_dest_dir.exist?
			if image_src_dir.exist?
				FileUtils.cp_r(image_src_dir.to_s, @image_dest_dir.to_s)
			end
		end

		def archive_days
			all_days = []
			@years.keys.sort.each do |year|
				@years[year].sort.each do |month|
					month_time = Time.local(year.to_i, month.to_i)
					month = Month.new(month_time, @dest, conf)
 					month.save
					month.send(:each_day) do |diary|
						all_days << diary.date
 						Day.new(diary, @dest, conf).save
					end
				end
			end
			all_days
		end

		def archive_categories
			cache = @plugin.instance_variable_get("@category_cache")
			cache.categorize([], @years).each do |category, diaries|
				categorized_diaries = {}
				diaries.keys.each do |date|
					date_time = Time.local(*date.scan(/^(\d{4})(\d\d)(\d\d)$/)[0])
					@io.transaction(date_time) do |diaries|
						categorized_diaries[date] = diaries[date]
						DIRTY_NONE
					end
				end
 				Category.new(category, categorized_diaries, @dest, conf).save
			end
		end

		def archive_latest(all_days)
			conf["latest.path"] = {}

			latest_days = []
			all_days.reverse.each_slice(conf.latest_limit) do |days|
				latest_days << days
			end

			latest_days.each_with_index do |days, i|
				date = days.first.strftime("%Y%m%d")
				if i.zero?
					latest_path = "./"
				else
					latest_path = "latest/#{i}.html"
				end
				conf["latest.path"][date] = latest_path
			end
			latest_days.each_with_index do |days, i|
				latest = Latest.new(days.first, i, @dest, conf)
				latest.save
				conf["ndays.prev"] = nil
				conf["ndays.next"] = nil
			end
		end

		def make_rss
			RSS.new(@dest, conf).save
		end

		def copy_theme
			theme_dir = @dest + "theme"
			theme_dir.rmtree if theme_dir.exist?
			theme_dir.mkpath
			tdiary_theme_dir = Pathname(File.join(TDiary::PATH, "theme"))
			FileUtils.cp((tdiary_theme_dir + "base.css").to_s, theme_dir.to_s)
			if @conf.theme
				FileUtils.cp_r((tdiary_theme_dir + @conf.theme).to_s,
									(theme_dir + @conf.theme).to_s)
			end
		end
	end
end

cgi = HTMLArchiver::CGI.new
conf = TDiary::Config.new(cgi)
conf.show_comment = true
conf.hide_comment_form = true
def conf.bot?; false; end
output_dir ||= Pathname(conf.data_path) + "cache" + "html"
output_dir = Pathname(output_dir).expand_path
output_dir.mkpath
HTMLArchiver::Main.new(cgi, output_dir, conf, options.conf_dir).run