#!/usr/bin/env ruby

require 'hyph-utf8'

# Relative directory of the encoding definition files. It is not referenced
# again in this script; presumably the hyph-utf8 library reads it (TODO confirm).
$encoding_data_dir = 'data/encodings'
# Names of the 8-bit encodings for which a conversion file is generated.
$encodings = %w[ec qx t2a lmc il2 il3 l7x]

# Root path: three directory levels above the current working directory
# (File.expand_path resolves a relative path against the cwd, not this file).
$path_root = File.expand_path('../../..')
# Directory that receives the generated conv-utf8-<encoding>.tex files.
$output_data_dir = $path_root + '/tex/generic/hyph-utf8/conversions'

# Sample record, presumably from one of the encoding data files (TODO confirm):
# 0x19; U+0131;  1; dotlessi
# Two-digit lowercase hex string for a byte value (e.g. 0xA1 -> "a1").
# String#unpack returns an Array, so the single element must be extracted;
# interpolating the Array itself would emit its inspect form (["a1"]).
hex_byte = lambda { |code| [code].pack('c').unpack('H2').first }

# For every encoding in $encodings, write conv-utf8-<encoding>.tex into
# $output_data_dir. Each generated file contains, in this order:
#   1. a comment header,
#   2. \catcode lines making every listed first byte active,
#   3. one \def per first byte that compares its argument against each known
#      second byte and expands to the corresponding byte of the 8-bit encoding,
#   4. \lccode assignments for the encoding's lowercase characters.
$encodings.each do |encoding|
	# NOTE(review): Encoding is presumably the class provided by hyph-utf8
	# (required at the top of this file), not a plain Ruby core Encoding.
	e = Encoding.new(encoding)

	# Pairs of [first byte, characters starting with that byte], ordered by
	# first byte. Fetched once and reused for both passes below.
	first_bytes = e.unicode_characters_first_byte.sort

	output_path = "#{$output_data_dir}#{File::Separator}conv-utf8-#{encoding}.tex"

	# Block form of File.open closes the file even if generation raises.
	File.open(output_path, "w") do |file_out|
		file_out.puts "% conv-utf8-#{encoding}.tex"
		file_out.puts "%"
		file_out.puts "% Conversion from UTF-8 to #{encoding.upcase},"
		file_out.puts "% used before loading hyphenation patterns for 8-bit TeX engines."
		file_out.puts "%"
		file_out.puts "% This file is part of hyph-utf8 package and autogenerated."
		file_out.puts "% See http://tug.org/tex-hyphen"
		file_out.puts "%"
		file_out.puts "% Copyright 2008 TeX Users Group."
		file_out.puts "% You may freely use, modify and/or distribute this file."
		file_out.puts "% (But consider adapting the scripts if you need modifications.)"
		file_out.puts "%"

		# Make all the possible first bytes active.
		# The "XX hex notation of TeX needs uppercase digits, hence upcase here,
		# while the ^^xx notation used further down needs lowercase ones.
		first_bytes.each do |first_byte, _characters|
			file_out.puts "\\catcode\"#{first_byte.upcase}=\\active"
		end
		file_out.puts "%"

		# One macro per first byte: a chain of \ifx ... \else tests, one per
		# known second byte, ending in an error message and the matching \fi's.
		first_bytes.each do |first_byte, characters|
			file_out.puts "\\def^^#{first_byte}#1{%"
			# Order the alternatives by Unicode code point.
			characters.sort_by { |c| c.code_uni }.each do |uni_character|
				second_byte = uni_character.bytes[1]
				enc_byte    = hex_byte.call(uni_character.code_enc)
				ux_code     = sprintf("U+%04X", uni_character.code_uni)
				file_out.puts "\t\\ifx#1^^#{second_byte}^^#{enc_byte}\\else % #{[uni_character.code_uni].pack('U')} - #{ux_code} - #{uni_character.name}"
			end
			file_out.puts "\t\\errmessage{Hyphenation pattern file corrupted or #{encoding} encoding not supported!}"
			# Every \ifx opened above needs its own closing \fi.
			file_out.puts "\\fi" * characters.size + "}"
		end
		file_out.puts "%"
		# The backslash must be doubled: "\l" is not a Ruby escape sequence and
		# would silently lose the backslash.
		file_out.puts "% ensure all the chars above have valid \\lccode values"
		file_out.puts "%"
		e.lowercase_characters.sort_by { |c| c.code_enc }.each do |character|
			code = hex_byte.call(character.code_enc).upcase
			# \lccode"FF="FF
			ux_code = sprintf("U+%04X", character.code_uni)
			file_out.puts "\\lccode\"#{code}=\"#{code} % #{[character.code_uni].pack('U')} - #{ux_code} - #{character.name}"
		end
		file_out.puts

		# if encoding == 'ec'
		# 	file_out.puts '% TODO: test if needed and the exact syntax'
		# 	file_out.puts '% some patterns use apostrophe and hyphen'
		# 	# \lccode`\'=`\'
		# 	file_out.puts "\\lccode`\\'=`\\'"
		# 	file_out.puts "\\catcode`-=11"
		# 	file_out.puts
		# end
		# if encoding == 't2a'
		# 	file_out.puts "\\lccode`\\'=`\\'"
		# end
	end
end

