#!/usr/bin/perl
# Convert clean HTML from Nisus into HTML suitable for Pandoc
# epubber "Astounding Scripts.html" > content.html
# Jerry Stratton astoundingscripts.com

use strict;
use warnings;

# Classes whose collected lines get their content wrapped in <code>...</code>.
my @codeClasses = ("code_printout", "computer_output");

# Output flavour. '--kindle' as the first argument selects 'kindle', which
# inlines footnote text at the reference; otherwise footnotes are rewritten
# with epub:type attributes.
my $format = '';
if (@ARGV && $ARGV[0] eq '--kindle') {
	$format = 'kindle';
	shift @ARGV;
}

# Footnote text keyed by note id (kindle mode only). Kept as a package
# variable because insertNote() reads it.
our %notes;

# State for the run of consecutive same-class lines currently being collected.
my $currentClass = '';
my $currentIndentation = '';
my $savedLines = '';

# Print the collected lines wrapped in a single <div class="..."> and reset
# the buffer. Does nothing when no lines are pending.
sub flushSavedLines {
	return if $savedLines eq '';
	print "$currentIndentation<div class=\"$currentClass\">\n";
	print $savedLines;
	print "$currentIndentation</div>\n";
	$savedLines = '';
}

while (<>) {
	#switch to ePub stylesheet
	s!<link href="http://www.hoboes.com/library/css/nisus.css" rel="StyleSheet" media="all" />!<link href="epub.css" rel="StyleSheet" media="all" />!;

	#add epub schema
	s!<html xmlns="http://www.w3.org/1999/xhtml"!<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"!;

	if ($format eq 'kindle') {
		#remove the footnote block and remember its text for inlining;
		#only record a note when the substitution actually matched, since
		#after a failed match $1/$2 still hold an earlier match's captures
		if (s!<div class="footnote" id="(note_[0-9]+)"><p><sup class="notereference"><a href="#reference_[0-9]+">\*</a></sup>(.*)</p></div>!!) {
			$notes{$1} = $2;
		}
		#replace each reference with the remembered note text
		s!<sup class="notereference" id="reference_[0-9]+"><a href="#(note_[0-9]+)">\*</a></sup>!insertNote($1)!ge;
	} else {
		#convert footnote sidebars to asides
		s!<div class="footnote" id="(note_[0-9]+)"><p><sup class="notereference"><a href="#reference_[0-9]+">\*</a></sup>(.*)</p></div>!<div epub:type="footnote" id="$1"><p>$2</p></div>!;
		s!<sup class="notereference" id="reference_[0-9]+"><a href="(#note_[0-9]+)">\*</a></sup>!<sup><a epub:type="noteref" href="$1">*</a></sup>!g;
	}

	#collect non-div tags with classes into div tags
	#(fields stay empty unless this line really starts with a classed tag)
	my ($indentation, $tag, $class, $restOfLine) = ('', '', '', '');
	if (/^(\t*)<([a-z]+) class="([a-z_]+)">(.*)$/) {
		($indentation, $tag, $class, $restOfLine) = ($1, $2, $3, $4);
	}

	#a line is collected when it is a classed tag other than div/span and
	#its class is not table_cell
	my $collect = $class ne '' && $tag ne "div" && $tag ne "span" && $class ne "table_cell";

	#if the class has changed (or gone), or this line will be printed
	#directly, print the saved lines in a div tag first so output stays
	#in document order
	if ($savedLines ne '' && (!$collect || $currentClass ne $class)) {
		flushSavedLines();
	}
	$currentClass = $class;
	$currentIndentation = $indentation;

	if ($collect) {
		if (grep { $_ eq $class } @codeClasses) {
			#move the closing tag outside the <code> wrapper
			$restOfLine =~ s!</$tag>$!!;
			$restOfLine = "<code>$restOfLine</code></$tag>";
		}
		#the class attribute is dropped; the wrapping div carries it
		$savedLines .= "\t$indentation<$tag>$restOfLine\n";
		next;
	}

	#drop the headings for sections named Contents/Index/etc.
	print unless /<h[12]>(Contents|Index|Script index|Language index)<\/h[12]>/;
}

#input may end while lines are still being collected; do not lose them
flushSavedLines();

# Return the inline text that replaces a footnote reference.
#
# Takes the note id captured from the reference's href (e.g. "note_12") and
# looks it up in %notes. Returns the note text as " [text]". If no note has
# been recorded under that id, reports the id on STDERR and exits the
# script with status -1.
sub insertNote {
	my ($noteKey) = @_;

	# Test for the key itself rather than the truth of its value, so a
	# recorded note whose text is "0" or empty is not reported as missing.
	unless (exists $notes{$noteKey}) {
		print STDERR "Unable to find note $noteKey\n";
		exit(-1);
	}

	return " [$notes{$noteKey}]";
}
