#!/usr/bin/perl
#error check typed in code from Tim Hartnell's Giant Book of Computer Games
#and optionally convert to CoCo 2 code
#Jerry Stratton hoboes.com/coco

use Text::ParseWords;
use List::Util qw(max);

$dataCounter = 0;
$verifyLineNumbering = 1;
$lineIncrement = 10;
$deleteMeText = 'XXDELETE MEXX';
$screenWidth = 32;
$maxLineLength = 255;
@reservedWords = ('ABS', 'AS', 'ASC', 'ATN', 'COS', 'EOF', 'EXP', 'INT', 'JOYSTK', 'LEN', 'LOG', 'MEM', 'PEEK', 'POINT', 'POS', 'PPOINT', 'RND', 'SGN', 'SIN', 'SQR', 'TAN', 'TIMER', 'USR', 'VAL', 'VARPTR');
$reservedWords = '((' . join('|', @reservedWords) . ')[A-Z0-9]*)';
@simpleStatements = ('CLEAR', 'CLOSE', 'CLS', 'CONT', 'DEF', 'DIM', 'END', 'EXEC', 'FOR', 'GOSUB', 'GOTO', 'IF', 'INPUT', 'LET', 'ON', 'OPEN', 'POKE', 'PRINT', 'READ', 'REM', 'RESTORE', 'RETURN', 'STOP');
$simpleStatements = '(' . join('|', @simpleStatements) . ')';
$space = ' ';

while ($option = shift) {
	if ($option =~ /^--compress$/) {
		if ($ARGV[0] =~ /^[1-3]$/) {
			$compress = shift;
		} else {
			$compress = 1;
		}
		$space = '';
	} elsif ($option =~ /^--nococo$/) {
		$noConversion = 1;
	} elsif ($option =~ /^--shift-tabs$/) {
		help() if $ARGV[0] !~ /^[1-9][0-9]*$/;
		$tabShift = shift;
	} elsif ($option =~ /^--uneven-lines$/) {
		$verifyLineNumbering = 0;
	} elsif ($option =~ /^--help$/) {
		help();
	} else {
		$files[$#files+1] = $option;
	}
}
@ARGV = @files;

$previousLineNumber = 0;
while (<>) {
	next if /^$/;
	next if /^#/;
	die("Cannot parse line $_") if !/^([0-9]+) (.*)$/;
	($lineNumber, $line) = ($1, $2);

	#line not in even increments
	fatal("bad line number (use --uneven-lines to ignore)") if $verifyLineNumbering && $previousLineNumber != $lineNumber-$lineIncrement;
	#line too long
	fatal("is too long: $_") if length($_) > $maxLineLength;
	#duplicate line
	fatal("has already appeared") if exists $validLines{int($lineNumber)};
	#mismatched quotes
	$quoteCount = () = $line =~/"/g;
	fatal("has an odd number of quotes") if $quoteCount % 2;

	#keep a list of line numbers
	$validLines{int($lineNumber)} = 1;

	#keep track of variables that needed to be renamed due to starting with a reserved word
	collateReservedWordVariables($line);

	#keep track of how many DATA elements are necessary to reach each line number
	countData($lineNumber, $1) if $line =~ /^DATA (.+)$/;

	$code[$#code+1] = [$lineNumber, $line];
	$previousLineNumber = $lineNumber;
}

exit if $noConversion;

$printOnlyLines = 'PRINT ?"([^"]+)\"(;)?';
while (shiftCode()) {
	while ($line =~ s/^(?:PRINT:)?($printOnlyLines)$/$1/) {
		reformatPrint();
	}

	#per-line error checking
	#mismatched parentheses
	$openParenCount = () = $line =~ /\Q(/g;
	$closeParenCount = () = $line =~ /\Q)/g;
	fatal("has mismatched parentheses") if $openParenCount != $closeParenCount;
	#commas, semicolons, or quotes instead of colons
	fatal("mistyped colon: $line") if $line =~ /[,;'"][A-Z][A-Z0-9]?=/;
	#missing colon
	fatal("missing colon: $line") if $line =~ /[0-9][A-Z][A-Z0-9]?\$?=/;
	#mistyped equal sign
	fatal("mistyped equal: $line") if $line =~ /^[A-Z][A-Z0-9]*\$?\+/;
	$line = reformatInlinePrints($line);
	$line = '' if $line eq 'PRINT' && $previousLine eq 'CLS';
	$line = "REM REMOVED FULL LINE" if !$line && $lineNumber;

	addLine($lineNumber, $line) if $lineNumber;
	$previousLine = $line;
}

#pause after too many sequential PRINT statements for the CoCo
@lineNumbers = sort {$a <=> $b} keys %convertedLines;
while ($lineNumber = shift @lineNumbers) {
	$line = $convertedLines{$lineNumber};

	$linePrintCount = () = $line =~ /(PRINT|INPUT)/g;
	if ($line =~ /(CLS|RETURN|INKEY\$)/) {
		$printCount = 0;
	} elsif ($linePrintCount+$printCount >=15) {
		$delayLine = int((max(keys %validLines)+1000)/1000)*1000;
		$line = "GOSUB $delayLine:$line";
		$printCount = 0;
	} elsif ($linePrintCount == 0) {
		#attempt to not put delays in for unrelated print statements
		$sequentialNoPrints++;
		$printCount = 0 if $sequentialNoPrints > 5;
	}
	$printCount += $linePrintCount;

	if ($compress) {
		#remove remarks
		$line =~ s/^REM.*/REM/;
		$line =~ s/:REM.*$//;
		#if this line is an empty REM and not referenced, skip it
		next if $line =~ /^REM$/ && !$goLines{$lineNumber};

		#numeric-only data lines can have zeroes replaced with nothing
		if ($line =~ /^DATA(.*)$/) {
			$data = $1;
			if ($data !~ /"/) {
				$data =~ s/, +/,/g;
				$data =~ s/\b0\b//g;
			}
			$line = "DATA$data";
		}

		#try combining lines
		#if the current line is referenced, it cannot be combined
		if ($goLines{$lineNumber}) {
			printCombinedLine();
		#if the length would be longer than $maxLineLength, it cannot be combined
		} elsif (length("$combinedLineNumber $combinedLine:$line") > $maxLineLength) {
			printCombinedLine();
		} elsif ($combinedLine =~ /^DATA/ && $line =~ /^DATA(.*)$/) {
			$data = $1;
			if (length("$combinedLineNumber $combinedLine,$data") > $maxLineLength) {
				printCombinedLine();
			} else {
				$combinedLine .= ",$data";
				$line = '';
			}
		} elsif ($combinedLine =~ /^DATA/) {
			printCombinedLine();
		}

		combineLine($lineNumber, $line);
		#if this line contains a statement that will cause subsequent lines to not be reached, finish the combined line
		#technically, GOTO and RETURN shouldn't be necessary, because the subsequent line should be referenced or it will never
		#be reached anyway
		printCombinedLine() if $line =~ /(GOTO|IF|REM|RETURN)/;
	} else {
		print "$lineNumber $line\n";
	}
}
printCombinedLine();

if ($delayLine) {
	#find an unused string variable
	$delayVariable = 'Z';
	$delayVariableStart = '';
	while ($stringVariables{"$delayVariableStart$delayVariable"}) {
		$delayVariable = chr(ord($delayVariable)-1);
		if ($delayVariable lt 'A') {
			$delayVariable = 'Z';
			if ($delayVariableStart) {
				$delayVariableStart = chr(ord($delayVariableStart)-1);
				die("Unable to find delay variable") if $delayVariableStart lt 'A';
			} else {
				$delayVariableStart = 'Z';
			}
		}
	}
	$delayVariable = "$delayVariableStart$delayVariable";
	print "$delayLine PRINT\@32*15,\"       enter TO CONTINUE\";:$delayVariable\$=INKEY\$:IF $delayVariable\$=\"\" THEN $delayLine ELSE CLS:RETURN\n";
	warning("Added delay subroutine for long pages of text at line $delayLine");
}

sub printCombinedLine {
	return if !defined $combinedLineNumber;

	$combinedLine =~ s/"$//;
	print "$combinedLineNumber $combinedLine\n";
	undef $combinedLineNumber;
	$combinedLine = '';
}

sub combineLine {
	my $currentLineNumber = shift;
	my $currentLine = shift;
	return if $currentLine =~ /^$/;

	$combinedLineNumber = $currentLineNumber if !defined $combinedLineNumber;
	$combinedLine .= ':' if $combinedLine ne '';
	$combinedLine .= $line;
}

sub shiftCode {
	my $lineHasValue = @code ? 1 : 0;
	my $codeParts = shift @code;
	my ($number, $statements) = @$codeParts;
	$statements = uc($statements);

	#combine print statements into one long string for later reformatting
	$lineNumber = $number;

	my $previousStatement;
	
	my @quoteSides = split('"', $statements);
	my @statements = ();
	my $followsQuote = 0;
	while (my $statementSide = shift @quoteSides) {
		my $quoteSide = shift @quoteSides;
		$quoteSide = compressText($quoteSide);
		my @substatements = ();
		foreach my $statement (split(/ *: */, $statementSide)) {
			$substatements[$#substatements+1] = statementConversions($statement, $previousStatement, $followsQuote);
			$previousStatement = $statement;
			$followsQuote = 0;
		}

		#if quoted text exists, it must be appended to the final statement
		$substatements[$#substatements] .= '"' . $quoteSide . '"' if @substatements && ($quoteSide ne undef || @quoteSides || substr($substatements[$#substatements], -1) eq '=');
		#any statement in the previous iteration was the end of a quote
		#which means the first statement in this iteration is a continuation of the final statement in the previous iteration
		$statements[$#statements] .= shift @substatements if $#statements >= 0;
		#append the new statements into any previous ones on this line
		@statements = (@statements, @substatements);
		$followsQuote = 1;
	}

	$line = join(':', @statements);

	#remove extra blank PRINT lines, to reduce over-scrolling
	$line =~ s/(PRINT:){2,}/PRINT:/g;
	$line =~ s/(PRINT:){1,}PRINT$/PRINT/;
	$line =~ s/CLS:(PRINT:)*/CLS:/;
	$line =~ s/CLS:PRINT$/CLS/;

	#ECB requires a semicolon after INPUT statements, not commas
	$line =~ s/(INPUT *"[^"]*"),/$1;/g;

	#get rid of statements that need deleting
	$line =~ s/:$deleteMeText//g;

	return $lineHasValue;
}

#abbreviate and otherwise compress text as much as possible
sub compressText {
	my $text = shift;

	#common contractions
	if ($compress > 1) {
		$text =~ s/CANNOT/CAN'T/g;
		$text =~ s/(ARE|DID|DO|DOES|IS|WOULD) NOT\b/$1N'T/g;
		$text =~ s/YOU HAVE\b/YOU'VE/g;
		$text =~ s/  */ /g;
		$text =~ s/ ?- ?/-/g;
		$text =~ s/ *\.\.\.* */../g;
		$text =~ s/\bAND\b/&/g;
	}

	#these start to get a bit ugly
	if ($compress > 2) {
		$text =~ s/ ARE\b/'RE/g;
		$text =~ s/^ARE\b/'RE/g;
		$text =~ s/ (HAS|IS)\b/'S/g;
		$text =~ s/^(HAS|IS)\b/'S/g;
		$text =~ s/ALTHOUGH/THOUGH/g;
		$text =~ s/CONGRATULATIONS/CONGRATS/g;
		$text =~ s/ '(R|S)/'$1/g;
		$text =~ s/S'RE/S'R/g;
	}

	return $text;
}

#this looks at the statement before any quoted text
sub statementConversions {
	my $statement = shift;
	my $previousStatement = shift;
	my $followsQuote = shift;

	#There is no DEFINT in ECB
	if ($statement =~ m/^DEFINT ([A-Z][A-Z0-9]?-[A-Z][A-Z0-9]?)$/) {
		warning("No DEFINT in ECB; ensure that variables $1 are integers");
		return '';
	}

	#PCBASIC TAB(1) is the first position, COCO TAB(1) moves over one position
	$statement =~ s/^PRINT( )?TAB\(([1-9][0-9]*)\)/shiftTab($2)/e;
	#randomization is handled differently on CoCo than on PCBASIC
	$statement =~ s/\bRND(\([01]\)|\b)/RND(0)/g;
	#if there's a variable, use its negative
	$statement =~ s/^RANDOMIZE *([A-Z][A-Z0-9]?)$/ZQ=RND(-$1)/;
	#if it's more complicated, just replace it with -TIMER
	$statement =~ s/^RANDOMIZE.*/ZQ=RND(-TIMER)/;
	#RESTORE can RESTORE to a line number in PCBASIC
	$statement =~ s/^RESTORE ?([1-9][0-9]*)$/fakeRestore($1)/e;
	#reserved words can't be used at the start of variable names
	foreach my $reserved (keys %reservedWordVariables) {
		my $replacement = $reservedWordVariables{$reserved};
		$statement =~ s/$reserved/$replacement/;
	}

	#data lines can't have anything after them
	$statement = $deleteMeText if $statement =~ /^REM/ && $previousStatement =~ /^DATA/;

	#look for bad line numbers
	if ($statement =~ /(GOSUB|GOTO|THEN) +([0-9, ]+)/) {
		$goNumbers = $2;
		$goNumbers =~ s/ +//g;
		foreach $goNumber (split(',', $goNumbers)) {
			fatal("$statement goes to a non-existent line") if !$validLines{int($goNumber)};
			$goLines{$goNumber} = 1;
		}
	}
	#GOSUB and GOTO require spaces in PCBASIC; THEN probably does, too
	fatal("$statement is missing a space before the number") if $statement =~ /(GOSUB|GOTO|THEN)[0-9]/;

	#statement follows quote
	if ($followsQuote) {
		fatal("$1 statement in $statement follows a quote rather than a colon") if $statement =~ /^$simpleStatements/;
	}

	#try to remember string variable names in case we need to add a wait routine
	if ($statement =~ /\b([A-Z][A-Z0-9]*)\$ *=/ && $1 ne 'INKEY') {
		my $variable = substr($1, 0, 2);
		$stringVariables{$variable} = 1;
	}

	if ($compress) {
		$statement =~ s/ *(,|AND|DATA|DIM|ELSE|GOSUB|GOTO|IF|INPUT|NEXT|ON|OR|PRINT|READ|STEP|TO) */$1/g;
		#THEN needs special handling because it needs a space after letter variables
		$statement =~ s/([^A-Z]) +THEN */$1THEN/g;
		$statement =~ s/^ *THEN */THEN/;
		$statement =~ s/THEN */THEN/g;
		$statement =~ s/THEN(GOSUB|GOTO)/$1/g;
	}

	return $statement;
}

sub warning {
	my $message = shift;
	print STDERR "On line $lineNumber, " if $lineNumber;
	print STDERR "$message.\n";
}

sub fatal {
	my $message = shift;
	die("FATAL ERROR ON LINE $lineNumber $message");
}

#subtract one from tab locations
sub shiftTab {
	my $tab = shift;
	$tab--;
	$tab -= $tabShift if $tabShift;
	return "PRINT TAB($tab)";
}

#rewrap multi-line PRINT statements
sub reformatPrint {
	my $printString = '';
	my $startLine = $lineNumber;
	my $endLine = $lineNumber;
	my $inputAtEnd = '';

	while ($line =~ /^$printOnlyLines$/) {
		$printString .= "$1 ";
		$semicolonAtEnd = $2;
		$endLine = $lineNumber;
		shiftCode();
	}

	if ($line =~ /^INPUT ?"([^"]+)";(.*$)/) {
		$printString .= "$1 ";
		$inputAtEnd = $2;
		$endLine = $lineNumber;
		shiftCode();
	}
	chop $printString;
	$printString =~ s/  / /g;
	$spaceAtEnd = $printString =~ m/ $/;
	$printString = compressText($printString);

	my @words = split(' ', $printString);
	my @printLines = ();
	$printString = '';
	while ($word = shift @words) {
		if (length("$printString $word") <= $screenWidth) {
			$printString .= ' ' if $printString;
			$printString .= $word;
		} else {
			$printString = formatTooLongWord($printString);
			$printLines[$#printLines+1] = $printString if $printString;
			$printString = $word;
		}
	}
	$printLines[$#printLines+1] = $printString;

	my $printLine = $startLine;
	foreach $printString (@printLines) {
		my $code = "PRINT${space}\"$printString\"";
		$code .= ';' if length($printString) == $screenWidth;
		addLine($printLine, $code);
		$lastAddedLine = $printLine;
		$printLine++;
	}
	$convertedLines{$lastAddedLine} =~ s/"(;)?$/ "$1/ if $spaceAtEnd;
	if ($inputAtEnd) {
		$convertedLines{$lastAddedLine} .= ';' if $printCode[$#printCode] !~ /;$/;
		addLine($printLine, "INPUT $inputAtEnd");
	} elsif ($semicolonAtEnd) {
		$convertedLines{$lastAddedLine} .= ';'
	}
}

sub addLine {
	my $lineNumber = shift;
	my $lineCode = shift;

	fatal("line $lineNumber already exists") if $convertedLines{$lineNumber};
	$convertedLines{$lineNumber} = $lineCode;
}

sub reformatInlinePrints {
	my $line = shift;

	while ($line =~ s/PRINT ?"([^"]+)":PRINT ?"([^"]+)"/PRINT "$1 $2"/) {};
	$line =~ s/PRINT "([^"]+)"/spaceLongPrint($1)/ge;
	return $line;
}

sub formatTooLongWord {
	my $text = shift;

	#if it's just a divider line, cut it off to the screenwidth
	$text = substr($text, 0, $screenWidth-1) if $text =~ /^[_*-]+$/;
	return $text;
}

sub spaceLongPrint {
	my $line = shift;
	return "PRINT${space}\"$line\"" if length($line) <= $screenWidth;
	my @words = split(' ',$line);
	my $printString = '';
	my $printLine = '';
	while (my $word = shift @words) {
		if (!$printString || length("$printString $word") <= $screenWidth) {
			$printString .= ' ' if $printString;
			$printString .= $word;
			$printString = formatTooLongWord($printString) if length($printString)>$screenWidth;
		} else {
			$printLine .= ':' if $printLine;
			if (length($printString) > $screenWidth) {
				$word = substr($printString, $screenWidth) . ' ' . $word;
				$printString = substr($printString, 0, $screenWidth);
			}
			$printLine .= "PRINT${space}\"$printString\"";
			$printLine .= ';' if length($printString) == $screenWidth;
			$printString = $word;
		}
	}
	$printLine .= ':' if $printLine && $printLine !~ /:$/;
	$printLine .= "PRINT${space}\"$printString\"";
	return $printLine;
}

sub countData {
	my $lineNumber = shift;
	my $data = shift;

	@data = quotewords(",",1,$data);
	$dataCounters{$lineNumber} = $dataCounter;
	$dataCounter += scalar @data;
}

sub fakeRestore {
	my $restoreLine = shift;

	fatal("there is no $restoreLine to RESTORE to") if !exists $dataCounters{$restoreLine};

	return "RESTORE:FOR${space}I=1${space}TO${space}$dataCounters{$restoreLine}:READ${space}I\$:NEXT${space}I" if $dataCounters{$restoreLine};
	return "RESTORE";
}

sub collateReservedWordVariables {
	my $line = shift;
	$line =~ s/^[0-9]+ //;
	if ($line =~ /(^|:) *$reservedWords *=/) {
		my $reservedWord = $3;
		my $badVariable = $2;
		fatal("variable is the reserved word $reservedWord") if $reservedWord eq $badVariable;
		if (!$reservedWordVariables{$badVariable}) {
			$okayVariable = substr($badVariable, 0, 2);
			$reservedWordVariables{$badVariable} = $okayVariable;
			warning("Found reserved word $reservedWord at start of variable $badVariable; attempting replacement with $okayVariable");
		}
	}
}

sub help {
	print "$0 [--nococo] [--shift-tabs <xx>] [--help] <files>\n";
	print "\n";
	print "--compress [level]:\tattempt to reduce memory requirements for code; levels are 1, 2, 3\n";
	print "--nococo:\tdo not perform ECB conversion\n";
	print "--shift-tabs:\tshift PRINT TAB statements to the left that amount\n";
	print "--uneven-lines:\tdo not verify that line numbers are in increments of $lineIncrement\n";
	print "--help:\tprint this help text\n";
	print "files:\tPC-BASIC files to convert to Extended Color BASIC\n";
	exit;
}
