#!/usr/bin/perl
# dump a tokenized CoCo BASIC program as hex, but line-by-line
# Jerry Stratton astoundingscripts.com
use File::Temp qw/tempfile/;

#Extended Color BASIC token codes
%tokens = (
	128, 'FOR',
	129, 'GO',
	130, 'REM',
	131, "'",
	132, 'ELSE',
	133, 'IF',
	134, 'DATA',
	135, 'PRINT',
	136, 'ON',
	137, 'INPUT',
	138, 'END',
	139, 'NEXT',
	140, 'DIM',
	141, 'READ',
	142, 'RUN',
	143, 'RESTORE',
	144, 'RETURN',
	145, 'STOP',
	146, 'POKE',
	147, 'CONT',
	148, 'LIST',
	149, 'CLEAR',
	150, 'NEW',
	151, 'CLOAD',
	152, 'CSAVE',
	153, 'OPEN',
	154, 'CLOSE',
	155, 'LLIST',
	156, 'SET',
	157, 'RESET',
	158, 'CLS',
	159, 'MOTOR',
	160, 'SOUND',
	161, 'AUDIO',
	162, 'EXEC',
	163, 'SKIPF',
	164, 'TAB(',
	165, 'TO',
	166, 'SUB',
	167, 'THEN',
	168, 'NOT',
	169, 'STEP',
	170, 'OFF',
	171, '+',
	172, '-',
	173, '*',
	174, '/',
	175, '^',
	176, 'AND',
	177, 'OR',
	178, '>',
	179, '=',
	180, '<',
	181, 'DEL',
	182, 'EDIT',
	183, 'TRON',
	184, 'TROFF',
	185, 'DEF',
	186, 'LET',
	187, 'LINE',
	188, 'PCLS',
	189, 'PSET',
	190, 'PRESET',
	191, 'SCREEN',
	192, 'PCLEAR',
	193, 'COLOR',
	194, 'CIRCLE',
	195, 'PAINT',
	196, 'GET',
	197, 'PUT',
	198, 'DRAW',
	199, 'PCOPY',
	200, 'PMODE',
	201, 'PLAY',
	202, 'DLOAD',
	203, 'RENUM',
	204, 'FN',
	205, 'USING',
	206, 'DIR',
	207, 'DRIVE',
	208, 'FIELD',
	209, 'FILES',
	210, 'KILL',
	211, 'LOAD',
	212, 'LSET',
	213, 'MERGE',
	214, 'RENAME',
	215, 'RSET',
	216, 'SAVE',
	217, 'WRITE',
	218, 'VERIFY',
	219, 'UNLOAD',
	220, 'DSKINI',
	221, 'BACKUP',
	222, 'COPY',
	223, 'DSKI$',
	224, 'DSKO$',
	'255-128', 'SGN',
	'255-129', 'INT',
	'255-130', 'ABS',
	'255-131', 'USR',
	'255-132', 'RND',
	'255-133', 'SIN',
	'255-134', 'PEEK',
	'255-135', 'LEN',
	'255-136', 'STR$',
	'255-137', 'VAL',
	'255-138', 'ASC',
	'255-139', 'CHR$',
	'255-140', 'EOF',
	'255-141', 'JOYSTK',
	'255-142', 'LEFT$',
	'255-143', 'RIGHT$',
	'255-144', 'MID$',
	'255-145', 'POINT',
	'255-146', 'INKEY$',
	'255-147', 'MEM',
	'255-148', 'ATN',
	'255-149', 'COS',
	'255-150', 'TAN',
	'255-151', 'EXP',
	'255-152', 'FIX',
	'255-153', 'LOG',
	'255-154', 'POS',
	'255-155', 'SQR',
	'255-156', 'HEX$',
	'255-157', 'VARPTR',
	'255-158', 'INSTR',
	'255-159', 'TIMER',
	'255-160', 'PPOINT',
	'255-161', 'STRING$',
	'255-162', 'CVN',
	'255-163', 'FREE',
	'255-164', 'LOC',
	'255-165', 'LOF',
	'255-166', 'MKN$',
	'255-167', 'AS',
);

while ($option = shift) {
	if ($option =~ m/^([1-9][0-9]*)?-([1-9][0-9]*)?$/) {
		($fromLine, $toLine) = ($1, $2);
		help("$toLine is not greater than $fromLine.") if $fromLine ne "" && $toLine ne "" && $fromLine >= $toLine;
		help("no range specified") if $fromLine eq "" && $toLine eq "";
	} elsif ($option =~ m/^([1-9][0-9]*)$/) {
		$singleLines[$#singleLines+1] = $1;
	} elsif ($option eq '--help') {
		help();
	} elsif ($option eq '--raw') {
		$raw = 1;
	} elsif ($option eq '--tokenize') {
		$tokenize = 1;
	} elsif ($option eq '--list') {
		$detokenize = 1;
	} else {
		$files[$#files+1] = $option;
	}
}
@ARGV = @files;

#read the code
$code = do { local $/; <> };
$baseOffset = 9726;

#If the first character is FF and the second two are the length of the file beyond those three characters
#then this file includes the disk flag and overall file length
if (ord(substr($code, 0, 1)) == 255 && bytesToDecimal(substr($code, 1, 2)) == length($code)-3) {
	$code = substr($code, 3);
	$baseOffset += 3;
}

if ($tokenize) {
	$tokenFile = tokenizeCode($code);
	@ARGV = ($tokenFile);
	$code = do { local $/; <> };
}

#maximum line number size
$maxLineNumberWidth = length(maximumLine($code));

#loop through each line of code
$cumulatedOffset = 4;
while (length($code)) {
	#read the next line address
	($nextLineBytes, $nextLine) = byteTwo();
	$nextLine -= ($baseOffset + $cumulatedOffset);
	($lineNumberBytes, $lineNumber) = byteTwo();

	exit if $lineNumber == 0 && $lineNumberBytes eq "";

	if (printableLine($lineNumber)) {
		#start printing line number
		printf "%${maxLineNumberWidth}i\t", $lineNumber;

		#print the code for this line
		$lineCode = substr($code, 0, $nextLine-1);
		$lineCode .= substr($code,$nextLine-1, 1) if $raw;
		if ($detokenize) {
			detokenizeLine($lineCode);
		} else {
			$lineCode = "$nextLineBytes$lineNumberBytes$lineCode" if $raw;
			$address = $cumulatedOffset;
			$address -= 4 if $raw;
			dumpLine($address, $lineCode);
		}
		#and, end the line
		print "\n";
	}

	#prepare for next line by removing the line we just went through
	$code = substr($code, $nextLine);
	$cumulatedOffset += $nextLine+4;
}

#detokenize CoCo Extended Color BASIC
sub detokenizeLine {
	my $code = shift;
	my $inString = 0;
	my $inRemark = 0;
	while (length($code)) {
		my $byte = substr($code, 0, 1, '');
		my $byteCode = ord($byte);
		if ($byteCode == 0) {
			return;
		} elsif (!$inString && $byteCode == 58 && (ord(substr($code, 0, 1)) == 131 || ord(substr($code, 0, 1)) == 132)) {
			#special code for apostrophe remarks and for ELSE statements
			#tokenization always puts a colon in front; detokenization needs to remove it
			next;
		} elsif ($byteCode > 31 && $byteCode < 128) {
			print $byte;
			if ($byte eq '"') {
				$inString = !$inString;
			}
		} elsif (!$inString && !$inRemark) {
			if ($byteCode == 255) {
				$byteCode .= '-' . ord(substr($code, 0, 1, ''));
			}
			if (defined $tokens{$byteCode}) {
				print $tokens{$byteCode};
				$inRemark = 1 if $tokens{$byteCode} eq 'REM' || $tokens{$byteCode} eq "'";
			} else {
				print "{$byteCode}";
			}
		} else {
			#unprintable character inside a string or a remark
			print '¿';
		}
	}
}

#dump line as hex and ASCII
sub dumpLine {
	my $address = shift;
	my $line = shift;

	my $tabs = '';
	my $currentHex = '';
	my $currentASCII = '';
	my $characterCount = 0;
	while (length($line)) {
		my $character = substr($line, 0, 1, '');;
		my $hex = unpack('H*', $character);
		$currentHex .= ' ' if $characterCount;
		$currentHex .= ' ' if $characterCount == 8;
		$currentHex .= $hex;
		if (ord($character) > 31 && ord($character) < 95) {
			#normal lower-end ASCII
			$currentASCII .= $character;
		} elsif (ord($character) >= ord("a") && ord($character) <= ord("z")) {
			#lower-case character
			$currentASCII .= $character;
		} else {
			#unprintable character
			$currentASCII .= '.';
		}
		$characterCount++;
		if ($characterCount >= 16) {
			printDump($address, $tabs, $currentHex, $currentASCII);
			$tabs = "\n\t";
			$currentHex = '';
			$currentASCII = '';
			$characterCount = 0;
			$address += 16;
		}
	}
	if ($characterCount) {
		printDump($address, $tabs, $currentHex, $currentASCII);
	}
}

#print an assembled line of hex and ascii
sub printDump {
	my $address = shift;
	my $tabs = shift;
	my $hex = shift;
	my $ascii = shift;

	$hex .= ' ' x (48-length($hex)) if length($hex) < 48;

	#print line's offset in hex
	print $tabs;
	print decToHex($address, 2);
	print "\t$hex\t$ascii";
}

#byte off the first two characters of $code
sub byteTwo {
	my $bytes = substr($code, 0, 2, '');
	my $number = bytesToDecimal($bytes);
	return $bytes, $number;
}

#convert two-character strings to the two-byte number they represent
sub bytesToDecimal {
	my $bytes = shift;
	return ord(substr($bytes, 0, 1)) * 256 + ord(substr($bytes, 1,1));
}

#get hex value for number
sub decToHex {
	my $decimal = shift;
	my $minimumCharacters = shift;

	my $hexadecimal = sprintf('%X', $decimal);
	$hexadecimal = "0$hexadecimal" if length($hexadecimal) % 2;

	my $extraNeeded = $minimumCharacters - length($hexadecimal)/2;
	$hexadecimal = '00' x $extraNeeded . $hexadecimal if $extraNeeded > 0;

	return $hexadecimal;
}

#get character for hex number
sub hexToChar {
	my $hex = shift;
}

#get the highest line number in the tokenized code
sub maximumLine {
	my $code = shift;

	$address = $baseOffset;
	my $line = 0;
	do {
		my $newLine = bytesToDecimal(substr($code, $address-$baseOffset+2, 2));
		$address = bytesToDecimal(substr($code, $address-$baseOffset, 2));
		$line = $newLine if $address;
	} while ($address);

	return $line;
}

sub tokenizeCode {
	my $code = shift;

	my($basicHandle, $basicFile) = tempfile(UNLINK=>1);
	print $basicHandle $code;
	my($tokenHandle, $tokenFile) = tempfile(UNLINK=>1);
	`/usr/local/bin/decb copy -b -t "$basicFile" "$tokenFile"`;

	return $tokenFile;
}

#if specific lines have been requested, is this one of them?
sub printableLine {
	my $line = shift;

	return 1 if @singleLines && grep(/^$line$/, @singleLines);
	return 0 if $fromLine ne '' && $line < $fromLine;
	return 0 if $toLine ne '' && $line > $toLine;
	return 1 if $fromLine ne '' && $toLine ne '' && $line >= $fromLine && $line <= $toLine;
	return 1 if $fromLine ne '' && $line >= $fromLine;
	return 1 if $toLine ne '' && $line <= $toLine;
	return 0 if @singleLines && !grep(/^$line$/, @singleLines);
	return 1;
}

sub help {
	$message = shift;

	print "$0 [--list] [--raw] <file to dump>\n";
	print "Dump a tokenized Color Computer BASIC file.\n";
	print "\t##:\tdump only this line (may be specified multiple times).\n";
	print "\t##-##:\tdump range of lines (front or back may be left off for unlimited bottom or top of range).\n";
	print "\t--help:\tthis help text\n";
	print "\t--list:\tdetokenize file and list\n";
	print "\t--raw:\tinclude next line address and line number code in dump\n";
	print "\t--tokenize:\ttokenize the file first\n";
	print "\n$message\n" if $message;
	exit;
}
