#!/usr/bin/perl -w

use strict;
use English;

use CGI::Pretty ();
use Cwd ();
use DirHandle ();
use File::Copy ();
use File::Path ();
use FileHandle ();
use POSIX ();

# human readable area names
# Each name is printed as-is in the generated HTML, is turned into a file
# name component by areaPath() (spaces become underscores), and is compared
# for equality against the first |-separated field of each line of
# <year>/<year>-Faculty.txt by facultyMember().
my @areas = ("Analysis of Algorithms",
						 "Artificial Intelligence",
						 "Automata and Formal Languages",
						 "Compilers",
						 "Computer Architecture",
						 "Databases",
						 "Graphics",
						 "Human Computer Interaction",
						 "Logic",
						 "Networks",
						 "Numerical Analysis",
						 "Programming Languages",
						 "Software Systems");

# output HTML file (a relative name, so it is created in the current
# directory when main() opens it for writing)
my $output = "index.html";

# page title, used by main() for both the HTML <title> and the <h1> heading
my $title = "Comprehensive Examinations Archive";

# overview summary for top of page
# This is a single-quoted heredoc, so nothing inside it is interpolated;
# main() prints it verbatim right after the <h1> heading. It must therefore
# be valid HTML on its own.
# NOTE(review): the prose contains a few typos ("accross", "archvied",
# "manual verify"); they are user-visible and worth a copy edit.
my $overview = <<'EOF';

<p>
Prior to Autumn 2005, the Computer Science Department distributed the
last three years worth of comprehensive examinations on paper as a
study aid. From now on, this information will be distributed online.
</p>

<p>
I have tried to provide useful groupings for downloading. For example,
in addition to providing the individual files for downloading, I
provide several aggregations:
<ul>
<li>A single PDF file per year for easy printing</li>
<li>A single zip file per year for easy downloading</li>
<li>A single zip file per area for focusing on a particular subject</li>
</ul>
</p>

<p>
In many cases, no solutions were provided. If your study group creates
a solution, I will post what I receive.
</p>

<p>
Most content from before 2005 was scanned by <a
href="http://carlstrom.com">Brian Carlstrom</a> on an HP OfficeJet
7410 with an automatic feeder and fed through Adobe Acrobat 6.0 OCR
which also performed DPI reduction post-OCR to reduce file sizes. The
OCR worked reasonably well to allow keyword searches accross PDF
files. We tried to manual verify each scan to make sure there were no
missing pages, but please let us know if you find any problems. We
have also archvied the large original pre-DPI reduction files if there
are serious problems with readability let us know. Going forward we
hope to directly post electronic versions of the files directly
without scanning.
</p>

<p>
When multiple versions of a file are available, by default the
aggregations and links prefer original PDF files, original Microsoft
Word DOC files, and scanned PDF files in that order. If you want to
see if an alternative format exists, you can browse the directories by
year.
</p>

<p>
Good luck with comps!
</p>

<address>
 - J<br>
<a href="http://www.stanford.edu/~jgillula">Jeremy Gillula</a><br>
PhD Program Committee
</address>

<p>
Historical curiosities:
<ul>
<li><a href="CS-TR-78-677.pdf">STAN-CS-TR-78-677: Computer Science Comprehensive Exams: 1974-1978</a></li>
<li><a href="CS-TR-81-869.pdf">STAN-CS-TR-81-869: Computer Science Comprehensive Exams: 1978/79 - 1980/81</a></li>
</ul>
</p>
EOF

# LaTeX source for the per-year cover sheet (a table of contents).
# The text is used as a printf format string by main(): the single %s is
# replaced by the table rows built for that year. Because it goes through
# printf, any literal percent sign added here would have to be written as %%.
# The heredoc is single-quoted, so the backslashes below reach LaTeX as-is.
my $tex = <<'EOF';
\documentclass{article}
\begin{document}
\begin{tabular}{|l|l|r|}
\hline
\textbf{Section} & \textbf{Faculty} & \textbf{Page} \\ \hline
\hline
\textit{Table of Contents} & & \textit{1} \\ \hline
\hline
%s
\hline
\end{tabular}
\end{document}
EOF

# Entry point: regenerate every derived file of the exam archive and publish it.
#
# Steps, in order:
#   1. Collect the subdirectories of the current directory; each one is
#      treated as an exam year.
#   2. Build "<Area>.zip" for every area whose zip file does not exist yet.
#   3. For every year build "<year>.zip", "<year>/<year>-Cover.pdf" (a LaTeX
#      table of contents) and "<year>/<year>-Full.pdf" (cover + all PDFs),
#      each only when it does not exist yet.
#   4. Write the HTML index to $output.
#   5. Run the external command "rv www.cs.students".
#
# Command line: a single argument "clean" deletes and rebuilds the per-year
# zip, cover and full PDF. The per-area zip files are never deleted here.
#
# External commands used: zip, pdftk, pdflatex, date, rv.
# Dies on the first file-system or external-command failure.
sub main () {
	# build list of subdirectories to process
	my $directory = DirHandle->new(".");
	if (!defined($directory)) {
		die("error reading current directory");
	}
	my @subdirectories = ();
	# test for undef rather than for truth, so that a directory entry named
	# "0" does not end the scan early
	while (defined(my $entry = $directory->read())) {
		if (! -d $entry || $entry eq "." || $entry eq "..") {
			next;
		}
		push(@subdirectories, $entry);
	}
	$directory->close();

	# a single "clean" argument forces the per-year files to be rebuilt
	my $clean = 0;
	if (scalar(@ARGV) == 1 && $ARGV[0] eq "clean") {
		$clean = 1;
	}

	# reverse sort so we do latest years first
	# (the comparison is numeric, so directory names are expected to be years)
	my @years = sort { $b <=> $a } (@subdirectories);

	# generating "By Area" zip files
	foreach my $area (@areas) {
		my $areaPath = areaPath($area);
		my $zipFileName = "$areaPath.zip";
		if (! -f $zipFileName) {
			# stage the files in a temporary directory named after the area,
			# zip that directory, then remove it again
			mkdirOrDie($areaPath);
			foreach my $year (@years) {
				my ($examination, $solutions) = examinationAndSolutions($year, $area);
				if (defined($examination)) {
					copyFileOrDie($examination, $areaPath);
				}
				if (defined($solutions)) {
					copyFileOrDie($solutions, $areaPath);
				}
			}
			systemOrDie("zip", "-r", $zipFileName, $areaPath);
			removeDirectoryOrDie($areaPath);
		}
	}

	# generate "By Year" files
	foreach my $year (@years) {
		my $facultyFileName = "$year/$year-Faculty.txt";

		# create zipFileContents list first, which is stripped down to create pdfFileContents
		my @zipFileContents = ();
		foreach my $area (@areas) {
			my ($examination, $solutions) = examinationAndSolutions($year, $area);
			if (defined($examination)) {
				push(@zipFileContents, $examination);
			}
			if (defined($solutions)) {
				push(@zipFileContents, $solutions);
			}
		}
		if (-f $facultyFileName) {
			push(@zipFileContents, $facultyFileName);
		}

		# generate "By Year" zip file if needed
		my $zipFileName = "$year.zip";
		if ($clean && -f $zipFileName) {
			removeFileOrDie($zipFileName);
		}
		if (! -f $zipFileName) {
			systemOrDie("zip", $zipFileName, @zipFileContents);
		}

		# pdfFileContents is subset of zipFileContents
		my @pdfFileContents = ();
		foreach my $zipFileContent (@zipFileContents) {
			if (endsWithString($zipFileContent, ".pdf")) {
				push(@pdfFileContents, $zipFileContent);
			}
		}

		# generate "By Year" cover file if needed
		my $coverFileName = "$year/$year-Cover.pdf";
		if ($clean && -f $coverFileName) {
			removeFileOrDie($coverFileName);
		}
		if (! -f $coverFileName) {
			my $tableOfContents = "";
			# page 1 is the cover sheet itself
			my $page = 2;
			foreach my $pdfFileContent (@pdfFileContents) {
				# turn "<year>/<year>-Some_Area-solutions.pdf" into the label
				# " Some Area solutions"; the edits are applied in this order
				my $area = $pdfFileContent;
				foreach my $edit (["-", " "], [".pdf", ""], [".", ""], ["/", ""], [$year, ""], ["_", " "]) {
					$area = replaceStringInString($edit->[0], $edit->[1], $area);
				}

				# the faculty list is keyed by the plain area name, so drop the
				# leading blank and the " scanned" marker of scanned exams
				# before looking the name up
				my $lookupArea = $area;
				$lookupArea =~ s/^\s+//;
				$lookupArea =~ s/ scanned$//;

				my $facultyMember = "[Unknown]";
				if (index($area, "solutions") != -1) {
					# solutions rows carry no faculty name
					$facultyMember = "";
				}
				elsif (-f $facultyFileName) {
					my (undef, undef, $faculty) = examinationAndSolutions($year, $lookupArea);
					if (defined($faculty)) {
						$facultyMember = $faculty;
					}
				}

				$tableOfContents .= "$area & $facultyMember & $page \\\\ \\hline \n";
				# the pipeline prints the page count of the PDF; it is added
				# numerically to get the start page of the next entry
				$page += `pdftk $pdfFileContent dump_data | grep 'NumberOfPages: ' | sed 's/NumberOfPages: //'`;
			}

			# write the LaTeX source, compile it into the year directory,
			# then delete the source again
			my $coverFileContents = "$year/$year-Cover.tex";
			my $latex = writeOrDie($coverFileContents);
			$latex->printf($tex, $tableOfContents);
			closeOrDie($latex);
			systemOrDie("pdflatex", "-output-directory", $year, $coverFileContents);
			removeFileOrDie($coverFileContents);
		}

		# generate "By Year" pdf file if needed
		my $pdfFileName = "$year/$year-Full.pdf";
		if ($clean && -f $pdfFileName) {
			removeFileOrDie($pdfFileName);
		}
		if (! -f $pdfFileName) {
			systemOrDie("pdftk", $coverFileName, @pdfFileContents, "cat", "output", $pdfFileName);
		}
	}

	# create HTML output file
	my $o = writeOrDie($output);
	my $c = CGI::Pretty->new();
	$o->print($c->start_html(-title=>$title));

	# output header
	$o->print($c->h1($title));

	# output long descriptive overview string
	$o->print($overview);

	# output "By Year" section
	my $areaCount = scalar(@areas);
	$o->print($c->hr());
	$o->print($c->h2("By Year"));
	$o->print($c->start_table());
	foreach my $year (@years) {
		$o->print($c->Tr($c->td([$year,
			$c->a({href=>"$year.zip"},            "[$areaCount Areas in one ZIP]"),
			$c->a({href=>"$year/$year-Full.pdf"}, "[$areaCount Areas in one PDF]")])));
	}
	$o->print($c->end_table());

	# output "By Area" section
	$o->print($c->hr());
	$o->print($c->h2("By Area"));
	$o->print($c->start_table());
	foreach my $area (@areas) {
		my $areaPath = areaPath($area);
		$o->print($c->Tr($c->td([$area,
			$c->a({href=>"$areaPath.zip"}, "[All years in one ZIP]")])));
	}
	$o->print($c->end_table());

	# output "Individual Files" section
	$o->print($c->hr());
	$o->print($c->h2("Individual Files"));
	$o->print($c->start_table());
	foreach my $year (@years) {

		$o->print($c->h3($year));
		$o->print($c->td($c->a({href=>"$year/"},                "[Browse Directory]")));
		$o->print($c->td($c->a({href=>"$year/$year-Cover.pdf"}, "[Cover Sheet]")));

		# one nested table per year: header row, then one row per area
		$o->print($c->start_table());
		$o->print($c->start_Tr());
		$o->print($c->td($c->strong("Area")));
		$o->print($c->td($c->strong("Faculty")));
		$o->print($c->td($c->strong("Pass/Total")));
		$o->print($c->td($c->strong("Num Pass (Num Fail)")));
		$o->print($c->end_Tr());

		foreach my $area (@areas) {
			my ($examination, $solutions, $faculty, $pass, $total, $num_pass, $num_fail) = examinationAndSolutions($year, $area);

			# every missing statistic is shown as "NA"
			my $passText     = defined($pass)     ? $pass     : "NA";
			my $totalText    = defined($total)    ? $total    : "NA";
			my $numPassText  = defined($num_pass) ? $num_pass : "NA";
			my $numFailText  = defined($num_fail) ? $num_fail : "NA";

			$o->print($c->start_Tr());
			$o->print($c->td($area));
			if (defined($faculty)) {
				$o->print($c->td("($faculty)"));
			}
			else {
				$o->print($c->td("[Unknown]"));
			}
			$o->print($c->td("$passText/$totalText"));
			$o->print($c->td("$numPassText ($numFailText)"));
			if (defined($examination)) {
				$o->print($c->td($c->a({href=>$examination}, "[Examination]")));
			}
			else {
				$o->print($c->td("[NA]"));
			}
			if (defined($solutions)) {
				$o->print($c->td($c->a({href=>$solutions},   "[Solutions]")));
			}
			else {
				$o->print($c->td("[NA]"));
			}
			$o->print($c->end_Tr());
		}
		$o->print($c->end_table());
	}
	# close the outer "Individual Files" table opened before the loop
	$o->print($c->end_table());

	#output footer
	$o->print($c->hr());
	$o->print("Generated by ");
	$o->print($c->a({href=>$PROGRAM_NAME}, $PROGRAM_NAME));
	# leading blank keeps "at" from running into the link text
	$o->print(" at ");
	$o->print(`date`);
	$o->print($c->hr());
	$o->print($c->end_html());
	closeOrDie($o);

	# push local content to web site
	systemOrDie("rv", "www.cs.students");
}

# Locate the examination and solutions files for one area of one year and
# look up the per-area statistics from that year's faculty list.
#
# For both the examination and the solutions the first existing file wins,
# in this order of preference: original PDF, original DOC, scanned PDF.
#
# Returns a seven element list:
#   (examination, solutions, faculty, pass, total, num_pass, num_fail)
# A file entry is undef when no candidate exists; the last five entries come
# from facultyMember() and are all undef when "<year>/<year>-Faculty.txt"
# does not exist.
sub examinationAndSolutions ($$) {
	my ($year, $area) = @_;

	my $prefix = "$year/$year-" . areaPath($area);

	# candidates in order of preference
	my @examinationCandidates = ("$prefix.pdf",
	                             "$prefix.doc",
	                             "$prefix-scanned.pdf");
	my @solutionsCandidates   = ("$prefix-solutions.pdf",
	                             "$prefix-solutions.doc",
	                             "$prefix-scanned-solutions.pdf");

	my $examination = undef;
	foreach my $candidate (@examinationCandidates) {
		if (-f $candidate) {
			$examination = $candidate;
			last;
		}
	}

	my $solutions = undef;
	foreach my $candidate (@solutionsCandidates) {
		if (-f $candidate) {
			$solutions = $candidate;
			last;
		}
	}

	# faculty, pass, total, num_pass, num_fail
	my @statistics = (undef, undef, undef, undef, undef);
	my $facultyList = "$year/$year-Faculty.txt";
	if (-f $facultyList) {
		@statistics = (facultyMember($area, $facultyList))[0 .. 4];
	}

	return ($examination, $solutions, @statistics);
}


# Look up one exam area in a faculty list file.
#
# The file holds one record per line with |-separated fields:
#   area|name|pass|total|num_pass|num_fail
# Only lines whose first field equals $area exactly are used. Trailing
# fields may be missing or empty; such fields leave the corresponding
# result untouched. When several lines match, a later non-empty field
# overrides an earlier one.
#
# Parameters: $area - area name to look for
#             $file - path of the faculty list file
# Returns:    (faculty, pass, total, num_pass, num_fail); each element is
#             undef when no matching line supplied a non-empty value.
# Dies when the file cannot be opened.
sub facultyMember ($$) {
	my ($area, $file) = @_;

	# three-argument open with a lexical handle: the file name is never
	# interpreted as a mode, and no global handle is left behind
	open(my $handle, '<', $file)
		or die("Could not open faculty member file $file: $EXTENDED_OS_ERROR");
	my @raw_data = <$handle>;
	close($handle);

	my $faculty = undef;
	my $pass = undef;
	my $total = undef;
	my $num_pass = undef;
	my $num_fail = undef;

	foreach my $line (@raw_data) {
		chomp($line);
		my ($l_area, $l_name, $l_pass, $l_total, $l_num_pass, $l_num_fail) = split(/\|/, $line);

		# blank lines produce no fields at all
		if (!defined($l_area) || $area ne $l_area) {
			next;
		}

		if (defined($l_name) && length($l_name) > 0) {
			$faculty = $l_name;
		}
		if (defined($l_pass) && length($l_pass) > 0) {
			$pass = $l_pass;
		}
		if (defined($l_total) && length($l_total) > 0) {
			$total = $l_total;
		}
		if (defined($l_num_pass) && length($l_num_pass) > 0) {
			$num_pass = $l_num_pass;
		}
		if (defined($l_num_fail) && length($l_num_fail) > 0) {
			$num_fail = $l_num_fail;
		}
	}

	return ($faculty, $pass, $total, $num_pass, $num_fail);
}

# Convert a human readable area name into the form used in file and
# directory names: every space is replaced by an underscore.
sub areaPath ($) {
	my ($area) = @_;
	(my $path = $area) =~ tr/ /_/;
	return $path;
}

# Open $file for writing (mode ">", so an existing file is truncated) and
# return the FileHandle object. Dies with the current directory and the OS
# error when the file cannot be opened.
sub writeOrDie ($) {
	my ($file) = @_;
	my $handle = FileHandle->new($file, ">");
	return $handle if defined($handle);
	die("Problem writing file $file from ".Cwd::cwd().": $EXTENDED_OS_ERROR");
}

# Close the given file handle object; die with the OS error when the close
# fails. Returns the (true) result of the close call.
sub closeOrDie ($) {
	my ($fileHandle) = @_;
	my $closed = $fileHandle->close();
	if (!$closed) {
		die("Problem close file handle $fileHandle: $EXTENDED_OS_ERROR");
	}
	return $closed;
}

# Run an external command (given as a list: program, then arguments) and
# die unless it exits with status 0.
sub systemOrDie (@) {
	return systemOrDieWithExpectedValue(0, @_);
}

# Run an external command and die unless its exit status equals $expected.
# The first argument is the expected exit status, the rest is the command
# (program, then arguments). Returns nothing on success.
sub systemOrDieWithExpectedValue ($@) {
	my $expected = shift(@_);
	my @command = @_;
	my $exitCode = systemOrDieForValue(@command);
	if ($exitCode != $expected) {
		die("Exit code $exitCode running command: @command");
	}
	return;
}

# set to a true value to print every command before it is run
my $debug_system = 0;

# Run an external command (program, then arguments) and return its exit
# status. Dies when the command cannot be started, is terminated or stopped
# by a signal, or ends in any other way than a normal exit.
sub systemOrDieForValue (@) {
	my (@command) = @_;
	print("Running @command\n") if $debug_system;

	my $waitCode = system(@command);

	# -1 means the command could not be run at all
	die("Problem '$EXTENDED_OS_ERROR' running command: @command") if $waitCode == -1;

	# normal exit: hand the exit status back to the caller
	return POSIX::WEXITSTATUS($waitCode) if POSIX::WIFEXITED($waitCode);

	if (POSIX::WIFSIGNALED($waitCode)) {
		my $signal = POSIX::WTERMSIG($waitCode);
		die("Signal $signal terminated command: @command");
	}
	elsif (POSIX::WIFSTOPPED($waitCode)) {
		my $signal = POSIX::WSTOPSIG($waitCode);
		die("Signal $signal stopped command: @command");
	}

	die("Unexpected exit of command with result $waitCode: @command");
}

# Make sure $dir exists as a directory, creating it (and any missing parent
# directories) when necessary. Dies when the path exists but is not a
# directory, or when it still is not a directory after the attempt.
sub mkdirOrDie ($) {
	my ($dir) = @_;

	# already there: nothing to do
	return if -d $dir;

	die("$dir exists but is not a directory") if -e $dir;

	File::Path::mkpath($dir);
	unless (-d $dir) {
		die("failed to create diretory $dir");
	}
}

# Copy the file $from to $to using File::Copy::copy; die with the OS error
# when the copy fails. Returns the (true) result of the copy.
sub copyFileOrDie ($$) {
	my ($from, $to) = @_;
	my $copied = File::Copy::copy($from, $to);
	if (!$copied) {
		die("Problem copying $from to $to: $EXTENDED_OS_ERROR");
	}
	return $copied;
}

# Delete $file. A path that does not exist is silently accepted. Dies when
# the path is neither a plain file nor a symbolic link, when unlink fails,
# or when the path still exists afterwards.
sub removeFileOrDie ($) {
	my ($file) = @_;

	# nothing to remove
	return unless -e $file;

	unless ((-f $file) || (-l $file)) {
		die("file to remove $file is not a file");
	}

	my $removed = unlink($file);
	if (!$removed) {
		die("Problem removing file $file from ".Cwd::cwd().": $EXTENDED_OS_ERROR");
	}

	# double check that the file is really gone
	die("Problem removing file $file from ".Cwd::cwd()) if -e $file;
}

# Delete the directory $dir together with everything below it. A path that
# does not exist is silently accepted. Dies when the path is not a
# directory, when rmtree reports nothing removed, or when the path still
# exists afterwards.
sub removeDirectoryOrDie ($) {
	my ($dir) = @_;

	# nothing to remove
	return unless -e $dir;

	die("directory to remove $dir is not a directory") unless -d $dir;

	my $removed = File::Path::rmtree($dir);
	if (!$removed) {
		die("Problem removing directory $dir: $EXTENDED_OS_ERROR");
	}

	# double check that the directory is really gone
	die("Problem removing directory $dir from ".Cwd::cwd()) if -e $dir;
}

# True when $string ends with the literal text $pattern; every regular
# expression metacharacter in $pattern is escaped before matching.
sub endsWithString ($$) {
	my ($string, $pattern) = @_;
	my $literal = "\Q$pattern\E";
	return endsWithRegexp($string, $literal);
}

# True when the regular expression $pattern matches at the end of $string.
# The pattern is anchored with "$", which also matches just before a final
# newline.
sub endsWithRegexp ($$) {
	my ($string, $pattern) = @_;
	my $anchored = qr/$pattern$/;
	return $string =~ $anchored;
}

# Return a copy of $string in which every occurrence of the literal text
# $from is replaced by $to; regular expression metacharacters in $from are
# escaped before matching.
sub replaceStringInString ($$$) {
	my ($from, $to, $string) = @_;
	my $literal = "\Q$from\E";
	return replaceRegexpInString($literal, $to, $string);
}

# Return a copy of $string in which every match of the regular expression
# $from is replaced by the text $to. The caller's string is not modified.
sub replaceRegexpInString ($$$) {
	my ($from, $to, $string) = @_;
	(my $result = $string) =~ s/$from/$to/g;
	return $result;
}

main();
