#!/usr/bin/perl -w
use strict;
use XML::Twig;

#process.pl
#v1.3
#04/02/2008
#
#Requires XML-Twig library.  For UMD astro it is in astromake
#astroload xml-twig
#Otherwise available in CPAN
#
#
#Copyright 2007-2008 John C. Vernaleo
#vernaleo@astro.umd.edu or john@netpurgatory.com
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

# Parse the student database.  Every child of the document root is treated
# as one student record.
my $twig=XML::Twig->new;
$twig->parsefile("student-list.xml");
my $root=$twig->root;
my @students=$root->children;
my $student;
# All student records, keyed by student name; filled in by read_record.
my %astrograds;
my $name;
# Running tallies, updated while the by-year report is generated and
# printed afterwards by stat_print.
my $counter=0;
my $left=0;
my $completed=0;
my $current=0;
my $completetime=0;
my $maxcompletetime=0;
my $mincompletetime=100;
#This should be the same as completed, but since I don't know
#some starting years I can't use everyone to calculate graduation times.
my $completenumber=0;
# Output file names.
my $htmlname="full-list.html";
my $texname="student-list.tex";
my $texname2="advisor-list.tex";
my $statsname="stats.txt";

# read_record stores each record in %astrograds itself, so the hash is no
# longer passed in and copied back on every iteration.  The element stays
# in $_ as well as being passed as the first argument.
foreach (@students){
    read_record($_);
}

##############################BY YEAR###################################
# Walks the students from the most recent incoming year to the oldest and
# writes two reports in lock-step: the HTML page ($htmlname) and the LaTeX
# "Students by Year" tables ($texname).  The same pass updates the tallies
# ($counter, $left, $completed, completion times) used by the statistics.
# The record printers write to the currently selected handle, so select()
# is switched before each call.
open(my $web_fh, '>', $htmlname) or die "Cannot open $htmlname: $!";
select $web_fh;
page_top();
open(my $year_tex_fh, '>', $texname) or die "Cannot open $texname: $!";
select $year_tex_fh;
tex_top();
my $currentyear="";

for my $name (reverse sort {$astrograds{$a}{'year'} <=> $astrograds{$b}{'year'}} keys %astrograds){
    my $record=$astrograds{$name};
    my $thesis_status=$record->{'thesis_status'};

    # A new incoming year closes the previous table and starts a new one.
    if($currentyear ne $record->{'year'}){
        $currentyear=$record->{'year'};
        select $year_tex_fh;
        if($counter){
            print '\\\\',"\n";
            print '\end{tabular*}',"\n\n";
        }
        print '\begin{tabular*}{1.0\textwidth}{lccc}',"\n";
        print '\hline',"\n";
        print '\hline',"\n";
        print '{\bf '.$currentyear.'}\\\\'."\n";
        print '\hline',"\n";
    }

    # Students who left without a PhD.  Lowercase "x" is accepted too, so
    # the tally agrees with the "Left without PhD" label the reports print.
    if($thesis_status=~/[Xx]|MS|ms/){
        $left++;
    }

    # A thesis status containing digits is treated as the graduation year.
    if($thesis_status=~/[0-9]/){
        $completed++;
        # A starting year of 0 means "unknown"; such students are left
        # out of the completion-time statistics.
        unless($currentyear==0){
            my $duration=$thesis_status-$currentyear;
            $completetime+=$duration;
            $maxcompletetime=$duration if $duration>$maxcompletetime;
            $mincompletetime=$duration if $duration<$mincompletetime;
            $completenumber++;
        }
    }

    select $web_fh;
    html_record($record);
    select $year_tex_fh;
    tex_record($record);
    $counter++;
}

select $web_fh;
print "</body>\n</html>\n";
close($web_fh) or die "Cannot close $htmlname: $!";


select $year_tex_fh;
# Only close a table if one was opened, i.e. at least one student was seen.
if($counter){
    print '\hline',"\n";
    print '\end{tabular*}',"\n\n";
}
print '\end{center}',"\n";
print 'Last updated on \today.',"\n";
print '\end{document}',"\n";
close($year_tex_fh) or die "Cannot close $texname: $!";
system("pdflatex $texname>/dev/null")==0
    or warn "pdflatex did not finish cleanly for $texname\n";
select STDOUT;
################################Some Stats#############################
my $date=localtime();
# Whoever neither left nor graduated is counted as a current student.
$current=$counter-$left-$completed;

# stat_print
# Prints the summary statistics gathered during the by-year pass to the
# currently selected filehandle.  Takes no arguments and reads the
# file-level tallies directly.  Percentages are reported as 0 when no
# records were processed, and the completion-time lines are replaced by a
# note when no graduate has a known starting year, so an empty or partial
# database no longer aborts with a division by zero.
sub stat_print{
    my $left_percent=$counter ? ($left/$counter)*100.0 : 0;
    my $completed_percent=$counter ? ($completed/$counter)*100.0 : 0;

    print "As of: $date\n";
    print "$counter student records processed.\n";
    print $current." current students.\n";
    printf "%g students left without a PhD. %.2f%%\n",$left,$left_percent;
    printf "%g students completed a PhD. %.2f%%\n",$completed,$completed_percent;
    if($completenumber){
        printf "Average time for completed PhD is %.2f years.\n",$completetime/$completenumber;
        printf "Maximum time to complete PhD is %.2f years.\n",$maxcompletetime;
        printf "Minimum time to complete PhD is %.2f years.\n",$mincompletetime;
    }
    else{
        print "No completed PhD with a known starting year; completion times unavailable.\n";
    }
}

# Report once on the terminal and once into the stats file.
stat_print();
open(my $stats_fh, '>', $statsname) or die "Cannot open $statsname: $!";
select $stats_fh;
stat_print();
select STDOUT;
close($stats_fh) or die "Cannot close $statsname: $!";
##############################BY ADVISOR###############################
# Writes the LaTeX "Advisor/Student List" ($texname2): one table per
# advisor, listing every student for whom that person was the second-year
# project advisor and/or the thesis advisor.

# Collect each distinct advisor name once.  Names are compared as exact
# strings; they used to be matched as regular expressions, which broke on
# names such as "?" and let a name hide another one that merely contains
# it.  Empty advisor fields are skipped.
my %advisor_seen;
for my $record (values %astrograds){
    my $thesis_advisor=$record->{'thesis_advisor'};
    $advisor_seen{$thesis_advisor}=1 if $thesis_advisor;

    my $project_advisor=$record->{'advisor'};
    # A "?" second-year advisor was always dropped from the list (by a
    # pop() workaround); it is still excluded so the report keeps the
    # same headings.  A "?" thesis advisor is kept, as before.
    $advisor_seen{$project_advisor}=1
        if $project_advisor && $project_advisor ne '?';
}

#Put list in correct alphabetical order
my @advisorlist=sort keys %advisor_seen;

# The student order is the same for every advisor, so sort only once.
my @names_by_year=reverse sort {$astrograds{$a}{'year'} <=> $astrograds{$b}{'year'}} keys %astrograds;

open(my $advisor_tex_fh, '>', $texname2) or die "Cannot open $texname2: $!";
select $advisor_tex_fh;
tex_top2();
foreach my $advisor (@advisorlist){
    print '\begin{tabular*}{1.0\textwidth}{lccccc}'."\n";
    print '\hline'."\n".'\hline'."\n";
    print '{\bf '.$advisor.'}\\\\'."\n";
    print '\hline'."\n";
    for my $name (@names_by_year){
        my $record=$astrograds{$name};
        # $stat2: second-year project status, $statp: thesis status; each
        # is filled in only when this advisor supervised that stage.
        my $stat2="";
        my $statp="";
        if($record->{'advisor'} && $record->{'advisor'} eq $advisor){
            $stat2=$record->{'status'};
        }
        if($record->{'thesis_advisor'} && $record->{'thesis_advisor'} eq $advisor){
            $statp=$record->{'thesis_status'};
        }

        # Expand the short status codes into readable labels.
        if($statp=~/[Xx]/){
            $statp=" Left without PhD ";
        }
        elsif(($statp eq "MS")||($statp eq "ms")){
            $statp=" Left with Masters ";
        }
        elsif(($statp eq "No MS")||($statp eq "No ms")){
            $statp=" Left without Masters ";
        }

        if($stat2||$statp){
            print "$name & $stat2 & $statp & $record->{'email'} & $record->{'firstposition'}\\\\\n";
        }
    }
    print '\hline'."\n";
    print '\\\\'."\n";
    print '\end{tabular*}'."\n";
}
print '\end{center}',"\n";
print 'Last updated on \today.',"\n";
print '\end{document}',"\n";
select STDOUT;
close($advisor_tex_fh) or die "Cannot close $texname2: $!";
system("pdflatex $texname2>/dev/null")==0
    or warn "pdflatex did not finish cleanly for $texname2\n";

##############################SUBROUTINES##############################

# read_record(ELEMENT)
# Copies one student element into the file-level %astrograds hash, keyed
# by the student's name, and returns the (updated) hash as a list.
#   ELEMENT - the student element; only its first_child() method and the
#             text() method of the children are used.  If omitted, the
#             element is taken from $_, which is what this sub used to
#             read unconditionally.  Any further arguments are ignored.
# A missing child element yields an empty string for that field instead of
# aborting the whole run.  A record without a name cannot be keyed, so it
# is skipped with a warning.
sub read_record{
    my ($student)=@_;
    $student=$_ unless defined $student;

    # Text of the first child called $tag, or '' if the parent or the
    # child does not exist.
    my $text_of=sub{
        my ($parent,$tag)=@_;
        return '' unless defined $parent;
        my $child=$parent->first_child($tag);
        return defined $child ? $child->text : '';
    };

    my $name=$text_of->($student,'name');
    if($name eq ''){
        warn "Skipping a student record without a name\n";
        return(%astrograds);
    }

    my $secondyear=$student->first_child('secondyear');
    my $phd=$student->first_child('phd');
    $astrograds{$name}={
        name=>$name,
        year=>$text_of->($student,'year'),
        email=>$text_of->($student,'email'),
        firstposition=>$text_of->($student,'firstposition'),
        current=>$text_of->($student,'current'),
        advisor=>$text_of->($secondyear,'advisor'),
        status=>$text_of->($secondyear,'status'),
        thesis_advisor=>$text_of->($phd,'thesis_advisor'),
        external_advisor=>$text_of->($phd,'external_advisor'),
        thesis_status=>$text_of->($phd,'thesis_status'),
        title=>$text_of->($phd,'title'),
        misc=>$text_of->($student,'misc'),
    };
    return(%astrograds);
}

# page_top
# Prints the XML declaration, the XHTML 1.0 Strict DOCTYPE and the opening
# <html>/<head>/<body> markup of the student list page to the currently
# selected filehandle.  Takes no arguments.
sub page_top{
    # The DOCTYPE system identifier used to be misspelled
    # ("xhmtl1-strict.dtd"); it now names the real W3C DTD.
    print <<'END_OF_HEADER';
<?xml version='1.0' encoding='iso-8859-1'?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" >
<head>
<title>UMD Astro Graduate Student List</title>
</head>
<body>
END_OF_HEADER
}

# tex_top
# Prints the LaTeX preamble, the "Students by Year" title and the
# column-heading table to the currently selected filehandle.  Leaves the
# center environment open.  Takes no arguments.
sub tex_top{
    # Single-quoted here-document: every backslash below is literal.
    print <<'END_OF_PREAMBLE';
\documentclass[11pt]{article}
\usepackage{fullpage}
\begin{document}
\begin{center}
{\huge Students by Year}
\begin{tabular*}{1.0\textwidth}{|l|c|c|c|}
\hline
Name&PhD&e-mail&misc (Usually first position after leaving)\\
\hline
\end{tabular*}
END_OF_PREAMBLE
}

# tex_top2
# Prints the LaTeX preamble, the "Advisor/Student List" title and the
# column-heading table to the currently selected filehandle.  Leaves the
# center environment open.  Takes no arguments.
sub tex_top2{
    # Single-quoted here-document: every backslash below is literal.
    print <<'END_OF_PREAMBLE';
\documentclass[10pt]{article}
\usepackage{fullpage}
\begin{document}
\begin{center}
{\huge Advisor/Student List}
\begin{tabular*}{1.0\textwidth}{p{1in}ccccc}
\hline
Name&2yp&PhD&e-mail&misc (Usually first position after leaving)\\
\hline
\end{tabular*}
END_OF_PREAMBLE
}

# html_record(RECORD)
# Prints one student as a block of XHTML to the currently selected
# filehandle.
#   RECORD - reference to a hash with the keys name, year, email,
#            firstposition, current, advisor, status, thesis_advisor,
#            external_advisor, thesis_status, title and misc.
# The field values are plain text, so "&", "<" and ">" are replaced by
# their entities before printing; otherwise a value such as "A & B" would
# make the page invalid.  Undefined or missing fields print as empty.
sub html_record{
    my ($record)=@_;

    my $escape=sub{
        my ($text)=@_;
        $text='' unless defined $text;
        $text=~s/&/&amp;/g;     # must come first so entities are not re-escaped
        $text=~s/</&lt;/g;
        $text=~s/>/&gt;/g;
        return $text;
    };
    my %field;
    for my $key (qw(name year email firstposition current advisor status
                    thesis_advisor external_advisor thesis_status title misc)){
        $field{$key}=$escape->($record->{$key});
    }

    print "<hr />\n";
    print "<p>\n";
    print "<b>Name:</b> $field{'name'}\n<br />\n";
    print "<b>Incoming year:</b> $field{'year'}\n<br />\n";
    print "<b>Current email address:</b> $field{'email'}\n<br />\n";
    print "<b>First Position:</b> $field{'firstposition'}\n<br />\n";
    print "<b>Current Position:</b> $field{'current'}\n<br />\n";
    print "<b>Second Year Project</b>\n<br />\n";
    print "</p><pre>          <b>Advisor:</b> $field{'advisor'}</pre><p>\n<br />\n";
    print "</p><pre>          <b>Status:</b> $field{'status'}</pre><p>\n<br />\n";
    print "<b>Thesis</b>\n<br />\n";
    print "</p><pre>          <b>Advisor:</b> $field{'thesis_advisor'}</pre><p>\n<br />\n";
    print "</p><pre>          <b>External Advisor:</b> $field{'external_advisor'}</pre><p>\n<br />\n";
    print "</p><pre>          <b>Status:</b> $field{'thesis_status'}</pre><p>\n<br />\n";
    print "</p><pre>          <b>Thesis Title:</b> $field{'title'}</pre><p>\n<br />\n";
    print "<b>Misc.:</b> $field{'misc'}\n<br />\n";
    print "</p>\n";
    print "<hr />\n";
}

# tex_record(RECORD)
# Prints one student as a row of the by-year LaTeX table to the currently
# selected filehandle: name, status label, e-mail, first position.
#   RECORD - reference to a hash; the keys name, status, thesis_status,
#            email and firstposition are read.
# The status label is chosen by the first rule below that applies:
#   thesis status contains a digit    -> the thesis status itself
#   thesis status is "No MS"/"No ms"  -> " Left without Masters "
#   thesis status is "MS"/"ms"        -> " Left with Masters "
#   thesis status contains "X" or "x" -> " Left without PhD "
#   thesis status contains "Current" or "current" -> " current "
#   second-year status contains "Current" or "current" -> " 2ndyear "
#   otherwise                         -> empty
sub tex_record{
    my %rec=%{$_[0]};
    my $thesis=$rec{'thesis_status'};
    my $project=$rec{'status'};

    my $label;
    if($thesis=~/[0-9]/){
        $label=$thesis;
    }
    elsif($thesis eq "No MS" || $thesis eq "No ms"){
        $label=" Left without Masters ";
    }
    elsif($thesis eq "MS" || $thesis eq "ms"){
        $label=" Left with Masters ";
    }
    elsif($thesis=~/X/ || $thesis=~/x/){
        $label=" Left without PhD ";
    }
    elsif($thesis=~/Current/ || $thesis=~/current/){
        $label=" current ";
    }
    elsif($project=~/Current/ || $project=~/current/){
        $label=" 2ndyear ";
    }
    else{
        $label="";
    }

    # The last column is left empty when the first position is empty.
    my $position=$rec{'firstposition'} ? $rec{'firstposition'} : '';
    print $rec{'name'}.' & '.$label.' & '.$rec{'email'}.' & '.$position.'\\\\'."\n";
}
