#!/usr/bin/perl -w
use strict;
use XML::Twig;
#process.pl
#v1.3
#04/02/2008
#
#Requires XML-Twig library.  For UMD astro it is in astromake
#astroload xml-twig
#Otherwise available in CPAN
#
#
#Copyright 2007-2008 John C. Vernaleo
#vernaleo@astro.umd.edu or john@netpurgatory.com
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Overview:
#   Reads student-list.xml and writes
#     full-list.html    every record, newest incoming year first
#     student-list.tex  the same records as LaTeX tables, one per year
#     advisor-list.tex  students grouped under each advisor
#     stats.txt         summary statistics (also printed to STDOUT)
#   pdflatex is then run on both .tex files.

my $twig = XML::Twig->new;
$twig->parsefile("student-list.xml");
my $root     = $twig->root;
my @students = $root->children;

# All records, keyed by student name (filled in by read_record).
my %astrograds;

my $counter         = 0;    # records processed
my $left            = 0;    # students who left without a PhD
my $completed       = 0;    # students with a completion year
my $current         = 0;    # derived: $counter - $left - $completed
my $completetime    = 0;    # summed years-to-PhD
my $maxcompletetime = 0;
my $mincompletetime = 100;
#This should be the same as completed, but since I don't know
#some starting years I can't use everyone to calculate graduation times.
my $completenumber = 0;

my $htmlname  = "full-list.html";
my $texname   = "student-list.tex";
my $texname2  = "advisor-list.tex";
my $statsname = "stats.txt";

foreach my $student_elt (@students) {
    read_record($student_elt);
}

# Newest incoming year first.  The name tiebreak only makes the order of
# students within one year reproducible (it used to follow hash order).
# Computed once and reused by the advisor section below.
my @names_by_year = sort {
    $astrograds{$b}{'year'} <=> $astrograds{$a}{'year'}
        or $a cmp $b
} keys %astrograds;

##############################BY YEAR###################################
# The output subroutines print to the currently selected filehandle, so
# this section switches between the two files with select().
open my $web_fh, '>', $htmlname or die "Cannot open file: $!";
select $web_fh;
page_top();

open my $tex_fh, '>', $texname or die "Cannot open file: $!";
select $tex_fh;
tex_top();

my $currentyear = "";
for my $name (@names_by_year) {
    my $rec    = $astrograds{$name};
    my $status = $rec->{'thesis_status'};

    # Start a new LaTeX table whenever the incoming year changes.
    select $tex_fh;
    if ($currentyear ne $rec->{'year'}) {
        $currentyear = $rec->{'year'};
        if ($counter) {
            print '\\\\', "\n";
            print '\end{tabular*}', "\n\n";
        }
        print '\begin{tabular*}{1.0\textwidth}{lccc}', "\n";
        print '\hline', "\n";
        print '\hline', "\n";
        print '{\bf ' . $currentyear . '}\\\\' . "\n";
        print '\hline', "\n";
    }

    if ($status =~ /X|MS|ms/) {
        $left++;
    }

    # A thesis status containing a digit is taken to be the completion year.
    if ($status =~ /[0-9]/) {
        $completed++;
        # Students with an unknown (zero) starting year cannot contribute
        # to the time-to-degree statistics.
        if ($currentyear != 0) {
            my $years = $status - $currentyear;
            $completetime += $years;
            $maxcompletetime = $years if $years >= $maxcompletetime;
            $mincompletetime = $years if $years <= $mincompletetime;
            $completenumber++;
        }
    }

    select $web_fh;
    html_record($rec);
    select $tex_fh;
    tex_record($rec);
    $counter++;
}

select $web_fh;
print "\n\n";
select STDOUT;
close $web_fh or die "Cannot close $htmlname: $!";

select $tex_fh;
print '\hline',                  "\n";
print '\end{tabular*}',          "\n\n";
print '\end{center}',            "\n";
print 'Last updated on \today.', "\n";
print '\end{document}',          "\n";
select STDOUT;
close $tex_fh or die "Cannot close $texname: $!";

system("pdflatex $texname>/dev/null") == 0
    or warn "pdflatex failed on $texname (status $?)\n";

################################Some Stats#############################
my $date = localtime();
$current = $counter - $left - $completed;

stat_print(\*STDOUT);

open my $stats_fh, '>', $statsname or die "Cannot open file: $!";
stat_print($stats_fh);
close $stats_fh or die "Cannot close $statsname: $!";

##############################BY ADVISOR###############################
# Collect every non-empty advisor name, from both the second-year project
# and the thesis, exactly once.  Names are compared as plain strings: the
# earlier version used the name as a regular expression, which aborted on
# an advisor recorded as "?" and treated substring matches as duplicates.
my %is_advisor;
for my $rec (values %astrograds) {
    for my $field ('thesis_advisor', 'advisor') {
        my $advisor = $rec->{$field};
        next unless $advisor;
        $is_advisor{$advisor} = 1;
    }
}

#Put list in correct alphabetical order
my @advisorlist = sort keys %is_advisor;

open my $adv_fh, '>', $texname2 or die "Cannot open file: $!";
select $adv_fh;
tex_top2();

foreach my $advisor (@advisorlist) {
    print '\begin{tabular*}{1.0\textwidth}{lccccc}' . "\n";
    print '\hline' . "\n" . '\hline' . "\n";
    print '{\bf ' . $advisor . '}\\\\' . "\n";
    print '\hline' . "\n";

    for my $name (@names_by_year) {
        my $rec = $astrograds{$name};

        # Second-year project status, if this advisor supervised it.
        my $stat2 = "";
        if ($rec->{'advisor'} && $rec->{'advisor'} eq $advisor) {
            $stat2 = $rec->{'status'};
        }

        # Thesis status, if this advisor supervised the thesis.
        my $statp = "";
        if ($rec->{'thesis_advisor'} && $rec->{'thesis_advisor'} eq $advisor) {
            $statp = $rec->{'thesis_status'};
        }
        my $label = _departure_label($statp);
        $statp = $label if defined $label;

        if ($stat2 || $statp) {
            print "$name & $stat2 & $statp & $rec->{'email'} & $rec->{'firstposition'}\\\\\n";
        }
    }

    print '\hline' . "\n";
    print '\\\\' . "\n";
    print '\end{tabular*}' . "\n";
}

print '\end{center}',            "\n";
print 'Last updated on \today.', "\n";
print '\end{document}',          "\n";
select STDOUT;
close $adv_fh or die "Cannot close $texname2: $!";

system("pdflatex $texname2>/dev/null") == 0
    or warn "pdflatex failed on $texname2 (status $?)\n";

##############################SUBROUTINES##############################

# _child_text($elt, @path)
# Follows first_child() through each tag in @path and returns the text of
# the element reached, or '' as soon as any element on the way is missing.
sub _child_text {
    my ($elt, @path) = @_;
    for my $tag (@path) {
        $elt = $elt->first_child($tag);
        return '' unless defined $elt;
    }
    return $elt->text;
}

# _departure_label($status)
# Maps a thesis status that records a departure to the text shown in the
# LaTeX tables.  Returns undef when the status is not one of those codes.
sub _departure_label {
    my ($status) = @_;
    return " Left without PhD "     if $status =~ /[Xx]/;
    return " Left with Masters "    if $status eq "MS" || $status eq "ms";
    return " Left without Masters " if $status eq "No MS" || $status eq "No ms";
    return;
}

# read_record($elt)
# Stores one student element in %astrograds under the student's name and
# returns the updated hash.  When called without an argument it falls back
# to $_, which is what earlier versions read.  Dies if the record has no
# <name> element; any other missing element is stored as ''.
sub read_record {
    my ($elt) = @_;
    $elt = $_ unless defined $elt;

    my $name_elt = $elt->first_child('name')
        or die "Student record without a <name> element\n";
    my $name = $name_elt->text;

    # Records are keyed by name, so a repeated name would overwrite silently.
    warn "Duplicate record for '$name'; keeping the later one\n"
        if exists $astrograds{$name};

    $astrograds{$name} = {
        name             => $name,
        year             => _child_text($elt, 'year'),
        email            => _child_text($elt, 'email'),
        firstposition    => _child_text($elt, 'firstposition'),
        current          => _child_text($elt, 'current'),
        advisor          => _child_text($elt, 'secondyear', 'advisor'),
        status           => _child_text($elt, 'secondyear', 'status'),
        thesis_advisor   => _child_text($elt, 'phd', 'thesis_advisor'),
        external_advisor => _child_text($elt, 'phd', 'external_advisor'),
        thesis_status    => _child_text($elt, 'phd', 'thesis_status'),
        title            => _child_text($elt, 'phd', 'title'),
        misc             => _child_text($elt, 'misc'),
    };
    return (%astrograds);
}

# stat_print($out)
# Writes the summary statistics to the filehandle $out (STDOUT when it is
# omitted).  Reads the counters accumulated by the by-year loop.
sub stat_print {
    my ($out) = @_;
    $out = \*STDOUT unless defined $out;

    # Guard the percentages against an empty student list.
    my $left_pct      = $counter ? ($left / $counter) * 100.0      : 0;
    my $completed_pct = $counter ? ($completed / $counter) * 100.0 : 0;

    print  {$out} "As of: $date\n";
    print  {$out} "$counter student records processed.\n";
    print  {$out} $current . " current students.\n";
    printf {$out} "%g students left without a PhD.  %.2f%%\n", $left, $left_pct;
    printf {$out} "%g students completed a PhD.  %.2f%%\n", $completed, $completed_pct;

    if ($completenumber) {
        printf {$out} "Average time for completed PhD is %.2f years.\n",
            $completetime / $completenumber;
        printf {$out} "Maximum time to complete PhD is %.2f years.\n", $maxcompletetime;
        printf {$out} "Minimum time to complete PhD is %.2f years.\n", $mincompletetime;
    }
    else {
        # No completed PhD has a known starting year, so there is nothing
        # to average (and the division would fail).
        print {$out} "No completed PhDs with a known starting year.\n";
    }
    return;
}

# page_top()
# Prints the opening of the web page to the currently selected filehandle.
# NOTE(review): these strings look as if their HTML tags have been stripped;
# they are kept exactly as found -- confirm against the intended markup.
sub page_top {
    print "\n";
    print '' . "\n";
    print '' . "\n";
    print "\n";
    print "UMD Astro Graduate Student List\n";
    print "\n";
    print "\n";
    return;
}

# tex_top()
# Prints the LaTeX preamble and column heading for the by-year document
# to the currently selected filehandle.
sub tex_top {
    print '\documentclass[11pt]{article}' . "\n";
    print '\usepackage{fullpage}' . "\n";
    print '\begin{document}' . "\n";
    print '\begin{center}' . "\n";
    print '{\huge Students by Year}' . "\n";
    print '\begin{tabular*}{1.0\textwidth}{|l|c|c|c|}' . "\n";
    print '\hline' . "\n";
    print 'Name&PhD&e-mail&misc (Usually first position after leaving)\\\\' . "\n";
    print '\hline' . "\n";
    print '\end{tabular*}' . "\n";
    return;
}

# tex_top2()
# Prints the LaTeX preamble and column heading for the by-advisor document
# to the currently selected filehandle.
sub tex_top2 {
    print '\documentclass[10pt]{article}' . "\n";
    print '\usepackage{fullpage}' . "\n";
    print '\begin{document}' . "\n";
    print '\begin{center}' . "\n";
    print '{\huge Advisor/Student List}' . "\n";
    print '\begin{tabular*}{1.0\textwidth}{p{1in}ccccc}' . "\n";
    print '\hline' . "\n";
    print 'Name&2yp&PhD&e-mail&misc (Usually first position after leaving)\\\\' . "\n";
    print '\hline',         "\n";
    print '\end{tabular*}', "\n";
    return;
}

# html_record($record)
# Prints one student record (a hash reference as built by read_record) to
# the currently selected filehandle.
# NOTE(review): as in page_top, the markup around these fields appears to
# have been stripped; the strings are reproduced exactly as found.
sub html_record {
    my ($record) = @_;

    print "\n\n";
    print "\n\n\n";
    print "Name: $record->{'name'}\n\n\n";
    print "Incoming year: $record->{'year'}\n\n\n";
    print "Current email address: $record->{'email'}\n\n\n";
    print "First Position: $record->{'firstposition'}\n\n\n";
    print "Current Position: $record->{'current'}\n\n\n";

    print "Second Year Project\n\n\n";
    print "\n\n          Advisor: $record->{'advisor'}\n\n\n\n\n";
    print "\n\n          Status: $record->{'status'}\n\n\n\n\n";

    print "Thesis\n\n\n";
    print "\n\n          Advisor: $record->{'thesis_advisor'}\n\n\n\n\n";
    print "\n\n          External Advisor: $record->{'external_advisor'}\n\n\n\n\n";
    print "\n\n          Status: $record->{'thesis_status'}\n\n\n\n\n";
    print "\n\n          Thesis Title: $record->{'title'}\n\n\n\n\n";

    print "Misc.: $record->{'misc'}\n\n\n";
    print "\n\n\n";
    print "\n\n";
    return;
}

# tex_record($record)
# Prints one table row (name, status, e-mail, first position) for a student
# record to the currently selected filehandle.  The status column is decided
# in increasing priority: current second-year project, current thesis,
# a departure code, and finally a completion year.
sub tex_record {
    my ($record) = @_;
    my $thesis_status = $record->{'thesis_status'};

    my $stat = "";
    $stat = " 2ndyear " if $record->{'status'} =~ /[Cc]urrent/;
    $stat = " current " if $thesis_status =~ /[Cc]urrent/;

    my $label = _departure_label($thesis_status);
    $stat = $label         if defined $label;
    $stat = $thesis_status if $thesis_status =~ /[0-9]/;

    print $record->{'name'}, ' & ';
    print $stat;
    print ' & ' . $record->{'email'} . ' & ';
    if ($record->{'firstposition'}) {
        print $record->{'firstposition'};
    }
    print '\\\\', "\n";
    return;
}