#! /bin/perl
# I assume Perl Ver. 5

## Written by Greg Sanders, December 2001
## This program will convert the canonicalized textual version of a logfile
## into an XML equivalent.
##
## Usage: convert_to_xml input_file.txt output_file.xml
##
## The input file is read one line at a time.  Each line is matched against
## an ordered chain of patterns (first match wins) and the corresponding
## output is written to the output file.  Times found on input lines are
## made relative to the most recent "Task Start Time" value.
##
## NOTE(review): none of the printf format strings below contains a '%'
## conversion, so the time/site arguments passed alongside them are
## evaluated but never appear in the output.  Presumably the formats were
## meant to carry XML element text that is missing from this copy of the
## file -- confirm against the original before relying on the output.
## "use warnings" is deliberately not enabled here: newer perls would report
## "Redundant argument in printf" for every such call until the formats are
## restored.

use strict;

# Exactly two command line arguments are required: input path, output path.
if (@ARGV != 2)
{
    print "\n   Usage:   convert_to_xml  input_file.txt output_file.xml\n";
    print "   The input_file must already exist.  The output file is created.\n\n";
    print "   The output file will eventually include multiple XML documents,\n";
    print "   ... (each representing one call) separated by lines of hyphens.\n\n";
    exit;
}

# Indexed by the two-digit field captured as $2 from the call-identifier
# line; index 0 is intentionally an empty string.
my @siteNames = ("", "AT&T", "BBN", "CMU", "Colorado", "IBM", "MIT", "MITRE",
                 "Lucent", "SRI");

my ($inputPath, $outputPath) = @ARGV;

# Three-argument open: the file names are taken literally, so characters
# such as '>' or '|' in a name can no longer change the open mode.
open(my $in, '<', $inputPath)
    or die "Error: can't open input file $ARGV[0]\n\n";
open(my $out, '>', $outputPath)
    or die "Error: can't open output file $ARGV[1]: $!\n\n";

my $inputLineNum = 0;   # running count of lines read; not otherwise used here
my $startTime    = 0;   # zero point for all relative times, set by "Task Start Time"

# Read the input line by line into $_.  (The loop previously had an empty
# condition, so no input line was ever read.)
while (<$in>)
{
    $inputLineNum += 1;

    if (/^--------------------------------------/)
    {
        # Separator between calls: echo the line ($_ still carries its own
        # newline) and follow it with blank lines.
        print {$out} "$_\n";
        print {$out} "\n\n";
    }
    elsif (/^(\d\d\d\d\d)_[X0-9][X0-9]_[X0-9][X0-9]_(\d\d)_(\d\d\d\d)(\d\d)(\d\d)_(\d\d\d\d\d\d)/)
    {
        # Call identifier line; the second capture selects the site name.
        printf {$out} "\n", $siteNames[$2];
    }
    elsif (/^Task Start Time ([0-9.]+)/)
    {
        $startTime = $1;
        printf {$out} "\n";
        printf {$out} " \n", $1 - $startTime;
        ## So, we're indicating the task starts at time 0.000
        ## All other times will be measured from this zero point.
    }
    elsif (/^Task End Time ([0-9.]+)/)
    {
        printf {$out} " \n", $1 - $startTime;
        printf {$out} "\n";
    }
    # The eight turn/utterance boundary cases are kept as separate branches
    # (rather than merged into one pattern) so each can carry its own format.
    elsif (/^Start user turn (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^Start sys turn (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^Start user utt (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^Start sys utt (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^End user utt (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^End sys utt (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^End user turn (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif (/^End sys turn (\d+) at ([0-9.]+)/)
    {
        printf {$out} " \n", $2 - $startTime;
    }
    elsif ( / User: (\S+)\s+(\S+)\s+Asr: ([^\/]+)\/ Transcr: (.*)$/)
    {
        # User line preceded by a space: start time, end time, recognizer
        # output, and transcription are captured as $1..$4.
        printf {$out} " \n", $1 - $startTime, $2 - $startTime;
        printf {$out} " \n";
        printf {$out} " \n";
        printf {$out} " \n", $1 - $startTime, $2 - $startTime;
        printf {$out} " \n";
        printf {$out} " \n";
    }
    elsif ( /User: (\S+)\s+(\S+)\s+Asr: ([^\/]+)\/ Transcr: (.*)$/)
    {
        # Same as above but without the leading space; only reached when
        # the previous (stricter) pattern did not match.
        printf {$out} " \n", $1 - $startTime, $2 - $startTime;
        printf {$out} " \n";
        printf {$out} " \n";
        printf {$out} " \n", $1 - $startTime, $2 - $startTime;
        printf {$out} " \n";
        printf {$out} " \n";
    }
    elsif (/ Sys:\s+(\S+)\s+(\S+)\s+(.*)$/)
    {
        # A pair of "?" marks means both times are unknown, so no
        # subtraction is attempted in that case.
        if ($1 eq "?" and $2 eq "?")
        {
            printf {$out} " \n";
            printf {$out} " \n";
            printf {$out} " \n";
        }
        else
        {
            printf {$out} " \n", $1 - $startTime, $2 - $startTime;
            printf {$out} " \n";
            printf {$out} " \n";
        }
    }
    elsif (/System:\s+(\S+)\s+(\S+)\s+(.*)$/)
    {
        if ($1 eq "?" and $2 eq "?")
        {
            printf {$out} " \n";
        }
        else
        {
            printf {$out} " \n", $1 - $startTime, $2 - $startTime;
        }
        printf {$out} " \n";
        printf {$out} " \n";
    }
    elsif (/Overlap from ([0-9.]+) to ([0-9.]+)/)
    {
        printf {$out} "\n", $1 - $startTime, $2 - $startTime;
    }
    elsif (/^\s*$/)
    {
        # Blank input line.
        printf {$out} "\n";
    }
    else
    {
        # Any line not recognised above.
        chomp;
        printf {$out} "\n";
    }
}

close($in);
# Buffered write errors (e.g. a full disk) only surface at close time.
close($out)
    or die "Error: can't finish writing output file $ARGV[1]: $!\n\n";