#! /usr/local/bin/perl -w use strict; use lib '/home/litherm/lib/perl5/site_perl/'; use POSIX qw/floor/; use Text::Diff; use Text::Diff::Table; use Tk; use Tk::DialogBox; use Tk::FileSelect; use Tk::ROText; use Tk::Toplevel; # These are segments to ignore in the diffs. If you include an MSH segment # the number here should be one lower than normal. i.e. if you want to ignore # MSH-9 this should include an entry like '"MSH-8" => 1' - remember that's # for MSH ONLY. Also remember to put a comma after it unless it's the last # item in the list. use constant IGNORE => { "MSH-6" => 1, "MSH-9" => 1, "OBR-22" => 1 }; # Define some styles for consistant appearance between the screen and # the html dump. use constant STYLES => { header => { background => 'dimgrey', foreground => 'white' }, segment => { background => 'gainsboro', foreground => 'black' }, diff => { background => 'turquoise', foreground => 'black' }, removed => { background => 'salmon', foreground => 'black' }, added => { background => 'lightblue', foreground => 'black' }, normal => { background => 'white', foreground => 'black' }, 'warn' => { background => 'yellow', foreground => 'black' }, segdiff => { background => 'red', foreground => 'white' } }; my $w = {}; my $cfg = {}; &init_files($cfg, @ARGV); gen_tk($w, $cfg); MainLoop; exit; sub gen_tk { my $w = shift; my $cfg = shift; # Major window components $w->{main} = MainWindow->new(-title => 'HL7 Diff'); $w->{mframe} = $w->{main}->Frame->pack(-fill => 'x'); $w->{dframe} = $w->{main}->Frame->pack(-fill => 'both', -expand => 1); # Menu bar components $w->{mframe}->Label(-text => '1)')->pack(-side => 'left'); $w->{infile} = $w->{mframe}->Entry( -textvariable => \$cfg->{infile}, -background => 'white' )->pack(-side => 'left'); $w->{mframe}->Button( -text => 'Browse', -command => [ \&browse, $w, $cfg, 'infile' ] )->pack(-side => 'left'); $w->{mframe}->Label(-text => '2)')->pack(-side => 'left'); $w->{outfile} = $w->{mframe}->Entry( -textvariable => \$cfg->{outfile}, -background => 'white' )->pack(-side => 'left'); $w->{mframe}->Button( -text => 'Browse', -command => [ \&browse, $w, $cfg, 'outfile' ] )->pack(-side => 'left'); $w->{godiff} = $w->{mframe}->Button( -text => 'Diff!', -command => [ \&diff_records, $w, $cfg ] )->pack(-side => 'right'); $w->{godump} = $w->{mframe}->Button( -text => 'Save', -command => [ \&dump_text, $w, $cfg ], -state => 'disabled' )->pack(-side => 'right'); # Text window for displaying $w->{text} = $w->{dframe}->Scrolled( 'ROText', -width => 80, -height => 40, -scrollbars => 'osoe', -background => 'white', -wrap => 'none' )->pack(-fill => 'both', -expand => 1); # Configure some tags for highlighting text in window for (keys %{&STYLES}) { $w->{text}->tagConfigure( $_, -background => &STYLES->{$_}->{background}, -foreground => &STYLES->{$_}->{foreground} ); } } sub browse { my $w = shift; my $cfg = shift; my $dir = shift; my $dialog = $w->{main}->FileSelect(-title => 'Enter a file to diff:'); $cfg->{$dir} = $dialog->Show; $w->{main}->update; } sub dump_text { my $w = shift; my $cfg = shift; my $dialog = $w->{main}->FileSelect(-title => 'Enter a new file to save this data:'); my $file = $dialog->Show; return unless $file; if (-e $file) { $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ["OK", "Cancel"]); $dialog->Label(-text => "File exists - replace?")->pack; my $popup = $dialog->Show; return unless $popup eq "OK"; if (!-w $file) { $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ["OK"]); $dialog->Label(-text => "Can't write file, sorry.")->pack; my $popup = $dialog->Show; return; } } open FILE, ">$file" || die "Could not open $file : $!\n"; select FILE or die "Select failed one: $!\n"; print "
\n";
$w->{text}->dump('-all', -command => \&dump_parse, '1.0', 'end');
print "\n";
select STDOUT or die "Select failed two: $!\n";
close FILE || die "Could not close $file : $!\n";
}
sub dump_parse
{
my $key = shift;
my $value = shift;
my $index = shift;
if ($key eq "tagon")
{
print "{$value}->{foreground};
print "; background-color:".&STYLES->{$value}->{background};
print "\">";
}
elsif ($key eq "tagoff")
{
print "";
}
elsif ($key eq "text")
{
print $value;
}
}
sub diff_records
{
my $w = shift;
my $cfg = shift;
&test_files($w, $cfg) || return;
$w->{text}->delete('1.0', 'end');
$cfg->{counter} = 0;
$w->{dialog} = $w->{main}->Toplevel(-title => 'Difference Engine');
$w->{diffstage} = $w->{dialog}->Label(-text => 'Stage one diff')->pack;
my $tempText = "Stage one is the line by line diff.\n";
$tempText .= "I have no way of predicting how long\n";
$tempText .= "this will take, nor how much memory.\n";
$w->{diffdesc} = $w->{dialog}->Label(-text => $tempText)->pack;
$w->{diffstat} = $w->{dialog}->Label(-text => "???")->pack;
$w->{difftime} = $w->{dialog}->Label(-text => "00:00:00")->pack;
$w->{dialog}->update;
$w->{dialog}->repeat(1000, [ \&counter, $w, $cfg ]);
my $diffTable = diff $cfg->{infile}, $cfg->{outfile}, {STYLE => 'Table', CONTEXT => 0};
$w->{diffstage}->configure(-text => 'Stage two diff');
$tempText = "Stage two is the field by field diff.\n";
$tempText .= "Unless there are many lines with lots\n";
$tempText .= "of differences, this should not take as\n";
$tempText .= "the first diff did.";
$w->{diffdesc}->configure(-text => $tempText);
$w->{dialog}->update;
$cfg->{x} = 1;
$cfg->{y} = 0;
my $counter = 0;
my $type = '';
my @size = (0, 0, 0, 0);
for (split /\n/, $diffTable)
{
$counter++;
$w->{diffstat}->configure(-text => sprintf("Line %06d", $counter));
# Need to figure out the format of the output table. It should either be:
# three column: +---+------------------+---------------------+
# or
# four column: +---+----------------+---+-------------------+
my ($line1, $line2, $rec1, $rec2, $sep1, $sep2);
if ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/)
{
$type = 'three';
$size[0] = length $1;
$size[1] = length $2;
$size[2] = length $3;
}
elsif ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/)
{
$type = 'four';
$size[0] = length $1;
$size[1] = length $2;
$size[2] = length $3;
$size[3] = length $4;
}
elsif ($type eq 'three')
{
$sep1 = substr $_, 0, 1;
$line1 = substr $_, 1, $size[0];
$rec1 = substr $_, 2 + $size[0], $size[1];
$rec2 = substr $_, 3 + $size[0] + $size[1], $size[2];
$rec1 =~ s/\s+$//;
$rec2 =~ s/\s+$//;
if ($line1 =~ /\d+/ && $sep1 eq '*')
{
&diff_segments($w, $cfg, $line1, $rec1, $line1, $rec2);
}
}
elsif ($type eq 'four')
{
$sep1 = substr $_, 0, 1;
$line1 = substr $_, 1, $size[0];
$rec1 = substr $_, 2 + $size[0], $size[1];
$sep2 = substr $_, 2 + $size[0] + $size[1], 1;
$line2 = substr $_, 3 + $size[0] + $size[1], $size[2];
$rec2 = substr $_, 4 + $size[0] + $size[1] + $size[2], $size[3];
$rec1 =~ s/\s+$//;
$rec2 =~ s/\s+$//;
if (($line1 =~ /\d+/ || $line2 =~ /\d+/) && ($sep1 eq '*' || $sep2 eq '*'))
{
&diff_segments($w, $cfg, $line1, $rec1, $line2, $rec2);
}
}
else
{
die "Can not parse output of Text::Diff::Table";
}
}
$w->{dialog}->destroy;
$w->{godump}->configure(-state => 'normal');
$w->{main}->update;
}
sub diff_segments
{
my $w = shift;
my $cfg = shift;
my ($line1, $rec1, $line2, $rec2) = @_;
my $split1 = length($rec1) > 2 ? substr($rec1, 3, 1) : '';
my $split2 = length($rec2) > 2 ? substr($rec2, 3, 1) : '';
my $seglist1 = '';
my $seglist2 = '';
my $maxseg1 = 0;
my $maxseg2 = 0;
$cfg->{record} = $cfg->{x};
delete $cfg->{diffs};
$w->{text}->insert(
"$cfg->{x}.0",
"File 1, Record $line1 : File 2, Record $line2\n",
"header"
);
$cfg->{x}++;
# Figure out what segments we need and the order.
for (split /\\r/, $rec1)
{
$seglist1 .= "$_\n";
$maxseg1++;
}
for (split /\\r/, $rec2)
{
$seglist2 .= "$_\n";
$maxseg2++;
}
if (!$rec1)
{
$w->{text}->insert("$cfg->{x}.0", "File 1 does not contain this record.\n");
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.0", "Record appears as follows in file 2.\n", "added");
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.0", $seglist2);
$cfg->{x} += $maxseg2;
return;
}
elsif (!$rec2)
{
$w->{text}->insert("$cfg->{x}.0", "File 2 does not contain this record.\n");
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.0", "Record appears as follows in file 1.\n", "removed");
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.0", $seglist1);
$cfg->{x} += $maxseg1;
return;
}
my $diffTable = diff \$seglist1, \$seglist2,
{STYLE => 'Table', CONTEXT => ($maxseg1 > $maxseg2) ? $maxseg1 : $maxseg2};
my $type = '';
my @size = (0, 0, 0, 0);
for (split /\n/, $diffTable)
{
# Need to figure out the format of the output table. It should either be:
# three column: +---+------------------+---------------------+
# or
# four column: +---+----------------+---+-------------------+
my ($seg1, $seg2);
if ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/)
{
$type = 'three';
$size[0] = length $1;
$size[1] = length $2;
$size[2] = length $3;
}
elsif ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/)
{
$type = 'four';
$size[0] = length $1;
$size[1] = length $2;
$size[2] = length $3;
$size[3] = length $4;
}
elsif ($type eq 'three')
{
$seg1 = substr $_, 2 + $size[0], $size[1];
$seg2 = substr $_, 3 + $size[0] + $size[1], $size[2];
$seg1 =~ s/\s+$//;
$seg2 =~ s/\s+$//;
&parse_fields($w, $cfg, $seg1, $seg2);
}
elsif ($type eq 'four')
{
$seg1 = substr $_, 2 + $size[0], $size[1];
$seg2 = substr $_, 4 + $size[0] + $size[1] + $size[2], $size[3];
$seg1 =~ s/\s+$//;
$seg2 =~ s/\s+$//;
&parse_fields($w, $cfg, $seg1, $seg2);
}
else
{
die "Can not parse output of Text::Diff::Table";
}
}
# Check here to see if we need to keep this record
for (@{$cfg->{diffs}})
{
if (!&IGNORE->{$_})
{
return;
}
}
# If we are still here, the only difference where in the IGNORE hash and the
# diff should be deleted.
$w->{text}->delete("$cfg->{record}.0", 'end');
$w->{text}->insert('end', "\n");
$cfg->{x} = $cfg->{record} + 1;
$cfg->{y} = 0;
}
sub parse_fields
{
my $w = shift;
my $cfg = shift;
my ($seg1, $seg2) = @_;
my @seg1 = split /\|/, $seg1;
my @seg2 = split /\|/, $seg2;
$seg1[0] = "Missing segment detected" unless $seg1[0];
$seg2[0] = "Missing segment detected" unless $seg2[0];
$seg1[0] = "Empty segment detected (\\r\\r)" if $seg1[0] eq "\\n";
$seg2[0] = "Empty segment detected (\\r\\r)" if $seg2[0] eq "\\n";
my $maxseg = (scalar @seg1 > scalar @seg2) ? scalar @seg1 : scalar @seg2;
$cfg->{y} = 0;
if (($seg1[0] ne $seg2[0]) || (length $seg1[0] > 3) || (length $seg2[0] > 3))
{
$w->{text}->insert("$cfg->{x}.$cfg->{y}", "Segment mis-match detected!\n", 'warn');
$cfg->{x}++;
push @{$cfg->{diffs}}, "SEGMENT";
}
$w->{text}->insert("$cfg->{x}.$cfg->{y}", " \n1: \n2: \n");
$cfg->{x}++;
$cfg->{y} = 3;
$w->{text}->insert("$cfg->{x}.$cfg->{y}", $seg1[0]);
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.$cfg->{y}", $seg2[0]);
$cfg->{x} -= 2;
$cfg->{y} = 6;
my $test = 0;
for (my $i = 1; $i < $maxseg; $i++)
{
$seg1[$i] = "" unless $seg1[$i];
$seg2[$i] = "" unless $seg2[$i];
my $color1 = 'normal';
my $color2 = 'normal';
if ($seg1[$i] ne "" && $seg2[$i] eq "")
{
$test++;
$color1 = 'removed';
push @{$cfg->{diffs}}, "REMOVED";
}
elsif ($seg1[$i] eq "" && $seg2[$i] ne "")
{
$test++;
$color2 = 'added';
push @{$cfg->{diffs}}, "ADDED";
}
elsif ($seg1[$i] ne $seg2[$i])
{
$test++;
$color1 = 'diff';
$color2 = 'diff';
push @{$cfg->{diffs}}, "$seg1[0]-$i";
}
my $maxy = length $i;
$maxy = ($maxy > length $seg1[$i]) ? $maxy : length $seg1[$i];
$maxy = ($maxy > length $seg2[$i]) ? $maxy : length $seg2[$i];
my $temp = $i;
$temp++ if $seg1[0] eq "MSH";
my $count = $temp . " " x ($maxy - length $temp);
$seg1[$i] = $seg1[$i] . " " x ($maxy - length $seg1[$i]);
$seg2[$i] = $seg2[$i] . " " x ($maxy - length $seg2[$i]);
$w->{text}->insert("$cfg->{x}.$cfg->{y}", "|$count", 'segment');
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.$cfg->{y}", "|$seg1[$i]", $color1)
unless length $seg1[0] > 3;
$cfg->{x}++;
$w->{text}->insert("$cfg->{x}.$cfg->{y}", "|$seg2[$i]", $color2)
unless length $seg2[0] > 3;
$cfg->{x} -= 2;
$cfg->{y} += $maxy + 1;
}
if ($test)
{
$w->{text}->delete("$cfg->{x}.0", "$cfg->{x}.4");
$w->{text}->insert("$cfg->{x}.0", "--->", "segdiff");
}
$cfg->{x} += 3;
}
sub test_files
{
my $w = shift;
my $cfg = shift;
if (!$cfg->{infile} || !$cfg->{outfile})
{
my $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ['OK']);
$dialog->Label(-text => "Please select both a\nfile 1 and a file 2.")->pack;
$dialog->Show;
return 0;
}
if (!-r $cfg->{infile})
{
my $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ['OK']);
$dialog->Label(-text => "Could not open file:\n$cfg->{infile}\n$!")->pack;
$dialog->Show;
return 0;
}
if (!-r $cfg->{outfile})
{
my $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ['OK']);
$dialog->Label(-text => "Could not open file:\n$cfg->{outfile}\n$!")->pack;
$dialog->Show;
return 0;
}
return 1;
}
sub counter
{
my $w = shift;
my $cfg = shift;
$cfg->{counter}++;
my $sec = $cfg->{counter};
my ($hour, $min);
$hour = floor($sec/3600);
$sec -= $hour * 3600;
$min = floor($sec/60);
$sec -= $min * 60;
$w->{difftime}->configure(-text => sprintf('%02d:%02d:%02d', $hour, $min, $sec));
$w->{dialog}->update;
}
sub init_files
{
my $cfg = shift;
my @argv = @_;
my $argv = "\t".join("\t", @argv)."\t";
if ($argv =~ /-h/)
{
print "usage: $0