#!/usr/bin/perl


# Run under the Norwegian locale so that locale-aware operations (case
# mapping, \w in patterns) follow it.
# NOTE(review): the locale name is platform-specific and the return value of
# setlocale() is not checked -- confirm it is valid on the target system.
use locale;
use POSIX qw(locale_h);
setlocale(LC_ALL, "norwegian");

# Abbreviation fragments; a punctuation mark directly after one of these is
# not treated as a sentence boundary.
my %fork;

# Command-line options (stored by Getopt::Mixed in $opt_abbr, $opt_verbs and
# $opt_id):
#   --abbr FILE    abbreviation list, entries such as "f.eks."
#   --verbs FILE   verb list, one verb per line
#   --id PREFIX    prefix prepended to every output id as "PREFIX-N"
use Getopt::Mixed;
Getopt::Mixed::getOptions("abbr=s verbs:s id:s");

# Known verbs, one per line of the --verbs file.  The table stays empty when
# the option is not given.
my %verbs;
if ($opt_verbs) {
    # Three-argument open with an explicit check: a mistyped path must not
    # silently leave the table empty and change the output.
    open(my $verbs_fh, '<', $opt_verbs)
        or die "Cannot open verb list '$opt_verbs': $!\n";
    while (my $verb = <$verbs_fh>) {
        chomp $verb;
        $verbs{$verb} = 1;
    }
    close $verbs_fh;
}

# Abbreviations: all whitespace is removed from each line and the rest is
# split on periods, so "f.eks." registers the fragments "f" and "eks".
# Running without --abbr is still allowed and leaves the table empty.
if (defined $opt_abbr and length $opt_abbr) {
    open(my $abbr_fh, '<', $opt_abbr)
        or die "Cannot open abbreviation list '$opt_abbr': $!\n";
    while (my $entry = <$abbr_fh>) {
        $entry =~ s/\s//g;
        $fork{$_} = 1 for split /\./, $entry;
    }
    close $abbr_fh;
}


# Running counter shared by every "[id]" label that is printed (sentences
# and paragraph markers alike).
my $s_id;

# True while the most recently read line was blank, so that a run of blank
# lines produces a single paragraph marker.
my $prev_p;

# Amount subtracted from the non-sentence score, keyed by the punctuation
# mark that ends the line.
my %terminal_penalty = ('.' => 50, '?' => 30, '!' => 20, ':' => 10);

# Advance the counter and return the label for the next output unit,
# prefixed with the --id value ("PREFIX-N") when one was given.
sub _next_label {
    $s_id++;
    return $opt_id ? $opt_id . "-" . $s_id : $s_id;
}

# Walk @$tokens from index $from in direction $step (+1 or -1) and return
# the first token that contains a non-blank character.  Returns $sentinel
# when the walk leaves the array.  The explicit bounds check avoids relying
# on negative indices, and a token such as "0" is returned like any other.
sub _nearest_token {
    my ($tokens, $from, $step, $sentinel) = @_;
    for (my $k = $from; $k >= 0 && $k <= $#$tokens; $k += $step) {
        return $tokens->[$k] if $tokens->[$k] =~ /\S/;
    }
    return $sentinel;
}

# Print one sentence as "[id] text", or "[id |] text" when the line it came
# from scored as probably not a sentence ($ns > 0).  Leading spaces are
# stripped so that every sentence is formatted the same way.
sub _emit_sentence {
    my ($text, $ns) = @_;
    $text =~ s/^ +//;
    my $id   = _next_label();
    my $flag = $ns > 0 ? " |" : "";
    print "[$id$flag] $text\n";
    return;
}

while (my $line = <STDIN>) {

    chomp $line;

    # Blank line: emit one paragraph marker for the whole run of blanks.
    if ($line =~ /^\s*$/) {
        unless ($prev_p) {
            my $id = _next_label();
            print "[$id] <p>\n";
            $prev_p = 1;
        }
        next;
    }
    $prev_p = 0;

    # Tokenise: collapse runs of spaces, then split on punctuation and
    # spaces, keeping the separators as tokens of their own.
    $line =~ s/ +/ /g;
    my @words = split /([!?.:; ])/, $line;
    my $l     = @words;

    # Estimate "non-sentence-ness": short lines score high, while known
    # verbs and terminal punctuation pull the score down.  A positive score
    # marks every sentence from this line with " |".
    # FIXME: also add to the score when the previous line was blank.
    my $ns = 54 - $l;

    foreach my $w (@words) {
        # Look the token up as written and lower-cased.  $w aliases the
        # element of @words, so it must not be modified here.
        $ns -= 10 if $verbs{$w} || $verbs{ lc $w };
    }

    if ($line =~ /([.?!:])\s*$/) {
        $ns -= $terminal_penalty{$1};
    }

    # Lines that start like a list item or a number look less like prose.
    if ($line =~ /^\s*[-\d*]/) { $ns += 10 }

    # So do lines whose first chunk contains a colon or a period.
    if ($line =~ /^\s*\S+[:.]/) { $ns += 10 }

    # Split into sentences: accumulate tokens and cut after a punctuation
    # token unless one of the guards below says it is not a boundary.
    my $s = "";

    for my $i (0 .. $#words) {
        my $w = $words[$i];
        $s .= $w;

        next unless $w =~ /^[!?.:;]$/;

        my $prev  = _nearest_token(\@words, $i - 1, -1, "START__");
        my $pprev = _nearest_token(\@words, $i - 2, -1, "START__");
        my $next  = _nearest_token(\@words, $i + 1, +1, "END__");
        my $after = $i < $#words ? $words[$i + 1] : "";

        next if $fork{$prev};               # known abbreviation fragment
        next if $prev =~ /^\d{1,2}\)*$/;    # short number, e.g. "1." or "2)."
        next if $prev =~ /^\w$/;            # single character, e.g. an initial
        next if $after =~ /\w/;             # no space after the mark ("3.14")
        # NOTE(review): [a-z] does not cover the Norwegian letters; confirm
        # whether a locale-aware lower-case test is wanted here.
        next if $next =~ /^[a-z]/;          # continues in lower case
        # A colon directly after the first token, or after a token that was
        # itself preceded by a period, does not end a sentence.
        next if $w eq ":" and ($i == 1 or $pprev eq ".");

        _emit_sentence($s, $ns);
        $s = "";
    }

    # Whatever is left after the last boundary is a sentence of its own.
    if ($s =~ /\S/) {
        _emit_sentence($s, $ns);
    }
}
