#!/usr/bin/perl
# THIS IS FILE PAR_DP.PL
#
# Implements the dynamic program (DP) for paragraphing: given n words and a
# cost c(i,j) for putting words i..j on one line, find the line breaks that
# minimise the total cost.  The cost used here is (j-i) + r*rand, i.e. a
# random cost function, so the program is a demonstration of the DP itself.
#
# The program first solves the default problem (n=8, r=3) and then prompts
# for one of the following options:
#
#   n  redefines n, e.g. "n 12" sets n to 12 (n is the number of words);
#      don't forget to enter the number after n!
#   r  redefines r, e.g. "r 5" sets r to 5 (r is the coefficient in the
#      cost function)
#   a  generates a new random cost function and re-solves the problem,
#      keeping n and r unchanged
#   s  prints the optimum breakpoints if we start with word w(s) instead of
#      w(1), e.g. "s 3" prints the breakpoints if the input starts with
#      word 3; the cost function doesn't change
#   m  prints the entire DP table for the current problem
#   k  prints the k array, with stars next to the entries that specify the
#      optimum breakpoints
#   q  quits (end of input quits as well)
#
# Only the first letter of the option is significant, so "again", "start 3",
# "m table" and "quit" all work.

use strict;
use warnings;

our $n = 8;    # number of words
our $r = 3;    # coefficient of the random term; r=3 seems to give most
               # variety in line breaks
our @m;        # m[i] = min over j in i..n of c(i,j) + m[j+1];  m[n+1] = 0
our @k;        # k[i] = the j that attains m[i], i.e. the last word of the
               # first line when the text starts at word i
our $j;        # cursor used by breaks(): the last word already placed

# c(i, j): returns the cost of a line holding words i..j.
# The value is not stored anywhere because make_m asks for c(i,j) just once
# for any given i and j.
sub c {
    my ($first, $last) = @_;
    return ($last - $first) + $r * rand;
}

# make_m(): fills in the dynamic programming table @m and the minimizer
# table @k for the current $n and $r, drawing a fresh random cost function.
sub make_m {
    @m = ();
    @k = ();
    $m[$n + 1] = 0;    # nothing left to set after word n

    for my $first (reverse 1 .. $n) {
        for my $last ($first .. $n) {
            my $total = c($first, $last) + $m[$last + 1];

            # The first candidate seeds the minimum, so no "larger than any
            # cost" sentinel is needed and the tables are always complete,
            # whatever the sign of $r.  Ties keep the smallest j.
            if (!defined $k[$first] || $total < $m[$first]) {
                $m[$first] = $total;
                $k[$first] = $last;
            }
        }
    }
    return;
}

# breaks(): prints the optimum line breaks, reading the start position from
# the global $j and advancing it as it goes.
#   $j = 0    gives the line breaks for the given problem;
#   $j = s-1  gives the line breaks assuming the problem is given by words
#             w(s), w(s+1), ..., w(n).
sub breaks {
    $j = 0 unless defined $j;
    while ($j < $n) {
        my $next = $k[$j + 1];

        # A missing or non-advancing entry means the tables do not match
        # the request; stop rather than loop forever.
        last unless defined $next && $next > $j;
        $j = $next;
        print "$j ";
    }
    print "\n";
    return;
}

# k_and_breaks(): prints the k array under a header row of indices, with
# the entries that specify the optimum line breaks starred.
sub k_and_breaks {
    print map({ "$_\t" } 1 .. $n), "\n";

    my $start = 1;    # beginning of the current line
    for my $i (1 .. $n) {
        if ($i == $start) {
            print "$k[$i]*\t";        # k[start] is the next line break
            $start = $k[$i] + 1;      # next line starts right after it
        }
        else {
            print "$k[$i]\t";
        }
    }
    print "\n";
    return;
}

# _print_tables(): prints the index row, the m row and the k row.
sub _print_tables {
    print 'i ', map({ "$_\t" } 1 .. $n), "\n";
    print 'm ', map({ sprintf "%3.2f\t", $m[$_] } 1 .. $n), "\n";
    print 'k ', map({ "$k[$_]\t" } 1 .. $n), "\n";
    return;
}

# _solve($announce): draws a new cost function, rebuilds the tables and
# prints the optimum breaks for words 1..n.  With a true $announce the
# problem parameters are printed first.
sub _solve {
    my ($announce) = @_;
    if ($announce) {
        print "n= $n, c(i,j)= (j-i) + ${r}(rand)\n";
        print "line breaks for words 1..$n:\n";
    }
    make_m();
    $j = 0;
    breaks();
    return;
}

# _is_count($text): true if $text is a non-negative decimal integer.
sub _is_count {
    my ($text) = @_;
    return defined $text && $text =~ /\A[0-9]+\z/;
}

# _is_number($text): true if $text is a finite decimal number.
sub _is_number {
    my ($text) = @_;
    return defined $text
        && $text =~ /\A[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?\z/;
}

# main(): solves the default problem, then runs the interactive command
# loop until "q" or end of input.
sub main {
    _solve(1);

    while (1) {
        print "[n], [r], [a]gain, [s]tart, [m] table, [k] table or [q]uit?";

        my $in = <>;
        if (!defined $in) {    # end of input: leave instead of re-prompting
            print "\n";
            last;
        }

        my ($opt, $val) = split ' ', $in;
        next unless defined $opt;    # blank line: just ask again
        my $cmd = substr $opt, 0, 1;

        if ($cmd eq 'q') {
            last;
        }
        elsif ($cmd eq 'n') {
            if (_is_count($val)) {
                $n = $val;
                _solve(1);
            }
            else {
                warn "n needs a non-negative integer, e.g. \"n 12\"\n";
            }
        }
        elsif ($cmd eq 'r') {
            if (_is_number($val)) {
                $r = $val;
                _solve(1);
            }
            else {
                warn "r needs a number, e.g. \"r 5\"\n";
            }
        }
        elsif ($cmd eq 'a') {
            _solve(0);
        }
        elsif ($cmd eq 'm') {
            _print_tables();
        }
        elsif ($cmd eq 'k') {
            k_and_breaks();
        }
        elsif ($cmd eq 's') {
            # s = n+1 is the empty suffix and prints an empty line.
            if (_is_count($val) && $val >= 1 && $val <= $n + 1) {
                $j = $val - 1;
                breaks();
            }
            else {
                warn "s needs a word number between 1 and $n, e.g. \"s 3\"\n";
            }
        }
        # anything else is ignored and the prompt is shown again
    }
    return;
}

# Run the interactive program only when executed as a script, so the
# subroutines can also be loaded (require/do) without starting the prompt.
main() unless caller;