#!/usr/bin/perl # Conversation 2 Library # 7-May-95 Aneel Nazareth # 12-May-95 Aneel Nazareth # 10-Apr-96 Matthew Gray, mkgray@mit.edu # Added substitution of user input into responses (ver 0.71) # Added pronoun substitution (ver 0.71) # Added randomization of multiple matches (ver 0.72) # Fixed bug in filpr (ver 0.72) $conversion=0.72; srand; ### Data Abstraction undef %convdata; undef %keysdata; # get(keyword,field,index) sub get { $name=$_[0]."\t".$_[1]."\t".$_[2]; # warn "getting $name = $convdata{$name}\n"; return $convdata{$name}; } # put(keyword,index,field,value) sub put { $name=$_[0]."\t".$_[1]."\t".$_[2]; # warn "putting $name = $_[3]\n"; $keysdata{$_[0]}=$_[0] unless $_[0] eq "character"; return $convdata{$name}=$_[3]; } ### Files # readcharacter(filename) # read a character file sub readcharacter { open(FILE, "<$_[0]"); $_=; s/\n//; &put(character,0,charname,$_); $_=; s/\n//; &put(character,0,charauthor,$_); $_=; s/\n//; &put(character,0,introline,$_); $_=; s/\n//; &put(character,0,charstyle,$_); $_=; s/\n//; &put(character,0,userstyle,$_); $/=";\n"; while () { s/;//; @array=split(/\n/, $_); $first=shift(@array); ($randomize, $firstkey, $synonyms)=split(/\t/,$first,3); &put($firstkey,0,randomize,$randomize); @synonyms=split(/\t/, $synonyms); pop @synonyms; &put($firstkey,0,synonyms,$firstkey."\t".join("\t",@synonyms)); &put($firstkey,0,count,$#array); $second=shift(@array); &put($firstkey,0,leadin); for ($i=0; $i<=$#array; $i++) { ($response, $repeatable, $stayontopic) =split(/\t/,$array[$i]); &put($firstkey,$i,response,$response); &put($firstkey,$i,repeatable,$repeatable); &put($firstkey,$i,stayontopic,$stayontopic); &put($firstkey,$i,used,0); } } $/="\n"; close(FILE); } # readcharacter(filename) # read a character file sub readcharacter { open(FILE, "<$_[0]"); $_=; s/\n//; &put(character,0,charname,$_); $_=; s/\n//; &put(character,0,charauthor,$_); $_=; s/\n//; &put(character,0,introline,$_); $_=; s/\n//; &put(character,0,charstyle,$_); $_=; s/\n//; &put(character,0,userstyle,$_); $/=";\n"; while () { s/;//; @array=split(/\n/, $_); $first=shift(@array); ($randomize, $firstkey, $synonyms)=split(/\t/,$first,3); &put($firstkey,0,randomize,$randomize); @synonyms=split(/\t/, $synonyms); pop @synonyms; &put($firstkey,0,synonyms,$firstkey."\t".join("\t",@synonyms)); &put($firstkey,0,count,$#array); $second=shift(@array); &put($firstkey,0,leadin); for ($i=0; $i<=$#array; $i++) { ($response, $repeatable, $stayontopic) =split(/\t/,$array[$i]); &put($firstkey,$i,response,$response); &put($firstkey,$i,repeatable,$repeatable); &put($firstkey,$i,stayontopic,$stayontopic); &put($firstkey,$i,used,0); } } $/="\n"; close(FILE); } # writecharacter(filename) # write a character file sub writecharacter { unlink($_[0]); open(FILE, ">>$_[0]") || die $!; select(FILE); print &get(character,0,charname)."\n"; print &get(character,0,charauthor)."\n"; print &get(character,0,introline)."\n"; print &get(character,0,charstyle)."\n"; print &get(character,0,userstyle)."\n"; foreach $firstkey (sort keys %keysdata) { select(FILE); print &get($firstkey,0,randomize)."\t"; # print $firstkey."\t"; print &get($firstkey,0,synonyms)."\t0\t\n"; $count = &get($firstkey,0,count); print &get($firstkey,0,leadin); for ($i=0; $i<$count; $i++) { print "\n".&get($firstkey,$i,response)."\t"; print &get($firstkey,$i,repeatable)."\t"; print &get($firstkey,$i,stayontopic,$stayontopic); } print ";\n"; } $/="\n"; close(FILE); select(STDOUT); return; } ### Conversation # match(input, key) # tell if key or any of its synonyms are in input sub match { ($input, $onkey)=@_; $matched=0; foreach $syn ( split(/\t/,&get($onkey,0,synonyms)) ) { $matched=($input=~/$syn/i); $one = $1; $two = $2; $three = $3; $four=$4; $matchtext=$& if $matched; # warn "$syn , $input -> $matched '$matchtext'\n"; last if $matched; } return(0) unless $matched; return($matchtext) if $matched; } # useableresponses(keyword) # make an array containing the indices of the responses that may be used # sort it so that we don't use repeatable responses again if unused # responses remain sub useableresponses { $keyword=$_[0]; undef @useable; for ($i=0; $i<&get($keyword,0,count); $i++) { push(@useable, $i) unless (&get($keyword,$i,used) && !&get($keyword,$i,repeatable)); #warn "i=$i useable=@useable $keyword $#useable"; } @useable = sort {&get($keyword,$a,used) <=> &get($keyword,$b,used)} @useable; #warn "$#useable $keyword"; return $#useable; } # respond(input) # respond properly to input sub respond { $input=$_[0]; $key=''; @keys = (); # first let's see if we can match on the last key # in order to try to stay on topic # making sure, of course, that useable responses still exist... if ( $lastkey && &match($input,$lastkey) && (&useableresponses($lastkey) >= 0) ) { $key=$lastkey; #warn "matched last $key\n"; } # next we shall try to match on any key # making sure useable responses exist else { foreach $keyword (keys %keysdata) { if ( &match($input,$keyword) && (&useableresponses($keyword) >= 0) ) { $key=$keyword; #warn "matched any $key\n"; push(@keys, $key); } } } # no dice? then check to see if we're supposed to stay on topic $key=$lastkey if ((!$key) && ($lastkey) && (&get($lastkey,$lastresp,stayontopic)) ); # give up and choose a dummy response $key='@DUMMY@' unless $key; # Do we have more than one match? if($#keys > 0){ $key = $keys[rand($#keys+1)]; } # so now we've got a key... &useableresponses($key); # warn "$key useable=@useable\n"; if (&get($key,0,randomize)) { $choice=rand $#useable; # choose randomly } else { $choice=0; # choose first } &put($key,$useable[$choice],used, &get($key,$useable[$choice],used) + 1); # mark as used #warn &get($key,$useable[$choice],used); $lastkey=$key; # set variables for next time $lastresp=$useable[choice]; # print("Key is $key\n"); $ret = &get($key,$useable[$choice],response); if($ret =~ /__/){ $mout = &match($input, $key); $one = &pronouns($one); $two = &pronouns($two); $three = &pronouns($three); $four = &pronouns($four); $ret =~s/__1__/$one/; $ret =~s/__2__/$two/; $ret =~s/__3__/$three/; $ret =~s/__4__/$four/; } return $ret; } ### Interface # linebreak(string,maxlength) # if string is longer than length (default 70) characters # break it on a word boundary and print it sub linebreak { #print "* linebreak @_\n"; $maxlength = $_[1] if $_[1]; $maxlength = 70 unless $_[1]; if (length $_[0] > $maxlength) { @words=split(/\s+/,$_[0]); while (defined $words[0]) { $line=''; while (length ($line." $words[0]") < $maxlength && (defined $words[0])) { #warn $words[0]; $line .= shift(@words) . ' '; } print "$line\n"; } } else { print $_[0]; } } # filpr(string) # print string filtered of HTML tags (or anything that looks like one) sub filpr { #print "* filpr @_\n"; $_[0] =~ s/<[^<>]*>//g; #print("$_[0]"); &linebreak("$_[0]"); } sub pronouns { local($phrase) = @_; if(($ch = chop($phrase)) =~ /\w/){ $phrase .= $ch; } $phrase=~s/\bmy\b/XXX-A/gi; $phrase=~s/\byour\b/XXX-B/gi; $phrase=~s/\bi am\b/XXX-C/gi; $phrase=~s/\bi'm\b/XXX-D/gi; $phrase=~s/\bi\b/XXX-E/gi; $phrase=~s/\byou are\b/XXX-F/gi; $phrase=~s/\byou're\b/XXX-G/gi; $phrase=~s/\byou\b/XXX-H/gi; $phrase=~s/XXX-A/your/g; $phrase=~s/XXX-B/my/g; $phrase=~s/XXX-C/you are/g; $phrase=~s/XXX-D/you're/g; $phrase=~s/XXX-E/you/g; $phrase=~s/XXX-F/I am/g; $phrase=~s/XXX-G/I'm/g; $phrase=~s/XXX-H/me/g; return $phrase; } 22;