1 files changed, 264 insertions, 0 deletions
diff --git a/wordle.pl b/wordle.pl
new file mode 100644
index 0000000..716c37c
--- /dev/null
+++ b/wordle.pl
@@ -0,0 +1,264 @@
+#!/bin/perl
+
+use warnings;
+use 5.010;
+use File::Slurp;
+use enum qw(CORRECT IN_WORD WRONG);
+use Term::ANSIColor qw(:constants);
+use Data::Dumper;
+use List::MoreUtils qw(uniq);
+use List::Util qw/shuffle/;
+
+
+say "welcome to my wordle pass filter program, where it tries to guess your wordle\n";
+
+$| = 1;
+# word len determines number of guesses and dict filter.
+my $answer = $ARGV[0];
+my $wlen = length($answer);
+
+# load dictionary
+my @words = read_file("dictionary.txt", chomp => 1);
+say "total dict size: " . scalar @words;
+my $total = scalar @words;
+
+# filter to word length  
+@words = grep( {$_ =~ /^.{$wlen}$/} @words);
+say "total $wlen letter words: " . scalar @words . "\n";
+
+# In word letters
+my %iwl;
+# letters in correct position 
+my @cl = split('', substr(("_" x $wlen), 0, $wlen));
+# incorrect letters
+my %wl;
+
+# incorrect position, avoid relooking for words that match that.
+my %iwp;
+
+for(0..$wlen-1) {
+  $iwp{$_} = ();
+}
+
+my $guess = "";
+my $tries = 0;
+
+say "picking first word!";
+while($guess ne $answer ) {
+
+  #this can happen if you input a word that doesnt exist in the dictionary..
+  if(scalar @words eq 0) {
+    say "I declare a scrabble challenge! Get out your dictionary!";
+    exit;
+  }
+
+  # pull our guess...
+  $guess = $words[0];
+
+  # optimization for first guess, avoid multiple of same letter, and rank
+  # letters by importance by weigning the frequency they occour in the english
+  # language and assigning a point value to score that from.
+  unless($tries) {
+    my @good_start_words = grep { uniqletters($_) } @words;
+
+    say "filtering letter duplicates for first guess:" . scalar @good_start_words;
+
+    @good_start_words = reverse sort wordvalue @good_start_words;
+    $guess = $good_start_words[0];
+
+    say "picking $guess (score:" . lvalue($guess) . ") because it scores highest in letter frequency.\n";
+    say  " let's go!";
+    say " -------------------";
+  } else {
+    $guess = $words[0];
+  }
+
+  # keep track of our tries.
+  $tries++;
+
+  # keep track of how many right letters
+  my $corr = 0;
+
+  # get a check results (requivelant to the colors after sumbitting a word on
+  # the wordle site), and parse through them.
+  for my $res (check($guess)) { 
+    # @res = [pos, char, status]
+
+    # if correct, add it to the correct position array
+    if($res->[2] == CORRECT) {
+      $cl[$res->[0]] = $res->[1];
+      $corr++;
+      print GREEN " " . $res->[1] . " " . RESET;
+    }
+    # if its in a word, add to the iwl hash and store its position.
+    elsif($res->[2] == IN_WORD) {
+      print YELLOW " " . $res->[1] . " " . RESET; 
+      $iwl{$res->[1]} = 1;
+      push( @{ $iwp{ $res->[0] } }, $res->[1]);
+    }
+    # if its not in a word, add to the wl hash 
+    elsif($res->[2] == WRONG ) {
+      print RED " " . $res->[1] . " " .  RESET;
+      $wl{$res->[1]} = 1;
+    }
+  }
+  # if we're correct, we have found the right answer.
+  if($corr == $wlen) {
+    say "";
+    last;
+  }
+
+  # rip out any words that match with wrong positions exactly, clunky as heck
+  # but we basically go through iwp, which is an hash containing misses at that
+  # position like
+  # 0: a b c 
+  # 1: a b c
+  # 
+  # meaning at pos 0, a b c are in the word but not that position.
+  # and so forth 
+  #
+  my $ignore = "";
+  foreach (sort keys %iwp) {
+    my $posstr = "";
+    foreach my $array ($iwp{$_}) {
+      $posstr = join('', uniq @$array);
+    }
+    if(length($posstr)) {
+      $ignore .= "[^" . $posstr . "]";
+    } else  {
+      $ignore .= ".";
+    }
+  }
+
+  if($ignore =~ /[a-z]/) {
+    @words = grep { $_ =~ /$ignore/ } @words;
+  }
+  print "filtering: " . scalar  @words;
+  sleep 1; 
+  print "...";
+
+
+  # filter out any words we tried already.shouldnt happen unless our regexs are
+  # failing
+  #if($tries) {
+  #  @words = grep { $_ ne $guess } @words;
+  #  print scalar  @words;
+  #}
+  #sleep 1; 
+  #print "...";
+
+  # filter for letters in the answer only
+  if(scalar keys %iwl) {
+    my $pattern = '(' . join ('|', sort keys %iwl) . ')';
+    @words = grep { $_ =~ /$pattern/ } @words;
+  }
+  print scalar  @words;
+  sleep 1;
+  print "...";
+
+  # if our guess has some correct letters, grep
+  # for those letters in exact position.
+  if($guess =~ /[^_]/ )  {
+    my $pattern = join('', @cl);
+    $pattern =~ s/_/./ig;
+    @words = grep { $_ =~ /$pattern/ } @words;
+  }
+  print scalar  @words;
+  sleep 1;
+  print "...";
+
+  # filter out words with letters on the wrong
+  # letter list
+  if(scalar keys %wl) {
+    my $pattern = '(' . join ('|', sort keys %wl) . ')';
+    @words = grep { not $_ =~ /$pattern/ } @words;
+  }
+  print "" .  scalar @words;
+  print "\n";
+
+}
+
+say "\nmy final answer is '$guess'\n";
+say "it took me $tries/$wlen tries\n";
+if($tries <= $wlen) {
+  say "I won!";
+}
+else{
+  say "Sorry, I lose!";
+}
+#  say "remaining:" . scalar @words;
+#  if($corr == $wlen-1) {
+#    say "final guess:" . join('', @cl) . " answer: $answer";
+#    exit
+#  }
+#
+sub check { 
+  my $word = shift;
+  my @res;
+
+
+  for $i (0..length($word)-1){
+    $char = substr($word, $i, 1);
+    $achar = substr($answer, $i, 1);
+    # check if in word at all
+    if($achar eq $char) {
+      push(@res, [$i, $char, CORRECT]);
+    }
+    elsif($answer =~  /$char/) {
+      push(@res, [$i, $char, IN_WORD]);
+    }
+    else {
+      push(@res, [$i, $char, WRONG]);
+    }
+  }
+  return @res;
+}
+
+sub uniqletters {
+  my $word = shift;
+
+  my @chars = split('', $word);
+
+  for my $char (@chars) {
+    my $ct = ()= $word =~ m/[$char]/g;
+
+    if($ct > 1) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+
+# okay so the premise here is to create a sort based on the frequency of a a
+# letter... higher frequnency = higher initial hit = higher sort order.
+
+sub lvalue {
+  my $w = shift;
+
+  # my $freq = "etaoinsrhdlucmfywgpbvkxqjz";
+  # reverse frequency, later numbers have higher value
+  my $f= "zjqxkvbpgwyfmculdhrsnioate";
+
+  my $v = 1;
+  for my $c (split('', $w)) {
+    # this is basically bullshit, but sum the value against the index
+    # position...
+    $v += (index($f, $c));
+  }
+  return $v;
+}
+
+# i stole this from stackoverflow.
+sub wordvalue  {
+  $av = lvalue($a);
+  $bv = lvalue($b);
+
+  if($av < $bv){
+    return -1;
+  }elsif($av == $bv){
+    return 0;
+  }else{
+    return 1;
+  }
+}