From 9ec80dffbcb470bea517ed839acd11f6573f3f77 Mon Sep 17 00:00:00 2001
From: Calvin Morrison <calvin@fastmailteam.com>
Date: Fri, 14 Jan 2022 11:24:21 -0500
Subject: try to do a wide sweep on 2nd an 3rd turns to eliminate characters to
 avoid searching for 'bad 'fad' 'sad' and instead search like 'rig' to elim
 'rad' and 'gad'

---
 wordle.pl | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/wordle.pl b/wordle.pl
index a44026c..1f9f608 100644
--- a/wordle.pl
+++ b/wordle.pl
@@ -38,11 +38,11 @@ my $total = scalar @words;
 say "total $wlen letter words: " . scalar @words . "\n";
 
 my $frequency = letter_frequency(@words);
-say "calc: $frequency";
+say "letters by infrequency in dictionary: $frequency";
 
 # generate word values ahead of time as a cache for sort.
 my %wordvalues;
-say "building wordvalues\n";
+say "scoring whole dictionary word value frequency to pick a good first word\n";
 for (@words) {
   $wordvalues{$_} = wordvalue($_);
 }
@@ -74,6 +74,7 @@ say "picking first word!";
 # here's our basic loop, take our @words array from the dictionary and filter it
 # down based on the results of each 'turn'. Since the wordle game tells you what
 # you are right or not, we instead have a "check" function that does the same.
+my @start_words = @words;
 while($guess ne $answer ) {
 
   #this can happen if you input a word that doesnt exist in the dictionary.
@@ -82,10 +83,6 @@ while($guess ne $answer ) {
     exit;
   }
 
-  # pull our guess from the top of the list.
-  # should this be a shift?
-  $guess = $words[0];
-
   # optimization for first guess, avoid multiple of same letter, and rank
   # letters by importance by weighing the frequency they occour in the english
   # language and assigning a point value to score that from.
@@ -100,7 +97,32 @@ while($guess ne $answer ) {
     say "picking $guess (score:" . wordvalue($guess) . ") because it scores highest in letter frequency.\n";
     say  " let's go!";
     say " -------------------";
-  } else {
+  }
+  # if it's our second or third try, here's a idea:
+  # 
+  # we know some of the first word may hit, but we know it may not have, lets
+  # try and ignore the ones we know are good to elimiate a lot of letters.
+  # 
+  elsif($tries == 1 || $tries == 2) {
+    @not_in_last_word = @words;
+    for my $c (split('', @cl)) {
+      @not_in_last_word = grep {not $_ =~ /$c/ } @not_in_last_word;
+    }
+
+    # again we are looking for letters, so stirp the letters!
+    my @not_in_last_word = grep { uniqletters($_) } @not_in_last_word;
+
+    # sort it by value again.
+    @not_in_last_word = reverse sort wordvalue_sort @not_in_last_word;
+     
+
+    #this sucks, we should score all the words to find MAXIMALLY DIFFERENT ones
+    #instead of giving up
+    say "couldn't find a word uniq sorry";
+    $guess = $not_in_last_word[0];
+  }
+  else {
+    @words = reverse sort wordvalue_sort @words;
     $guess = $words[0];
   }
 
@@ -277,13 +299,13 @@ sub wordvalue {
   # reverse frequency, later numbers have higher value
   # general language english:
   # this should be calculated by length 
-  # my $f = "zjqxkvbpgwyfmculdhrsnioate";
+  my $f = "zjqxkvbpgwyfmculdhrsnioate";
 
   my $v = 1;
   for my $c (split('', $w)) {
     # this is basically bullshit, but sum the value against the index
     # position...
-    $v += (index($frequency, $c));
+    $v += (index($f, $c));
   }
   return $v;
 }
-- 
cgit v1.2.3