summaryrefslogtreecommitdiff
path: root/wordle.pl
blob: 1f9f60893ff0fea3e3d414d613af64c04fb98bcb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#!/bin/perl

use warnings;
use 5.010;
use File::Slurp;
use enum qw(CORRECT IN_WORD WRONG);
use Term::ANSIColor qw(:constants);
use Data::Dumper;
use List::MoreUtils qw(uniq);
use List::Util qw/shuffle/;


say "welcome to my wordle pass filter program, where it tries to guess your wordle\n";

# flush for the drama
$| = 1; 

# load dictionary
my @words = read_file("dictionary.txt", chomp => 1);

my $answer = "";

if ($#ARGV < 0) { 
  $answer = $words[int rand@words];
  say "fine i will play with myself! I will try to guess... $answer\n";
} else {
  $answer = $ARGV[0];
  say "i will try to guess $answer\n";
}

my $wlen = length($answer);

say "total dict size: " . scalar @words;
my $total = scalar @words;


@words = grep( {$_ =~ /^.{$wlen}$/} @words);
say "total $wlen letter words: " . scalar @words . "\n";

my $frequency = letter_frequency(@words);
say "letters by infrequency in dictionary: $frequency";

# generate word values ahead of time as a cache for sort.
my %wordvalues;
say "scoring whole dictionary word value frequency to pick a good first word\n";
for (@words) {
  $wordvalues{$_} = wordvalue($_);
}

# In word letters
my %iwl;
# letters in correct position 
my @cl = split('', substr(("_" x $wlen), 0, $wlen));
# incorrect letters
my %wl;

# incorrect position, avoid relooking for words that match that.
my %iwp;

# fill up the incorrect word position hash so it contains keys for each position
# (numeric) and has an array inside it... guarantee theres a better way in perl
# to do this
for(0..$wlen-1) {
  $iwp{$_} = ();
}

my $guess = "";

# keep track of the tries, we must guess a 6 letter word in 6 tries.
my $tries = 0;

say "picking first word!";

# here's our basic loop, take our @words array from the dictionary and filter it
# down based on the results of each 'turn'. Since the wordle game tells you what
# you are right or not, we instead have a "check" function that does the same.
my @start_words = @words;
while($guess ne $answer ) {

  #this can happen if you input a word that doesnt exist in the dictionary.
  if(scalar @words eq 0) {
    say "I declare a scrabble challenge! Get out your dictionary!";
    exit;
  }

  # optimization for first guess, avoid multiple of same letter, and rank
  # letters by importance by weighing the frequency they occour in the english
  # language and assigning a point value to score that from.
  unless($tries) {
    my @good_start_words = grep { uniqletters($_) } @words;

    say "filtering letter duplicates for first guess:" . scalar @good_start_words;

    @good_start_words = reverse sort wordvalue_sort @good_start_words;
    $guess = $good_start_words[0];

    say "picking $guess (score:" . wordvalue($guess) . ") because it scores highest in letter frequency.\n";
    say  " let's go!";
    say " -------------------";
  }
  # if it's our second or third try, here's a idea:
  # 
  # we know some of the first word may hit, but we know it may not have, lets
  # try and ignore the ones we know are good to elimiate a lot of letters.
  # 
  elsif($tries == 1 || $tries == 2) {
    @not_in_last_word = @words;
    for my $c (split('', @cl)) {
      @not_in_last_word = grep {not $_ =~ /$c/ } @not_in_last_word;
    }

    # again we are looking for letters, so stirp the letters!
    my @not_in_last_word = grep { uniqletters($_) } @not_in_last_word;

    # sort it by value again.
    @not_in_last_word = reverse sort wordvalue_sort @not_in_last_word;
     

    #this sucks, we should score all the words to find MAXIMALLY DIFFERENT ones
    #instead of giving up
    say "couldn't find a word uniq sorry";
    $guess = $not_in_last_word[0];
  }
  else {
    @words = reverse sort wordvalue_sort @words;
    $guess = $words[0];
  }

  # keep track of our tries.
  $tries++;

  # keep track of how many right letters
  my $corr = 0;

  # get a check results (requivelmnt to the colors after submitting a word on
  # the wordle site), and parse through them.
  for my $res (check($guess)) { 
    # @res = [word position, char, status (in the word, not in the word, correct)]

    # if correct, add it to the correct position array
    if($res->[2] == CORRECT) {
      $cl[$res->[0]] = $res->[1];
      $corr++;
      print GREEN " " . $res->[1] . " " . RESET;
    }
    # if its in a word, add to the iwl hash and store its position.
    elsif($res->[2] == IN_WORD) {
      print YELLOW " " . $res->[1] . " " . RESET; 
      $iwl{$res->[1]} = 1;
      push( @{ $iwp{ $res->[0] } }, $res->[1]);
    }
    # if its not in a word, add to the wl hash 
    elsif($res->[2] == WRONG ) {
      print RED " " . $res->[1] . " " .  RESET;
      $wl{$res->[1]} = 1;
    }
  }
  # if we're correct, we have found the right answer.
  if($corr == $wlen) {
    say "";
    last;
  }

  # rip out any words that match with wrong positions exactly, clunky as heck
  # but we basically go through iwp, which is an hash containing misses at that
  # position like
  # 0: a b c 
  # 1: a b c
  # 
  # meaning at pos 0, a b c are in the word but not that position.
  # and so forth 
  #
  # but if we put an "A" in the  first position and its in the wrong position
  # and in the word, we can eliminate all words that start with A.
  my $ignore = "";
  foreach (sort keys %iwp) {
    my $posstr = "";
    foreach my $array ($iwp{$_}) {
      $posstr = join('', uniq @$array);
    }
    if(length($posstr)) {
      $ignore .= "[^" . $posstr . "]";
    } else  {
      $ignore .= ".";
    }
  }

  if($ignore =~ /[a-z]/) {
    @words = grep { $_ =~ /$ignore/ } @words;
  }
  print "filtering: " . scalar  @words;
  # for dramatic effect
  # sleep 1; 
  print "...";


  # filter out any words we tried already.shouldnt happen unless our regexs are
  # failing
  #if($tries) {
  #  @words = grep { $_ ne $guess } @words;
  #  print scalar  @words;
  #}
  #sleep 1; 
  #print "...";

  # filter for letters in the answer only
  if(scalar keys %iwl) {
    for my $c(keys %iwl) {
      @words = grep { $_ =~ /$c/ } @words;
    }
  }
  print scalar  @words;
  # sleep 1;
  print "...";

  # if our guess has some correct letters, grep
  # for those letters in exact position.
  if($guess =~ /[^_]/ )  {
    my $pattern = join('', @cl);
    $pattern =~ s/_/./ig;
    @words = grep { $_ =~ /$pattern/ } @words;
  }
  print scalar  @words;
  # sleep 1;
  print "...";

  # filter out words with letters on the wrong
  # letter list
  if(scalar keys %wl) {
    my $pattern = '(' . join ('|', sort keys %wl) . ')';
    @words = grep { not $_ =~ /$pattern/ } @words;
  }
  print "" .  scalar @words;
  print "\n";

}

say "\nmy final answer is '$guess'\n";
say "it took me $tries/$wlen tries\n";
if($tries <= $wlen) {
  say "I won!";
  exit 0;
}
else{
  say "Sorry, I lose!";
  exit 1;
}
#  say "remaining:" . scalar @words;
#  if($corr == $wlen-1) {
#    say "final guess:" . join('', @cl) . " answer: $answer";
#    exit
#  }
#
sub check { 
  my $word = shift;
  my @res;


  for $i (0..length($word)-1){
    $char = substr($word, $i, 1);
    $achar = substr($answer, $i, 1);
    # check if in word at all
    if($achar eq $char) {
      push(@res, [$i, $char, CORRECT]);
    }
    elsif($answer =~  /$char/) {
      push(@res, [$i, $char, IN_WORD]);
    }
    else {
      push(@res, [$i, $char, WRONG]);
    }
  }
  return @res;
}

sub uniqletters {
  my $word = shift;

  my @chars = split('', $word);

  for my $char (@chars) {
    my $ct = ()= $word =~ m/[$char]/g;

    if($ct > 1) {
      return 0;
    }
  }
  return 1;
}


# okay so the premise here is to create a sort based on the frequency of a a
# letter... higher frequnency = higher initial hit = higher sort order.

sub wordvalue {
  my $w = shift;

  # my $freq = "etaoinsrhdlucmfywgpbvkxqjz";
  # reverse frequency, later numbers have higher value
  # general language english:
  # this should be calculated by length 
  my $f = "zjqxkvbpgwyfmculdhrsnioate";

  my $v = 1;
  for my $c (split('', $w)) {
    # this is basically bullshit, but sum the value against the index
    # position...
    $v += (index($f, $c));
  }
  return $v;
}

# i stole this from stackoverflow.
sub wordvalue_sort  {
  # use a lookup table.
  $av = $wordvalues{$a};
  $bv = $wordvalues{$b};

  if($av < $bv){
    return -1;
  }elsif($av == $bv){
    return 0;
  }else{
    return 1;
  }
}

sub letter_frequency {
  my $charfreqhash;
  for (@_) {
    for $char (split('', $_)) {
      $charfreqhash{$char}++;
    }
  }
  my @char_frequency = reverse sort { $charfreqhash{$b} <=> $charfreqhash{$a} } keys %charfreqhash;
  return join('', @char_frequency);
}