#!/bin/perl

use warnings;
use 5.010;
use File::Slurp;
use enum qw(CORRECT IN_WORD WRONG);
use Term::ANSIColor qw(:constants);
use Data::Dumper;
use List::MoreUtils qw(uniq);
use List::Util qw/shuffle/;


say "welcome to my wordle pass filter program, where it tries to guess your wordle\n";

# flush for the drama
$| = 1; 

# load dictionary
my @words = read_file("dictionary.txt", chomp => 1);

my $answer = "";

if ($#ARGV < 0) { 
  $answer = $words[int rand@words];
  say "fine i will play with myself! I will try to guess... $answer\n";
} else {
  $answer = $ARGV[0];
  say "i will try to guess $answer\n";
}

my $wlen = length($answer);

say "total dict size: " . scalar @words;
my $total = scalar @words;


@words = grep( {$_ =~ /^.{$wlen}$/} @words);
say "total $wlen letter words: " . scalar @words . "\n";

my $frequency = letter_frequency(@words);
say "letters by infrequency in dictionary: $frequency";

# generate word values ahead of time as a cache for sort.
my %wordvalues;
say "scoring whole dictionary word value frequency to pick a good first word\n";
for (@words) {
  $wordvalues{$_} = wordvalue($_);
}

# In word letters
my %iwl;
# letters in correct position 
my @cl = split('', substr(("_" x $wlen), 0, $wlen));
# incorrect letters
my %wl;

# incorrect position, avoid relooking for words that match that.
my %iwp;

# fill up the incorrect word position hash so it contains keys for each position
# (numeric) and has an array inside it... guarantee theres a better way in perl
# to do this
for(0..$wlen-1) {
  $iwp{$_} = ();
}

my $guess = "";

# keep track of the tries, we must guess a 6 letter word in 6 tries.
my $tries = 0;

say "picking first word!";

# here's our basic loop, take our @words array from the dictionary and filter it
# down based on the results of each 'turn'. Since the wordle game tells you what
# you are right or not, we instead have a "check" function that does the same.
my @start_words = @words;
while($guess ne $answer ) {

  #this can happen if you input a word that doesnt exist in the dictionary.
  if(scalar @words eq 0) {
    say "I declare a scrabble challenge! Get out your dictionary!";
    exit;
  }

  # optimization for first guess, avoid multiple of same letter, and rank
  # letters by importance by weighing the frequency they occour in the english
  # language and assigning a point value to score that from.
  unless($tries) {
    my @good_start_words = grep { uniqletters($_) } @words;

    say "filtering letter duplicates for first guess:" . scalar @good_start_words;

    @good_start_words = reverse sort wordvalue_sort @good_start_words;
    $guess = $good_start_words[0];

    say "picking $guess (score:" . wordvalue($guess) . ") because it scores highest in letter frequency.\n";
    say  " let's go!";
    say " -------------------";
  }
  # if it's our second or third try, here's a idea:
  # 
  # we know some of the first word may hit, but we know it may not have, lets
  # try and ignore the ones we know are good to elimiate a lot of letters.
  # 
  elsif($tries == 1 || $tries == 2) {
    @not_in_last_word = @words;
    for my $c (split('', @cl)) {
      @not_in_last_word = grep {not $_ =~ /$c/ } @not_in_last_word;
    }

    # again we are looking for letters, so stirp the letters!
    my @not_in_last_word = grep { uniqletters($_) } @not_in_last_word;

    # sort it by value again.
    @not_in_last_word = reverse sort wordvalue_sort @not_in_last_word;
     

    #this sucks, we should score all the words to find MAXIMALLY DIFFERENT ones
    #instead of giving up
    say "couldn't find a word uniq sorry";
    $guess = $not_in_last_word[0];
  }
  else {
    @words = reverse sort wordvalue_sort @words;
    $guess = $words[0];
  }

  # keep track of our tries.
  $tries++;

  # keep track of how many right letters
  my $corr = 0;

  # get a check results (requivelmnt to the colors after submitting a word on
  # the wordle site), and parse through them.
  for my $res (check($guess)) { 
    # @res = [word position, char, status (in the word, not in the word, correct)]

    # if correct, add it to the correct position array
    if($res->[2] == CORRECT) {
      $cl[$res->[0]] = $res->[1];
      $corr++;
      print GREEN " " . $res->[1] . " " . RESET;
    }
    # if its in a word, add to the iwl hash and store its position.
    elsif($res->[2] == IN_WORD) {
      print YELLOW " " . $res->[1] . " " . RESET; 
      $iwl{$res->[1]} = 1;
      push( @{ $iwp{ $res->[0] } }, $res->[1]);
    }
    # if its not in a word, add to the wl hash 
    elsif($res->[2] == WRONG ) {
      print RED " " . $res->[1] . " " .  RESET;
      $wl{$res->[1]} = 1;
    }
  }
  # if we're correct, we have found the right answer.
  if($corr == $wlen) {
    say "";
    last;
  }

  # rip out any words that match with wrong positions exactly, clunky as heck
  # but we basically go through iwp, which is an hash containing misses at that
  # position like
  # 0: a b c 
  # 1: a b c
  # 
  # meaning at pos 0, a b c are in the word but not that position.
  # and so forth 
  #
  # but if we put an "A" in the  first position and its in the wrong position
  # and in the word, we can eliminate all words that start with A.
  my $ignore = "";
  foreach (sort keys %iwp) {
    my $posstr = "";
    foreach my $array ($iwp{$_}) {
      $posstr = join('', uniq @$array);
    }
    if(length($posstr)) {
      $ignore .= "[^" . $posstr . "]";
    } else  {
      $ignore .= ".";
    }
  }

  if($ignore =~ /[a-z]/) {
    @words = grep { $_ =~ /$ignore/ } @words;
  }
  print "filtering: " . scalar  @words;
  # for dramatic effect
  # sleep 1; 
  print "...";


  # filter out any words we tried already.shouldnt happen unless our regexs are
  # failing
  #if($tries) {
  #  @words = grep { $_ ne $guess } @words;
  #  print scalar  @words;
  #}
  #sleep 1; 
  #print "...";

  # filter for letters in the answer only
  if(scalar keys %iwl) {
    for my $c(keys %iwl) {
      @words = grep { $_ =~ /$c/ } @words;
    }
  }
  print scalar  @words;
  # sleep 1;
  print "...";

  # if our guess has some correct letters, grep
  # for those letters in exact position.
  if($guess =~ /[^_]/ )  {
    my $pattern = join('', @cl);
    $pattern =~ s/_/./ig;
    @words = grep { $_ =~ /$pattern/ } @words;
  }
  print scalar  @words;
  # sleep 1;
  print "...";

  # filter out words with letters on the wrong
  # letter list
  if(scalar keys %wl) {
    my $pattern = '(' . join ('|', sort keys %wl) . ')';
    @words = grep { not $_ =~ /$pattern/ } @words;
  }
  print "" .  scalar @words;
  print "\n";

}

say "\nmy final answer is '$guess'\n";
say "it took me $tries/$wlen tries\n";
if($tries <= $wlen) {
  say "I won!";
  exit 0;
}
else{
  say "Sorry, I lose!";
  exit 1;
}
#  say "remaining:" . scalar @words;
#  if($corr == $wlen-1) {
#    say "final guess:" . join('', @cl) . " answer: $answer";
#    exit
#  }
#
sub check { 
  my $word = shift;
  my @res;


  for $i (0..length($word)-1){
    $char = substr($word, $i, 1);
    $achar = substr($answer, $i, 1);
    # check if in word at all
    if($achar eq $char) {
      push(@res, [$i, $char, CORRECT]);
    }
    elsif($answer =~  /$char/) {
      push(@res, [$i, $char, IN_WORD]);
    }
    else {
      push(@res, [$i, $char, WRONG]);
    }
  }
  return @res;
}

sub uniqletters {
  my $word = shift;

  my @chars = split('', $word);

  for my $char (@chars) {
    my $ct = ()= $word =~ m/[$char]/g;

    if($ct > 1) {
      return 0;
    }
  }
  return 1;
}


# okay so the premise here is to create a sort based on the frequency of a a
# letter... higher frequnency = higher initial hit = higher sort order.

sub wordvalue {
  my $w = shift;

  # my $freq = "etaoinsrhdlucmfywgpbvkxqjz";
  # reverse frequency, later numbers have higher value
  # general language english:
  # this should be calculated by length 
  my $f = "zjqxkvbpgwyfmculdhrsnioate";

  my $v = 1;
  for my $c (split('', $w)) {
    # this is basically bullshit, but sum the value against the index
    # position...
    $v += (index($f, $c));
  }
  return $v;
}

# i stole this from stackoverflow.
sub wordvalue_sort  {
  # use a lookup table.
  $av = $wordvalues{$a};
  $bv = $wordvalues{$b};

  if($av < $bv){
    return -1;
  }elsif($av == $bv){
    return 0;
  }else{
    return 1;
  }
}

sub letter_frequency {
  my $charfreqhash;
  for (@_) {
    for $char (split('', $_)) {
      $charfreqhash{$char}++;
    }
  }
  my @char_frequency = reverse sort { $charfreqhash{$b} <=> $charfreqhash{$a} } keys %charfreqhash;
  return join('', @char_frequency);
}