""" word_coverage.py Compute the minimum number of words from five-letter-words needed to cover N letters from the alphabet. This can be done in O(N^2) time with a dynamic program. For each word that we choose, we have to look at all other words to see how many letters those two cover, combined. https://charlesreid1.com/wiki/Five_Letter_Words https://charlesreid1.com/wiki/Letter_Coverage """ from get_words import get_words import numpy as np from pprint import pprint def word2bitvector(word,N): """ Turns a five-letter word into a bit vector representing character coverage. Uses 26 letters by default. """ bit_vector = [False,]*N for c in word: i = ord(c)-ord('a') try: bit_vector[i] = True except IndexError: pass return np.array(bit_vector) def printbv(bv): """ Pretty printing for boolean bit vector """ result = "" for bit in bv: if bit: result += "1" else: result += "0" return result def btsolution(min_key, min_val, words, bt): """ Reconstruct the sequence of words that gives maximum coverage and minimum word count. Input: minimum word key (last word), minimum value (number of words), backtrack (prior word) Output: list of words """ solution = [] solution.append(words[min_key]) prior_key = bt[min_key] while prior_key != -1: solution.append(words[prior_key]) prior_key = bt[prior_key] return reversed(solution) def get_dummy_words(): return ["aa","ab","bc","aa","dd","de","bb"] if __name__=="__main__": # Searching for words covering first N letters N = 15 words = get_words() words = words[:1000] # Initialization: # ---------------- # Store best coverage bitvectors for each word bestcoverage_bv = [np.array([False]*N) for k in range(len(words))] # Store number of 1s for best coverage vector for each word ones_bv = [-1]*len(words) # Store number of words in best solution for each word ws = [0]*len(words) # Store prior word for backtracking bt = [-1]*len(words) # Fencepost: Initial Step # Word 0 # ---------------- i = 0 # Start with word 0 wi = words[i] # Best letter coverage bit vector bestcoverage_bv[i] = word2bitvector(words[i],N) # Length of 1s ones_bv[i] = sum(bestcoverage_bv[i]) # Number of words in best solution: ws[i] = 1 # Backtracking: first word has no prior word bt[i] = -1 # Start by assuming the word by itself, # and then examine each possible pairing for i in range(1,len(words)): wi = words[i] # Start with bitvector of word i's coverage wi_bv = word2bitvector(wi,N) # Fencepost: initial step # Word i by itself # Assume word i is the first word in the solution, # and if we find a better combination with prior word, # overwrite this solution. # ------------------------ # Best coverage so far (first guess) is word i by itself bestcoverage_bv[i] = wi_bv # Count ones in (first guess) best bitvector ones_bv[i] = sum(bestcoverage_bv[i]) # Number of words in new best solution: ws[i] = 1 # Backtracking bt[i] = -1 # Boolean: is this the first word in the sequence of solutions? first = True # Now loop over the rest of the words, # and look for a better solution. for j in reversed(range(0,i)): # Get the prior word wj = words[j] # Get best coverage bitvector wj_bv = bestcoverage_bv[j] # (potential) new combined coverage vector bestcoverage_bv_i = np.logical_or(wi_bv, wj_bv) # Number of ones in (potential) new combined coverage vector ones_bv_i = sum(bestcoverage_bv_i) # Number of words in (potential) new best solution ws_i = ws[j]+1 # If this solution is better than our current one, # overwrite the current solution. # (Better means, "more ones", or "same ones and fewer words".) #import pdb; pdb.set_trace(); if( (ones_bv_i > ones_bv[i]) or (ones_bv_i==ones_bv[i] and ws_i < ws[i]) ): bestcoverage_bv[i] = bestcoverage_bv_i ones_bv[i] = ones_bv_i ws[i] = ws_i bt[i] = j # This word now follows another word in the sequence of solutions first = False # It's tempting to stop early, # but what if we find the perfect # solution right at the end?!? # Okay, now actually get the solution. # The solution is the maximum of ones_bv and the minimum of ws # # Start by finding the maximum(s) of ones_bv # Then check each corresponding index of ws ones_bv_indices = [k for k,v in enumerate(ones_bv) if v==max(ones_bv)] min_key = ones_bv_indices[0] min_val = ones_bv[ones_bv_indices[0]] for ix in reversed(ones_bv_indices[1:]): if(ones_bv[ix] < min_key): min_key = ix min_val = ones_bv[ix] solution = list(btsolution(min_key, min_val, words, bt)) print("Takes "+str(len(solution))+" words to cover "+str(N)+" letters") pprint(solution)