Experiments with Knuth's 5,757 five letter words. https://charlesreid1.com/wiki/Five_Letter_Words
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

#### 302 lines 7.6 KiB Raw Permalink Blame History

 ``````#!/usr/bin/env python from get_words import get_words import sys import math """ tries.py Donald Knuth, Art of Computer Programming, Volume 4 Fascicle 0 Exercise #35 Problem: What letters of the alphabet can be used as the starting letter of sixteen words that form a complete binary trie within WORDS(n), given n? Example trie: Left side: s h e o e l r w Right side: s t a e l r a e """ ALPHABET = "abcdefghijklmnopqrstuvwxyz" FIVE = 5 class Node(object): def __init__(self, letter, count=0): self.letter = letter self.count = count self.children = [] self.parent = None class TryTrieTree(object): def __init__(self,words): self.root = None self.words = words def __str__(self): final = "" depth = 1 runner = self.root def _str_recursive(runner,depth): # In order traversal: # visit this node first, # then visit children if any s = "" s += ">"*depth s += " " s += self.get_prefix_from_node(runner) s += runner.letter s += ": %d"%(runner.count) s += "\n" # Base case if runner.children == []: # leaf node return s # Recursive case else: for child in runner.children: s += _str_recursive(child,depth+1) return s final = _str_recursive(runner,depth) return final def set_root(self,root_letter): self.root = Node(root_letter) def get_prefix_from_node(self,node): """Given a node in the trie, return the string prefix that would lead to that node. """ if node==None: return "" elif node==self.root: return "" else: prefix = "" while node.parent != None: node = node.parent prefix = node.letter + prefix return prefix def get_node_from_prefix(self,prefix): """Given a string prefix, return the node that represents the tail end of that sequence of letters in this trie. Return None if the path does not exist. """ assert self.root!=None if prefix=='': return None assert prefix==self.root.letter # Base case if len(prefix)==1: return self.root # Recursive case parent_prefix, suffix = prefix[:len(prefix)-1],prefix[len(prefix)-1] parent = self.get_node_from_prefix(parent_prefix) for child in parent.children: if child.letter == suffix: return child # We know this will end because we handle # the base case of prefix="", and prefix # is cut down by one letter each iteration. def assemble(self): """Assemble the trie from the set of words passed to the constructor. """ assert self.root!=None words = self.words # start with an empty prefix prefix = '' candidate = self.root.letter self._assemble(prefix,candidate,words) def _assemble(self,prefix,candidate,words): """Recursive private method called by assemble(). """ prefix_depth = len(prefix) candidate_depth = prefix_depth+1 ppc = prefix+candidate words_with_candidate = [w for w in words if w[:candidate_depth]==ppc] min_branches_req = int(math.pow(2,5-candidate_depth)) max_number_branches = len(words_with_candidate) # If we exceed the minimum number of # branches required, add candidate # as a new node on the trie. if max_number_branches >= min_branches_req: parent = self.get_node_from_prefix(prefix) # If we are looking at the root node, if prefix=='': # parent will be None. # In this case don't worry about # creating new child or introducing # parent and child, b/c the "new child" # is the root (already exists). pass else: # Otherwise, create the new child, # and introduce the parent & child. new_child = Node(candidate) new_child.parent = parent parent.children.append(new_child) # Base case if candidate_depth==4: new_child.count = max_number_branches return # Recursive case for new_candidate in ALPHABET: new_prefix = prefix + candidate self._assemble(new_prefix,new_candidate,words_with_candidate) # otherwise, we don't have enough # branches to continue downward, # so stop here and do nothing. return def bubble_up(self): """Do a depth-first traversal of the entire trytrietree, pruning as we go. This is a pre-order traversal, meaning we traverse children first, then the parents, so we always know the counts of children (or we are on a leaf node). """ self._bubble_up(self.root) def _bubble_up(self,node): """Pre-order depth-first traversal starting at the leaf nodes and proceeding upwards. """ if len(node.children)==0: # Base case # Leaf nodes already have counts # Do nothing return else: # Recursive case # Pre-order traversal: visit/bubble up children first for child in node.children: self._bubble_up(child) # Now that we've completed leaf node counts, we can do interior node counts. # Interior node counts are equal to number of large (>=2) children. large_children = [child for child in node.children if child.count >= 2] node.count = len(large_children) def trie_search(n, verbose=False): words = get_words() words = words[:n] perfect_count = 0 imperfect_count = 0 for letter in ALPHABET: tree = TryTrieTree(words) tree.set_root(letter) tree.assemble() tree.bubble_up() #print(tree) if tree.root.count >= 2: if verbose: print("The letter {0:s} has a perfect binary trie in WORDS({1:d}).".format( letter, n)) perfect_count += 1 else: if verbose: print("The letter {0:s} has no perfect binary trie in WORDS({1:d}).".format( letter, n)) imperfect_count += 1 if verbose: print("") print("Perfect count: {:d}".format(perfect_count)) print("Imperfect count: {:d}".format(imperfect_count)) return perfect_count, imperfect_count def trie_table(): """Compute and print a table of number of words n versus number of perfect tries formed. """ print("%8s\t%8s"%("n","perfect tries")) ns = range(1000,5757,500) for n in ns: p,i = trie_search(n) print("%8d\t%8d"%(n,p)) n = 5757 p,i = trie_search(n) print("%8d\t%8d"%(n,p)) if __name__=="__main__": if len(sys.argv)<2: n = 5757 else: n = int(sys.argv) if n > 5757: n = 5757 _,_ = trie_search(n, verbose=True) #trie_table() ``````