Experiments with Knuth's 5,757 five letter words. https://charlesreid1.com/wiki/Five_Letter_Words
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

302 lines
7.6KB

  1. #!/usr/bin/env python
  2. from get_words import get_words
  3. import sys
  4. import math
"""
tries.py

Donald Knuth, Art of Computer Programming, Volume 4 Fascicle 0
Exercise #35

Problem:
What letters of the alphabet can be used
as the starting letter of sixteen words that
form a complete binary trie within
WORDS(n), given n?

Example trie:

Left side:
        s
        h
    e       o
  e   l   r   w

Right side:
        s
        t
    a       e
  l   r   a   e
"""
  26. ALPHABET = "abcdefghijklmnopqrstuvwxyz"
  27. FIVE = 5
  28. class Node(object):
  29. def __init__(self, letter, count=0):
  30. self.letter = letter
  31. self.count = count
  32. self.children = []
  33. self.parent = None
  34. class TryTrieTree(object):
  35. def __init__(self,words):
  36. self.root = None
  37. self.words = words
  38. def __str__(self):
  39. final = ""
  40. depth = 1
  41. runner = self.root
  42. def _str_recursive(runner,depth):
  43. # In order traversal:
  44. # visit this node first,
  45. # then visit children if any
  46. s = ""
  47. s += ">"*depth
  48. s += " "
  49. s += self.get_prefix_from_node(runner)
  50. s += runner.letter
  51. s += ": %d"%(runner.count)
  52. s += "\n"
  53. # Base case
  54. if runner.children == []:
  55. # leaf node
  56. return s
  57. # Recursive case
  58. else:
  59. for child in runner.children:
  60. s += _str_recursive(child,depth+1)
  61. return s
  62. final = _str_recursive(runner,depth)
  63. return final
  64. def set_root(self,root_letter):
  65. self.root = Node(root_letter)
  66. def get_prefix_from_node(self,node):
  67. """Given a node in the trie,
  68. return the string prefix that
  69. would lead to that node.
  70. """
  71. if node==None:
  72. return ""
  73. elif node==self.root:
  74. return ""
  75. else:
  76. prefix = ""
  77. while node.parent != None:
  78. node = node.parent
  79. prefix = node.letter + prefix
  80. return prefix
  81. def get_node_from_prefix(self,prefix):
  82. """Given a string prefix,
  83. return the node that represents
  84. the tail end of that sequence
  85. of letters in this trie. Return
  86. None if the path does not exist.
  87. """
  88. assert self.root!=None
  89. if prefix=='':
  90. return None
  91. assert prefix[0]==self.root.letter
  92. # Base case
  93. if len(prefix)==1:
  94. return self.root
  95. # Recursive case
  96. parent_prefix, suffix = prefix[:len(prefix)-1],prefix[len(prefix)-1]
  97. parent = self.get_node_from_prefix(parent_prefix)
  98. for child in parent.children:
  99. if child.letter == suffix:
  100. return child
  101. # We know this will end because we handle
  102. # the base case of prefix="", and prefix
  103. # is cut down by one letter each iteration.
  104. def assemble(self):
  105. """Assemble the trie from the set of words
  106. passed to the constructor.
  107. """
  108. assert self.root!=None
  109. words = self.words
  110. # start with an empty prefix
  111. prefix = ''
  112. candidate = self.root.letter
  113. self._assemble(prefix,candidate,words)
  114. def _assemble(self,prefix,candidate,words):
  115. """Recursive private method called by assemble().
  116. """
  117. prefix_depth = len(prefix)
  118. candidate_depth = prefix_depth+1
  119. ppc = prefix+candidate
  120. words_with_candidate = [w for w in words if w[:candidate_depth]==ppc]
  121. min_branches_req = int(math.pow(2,5-candidate_depth))
  122. max_number_branches = len(words_with_candidate)
  123. # If we exceed the minimum number of
  124. # branches required, add candidate
  125. # as a new node on the trie.
  126. if max_number_branches >= min_branches_req:
  127. parent = self.get_node_from_prefix(prefix)
  128. # If we are looking at the root node,
  129. if prefix=='':
  130. # parent will be None.
  131. # In this case don't worry about
  132. # creating new child or introducing
  133. # parent and child, b/c the "new child"
  134. # is the root (already exists).
  135. pass
  136. else:
  137. # Otherwise, create the new child,
  138. # and introduce the parent & child.
  139. new_child = Node(candidate)
  140. new_child.parent = parent
  141. parent.children.append(new_child)
  142. # Base case
  143. if candidate_depth==4:
  144. new_child.count = max_number_branches
  145. return
  146. # Recursive case
  147. for new_candidate in ALPHABET:
  148. new_prefix = prefix + candidate
  149. self._assemble(new_prefix,new_candidate,words_with_candidate)
  150. # otherwise, we don't have enough
  151. # branches to continue downward,
  152. # so stop here and do nothing.
  153. return
  154. def bubble_up(self):
  155. """Do a depth-first traversal of the
  156. entire trytrietree, pruning as we go.
  157. This is a pre-order traversal,
  158. meaning we traverse children first,
  159. then the parents, so we always
  160. know the counts of children
  161. (or we are on a leaf node).
  162. """
  163. self._bubble_up(self.root)
  164. def _bubble_up(self,node):
  165. """Pre-order depth-first traversal
  166. starting at the leaf nodes and proceeding
  167. upwards.
  168. """
  169. if len(node.children)==0:
  170. # Base case
  171. # Leaf nodes already have counts
  172. # Do nothing
  173. return
  174. else:
  175. # Recursive case
  176. # Pre-order traversal: visit/bubble up children first
  177. for child in node.children:
  178. self._bubble_up(child)
  179. # Now that we've completed leaf node counts, we can do interior node counts.
  180. # Interior node counts are equal to number of large (>=2) children.
  181. large_children = [child for child in node.children if child.count >= 2]
  182. node.count = len(large_children)
  183. def trie_search(n, verbose=False):
  184. words = get_words()
  185. words = words[:n]
  186. perfect_count = 0
  187. imperfect_count = 0
  188. for letter in ALPHABET:
  189. tree = TryTrieTree(words)
  190. tree.set_root(letter)
  191. tree.assemble()
  192. tree.bubble_up()
  193. #print(tree)
  194. if tree.root.count >= 2:
  195. if verbose:
  196. print("The letter {0:s} has a perfect binary trie in WORDS({1:d}).".format(
  197. letter, n))
  198. perfect_count += 1
  199. else:
  200. if verbose:
  201. print("The letter {0:s} has no perfect binary trie in WORDS({1:d}).".format(
  202. letter, n))
  203. imperfect_count += 1
  204. if verbose:
  205. print("")
  206. print("Perfect count: {:d}".format(perfect_count))
  207. print("Imperfect count: {:d}".format(imperfect_count))
  208. return perfect_count, imperfect_count
  209. def trie_table():
  210. """Compute and print a table of
  211. number of words n versus number of
  212. perfect tries formed.
  213. """
  214. print("%8s\t%8s"%("n","perfect tries"))
  215. ns = range(1000,5757,500)
  216. for n in ns:
  217. p,i = trie_search(n)
  218. print("%8d\t%8d"%(n,p))
  219. n = 5757
  220. p,i = trie_search(n)
  221. print("%8d\t%8d"%(n,p))
  222. if __name__=="__main__":
  223. if len(sys.argv)<2:
  224. n = 5757
  225. else:
  226. n = int(sys.argv[1])
  227. if n > 5757:
  228. n = 5757
  229. _,_ = trie_search(n, verbose=True)
  230. #trie_table()