Browse Source

Merge pull request #1 from charlesreid1/try-trie-tree

Try trie tree
Charles Reid GitHub 2 years ago
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 310 additions and 7 deletions
  1. +9
  2. +301

+ 9
- 7 View File

@@ -21,7 +21,7 @@ the SGB five letter word list. Solutions to these exercises are listed below.

``````- computes the number of SGB words containing exactly k distinct letters.

`````` - (fixed 2019-03-09.) computes the number of words in the SGB
`````` - (**fixed 2019-03-09**) computes the number of words in the SGB
that are off by a single letter in each position. An example is `rover` and `spuds`.
Each corresponding letter is only different by one: `r -> s`, `o->p`, and so on.
This uses recursive backtracking to generate possible matches for each word, and
@@ -29,13 +29,15 @@ uses a hash table to check for their existence in the original word set.

There are 38 such pairs in the SGB.

Also see [](
Also see [Five Letter Words](
on the wiki.

`````` - (added 2019-03-10.) using the corrected approach (above) to
computing differences by 1, this generalizes the calculation to words that are different
by a distance `d` for each letter position. There

by a distance `d` for each letter position.

Also see [Five Letter Words: Part 4: Revisiting Diff by One](
(blog post) on [](

`````` - computes the euclidean distance between two words. This uses
the traditional Euclidean distance definition but reinterprets distance to mean edit distance.
@@ -46,7 +48,7 @@ the traditional Euclidean distance definition but reinterprets distance to mean

### Variations

`````` - computes words in SGB that have an edit distnace of n.
`````` - computes words in SGB that have an edit distance of n.

`````` - variation on `````` that finds words whose letters are in
reverse lexicographic order.
@@ -61,7 +63,8 @@ of the alphabet. We authored a dynamic program to compute precisely this - given
number of letters N from the alphabet, this program computes the minimum number of
words it takes to cover all N letters.

Also see [](
Also see [Letter Coverage](
page on the wiki.

# Sources

@@ -69,4 +72,3 @@ Also see [](

+ 301
- 0 View File

@@ -0,0 +1,301 @@
#!/usr/bin/env python
from get_words import get_words
import sys
import math


Donald Knuth, Art of Computer Programming, Volume 4 Fascicle 0
Exercise #35

What letters of the alphabet can be used
as the starting letter of sixteen words that
form a complete binary trie within
WORDS(n), given n?

Example trie:

Left side:
e o
e l r w

Right side:

a e
l r a e


ALPHABET = "abcdefghijklmnopqrstuvwxyz"
FIVE = 5

class Node(object):
def __init__(self, letter, count=0):
self.letter = letter
self.count = count
self.children = []
self.parent = None

class TryTrieTree(object):
def __init__(self,words):
self.root = None
self.words = words

def __str__(self):
final = ""
depth = 1
runner = self.root

def _str_recursive(runner,depth):
# In order traversal:
# visit this node first,
# then visit children if any
s = ""
s += ">"*depth
s += " "
s += self.get_prefix_from_node(runner)
s += runner.letter
s += ": %d"%(runner.count)
s += "\n"

# Base case
if runner.children == []:
# leaf node
return s

# Recursive case
for child in runner.children:
s += _str_recursive(child,depth+1)
return s

final = _str_recursive(runner,depth)
return final

def set_root(self,root_letter):
self.root = Node(root_letter)

def get_prefix_from_node(self,node):
"""Given a node in the trie,
return the string prefix that
would lead to that node.
if node==None:
return ""
elif node==self.root:
return ""
prefix = ""
while node.parent != None:
node = node.parent
prefix = node.letter + prefix
return prefix

def get_node_from_prefix(self,prefix):
"""Given a string prefix,
return the node that represents
the tail end of that sequence
of letters in this trie. Return
None if the path does not exist.
assert self.root!=None

if prefix=='':
return None

assert prefix[0]==self.root.letter

# Base case
if len(prefix)==1:
return self.root

# Recursive case
parent_prefix, suffix = prefix[:len(prefix)-1],prefix[len(prefix)-1]
parent = self.get_node_from_prefix(parent_prefix)
for child in parent.children:
if child.letter == suffix:
return child

# We know this will end because we handle
# the base case of prefix="", and prefix
# is cut down by one letter each iteration.

def assemble(self):
"""Assemble the trie from the set of words
passed to the constructor.
assert self.root!=None

words = self.words

# start with an empty prefix
prefix = ''
candidate = self.root.letter

def _assemble(self,prefix,candidate,words):
"""Recursive private method called by assemble().
prefix_depth = len(prefix)
candidate_depth = prefix_depth+1

ppc = prefix+candidate
words_with_candidate = [w for w in words if w[:candidate_depth]==ppc]

min_branches_req = int(math.pow(2,5-candidate_depth))
max_number_branches = len(words_with_candidate)

# If we exceed the minimum number of
# branches required, add candidate
# as a new node on the trie.
if max_number_branches >= min_branches_req:

parent = self.get_node_from_prefix(prefix)
# If we are looking at the root node,
if prefix=='':
# parent will be None.
# In this case don't worry about
# creating new child or introducing
# parent and child, b/c the "new child"
# is the root (already exists).

# Otherwise, create the new child,
# and introduce the parent & child.
new_child = Node(candidate)
new_child.parent = parent

# Base case
if candidate_depth==4:
new_child.count = max_number_branches

# Recursive case
for new_candidate in ALPHABET:
new_prefix = prefix + candidate

# otherwise, we don't have enough
# branches to continue downward,
# so stop here and do nothing.

def bubble_up(self):
"""Do a depth-first traversal of the
entire trytrietree, pruning as we go.
This is a pre-order traversal,
meaning we traverse children first,
then the parents, so we always
know the counts of children
(or we are on a leaf node).

def _bubble_up(self,node):
"""Pre-order depth-first traversal
starting at the leaf nodes and proceeding
if len(node.children)==0:
# Base case
# Leaf nodes already have counts
# Do nothing

# Recursive case
# Pre-order traversal: visit/bubble up children first
for child in node.children:

# Now that we've completed leaf node counts, we can do interior node counts.
# Interior node counts are equal to number of large (>=2) children.
large_children = [child for child in node.children if child.count >= 2]
node.count = len(large_children)

def trie_search(n, verbose=False):

words = get_words()
words = words[:n]

perfect_count = 0
imperfect_count = 0
for letter in ALPHABET:

tree = TryTrieTree(words)

if tree.root.count >= 2:

if verbose:
print("The letter {0:s} has a perfect binary trie in WORDS({1:d}).".format(
letter, n))
perfect_count += 1


if verbose:
print("The letter {0:s} has no perfect binary trie in WORDS({1:d}).".format(
letter, n))
imperfect_count += 1

if verbose:
print("Perfect count: {:d}".format(perfect_count))
print("Imperfect count: {:d}".format(imperfect_count))

return perfect_count, imperfect_count

def trie_table():
"""Compute and print a table of
number of words n versus number of
perfect tries formed.
print("%8s\t%8s"%("n","perfect tries"))

ns = range(1000,5757,500)
for n in ns:
p,i = trie_search(n)

n = 5757
p,i = trie_search(n)

if __name__=="__main__":
if len(sys.argv)<2:
n = 5757
n = int(sys.argv[1])
if n > 5757:
n = 5757

_,_ = trie_search(n, verbose=True)