Browse Source

Merge pull request #1 from charlesreid1/try-trie-tree

Try trie tree
try-trie-tree
Charles Reid 6 years ago committed by GitHub
parent
commit
b45fda3052
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 16
      README.md
  2. 301
      tries.py

16
README.md

@ -21,7 +21,7 @@ the SGB five letter word list. Solutions to these exercises are listed below. @@ -21,7 +21,7 @@ the SGB five letter word list. Solutions to these exercises are listed below.
```distinct.py```- computes the number of SGB words containing exactly k distinct letters.
```diff_by_one_fixed.py``` - (fixed 2019-03-09.) computes the number of words in the SGB
```diff_by_one_fixed.py``` - (**fixed 2019-03-09**) computes the number of words in the SGB
that are off by a single letter in each position. An example is `rover` and `spuds`.
Each corresponding letter is only different by one: `r -> s`, `o->p`, and so on.
This uses recursive backtracking to generate possible matches for each word, and
@ -29,13 +29,15 @@ uses a hash table to check for their existence in the original word set. @@ -29,13 +29,15 @@ uses a hash table to check for their existence in the original word set.
There are 38 such pairs in the SGB.
Also see [https://charlesreid1.com/wiki/F](https://charlesreid1.com/wiki/Letter_Coverage)..
Also see [Five Letter Words](https://charlesreid1.com/wiki/Five_Letter_Words)
on the charlesreid1.com wiki.
```diff_by_n_fixed.py``` - (added 2019-03-10.) using the corrected approach (above) to
computing differences by 1, this generalizes the calculation to words that are different
by a distance `d` for each letter position. There
by a distance `d` for each letter position.
Also see [Five Letter Words: Part 4: Revisiting Diff by One](https://charlesreid1.github.io/five-letter-words-part-4-revisiting-diff-by-one.html)
(blog post) on [charlesreid1.github.io](https://charlesreid1.github.io).
```euclidean_distance.py``` - computes the euclidean distance between two words. This uses
the traditional Euclidean distance definition but reinterprets distance to mean edit distance.
@ -46,7 +48,7 @@ the traditional Euclidean distance definition but reinterprets distance to mean @@ -46,7 +48,7 @@ the traditional Euclidean distance definition but reinterprets distance to mean
### Variations
```diff_by_n.py``` - computes words in SGB that have an edit distnace of n.
```diff_by_n.py``` - computes words in SGB that have an edit distance of n.
```reverse_lexico.py``` - variation on ```lexico.py``` that finds words whose letters are in
reverse lexicographic order.
@ -61,7 +63,8 @@ of the alphabet. We authored a dynamic program to compute precisely this - given @@ -61,7 +63,8 @@ of the alphabet. We authored a dynamic program to compute precisely this - given
number of letters N from the alphabet, this program computes the minimum number of
words it takes to cover all N letters.
Also see [https://charlesreid1.com/wiki/Letter_Coverage](https://charlesreid1.com/wiki/Letter_Coverage)..
Also see [Letter Coverage](https://charlesreid1.com/wiki/Letter_Coverage)
page on the charlesreid1.com wiki.
# Sources
@ -69,4 +72,3 @@ Also see [https://charlesreid1.com/wiki/Letter_Coverage](https://charlesreid1.co @@ -69,4 +72,3 @@ Also see [https://charlesreid1.com/wiki/Letter_Coverage](https://charlesreid1.co
<[http://www-cs-faculty.stanford.edu/~knuth/sgb.html](http://www-cs-faculty.stanford.edu/~knuth/sgb.html)>

301
tries.py

@ -0,0 +1,301 @@ @@ -0,0 +1,301 @@
#!/usr/bin/env python
from get_words import get_words
import sys
import math
"""
tries.py
Donald Knuth, Art of Computer Programming, Volume 4 Fascicle 0
Exercise #35
Problem:
What letters of the alphabet can be used
as the starting letter of sixteen words that
form a complete binary trie within
WORDS(n), given n?
Example trie:
Left side:
s
h
e o
e l r w
Right side:
s
t
a e
l r a e
"""
ALPHABET = "abcdefghijklmnopqrstuvwxyz"
FIVE = 5
class Node(object):
def __init__(self, letter, count=0):
self.letter = letter
self.count = count
self.children = []
self.parent = None
class TryTrieTree(object):
def __init__(self,words):
self.root = None
self.words = words
def __str__(self):
final = ""
depth = 1
runner = self.root
def _str_recursive(runner,depth):
# In order traversal:
# visit this node first,
# then visit children if any
s = ""
s += ">"*depth
s += " "
s += self.get_prefix_from_node(runner)
s += runner.letter
s += ": %d"%(runner.count)
s += "\n"
# Base case
if runner.children == []:
# leaf node
return s
# Recursive case
else:
for child in runner.children:
s += _str_recursive(child,depth+1)
return s
final = _str_recursive(runner,depth)
return final
def set_root(self,root_letter):
self.root = Node(root_letter)
def get_prefix_from_node(self,node):
"""Given a node in the trie,
return the string prefix that
would lead to that node.
"""
if node==None:
return ""
elif node==self.root:
return ""
else:
prefix = ""
while node.parent != None:
node = node.parent
prefix = node.letter + prefix
return prefix
def get_node_from_prefix(self,prefix):
"""Given a string prefix,
return the node that represents
the tail end of that sequence
of letters in this trie. Return
None if the path does not exist.
"""
assert self.root!=None
if prefix=='':
return None
assert prefix[0]==self.root.letter
# Base case
if len(prefix)==1:
return self.root
# Recursive case
parent_prefix, suffix = prefix[:len(prefix)-1],prefix[len(prefix)-1]
parent = self.get_node_from_prefix(parent_prefix)
for child in parent.children:
if child.letter == suffix:
return child
# We know this will end because we handle
# the base case of prefix="", and prefix
# is cut down by one letter each iteration.
def assemble(self):
"""Assemble the trie from the set of words
passed to the constructor.
"""
assert self.root!=None
words = self.words
# start with an empty prefix
prefix = ''
candidate = self.root.letter
self._assemble(prefix,candidate,words)
def _assemble(self,prefix,candidate,words):
"""Recursive private method called by assemble().
"""
prefix_depth = len(prefix)
candidate_depth = prefix_depth+1
ppc = prefix+candidate
words_with_candidate = [w for w in words if w[:candidate_depth]==ppc]
min_branches_req = int(math.pow(2,5-candidate_depth))
max_number_branches = len(words_with_candidate)
# If we exceed the minimum number of
# branches required, add candidate
# as a new node on the trie.
if max_number_branches >= min_branches_req:
parent = self.get_node_from_prefix(prefix)
# If we are looking at the root node,
if prefix=='':
# parent will be None.
# In this case don't worry about
# creating new child or introducing
# parent and child, b/c the "new child"
# is the root (already exists).
pass
else:
# Otherwise, create the new child,
# and introduce the parent & child.
new_child = Node(candidate)
new_child.parent = parent
parent.children.append(new_child)
# Base case
if candidate_depth==4:
new_child.count = max_number_branches
return
# Recursive case
for new_candidate in ALPHABET:
new_prefix = prefix + candidate
self._assemble(new_prefix,new_candidate,words_with_candidate)
# otherwise, we don't have enough
# branches to continue downward,
# so stop here and do nothing.
return
def bubble_up(self):
"""Do a depth-first traversal of the
entire trytrietree, pruning as we go.
This is a pre-order traversal,
meaning we traverse children first,
then the parents, so we always
know the counts of children
(or we are on a leaf node).
"""
self._bubble_up(self.root)
def _bubble_up(self,node):
"""Pre-order depth-first traversal
starting at the leaf nodes and proceeding
upwards.
"""
if len(node.children)==0:
# Base case
# Leaf nodes already have counts
# Do nothing
return
else:
# Recursive case
# Pre-order traversal: visit/bubble up children first
for child in node.children:
self._bubble_up(child)
# Now that we've completed leaf node counts, we can do interior node counts.
# Interior node counts are equal to number of large (>=2) children.
large_children = [child for child in node.children if child.count >= 2]
node.count = len(large_children)
def trie_search(n, verbose=False):
words = get_words()
words = words[:n]
perfect_count = 0
imperfect_count = 0
for letter in ALPHABET:
tree = TryTrieTree(words)
tree.set_root(letter)
tree.assemble()
tree.bubble_up()
#print(tree)
if tree.root.count >= 2:
if verbose:
print("The letter {0:s} has a perfect binary trie in WORDS({1:d}).".format(
letter, n))
perfect_count += 1
else:
if verbose:
print("The letter {0:s} has no perfect binary trie in WORDS({1:d}).".format(
letter, n))
imperfect_count += 1
if verbose:
print("")
print("Perfect count: {:d}".format(perfect_count))
print("Imperfect count: {:d}".format(imperfect_count))
return perfect_count, imperfect_count
def trie_table():
"""Compute and print a table of
number of words n versus number of
perfect tries formed.
"""
print("%8s\t%8s"%("n","perfect tries"))
ns = range(1000,5757,500)
for n in ns:
p,i = trie_search(n)
print("%8d\t%8d"%(n,p))
n = 5757
p,i = trie_search(n)
print("%8d\t%8d"%(n,p))
if __name__=="__main__":
if len(sys.argv)<2:
n = 5757
else:
n = int(sys.argv[1])
if n > 5757:
n = 5757
_,_ = trie_search(n, verbose=True)
#trie_table()
Loading…
Cancel
Save