Browse Source

Merge pull request #1 from charlesreid1/try-trie-tree

Try trie tree
master
Charles Reid GitHub 1 year ago
parent
commit
b45fda3052
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 310 additions and 7 deletions
  1. +9
    -7
      README.md
  2. +301
    -0
      tries.py

+ 9
- 7
README.md View File

@@ -21,7 +21,7 @@ the SGB five letter word list. Solutions to these exercises are listed below.

```distinct.py```- computes the number of SGB words containing exactly k distinct letters.

```diff_by_one_fixed.py``` - (fixed 2019-03-09.) computes the number of words in the SGB
```diff_by_one_fixed.py``` - (**fixed 2019-03-09**) computes the number of words in the SGB
that are off by a single letter in each position. An example is `rover` and `spuds`.
Each corresponding letter is only different by one: `r -> s`, `o->p`, and so on.
This uses recursive backtracking to generate possible matches for each word, and
@@ -29,13 +29,15 @@ uses a hash table to check for their existence in the original word set.

There are 38 such pairs in the SGB.

Also see [https://charlesreid1.com/wiki/F](https://charlesreid1.com/wiki/Letter_Coverage)..
Also see [Five Letter Words](https://charlesreid1.com/wiki/Five_Letter_Words)
on the charlesreid1.com wiki.

```diff_by_n_fixed.py``` - (added 2019-03-10.) using the corrected approach (above) to
computing differences by 1, this generalizes the calculation to words that are different
by a distance `d` for each letter position. There

by a distance `d` for each letter position.

Also see [Five Letter Words: Part 4: Revisiting Diff by One](https://charlesreid1.github.io/five-letter-words-part-4-revisiting-diff-by-one.html)
(blog post) on [charlesreid1.github.io](https://charlesreid1.github.io).

```euclidean_distance.py``` - computes the euclidean distance between two words. This uses
the traditional Euclidean distance definition but reinterprets distance to mean edit distance.
@@ -46,7 +48,7 @@ the traditional Euclidean distance definition but reinterprets distance to mean

### Variations

```diff_by_n.py``` - computes words in SGB that have an edit distnace of n.
```diff_by_n.py``` - computes words in SGB that have an edit distance of n.

```reverse_lexico.py``` - variation on ```lexico.py``` that finds words whose letters are in
reverse lexicographic order.
@@ -61,7 +63,8 @@ of the alphabet. We authored a dynamic program to compute precisely this - given
number of letters N from the alphabet, this program computes the minimum number of
words it takes to cover all N letters.

Also see [https://charlesreid1.com/wiki/Letter_Coverage](https://charlesreid1.com/wiki/Letter_Coverage)..
Also see [Letter Coverage](https://charlesreid1.com/wiki/Letter_Coverage)
page on the charlesreid1.com wiki.

# Sources

@@ -69,4 +72,3 @@ Also see [https://charlesreid1.com/wiki/Letter_Coverage](https://charlesreid1.co
<[http://www-cs-faculty.stanford.edu/~knuth/sgb.html](http://www-cs-faculty.stanford.edu/~knuth/sgb.html)>




+ 301
- 0
tries.py View File

@@ -0,0 +1,301 @@
#!/usr/bin/env python
from get_words import get_words
import sys
import math

"""
tries.py

Donald Knuth, Art of Computer Programming, Volume 4 Fascicle 0
Exercise #35

Problem:
What letters of the alphabet can be used
as the starting letter of sixteen words that
form a complete binary trie within
WORDS(n), given n?

Example trie:

Left side:
s
h
e o
e l r w

Right side:

s
t
a e
l r a e

"""


# Letters tried, in this order, both as trie roots and as candidate
# child letters at every depth of the trie.
ALPHABET = "abcdefghijklmnopqrstuvwxyz"
# NOTE(review): not referenced anywhere in the visible code; presumably the
# word length (the trie code hard-codes 5 separately) - confirm before use.
FIVE = 5


class Node(object):
    """A single trie node holding one letter.

    Attributes are plain and public:
      letter   - the character this node represents
      count    - integer tally attached to the node (0 unless given)
      children - list of child nodes, initially empty
      parent   - the parent node, or None until one is assigned
    """

    def __init__(self, letter, count=0):
        # Each instance gets its own fresh children list.
        self.parent = None
        self.children = []
        self.letter, self.count = letter, count


class TryTrieTree(object):
    """Trie over a word list, rooted at one starting letter.

    The trie only keeps branches that could still be part of a complete
    binary trie of five-letter words (see _assemble for the thresholds).

    Intended call sequence: construct with the word list, call
    set_root(letter), then assemble(), then bubble_up(); afterwards
    ``root.count`` holds the number of viable subtrees under the root.

    Nodes must expose ``letter``, ``count``, ``children`` (a list) and
    ``parent`` attributes.
    """

    # Length of the words stored in the trie. A node at depth d
    # (root = depth 1) needs at least 2**(_WORD_LENGTH - d) words below it.
    _WORD_LENGTH = 5

    def __init__(self, words):
        """Store the word list; the root stays unset until set_root()."""
        self.root = None
        self.words = words

    def __str__(self):
        """Return one line per node, formatted as
        ``'>' * depth + ' ' + <letters from root to node> + ': ' + count``.

        This is a pre-order traversal: a node is emitted before its
        children. A tree with no root renders as the empty string.
        """
        if self.root is None:
            return ""

        lines = []

        def _collect(node, depth):
            lines.append("%s %s%s: %d\n" % (
                ">" * depth,
                self.get_prefix_from_node(node),
                node.letter,
                node.count,
            ))
            for child in node.children:
                _collect(child, depth + 1)

        _collect(self.root, 1)
        return "".join(lines)

    def set_root(self, root_letter):
        """Replace the root with a fresh node for root_letter."""
        self.root = Node(root_letter)

    def get_prefix_from_node(self, node):
        """Given a node in the trie, return the string of letters on the
        path from the root down to (but not including) that node.

        Returns "" for None and for the root itself.
        """
        if node is None or node is self.root:
            return ""

        # Collect ancestor letters bottom-up, then reverse once.
        letters = []
        ancestor = node.parent
        while ancestor is not None:
            letters.append(ancestor.letter)
            ancestor = ancestor.parent
        return "".join(reversed(letters))

    def get_node_from_prefix(self, prefix):
        """Given a string prefix, return the node that represents the
        last letter of that prefix in this trie.

        Returns None for the empty prefix and whenever the path does not
        exist (including when an intermediate letter is missing).

        Raises AssertionError if the root is unset or if the prefix does
        not start with the root letter.
        """
        assert self.root is not None

        if prefix == '':
            return None

        assert prefix[0] == self.root.letter

        # Walk down from the root one letter at a time.
        node = self.root
        for letter in prefix[1:]:
            for child in node.children:
                if child.letter == letter:
                    node = child
                    break
            else:
                # No child carries this letter: the path does not exist.
                return None
        return node

    def assemble(self):
        """Assemble the trie from the set of words passed to the
        constructor. Requires set_root() to have been called.
        """
        assert self.root is not None

        # Start with an empty prefix; the first candidate is the root letter.
        self._assemble('', self.root.letter, self.words)

    def _assemble(self, prefix, candidate, words):
        """Recursive private method called by assemble().

        Considers extending ``prefix`` with the letter ``candidate``.
        ``words`` is the list of words already known to start with
        ``prefix``. The candidate is kept only if enough words start with
        prefix+candidate: 16, 8, 4, 2 words at depths 1, 2, 3, 4.
        Depth-4 nodes are the leaves; their count is the number of words
        sharing that four-letter prefix.
        """
        candidate_depth = len(prefix) + 1

        ppc = prefix + candidate
        words_with_candidate = [w for w in words if w[:candidate_depth] == ppc]

        min_branches_req = int(math.pow(2, self._WORD_LENGTH - candidate_depth))
        max_number_branches = len(words_with_candidate)

        # Not enough words to continue downward: stop and add nothing.
        if max_number_branches < min_branches_req:
            return

        parent = self.get_node_from_prefix(prefix)
        if prefix != '':
            # Create the new child and link it with its parent. (For the
            # empty prefix the candidate is the root, which already exists.)
            new_child = Node(candidate)
            new_child.parent = parent
            parent.children.append(new_child)

            # Base case: leaves sit one level above the last letter.
            if candidate_depth == self._WORD_LENGTH - 1:
                new_child.count = max_number_branches
                return

        # Recursive case: try every letter as the next one.
        for new_candidate in ALPHABET:
            self._assemble(ppc, new_candidate, words_with_candidate)

    def bubble_up(self):
        """Propagate counts from the leaves up to the root.

        This is a depth-first, post-order traversal: children are
        processed before their parent, so a parent always sees its
        children's final counts.
        """
        self._bubble_up(self.root)

    def _bubble_up(self, node):
        """Post-order helper for bubble_up().

        Nodes without children keep their existing count. Every other
        node's count becomes the number of its children whose count is
        at least 2.
        """
        if not node.children:
            # Childless nodes already carry their count.
            return

        for child in node.children:
            self._bubble_up(child)

        node.count = sum(1 for child in node.children if child.count >= 2)


def trie_search(n, verbose=False):
    """Count the letters that root a perfect binary trie in WORDS(n).

    WORDS(n) is the first n entries returned by get_words()
    (presumably a list of five-letter words - confirm against get_words).
    For every letter of ALPHABET a trie is built, its counts are bubbled
    up, and the letter is "perfect" when the root count is at least 2.

    With verbose=True a line is printed per letter, followed by totals.

    Returns the tuple (perfect_count, imperfect_count).
    """
    words = get_words()[:n]

    perfect_msg = "The letter {0:s} has a perfect binary trie in WORDS({1:d})."
    imperfect_msg = "The letter {0:s} has no perfect binary trie in WORDS({1:d})."

    perfect_count = 0
    imperfect_count = 0
    for letter in ALPHABET:
        tree = TryTrieTree(words)
        tree.set_root(letter)
        tree.assemble()
        tree.bubble_up()

        # The root needs two viable subtrees for a complete binary trie.
        is_perfect = tree.root.count >= 2
        if verbose:
            template = perfect_msg if is_perfect else imperfect_msg
            print(template.format(letter, n))

        if is_perfect:
            perfect_count += 1
        else:
            imperfect_count += 1

    if verbose:
        print("")
        print("Perfect count: {:d}".format(perfect_count))
        print("Imperfect count: {:d}".format(imperfect_count))

    return perfect_count, imperfect_count


def trie_table():
    """Print a two-column table: word count n versus the number of
    perfect tries found by trie_search(n).

    Rows cover n = 1000, 1500, ... (step 500, below 5757), then a final
    row for n = 5757.
    """
    print("%8s\t%8s"%("n","perfect tries"))

    # The stepped sizes, followed by one last row at the 5757 cap.
    sizes = list(range(1000, 5757, 500)) + [5757]
    for size in sizes:
        perfect, _ = trie_search(size)
        print("%8d\t%8d" % (size, perfect))


if __name__ == "__main__":
    # Optional first argument: how many words to use. Defaults to 5757
    # and is capped at 5757 (presumably the full word-list size).
    n = 5757
    if len(sys.argv) >= 2:
        n = min(int(sys.argv[1]), 5757)

    _, _ = trie_search(n, verbose=True)

    # Alternative driver: trie_table() prints results for a range of n.
    #trie_table()


Loading…
Cancel
Save