Experiments with Knuth's 5,757 five letter words. https://charlesreid1.com/wiki/Five_Letter_Words

diff_by_n.py 2.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. """
  2. diff_by_n.py
  3. Donald Knuth, Art of Computer Programming, Volume 4 Facsimile 0
  4. Variation on Exercise #28
  5. Find pairs of SGB word vectors that differ by +/-n.
  6. """
  7. from get_words import get_words
  8. import timeit
  9. def gen_variations(word,fragment,distance,depth,variations):
  10. """
  11. Recursive backtracking method to assemble strings
  12. differing by +/-distance at each position
  13. """
  14. if depth==5:
  15. variations.add(fragment)
  16. else:
  17. for d in range(1,distance+1):
  18. fragment_pd = fragment + chr(ord(word[depth])+d)
  19. fragment_md = fragment + chr(ord(word[depth])-d)
  20. for new_fragment in [fragment_pd,fragment_md]:
  21. gen_variations(word,new_fragment,distance,depth+1,variations)
  22. def get_all_variations(word,d):
  23. """
  24. Return all possible words that differ
  25. from `word` by +/-d in each index.
  26. This does not include `word` in the
  27. variations.
  28. """
  29. word_variations = set()
  30. gen_variations(word,'',d,0,word_variations)
  31. word_variations = list(word_variations)
  32. return word_variations
  33. def main():
  34. """
  35. Find pairs of SGB word vectors that differ by
  36. +/-d in each component.
  37. To do this, iterate through each word,
  38. generate the possible candidate matchings,
  39. and if they exist, add the pair to a set.
  40. """
  41. words = get_words()
  42. #words = words[:1000]
  43. words = set(get_words())
  44. for d in [1,2,3]:
  45. tic = timeit.default_timer()
  46. # List of string tuples
  47. off_by_n = set()
  48. # Iterate over every word
  49. for iw,word in enumerate(words):
  50. # Generate all possible candidate matches
  51. # distance +/-d from this word at each
  52. # position
  53. all_vars = get_all_variations(word,d)
  54. for word_var in all_vars:
  55. if word_var in words:
  56. # Found a new (unordered) pair
  57. if word<word_var:
  58. left=word
  59. right=word_var
  60. else:
  61. left=word_var
  62. right=word
  63. off_by_n.add((left,right))
  64. off_by_n = list(off_by_n)
  65. off_by_n.sort()
  66. toc = timeit.default_timer()
  67. for o in off_by_n[:10]:
  68. print("{:s} {:s}".format(o[0],o[1]))
  69. print("Found {0:d} pairs of words that differ by +/-{1:d} in each component.".format(len(off_by_n),d))
  70. print("Time: %0.4f"%(toc-tic))
  71. if __name__=="__main__":
  72. main()