A pandoc tool to translate yer Markdown docs https://pages.charlesreid1.com/translate-yer-docs

translate 2.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. #!/usr/bin/env python
  2. import os, glob, subprocess
  3. """
  4. Translate Pipeline Using Pandoc
  5. Use glob to find all markdown files in `rmm/docs/`
  6. For each one, run the pipeline using subprocess:
  7. - run pipeline on each document in `rmm/docs/`
  8. - pandoc markdown-to-json
  9. - panflute filter to translate json and extract links
  10. - pandoc json-to-markdown
  11. - output documents in translated_docs/
  12. TODO:
  13. - call the pandoc API directly to avoid subprocess
  14. """
  15. SOURCE_DOCS = 'rmm/docs'
  16. TARGET_DOCS = 'ruskie_docs'
  17. TRANSLATE_FILTER = 'filters/translate.py'
  18. if not os.path.isdir(os.path.join(os.getcwd(),SOURCE_DOCS)):
  19. err = "ERROR: No rmm/docs/ folder was found.\n"
  20. err += "Looked in %s\n"%(os.path.join(os.getcwd(),SOURCE_DOCS))
  21. err += "Try cloning with --recursive,\n"
  22. err += "or running git submodule update --init\n"
  23. raise Exception(err)
  24. cwd = os.getcwd()
  25. print("[+] Welcome to translate the docs!")
  26. print("[+] Making directory for translated documentation: %s"%(TARGET_DOCS))
  27. subprocess.call(['mkdir','-p',TARGET_DOCS], cwd=cwd)
  28. markdown_files = []
  29. for fdir,fdirnames,fnames in os.walk(os.path.join(os.getcwd(),SOURCE_DOCS)):
  30. for f in fnames:
  31. if f[-3:]=='.md':
  32. markdown_files.append( os.path.join( fdir, f ) )
  33. for en_md in markdown_files:
  34. basename = os.path.split(en_md)[-1]
  35. target = os.path.join(TARGET_DOCS,basename)
  36. pname = os.path.join(SOURCE_DOCS,basename)
  37. print(" [+] Now making documentation for %s to %s"%(pname,target))
  38. # Command 1:
  39. # cat <md>
  40. cat_cmd = ['cat', en_md]
  41. cat_proc = subprocess.Popen(cat_cmd, stdout=subprocess.PIPE)
  42. # pandoc: english markdown to json
  43. # -f from_format
  44. # -t to_format
  45. pandoc_from_en_cmd = ['pandoc','-f','gfm','-t','json','-s']
  46. pandoc_from_en_proc = subprocess.Popen(pandoc_from_en_cmd, stdin=cat_proc.stdout, stdout=subprocess.PIPE)
  47. # pandoc filter to translate with google cloud
  48. pandoc_filter_cmd = [TRANSLATE_FILTER]
  49. pandoc_filter_proc = subprocess.Popen(pandoc_filter_cmd, stdin=pandoc_from_en_proc.stdout, stdout=subprocess.PIPE)
  50. # pandoc: json to translated markdown
  51. pandoc_to_ru_cmd = ['pandoc','-f','json','-t','gfm']
  52. pandoc_to_ru_proc = subprocess.Popen(pandoc_to_ru_cmd, stdin=pandoc_filter_proc.stdout, stdout=subprocess.PIPE)
  53. with open(target,'wb') as f:
  54. f.write(pandoc_to_ru_proc.stdout.read())
  55. print(" [+] Finished with file %s, translated to %s"%(pname,target))
  56. print("[+] All done!")