|
|
|
#!/usr/bin/env python
|
|
|
|
import os, glob, subprocess
|
|
|
|
|
|
|
|
"""
|
|
|
|
Translate Pipeline Using Pandoc
|
|
|
|
|
|
|
|
|
|
|
|
Use os.walk to find all markdown files under `rmm/docs/`
|
|
|
|
|
|
|
|
For each one, run the pipeline using subprocess:
|
|
|
|
- run pipeline on each document in `rmm/docs/`
|
|
|
|
- pandoc markdown-to-json
|
|
|
|
- panflute filter to translate json and extract links
|
|
|
|
- pandoc json-to-markdown
|
|
|
|
- output documents in `ruskie_docs/`
|
|
|
|
|
|
|
|
TODO:
|
|
|
|
- call the pandoc API directly to avoid subprocess
|
|
|
|
"""
|
|
|
|
|
|
|
|
# Directory, relative to the working directory, that holds the English
# markdown documents to translate.
SOURCE_DOCS = "rmm/docs"

# Directory, relative to the working directory, that receives the translated
# markdown documents.
TARGET_DOCS = "ruskie_docs"

# Pandoc JSON filter that performs the translation; it is launched directly
# as a command, so it must be executable.
TRANSLATE_FILTER = "filters/translate.py"
|
|
|
|
|
|
|
|
# Fail early when the English documentation folder is missing, reporting
# where it was looked for and how to fetch it.
expected_docs_dir = os.path.join(os.getcwd(), SOURCE_DOCS)
if not os.path.isdir(expected_docs_dir):
    err = "".join([
        "ERROR: No rmm/docs/ folder was found.\n",
        "Looked in %s\n" % (expected_docs_dir,),
        "Try cloning with --recursive,\n",
        "or running git submodule update --init\n",
    ])
    raise Exception(err)
|
|
|
|
|
|
|
|
# Directory the script was launched from; the output directory is resolved
# relative to it.
cwd = os.getcwd()

print("[+] Welcome to translate the docs!")

print("[+] Making directory for translated documentation: %s"%(TARGET_DOCS))

# Create the output directory (and any missing parents) with the standard
# library instead of shelling out to `mkdir -p`: no external binary is
# required, and a failure raises OSError instead of being silently dropped
# with the ignored exit status.
target_root = os.path.join(cwd, TARGET_DOCS)
if not os.path.isdir(target_root):
    os.makedirs(target_root)
|
|
|
|
|
|
|
|
# Recursively collect the full path of every file below SOURCE_DOCS whose
# name ends in ".md", in the order os.walk reports them.
markdown_files = []
docs_root = os.path.join(os.getcwd(), SOURCE_DOCS)
for folder, _subfolders, names in os.walk(docs_root):
    markdown_files.extend(
        os.path.join(folder, name) for name in names if name.endswith('.md')
    )
|
|
|
|
|
|
|
|
# Translate every collected markdown file with the pipeline
#   pandoc (gfm -> JSON) | TRANSLATE_FILTER | pandoc (JSON -> gfm)
# and write the result into TARGET_DOCS under the same file name.
for en_md in markdown_files:

    basename = os.path.basename(en_md)
    # NOTE(review): only the file name is kept, so two source files with the
    # same name in different subdirectories overwrite each other in
    # TARGET_DOCS -- confirm the source tree is flat.
    target = os.path.join(TARGET_DOCS, basename)

    # Path used in progress messages; relative to the working directory so
    # files found in subdirectories are reported with their real location.
    pname = os.path.relpath(en_md)

    print(" [+] Now making documentation for %s to %s"%(pname,target))

    # Stage 1 -- pandoc: english markdown to json
    #   -f from_format
    #   -t to_format
    # The source file is handed to pandoc as stdin directly, so no separate
    # `cat` process is needed. The child keeps its own copy of the
    # descriptor, so the file can be closed here right after the spawn.
    pandoc_from_en_cmd = ['pandoc','-f','gfm','-t','json','-s']
    with open(en_md, 'rb') as en_file:
        pandoc_from_en_proc = subprocess.Popen(
            pandoc_from_en_cmd, stdin=en_file, stdout=subprocess.PIPE)

    # Stage 2 -- pandoc filter to translate with google cloud
    pandoc_filter_cmd = [TRANSLATE_FILTER]
    pandoc_filter_proc = subprocess.Popen(
        pandoc_filter_cmd,
        stdin=pandoc_from_en_proc.stdout, stdout=subprocess.PIPE)
    # Drop the parent's copy of the pipe so the upstream stage receives
    # SIGPIPE (instead of blocking forever) if the downstream stage exits
    # early, and so descriptors do not pile up across iterations.
    pandoc_from_en_proc.stdout.close()

    # Stage 3 -- pandoc: json to translated markdown
    pandoc_to_ru_cmd = ['pandoc','-f','json','-t','gfm']
    pandoc_to_ru_proc = subprocess.Popen(
        pandoc_to_ru_cmd,
        stdin=pandoc_filter_proc.stdout, stdout=subprocess.PIPE)
    pandoc_filter_proc.stdout.close()

    # Read the whole translated document and wait for the last stage.
    translated, _unused_stderr = pandoc_to_ru_proc.communicate()

    # Reap every stage and make sure each one succeeded; otherwise an empty
    # or truncated document would be written without any warning.
    stages = [
        ('pandoc (markdown to json)', pandoc_from_en_proc),
        ('translate filter', pandoc_filter_proc),
        ('pandoc (json to markdown)', pandoc_to_ru_proc),
    ]
    failures = [
        "%s exited with status %d" % (label, proc.returncode)
        for label, proc in stages
        if proc.wait() != 0
    ]
    if failures:
        raise RuntimeError(
            "ERROR: could not translate %s: %s" % (pname, "; ".join(failures)))

    # Only write the target once the whole pipeline is known to have worked.
    with open(target,'wb') as f:
        f.write(translated)

    print(" [+] Finished with file %s, translated to %s"%(pname,target))

print("[+] All done!")
|
|
|
|
|