A pandoc tool to translate yer Markdown docs https://pages.charlesreid1.com/translate-yer-docs
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

80 lines
2.5 KiB

#!/usr/bin/env python
import os, glob, subprocess
"""
Translate Pipeline Using Pandoc
Use os.walk to find all markdown files under `rmm/docs/` (recursively)
For each one, run the pipeline using subprocess:
- run pipeline on each document in `rmm/docs/`
- pandoc markdown-to-json
- panflute filter to translate json and extract links
- pandoc json-to-markdown
- output documents in ruskie_docs/
TODO:
- call the pandoc API directly to avoid subprocess
"""
# Directory (relative to the current working directory) holding the English markdown.
SOURCE_DOCS = 'rmm/docs'
# Directory (relative to the current working directory) receiving the translated markdown.
TARGET_DOCS = 'ruskie_docs'
# Executable pandoc JSON filter that performs the translation.
TRANSLATE_FILTER = 'filters/translate.py'


def _find_markdown_files(root):
    """Return the paths of every file ending in `.md` under `root`, recursively."""
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            if name.endswith('.md'):
                found.append(os.path.join(dirpath, name))
    return found


def _run_pipeline(source_path, commands):
    """Feed the file at `source_path` through `commands`, chained like a shell pipeline.

    `commands` is a non-empty list of argv lists. The first command reads
    the file on stdin; each later command reads the previous command's stdout.

    Returns the bytes the last command wrote to stdout.
    Raises subprocess.CalledProcessError if any command exits non-zero.
    """
    procs = []
    try:
        # The source file is handed to the first stage directly as its stdin,
        # so no separate `cat` process is needed.
        with open(source_path, 'rb') as src:
            upstream = src
            for cmd in commands:
                proc = subprocess.Popen(cmd, stdin=upstream, stdout=subprocess.PIPE)
                if procs:
                    # The child now owns the read end of the previous pipe.
                    # Closing our copy lets the previous stage receive SIGPIPE
                    # if this stage exits early, instead of blocking forever.
                    procs[-1].stdout.close()
                procs.append(proc)
                upstream = proc.stdout
        output = procs[-1].communicate()[0]
        # Reap every stage and refuse to return output from a broken pipeline.
        for cmd, proc in zip(commands, procs):
            if proc.wait() != 0:
                raise subprocess.CalledProcessError(proc.returncode, cmd)
        return output
    finally:
        # On the success path every child has already been waited on, so this
        # loop only acts when something above raised and left children alive.
        for proc in procs:
            if proc.poll() is None:
                try:
                    proc.kill()
                except OSError:
                    # The child exited between poll() and kill(); nothing to do.
                    pass
                proc.wait()


cwd = os.getcwd()
source_root = os.path.join(cwd, SOURCE_DOCS)

if not os.path.isdir(source_root):
    err = "ERROR: No rmm/docs/ folder was found.\n"
    err += "Looked in %s\n"%(source_root)
    err += "Try cloning with --recursive,\n"
    err += "or running git submodule update --init\n"
    raise Exception(err)

print("[+] Welcome to translate the docs!")
print("[+] Making directory for translated documentation: %s"%(TARGET_DOCS))
# Equivalent of `mkdir -p` without spawning a process.
if not os.path.isdir(os.path.join(cwd, TARGET_DOCS)):
    os.makedirs(os.path.join(cwd, TARGET_DOCS))

markdown_files = _find_markdown_files(source_root)

# markdown -> pandoc JSON -> translated pandoc JSON -> markdown
#   -f from_format
#   -t to_format
pipeline = [
    ['pandoc','-f','gfm','-t','json','-s'],
    [TRANSLATE_FILTER],
    ['pandoc','-f','json','-t','gfm'],
]

# Maps each output basename to the source file that produced it, so that
# clashes between same-named files in different subdirectories are reported.
written = {}

for en_md in markdown_files:
    basename = os.path.split(en_md)[-1]
    target = os.path.join(TARGET_DOCS,basename)
    # Show the real location of the source file, including any subdirectory.
    pname = os.path.join(SOURCE_DOCS,os.path.relpath(en_md, source_root))

    # NOTE: output is written flat (basename only), so nested files that share
    # a name land on the same target. Warn rather than overwrite silently.
    if basename in written:
        print(" [!] Warning: %s overwrites the translation of %s"%(pname,written[basename]))
    written[basename] = pname

    print(" [+] Now making documentation for %s to %s"%(pname,target))

    # Run the whole pipeline first; the target is only opened (and truncated)
    # once every stage has succeeded, so a failure never clobbers an
    # existing translation with empty or partial output.
    translated = _run_pipeline(en_md, pipeline)

    with open(target,'wb') as f:
        f.write(translated)

    print(" [+] Finished with file %s, translated to %s"%(pname,target))

print("[+] All done!")