A pandoc tool to translate yer Markdown docs
https://pages.charlesreid1.com/translate-yer-docs
79 lines
2.5 KiB
79 lines
2.5 KiB
#!/usr/bin/env python |
|
import os, glob, subprocess |
|
|
|
""" |
|
Translate Pipeline Using Pandoc |
|
|
|
|
|
Use os.walk to find all markdown files under `rmm/docs/` (searched recursively)
|
|
|
For each one, run the pipeline using subprocess: |
|
- run pipeline on each document in `rmm/docs/` |
|
- pandoc markdown-to-json |
|
- panflute filter to translate json and extract links |
|
- pandoc json-to-markdown |
|
- output documents in `ruskie_docs/` (the TARGET_DOCS setting)
|
|
|
TODO: |
|
- call the pandoc API directly to avoid subprocess |
|
""" |
|
|
|
# Directory holding the English markdown sources, relative to the directory
# the script is launched from.
SOURCE_DOCS = 'rmm/docs'
# Directory that receives the translated markdown files.
TARGET_DOCS = 'ruskie_docs'
# Executable pandoc JSON filter that performs the translation; it is run as
# its own process between the two pandoc invocations.
TRANSLATE_FILTER = 'filters/translate.py'

# Fail fast when the source docs are missing. The hint in the message suggests
# the folder is expected to come from a git submodule.
source_root = os.path.join(os.getcwd(), SOURCE_DOCS)
if not os.path.isdir(source_root):
    raise Exception("".join([
        "ERROR: No rmm/docs/ folder was found.\n",
        "Looked in %s\n" % (source_root,),
        "Try cloning with --recursive,\n",
        "or running git submodule update --init\n",
    ]))
|
|
|
# Directory the script was launched from; the output folder is created
# relative to it.
cwd = os.getcwd()

print("[+] Welcome to translate the docs!")

print("[+] Making directory for translated documentation: %s"%(TARGET_DOCS))
# Create the output folder in-process instead of shelling out to `mkdir -p`:
# this works on systems without a `mkdir` binary and raises an OSError if the
# directory cannot be created, rather than ignoring a non-zero exit status.
os.makedirs(os.path.join(cwd, TARGET_DOCS), exist_ok=True)
|
|
|
# Collect the full path of every `.md` file found anywhere below the source
# docs folder (os.walk descends into subdirectories).
markdown_files = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(os.path.join(os.getcwd(), SOURCE_DOCS))
    for filename in filenames
    if filename.endswith('.md')
]
|
|
|
def _translate_file(source_path, target_path):
    """Translate one markdown file by piping it through pandoc and the filter.

    The pipeline is::

        pandoc (gfm -> json)  |  TRANSLATE_FILTER  |  pandoc (json -> gfm)

    The translated text is written to ``target_path`` only when every stage
    exits with status 0, so a failed run never leaves an empty or truncated
    translation behind.

    Args:
        source_path: Path of the English markdown file to read.
        target_path: Path of the translated markdown file to write.

    Raises:
        RuntimeError: If any stage of the pipeline exits with a non-zero
            status. The message lists every failing stage.
        OSError: If the source/target file cannot be opened or a stage's
            executable cannot be started.
    """
    stage_cmds = [
        # pandoc: english markdown to json (-f from_format, -t to_format)
        ['pandoc', '-f', 'gfm', '-t', 'json', '-s'],
        # pandoc filter to translate with google cloud
        [TRANSLATE_FILTER],
        # pandoc: json to translated markdown
        ['pandoc', '-f', 'json', '-t', 'gfm'],
    ]

    procs = []
    try:
        # The first stage reads the file directly; a separate `cat` process
        # is not needed just to provide stdin.
        with open(source_path, 'rb') as source_file:
            upstream = source_file
            for cmd in stage_cmds:
                proc = subprocess.Popen(cmd, stdin=upstream, stdout=subprocess.PIPE)
                if procs:
                    # The child now owns its own copy of the pipe. Drop ours
                    # so the previous stage is notified (SIGPIPE) if this
                    # stage exits early, and so descriptors do not pile up
                    # across files.
                    upstream.close()
                procs.append(proc)
                upstream = proc.stdout

        translated = procs[-1].stdout.read()
    finally:
        # Always release the pipes and reap every child, including when
        # starting a later stage failed, so no zombie processes are left.
        for proc in procs:
            proc.stdout.close()
        for proc in procs:
            proc.wait()

    failed = [
        "%s (exit status %d)" % (" ".join(cmd), proc.returncode)
        for cmd, proc in zip(stage_cmds, procs)
        if proc.returncode != 0
    ]
    if failed:
        raise RuntimeError(
            "Translation pipeline failed for %s: %s" % (source_path, "; ".join(failed))
        )

    with open(target_path, 'wb') as f:
        f.write(translated)


for en_md in markdown_files:

    # NOTE(review): only the file name is kept, so the output folder is flat.
    # Files with the same name in different source subdirectories overwrite
    # each other, and `pname` (used for messages only) drops the subdirectory.
    basename = os.path.basename(en_md)
    target = os.path.join(TARGET_DOCS, basename)
    pname = os.path.join(SOURCE_DOCS, basename)

    print(" [+] Now making documentation for %s to %s"%(pname,target))

    _translate_file(en_md, target)

    print(" [+] Finished with file %s, translated to %s"%(pname,target))

print("[+] All done!")
|
|
|
|