45 lines
1.4 KiB
45 lines
1.4 KiB
import mistune |
|
import re |
|
# from nltk.tag import pos_tag |
|
|
|
# http://mistune.readthedocs.org/en/latest/ |
|
|
|
class ParsingRenderer(mistune.Renderer):
    """Renderer that records parts of the document while rendering it.

    Every overridden hook stores its input on the instance and then
    delegates to the ``mistune.Renderer`` implementation, so the rendered
    output is whatever the base class produces.

    Collected attributes:

    * ``blocks`` -- list with the ``code`` argument of each ``block_code``
      call, in call order.
    * ``headlines`` -- lower-cased header sources, each followed by a space.
    * ``doubleemphasiswords`` -- lower-cased ``double_emphasis`` texts, each
      followed by a space.
    * ``emphasiswords`` -- lower-cased ``emphasis`` texts, each followed by
      a space.
    """

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the base renderer and reset the collectors."""
        super(ParsingRenderer, self).__init__(**kwargs)
        self.blocks = []
        self.headlines = u''
        self.doubleemphasiswords = u''
        self.emphasiswords = u''

    def block_code(self, code, lang=None):
        """Record ``code`` and return the base renderer's output.

        ``lang`` is optional so the hook can be called with ``code`` alone;
        it is passed through to the base class unchanged.
        """
        self.blocks.append(code)
        return super(ParsingRenderer, self).block_code(code, lang)

    def header(self, text, level, raw=None):
        """Record the lower-cased header and return the base renderer's output.

        The raw header source is recorded when given; otherwise ``text`` is
        used instead.
        """
        # ``raw`` is optional in the signature, so calling ``.lower()`` on it
        # unconditionally would raise AttributeError when it is omitted.
        source = text if raw is None else raw
        self.headlines += "%s " % source.lower()
        return super(ParsingRenderer, self).header(text, level, raw)

    def double_emphasis(self, text):
        """Record the lower-cased ``text`` and return the base renderer's output."""
        self.doubleemphasiswords += "%s " % text.lower()
        return super(ParsingRenderer, self).double_emphasis(text)

    def emphasis(self, text):
        """Record the lower-cased ``text`` and return the base renderer's output."""
        self.emphasiswords += "%s " % text.lower()
        return super(ParsingRenderer, self).emphasis(text)
|
|
|
|
|
class MarkdownParser:
    """Extract code blocks, headlines and emphasised text from Markdown.

    After ``parse`` has run, the results are available as attributes:

    * ``blocks`` -- list of code block contents.
    * ``headlines`` -- lower-cased headline text joined by spaces.
    * ``doubleemphasiswords`` -- lower-cased double-emphasis text joined by
      spaces.
    * ``emphasiswords`` -- lower-cased emphasis text joined by spaces.

    Each string attribute is ``u''`` when nothing (or only whitespace) was
    collected.
    """

    def __init__(self):
        """Start with empty results."""
        self.blocks = []
        self.headlines = u''
        self.doubleemphasiswords = u''
        self.emphasiswords = u''

    @staticmethod
    def _blank_to_empty(collected):
        """Return ``collected``, or ``u''`` if it holds only whitespace."""
        return collected if collected.strip() else u''

    def parse(self, markdown_text, config):
        """Render ``markdown_text`` and store what the renderer collected.

        Args:
            markdown_text: Markdown source to analyse.
            config: Accepted for interface compatibility; not used here.

        Returns:
            None. Results are stored on the instance and replace those of
            any previous call.
        """
        renderer = ParsingRenderer()
        markdown = mistune.Markdown(renderer=renderer)
        # The rendered output is discarded; only the renderer's collected
        # state is of interest.
        markdown(markdown_text)

        self.blocks = renderer.blocks
        self.headlines = self._blank_to_empty(renderer.headlines)
        # These two were collected by the renderer but never copied over,
        # leaving the attributes permanently empty.
        self.doubleemphasiswords = self._blank_to_empty(
            renderer.doubleemphasiswords)
        self.emphasiswords = self._blank_to_empty(renderer.emphasiswords)
|
|
|
|