search markdown with whoosh
 
 
 

45 lines
1.4 KiB

import mistune
import re
# from nltk.tag import pos_tag
# mistune documentation: http://mistune.readthedocs.org/en/latest/
class ParsingRenderer(mistune.Renderer):
    """Renderer that records parts of a markdown document while rendering it.

    The rendered output is produced by ``mistune.Renderer`` unchanged; each
    overridden hook only stores the value it was handed before delegating
    to the base class.

    Attributes collected during rendering:
        blocks: list of code block contents, in the order the hook was called.
        headlines: lower-cased heading texts, each followed by one space.
        doubleemphasiswords: lower-cased double-emphasis texts, each followed
            by one space.
        emphasiswords: lower-cased emphasis texts, each followed by one space.
    """

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the base renderer and reset the collectors."""
        super(ParsingRenderer, self).__init__(**kwargs)
        self.blocks = []
        self.headlines = u''
        self.doubleemphasiswords = u''
        self.emphasiswords = u''

    def block_code(self, code, lang=None):
        """Remember ``code`` verbatim, then render the code block as usual.

        ``lang`` defaults to None to match the base renderer's signature; it
        is only passed through and never stored.
        """
        self.blocks.append(code)
        return super(ParsingRenderer, self).block_code(code, lang)

    def header(self, text, level, raw=None):
        """Remember the lower-cased heading, then render the header as usual.

        ``raw`` is preferred for the record when it is supplied. Because the
        signature allows ``raw`` to be omitted, fall back to ``text`` in that
        case instead of failing on ``None.lower()``.
        """
        heading = text if raw is None else raw
        self.headlines += "%s " % heading.lower()
        # The base class still receives the caller's original ``raw`` value.
        return super(ParsingRenderer, self).header(text, level, raw)

    def double_emphasis(self, text):
        """Remember the lower-cased ``text``, then render double emphasis."""
        self.doubleemphasiswords += "%s " % text.lower()
        return super(ParsingRenderer, self).double_emphasis(text)

    def emphasis(self, text):
        """Remember the lower-cased ``text``, then render emphasis."""
        self.emphasiswords += "%s " % text.lower()
        return super(ParsingRenderer, self).emphasis(text)
class MarkdownParser:
def __init__(self):
self.blocks = []
self.headlines = u''
self.doubleemphasiswords = u''
self.emphasiswords = u''
def parse(self, markdown_text, config):
renderer = ParsingRenderer()
markdown = mistune.Markdown(renderer=renderer)
markdown(markdown_text)
self.blocks = renderer.blocks
self.headlines = renderer.headlines if renderer.headlines.strip() else u''