Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
88d3ba5f22 | |||
82d18bc6e9 | |||
6661b42269 | |||
f7ee48357b | |||
e6ababb454 | |||
f6484e86f5 | |||
c190e7cee0 | |||
8581a42dd7 | |||
a1f443bfd6 | |||
d8ee2517ba |
12
Readme.md
12
Readme.md
@@ -4,6 +4,7 @@ use whoosh to search github issues.
|
||||
|
||||
Implemented in **Python** using **Flask**, **Whoosh** and **Mistune**.
|
||||
|
||||
<img src="img/screenshot.png" width="500px" />
|
||||
|
||||
## notes
|
||||
|
||||
@@ -35,8 +36,15 @@ summary of how to change the schema:
|
||||
- Search class defines Schema object, main definition
|
||||
- `add_issue()` (equally important) which defines how to extract the
|
||||
fields defined in the schema from the document
|
||||
-
|
||||
|
||||
- `create_search_result()` (also important) which packages up the
|
||||
search results for the template to deal with
|
||||
- `search.html`: the search.html template uses a different variable
|
||||
namespace than the Python file `issues_search.py` or the flask app
|
||||
- The `create_search_result()` method of `issues_search.py`
|
||||
defines how search results are parsed and packaged for the
|
||||
`search.html` template
|
||||
- Jinja variables used in `search.html` should be defined in
|
||||
`create_search_result()` method of `issues_search.py`
|
||||
|
||||
|
||||
|
||||
|
31
Todo.md
Normal file
31
Todo.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# TODO
|
||||
|
||||
recap of round 1:
|
||||
- issues search is working well
|
||||
- indexing comments and issues
|
||||
- able to easily add new fields to schema
|
||||
- able to easily modify search + results template
|
||||
- mapping out where everything is
|
||||
|
||||
## Round 2 (done)
|
||||
|
||||
improvements:
|
||||
- storing comments and issues as separate objects?
|
||||
- storing a boolean? that simple? customize the output of the search result
|
||||
based on a boolean?
|
||||
- if so, how do we pass off a search result to a template conditionally,
|
||||
such that we can save some space (jinja question)
|
||||
|
||||
fix stuff that isn't mine:
|
||||
- improve the readme
|
||||
- fix the config.py config file options
|
||||
|
||||
config:
|
||||
- enable user to specify list of organizations+repos
|
||||
- not just one org/list of repos
|
||||
|
||||
## Round 3
|
||||
|
||||
organization:
|
||||
- mapping out how to change the schema... now, how do we streamline it?
|
||||
- how to organize files
|
BIN
img/screenshot.png
Normal file
BIN
img/screenshot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 614 KiB |
@@ -23,10 +23,8 @@ routes:
|
||||
"""
|
||||
|
||||
def get_items():
|
||||
repo_list = ['2018-may-workshop',
|
||||
'2018-june-workshop',
|
||||
'2018-july-workshop']
|
||||
repo_org = 'dcppc'
|
||||
repo_list = app.config["REPOS"]
|
||||
repo_org =app.config["REPO_ORG"]
|
||||
|
||||
gh_access_token = os.environ['GITHUB_TOKEN']
|
||||
|
||||
@@ -65,12 +63,10 @@ def search():
|
||||
if fields == 'None':
|
||||
fields = None
|
||||
|
||||
directories = []
|
||||
search = Search(app.config["INDEX_DIR"])
|
||||
if not query:
|
||||
parsed_query = ""
|
||||
result = []
|
||||
directories=get_directories()
|
||||
|
||||
else:
|
||||
parsed_query, result = search.search(query.split(), fields=[fields])
|
||||
@@ -78,7 +74,7 @@ def search():
|
||||
|
||||
total = search.get_document_total_count()
|
||||
|
||||
return render_template('search.html', entries=result, query=query, parsed_query=parsed_query, fields=fields, last_searches=get_last_searches(), directories=directories, total=total)
|
||||
return render_template('search.html', entries=result, query=query, parsed_query=parsed_query, fields=fields, last_searches=get_last_searches(), total=total)
|
||||
|
||||
@app.route('/open')
|
||||
def open_file():
|
||||
@@ -98,7 +94,6 @@ def update_index():
|
||||
else:
|
||||
UpdateIndexTask()
|
||||
flash("Updating index, check console output")
|
||||
store_directories()
|
||||
return render_template("search.html", query="", fields="", last_searches=get_last_searches())
|
||||
|
||||
|
||||
@@ -111,18 +106,7 @@ def get_last_searches():
|
||||
return contents
|
||||
|
||||
|
||||
def get_directories():
|
||||
'''remove for issues'''
|
||||
if os.path.exists(directories_file):
|
||||
with codecs.open(directories_file, 'r', encoding='utf-8') as f:
|
||||
directories = f.readlines()
|
||||
f.close()
|
||||
else:
|
||||
directories = []
|
||||
return directories
|
||||
|
||||
def store_search(query, fields):
|
||||
'''remove for issues'''
|
||||
if os.path.exists(last_searches_file):
|
||||
with codecs.open(last_searches_file, 'r', encoding='utf-8') as f:
|
||||
contents = f.readlines()
|
||||
@@ -136,17 +120,6 @@ def store_search(query, fields):
|
||||
with codecs.open(last_searches_file, 'w', encoding='utf-8') as f:
|
||||
f.writelines(contents[:30])
|
||||
|
||||
def store_directories():
|
||||
'''remove for issues'''
|
||||
directories = []
|
||||
for root, dirnames, files in os.walk(app.config["MARKDOWN_FILES_DIR"]):
|
||||
if dirnames:
|
||||
for d in dirnames:
|
||||
if os.path.isdir(os.path.join(root, d)):
|
||||
directories.append("%s\n" % d.lower())
|
||||
directories = sorted(set(directories))
|
||||
with codecs.open(app.config["INDEX_DIR"] + "/directories.txt", 'w', encoding='utf-8') as f:
|
||||
f.writelines(directories)
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run()
|
||||
|
@@ -17,6 +17,16 @@ from whoosh.analysis import StemmingAnalyzer
|
||||
"""
|
||||
issues-search.py Flow:
|
||||
|
||||
|
||||
very high level description:
|
||||
- zeroth step: create a search index
|
||||
- first step: load a search index
|
||||
- second step: call the search() method
|
||||
- third step: update the search index
|
||||
|
||||
|
||||
|
||||
|
||||
program will:
|
||||
- create a Search object
|
||||
- call add_all_issues
|
||||
@@ -88,6 +98,7 @@ class Search:
|
||||
|
||||
schema = Schema(
|
||||
url=ID(stored=True, unique=True),
|
||||
is_comment=BOOLEAN(stored=True),
|
||||
timestamp=STORED,
|
||||
repo_name=TEXT(stored=True),
|
||||
repo_url=ID(stored=True),
|
||||
@@ -116,6 +127,7 @@ class Search:
|
||||
|
||||
Schema:
|
||||
- url
|
||||
- is_comment
|
||||
- timestamp
|
||||
- repo_name
|
||||
- repo_url
|
||||
@@ -137,6 +149,7 @@ class Search:
|
||||
print("Indexing issue %s"%(issue.html_url))
|
||||
writer.add_document(
|
||||
url = issue.html_url,
|
||||
is_comment = False,
|
||||
timestamp = issue.created_at,
|
||||
repo_name = repo_name,
|
||||
repo_url = repo_url,
|
||||
@@ -155,6 +168,7 @@ class Search:
|
||||
print(" > Indexing comment %s"%(comment.html_url))
|
||||
writer.add_document(
|
||||
url = comment.html_url,
|
||||
is_comment = True,
|
||||
timestamp = comment.created_at,
|
||||
repo_name = repo_name,
|
||||
repo_url = repo_url,
|
||||
@@ -168,6 +182,7 @@ class Search:
|
||||
return count
|
||||
|
||||
|
||||
'''
|
||||
def add_all_issues(self, gh_access_token, list_of_repos, which_org, config, create_new_index=False):
|
||||
"""
|
||||
Add all issues in a given github repo to the search index.
|
||||
@@ -214,6 +229,8 @@ class Search:
|
||||
|
||||
writer.commit()
|
||||
print("Done, added %d documents to the index" % c)
|
||||
'''
|
||||
|
||||
|
||||
|
||||
def update_index_incremental(self, gh_access_token, list_of_repos, which_org, config, create_new_index=False):
|
||||
@@ -245,6 +262,12 @@ class Search:
|
||||
writer = self.ix.writer()
|
||||
|
||||
|
||||
|
||||
|
||||
# fix this. the delete all in index
|
||||
# is not occurring in right place.
|
||||
|
||||
|
||||
# Iterate over each repo
|
||||
for this_repo in list_of_repos:
|
||||
|
||||
@@ -307,6 +330,8 @@ class Search:
|
||||
sr.issue_title = r['issue_title']
|
||||
sr.issue_url = r['issue_url']
|
||||
|
||||
sr.is_comment = r['is_comment']
|
||||
|
||||
sr.content = r['content']
|
||||
highlights = r.highlights('content')
|
||||
if not highlights:
|
||||
@@ -360,5 +385,5 @@ if __name__ == "__main__":
|
||||
search.add_all_issues(gh_access_token,
|
||||
repo_list,
|
||||
repo_org,
|
||||
"/Users/charles/codes/markdown-search/config.py")
|
||||
"/Users/charles/codes/issues-search/config.py")
|
||||
|
||||
|
11
requirements.txt
Normal file
11
requirements.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
Flask>=0.12.1
|
||||
apiclient>=1.0.3
|
||||
oauth2client>=3.0.0
|
||||
httplib2>=0.10.3
|
||||
google-api-python-client
|
||||
mistune>=0.8
|
||||
whoosh>=2.7.4
|
||||
PyGithub>=1.39
|
||||
pypandoc>=1.4
|
||||
requests>=2.19
|
||||
pandoc>=1.0
|
@@ -34,9 +34,16 @@
|
||||
<div class="path"><a href='{{ url_for("open_file")}}?path={{e.path|urlencode}}&query={{query}}&fields={{fields}}'>{{e.path}}</a>score: {{'%d' % e.score}}</div>
|
||||
-->
|
||||
<div class="url">
|
||||
<a
|
||||
href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
|
||||
- <a href='{{e.issue_url}}'>{{e.issue_title}}</a> - <a href='{{e.url}}'>link</a><br />
|
||||
{% if e.is_comment %}
|
||||
<b>Comment</b> <a href='{{e.url}}'>(comment link)</a>
|
||||
on issue <a href='{{e.issue_url}}'>{{e.issue_title}}</a>
|
||||
in repo <a href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
|
||||
<br />
|
||||
{% else %}
|
||||
<b>Issue</b> <a href='{{e.issue_url}}'>{{e.issue_title}}</a>
|
||||
in repo <a href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
|
||||
<br />
|
||||
{% endif %}
|
||||
score: {{'%d' % e.score}}
|
||||
</div>
|
||||
<div class="markdown-body">{{ e.content_highlight|safe}}</div>
|
||||
|
Reference in New Issue
Block a user