Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
88d3ba5f22 | |||
82d18bc6e9 | |||
6661b42269 | |||
f7ee48357b | |||
e6ababb454 | |||
f6484e86f5 | |||
c190e7cee0 | |||
8581a42dd7 | |||
a1f443bfd6 | |||
d8ee2517ba |
12
Readme.md
12
Readme.md
@@ -4,6 +4,7 @@ use whoosh to search github issues.
|
|||||||
|
|
||||||
Implemented in **Python** using **Flask**, **Whoosh** and **Mistune**.
|
Implemented in **Python** using **Flask**, **Whoosh** and **Mistune**.
|
||||||
|
|
||||||
|
<img src="img/screenshot.png" width="500px" />
|
||||||
|
|
||||||
## notes
|
## notes
|
||||||
|
|
||||||
@@ -35,8 +36,15 @@ summary of how to change the schema:
|
|||||||
- Search class defines Schema object, main definition
|
- Search class defines Schema object, main definition
|
||||||
- `add_issue()` (equally important) which defines how to extract the
|
- `add_issue()` (equally important) which defines how to extract the
|
||||||
fields defined in the schema from the document
|
fields defined in the schema from the document
|
||||||
-
|
- `create_search_result()` (also important) which packages up the
|
||||||
|
search results for the template to deal with
|
||||||
|
- `search.html`: the search.html template uses a different variable
|
||||||
|
namespace than the Python file `issues_search.py` or the flask app
|
||||||
|
- The `create_search_result()` method of `issues_search.py`
|
||||||
|
defines how search results are parsed and packaged for the
|
||||||
|
`search.html` template
|
||||||
|
- Jinja variables used in `search.html` should be defined in the
|
||||||
|
`create_search_result()` method of `issues_search.py`
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
31
Todo.md
Normal file
31
Todo.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# TODO
|
||||||
|
|
||||||
|
recap of round 1:
|
||||||
|
- issues search is working well
|
||||||
|
- indexing comments and issues
|
||||||
|
- able to easily add new fields to schema
|
||||||
|
- able to easily modify search + results template
|
||||||
|
- mapping out where everything is
|
||||||
|
|
||||||
|
## Round 2 (done)
|
||||||
|
|
||||||
|
improvements:
|
||||||
|
- storing comments and issues as separate objects?
|
||||||
|
- storing a boolean? that simple? customize the output of the search result
|
||||||
|
based on a boolean?
|
||||||
|
- if so, how do we pass off a search result to a template conditionally,
|
||||||
|
such that we can save some space (jinja question)
|
||||||
|
|
||||||
|
fix stuff that isn't mine:
|
||||||
|
- improve the readme
|
||||||
|
- fix the config.py config file options
|
||||||
|
|
||||||
|
config:
|
||||||
|
- enable user to specify list of organizations+repos
|
||||||
|
- not just one org/list of repos
|
||||||
|
|
||||||
|
## Round 3
|
||||||
|
|
||||||
|
organization:
|
||||||
|
- mapping out how to change the schema... now, how do we streamline it?
|
||||||
|
- how to organize files
|
BIN
img/screenshot.png
Normal file
BIN
img/screenshot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 614 KiB |
@@ -23,10 +23,8 @@ routes:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def get_items():
|
def get_items():
|
||||||
repo_list = ['2018-may-workshop',
|
repo_list = app.config["REPOS"]
|
||||||
'2018-june-workshop',
|
repo_org =app.config["REPO_ORG"]
|
||||||
'2018-july-workshop']
|
|
||||||
repo_org = 'dcppc'
|
|
||||||
|
|
||||||
gh_access_token = os.environ['GITHUB_TOKEN']
|
gh_access_token = os.environ['GITHUB_TOKEN']
|
||||||
|
|
||||||
@@ -65,12 +63,10 @@ def search():
|
|||||||
if fields == 'None':
|
if fields == 'None':
|
||||||
fields = None
|
fields = None
|
||||||
|
|
||||||
directories = []
|
|
||||||
search = Search(app.config["INDEX_DIR"])
|
search = Search(app.config["INDEX_DIR"])
|
||||||
if not query:
|
if not query:
|
||||||
parsed_query = ""
|
parsed_query = ""
|
||||||
result = []
|
result = []
|
||||||
directories=get_directories()
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
parsed_query, result = search.search(query.split(), fields=[fields])
|
parsed_query, result = search.search(query.split(), fields=[fields])
|
||||||
@@ -78,7 +74,7 @@ def search():
|
|||||||
|
|
||||||
total = search.get_document_total_count()
|
total = search.get_document_total_count()
|
||||||
|
|
||||||
return render_template('search.html', entries=result, query=query, parsed_query=parsed_query, fields=fields, last_searches=get_last_searches(), directories=directories, total=total)
|
return render_template('search.html', entries=result, query=query, parsed_query=parsed_query, fields=fields, last_searches=get_last_searches(), total=total)
|
||||||
|
|
||||||
@app.route('/open')
|
@app.route('/open')
|
||||||
def open_file():
|
def open_file():
|
||||||
@@ -98,7 +94,6 @@ def update_index():
|
|||||||
else:
|
else:
|
||||||
UpdateIndexTask()
|
UpdateIndexTask()
|
||||||
flash("Updating index, check console output")
|
flash("Updating index, check console output")
|
||||||
store_directories()
|
|
||||||
return render_template("search.html", query="", fields="", last_searches=get_last_searches())
|
return render_template("search.html", query="", fields="", last_searches=get_last_searches())
|
||||||
|
|
||||||
|
|
||||||
@@ -111,18 +106,7 @@ def get_last_searches():
|
|||||||
return contents
|
return contents
|
||||||
|
|
||||||
|
|
||||||
def get_directories():
|
|
||||||
'''remove for issues'''
|
|
||||||
if os.path.exists(directories_file):
|
|
||||||
with codecs.open(directories_file, 'r', encoding='utf-8') as f:
|
|
||||||
directories = f.readlines()
|
|
||||||
f.close()
|
|
||||||
else:
|
|
||||||
directories = []
|
|
||||||
return directories
|
|
||||||
|
|
||||||
def store_search(query, fields):
|
def store_search(query, fields):
|
||||||
'''remove for issues'''
|
|
||||||
if os.path.exists(last_searches_file):
|
if os.path.exists(last_searches_file):
|
||||||
with codecs.open(last_searches_file, 'r', encoding='utf-8') as f:
|
with codecs.open(last_searches_file, 'r', encoding='utf-8') as f:
|
||||||
contents = f.readlines()
|
contents = f.readlines()
|
||||||
@@ -136,17 +120,6 @@ def store_search(query, fields):
|
|||||||
with codecs.open(last_searches_file, 'w', encoding='utf-8') as f:
|
with codecs.open(last_searches_file, 'w', encoding='utf-8') as f:
|
||||||
f.writelines(contents[:30])
|
f.writelines(contents[:30])
|
||||||
|
|
||||||
def store_directories():
|
|
||||||
'''remove for issues'''
|
|
||||||
directories = []
|
|
||||||
for root, dirnames, files in os.walk(app.config["MARKDOWN_FILES_DIR"]):
|
|
||||||
if dirnames:
|
|
||||||
for d in dirnames:
|
|
||||||
if os.path.isdir(os.path.join(root, d)):
|
|
||||||
directories.append("%s\n" % d.lower())
|
|
||||||
directories = sorted(set(directories))
|
|
||||||
with codecs.open(app.config["INDEX_DIR"] + "/directories.txt", 'w', encoding='utf-8') as f:
|
|
||||||
f.writelines(directories)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run()
|
app.run()
|
||||||
|
@@ -17,6 +17,16 @@ from whoosh.analysis import StemmingAnalyzer
|
|||||||
"""
|
"""
|
||||||
issues-search.py Flow:
|
issues-search.py Flow:
|
||||||
|
|
||||||
|
|
||||||
|
very high level description:
|
||||||
|
- zeroth step: create a search index
|
||||||
|
- first step: load a search index
|
||||||
|
- second step: call the search() method
|
||||||
|
- third step: update the search index
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
program will:
|
program will:
|
||||||
- create a Search object
|
- create a Search object
|
||||||
- call add_all_issues
|
- call add_all_issues
|
||||||
@@ -88,6 +98,7 @@ class Search:
|
|||||||
|
|
||||||
schema = Schema(
|
schema = Schema(
|
||||||
url=ID(stored=True, unique=True),
|
url=ID(stored=True, unique=True),
|
||||||
|
is_comment=BOOLEAN(stored=True),
|
||||||
timestamp=STORED,
|
timestamp=STORED,
|
||||||
repo_name=TEXT(stored=True),
|
repo_name=TEXT(stored=True),
|
||||||
repo_url=ID(stored=True),
|
repo_url=ID(stored=True),
|
||||||
@@ -116,6 +127,7 @@ class Search:
|
|||||||
|
|
||||||
Schema:
|
Schema:
|
||||||
- url
|
- url
|
||||||
|
- is_comment
|
||||||
- timestamp
|
- timestamp
|
||||||
- repo_name
|
- repo_name
|
||||||
- repo_url
|
- repo_url
|
||||||
@@ -137,6 +149,7 @@ class Search:
|
|||||||
print("Indexing issue %s"%(issue.html_url))
|
print("Indexing issue %s"%(issue.html_url))
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
url = issue.html_url,
|
url = issue.html_url,
|
||||||
|
is_comment = False,
|
||||||
timestamp = issue.created_at,
|
timestamp = issue.created_at,
|
||||||
repo_name = repo_name,
|
repo_name = repo_name,
|
||||||
repo_url = repo_url,
|
repo_url = repo_url,
|
||||||
@@ -155,6 +168,7 @@ class Search:
|
|||||||
print(" > Indexing comment %s"%(comment.html_url))
|
print(" > Indexing comment %s"%(comment.html_url))
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
url = comment.html_url,
|
url = comment.html_url,
|
||||||
|
is_comment = True,
|
||||||
timestamp = comment.created_at,
|
timestamp = comment.created_at,
|
||||||
repo_name = repo_name,
|
repo_name = repo_name,
|
||||||
repo_url = repo_url,
|
repo_url = repo_url,
|
||||||
@@ -168,6 +182,7 @@ class Search:
|
|||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
def add_all_issues(self, gh_access_token, list_of_repos, which_org, config, create_new_index=False):
|
def add_all_issues(self, gh_access_token, list_of_repos, which_org, config, create_new_index=False):
|
||||||
"""
|
"""
|
||||||
Add all issues in a given github repo to the search index.
|
Add all issues in a given github repo to the search index.
|
||||||
@@ -214,6 +229,8 @@ class Search:
|
|||||||
|
|
||||||
writer.commit()
|
writer.commit()
|
||||||
print("Done, added %d documents to the index" % c)
|
print("Done, added %d documents to the index" % c)
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def update_index_incremental(self, gh_access_token, list_of_repos, which_org, config, create_new_index=False):
|
def update_index_incremental(self, gh_access_token, list_of_repos, which_org, config, create_new_index=False):
|
||||||
@@ -245,6 +262,12 @@ class Search:
|
|||||||
writer = self.ix.writer()
|
writer = self.ix.writer()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# fix this. the delete all in index
|
||||||
|
# is not occurring in right place.
|
||||||
|
|
||||||
|
|
||||||
# Iterate over each repo
|
# Iterate over each repo
|
||||||
for this_repo in list_of_repos:
|
for this_repo in list_of_repos:
|
||||||
|
|
||||||
@@ -307,6 +330,8 @@ class Search:
|
|||||||
sr.issue_title = r['issue_title']
|
sr.issue_title = r['issue_title']
|
||||||
sr.issue_url = r['issue_url']
|
sr.issue_url = r['issue_url']
|
||||||
|
|
||||||
|
sr.is_comment = r['is_comment']
|
||||||
|
|
||||||
sr.content = r['content']
|
sr.content = r['content']
|
||||||
highlights = r.highlights('content')
|
highlights = r.highlights('content')
|
||||||
if not highlights:
|
if not highlights:
|
||||||
@@ -360,5 +385,5 @@ if __name__ == "__main__":
|
|||||||
search.add_all_issues(gh_access_token,
|
search.add_all_issues(gh_access_token,
|
||||||
repo_list,
|
repo_list,
|
||||||
repo_org,
|
repo_org,
|
||||||
"/Users/charles/codes/markdown-search/config.py")
|
"/Users/charles/codes/issues-search/config.py")
|
||||||
|
|
||||||
|
11
requirements.txt
Normal file
11
requirements.txt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
Flask>=0.12.1
|
||||||
|
apiclient>=1.0.3
|
||||||
|
oauth2client>=3.0.0
|
||||||
|
httplib2>=0.10.3
|
||||||
|
google-api-python-client
|
||||||
|
mistune>=0.8
|
||||||
|
whoosh>=2.7.4
|
||||||
|
PyGithub>=1.39
|
||||||
|
pypandoc>=1.4
|
||||||
|
requests>=2.19
|
||||||
|
pandoc>=1.0
|
@@ -34,9 +34,16 @@
|
|||||||
<div class="path"><a href='{{ url_for("open_file")}}?path={{e.path|urlencode}}&query={{query}}&fields={{fields}}'>{{e.path}}</a>score: {{'%d' % e.score}}</div>
|
<div class="path"><a href='{{ url_for("open_file")}}?path={{e.path|urlencode}}&query={{query}}&fields={{fields}}'>{{e.path}}</a>score: {{'%d' % e.score}}</div>
|
||||||
-->
|
-->
|
||||||
<div class="url">
|
<div class="url">
|
||||||
<a
|
{% if e.is_comment %}
|
||||||
href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
|
<b>Comment</b> <a href='{{e.url}}'>(comment link)</a>
|
||||||
- <a href='{{e.issue_url}}'>{{e.issue_title}}</a> - <a href='{{e.url}}'>link</a><br />
|
on issue <a href='{{e.issue_url}}'>{{e.issue_title}}</a>
|
||||||
|
in repo <a href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
|
||||||
|
<br />
|
||||||
|
{% else %}
|
||||||
|
<b>Issue</b> <a href='{{e.issue_url}}'>{{e.issue_title}}</a>
|
||||||
|
in repo <a href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
|
||||||
|
<br />
|
||||||
|
{% endif %}
|
||||||
score: {{'%d' % e.score}}
|
score: {{'%d' % e.score}}
|
||||||
</div>
|
</div>
|
||||||
<div class="markdown-body">{{ e.content_highlight|safe}}</div>
|
<div class="markdown-body">{{ e.content_highlight|safe}}</div>
|
||||||
|
Reference in New Issue
Block a user