7 Commits
v0.1 ... master

7 changed files with 76 additions and 10 deletions

View File

@@ -4,6 +4,7 @@ use whoosh to search github issues.
Implemented in **Python** using **Flask**, **Whoosh** and **Mistune**. Implemented in **Python** using **Flask**, **Whoosh** and **Mistune**.
<img src="img/screenshot.png" width="500px" />
## notes ## notes
@@ -35,8 +36,15 @@ summary of how to change the schema:
- Search class defines Schema object, main definition - Search class defines Schema object, main definition
- `add_issue()` (equally important) which defines how to extract the - `add_issue()` (equally important) which defines how to extract the
fields defined in the schema from the document fields defined in the schema from the document
- - `create_search_result()` (also important) which packages up the
search results for the template to deal with
- `search.html`: the search.html template uses a different variable
namespace than the Python file `issues_search.py` or the flask app
- The `create_search_result()` method of `issues_search.py`
defines how search results are parsed and packaged for the
`search.html` template
- Jinja variables used in `search.html` should be defined in
the `create_search_result()` method of `issues_search.py`

30
Todo.md Normal file
View File

@@ -0,0 +1,30 @@
# TODO
recap of round 1:
- issues search is working well
- indexing comments and issues
- able to easily add new fields to schema
- able to easily modify search + results template
- mapping out where everything is
## Round 2
improvements:
- storing comments and issues as separate objects?
- storing a boolean? that simple? customize the output of the search result
based on a boolean?
- if so, how do we pass off a search result to a template conditionally,
such that we can save some space (jinja question)
organization:
- mapping out how to change the schema... now, how do we streamline it?
- how to organize files
fix stuff that isn't mine:
- improve the readme
- fix the config.py config file options
config:
- enable user to specify list of organizations+repos
- not just one org/list of repos

BIN
img/screenshot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 614 KiB

View File

@@ -23,10 +23,8 @@ routes:
""" """
def get_items(): def get_items():
repo_list = ['2018-may-workshop', repo_list = app.config["REPOS"]
'2018-june-workshop', repo_org =app.config["REPO_ORG"]
'2018-july-workshop']
repo_org = 'dcppc'
gh_access_token = os.environ['GITHUB_TOKEN'] gh_access_token = os.environ['GITHUB_TOKEN']

View File

@@ -88,6 +88,7 @@ class Search:
schema = Schema( schema = Schema(
url=ID(stored=True, unique=True), url=ID(stored=True, unique=True),
is_comment=BOOLEAN(stored=True),
timestamp=STORED, timestamp=STORED,
repo_name=TEXT(stored=True), repo_name=TEXT(stored=True),
repo_url=ID(stored=True), repo_url=ID(stored=True),
@@ -116,6 +117,7 @@ class Search:
Schema: Schema:
- url - url
- is_comment
- timestamp - timestamp
- repo_name - repo_name
- repo_url - repo_url
@@ -137,6 +139,7 @@ class Search:
print("Indexing issue %s"%(issue.html_url)) print("Indexing issue %s"%(issue.html_url))
writer.add_document( writer.add_document(
url = issue.html_url, url = issue.html_url,
is_comment = False,
timestamp = issue.created_at, timestamp = issue.created_at,
repo_name = repo_name, repo_name = repo_name,
repo_url = repo_url, repo_url = repo_url,
@@ -155,6 +158,7 @@ class Search:
print(" > Indexing comment %s"%(comment.html_url)) print(" > Indexing comment %s"%(comment.html_url))
writer.add_document( writer.add_document(
url = comment.html_url, url = comment.html_url,
is_comment = True,
timestamp = comment.created_at, timestamp = comment.created_at,
repo_name = repo_name, repo_name = repo_name,
repo_url = repo_url, repo_url = repo_url,
@@ -245,6 +249,12 @@ class Search:
writer = self.ix.writer() writer = self.ix.writer()
# FIXME: the delete-all step for the index
# is not occurring in the right place.
# Iterate over each repo # Iterate over each repo
for this_repo in list_of_repos: for this_repo in list_of_repos:
@@ -307,6 +317,8 @@ class Search:
sr.issue_title = r['issue_title'] sr.issue_title = r['issue_title']
sr.issue_url = r['issue_url'] sr.issue_url = r['issue_url']
sr.is_comment = r['is_comment']
sr.content = r['content'] sr.content = r['content']
highlights = r.highlights('content') highlights = r.highlights('content')
if not highlights: if not highlights:
@@ -360,5 +372,5 @@ if __name__ == "__main__":
search.add_all_issues(gh_access_token, search.add_all_issues(gh_access_token,
repo_list, repo_list,
repo_org, repo_org,
"/Users/charles/codes/markdown-search/config.py") "/Users/charles/codes/issues-search/config.py")

11
requirements.txt Normal file
View File

@@ -0,0 +1,11 @@
Flask>=0.12.1
apiclient>=1.0.3
oauth2client>=3.0.0
httplib2>=0.10.3
google-api-python-client
mistune>=0.8
whoosh>=2.7.4
PyGithub>=1.39
pypandoc>=1.4
requests>=2.19
pandoc>=1.0

View File

@@ -34,9 +34,16 @@
<div class="path"><a href='{{ url_for("open_file")}}?path={{e.path|urlencode}}&query={{query}}&fields={{fields}}'>{{e.path}}</a>score: {{'%d' % e.score}}</div> <div class="path"><a href='{{ url_for("open_file")}}?path={{e.path|urlencode}}&query={{query}}&fields={{fields}}'>{{e.path}}</a>score: {{'%d' % e.score}}</div>
--> -->
<div class="url"> <div class="url">
<a {% if e.is_comment %}
href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a> <b>Comment</b> <a href='{{e.url}}'>(comment link)</a>
- <a href='{{e.issue_url}}'>{{e.issue_title}}</a> - <a href='{{e.url}}'>link</a><br /> on issue <a href='{{e.issue_url}}'>{{e.issue_title}}</a>
in repo <a href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
<br />
{% else %}
<b>Issue</b> <a href='{{e.issue_url}}'>{{e.issue_title}}</a>
in repo <a href='{{e.repo_url}}'>dcppc/{{e.repo_name}}</a>
<br />
{% endif %}
score: {{'%d' % e.score}} score: {{'%d' % e.score}}
</div> </div>
<div class="markdown-body">{{ e.content_highlight|safe}}</div> <div class="markdown-body">{{ e.content_highlight|safe}}</div>