Compare commits
17 Commits
testing
...
merge-date
Author | SHA1 | Date | |
---|---|---|---|
55a74f7d98 | |||
ab76226b0c | |||
a4ebef6e6f | |||
bad50efa9b | |||
629fc063db | |||
3b0baa21de | |||
33b8857bd0 | |||
7c50fc9ff1 | |||
eb2cdf1437 | |||
c67e864581 | |||
25cc12cf21 | |||
11c1185e62 | |||
74cfaf8275 | |||
552caad135 | |||
19c42df978 | |||
6f30e3f120 | |||
ad6b653e27 |
249
Hypothesis.md
Normal file
249
Hypothesis.md
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
# Hypothesis API
|
||||||
|
|
||||||
|
|
||||||
|
## Authenticating
|
||||||
|
|
||||||
|
Example output from a call to authenticate with the API:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"links": {
|
||||||
|
"profile": {
|
||||||
|
"read": {
|
||||||
|
"url": "https://hypothes.is/api/profile",
|
||||||
|
"method": "GET",
|
||||||
|
"desc": "Fetch the user's profile"
|
||||||
|
},
|
||||||
|
"update": {
|
||||||
|
"url": "https://hypothes.is/api/profile",
|
||||||
|
"method": "PATCH",
|
||||||
|
"desc": "Update a user's preferences"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"search": {
|
||||||
|
"url": "https://hypothes.is/api/search",
|
||||||
|
"method": "GET",
|
||||||
|
"desc": "Search for annotations"
|
||||||
|
},
|
||||||
|
"group": {
|
||||||
|
"member": {
|
||||||
|
"add": {
|
||||||
|
"url": "https://hypothes.is/api/groups/:pubid/members/:userid",
|
||||||
|
"method": "POST",
|
||||||
|
"desc": "Add the user in the request params to a group."
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"url": "https://hypothes.is/api/groups/:pubid/members/:userid",
|
||||||
|
"method": "DELETE",
|
||||||
|
"desc": "Remove the current user from a group."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"links": {
|
||||||
|
"url": "https://hypothes.is/api/links",
|
||||||
|
"method": "GET",
|
||||||
|
"desc": "URL templates for generating URLs for HTML pages"
|
||||||
|
},
|
||||||
|
"groups": {
|
||||||
|
"read": {
|
||||||
|
"url": "https://hypothes.is/api/groups",
|
||||||
|
"method": "GET",
|
||||||
|
"desc": "Fetch the user's groups"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"annotation": {
|
||||||
|
"hide": {
|
||||||
|
"url": "https://hypothes.is/api/annotations/:id/hide",
|
||||||
|
"method": "PUT",
|
||||||
|
"desc": "Hide an annotation as a group moderator."
|
||||||
|
},
|
||||||
|
"unhide": {
|
||||||
|
"url": "https://hypothes.is/api/annotations/:id/hide",
|
||||||
|
"method": "DELETE",
|
||||||
|
"desc": "Unhide an annotation as a group moderator."
|
||||||
|
},
|
||||||
|
"read": {
|
||||||
|
"url": "https://hypothes.is/api/annotations/:id",
|
||||||
|
"method": "GET",
|
||||||
|
"desc": "Fetch an annotation"
|
||||||
|
},
|
||||||
|
"create": {
|
||||||
|
"url": "https://hypothes.is/api/annotations",
|
||||||
|
"method": "POST",
|
||||||
|
"desc": "Create an annotation"
|
||||||
|
},
|
||||||
|
"update": {
|
||||||
|
"url": "https://hypothes.is/api/annotations/:id",
|
||||||
|
"method": "PATCH",
|
||||||
|
"desc": "Update an annotation"
|
||||||
|
},
|
||||||
|
"flag": {
|
||||||
|
"url": "https://hypothes.is/api/annotations/:id/flag",
|
||||||
|
"method": "PUT",
|
||||||
|
"desc": "Flag an annotation for review."
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"url": "https://hypothes.is/api/annotations/:id",
|
||||||
|
"method": "DELETE",
|
||||||
|
"desc": "Delete an annotation"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Listing
|
||||||
|
|
||||||
|
Here is the result of the API call to list an annotation
|
||||||
|
given its annotation ID:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"updated": "2018-07-26T10:20:47.803636+00:00",
|
||||||
|
"group": "__world__",
|
||||||
|
"target": [
|
||||||
|
{
|
||||||
|
"source": "https://h.readthedocs.io/en/latest/api/authorization/",
|
||||||
|
"selector": [
|
||||||
|
{
|
||||||
|
"conformsTo": "https://tools.ietf.org/html/rfc3236",
|
||||||
|
"type": "FragmentSelector",
|
||||||
|
"value": "access-tokens"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"endContainer": "/div[1]/section[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[2]/p[2]",
|
||||||
|
"startContainer": "/div[1]/section[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[2]/p[1]",
|
||||||
|
"type": "RangeSelector",
|
||||||
|
"startOffset": 14,
|
||||||
|
"endOffset": 116
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "TextPositionSelector",
|
||||||
|
"end": 2234,
|
||||||
|
"start": 1374
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"exact": "hich read or write data as a specific user need to be authorized\nwith an access token. Access tokens can be obtained in two ways:\n\nBy generating a personal API token on the Hypothesis developer\npage (you must be logged in to\nHypothesis to get to this page). This is the simplest method, however\nthese tokens are only suitable for enabling your application to make\nrequests as a single specific user.\n\nBy registering an \u201cOAuth client\u201d and\nimplementing the OAuth authentication flow\nin your application. This method allows any user to authorize your\napplication to read and write data via the API as that user. The Hypothesis\nclient is an example of an application that uses OAuth.\nSee Using OAuth for details of how to implement this method.\n\n\nOnce an access token has been obtained, requests can be authorized by putting\nthe token in the Authorization header.",
|
||||||
|
"prefix": "\n\n\nAccess tokens\u00b6\nAPI requests w",
|
||||||
|
"type": "TextQuoteSelector",
|
||||||
|
"suffix": "\nExample request:\nGET /api HTTP/"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"links": {
|
||||||
|
"json": "https://hypothes.is/api/annotations/kEaohJC9Eeiy_UOozkpkyA",
|
||||||
|
"html": "https://hypothes.is/a/kEaohJC9Eeiy_UOozkpkyA",
|
||||||
|
"incontext": "https://hyp.is/kEaohJC9Eeiy_UOozkpkyA/h.readthedocs.io/en/latest/api/authorization/"
|
||||||
|
},
|
||||||
|
"tags": [],
|
||||||
|
"text": "sdfsdf",
|
||||||
|
"created": "2018-07-26T10:20:47.803636+00:00",
|
||||||
|
"uri": "https://h.readthedocs.io/en/latest/api/authorization/",
|
||||||
|
"flagged": false,
|
||||||
|
"user_info": {
|
||||||
|
"display_name": null
|
||||||
|
},
|
||||||
|
"user": "acct:Aravindan@hypothes.is",
|
||||||
|
"hidden": false,
|
||||||
|
"document": {
|
||||||
|
"title": [
|
||||||
|
"Authorization \u2014 h 0.0.2 documentation"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"id": "kEaohJC9Eeiy_UOozkpkyA",
|
||||||
|
"permissions": {
|
||||||
|
"read": [
|
||||||
|
"group:__world__"
|
||||||
|
],
|
||||||
|
"admin": [
|
||||||
|
"acct:Aravindan@hypothes.is"
|
||||||
|
],
|
||||||
|
"update": [
|
||||||
|
"acct:Aravindan@hypothes.is"
|
||||||
|
],
|
||||||
|
"delete": [
|
||||||
|
"acct:Aravindan@hypothes.is"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Searching
|
||||||
|
|
||||||
|
Here is the output from a call to the endpoint to search annotations
|
||||||
|
(we pass a specific URL to the search function):
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"updated": "2018-08-10T02:21:46.898833+00:00",
|
||||||
|
"group": "__world__",
|
||||||
|
"target": [
|
||||||
|
{
|
||||||
|
"source": "http://pilot.data-commons.us/organize/CopperInternalDeliveryWorkFlow/",
|
||||||
|
"selector": [
|
||||||
|
{
|
||||||
|
"endContainer": "/div[1]/main[1]/div[1]/div[3]/article[1]/h2[1]",
|
||||||
|
"startContainer": "/div[1]/main[1]/div[1]/div[3]/article[1]/h2[1]",
|
||||||
|
"type": "RangeSelector",
|
||||||
|
"startOffset": 0,
|
||||||
|
"endOffset": 80
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "TextPositionSelector",
|
||||||
|
"end": 12328,
|
||||||
|
"start": 12248
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"exact": "Deliverables are due internally on the first of each month, which here is Day 1,",
|
||||||
|
"prefix": " \n ",
|
||||||
|
"type": "TextQuoteSelector",
|
||||||
|
"suffix": "\u00b6\nDay -30 through -10\nCopper PM "
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"links": {
|
||||||
|
"json": "https://hypothes.is/api/annotations/IY2W_pxEEeiVuxfD3sehjQ",
|
||||||
|
"html": "https://hypothes.is/a/IY2W_pxEEeiVuxfD3sehjQ",
|
||||||
|
"incontext": "https://hyp.is/IY2W_pxEEeiVuxfD3sehjQ/pilot.data-commons.us/organize/CopperInternalDeliveryWorkFlow/"
|
||||||
|
},
|
||||||
|
"tags": [],
|
||||||
|
"text": "This is a sample annotation",
|
||||||
|
"created": "2018-08-10T02:21:46.898833+00:00",
|
||||||
|
"uri": "http://pilot.data-commons.us/organize/CopperInternalDeliveryWorkFlow/",
|
||||||
|
"flagged": false,
|
||||||
|
"user_info": {
|
||||||
|
"display_name": null
|
||||||
|
},
|
||||||
|
"user": "acct:charlesreid1dib@hypothes.is",
|
||||||
|
"hidden": false,
|
||||||
|
"document": {
|
||||||
|
"title": [
|
||||||
|
"Copper Internal Delivery Workflow - Data Commons Internal Site"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"id": "IY2W_pxEEeiVuxfD3sehjQ",
|
||||||
|
"permissions": {
|
||||||
|
"read": [
|
||||||
|
"group:__world__"
|
||||||
|
],
|
||||||
|
"admin": [
|
||||||
|
"acct:charlesreid1dib@hypothes.is"
|
||||||
|
],
|
||||||
|
"update": [
|
||||||
|
"acct:charlesreid1dib@hypothes.is"
|
||||||
|
],
|
||||||
|
"delete": [
|
||||||
|
"acct:charlesreid1dib@hypothes.is"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"total": 1
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
@@ -40,6 +40,7 @@ class UpdateIndexTask(object):
|
|||||||
'groupsio_username' : app_config['GROUPSIO_USERNAME'],
|
'groupsio_username' : app_config['GROUPSIO_USERNAME'],
|
||||||
'groupsio_password' : app_config['GROUPSIO_PASSWORD']
|
'groupsio_password' : app_config['GROUPSIO_PASSWORD']
|
||||||
}
|
}
|
||||||
|
self.disqus_token = app_config['DISQUS_TOKEN']
|
||||||
thread.daemon = True
|
thread.daemon = True
|
||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
@@ -54,6 +55,7 @@ class UpdateIndexTask(object):
|
|||||||
|
|
||||||
search.update_index(self.groupsio_credentials,
|
search.update_index(self.groupsio_credentials,
|
||||||
self.gh_token,
|
self.gh_token,
|
||||||
|
self.disqus_token,
|
||||||
self.run_which,
|
self.run_which,
|
||||||
config)
|
config)
|
||||||
|
|
||||||
@@ -347,5 +349,5 @@ if __name__ == '__main__':
|
|||||||
port = 5000
|
port = 5000
|
||||||
else:
|
else:
|
||||||
port = int(port)
|
port = int(port)
|
||||||
app.run(host="0.0.0.0",port=port)
|
app.run(host="0.0.0.0", port=port)
|
||||||
|
|
||||||
|
@@ -6,6 +6,8 @@ import base64
|
|||||||
|
|
||||||
from gdrive_util import GDrive
|
from gdrive_util import GDrive
|
||||||
from groupsio_util import GroupsIOArchivesCrawler, GroupsIOException
|
from groupsio_util import GroupsIOArchivesCrawler, GroupsIOException
|
||||||
|
from disqus_util import DisqusCrawler
|
||||||
|
|
||||||
from apiclient.http import MediaIoBaseDownload
|
from apiclient.http import MediaIoBaseDownload
|
||||||
|
|
||||||
import mistune
|
import mistune
|
||||||
@@ -19,8 +21,11 @@ import codecs
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
|
|
||||||
|
from whoosh import query
|
||||||
from whoosh.qparser import MultifieldParser, QueryParser
|
from whoosh.qparser import MultifieldParser, QueryParser
|
||||||
from whoosh.analysis import StemmingAnalyzer
|
from whoosh.analysis import StemmingAnalyzer, LowercaseFilter, StopFilter
|
||||||
|
from whoosh.qparser.dateparse import DateParserPlugin
|
||||||
|
from whoosh import fields, index
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -103,10 +108,21 @@ class Search:
|
|||||||
# ------------------------------
|
# ------------------------------
|
||||||
# Update the entire index
|
# Update the entire index
|
||||||
|
|
||||||
def update_index(self, groupsio_credentials, gh_token, run_which, config):
|
def update_index(self, groupsio_credentials, gh_token, disqus_token, run_which, config):
|
||||||
"""
|
"""
|
||||||
Update the entire search index
|
Update the entire search index
|
||||||
"""
|
"""
|
||||||
|
if run_which=='all' or run_which=='disqus':
|
||||||
|
try:
|
||||||
|
self.update_index_disqus(disqus_token, config)
|
||||||
|
except Exception as e:
|
||||||
|
print("ERROR: While re-indexing: failed to update Disqus comment threads")
|
||||||
|
print("-"*40)
|
||||||
|
print(repr(e))
|
||||||
|
print("-"*40)
|
||||||
|
print("Continuing...")
|
||||||
|
pass
|
||||||
|
|
||||||
if run_which=='all' or run_which=='emailthreads':
|
if run_which=='all' or run_which=='emailthreads':
|
||||||
try:
|
try:
|
||||||
self.update_index_emailthreads(groupsio_credentials, config)
|
self.update_index_emailthreads(groupsio_credentials, config)
|
||||||
@@ -172,7 +188,8 @@ class Search:
|
|||||||
os.mkdir(index_folder)
|
os.mkdir(index_folder)
|
||||||
|
|
||||||
exists = index.exists_in(index_folder)
|
exists = index.exists_in(index_folder)
|
||||||
stemming_analyzer = StemmingAnalyzer()
|
#stemming_analyzer = StemmingAnalyzer()
|
||||||
|
stemming_analyzer = StemmingAnalyzer() | LowercaseFilter() | StopFilter()
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
@@ -180,30 +197,38 @@ class Search:
|
|||||||
# is defined.
|
# is defined.
|
||||||
|
|
||||||
schema = Schema(
|
schema = Schema(
|
||||||
id = ID(stored=True, unique=True),
|
id = fields.ID(stored=True, unique=True),
|
||||||
kind = ID(stored=True),
|
kind = fields.ID(stored=True),
|
||||||
|
|
||||||
created_time = ID(stored=True),
|
created_time = fields.DATETIME(stored=True),
|
||||||
modified_time = ID(stored=True),
|
modified_time = fields.DATETIME(stored=True),
|
||||||
indexed_time = ID(stored=True),
|
indexed_time = fields.DATETIME(stored=True),
|
||||||
|
|
||||||
title = TEXT(stored=True, field_boost=100.0),
|
title = fields.TEXT(stored=True, field_boost=100.0),
|
||||||
url = ID(stored=True, unique=True),
|
|
||||||
|
|
||||||
mimetype=ID(stored=True),
|
url = fields.ID(stored=True),
|
||||||
owner_email=ID(stored=True),
|
|
||||||
owner_name=TEXT(stored=True),
|
|
||||||
|
|
||||||
repo_name=TEXT(stored=True),
|
mimetype = fields.TEXT(stored=True),
|
||||||
repo_url=ID(stored=True),
|
|
||||||
|
|
||||||
github_user=TEXT(stored=True),
|
owner_email = fields.ID(stored=True),
|
||||||
|
owner_name = fields.TEXT(stored=True),
|
||||||
|
|
||||||
|
# mainly for email threads, groups.io, hypothesis
|
||||||
|
group = fields.ID(stored=True),
|
||||||
|
|
||||||
|
repo_name = fields.TEXT(stored=True),
|
||||||
|
repo_url = fields.ID(stored=True),
|
||||||
|
github_user = fields.TEXT(stored=True),
|
||||||
|
|
||||||
|
tags = fields.KEYWORD(commas=True,
|
||||||
|
stored=True,
|
||||||
|
lowercase=True),
|
||||||
|
|
||||||
# comments only
|
# comments only
|
||||||
issue_title=TEXT(stored=True, field_boost=100.0),
|
issue_title = fields.TEXT(stored=True, field_boost=100.0),
|
||||||
issue_url=ID(stored=True),
|
issue_url = fields.ID(stored=True),
|
||||||
|
|
||||||
content=TEXT(stored=True, analyzer=stemming_analyzer)
|
content = fields.TEXT(stored=True, analyzer=stemming_analyzer)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -243,17 +268,22 @@ class Search:
|
|||||||
writer.delete_by_term('id',item['id'])
|
writer.delete_by_term('id',item['id'])
|
||||||
|
|
||||||
# Index a plain google drive file
|
# Index a plain google drive file
|
||||||
|
created_time = dateutil.parser.parse(item['createdTime'])
|
||||||
|
modified_time = dateutil.parser.parse(item['modifiedTime'])
|
||||||
|
indexed_time = datetime.now().replace(microsecond=0)
|
||||||
|
try:
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
id = item['id'],
|
id = item['id'],
|
||||||
kind = 'gdoc',
|
kind = 'gdoc',
|
||||||
created_time = item['createdTime'],
|
created_time = created_time,
|
||||||
modified_time = item['modifiedTime'],
|
modified_time = modified_time,
|
||||||
indexed_time = datetime.now().replace(microsecond=0).isoformat(),
|
indexed_time = indexed_time,
|
||||||
title = item['name'],
|
title = item['name'],
|
||||||
url = item['webViewLink'],
|
url = item['webViewLink'],
|
||||||
mimetype = mimetype,
|
mimetype = mimetype,
|
||||||
owner_email = item['owners'][0]['emailAddress'],
|
owner_email = item['owners'][0]['emailAddress'],
|
||||||
owner_name = item['owners'][0]['displayName'],
|
owner_name = item['owners'][0]['displayName'],
|
||||||
|
group='',
|
||||||
repo_name='',
|
repo_name='',
|
||||||
repo_url='',
|
repo_url='',
|
||||||
github_user='',
|
github_user='',
|
||||||
@@ -261,6 +291,9 @@ class Search:
|
|||||||
issue_url='',
|
issue_url='',
|
||||||
content = content
|
content = content
|
||||||
)
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Google Drive file \"%s\""%(item['name']))
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@@ -314,7 +347,7 @@ class Search:
|
|||||||
)
|
)
|
||||||
assert output == ""
|
assert output == ""
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
print(" > XXXXXX Failed to index document \"%s\""%(item['name']))
|
print(" > XXXXXX Failed to index Google Drive document \"%s\""%(item['name']))
|
||||||
|
|
||||||
|
|
||||||
# If export was successful, read contents of markdown
|
# If export was successful, read contents of markdown
|
||||||
@@ -342,17 +375,22 @@ class Search:
|
|||||||
else:
|
else:
|
||||||
print(" > Creating a new record")
|
print(" > Creating a new record")
|
||||||
|
|
||||||
|
try:
|
||||||
|
created_time = dateutil.parser.parse(item['createdTime'])
|
||||||
|
modified_time = dateutil.parser.parse(item['modifiedTime'])
|
||||||
|
indexed_time = datetime.now()
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
id = item['id'],
|
id = item['id'],
|
||||||
kind = 'gdoc',
|
kind = 'gdoc',
|
||||||
created_time = item['createdTime'],
|
created_time = created_time,
|
||||||
modified_time = item['modifiedTime'],
|
modified_time = modified_time,
|
||||||
indexed_time = datetime.now().replace(microsecond=0).isoformat(),
|
indexed_time = indexed_time,
|
||||||
title = item['name'],
|
title = item['name'],
|
||||||
url = item['webViewLink'],
|
url = item['webViewLink'],
|
||||||
mimetype = mimetype,
|
mimetype = mimetype,
|
||||||
owner_email = item['owners'][0]['emailAddress'],
|
owner_email = item['owners'][0]['emailAddress'],
|
||||||
owner_name = item['owners'][0]['displayName'],
|
owner_name = item['owners'][0]['displayName'],
|
||||||
|
group='',
|
||||||
repo_name='',
|
repo_name='',
|
||||||
repo_url='',
|
repo_url='',
|
||||||
github_user='',
|
github_user='',
|
||||||
@@ -360,6 +398,10 @@ class Search:
|
|||||||
issue_url='',
|
issue_url='',
|
||||||
content = content
|
content = content
|
||||||
)
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Google Drive file \"%s\""%(item['name']))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -393,13 +435,14 @@ class Search:
|
|||||||
issue_comment_content += comment.body.rstrip()
|
issue_comment_content += comment.body.rstrip()
|
||||||
issue_comment_content += "\n"
|
issue_comment_content += "\n"
|
||||||
|
|
||||||
# Now create the actual search index record
|
# Now create the actual search index record.
|
||||||
created_time = clean_timestamp(issue.created_at)
|
|
||||||
modified_time = clean_timestamp(issue.updated_at)
|
|
||||||
indexed_time = clean_timestamp(datetime.now())
|
|
||||||
|
|
||||||
# Add one document per issue thread,
|
# Add one document per issue thread,
|
||||||
# containing entire text of thread.
|
# containing entire text of thread.
|
||||||
|
|
||||||
|
created_time = issue.created_at
|
||||||
|
modified_time = issue.updated_at
|
||||||
|
indexed_time = datetime.now()
|
||||||
|
try:
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
id = issue.html_url,
|
id = issue.html_url,
|
||||||
kind = 'issue',
|
kind = 'issue',
|
||||||
@@ -411,6 +454,7 @@ class Search:
|
|||||||
mimetype='',
|
mimetype='',
|
||||||
owner_email='',
|
owner_email='',
|
||||||
owner_name='',
|
owner_name='',
|
||||||
|
group='',
|
||||||
repo_name = repo_name,
|
repo_name = repo_name,
|
||||||
repo_url = repo_url,
|
repo_url = repo_url,
|
||||||
github_user = issue.user.login,
|
github_user = issue.user.login,
|
||||||
@@ -418,6 +462,9 @@ class Search:
|
|||||||
issue_url = issue.html_url,
|
issue_url = issue.html_url,
|
||||||
content = issue_comment_content
|
content = issue_comment_content
|
||||||
)
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Github issue \"%s\""%(issue.title))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -447,7 +494,8 @@ class Search:
|
|||||||
print(" > XXXXXXXX Failed to find file info.")
|
print(" > XXXXXXXX Failed to find file info.")
|
||||||
return
|
return
|
||||||
|
|
||||||
indexed_time = clean_timestamp(datetime.now())
|
|
||||||
|
indexed_time = datetime.now()
|
||||||
|
|
||||||
if fext in MARKDOWN_EXTS:
|
if fext in MARKDOWN_EXTS:
|
||||||
print("Indexing markdown doc %s from repo %s"%(fname,repo_name))
|
print("Indexing markdown doc %s from repo %s"%(fname,repo_name))
|
||||||
@@ -476,17 +524,19 @@ class Search:
|
|||||||
usable_url = "https://github.com/%s/blob/master/%s"%(repo_name, fpath)
|
usable_url = "https://github.com/%s/blob/master/%s"%(repo_name, fpath)
|
||||||
|
|
||||||
# Now create the actual search index record
|
# Now create the actual search index record
|
||||||
|
try:
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
id = fsha,
|
id = fsha,
|
||||||
kind = 'markdown',
|
kind = 'markdown',
|
||||||
created_time = '',
|
created_time = None,
|
||||||
modified_time = '',
|
modified_time = None,
|
||||||
indexed_time = indexed_time,
|
indexed_time = indexed_time,
|
||||||
title = fname,
|
title = fname,
|
||||||
url = usable_url,
|
url = usable_url,
|
||||||
mimetype='',
|
mimetype='',
|
||||||
owner_email='',
|
owner_email='',
|
||||||
owner_name='',
|
owner_name='',
|
||||||
|
group='',
|
||||||
repo_name = repo_name,
|
repo_name = repo_name,
|
||||||
repo_url = repo_url,
|
repo_url = repo_url,
|
||||||
github_user = '',
|
github_user = '',
|
||||||
@@ -494,6 +544,11 @@ class Search:
|
|||||||
issue_url = '',
|
issue_url = '',
|
||||||
content = content
|
content = content
|
||||||
)
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Github markdown file \"%s\""%(fname))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print("Indexing github file %s from repo %s"%(fname,repo_name))
|
print("Indexing github file %s from repo %s"%(fname,repo_name))
|
||||||
@@ -501,17 +556,19 @@ class Search:
|
|||||||
key = fname+"_"+fsha
|
key = fname+"_"+fsha
|
||||||
|
|
||||||
# Now create the actual search index record
|
# Now create the actual search index record
|
||||||
|
try:
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
id = key,
|
id = key,
|
||||||
kind = 'ghfile',
|
kind = 'ghfile',
|
||||||
created_time = '',
|
created_time = None,
|
||||||
modified_time = '',
|
modified_time = None,
|
||||||
indexed_time = indexed_time,
|
indexed_time = indexed_time,
|
||||||
title = fname,
|
title = fname,
|
||||||
url = repo_url,
|
url = repo_url,
|
||||||
mimetype='',
|
mimetype='',
|
||||||
owner_email='',
|
owner_email='',
|
||||||
owner_name='',
|
owner_name='',
|
||||||
|
group='',
|
||||||
repo_name = repo_name,
|
repo_name = repo_name,
|
||||||
repo_url = repo_url,
|
repo_url = repo_url,
|
||||||
github_user = '',
|
github_user = '',
|
||||||
@@ -519,6 +576,9 @@ class Search:
|
|||||||
issue_url = '',
|
issue_url = '',
|
||||||
content = ''
|
content = ''
|
||||||
)
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Github file \"%s\""%(fname))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -529,23 +589,35 @@ class Search:
|
|||||||
|
|
||||||
def add_emailthread(self, writer, d, config, update=True):
|
def add_emailthread(self, writer, d, config, update=True):
|
||||||
"""
|
"""
|
||||||
Use a Github file API record to add a filename
|
Use a Groups.io email thread record to add
|
||||||
to the search index.
|
an email thread to the search index.
|
||||||
"""
|
"""
|
||||||
indexed_time = clean_timestamp(datetime.now())
|
if 'created_time' in d.keys() and d['created_time'] is not None:
|
||||||
|
created_time = d['created_time']
|
||||||
|
else:
|
||||||
|
created_time = None
|
||||||
|
|
||||||
|
if 'modified_time' in d.keys() and d['modified_time'] is not None:
|
||||||
|
modified_time = d['modified_time']
|
||||||
|
else:
|
||||||
|
modified_time = None
|
||||||
|
|
||||||
|
indexed_time = datetime.now()
|
||||||
|
|
||||||
# Now create the actual search index record
|
# Now create the actual search index record
|
||||||
|
try:
|
||||||
writer.add_document(
|
writer.add_document(
|
||||||
id = d['permalink'],
|
id = d['permalink'],
|
||||||
kind = 'emailthread',
|
kind = 'emailthread',
|
||||||
created_time = '',
|
created_time = created_time,
|
||||||
modified_time = '',
|
modified_time = modified_time,
|
||||||
indexed_time = indexed_time,
|
indexed_time = indexed_time,
|
||||||
title = d['subject'],
|
title = d['subject'],
|
||||||
url = d['permalink'],
|
url = d['permalink'],
|
||||||
mimetype='',
|
mimetype='',
|
||||||
owner_email='',
|
owner_email='',
|
||||||
owner_name=d['original_sender'],
|
owner_name=d['original_sender'],
|
||||||
|
group=d['subgroup'],
|
||||||
repo_name = '',
|
repo_name = '',
|
||||||
repo_url = '',
|
repo_url = '',
|
||||||
github_user = '',
|
github_user = '',
|
||||||
@@ -553,6 +625,48 @@ class Search:
|
|||||||
issue_url = '',
|
issue_url = '',
|
||||||
content = d['content']
|
content = d['content']
|
||||||
)
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Groups.io thread \"%s\""%(d['subject']))
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------
|
||||||
|
# Add a single disqus comment thread
|
||||||
|
# to the search index.
|
||||||
|
|
||||||
|
def add_disqusthread(self, writer, d, config, update=True):
|
||||||
|
"""
|
||||||
|
Use a disqus comment thread record
|
||||||
|
to add a disqus comment thread to the
|
||||||
|
search index.
|
||||||
|
"""
|
||||||
|
indexed_time = datetime.now()
|
||||||
|
|
||||||
|
# created_time is already a timestamp
|
||||||
|
|
||||||
|
# Now create the actual search index record
|
||||||
|
try:
|
||||||
|
writer.add_document(
|
||||||
|
id = d['id'],
|
||||||
|
kind = 'disqus',
|
||||||
|
created_time = d['created_time'],
|
||||||
|
modified_time = None,
|
||||||
|
indexed_time = indexed_time,
|
||||||
|
title = d['title'],
|
||||||
|
url = d['link'],
|
||||||
|
mimetype='',
|
||||||
|
owner_email='',
|
||||||
|
owner_name='',
|
||||||
|
repo_name = '',
|
||||||
|
repo_url = '',
|
||||||
|
github_user = '',
|
||||||
|
issue_title = '',
|
||||||
|
issue_url = '',
|
||||||
|
content = d['content']
|
||||||
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
print(repr(e))
|
||||||
|
print(" > XXXXXX Failed to index Disqus comment thread \"%s\""%(d['title']))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -580,9 +694,8 @@ class Search:
|
|||||||
# Updated algorithm:
|
# Updated algorithm:
|
||||||
# - get set of indexed ids
|
# - get set of indexed ids
|
||||||
# - get set of remote ids
|
# - get set of remote ids
|
||||||
# - drop indexed ids not in remote ids
|
# - drop all indexed ids
|
||||||
# - index all remote ids
|
# - index all remote ids
|
||||||
# - add hash check in add_
|
|
||||||
|
|
||||||
|
|
||||||
# Get the set of indexed ids:
|
# Get the set of indexed ids:
|
||||||
@@ -631,10 +744,10 @@ class Search:
|
|||||||
full_items[f['id']] = f
|
full_items[f['id']] = f
|
||||||
|
|
||||||
## Shorter:
|
## Shorter:
|
||||||
#break
|
|
||||||
# Longer:
|
|
||||||
if nextPageToken is None:
|
|
||||||
break
|
break
|
||||||
|
## Longer:
|
||||||
|
#if nextPageToken is None:
|
||||||
|
# break
|
||||||
|
|
||||||
|
|
||||||
writer = self.ix.writer()
|
writer = self.ix.writer()
|
||||||
@@ -642,7 +755,7 @@ class Search:
|
|||||||
temp_dir = tempfile.mkdtemp(dir=os.getcwd())
|
temp_dir = tempfile.mkdtemp(dir=os.getcwd())
|
||||||
print("Temporary directory: %s"%(temp_dir))
|
print("Temporary directory: %s"%(temp_dir))
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
# Drop any id in indexed_ids
|
# Drop any id in indexed_ids
|
||||||
# not in remote_ids
|
# not in remote_ids
|
||||||
@@ -670,6 +783,13 @@ class Search:
|
|||||||
self.add_drive_file(writer, item, temp_dir, config, update=False)
|
self.add_drive_file(writer, item, temp_dir, config, update=False)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print("ERROR: While adding Google Drive files to search index")
|
||||||
|
print("-"*40)
|
||||||
|
print(repr(e))
|
||||||
|
print("-"*40)
|
||||||
|
print("Continuing...")
|
||||||
|
pass
|
||||||
|
|
||||||
print("Cleaning temporary directory: %s"%(temp_dir))
|
print("Cleaning temporary directory: %s"%(temp_dir))
|
||||||
subprocess.call(['rm','-fr',temp_dir])
|
subprocess.call(['rm','-fr',temp_dir])
|
||||||
@@ -686,12 +806,6 @@ class Search:
|
|||||||
Update the search index using a collection of
|
Update the search index using a collection of
|
||||||
Github repo issues and comments.
|
Github repo issues and comments.
|
||||||
"""
|
"""
|
||||||
# Updated algorithm:
|
|
||||||
# - get set of indexed ids
|
|
||||||
# - get set of remote ids
|
|
||||||
# - drop indexed ids not in remote ids
|
|
||||||
# - index all remote ids
|
|
||||||
|
|
||||||
# Get the set of indexed ids:
|
# Get the set of indexed ids:
|
||||||
# ------
|
# ------
|
||||||
indexed_issues = set()
|
indexed_issues = set()
|
||||||
@@ -772,12 +886,6 @@ class Search:
|
|||||||
files (and, separately, Markdown files) from
|
files (and, separately, Markdown files) from
|
||||||
a Github repo.
|
a Github repo.
|
||||||
"""
|
"""
|
||||||
# Updated algorithm:
|
|
||||||
# - get set of indexed ids
|
|
||||||
# - get set of remote ids
|
|
||||||
# - drop indexed ids not in remote ids
|
|
||||||
# - index all remote ids
|
|
||||||
|
|
||||||
# Get the set of indexed ids:
|
# Get the set of indexed ids:
|
||||||
# ------
|
# ------
|
||||||
indexed_ids = set()
|
indexed_ids = set()
|
||||||
@@ -896,12 +1004,6 @@ class Search:
|
|||||||
|
|
||||||
RELEASE THE SPIDER!!!
|
RELEASE THE SPIDER!!!
|
||||||
"""
|
"""
|
||||||
# Algorithm:
|
|
||||||
# - get set of indexed ids
|
|
||||||
# - get set of remote ids
|
|
||||||
# - drop indexed ids not in remote ids
|
|
||||||
# - index all remote ids
|
|
||||||
|
|
||||||
# Get the set of indexed ids:
|
# Get the set of indexed ids:
|
||||||
# ------
|
# ------
|
||||||
indexed_ids = set()
|
indexed_ids = set()
|
||||||
@@ -919,16 +1021,17 @@ class Search:
|
|||||||
# ask spider to crawl the archives
|
# ask spider to crawl the archives
|
||||||
spider.crawl_group_archives()
|
spider.crawl_group_archives()
|
||||||
|
|
||||||
# now spider.archives is a list of dictionaries
|
# now spider.archives is a dictionary
|
||||||
# that each represent a thread:
|
# with one key per thread ID,
|
||||||
# thread = {
|
# and a value set to the payload:
|
||||||
|
# '<thread-id>' : {
|
||||||
# 'permalink' : permalink,
|
# 'permalink' : permalink,
|
||||||
# 'subject' : subject,
|
# 'subject' : subject,
|
||||||
# 'original_sender' : original_sender,
|
# 'original_sender' : original_sender,
|
||||||
# 'content' : full_content
|
# 'content' : full_content
|
||||||
# }
|
# }
|
||||||
#
|
#
|
||||||
# It is hard to reliablly extract more information
|
# It is hard to reliably extract more information
|
||||||
# than that from the email thread.
|
# than that from the email thread.
|
||||||
|
|
||||||
writer = self.ix.writer()
|
writer = self.ix.writer()
|
||||||
@@ -958,6 +1061,75 @@ class Search:
|
|||||||
print("Done, updated %d Groups.io email threads in the index" % count)
|
print("Done, updated %d Groups.io email threads in the index" % count)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------
|
||||||
|
# Disqus Comments
|
||||||
|
|
||||||
|
|
||||||
|
def update_index_disqus(self, disqus_token, config):
    """
    Rebuild the Disqus portion of the search index.

    Every document currently indexed with kind "disqus" is deleted,
    and every thread crawled from the 'dcppc-internal' Disqus forum
    is added back, so the index ends up mirroring the remote forum.

    disqus_token is handed to DisqusCrawler as its credentials;
    config is passed through unchanged to add_disqusthread().
    """
    # Ids of the Disqus documents that are in the index right now
    # --------------------
    kind_query = QueryParser("kind", schema=self.ix.schema).parse("disqus")
    with self.ix.searcher() as searcher:
        indexed_ids = {hit['id'] for hit in searcher.search(kind_query,limit=None)}

    # Crawl the remote forum
    # ------
    spider = DisqusCrawler(disqus_token,'dcppc-internal')
    spider.crawl_threads()

    writer = self.ix.writer()

    # threads is a dictionary:
    # keys are thread ids, values are dictionaries describing a thread
    threads = spider.get_threads()
    remote_ids = {thread_id for thread_id in threads.keys()}

    # Drop everything that was indexed before...
    for stale_id in indexed_ids:
        writer.delete_by_term('id',stale_id)

    # ...and index every remote thread from scratch
    count = 0
    for thread_id in remote_ids:
        self.add_disqusthread(writer, threads[thread_id], config, update=False)
        count += 1

    writer.commit()
    print("Done, updated %d Disqus comment threads in the index" % count)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------
|
# ---------------------------------
|
||||||
# Search results bundler
|
# Search results bundler
|
||||||
|
|
||||||
@@ -1044,6 +1216,7 @@ class Search:
|
|||||||
"ghfile" : None,
|
"ghfile" : None,
|
||||||
"markdown" : None,
|
"markdown" : None,
|
||||||
"emailthread" : None,
|
"emailthread" : None,
|
||||||
|
"disqus" : None,
|
||||||
"total" : None
|
"total" : None
|
||||||
}
|
}
|
||||||
for key in counts.keys():
|
for key in counts.keys():
|
||||||
@@ -1074,7 +1247,9 @@ class Search:
|
|||||||
elif doctype=='issue':
|
elif doctype=='issue':
|
||||||
item_keys = ['title','repo_name','repo_url','url','created_time','modified_time']
|
item_keys = ['title','repo_name','repo_url','url','created_time','modified_time']
|
||||||
elif doctype=='emailthread':
|
elif doctype=='emailthread':
|
||||||
item_keys = ['title','owner_name','url']
|
item_keys = ['title','owner_name','url','created_time','modified_time']
|
||||||
|
elif doctype=='disqus':
|
||||||
|
item_keys = ['title','created_time','url']
|
||||||
elif doctype=='ghfile':
|
elif doctype=='ghfile':
|
||||||
item_keys = ['title','repo_name','repo_url','url']
|
item_keys = ['title','repo_name','repo_url','url']
|
||||||
elif doctype=='markdown':
|
elif doctype=='markdown':
|
||||||
@@ -1091,10 +1266,6 @@ class Search:
|
|||||||
for r in results:
|
for r in results:
|
||||||
d = {}
|
d = {}
|
||||||
for k in item_keys:
|
for k in item_keys:
|
||||||
if k=='created_time' or k=='modified_time':
|
|
||||||
#d[k] = r[k]
|
|
||||||
d[k] = dateutil.parser.parse(r[k]).strftime("%Y-%m-%d")
|
|
||||||
else:
|
|
||||||
d[k] = r[k]
|
d[k] = r[k]
|
||||||
json_results.append(d)
|
json_results.append(d)
|
||||||
|
|
||||||
@@ -1108,7 +1279,16 @@ class Search:
|
|||||||
query_string = " ".join(query_list)
|
query_string = " ".join(query_list)
|
||||||
query = None
|
query = None
|
||||||
if ":" in query_string:
|
if ":" in query_string:
|
||||||
query = QueryParser("content", self.schema).parse(query_string)
|
|
||||||
|
#query = QueryParser("content",
|
||||||
|
# self.schema
|
||||||
|
#).parse(query_string)
|
||||||
|
query = QueryParser("content",
|
||||||
|
self.schema,
|
||||||
|
termclass=query.Variations
|
||||||
|
)
|
||||||
|
query.add_plugin(DateParserPlugin(free=True))
|
||||||
|
query = query.parse(query_string)
|
||||||
elif len(fields) == 1 and fields[0] == "filename":
|
elif len(fields) == 1 and fields[0] == "filename":
|
||||||
pass
|
pass
|
||||||
elif len(fields) == 2:
|
elif len(fields) == 2:
|
||||||
@@ -1116,9 +1296,12 @@ class Search:
|
|||||||
else:
|
else:
|
||||||
# If the user does not specify a field,
|
# If the user does not specify a field,
|
||||||
# these are the fields that are actually searched
|
# these are the fields that are actually searched
|
||||||
fields = ['title', 'content','owner_name','owner_email','url']
|
fields = ['title', 'content','owner_name','owner_email','url','created_date','modified_date']
|
||||||
if not query:
|
if not query:
|
||||||
query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
|
query = MultifieldParser(fields, schema=self.ix.schema)
|
||||||
|
query.add_plugin(DateParserPlugin(free=True))
|
||||||
|
query = query.parse(query_string)
|
||||||
|
#query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
|
||||||
parsed_query = "%s" % query
|
parsed_query = "%s" % query
|
||||||
print("query: %s" % parsed_query)
|
print("query: %s" % parsed_query)
|
||||||
results = searcher.search(query, terms=False, scored=True, groupedby="kind")
|
results = searcher.search(query, terms=False, scored=True, groupedby="kind")
|
||||||
|
153
disqus_util.py
Normal file
153
disqus_util.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
import os, re
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
|
"""
|
||||||
|
Convenience class wrapper for Disqus comments.
|
||||||
|
|
||||||
|
This requires that the user provide either their
|
||||||
|
API OAuth application credentials (in which case
|
||||||
|
a user needs to authenticate with the application
|
||||||
|
so it can access the comments that they can see)
|
||||||
|
or user credentials from a previous login.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class DisqusCrawler(object):
    """
    Crawl the comment threads of one Disqus forum through the
    Disqus 3.0 HTTP API.

    credentials is sent to the API as the `api_key` parameter and
    group_name as the `forum` parameter. Call crawl_threads() to do
    the crawl, then get_threads() to read the result.
    """

    # Disqus API endpoints: list the threads of a forum,
    # and list the posts (comments) of a thread
    LIST_THREADS_URL = 'https://disqus.com/api/3.0/threads/list.json'
    LIST_POSTS_URL = 'https://disqus.com/api/3.0/threads/listPosts.json'

    def __init__(self,
                 credentials,
                 group_name):

        self.credentials = credentials
        self.group_name = group_name
        # Becomes True once crawl_threads() has completed
        self.crawled_comments = False
        # Becomes a dictionary once crawl_threads() has completed
        self.threads = None


    def get_threads(self):
        """
        Return the crawled threads as a dictionary keyed by thread id.

        Each value is a dictionary with the keys id, created_time,
        title, forum, link and content (every comment of the thread
        joined into one string). Returns None if crawl_threads() has
        not been run yet.
        """
        return self.threads


    def _paginate(self, url, params):
        """
        Yield every item of a cursor-paginated API listing.

        Requests `url` with `params`, yields each entry of the
        'response' list, and keeps requesting the next page (passing
        the 'next' cursor value) for as long as the returned cursor
        has a true 'hasNext' entry.
        """
        page_params = dict(params)
        while True:
            results = requests.request('GET', url, params=page_params).json()
            for item in results['response']:
                yield item

            cursor = results['cursor']
            if not cursor.get('hasNext'):
                break

            # Same query, positioned at the next page
            page_params = dict(params)
            page_params['cursor'] = cursor['next']


    def crawl_threads(self):
        """
        Use the API to list every thread of the forum, then list
        every comment of each thread, and store the result in
        self.threads (see get_threads() for its layout).

        Threads whose link contains '127.0.0.1' or 'localhost', and
        threads that have no posts, are not stored.
        """
        base_params = dict(
            api_key=self.credentials,
            forum=self.group_name
        )

        threads = {}

        for response in self._paginate(self.LIST_THREADS_URL, base_params):

            link = response['link']
            if '127.0.0.1' in link or 'localhost' in link:
                # thread created from a local development server
                continue

            # Save thread info
            thread_id = response['id']
            thread_count = response['posts']

            print("Working on thread %s (%d posts)"%(thread_id,thread_count))
            if not thread_count > 0:
                # nothing to index for this thread
                continue

            # Save comments for this thread
            thread_comments = []
            params_comments = dict(base_params)
            params_comments['thread'] = thread_id
            for comment in self._paginate(self.LIST_POSTS_URL, params_comments):
                print(" + %s"%(comment['message']))
                thread_comments.append(comment['message'])

            # Literal replacement: the old regex pattern left the '.'
            # unescaped, so it matched any character in that position
            clean_link = link.replace('data-commons.us','nihdatacommons.us')

            # Finished working on thread.
            threads[thread_id] = dict(
                id = response['id'],
                created_time = dateutil.parser.parse(response['createdAt']),
                title = response['title'],
                forum = response['forum'],
                link = clean_link,
                content = "\n\n-----".join(thread_comments)
            )

        self.threads = threads
        self.crawled_comments = True
|
||||||
|
|
@@ -1,5 +1,7 @@
|
|||||||
import requests, os, re
|
import requests, os, re
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import dateutil.parser
|
||||||
|
import datetime
|
||||||
|
|
||||||
class GroupsIOException(Exception):
|
class GroupsIOException(Exception):
|
||||||
pass
|
pass
|
||||||
@@ -64,7 +66,7 @@ class GroupsIOArchivesCrawler(object):
|
|||||||
|
|
||||||
## Short circuit
|
## Short circuit
|
||||||
## for debugging purposes
|
## for debugging purposes
|
||||||
#break
|
break
|
||||||
|
|
||||||
return subgroups
|
return subgroups
|
||||||
|
|
||||||
@@ -251,7 +253,7 @@ class GroupsIOArchivesCrawler(object):
|
|||||||
subject = soup.find('title').text
|
subject = soup.find('title').text
|
||||||
|
|
||||||
# Extract information for the schema:
|
# Extract information for the schema:
|
||||||
# - permalink for thread (done)
|
# - permalink for thread (done above)
|
||||||
# - subject/title (done)
|
# - subject/title (done)
|
||||||
# - original sender email/name (done)
|
# - original sender email/name (done)
|
||||||
# - content (done)
|
# - content (done)
|
||||||
@@ -266,11 +268,35 @@ class GroupsIOArchivesCrawler(object):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
# found an email!
|
# found an email!
|
||||||
# this is a maze, thanks groups.io
|
# this is a maze, not amazing.
|
||||||
|
# thanks groups.io!
|
||||||
td = tr.find('td')
|
td = tr.find('td')
|
||||||
divrow = td.find('div',{'class':'row'}).find('div',{'class':'pull-left'})
|
|
||||||
|
sender_divrow = td.find('div',{'class':'row'})
|
||||||
|
sender_divrow = sender_divrow.find('div',{'class':'pull-left'})
|
||||||
if (i+1)==1:
|
if (i+1)==1:
|
||||||
original_sender = divrow.text.strip()
|
original_sender = sender_divrow.text.strip()
|
||||||
|
|
||||||
|
date_divrow = td.find('div',{'class':'row'})
|
||||||
|
date_divrow = date_divrow.find('div',{'class':'pull-right'})
|
||||||
|
date_divrow = date_divrow.find('font',{'class':'text-muted'})
|
||||||
|
date_divrow = date_divrow.find('script').text
|
||||||
|
try:
|
||||||
|
time_seconds = re.search(' [0-9]{1,} ',date_divrow).group(0)
|
||||||
|
time_seconds = time_seconds.strip()
|
||||||
|
# Thanks groups.io for the weird date formatting
|
||||||
|
time_seconds = time_seconds[:10]
|
||||||
|
mmicro_seconds = time_seconds[10:]
|
||||||
|
if (i+1)==1:
|
||||||
|
created_time = datetime.datetime.utcfromtimestamp(int(time_seconds))
|
||||||
|
modified_time = datetime.datetime.utcfromtimestamp(int(time_seconds))
|
||||||
|
else:
|
||||||
|
modified_time = datetime.datetime.utcfromtimestamp(int(time_seconds))
|
||||||
|
|
||||||
|
except AttributeError:
|
||||||
|
created_time = None
|
||||||
|
modified_time = None
|
||||||
|
|
||||||
for div in td.find_all('div'):
|
for div in td.find_all('div'):
|
||||||
if div.has_attr('id'):
|
if div.has_attr('id'):
|
||||||
|
|
||||||
@@ -299,7 +325,10 @@ class GroupsIOArchivesCrawler(object):
|
|||||||
|
|
||||||
thread = {
|
thread = {
|
||||||
'permalink' : permalink,
|
'permalink' : permalink,
|
||||||
|
'created_time' : created_time,
|
||||||
|
'modified_time' : modified_time,
|
||||||
'subject' : subject,
|
'subject' : subject,
|
||||||
|
'subgroup' : subgroup_name,
|
||||||
'original_sender' : original_sender,
|
'original_sender' : original_sender,
|
||||||
'content' : full_content
|
'content' : full_content
|
||||||
}
|
}
|
||||||
@@ -324,11 +353,13 @@ class GroupsIOArchivesCrawler(object):
|
|||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
# We don't care about anything except title and ugly link
|
# This is where we extract
|
||||||
|
# a list of thread titles
|
||||||
|
# and corresponding links.
|
||||||
subject = row.find('span',{'class':'subject'})
|
subject = row.find('span',{'class':'subject'})
|
||||||
title = subject.get_text()
|
title = subject.get_text()
|
||||||
link = row.find('a')['href']
|
link = row.find('a')['href']
|
||||||
#print(title)
|
|
||||||
results.append((title,link))
|
results.append((title,link))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
89
hypothesis_util.py
Normal file
89
hypothesis_util.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
def get_headers():
    """
    Build the HTTP headers that authenticate a Hypothesis API request.

    The API token is read from the HYPOTHESIS_TOKEN environment
    variable and returned as a bearer-token Authorization header.
    Raises Exception if the variable is not set.
    """
    token = os.environ.get('HYPOTHESIS_TOKEN')
    if token is None:
        raise Exception("Need to specify Hypothesis token with HYPOTHESIS_TOKEN env var")

    return {'Authorization': 'Bearer %s'%(token)}
|
||||||
|
|
||||||
|
|
||||||
|
def basic_auth():
    """
    Smoke-test authentication against the Hypothesis API.

    Sends an authenticated GET to the API root and pretty-prints the
    JSON body on HTTP 200; on any other status, prints the status
    code. Returns nothing.
    """
    # The original literal had a stray leading space before "https"
    url = 'https://hypothes.is/api'

    # Get the authorization header
    headers = get_headers()

    # Make the request
    response = requests.get(url, headers=headers)

    if response.status_code==200:

        # Interpret results as JSON
        dat = response.json()
        print(json.dumps(dat, indent=4))

    else:

        print("Response status code was not OK: %d"%(response.status_code))
|
||||||
|
|
||||||
|
|
||||||
|
def list_annotations(annotation_id='kEaohJC9Eeiy_UOozkpkyA'):
    """
    Fetch one Hypothesis annotation by id and print it.

    Despite the name, this requests a single annotation
    (/api/annotations/<annotation_id>), not a listing. The default
    id is the sample annotation that used to be hard-coded here.

    Pretty-prints the JSON body on HTTP 200; on any other status,
    prints the status code. Returns nothing.
    """
    url = 'https://hypothes.is/api/annotations/%s'%(annotation_id)

    # Get the authorization header
    headers = get_headers()

    # Make the request
    response = requests.get(url, headers=headers)

    if response.status_code==200:

        # Interpret results as JSON
        dat = response.json()
        print(json.dumps(dat, indent=4))

    else:

        print("Response status code was not OK: %d"%(response.status_code))
|
||||||
|
|
||||||
|
|
||||||
|
def search_annotations(url_pattern='*pilot.nihdatacommons.us*', limit=200):
    """
    Query the Hypothesis search endpoint and print the result.

    url_pattern is sent as the `url` query parameter and limit as the
    `limit` query parameter; the defaults are the values that used to
    be hard-coded here.

    Pretty-prints the JSON body on HTTP 200; on any other status,
    prints the status code. Returns nothing.
    """
    # The original literal had a stray leading space before "https"
    url = 'https://hypothes.is/api/search'

    # Get the authorization header
    headers = get_headers()

    # Set query params
    # NOTE(review): whether the API treats '*' in `url` as a wildcard
    # is not visible here - confirm against the Hypothesis API docs.
    params = dict(
        url = url_pattern,
        limit = limit
    )
    #http://pilot.nihdatacommons.us/organize/CopperInternalDeliveryWorkFlow/',

    # Make the request
    response = requests.get(url, headers=headers, params=params)

    if response.status_code==200:

        # Interpret results as JSON
        dat = response.json()
        print(json.dumps(dat, indent=4))

    else:

        print("Response status code was not OK: %d"%(response.status_code))
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: when run directly (not imported),
# perform the annotation search and print its result.
if __name__ == "__main__":
    search_annotations()
|
||||||
|
|
@@ -22,6 +22,7 @@ var initIssuesTable = false;
|
|||||||
var initGhfilesTable = false;
|
var initGhfilesTable = false;
|
||||||
var initMarkdownTable = false;
|
var initMarkdownTable = false;
|
||||||
var initEmailthreadsTable = false;
|
var initEmailthreadsTable = false;
|
||||||
|
var initDisqusTable = false;
|
||||||
|
|
||||||
$(document).ready(function() {
|
$(document).ready(function() {
|
||||||
var url_string = document.location.toString();
|
var url_string = document.location.toString();
|
||||||
@@ -32,10 +33,6 @@ $(document).ready(function() {
|
|||||||
load_gdoc_table();
|
load_gdoc_table();
|
||||||
var divList = $('div#collapseDrive').addClass('in');
|
var divList = $('div#collapseDrive').addClass('in');
|
||||||
|
|
||||||
} else if (d==='emailthread') {
|
|
||||||
load_emailthreads_table();
|
|
||||||
var divList = $('div#collapseThreads').addClass('in');
|
|
||||||
|
|
||||||
} else if (d==='issue') {
|
} else if (d==='issue') {
|
||||||
load_issue_table();
|
load_issue_table();
|
||||||
var divList = $('div#collapseIssues').addClass('in');
|
var divList = $('div#collapseIssues').addClass('in');
|
||||||
@@ -48,6 +45,14 @@ $(document).ready(function() {
|
|||||||
load_markdown_table();
|
load_markdown_table();
|
||||||
var divList = $('div#collapseMarkdown').addClass('in');
|
var divList = $('div#collapseMarkdown').addClass('in');
|
||||||
|
|
||||||
|
} else if (d==='emailthread') {
|
||||||
|
load_emailthreads_table();
|
||||||
|
var divList = $('div#collapseThreads').addClass('in');
|
||||||
|
|
||||||
|
} else if (d==='disqus') {
|
||||||
|
load_disqusthreads_table();
|
||||||
|
var divList = $('div#collapseDisqus').addClass('in');
|
||||||
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -77,9 +82,9 @@ function load_gdoc_table(){
|
|||||||
if(!initGdocTable) {
|
if(!initGdocTable) {
|
||||||
var divList = $('div#collapseDrive').attr('class');
|
var divList = $('div#collapseDrive').attr('class');
|
||||||
if (divList.indexOf('in') !== -1) {
|
if (divList.indexOf('in') !== -1) {
|
||||||
console.log('Closing Google Drive master list');
|
//console.log('Closing Google Drive master list');
|
||||||
} else {
|
} else {
|
||||||
console.log('Opening Google Drive master list');
|
//console.log('Opening Google Drive master list');
|
||||||
|
|
||||||
$.getJSON("/list/gdoc", function(result){
|
$.getJSON("/list/gdoc", function(result){
|
||||||
|
|
||||||
@@ -125,7 +130,7 @@ function load_gdoc_table(){
|
|||||||
|
|
||||||
initGdocTable = true
|
initGdocTable = true
|
||||||
});
|
});
|
||||||
console.log('Finished loading Google Drive master list');
|
//console.log('Finished loading Google Drive master list');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -137,9 +142,9 @@ function load_issue_table(){
|
|||||||
if(!initIssuesTable) {
|
if(!initIssuesTable) {
|
||||||
var divList = $('div#collapseIssues').attr('class');
|
var divList = $('div#collapseIssues').attr('class');
|
||||||
if (divList.indexOf('in') !== -1) {
|
if (divList.indexOf('in') !== -1) {
|
||||||
console.log('Closing Github issues master list');
|
//console.log('Closing Github issues master list');
|
||||||
} else {
|
} else {
|
||||||
console.log('Opening Github issues master list');
|
//console.log('Opening Github issues master list');
|
||||||
|
|
||||||
$.getJSON("/list/issue", function(result){
|
$.getJSON("/list/issue", function(result){
|
||||||
var r = new Array(), j = -1, size=result.length;
|
var r = new Array(), j = -1, size=result.length;
|
||||||
@@ -183,7 +188,7 @@ function load_issue_table(){
|
|||||||
|
|
||||||
initIssuesTable = true;
|
initIssuesTable = true;
|
||||||
});
|
});
|
||||||
console.log('Finished loading Github issues master list');
|
//console.log('Finished loading Github issues master list');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -195,13 +200,13 @@ function load_ghfile_table(){
|
|||||||
if(!initGhfilesTable) {
|
if(!initGhfilesTable) {
|
||||||
var divList = $('div#collapseFiles').attr('class');
|
var divList = $('div#collapseFiles').attr('class');
|
||||||
if (divList.indexOf('in') !== -1) {
|
if (divList.indexOf('in') !== -1) {
|
||||||
console.log('Closing Github files master list');
|
//console.log('Closing Github files master list');
|
||||||
} else {
|
} else {
|
||||||
console.log('Opening Github files master list');
|
//console.log('Opening Github files master list');
|
||||||
|
|
||||||
$.getJSON("/list/ghfile", function(result){
|
$.getJSON("/list/ghfile", function(result){
|
||||||
console.log("-----------");
|
//console.log("-----------");
|
||||||
console.log(result);
|
//console.log(result);
|
||||||
var r = new Array(), j = -1, size=result.length;
|
var r = new Array(), j = -1, size=result.length;
|
||||||
r[++j] = '<thead>'
|
r[++j] = '<thead>'
|
||||||
r[++j] = '<tr class="header-row">';
|
r[++j] = '<tr class="header-row">';
|
||||||
@@ -237,7 +242,7 @@ function load_ghfile_table(){
|
|||||||
|
|
||||||
initGhfilesTable = true;
|
initGhfilesTable = true;
|
||||||
});
|
});
|
||||||
console.log('Finished loading Github file list');
|
//console.log('Finished loading Github file list');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -249,9 +254,9 @@ function load_markdown_table(){
|
|||||||
if(!initMarkdownTable) {
|
if(!initMarkdownTable) {
|
||||||
var divList = $('div#collapseMarkdown').attr('class');
|
var divList = $('div#collapseMarkdown').attr('class');
|
||||||
if (divList.indexOf('in') !== -1) {
|
if (divList.indexOf('in') !== -1) {
|
||||||
console.log('Closing Github markdown master list');
|
//console.log('Closing Github markdown master list');
|
||||||
} else {
|
} else {
|
||||||
console.log('Opening Github markdown master list');
|
//console.log('Opening Github markdown master list');
|
||||||
|
|
||||||
$.getJSON("/list/markdown", function(result){
|
$.getJSON("/list/markdown", function(result){
|
||||||
var r = new Array(), j = -1, size=result.length;
|
var r = new Array(), j = -1, size=result.length;
|
||||||
@@ -289,7 +294,7 @@ function load_markdown_table(){
|
|||||||
|
|
||||||
initMarkdownTable = true;
|
initMarkdownTable = true;
|
||||||
});
|
});
|
||||||
console.log('Finished loading Markdown list');
|
//console.log('Finished loading Markdown list');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -302,9 +307,9 @@ function load_emailthreads_table(){
|
|||||||
if(!initEmailthreadsTable) {
|
if(!initEmailthreadsTable) {
|
||||||
var divList = $('div#collapseThreads').attr('class');
|
var divList = $('div#collapseThreads').attr('class');
|
||||||
if (divList.indexOf('in') !== -1) {
|
if (divList.indexOf('in') !== -1) {
|
||||||
console.log('Closing Groups.io email threads master list');
|
//console.log('Closing Groups.io email threads master list');
|
||||||
} else {
|
} else {
|
||||||
console.log('Opening Groups.io email threads master list');
|
//console.log('Opening Groups.io email threads master list');
|
||||||
|
|
||||||
$.getJSON("/list/emailthread", function(result){
|
$.getJSON("/list/emailthread", function(result){
|
||||||
var r = new Array(), j = -1, size=result.length;
|
var r = new Array(), j = -1, size=result.length;
|
||||||
@@ -340,7 +345,57 @@ function load_emailthreads_table(){
|
|||||||
|
|
||||||
initEmailthreadsTable = true;
|
initEmailthreadsTable = true;
|
||||||
});
|
});
|
||||||
console.log('Finished loading Groups.io email threads list');
|
//console.log('Finished loading Groups.io email threads list');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------
|
||||||
|
// Disqus Comment Threads
|
||||||
|
|
||||||
|
/**
 * Lazily build the Disqus comment threads master list.
 *
 * Called when the Disqus panel header is clicked (and on page load
 * for doctype=disqus). If the table has not been built yet and the
 * #collapseDisqus panel does not already carry the 'in' class, fetches
 * /list/disqus and renders the result into #disqus-master-list as a
 * DataTable with two columns: page title (linked) and creation time.
 */
function load_disqusthreads_table(){
    // Guard on the Disqus flag. The previous guard tested
    // initEmailthreadsTable, so this table was never built once the
    // email threads table had loaded, and could be built repeatedly
    // before that.
    if(!initDisqusTable) {
        var divList = $('div#collapseDisqus').attr('class');
        if (divList.indexOf('in') !== -1) {
            //console.log('Closing Disqus comment threads master list');
        } else {
            //console.log('Opening Disqus comment threads master list');

            $.getJSON("/list/disqus", function(result){
                // NOTE(review): title and url are concatenated into the
                // markup unescaped, as in the sibling loaders - confirm
                // the values are trusted or sanitized server-side.
                var r = new Array(), j = -1, size=result.length;
                r[++j] = '<thead>'
                r[++j] = '<tr class="header-row">';
                r[++j] = '<th width="70%">Page Title</th>';
                r[++j] = '<th width="30%">Created</th>';
                r[++j] = '</tr>';
                r[++j] = '</thead>'
                r[++j] = '<tbody>'
                for (var i=0; i<size; i++){
                    r[++j] ='<tr><td>';
                    r[++j] = '<a href="' + result[i]['url'] + '" target="_blank">'
                    r[++j] = result[i]['title'];
                    r[++j] = '</a>'
                    r[++j] = '</td><td>';
                    r[++j] = result[i]['created_time'];
                    r[++j] = '</td></tr>';
                }
                r[++j] = '</tbody>'

                // Construct names of id tags
                var doctype = 'disqus';
                var idlabel = '#' + doctype + '-master-list';
                var filtlabel = idlabel + '_filter';

                // Initialize the DataTable
                $(idlabel).html(r.join(''));
                $(idlabel).DataTable({
                    responsive: true,
                    lengthMenu: [50,100,250,500]
                });

                // Only mark the table as built once the data has arrived
                initDisqusTable = true;
            });
            // Silenced like the other loaders; it also ran before the
            // asynchronous request above had finished.
            //console.log('Finished loading Disqus comment threads list');
        }
    }
}
|
||||||
|
@@ -31,7 +31,7 @@ $(document).ready(function() {
|
|||||||
aTargets : [2]
|
aTargets : [2]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
lengthMenu: [50,100,250,500]
|
lengthMenu: [10,20,50,100]
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log('Finished loading search results list');
|
console.log('Finished loading search results list');
|
||||||
|
@@ -54,6 +54,8 @@
|
|||||||
</p>
|
</p>
|
||||||
<p><a href="{{ url_for('update_index',run_which='emailthreads') }}" class="btn btn-large btn-danger btn-reindex-type">Update Groups.io Email Threads Index</a>
|
<p><a href="{{ url_for('update_index',run_which='emailthreads') }}" class="btn btn-large btn-danger btn-reindex-type">Update Groups.io Email Threads Index</a>
|
||||||
</p>
|
</p>
|
||||||
|
<p><a href="{{ url_for('update_index',run_which='disqus') }}" class="btn btn-large btn-danger btn-reindex-type">Update Disqus Comment Threads Index</a>
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@@ -11,6 +11,7 @@
|
|||||||
{#
|
{#
|
||||||
# google drive files panel
|
# google drive files panel
|
||||||
#}
|
#}
|
||||||
|
<a name="gdoc"></a>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="panel">
|
<div class="panel">
|
||||||
<div class="panel-group" id="accordionDrive" role="tablist" aria-multiselectable="true">
|
<div class="panel-group" id="accordionDrive" role="tablist" aria-multiselectable="true">
|
||||||
@@ -48,6 +49,7 @@
|
|||||||
{#
|
{#
|
||||||
# github issue panel
|
# github issue panel
|
||||||
#}
|
#}
|
||||||
|
<a name="issue"></a>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="panel">
|
<div class="panel">
|
||||||
<div class="panel-group" id="accordionIssues" role="tablist" aria-multiselectable="true">
|
<div class="panel-group" id="accordionIssues" role="tablist" aria-multiselectable="true">
|
||||||
@@ -87,6 +89,7 @@
|
|||||||
{#
|
{#
|
||||||
# github file panel
|
# github file panel
|
||||||
#}
|
#}
|
||||||
|
<a name="ghfile"></a>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="panel">
|
<div class="panel">
|
||||||
<div class="panel-group" id="accordionFiles" role="tablist" aria-multiselectable="true">
|
<div class="panel-group" id="accordionFiles" role="tablist" aria-multiselectable="true">
|
||||||
@@ -124,6 +127,7 @@
|
|||||||
{#
|
{#
|
||||||
# gh markdown file panel
|
# gh markdown file panel
|
||||||
#}
|
#}
|
||||||
|
<a name="markdown"></a>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="panel">
|
<div class="panel">
|
||||||
<div class="panel-group" id="accordionMarkdown" role="tablist" aria-multiselectable="true">
|
<div class="panel-group" id="accordionMarkdown" role="tablist" aria-multiselectable="true">
|
||||||
@@ -160,8 +164,9 @@
|
|||||||
|
|
||||||
|
|
||||||
{#
|
{#
|
||||||
# groups.io
|
# groups.io email threads
|
||||||
#}
|
#}
|
||||||
|
<a name="emailthread"></a>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="panel">
|
<div class="panel">
|
||||||
<div class="panel-group" id="accordionThreads" role="tablist" aria-multiselectable="true">
|
<div class="panel-group" id="accordionThreads" role="tablist" aria-multiselectable="true">
|
||||||
@@ -195,6 +200,42 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{#
|
||||||
|
# disqus comment threads
|
||||||
|
#}
|
||||||
|
<a name="disqus"></a>
|
||||||
|
<div class="row">
|
||||||
|
<div class="panel">
|
||||||
|
<div class="panel-group" id="accordionDisqus" role="tablist" aria-multiselectable="true">
|
||||||
|
<div class="panel panel-default">
|
||||||
|
<div class="panel-heading" role="tab" id="disqus">
|
||||||
|
|
||||||
|
<h2 class="masterlist-header">
|
||||||
|
<a class="collapsed"
|
||||||
|
role="button"
|
||||||
|
onClick="load_disqusthreads_table()"
|
||||||
|
data-toggle="collapse"
|
||||||
|
data-parent="#accordionDisqus"
|
||||||
|
href="#collapseDisqus"
|
||||||
|
aria-expanded="true"
|
||||||
|
aria-controls="collapseDisqus">
|
||||||
|
Disqus Comment Threads <small>indexed by centillion</small>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div id="collapseDisqus" class="panel-collapse collapse" role="tabpanel"
|
||||||
|
aria-labelledby="disqus">
|
||||||
|
<div class="panel-body">
|
||||||
|
<table class="table table-striped" id="disqus-master-list">
|
||||||
|
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
@@ -68,28 +68,33 @@
|
|||||||
<b>Indexing:</b>
|
<b>Indexing:</b>
|
||||||
|
|
||||||
<span class="badge">{{totals["gdoc"]}}</span>
|
<span class="badge">{{totals["gdoc"]}}</span>
|
||||||
<a href="/master_list?doctype=gdoc">
|
<a href="/master_list?doctype=gdoc#gdoc">
|
||||||
Google Drive files
|
Google Drive files
|
||||||
</a>,
|
</a>,
|
||||||
|
|
||||||
<span class="badge">{{totals["issue"]}}</span>
|
<span class="badge">{{totals["issue"]}}</span>
|
||||||
<a href="/master_list?doctype=issue">
|
<a href="/master_list?doctype=issue#issue">
|
||||||
Github issues
|
Github issues
|
||||||
</a>,
|
</a>,
|
||||||
|
|
||||||
<span class="badge">{{totals["ghfile"]}}</span>
|
<span class="badge">{{totals["ghfile"]}}</span>
|
||||||
<a href="/master_list?doctype=ghfile">
|
<a href="/master_list?doctype=ghfile#ghfile">
|
||||||
Github files
|
Github files
|
||||||
</a>,
|
</a>,
|
||||||
|
|
||||||
<span class="badge">{{totals["markdown"]}}</span>
|
<span class="badge">{{totals["markdown"]}}</span>
|
||||||
<a href="/master_list?doctype=markdown">
|
<a href="/master_list?doctype=markdown#markdown">
|
||||||
Github Markdown files
|
Github Markdown files
|
||||||
</a>,
|
</a>,
|
||||||
|
|
||||||
<span class="badge">{{totals["emailthread"]}}</span>
|
<span class="badge">{{totals["emailthread"]}}</span>
|
||||||
<a href="/master_list?doctype=emailthread">
|
<a href="/master_list?doctype=emailthread#emailthread">
|
||||||
Groups.io email threads
|
Groups.io email threads
|
||||||
|
</a>,
|
||||||
|
|
||||||
|
<span class="badge">{{totals["disqus"]}}</span>
|
||||||
|
<a href="/master_list?doctype=disqus#disqus">
|
||||||
|
Disqus comment threads
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
Reference in New Issue
Block a user