6 Commits

Author SHA1 Message Date
cf1f707243 update test plan with TOC + autoformatting changes from pandoc 2018-08-24 10:26:33 -07:00
c0f087a549 fix document title 2018-08-24 10:23:02 -07:00
907e70f953 add table of contents and autoformatting.
This commit was the result of running a pandoc command
to clean up the formatting and generate a table of contents
for the markdown document.
2018-08-24 10:21:48 -07:00
96e4e7e662 improve local tests idea 2018-08-23 12:44:54 -07:00
c790babe80 flesh out test plans in greater detail 2018-08-23 12:36:29 -07:00
adaf25d751 add QE and testing readmes 2018-08-23 11:18:45 -07:00
4 changed files with 554 additions and 275 deletions

View File

@@ -21,8 +21,6 @@ import dateutil.parser
from whoosh.qparser import MultifieldParser, QueryParser
from whoosh.analysis import StemmingAnalyzer
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh import fields, index
"""
@@ -182,38 +180,30 @@ class Search:
# is defined.
schema = Schema(
id = fields.ID(stored=True, unique=True),
kind = fields.ID(stored=True),
id = ID(stored=True, unique=True),
kind = ID(stored=True),
created_time = fields.DATETIME(stored=True),
modified_time = fields.DATETIME(stored=True),
indexed_time = fields.DATETIME(stored=True),
created_time = ID(stored=True),
modified_time = ID(stored=True),
indexed_time = ID(stored=True),
title = fields.TEXT(stored=True, field_boost=100.0),
title = TEXT(stored=True, field_boost=100.0),
url = ID(stored=True, unique=True),
url = fields.ID(stored=True),
mimetype=ID(stored=True),
owner_email=ID(stored=True),
owner_name=TEXT(stored=True),
mimetype = fields.TEXT(stored=True),
repo_name=TEXT(stored=True),
repo_url=ID(stored=True),
owner_email = fields.ID(stored=True),
owner_name = fields.TEXT(stored=True),
# mainly for email threads, groups.io, hypothesis
group = fields.ID(stored=True),
repo_name = fields.TEXT(stored=True),
repo_url = fields.ID(stored=True),
github_user = fields.TEXT(stored=True),
tags = fields.KEYWORD(commas=True,
stored=True,
lowercase=True),
github_user=TEXT(stored=True),
# comments only
issue_title = fields.TEXT(stored=True, field_boost=100.0),
issue_url = fields.ID(stored=True),
issue_title=TEXT(stored=True, field_boost=100.0),
issue_url=ID(stored=True),
content = fields.TEXT(stored=True, analyzer=stemming_analyzer)
content=TEXT(stored=True, analyzer=stemming_analyzer)
)
@@ -253,22 +243,17 @@ class Search:
writer.delete_by_term('id',item['id'])
# Index a plain google drive file
created_time = dateutil.parser.parse(item['createdTime'])
modified_time = dateutil.parser.parse(item['modifiedTime'])
indexed_time = datetime.now().replace(microsecond=0)
try:
writer.add_document(
id = item['id'],
kind = 'gdoc',
created_time = created_time,
modified_time = modified_time,
indexed_time = indexed_time,
created_time = item['createdTime'],
modified_time = item['modifiedTime'],
indexed_time = datetime.now().replace(microsecond=0).isoformat(),
title = item['name'],
url = item['webViewLink'],
mimetype = mimetype,
owner_email = item['owners'][0]['emailAddress'],
owner_name = item['owners'][0]['displayName'],
group='',
repo_name='',
repo_url='',
github_user='',
@@ -276,9 +261,6 @@ class Search:
issue_url='',
content = content
)
except ValueError as e:
print(repr(e))
print(" > XXXXXX Failed to index Google Drive file \"%s\""%(item['name']))
else:
@@ -332,7 +314,7 @@ class Search:
)
assert output == ""
except RuntimeError:
print(" > XXXXXX Failed to index Google Drive document \"%s\""%(item['name']))
print(" > XXXXXX Failed to index document \"%s\""%(item['name']))
# If export was successful, read contents of markdown
@@ -360,22 +342,17 @@ class Search:
else:
print(" > Creating a new record")
try:
created_time = dateutil.parser.parse(item['createdTime'])
modified_time = dateutil.parser.parse(item['modifiedTime'])
indexed_time = datetime.now()
writer.add_document(
id = item['id'],
kind = 'gdoc',
created_time = created_time,
modified_time = modified_time,
indexed_time = indexed_time,
created_time = item['createdTime'],
modified_time = item['modifiedTime'],
indexed_time = datetime.now().replace(microsecond=0).isoformat(),
title = item['name'],
url = item['webViewLink'],
mimetype = mimetype,
owner_email = item['owners'][0]['emailAddress'],
owner_name = item['owners'][0]['displayName'],
group='',
repo_name='',
repo_url='',
github_user='',
@@ -383,10 +360,6 @@ class Search:
issue_url='',
content = content
)
except ValueError as e:
print(repr(e))
print(" > XXXXXX Failed to index Google Drive file \"%s\""%(item['name']))
@@ -420,14 +393,13 @@ class Search:
issue_comment_content += comment.body.rstrip()
issue_comment_content += "\n"
# Now create the actual search index record.
# Now create the actual search index record
created_time = clean_timestamp(issue.created_at)
modified_time = clean_timestamp(issue.updated_at)
indexed_time = clean_timestamp(datetime.now())
# Add one document per issue thread,
# containing entire text of thread.
created_time = issue.created_at
modified_time = issue.updated_at
indexed_time = datetime.now()
try:
writer.add_document(
id = issue.html_url,
kind = 'issue',
@@ -439,7 +411,6 @@ class Search:
mimetype='',
owner_email='',
owner_name='',
group='',
repo_name = repo_name,
repo_url = repo_url,
github_user = issue.user.login,
@@ -447,9 +418,6 @@ class Search:
issue_url = issue.html_url,
content = issue_comment_content
)
except ValueError as e:
print(repr(e))
print(" > XXXXXX Failed to index Github issue \"%s\""%(issue.title))
@@ -479,8 +447,7 @@ class Search:
print(" > XXXXXXXX Failed to find file info.")
return
indexed_time = datetime.now()
indexed_time = clean_timestamp(datetime.now())
if fext in MARKDOWN_EXTS:
print("Indexing markdown doc %s from repo %s"%(fname,repo_name))
@@ -509,19 +476,17 @@ class Search:
usable_url = "https://github.com/%s/blob/master/%s"%(repo_name, fpath)
# Now create the actual search index record
try:
writer.add_document(
id = fsha,
kind = 'markdown',
created_time = None,
modified_time = None,
created_time = '',
modified_time = '',
indexed_time = indexed_time,
title = fname,
url = usable_url,
mimetype='',
owner_email='',
owner_name='',
group='',
repo_name = repo_name,
repo_url = repo_url,
github_user = '',
@@ -529,11 +494,6 @@ class Search:
issue_url = '',
content = content
)
except ValueError as e:
print(repr(e))
print(" > XXXXXX Failed to index Github markdown file \"%s\""%(fname))
else:
print("Indexing github file %s from repo %s"%(fname,repo_name))
@@ -541,19 +501,17 @@ class Search:
key = fname+"_"+fsha
# Now create the actual search index record
try:
writer.add_document(
id = key,
kind = 'ghfile',
created_time = None,
modified_time = None,
created_time = '',
modified_time = '',
indexed_time = indexed_time,
title = fname,
url = repo_url,
mimetype='',
owner_email='',
owner_name='',
group='',
repo_name = repo_name,
repo_url = repo_url,
github_user = '',
@@ -561,9 +519,6 @@ class Search:
issue_url = '',
content = ''
)
except ValueError as e:
print(repr(e))
print(" > XXXXXX Failed to index Github file \"%s\""%(fname))
@@ -577,32 +532,20 @@ class Search:
Use a Github file API record to add a filename
to the search index.
"""
if 'created_time' in d.keys() and d['created_time'] is not None:
created_time = d['created_time']
else:
created_time = None
if 'modified_time' in d.keys() and d['modified_time'] is not None:
modified_time = d['modified_time']
else:
modified_time = None
indexed_time = datetime.now()
indexed_time = clean_timestamp(datetime.now())
# Now create the actual search index record
try:
writer.add_document(
id = d['permalink'],
kind = 'emailthread',
created_time = created_time,
modified_time = modified_time,
created_time = '',
modified_time = '',
indexed_time = indexed_time,
title = d['subject'],
url = d['permalink'],
mimetype='',
owner_email='',
owner_name=d['original_sender'],
group=d['subgroup'],
repo_name = '',
repo_url = '',
github_user = '',
@@ -610,9 +553,7 @@ class Search:
issue_url = '',
content = d['content']
)
except ValueError as e:
print(repr(e))
print(" > XXXXXX Failed to index Groups.io thread \"%s\""%(d['subject']))
@@ -690,10 +631,10 @@ class Search:
full_items[f['id']] = f
## Shorter:
#break
# Longer:
if nextPageToken is None:
break
## Longer:
#if nextPageToken is None:
# break
writer = self.ix.writer()
@@ -701,7 +642,7 @@ class Search:
temp_dir = tempfile.mkdtemp(dir=os.getcwd())
print("Temporary directory: %s"%(temp_dir))
try:
# Drop any id in indexed_ids
# not in remote_ids
@@ -729,13 +670,6 @@ class Search:
self.add_drive_file(writer, item, temp_dir, config, update=False)
count += 1
except Exception as e:
print("ERROR: While adding Google Drive files to search index")
print("-"*40)
print(repr(e))
print("-"*40)
print("Continuing...")
pass
print("Cleaning temporary directory: %s"%(temp_dir))
subprocess.call(['rm','-fr',temp_dir])
@@ -1140,7 +1074,7 @@ class Search:
elif doctype=='issue':
item_keys = ['title','repo_name','repo_url','url','created_time','modified_time']
elif doctype=='emailthread':
item_keys = ['title','owner_name','url','created_time','modified_time']
item_keys = ['title','owner_name','url']
elif doctype=='ghfile':
item_keys = ['title','repo_name','repo_url','url']
elif doctype=='markdown':
@@ -1157,6 +1091,10 @@ class Search:
for r in results:
d = {}
for k in item_keys:
if k=='created_time' or k=='modified_time':
#d[k] = r[k]
d[k] = dateutil.parser.parse(r[k]).strftime("%Y-%m-%d")
else:
d[k] = r[k]
json_results.append(d)
@@ -1170,9 +1108,7 @@ class Search:
query_string = " ".join(query_list)
query = None
if ":" in query_string:
query = QueryParser("content", self.schema)
query.add_plugin(DateParserPlugin(free=True))
query = query.parse(query_string)
query = QueryParser("content", self.schema).parse(query_string)
elif len(fields) == 1 and fields[0] == "filename":
pass
elif len(fields) == 2:
@@ -1180,12 +1116,9 @@ class Search:
else:
# If the user does not specify a field,
# these are the fields that are actually searched
fields = ['title', 'content','owner_name','owner_email','url','created_date','modified_date']
fields = ['title', 'content','owner_name','owner_email','url']
if not query:
query = MultifieldParser(fields, schema=self.ix.schema)
query.add_plugin(DateParserPlugin(free=True))
query = query.parse(query_string)
#query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
parsed_query = "%s" % query
print("query: %s" % parsed_query)
results = searcher.search(query, terms=False, scored=True, groupedby="kind")

View File

@@ -1,7 +1,5 @@
import requests, os, re
from bs4 import BeautifulSoup
import dateutil.parser
import datetime
class GroupsIOException(Exception):
pass
@@ -66,7 +64,7 @@ class GroupsIOArchivesCrawler(object):
## Short circuit
## for debugging purposes
break
#break
return subgroups
@@ -253,7 +251,7 @@ class GroupsIOArchivesCrawler(object):
subject = soup.find('title').text
# Extract information for the schema:
# - permalink for thread (done above)
# - permalink for thread (done)
# - subject/title (done)
# - original sender email/name (done)
# - content (done)
@@ -268,35 +266,11 @@ class GroupsIOArchivesCrawler(object):
pass
else:
# found an email!
# this is a maze, not amazing.
# thanks groups.io!
# this is a maze, thanks groups.io
td = tr.find('td')
sender_divrow = td.find('div',{'class':'row'})
sender_divrow = sender_divrow.find('div',{'class':'pull-left'})
divrow = td.find('div',{'class':'row'}).find('div',{'class':'pull-left'})
if (i+1)==1:
original_sender = sender_divrow.text.strip()
date_divrow = td.find('div',{'class':'row'})
date_divrow = date_divrow.find('div',{'class':'pull-right'})
date_divrow = date_divrow.find('font',{'class':'text-muted'})
date_divrow = date_divrow.find('script').text
try:
time_seconds = re.search(' [0-9]{1,} ',date_divrow).group(0)
time_seconds = time_seconds.strip()
# Thanks groups.io for the weird date formatting
time_seconds = time_seconds[:10]
mmicro_seconds = time_seconds[10:]
if (i+1)==1:
created_time = datetime.datetime.utcfromtimestamp(int(time_seconds))
modified_time = datetime.datetime.utcfromtimestamp(int(time_seconds))
else:
modified_time = datetime.datetime.utcfromtimestamp(int(time_seconds))
except AttributeError:
created_time = None
modified_time = None
original_sender = divrow.text.strip()
for div in td.find_all('div'):
if div.has_attr('id'):
@@ -325,10 +299,7 @@ class GroupsIOArchivesCrawler(object):
thread = {
'permalink' : permalink,
'created_time' : created_time,
'modified_time' : modified_time,
'subject' : subject,
'subgroup' : subgroup_name,
'original_sender' : original_sender,
'content' : full_content
}
@@ -353,13 +324,11 @@ class GroupsIOArchivesCrawler(object):
results = []
for row in rows:
# This is where we extract
# a list of thread titles
# and corresponding links.
# We don't care about anything except title and ugly link
subject = row.find('span',{'class':'subject'})
title = subject.get_text()
link = row.find('a')['href']
#print(title)
results.append((title,link))
return results

181
quality/Readme.md Normal file
View File

@@ -0,0 +1,181 @@
# Centillion Quality Engineering Plan
Table of Contents
-------
* [Centillion Quality Engineering Plan](#centillion-quality-engineering-plan)
* [Summary](#summary)
* [Tracking Bugs and Issues](#tracking-bugs-and-issues)
* [Branches, Versioning, and Git Workflow](#branches-versioning-and-git-workflow)
* [Communication and Mailing Lists](#communication-and-mailing-lists)
* [Checklists](#checklists)
* [Documentation](#documentation)
* [Configuration Management Tools](#configuration-management-tools)
* [Tests](#tests)
* [Code Reviews](#code-reviews)
* [Formal Release Process](#formal-release-process)
* [Continual Process Improvement](#continual-process-improvement)
Summary
-------
This document contains a quality engineering plan for centillion, the
Data Commons search engine.
Tracking Bugs and Issues
------------------------
We utilize the [issues
section](https://github.com/dcppc/centillion/issues) of the centillion
repository to keep track of bugs and feature requests.
Branches, Versioning, and Git Workflow
--------------------------------------
All code is kept under version control in the
[dcppc/centillion](https://github.com/dcppc/centillion) Github
repository.
**Primary Git Branches:**
We utilize a git branching pattern that has two primary branches: a
development branch and a stable branch.
- The primary **development branch** is `dcppc` and is actively
developed and deployed to <https://betasearch.nihdatacommons.us>.
- The primary **stable branch** is `releases/v1` and is deployed to
  <https://search.nihdatacommons.us>.
All tagged versions of centillion exist on the stable branch. Only
tagged versions of centillion are run on
<https://search.nihdatacommons.us>.
**Other Branches:**
Features are developed by creating a new branch from `dcppc`, working on
the feature, and opening a pull request. When the pull request is
approved, it can be merged into the `dcppc` branch.
When features have accumulated and a new version is ready, a
pre-release branch is created to prepare for the release. When the
pre-release branch is ready, it is merged into the stable branch in a
single merge commit and a new version of centillion is tagged. The new
version is deployed on <https://search.nihdatacommons.us>.
Commits to fix bugs (hotfixes) may need to be applied to both the stable
and development branches. In this case, a hotfix branch should be
created from the head commit of the stable branch, and the appropriate
changes should be made on the branch. A pull request should be opened to
merge the hotfix into the stable branch. A second pull request should
be opened to merge the hotfix into the development branch. Once the
hotfix is merged into the stable branch, a new version should be tagged.
Communication and Mailing Lists
-------------------------------
- No mailing list currently exists for centillion.
- Github issues are the primary form of communication about
development of centillion. This is the best method for communicating
bug reports or detailed information.
- The Send Feedback button on the centillion page is the primary way
of getting quick feedback from users about the search engine.
- The [\#centillion](https://nih-dcppc.slack.com/messages/CCD64QD6G)
Slack channel in the DCPPC slack workspace is the best place for
conversations about centillion (providing feedback, answering quick
questions, etc.)
Checklists
----------
We plan to utilize the Wiki feature of the Github repository to develop
checklists:
- Checklist for releases
- Checklist for deployment of <https://search.nihdatacommons.us>
  (nginx configuration, etc.)
Documentation
-------------
The documentation is a pile of markdown documents, turned into a static
site using mkdocs.
Configuration Management Tools
------------------------------
We do not currently utilize any configuration management software,
because centillion is not packaged as an importable Python module.
Packaging centillion is a future goal that is closely related to the
need to improve and modularize the internal search schema/document type
abstraction. These improvements would separate the types of collections
being indexed from "core centillion", allowing core centillion to be
packaged on its own.
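
If and when that refactoring happens, a standard setuptools layout would
likely suffice. The sketch below is illustrative only; the package name,
version, and dependency list are assumptions rather than a real
packaging plan.

```python
# setup.py (sketch only; name, version, and dependencies are assumptions)
from setuptools import setup, find_packages

setup(
    name="centillion",
    version="0.0.1",
    description="centillion: the Data Commons search engine",
    packages=find_packages(),
    install_requires=[
        "flask",   # web frontend
        "whoosh",  # search index backend
    ],
)
```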
Tests
-----
See the test plan in `tests/Readme.md` for a full test plan with more
detail.
Summary of test plan:
- Implement tests for the four major pages/components
- Login/authentication
- Search
- Master List
- Control Panel
- Test authentication with two bot accounts (Yamma Snake and Florence
  Python)
- Separate frontend and backend tests
- Add a test flag in the flask config file to change the backend
behavior of the server
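
A minimal sketch of what that flag might look like in the Flask config
file; the flag name and the behavior described in the comment are
assumptions, not the actual configuration:

```python
# config_flask.py (sketch; the flag and its meaning are assumptions)

# Hypothetical switch: when True, the server skips real API calls and
# loads the search index with locally generated fake documents instead.
TESTING = True
```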
Code Reviews
------------
CI tests will be implemented for all pull requests.
Pull requests to the **stable branch** have the following checks in
place:
- PRs to the stable branch require at least 1 PR review
- PRs to the stable branch must pass CI tests
Pull requests to the **development branch** have the following checks in
place:
- PRs to the development branch must pass CI tests
Formal Release Process
----------------------
In order to ensure a stable, consistent product, we utilize the
branching pattern described above to implement new features in the
development branch and test them out on
<https://betasearch.nihdatacommons.us>.
Once features and bug fixes have been tested and reviewed internally,
they are ready to be deployed. A new pre-release branch is created from
the development branch. The pre-release branch has a feature freeze in
place. Changes are made to the pre-release branch to prepare it for the
next major version release.
When the pre-release branch is finished, it is merged into the stable
branch. The head commit of the stable branch is tagged with the latest
release number.
Finally, the new version is deployed on
<https://search.nihdatacommons.us>.
Continual Process Improvement
-----------------------------
We will utilize the centillion wiki on Github to keep track of repeated
processes and opportunities for improvement. Feedback and ideas for
process improvement can also be submitted via Github issues.

196
tests/Readme.md Normal file
View File

@@ -0,0 +1,196 @@
Centillion Tests
================
Table of Contents
------------------
* [Centillion Tests](#centillion-tests)
* [Test Plan](#test-plan)
* [Local Tests](#local-tests)
* [Short Tests](#short-tests)
* [Long Tests](#long-tests)
* [Credentials](#credentials)
* [Detailed Description of Tests](#detailed-description-of-tests)
* [Authentication Layer Tests](#authentication-layer-tests)
* [Search Function Tests](#search-function-tests)
* [Master List Endpoint Tests](#master-list-endpoint-tests)
* [Control Panel Endpoint Tests](#control-panel-endpoint-tests)
* [Continuous Integration Plan](#continuous-integration-plan)
* [Procedure/Checklist](#procedurechecklist)
Test Plan
---------
Related: <https://github.com/dcppc/centillion/issues/82>
The test suite for centillion needs to check each of the major
components of centillion, as well as check the authentication mechanism
using multiple login credentials.
We implement the following checks:
1. Check authentication mechanism(s) (Yamma Snake and Florence Python)
2. Check search function
3. Check master list endpoint
4. Check control panel endpoint
5. Check update search index endpoints
The tests are written such that the back end and front end are tested
separately.
We also need different tiers of tests, so we don't max out API calls by
making lots of commits to multiple PRs.
We have three tiers of tests:

* Local tests - quick tests for CI, no API calls
* Short tests - tests using dummy API accounts
* Long tests - tests using DCPPC API accounts
### Local Tests
Local tests can be run locally without any interaction with APIs. These
will still utilize centillion's search schema, but will load the search
index with fake documents rather than fetching them from an API.
Uncle Archie, which runs CI tests, runs local tests only (unless you
request that it run the short or long tests).
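
As a rough sketch, a local test could build a throwaway Whoosh index
from hand-written fake documents and query it, never touching the
Google Drive or Github APIs. The trimmed schema below borrows a few
field names from the indexing code; it is an illustration, not the
real test suite:

```python
import tempfile
from whoosh import index
from whoosh.fields import Schema, ID, TEXT
from whoosh.qparser import MultifieldParser

def test_local_index_with_fake_documents():
    # Trimmed-down schema using a few of centillion's field names
    schema = Schema(id=ID(stored=True, unique=True),
                    kind=ID(stored=True),
                    title=TEXT(stored=True),
                    content=TEXT(stored=True))

    # Build the index in a scratch directory
    tmpdir = tempfile.mkdtemp()
    ix = index.create_in(tmpdir, schema)

    # Add fake documents instead of fetching anything from an API
    writer = ix.writer()
    writer.add_document(id="fake-1", kind="gdoc",
                        title="Fake Google Drive document",
                        content="data commons search engine test document")
    writer.commit()

    # Query the index the same way the search frontend would
    with ix.searcher() as searcher:
        parser = MultifieldParser(["title", "content"], schema=ix.schema)
        results = searcher.search(parser.parse("search"))
        assert len(results) == 1
```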
### Short Tests
Short tests utilize credentials for bot accounts that have intentionally
been set up to have a "known" corpus of test documents. These would
provide unit-style tests for centillion - are the mechanics of indexing
a particular type of document from a particular API working?
### Long Tests
Long tests index the real document collections, utilizing the
credentials used in the final production centillion. These tests take
longer but are more likely to catch corner cases specific to the DCPPC
documents.
Credentials
-----------
Running tests on centillion requires multiple sets of credentials. Let's
lay out what is needed:
- The Flask app requires a token/secret token API key pair to allow
users to authenticate through Github and confirm they are members of
the DCPPC organization. This OAuth application is owned by Charles
Reid (@charlesreid1).
- The search index needs a Github access token so that it can
interface with the Github API to index files and issues. This access
token is specified (along with other secrets) in the Flask
configuration file. The access token comes from Florence Python
(@fp9695253).
- The search index also requires a Google Drive API access token. This
must be an access token for a user who has authenticated with the
Centillion Google Drive OAuth application. This access token comes
from <mailroom@nihdatacommons.com>.
- The search index requires API credentials for any other APIs
associated with other document collections (Groups.io, Hypothesis,
Disqus).
- The backend test requires the credentials provided to Flask.
- The frontend test (Selenium) needs two Github username/passwords:
one for Florence Python (@fp9695253) and one for Yamma Snake
(@yammasnake). These are required to simulate the user
authenticating with Github through the browser.
- The frontend test credentials are a special case.
- The frontend tests expect credentials to come from environment
variables.
- These environment variables get passed in at test time.
- Tests are all run on [Uncle
Archie](https://github.com/dcppc/uncle-archie).
- Uncle Archie already has to protect a confidential config file
containing Github credentials, so add additional credentials for
frontend tests there.
- Logical separation: these credentials are not needed to
*operate* centillion, these credentials are needed to *test*
centillion
- Uncle Archie already requires github credentials, already
protects sensitive info.
- Google Drive requiring its own credentials file on disk is a
pain.
In summary: tests use the `config_flask.py` and `config_centillion.py`
files to provide centillion with the API keys it needs and to instruct
it on what to index. The credentials and config files control what the
search index will actually index. The Uncle Archie CI tester config file
contains the credentials needed to run frontend tests (which check the
login/authentication layer).
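
To make the moving parts concrete, here is a hedged sketch of how those
credentials might be laid out in `config_flask.py`; every variable name
below is an illustrative assumption, not the actual key centillion
reads:

```python
# config_flask.py (sketch; all variable names are assumptions)

# Github OAuth application (owned by @charlesreid1): lets users log in
# via Github and lets the app check DCPPC organization membership.
GITHUB_OAUTH_CLIENT_ID = "..."
GITHUB_OAUTH_CLIENT_SECRET = "..."

# Github access token (from @fp9695253) used by the indexer to crawl
# repository files and issues via the Github API.
GITHUB_TOKEN = "..."

# Google Drive OAuth credentials file for mailroom@nihdatacommons.com.
GOOGLE_DRIVE_CREDENTIALS_FILE = "credentials.json"

# Credentials for the other indexed collections.
GROUPSIO_TOKEN = "..."
HYPOTHESIS_TOKEN = "..."
DISQUS_TOKEN = "..."
```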
Detailed Description of Tests
-----------------------------
### Authentication Layer Tests
Frontend tests run as Florence Python:
- Can we log in via github and reach centillion
- Can we reach the control panel
Frontend tests run as Yamma Snake (DCPPC member):
- Can we log in via github and reach centillion
- Can we reach the control panel
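
A sketch of how one of these frontend authentication tests might be
written with Selenium, assuming the bot credentials arrive as
environment variables; the variable names, URL, and page element names
are all assumptions:

```python
import os
from selenium import webdriver

def check_github_login(user_var, pass_var):
    """Log into centillion through Github using the bot account whose
    credentials are stored in the named environment variables."""
    username = os.environ[user_var]
    password = os.environ[pass_var]

    driver = webdriver.Chrome()
    try:
        # Assumed URL of the beta deployment
        driver.get("https://betasearch.nihdatacommons.us")

        # Assumed link text; clicking it redirects to Github's login form
        driver.find_element_by_link_text("Log in with Github").click()
        driver.find_element_by_name("login").send_keys(username)
        driver.find_element_by_name("password").send_keys(password)
        driver.find_element_by_name("commit").click()

        # After authenticating we should land back on centillion
        assert "centillion" in driver.title.lower()
    finally:
        driver.quit()

# One run per bot account (environment variable names are assumptions)
check_github_login("FLORENCE_PYTHON_USER", "FLORENCE_PYTHON_PASS")
check_github_login("YAMMA_SNAKE_USER", "YAMMA_SNAKE_PASS")
```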
### Search Function Tests
Frontend tests:
- Can we enter something into the search box and submit
- Can we sort the results
- Do the results look okay
Backend tests:
- Load the search index and run a query using whoosh API
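
A minimal sketch of that backend check, opening the index directly with
the whoosh API; the index directory name is an assumption:

```python
from whoosh import index
from whoosh.qparser import MultifieldParser

def test_backend_query():
    # Open the index centillion has already built on disk
    # ("search_index" is an assumed directory name)
    ix = index.open_dir("search_index")

    # Same default fields the search frontend uses
    fields = ["title", "content", "owner_name", "owner_email", "url"]
    query = MultifieldParser(fields, schema=ix.schema).parse("data commons")

    with ix.searcher() as searcher:
        results = searcher.search(query, groupedby="kind")
        # The query should run without raising an exception
        assert results is not None
```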
### Master List Endpoint Tests
Frontend tests:
- Can we get to the master list page
- Can we sort the results
- Do the results look okay
Backend tests:
- Check the output of the `/list` API endpoint
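
For the backend check, Flask's built-in test client is one option; the
import path and the assumption that `/list` returns JSON are guesses
for illustration:

```python
import json
from centillion import app   # assumed import path for the Flask app

def test_master_list_endpoint():
    app.config['TESTING'] = True
    client = app.test_client()

    response = client.get("/list")
    assert response.status_code == 200

    # Assume the endpoint returns a JSON list of indexed documents
    items = json.loads(response.data)
    assert isinstance(items, list)
```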
### Control Panel Endpoint Tests
Frontend tests:
- Can we get to the control panel page
- Can we click the button to trigger an indexing event
Backend tests:
- Trigger a re-index of the search index from the backend.
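
A rough sketch of the backend trigger, again using the Flask test
client; the endpoint path is an assumption based on the control panel
buttons described above:

```python
from centillion import app   # assumed import path for the Flask app

def test_trigger_reindex():
    app.config['TESTING'] = True
    client = app.test_client()

    # Assumed endpoint; the control panel buttons presumably POST to
    # something like this to kick off an indexing run
    response = client.post("/update_index/github_files")
    assert response.status_code in (200, 302)
```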
### Continuous Integration Plan
Tests are automatically run using Uncle Archie for continuous
integration and deployment.
Procedure/Checklist
-------------------
Pre-release procedure:
- prepare to run all tests
- run short tests
- deploy to beta
- run long tests
- test out