## Functions to perform DB queries and process jobs from the job queue.
## `cursor` is a MySQLdb cursor object.

from TriesA5 import TriesA5
import MySQLdb
import wiki2text_v2 as w2t
from collections import defaultdict


def get_treejson(cursor, article_id):
    """ Returns the string with the JSON that represents the saved tree,
    or None if no tree is stored for the article. """
    query = "SELECT * FROM bm_triesJsons WHERE article_id=%s;"
    cursor.execute(query, (article_id,))
    jsons_rows = cursor.fetchall()
    if len(jsons_rows) == 0:
        return None
    return jsons_rows[0][2]


def save_covering(cursor, article_id, revision_id, timestamp, jsn):
    """ Saves the covering JSON of a revision. """
    query = "INSERT INTO bm_coveringsJsons VALUES (%s, %s, %s, %s);"
    cursor.execute(query, (article_id, revision_id, timestamp, jsn))


def save_treejson(cursor, article_id, revision_id, jsn):
    """ Saves (or updates) the tree JSON of an article. """
    query = ("INSERT INTO bm_triesJsons VALUES (%s, %s, %s) "
             "ON DUPLICATE KEY UPDATE article_id=%s, revision_id=%s, tree_json=%s;")
    cursor.execute(query, (article_id, revision_id, jsn,
                           article_id, revision_id, jsn))


def delete_job_from_queue(cursor, article_id, revision_id):
    """ Removes a processed job from the queue. """
    query = "DELETE FROM bm_jobQueue WHERE article_id=%s AND revision_id=%s;"
    cursor.execute(query, (article_id, revision_id))


def get_all_jobs(cursor):
    """ Returns a dictionary of all jobs:
    the key is article_id, the value is the list of revision ids
    sorted in ascending order.
    Returns None if the queue is empty.
    """
    cursor.execute("SELECT * FROM bm_jobQueue;")
    rows = cursor.fetchall()
    if len(rows) == 0:
        return None
    # there is at least one job in the queue
    d = defaultdict(list)
    for article_id, revision_id in rows:
        d[article_id].append(revision_id)
    # sorting revision ids in ascending order
    for article_id in d:
        d[article_id] = sorted(d[article_id])
    return d


def process_a_job(cursor, job, tree_json, N, K_REV, K_TIME):
    """ Given a job, the function reads the tree from the DB,
    updates it, calculates the covering,
    saves the tree and the covering to the DB,
    and deletes the job from the DB.
    A job is a row from the job queue DB table.
""" (article_id, revision_id) = job (timestamp, revision_text, user_id, user_name) = get_jobinfo(cursor, revision_id) a5 = TriesA5(N, K_REV, K_TIME) # if we are processing not the very first revison of the article # then load tree from json if tree_json != None: a5.load_from_json(tree_json) # adding new revision revision_text_tokens = w2t.rawtext2tokens(revision_text) a5.add_revision(revision_text_tokens, timestamp, user_id, user_name, wiki_revision_id=int(revision_id)) # save covering jsn = a5.dump_covering2json() save_covering(cursor, article_id, revision_id, timestamp, jsn) # save the tree save_treejson(cursor, article_id, revision_id, a5.dump2json()) # delete the job from the queue delete_job_from_queue(cursor, article_id, revision_id) def get_jobinfo(cursor, revision_id): """ function returns tuple (timestamp, text_id, user_id, user_name) """ query = "SELECT * FROM revision WHERE rev_id=%s;" cursor.execute(query % revision_id) rows = cursor.fetchall() if len(rows) == 0: return (None, None, None, None) row = rows[0] timestamp = row[6] text_id = row[2] user_id = row[5] user_name = row[6] # obtaining text itself query = "SELECT * FROM text WHERE old_id=%s;" cursor.execute(query % text_id) rows = cursor.fetchall() if len(rows) == 0: return (timestamp, None, user_id, user_name) text = rows[0][1] return (timestamp, text, user_id, user_name) def is_tree_uptodate(cursor, tree_json, article_id, revision_id): """ function returns true if tree stored in tree_json was previous revision of an article with article_id and revision_id """ if tree_json == None: return False last_inserted_rev_id = TriesA5.get_last_revision_id(tree_json) # getting id of previous revision rev_parent_id = get_previous_revision_id(cursor, revision_id) return rev_parent_id == last_inserted_rev_id def fetch_new_revisions(cursor, tree_json, article_id): """ function returns a list of id's of all revisions which have not been added to the tree """ if tree_json == None: last_inserted_rev_id = 0 else: last_inserted_rev_id = TriesA5.get_last_revision_id(tree_json) latest_revision_id = get_latest_revision_id(cursor, article_id) # starting from the latest revision iterate down # untill we get whether last inserted revision id or revision with id zero all_rev_id = [] current_rev_id = latest_revision_id while True: if current_rev_id == 0 or current_rev_id == last_inserted_rev_id: break all_rev_id.append(current_rev_id) current_rev_id = get_previous_revision_id(cursor, current_rev_id) return all_rev_id def get_latest_revision_id(cursor, article_id): """ function returns id of the latest revision of given article """ query = "SELECT * FROM page WHERE page_id=%s;" cursor.execute(query % article_id) row = cursor.fetchone() if row == None: raise Exception("there isn't article with id %s" % article_id) latest_revision_id = row[9] return latest_revision_id def get_previous_revision_id(cursor, revision_id): """ function returns id of previous revision """ query = "SELECT * FROM revision WHERE rev_id=%s;" % revision_id cursor.execute(query) row = cursor.fetchone() if row == None: raise Exception("there isn't revision with id %s" % revision_id) rev_parent_id = row[10] return rev_parent_id def put_job_in_queue(cursor, article_id, revision_id): query = "INSERT INTO bm_jobQueue (article_id, revision_id) VALUES (%s, %s);" cursor.execute(query % (article_id, revision_id))