import os
import numpy as np
import json
import cjson
import glob
import wiki2text_v2 as w2t
import re
import time
import gzip


class TriesA5(object):
    """ class TriesA5 stores all data and methods
    necessary for constructing a tree for A5 algorithm
    Each leaf represents some string s obtained
    by traversing the tree from root to the leaf.
    In each leaf we store covering lables of
    tokens in the string s on the path the root -> the_leaf.

    information about nodes is stored in a dictionary self.nodes
    self.nodes[nodeId] returns an list, say, the_node which
    stores infromation about a node, indices below
    which starts from NODE_ are indices in list the_node

    information about edges is stored in a dictionary self.edges
    key is souce node id and value is a dictionary, say, edge_group
    key of edge_group is first token and value is a list which contains
    source node id, destination node id, and tokens.
    So an edge is retreived by its souce node id and first token:
    e = self.edges[source_node_id][first_token]
    """
    def __init__(self, N=4, K_rev=100, K_time=90):
        # initialization class constants
        # last revision when a node was visited
        # by the algorithm
        self.NODE_LAST_VISIT = 0;
        # covering label in a leaf
        # covering label is whether integer or a list
        # it is integer in case all tokens have
        # the same label and a list otherwise
        self.NODE_COVERING_LABEL = 1;
        # source node id of an edge
        self.EDGE_SRC_NODE_ID = 0;
        # destination node id of an edge
        self.EDGE_DEST_NODE_ID = 1;
        # tokens which are on an edge
        self.EDGE_TOKENS = 2;

        # initializing class data members
        self.nodes = {}
        self.edges = {}
        # N is a constant with propertie that
        # every string of lenght >= N is rare
        # N determines depth of the tree
        self.N = N
        # last inserted node id
        self.lin_id = -1
        # the tree keeps at least K_revisions revisions
        # and at least K_time days of revisions
        # (revisions that are not older than K_time days)
        self.K_revisions = K_rev
        self.K_time = K_time
        # timeTable is a mapping revisions <=> timestamps
        self.timetable = []
        # timestamp format is 'yyyymmddhhmmss'
        self.cr_timestamp = None
        # previous revision timestamp
        self.pr_timestamp = None
        # cr_number is current revision number
        self.cr_number = -1
        # cr_covering is covering for current revision, i.e.
        # self.cr_covering[i] is an author of token i in
        # current revision (self.crevision)
        self.cr_covering = []
        # crevision is current revision - list of tokens
        self.crevision = []
        # inserting root
        self.add_node()
        # rev_incremental_number is a revision index number
        # e.g. first added revision has rev_incremental_number one,
        #      second added revision has rev_incremental_number two,
        #      third added revision has rev_incremental_number three, etc
        self.rev_incremental_number = 0
        # revision_id_table is a mapping wiki revision id <=> index number
        # the table have similar role as timetable
        self.revision_id_table = []

    @classmethod
    def from_json(cls, json_string):
        """ alternative constructor for creating an object of class TriesA5
        from json string which was obtained by method dump2json
        """
        result = cls()
        result.load_from_json(json_string)
        return result

    def add_revision(self, revision, timestamp=None,
                     user_id=None, user_name=None, wiki_revision_id=None):
        """ methods adds revision r_k (k = 1, 2, 3, ... ) to the tree
        revision is array of tokens
        let's denote S a set of all substrings s from r_k of length N

        If k == 1 then in each leaf we store 1 as covering label.
        If k > 1n then :
             - obtain covering r_k given tree for
               revisions r_1, ..., r_{k-1}
             - add to the tree all subsrings s of r_k of length N
             - update covering labels for all leaves
               with path strings from set S
        """
        revision = self.add_dummy_tokens(revision)
        # user_id, 0 - if the user is anonymous or nonexistent
        # user_name is ip adress if the user is anonymous
        self.user_id = user_id
        self.user_name = user_name
        self.crevision = revision
        if wiki_revision_id != None:
            self.cr_number = wiki_revision_id
        else:
            self.cr_number += 1
        # last inserted revision id
        self.last_inserted_revision_id = wiki_revision_id
        # revision index number
        self.rev_incremental_number += 1
        # saving previous timestamp
        self.pr_timestamp = self.cr_timestamp
        self.cr_timestamp = timestamp
        # check that new revision is older that prevision one
        # todo(michael): do we need it?
        #if (self.pr_timestamp != None and self.cr_timestamp != None
        #    and (int(self.cr_timestamp) < int(self.pr_timestamp))):
        #    raise Exception('newer revision has already been added')
        self.update_timetable(timestamp)
        self.update_revision_id_table()
        # calculating covering and adding nodes to the tree if needed
        # list_of_leaves_id is a list such that i-th element of it
        # is a node id of a leaf with string label self.crevision[i : i + N]
        list_of_leaves_id = self.calculate_covering_and_add_branches()
        # update covering labels
        self.update_covering_labels(list_of_leaves_id)
        # truncating the tree
        th = self.get_threshold()
        self.truncate(th)

    def add_dummy_tokens(self, revision):
        """ method adds N dummy tokens to the beginning and to the end
        of the revision
        """
        dummy = ["dummy"] * self.N
        result = []
        result.extend(dummy)
        result.extend(revision)
        result.extend(dummy)
        return result

    def add_node(self):
        """ method adds node to array self.nodes
        returns an id of a new node
        """
        new_node_id = self.get_new_node_id()
        new_node = []
        # setting node last visit to current revision
        new_node.append(self.cr_number)
        self.nodes[new_node_id] = new_node
        return new_node_id

    def get_node(self, node_id):
        self.nodes[node_id][self.NODE_LAST_VISIT] = self.cr_number
        return self.nodes[node_id]

    def get_leaf_covering_label(self, leaf_id):
        """ method returns covering label of
        a node with id leaf_id
        if the label is an integer then we return list of lenght N
        filled with the integer
        """
        labels = self.nodes[leaf_id][self.NODE_COVERING_LABEL]
        if not isinstance(labels, list):
            labels = [labels] * self.N
        return labels

    def set_covering_label(self, leaf_id, new_label):
        """ method sets covering label for a leaf with id leaf_id,
        new_label can be an integer of a list
        """
        node = []
        # adding last visit
        node.append(self.cr_number)
        # adding covering label
        node.append(new_label)
        self.nodes[leaf_id] = node

    def add_edge(self, src_node_id, dest_node_id, tokens):
        if not self.edges.has_key(src_node_id):
            self.edges[src_node_id] = {}
        new_edge = []
        new_edge.append(src_node_id)
        new_edge.append(dest_node_id)
        new_edge.append(tokens[:])
        self.edges[src_node_id][tokens[0]] = new_edge

    def get_edge(self, src_node_id, first_token):
        e = self.edges[src_node_id][first_token]
        # remember last visit
        self.get_node(src_node_id)
        self.get_node(e[self.EDGE_DEST_NODE_ID])
        return e

    def edge_exist(self, src_node_id, first_token):
        """ method returns true if the edge exists in the tree
        """
        return (self.edges.has_key(src_node_id) and
                    self.edges[src_node_id].has_key(first_token))

    def split_edge(self, src_node_id, first_token, split_point):
        """ src_node_id and first_token determines the edge to split
        split_point is lenth of tokens in firs part of the edge
        method returns id of a new node that splits old edge
        """
        e = self.get_edge(src_node_id, first_token)
        # shrinking the edge
        e_dest_node_id_old = e[self.EDGE_DEST_NODE_ID]
        middle_node_id = self.add_node()
        first_part_of_e = e[self.EDGE_TOKENS][:split_point]
        second_part_of_e = e[self.EDGE_TOKENS][split_point:]
        self.add_edge(src_node_id, middle_node_id, first_part_of_e)
        # creating 2nd part of the edge
        self.add_edge(middle_node_id, e_dest_node_id_old, second_part_of_e)
        return middle_node_id

    def calculate_covering_and_add_branches(self):
        """ method calculates covering for current
        revision (sets property self.cr_covering)
        and adds new branches to the tree
        (through the method get_leaf_nodeid_or_add_it)
        method returns list_of_leaves_id which is a list such
        that i-th element of it is a node id of a leaf with
        string label self.crevision[i : i + N]
        """
        # to calculate covering for each token in current revision
        # we build matrix matrix_labels such that
        # for i-th token coresponds matrix_lables[i] wich is an array of labels
        # for substring self.crevision[i],...,self.crevision[i + N - 1]
        # last N-1 indices are filled with self.cr_number
        matrix_labels = np.zeros( (len(self.crevision), self.N) )
        matrix_labels = matrix_labels + self.cr_number
        # list_of_leaves_id[idx] is a leaf id with string label
        # self.crevision[idx:idx + N]
        list_of_leaves_id = []
        # case when revision is too short
        if len(self.crevision) < self.N :
            self.cr_covering = [self.cr_number] * len(self.crevision)
            return list_of_leaves_id
        # filling matrix_labels
        for i in xrange(len(self.crevision) - self.N + 1):
            s = self.crevision[i: i + self.N]
            leaf_id = self.get_leaf_nodeid_or_add_it(s)
            list_of_leaves_id.append(leaf_id)
            matrix_labels[i] = self.get_leaf_covering_label(leaf_id)
        # calculating covering
        covering = []
        # take care of first N-1 tokens
        for i in xrange(self.N - 1):
            val = matrix_labels[i][0]
            for k in xrange(i, -1, -1):
                val = min(val, matrix_labels[k][i - k])
            covering.append(int(val))
        # rest of tokens
        for i in xrange(self.N-1, len(self.crevision), 1):
            val = matrix_labels[i][0]
            for k in xrange(self.N):
                val = min(val, matrix_labels[i-k][k])
            covering.append(int(val))
        self.cr_covering = covering
        return list_of_leaves_id

    def get_leaf_nodeid_or_add_it(self, s):
        """ given string s (list of tokens) of length N
        the method returns leaf id that string path
        the root -> the leaf equals s
        If there is no such leaf, then
        method add it to the tree and sets
        covering label of the leaf as current revision number
        """
        # we need tom "match" substring s along the tree
        # trying to make step down the tree
        # from node with current_node_id
        # set it to the root in the beginning
        # if we cannot make step then add
        # new edge so the tree contains string s
        current_node_id = 0
        # we are trying match substring s[token_idx] ... s[end]
        # in the beginning token_idx = 0
        token_idx = 0;
        while True:
            rev_tokens = s[token_idx:]
            # if the edge along which we need
            # to traverse down the tree
            # doesn not exist then we create
            # new edge and return new leaf id
            if not self.edge_exist(current_node_id, rev_tokens[0]):
                new_node_id = self.add_node()
                self.add_edge(current_node_id, new_node_id, rev_tokens)
                # adding covering label to the new created leaf
                self.set_covering_label(new_node_id, self.cr_number)
                return new_node_id
            # we can traverse down
            e = self.get_edge(current_node_id, rev_tokens[0])
            e_tokens = e[self.EDGE_TOKENS]
            # now compare tokens from the edge (e_tokens)
            # and rev_tokens (the rest of string that we need to add
            # to the tree)
            prefix_len = self._length_common_prefix(e_tokens, rev_tokens)

            # if $revTokens fits on the edge e, then
            # nothing to do, just return
            if prefix_len == len(rev_tokens):
                return e[self.EDGE_DEST_NODE_ID]

            # if edge e fits on the rev_tokens
            # then continue traversing
            if prefix_len == len(e_tokens):
                token_idx += len(e_tokens)
                current_node_id = e[self.EDGE_DEST_NODE_ID]
                continue

            # now we need create new edge because
            # prefix_len is less than lenght of the edge
            middle_node_id = self.split_edge(current_node_id,
                                             rev_tokens[0],
                                             prefix_len)
            # creating new edge with rev_tokens
            new_node_id = self.add_node()
            self.add_edge(middle_node_id,
                          new_node_id,
                          rev_tokens[prefix_len:])
            # adding covering label to the new created leaf
            self.set_covering_label(new_node_id, self.cr_number)
            return new_node_id

    def update_covering_labels(self, list_of_leaves_id):
        """  method updates all covering labels in the tree
        given covering for current revision (property self.cr_covering)
        and list_of_leaves_id (resul of calculate_covering_and_add_branches)
        """
        for i in xrange(len(self.crevision) - self.N + 1):
            leaf_id = list_of_leaves_id[i]
            labels = self.cr_covering[i : i + self.N]
            # if values in the list labels are coinside then we
            # store only the integer
            if min(labels) == max(labels):
                labels = labels[0]
            # setting covering label
            self.set_covering_label(leaf_id, labels)

    def update_revision_id_table(self):
        """ revision id timetable is needed for
        truncating nodes of the tree.
        timetable is a list of tuples
        (revision index number, self.cr_number)
        add new elemnts to the end
        [oldest ----- most recent]
        """
        # adding rule: if distance between
        # current revsion and previous inserted revision
        # is more or equal than minrev revisions then add new tuple
        # to the list
        minrev = 5
        if len(self.revision_id_table) == 0:
            tuple2add = (self.rev_incremental_number, self.cr_number)
            self.revision_id_table.append(tuple2add)
            return
        revdist = self.rev_incremental_number - self.revision_id_table[-1][0]
        if revdist >= minrev:
            tuple2add = (self.rev_incremental_number, self.cr_number)
            self.revision_id_table.append(tuple2add)
        # deleting rule: delete tuple from the table
        # if distance between current revision and tuple is more than
        # max(self.K_rev, number_of_revisions_in_timetable) + minrev revisions
        if len(self.timetable) == 0:
            th = 0
        else:
            inc_number_oldest = self.timetable[0][0]
            th = self.rev_incremental_number - inc_number_oldest
        th = max(self.K_revisions, th) + minrev
        while True:
            revdist = self.rev_incremental_number - self.revision_id_table[0][0]
            if revdist > th:
                del self.revision_id_table[0]
            else:
                break

    def update_timetable(self, timestamp=None):
        """ time table is needed for
        truncating nodes of the tree.
        timetable is a list of tuples
        (revision index number, timestamp)
        add new elemnts to the end
        [oldest ----- most recent]
        """
        self.cr_timestamp = timestamp
        if timestamp == None:
            return
        # adding rule: if day distance between
        # current revsion and previous inserted revision
        # is more or equal than mindays days then add new tuple
        # to the time list
        mindays = 5
        if len(self.timetable) == 0:
            tuple2add = (self.rev_incremental_number, self.cr_timestamp)
            self.timetable.append(tuple2add)
            return
        if self.time_distance(self.timetable[-1][1],
                              self.cr_timestamp) >= mindays:
            tuple2add = (self.rev_incremental_number, self.cr_timestamp)
            self.timetable.append(tuple2add)
        # deleting rule: delete tuple from the timetable
        # if distance between current revision and tuple is
        # more thatn self.K_time+mindays days
        while True:
            if self.time_distance(self.timetable[0][1],
                                  self.cr_timestamp) > self.K_time + mindays:
                del self.timetable[0]
            else:
                break

    def get_threshold(self):
        """ method calculates threshold for truncating the tree.

        note that revision_id_table stores maximum between K_rev revisions and
        number of revisions stored in the timetable, so threshold is the oldest
        revision id stored in revision_id_table
        """
        return self.revision_id_table[0][1]

    def truncate(self, threshold):
        """ the function delete nodes and edges.
        all nodes have revision id when they were visited last time.
        nodes and edges which were visited last time
        before threshold will be deleted.
        """
        # sorted list of pairs - id and how many
        # revisions ago it was visited
        # sorted by second parameter - revisions between last visit
        threshold = self.cr_number - threshold
        l = [(x, self.cr_number - y[self.NODE_LAST_VISIT]) for x, y
                              in sorted(self.nodes.iteritems(),
                                        key = lambda (x,y): self.cr_number
                                            - y[self.NODE_LAST_VISIT])]
        lv = np.array([y for x,y in l])
        temp = 2*threshold
        idx = lv <= threshold
        lv[idx] = temp
        idx_min = lv.argmin()
        if lv[idx_min] == temp:
            return
        id_to_del = [x for x,y in l[idx_min:]]
        # deleting
        for x in id_to_del:
            if self.edges.has_key(x):
                del self.edges[x]
        # deleting edges when rare node is
        # destination node
        edges_id_to_del = []
        for x in self.edges:
            for y in self.edges[x]:
                e = self.edges[x][y]
                if e[self.EDGE_DEST_NODE_ID] in id_to_del:
                    edges_id_to_del.append((x,y))
        for x,y in edges_id_to_del:
            del self.edges[x][y]
        for x in id_to_del:
            del self.nodes[x]

    def get_new_node_id(self):
        self.lin_id += 1
        return self.lin_id

    def _length_common_prefix(self, str1, str2):
        """ returns length of maximal common
        prefix between arguments
        """
        res = 0
        for i in xrange(min(len(str1), len(str2))):
            if str1[i] != str2[i]:
                return res
            res += 1
        return res

    def time_distance(self, x1, x2):
        """ x1 and x2 are timestamps in 'yyyymmddhhmmss' format
        returns |x2 - x1| in days.
        """
        year1 = int(x1[:4])
        month1 = int(x1[4:6])
        day1 = int(x1[6:8])
        year2 = int(x2[:4])
        month2 = int(x2[4:6])
        day2 = int(x2[6:8])
        dist = (365 * (year2 - year1) +
                30 * (month2 - month1) +
                day2 - day1)
        return abs(dist)

    def is_leaf(self, node_id):
        return node_id not in self.edges

    def dump2json(self):
        """ method returns json string which stores the tree and
        extra info (last_inserted_revision_id)
        format of the string is next:
        "[last_inserted_revision_id, json_representing_the_tree]"
        """
        tree_dict = self.get_tree_as_dict()
        res = [self.last_inserted_revision_id, tree_dict]
        return cjson.encode(res)

    def get_tree_as_dict(self):
        """ method returns dictionary which represents the tree and
        contains some class values
        json representation of the tree dictionary is next
        nodeid -> [last visit, [tokens, XXX] ]
        where XXX is destination node id if edge
        with tokens leads to non leaf node
        or XXX is [last visit, labels of tokens]
        where labels of tokens is tuple (l_1, ..., l_N)
        labels of tokens on the path root->the leaf,
        if l_1=...=l_N=l then instead of tuple store only number l
        """
        # convering x to str(x) because
        # fast module cjson for serialization
        # accepts only keys of type string
        res = {}
        for x in self.nodes:
            if not self.is_leaf(x):
                l = [self.nodes[x][self.NODE_LAST_VISIT]]
                res[str(x)] = l
        for x in self.edges:
            for y in self.edges[x]:
                e = self.edges[x][y]
                res[str(x)].append(' '.join(e[self.EDGE_TOKENS]))
                # Test if edge leads to leaf.
                dest = e[self.EDGE_DEST_NODE_ID]
                if self.is_leaf(dest):
                    n_dest = self.nodes[dest]
                    res[str(x)].append(n_dest[self.NODE_LAST_VISIT])
                    res[str(x)].append(n_dest[self.NODE_COVERING_LABEL])
                else:
                    res[str(x)].append(dest)
        # adding another values like N, K_rev, K_time, current timestamp
        class_values = {}
        class_values['N'] = self.N
        class_values['K_revisions'] = self.K_revisions
        class_values['K_time'] = self.K_time
        class_values['cr_timestamp'] = self.cr_timestamp
        class_values['lin_id'] = self.lin_id
        class_values['rev_incremental_number'] = self.rev_incremental_number
        res['class_values'] = class_values
        # addin timetable and revision_id_table
        res['timetable'] = self.timetable
        res['revision_id_table'] = self.revision_id_table
        return res

    def get_tree_json(self, json_string):
        """method returns json which represents the tree
        """
        idx = json_string.find('{')
        return json_string[idx : -1]

    @staticmethod
    def get_last_revision_id(json_string):
        """ method returns last inserted revision id given json
        which  stores information about the ojbect TriesA5
        """
        idx = json_string.find(',')
        return int(json_string[1 : idx])

    def load_from_json(self, json_string):
        """ method loads the tree from json string
        About id of leaf nodes:
        we are losing ids of leaf nodes while dumping to json
        because leaf node cannot become internal node during
        tree construction, then we are assigning negative integers
        as id to leaf nodes (to prevent fast increasing self.lin_id)
        """
        tree_json = self.get_tree_json(json_string)
        nodes_edges = cjson.decode(tree_json)
        #nodes_edges = json.loads(tree_json)
        # first we extract class_values
        class_values = nodes_edges['class_values']
        del(nodes_edges['class_values'])
        self.N = class_values['N']
        self.K_revisions = class_values['K_revisions']
        self.K_time = class_values['K_time']
        self.cr_timestamp = class_values['cr_timestamp']
        self.lin_id = class_values['lin_id']
        self.rev_incremental_number = class_values['rev_incremental_number']
        # loading timetable and revision_id_table
        self.timetable = nodes_edges['timetable']
        self.revision_id_table = nodes_edges['revision_id_table']
        del(nodes_edges['timetable'])
        del(nodes_edges['revision_id_table'])
        # last inserted node id
        #lin_id = 0
        # last inserted leaf id
        lil_id = 0
        nodes = {}
        edges = {}
        for x in nodes_edges:
            src_node_id = int(x)
            #lin_id = max(lin_id, src_node_id)
            line = nodes_edges[x]
            nodes[src_node_id] = [line[0]]
            edges[src_node_id] = {}
            idx = 1
            while idx < len(line):
                # in the beginning of each loop idx point
                # to a string of tokens, so line[idx] is
                # a string with tokens
                if not isinstance(line[idx], basestring):
                    raise Exception('error in parsing json')
                tokens = line[idx].split()
                # edge leads to a leaf
                if ((idx + 2) < len(line) and
                   not isinstance(line[idx + 2], basestring)):
                    # updating leaf id
                    lil_id -= 1
                    # adding a node
                    nodes[lil_id] = [line[idx + 1], line[idx + 2]]
                    # adding an edge
                    e = [src_node_id, lil_id, tokens]
                    edges[src_node_id][tokens[0]] = e
                    idx += 3
                # edge leads to an internal node
                else:
                    # creating an edge
                    e = [src_node_id, line[idx + 1], tokens]
                    edges[src_node_id][tokens[0]] = e
                    idx += 2
        # at this point we have nodes and edges
        self.nodes = nodes
        self.edges = edges
        self.last_inserted_revision_id = self.get_last_revision_id(json_string)
        #self.lin_id = lin_id

    def dump_covering2json(self):
        """ precondition is self.cr_covering is
        a covering of current revision
        """
        return cjson.encode(self.cr_covering[self.N : -self.N])


def plot_TriesA5(nodes, edges, revisions, file_name):
    ''' file_name is a name of a file to save image
    nonleaf nodes have text: node_id(node last visit)
    leaf nodes have text: [covering label](node last visit)
    '''
    NODE_LAST_VISIT = 0;
    NODE_COVERING_LABEL = 1;
    EDGE_SRC_NODE_ID = 0;
    EDGE_DEST_NODE_ID = 1;
    EDGE_TOKENS = 2;
    dot_file_name = "plot_trie.dot"
    colors = ['black', 'blue', 'red',
              'forestgreen', 'darkorange',
              'chartreuse', 'deeppink1',
              'firebrick4', 'darkviolet',
              'brown', 'chocolate4']
    # creating nodes names
    node_names = {}
    for x in nodes.keys():
        n = x
        if x < 0:
            n = 10000 + x
        s = "node_%s"%(n)
        node_names[x] = s
    # creating edges
    edge_lines = []
    for x in edges:
        e = edges[x]
        for y in e:
            s = ("%s -> %s [label=\"%s\"];\n"%
                (node_names[e[y][EDGE_SRC_NODE_ID]],
                 node_names[e[y][EDGE_DEST_NODE_ID]],
                 ' '.join(e[y][EDGE_TOKENS])))
            edge_lines.append(s)
    # creating nodes
    node_lines = []
    for x in nodes:
        n = nodes[x]
        label = "%s"%(x)
        if len(nodes[x]) == 2:
            cov_label = nodes[x][NODE_COVERING_LABEL]
            if isinstance(cov_label, list):
                cov_label = ' '.join([str(z) for z in cov_label])
            label = '[%s]' % cov_label
            label = '%s(%s)' % (label, nodes[x][NODE_LAST_VISIT])
        else:
            label = '%s(%s)' % (label, nodes[x][NODE_LAST_VISIT])
        s = "%s [label=\"%s\"];\n" % (node_names[x], label)
        node_lines.append(s)
    # revision lines
    revision_lines = []
    for i in xrange(len(revisions)):
        s = ("revision_%s [shape=plaintext, label=\"%s:%s\"];\n" %
             (i, i, ' '.join(revisions[i])))
        revision_lines.append(s)
    # writing sript
    prog = "digraph G {\n"
    for x in revision_lines:
        prog = prog + x
    for x in node_lines:
        prog = prog + x
    for x in edge_lines:
        y  = x.encode('utf-8')
        prog = '%s%s'%(prog, y)
    prog = prog + "}\n"
    dot_file = 'plot_tree.dot'
    f = open(dot_file,'w')
    f.write(prog)
    f.close()
    temp = 'dot -Tpng -Gcharset=latin1 %s -o %s'%(dot_file, file_name)
    print temp
    os.system(temp)