#!/usr/bin/env python

import sys, os, shutil, time, math, re
from optparse import OptionParser

import fileReadHelp
try:
  import annotate
except ImportError:
  annotate = None

# Command-line interface.  Positional arguments are .graph files; further
# .graph files may be listed, one per line, in the file given by --graphlist.
usage =  '%prog [options] <graphfiles>'
parser = OptionParser(usage=usage)
parser.add_option('-g', '--graphlist', dest='graphlist',
                  help='file with list of graphfiles', metavar='FILE')
parser.add_option('-t', '--testdir', dest='testdir',
                  help='test directory', metavar='DIR',
                  default='.')
parser.add_option('-o', '--outdir', dest='outdir',
                  help='output directory', metavar='DIR',
                  default=None)
parser.add_option('-p', '--perfdir', dest='perfdir',
                  help='performance data directory', metavar='DIR',
                  default=None)
parser.add_option('-n', '--name', dest='name',
                  help='Test platform name', metavar='NAME',
                  default=None)
parser.add_option('-s', '--startdate', dest='startdate',
                  help='graph start date', metavar='DATE')
parser.add_option('-e', '--enddate', dest='enddate',
                  help='graph end date', metavar='DATE')
parser.add_option('-a', '--alttitle', dest='alttitle',
                  help='alternate/custom site title',
                  metavar='NAME', default=None)
parser.add_option('-v', '--verbose', dest='verbose',
                  help='print progress messages',
                  action='store_true', default=False)
parser.add_option('-d', '--debug', dest='debug',
                  help='print each graph description before generating '
                       'its data',
                  action='store_true', default=False)
# How several results recorded for the same date are combined into the one
# value that is plotted for that date.
parser.add_option('-r', '--reduce', dest='g_reduce', type='choice',
                  help='how to combine multiple results from the same date: '
                       'avg, med, min or max [default: %default]',
                  metavar='STRATEGY', default='avg',
                  choices=['avg', 'med', 'min', 'max'])
parser.add_option('-x', '--no-bounds', dest='g_display_bounds',
                  help='do not write the min/max bounds alongside each '
                       'data point',
                  action='store_false', default=True)
# The annotate module is optional; only offer the option when it imported.
if annotate:
    parser.add_option('-j', '--annotate', dest='annotation_file',
                      help='file containing graph annotations',
                      metavar='FILE', default=None)

# Module-wide flags; main() overwrites them from the parsed options.
debug = False
verbose = False

def try_parse_float(value):
    """Convert value to a float, ignoring a trailing unit suffix.

    Any trailing run of characters that are not digits, commas or periods
    is dropped before conversion.  If what remains is not a valid float,
    the original, unmodified value is returned instead.
    """
    without_units = re.sub(r'[^\d,.]+$', '', value)
    try:
        number = float(without_units)
    except ValueError:
        return value
    return number

############

# Global info about generating graphs
class GraphStuff:
    """Global state shared by every graph generated during one run.

    Holds the input/output directory names, the page title, the default
    date range and reduction strategy, and the open 'graphdata.js' file
    into which one JavaScript description per graph is written.
    """

    # Keys (besides 'perfkeys') that genGraphStuff understands in a .graph
    # file.  Any other key is silently ignored.
    _settingKeys = ('graphkeys', 'files', 'file', 'graphtitle', 'graphname',
                    'ylabel', 'startdate', 'enddate', 'generate',
                    'displayrange', 'expand', 'sort')

    # Spellings accepted as "true" for boolean settings.
    _trueStrings = ('true', 't', '1', 'on', 'y', 'yes')

    def __init__(self, _name, _testdir, _perfdir, _outdir, _startdate, _enddate,
                 _reduce, _display_bounds, _alttitle, _annotation_file):
        """Record the run configuration; no files are touched until init().

        _name, if not None, is appended to the page title as ' for <name>'.
        _alttitle, if set, replaces the default page title.
        _startdate and _enddate are passed on to every graph that is created
        (time.struct_time values or None).
        """
        self.numGraphs = 0
        self.testdir = _testdir
        self.perfdir = _perfdir
        self.outdir = _outdir
        self.datdir = self.outdir+'/'+'CSVfiles'
        self.title = 'Chapel Performance Graphs'
        if _alttitle:
            self.title = _alttitle
        if _name is not None:
            self.title += ' for '+_name
        self.gfname = self.outdir+'/'+'graphdata.js'
        self.gfile = None
        self.suites = list()
        self.startdate = _startdate
        self.enddate = _enddate
        self._reduce = _reduce
        self.display_bounds = _display_bounds
        self.annotation_file = _annotation_file
        self.all_annotations = None
        self.firstGraph = True

    def init(self):
        """Create a fresh output tree and start writing graphdata.js.

        An existing output directory is removed first.  Returns 0 on
        success.  Raises OSError/IOError (after printing a message) if a
        directory cannot be removed or created, if graphdata.js cannot be
        opened, or if the annotation file cannot be read.
        """
        if os.path.exists(self.outdir):
            if verbose:
                sys.stdout.write('Removing old directory %s...\n'%(self.outdir))
            try:
                shutil.rmtree(self.outdir)
            except OSError:
                sys.stderr.write('Error: Could not clean up directory: %s\n'%(self.outdir))
                raise
        if verbose:
            sys.stdout.write('Creating directory %s...\n'%(self.outdir))
        try:
            os.makedirs(self.outdir)
        except OSError:
            sys.stderr.write('ERROR: Could not (re)create directory: %s\n'%(self.outdir))
            raise
        if verbose:
            sys.stdout.write('Creating directory %s...\n'%(self.datdir))
        try:
            os.makedirs(self.datdir)
        except OSError:
            sys.stderr.write('ERROR: Could not create directory: %s\n'%(self.datdir))
            raise
        try:
            self.gfile = open(self.gfname, 'w')
        except IOError:
            sys.stderr.write('ERROR: Could not open file: %s\n'%(self.gfname))
            # Without the file nothing below can work, so report the failure
            # to the caller instead of failing later on self.gfile.write().
            raise

        if annotate:
            try:
                self.all_annotations = annotate.load(self.annotation_file)
            except IOError:
                sys.stderr.write('ERROR: Could not open file: %s\n'%(self.annotation_file))
                raise

        self.gfile.write('// AUTOMATICALLY GENERATED GRAPH DESCRIPTION\n')
        self.gfile.write('document.title = "%s";\n'%(self.title))
        self.gfile.write('var pageTitle = "%s";\n'%(self.title))
        self.gfile.write('var allGraphs = [\n')
        self.firstGraph = True
        return 0

    def __str__(self):
        """Return a multi-line, human-readable summary of the configuration."""
        def showDate(date):
            if date is None:
                return 'Not specified'
            return time.strftime('%Y-%m-%d', date)

        parts = [
            'Number of unique graphs = '+str(self.numGraphs),
            'test dir = '+str(self.testdir),
            'performance data dir = '+str(self.perfdir),
            'output dir = '+str(self.outdir),
            'start date = '+showDate(self.startdate),
            'end date = '+showDate(self.enddate),
            'suites = '+str(len(self.suites)),
        ]
        return '\n'.join(parts)

    def finish(self):
        """Close the graph list, write the suite list and close graphdata.js.

        Does nothing if the file is not open.
        """
        if self.gfile:
            self.gfile.write('\n];\n')
            self.gfile.write('var perfSuites = [\n')
            self.gfile.write(',\n'.join(
                ['{ "suite" : "%s" }'%(s) for s in self.suites]))
            self.gfile.write('\n];\n')
            self.gfile.close()
            # Makes a second call to finish() a harmless no-op.
            self.gfile = None

    def genGraphInfo(self, ginfo):
        """Append the JavaScript description of one graph to graphdata.js.

        ginfo is the graph object whose data file has just been generated;
        its title, suite, data file name, date range and display settings
        are written as one object literal.
        """
        # Entries are comma separated; the separator goes before every
        # entry except the first.
        if not self.firstGraph:
            self.gfile.write(',\n')
        else:
            self.firstGraph = False

        self.gfile.write('{\n')
        if ginfo.title != '':
            self.gfile.write('   "title" : "%s",\n'%(ginfo.title))
        elif ginfo.graphname != '':
            sys.stdout.write('WARNING: \'graphname\' is deprecated.  Use \'graphtitle\' instead.\n')
            self.gfile.write('   "title" : "%s",\n'%(ginfo.graphname))
        else:
            sys.stdout.write('WARNING: No graph title found.\n')
            self.gfile.write('   "title" : "%s",\n'%(ginfo.name))
        self.gfile.write('   "suite" : "%s",\n'%(ginfo.suite))
        self.gfile.write('   "datfname" : "%s",\n'%(ginfo.datfname))
        self.gfile.write('   "ylabel" : "%s",\n'%(ginfo.ylabel))
        self.gfile.write('   "startdate" : "%s",\n'%(ginfo.startdate))
        self.gfile.write('   "enddate" : "%s",\n'%(ginfo.enddate))
        self.gfile.write('   "displayrange" : %s,\n'%(str(ginfo.displayrange).lower()))
        self.gfile.write('   "unloggable" : %s,\n'%(str(ginfo.unloggable).lower()))
        self.gfile.write('   "expand" : %d,\n'%(ginfo.expand))
        self.gfile.write('   "sort" : %s,\n'%(str(ginfo.sort).lower()))
        self.gfile.write('   "annotations" : [')
        if ginfo.annotations:
            self.gfile.write('\n      ')
            self.gfile.write(',\n      '.join(ginfo.annotations))
            self.gfile.write('\n   ')
        self.gfile.write(']\n')
        self.gfile.write('}')


    def genGraphStuff(self, fname, suite):
        """Read one .graph file and generate every graph it describes.

        The file is looked up under the test directory first and then
        relative to the current directory.  Each 'perfkeys' line starts a
        new graph; the 'key: value' lines that follow configure it.

        Returns the number of graphs generated, 0 if the file defines no
        graph, or None if the file could not be read.  Raises ValueError
        for a malformed file (a line without ':', a setting before the
        first 'perfkeys' line, a bad date or number) and lets
        IOError/OSError from data generation propagate.
        """
        fullFname = self.testdir+'/'+fname
        if not os.path.exists(fullFname):
            fullFname = './'+fname
        if verbose:
            sys.stdout.write('Reading %s...\n'%(fullFname))
        lines = fileReadHelp.ReadFileWithComments(fullFname)
        if lines is None:
            sys.stdout.write('WARNING: Could not read graph description from %s.  Ignoring.\n'%(fullFname))
            return

        basename = os.path.splitext(os.path.basename(fname))[0]

        graphs = list()
        firstGraphNum = self.numGraphs
        for l in lines:
            key, colon, rest = l.partition(':')
            if not colon:
                sys.stderr.write('ERROR: Expected \'key: value\' but found \'%s\' in %s\n'%(l.strip(), fullFname))
                raise ValueError('missing \':\' in graph description line')
            key = key.strip()
            rest = rest.strip()

            if key == 'perfkeys':
                # A new graph starts here, so the previous one is complete.
                if graphs:
                    graphs[-1].generateGraphData(self, len(graphs)-1)
                graph = GraphClass(basename, firstGraphNum+len(graphs),
                    suite, self.startdate, self.enddate,
                    self._reduce, self.display_bounds)
                # NOTE(review): perfkeys are split on ', ' (comma + space)
                # while the other lists split on ','; kept as found --
                # confirm whether perfkeys may legitimately contain commas.
                graph.perfkeys += [ s.strip() for s in rest.split(', ') ]
                graphs.append(graph)
                continue

            if key not in self._settingKeys:
                continue

            if not graphs:
                sys.stderr.write('ERROR: \'%s\' appears before any \'perfkeys\' line in %s\n'%(key, fullFname))
                raise ValueError('graph setting without a preceding perfkeys line')
            graph = graphs[-1]

            if key == 'graphkeys':
                graph.graphkeys += [ s.strip() for s in rest.split(',') ]
            # files takes a list of the files to use per perfkey, while file
            # uses the same file for every perfkey.
            elif key == 'files':
                graph.datfilenames += [ s.strip() for s in rest.split(',') ]
            elif key == 'file':
                graph.datfilenames += [rest]*len(graph.perfkeys)
            elif key == 'graphtitle':
                graph.title = rest
            elif key == 'graphname':
                # deprecated
                graph.graphname = rest
            elif key == 'ylabel':
                graph.ylabel = rest
            elif key == 'startdate':
                graph.startdate = time.strptime(rest, '%m/%d/%y')
            elif key == 'enddate':
                graph.enddate = time.strptime(rest, '%m/%d/%y')
            elif key == 'generate':
                # One reduction method per perfkey, comma separated; an
                # empty entry means "use the global default".
                methods = re.sub(r'\s+', '', rest).split(',')
                graph.generate = [s if s != '' else self._reduce for s in methods]
            elif key == 'displayrange':
                graph.displayrange = rest.lower() in self._trueStrings
            # expansion is used to turn a single graph with multiple series
            # into multiple graphs where the first one is all the series
            # combined and the subsequent ones contain each series
            # individually. -1 means expand all. Other numbers indicate the
            # number of series to expand
            elif key == 'expand':
                graph.expand = int(rest)
            elif key == 'sort':
                graph.sort = rest.lower() in self._trueStrings

        if not graphs:
            sys.stdout.write('WARNING: No graphs found in %s.  Ignoring.\n'%(fullFname))
            return 0

        lastGraph = graphs[-1]
        if lastGraph.graphkeys:
            series = lastGraph.graphkeys[0]
        else:
            series = lastGraph.perfkeys[0]

        # NOTE(review): annotations are only attached to the last graph of
        # a file; earlier graphs were already generated inside the loop.
        if annotate:
            lastGraph.annotations = annotate.get(
                self.all_annotations, basename, series,
                lastGraph.startdate, lastGraph.enddate)

        lastGraph.generateGraphData(self, len(graphs)-1)

        return len(graphs)


############

# graph class
class GraphClass:
    """One graph: its settings plus the code that builds its CSV data file.

    An instance is configured attribute by attribute while a .graph file
    is parsed; generateGraphData() then reads the referenced .dat files,
    writes the merged CSV file and registers the graph with the global
    graph info object.
    """
    # Next id to hand out; every instance gets a unique, increasing id.
    gid = 1

    def __init__(self, _name, _graphNum, _suite='', _startdate=None,
                 _enddate=None, _reduce='avg', _displayrange=True,
                 _expand=0, _sort=True):
        """Create an empty graph named _name (surrounding blanks removed).

        _startdate/_enddate are time.struct_time values or None.  _reduce
        is the default method ('avg', 'med', 'min' or 'max') used to
        combine several results recorded for the same date.
        """
        self.id = GraphClass.gid
        GraphClass.gid += 1
        self.name = _name.strip()
        self.srcdatfname = self.name+'.dat'
        self.title = ''
        self.suite = _suite
        self.graphname = ''
        self.ylabel = ''
        self.startdate = _startdate
        self.enddate = _enddate
        self.graphNum = _graphNum
        self.perfkeys = list()
        self.graphkeys = list()
        self.datfilenames = list()
        self.generate = list()
        self.displayrange = _displayrange
        self._reduce = _reduce
        self.unloggable = False
        self.expand = _expand
        self.sort = _sort
        self.annotations = []

    def __str__(self):
        """Return a multi-line description of the graph settings."""
        l  = 'Graph: '+str(self.name)+' (id='+str(self.id)+')\n'
        l += '\ttitle: '+self.title+'\n'
        # The suite is None when no suite line preceded the graph file.
        l += '\tsuite: '+str(self.suite)+'\n'
        l += '\tgraphname: '+self.graphname+'\n'
        l += '\tylabel: '+self.ylabel+'\n'
        if self.startdate is not None:
            l += '\tstartdate: '+time.strftime('%Y-%m-%d',self.startdate)+'\n'
        else:
            l += '\tstartdate: Not specified\n'
        if self.enddate is not None:
            l += '\tenddate: '+time.strftime('%Y-%m-%d',self.enddate)+'\n'
        else:
            l += '\tenddate: Not specified\n'
        l += '\tperfkeys: '+str(self.perfkeys)+'\n'
        l += '\tgraphkeys: '+str(self.graphkeys)+'\n'
        l += '\tdatfilenames: '+str(self.datfilenames)+'\n'
        l += '\tdisplayrange:'+str(self.displayrange).lower()+'\n'
        return l

    # For each unique data file
    class DatFileClass:
        """An open .dat file together with the data parsed from it.

        If the file cannot be opened the instance has no 'dfile'
        attribute; callers test for that with hasattr().
        """
        def __init__(self, _filename):
            # lines will end up looking like:
            # lines[lineNum][trailNum][field]
            # where lineNum is the number after they are "merged"
            self.lines = []
            # Maps a perfkey index of the graph to a column of this file.
            self.mykeys = {}
            try:
                self.dfile = open(_filename, 'r')
                # First line must be a comment and must be a tab separated list
                #  of the performance keys
                self.perfkeys = [ l.strip() for l in self.dfile.readline()[1:].split('\t') ]
            except IOError:
                # Allows some performance tests to be graphed when you aren't
                # running over every single performance test
                pass

        def add(self, _i, _k):
            """Record that data stream _i comes from the column of perfkey _k.

            Raises ValueError if _k is not a column of this file.
            """
            self.mykeys[_i] = self.perfkeys.index(_k)

        def __str__(self):
            l  = '\tperfkeys: '+str(self.perfkeys)+'\n'
            l += '\tmykeys: '+str(self.mykeys)+'\n'
            return l

        def close(self):
            """Close the underlying file if it was opened; safe to repeat."""
            dfile = getattr(self, 'dfile', None)
            if dfile is not None:
                dfile.close()

        def __del__(self):
            self.close()

    def _parseValue(self, text):
        """Parse one data field and note whether a log scale is possible.

        Returns what try_parse_float returns (a float, or the original
        text).  The first numeric value that is zero or negative marks the
        graph as unloggable.
        """
        val = try_parse_float(text)
        if not self.unloggable and isinstance(val, float) and val <= 0.0:
            self.unloggable = True
        return val

    def _loadTrials(self, datfiles):
        """Read every available data file into its 'lines' list.

        Consecutive lines carrying the same date are grouped together so
        that lines[n] is the list of all results recorded for one date.
        """
        for i in range(len(self.perfkeys)):
            # The file may be missing (in the case where only a subdirectory
            # has been tested for performance).  If so, we don't want to
            # try to access a non-existent datfile
            df = datfiles.get(self.datfilenames[i])
            if df is None:
                continue
            # A file shared by several perfkeys is only read once: on later
            # passes it is already at end-of-file.
            for line in df.dfile:
                line = line.strip()
                if line == '' or line[0] == '#':
                    continue
                fields = line.split()
                myDate = time.strptime(fields[0].strip(), '%m/%d/%y')
                fields[0] = myDate
                fields[1:] = [self._parseValue(x) for x in fields[1:]]
                if df.lines and myDate == df.lines[-1][0][0]:
                    df.lines[-1].append(fields)
                else:
                    df.lines.append([fields])

    def _reduceTrials(self, values, method):
        """Combine all results of one date into a single value.

        An unrecognized method yields the first result.
        """
        if method == 'avg':
            return math.fsum(values)/len(values)
        if method == 'med':
            slist = sorted(values)
            mid = len(slist)//2
            if len(slist) % 2 == 0:
                return (slist[mid]+slist[mid-1])/2
            return slist[mid]
        if method == 'min':
            return min(values)
        if method == 'max':
            return max(values)
        return values[0]

    def _writeRows(self, f, datfiles):
        """Merge the loaded data by date and write one CSV row per date.

        Returns (startdate, enddate) as determined from the graph settings
        and the data; either may be None if it could not be determined.
        """
        numKeys = len(self.perfkeys)
        # currLines stores the current merged line number of each dat file
        currLines = [0]*numKeys
        startdate = self.startdate
        enddate = self.enddate

        wrotePrevious = False
        while True:
            # find the min date among the next unconsumed line of each key
            minDate = None
            for i in range(numKeys):
                df = datfiles.get(self.datfilenames[i])
                if df is None or currLines[i] >= len(df.lines):
                    continue
                myDate = df.lines[currLines[i]][0][0]
                if minDate is None or myDate < minDate:
                    minDate = myDate

            if startdate is None:
                startdate = minDate

            if minDate is None:
                # every file is exhausted
                break

            # Rows are separated by a newline that is written lazily, and
            # only if the previous date actually produced a row.
            if wrotePrevious:
                f.write('\n')
            wrotePrevious = False

            # write out the data for this date
            line_to_write = time.strftime('%Y-%m-%d', minDate)
            for i in range(numKeys):
                df = datfiles.get(self.datfilenames[i])
                if df is None:
                    # NOTE(review): a missing data file contributes no
                    # column at all (not even an empty one); kept as found.
                    continue
                line_to_write += ','
                if currLines[i] >= len(df.lines):
                    # no data for this date
                    continue
                trials = df.lines[currLines[i]]
                if trials[0][0] != minDate:
                    # no data for this date
                    continue

                # consume this line
                currLines[i] += 1
                # Transpose: columns[c] holds field c of every trial.
                columns = list(zip(*trials))
                fieldId = df.mykeys[i]
                # presumably '-' marks a missing measurement; such dates
                # are left empty -- confirm against the .dat file writer
                if len(columns) > fieldId and '-' not in columns[fieldId]:
                    values = columns[fieldId]
                    if i < len(self.generate):
                        method = self.generate[i]
                    else:
                        method = self._reduce
                    value = self._reduceTrials(values, method)

                    if self.displayrange:
                        line_to_write += "{0};{1};{2}".format(
                            min(values), value, max(values))
                    else:
                        line_to_write += "{0}".format(value)
                    wrotePrevious = True

            if wrotePrevious:
                f.write(line_to_write)

            if self.enddate is None:
                enddate = minDate

        f.write('\n')
        return (startdate, enddate)

    # Generate the new data file inline in CSV format
    def generateData(self, graphInfo, datfiles):
        """Write this graph's CSV file and settle its final date range.

        graphInfo supplies the output directory ('datdir') and optional
        global start/end dates, which take precedence over the graph's own.
        datfiles maps data file names to DatFileClass instances; names
        that are absent are treated as missing data.  On return
        self.startdate and self.enddate are 'YYYY-MM-DD' strings.
        """
        # An alternative is to have an off-line process incrementally
        # update a CSV data file.  Since we would still have to open
        # potentially multiple data files and read thru them and look
        # at every line for the appropriate date, I opted for
        # regenerating.
        fname = graphInfo.datdir+'/'+self.datfname
        f = open(fname, 'w')
        try:
            f.write('Date,')
            if self.graphkeys:
                f.write(','.join(self.graphkeys)+'\n')
            self._loadTrials(datfiles)
            (startdate, enddate) = self._writeRows(f, datfiles)
        finally:
            f.close()

        if startdate is None:
            startdate = time.localtime()
        if enddate is None:
            enddate = startdate
        if graphInfo.startdate is not None:
            startdate = graphInfo.startdate
        if graphInfo.enddate is not None:
            enddate = graphInfo.enddate
        self.startdate = time.strftime('%Y-%m-%d', startdate)
        self.enddate = time.strftime('%Y-%m-%d', enddate)



    def generateGraphData(self, graphInfo, gnum):
        """Fill in defaults, build the CSV file and register the graph.

        gnum is the index of this graph within its .graph file and is used
        in the name of the generated data file.  Raises ValueError if a
        perfkey is not a column of its data file.
        """
        if debug:
            sys.stdout.write('===\n')
            sys.stdout.write(str(self)+'\n')

        if verbose:
            sys.stdout.write('Generating graph data for %s (graph #%d)\n'%(self.name, gnum))

        self.datfname = self.name+str(gnum)+'.txt'

        nperfkeys = len(self.perfkeys)
        # perfkeys without an explicit label are labelled with the key itself
        self.graphkeys.extend(self.perfkeys[len(self.graphkeys):])

        for i in range(nperfkeys):
            for j in range(nperfkeys):
                if (i != j) and (self.graphkeys[i]==self.graphkeys[j]):
                    sys.stdout.write('WARNING: Duplicate graph label (%s)\n'%(self.graphkeys[i]))

        # perfkeys without an explicit file use <graph name>.dat
        missing = nperfkeys-len(self.datfilenames)
        self.datfilenames.extend([self.srcdatfname]*missing)

        # fix path to dat files
        for i in range(nperfkeys):
            fn = self.datfilenames[i]
            idx = fn.rfind('/')
            if idx != -1:
                if graphInfo.perfdir.startswith('/'):
                    # absolute path (prepend path and strip relative path)
                    self.datfilenames[i] = graphInfo.perfdir+fn[idx:]
                else:
                    # relative path (find .dat file in the subdir)
                    self.datfilenames[i] = './'+fn[0:idx]+'/'+graphInfo.perfdir+fn[idx:]
            else:
                self.datfilenames[i] = graphInfo.perfdir+'/'+fn

        # create a hashmap of open datfiles
        datfiles = {}
        try:
            for i in range(nperfkeys):
                d = self.datfilenames[i]
                if d not in datfiles:
                    datfiles[d] = self.DatFileClass(d)
                if hasattr(datfiles[d], 'dfile'):
                    # May not have a dfile if the specific performance test
                    # was not previously run and dumped into this folder
                    # Should distinguish this case from cases where there are
                    # legitimately no perfkeys
                    try:
                        datfiles[d].add(i, self.perfkeys[i])
                    except ValueError:
                        sys.stderr.write('ERROR: Could not find perfkey \'%s\' in %s\n'%(self.perfkeys[i], self.datfilenames[i]))
                        raise
                else:
                    # We don't have a dfile for that file name.  Delete the
                    # created DatFileClass so it doesn't mess us up
                    del datfiles[d]

            # generate the new data files
            self.generateData(graphInfo, datfiles)
            graphInfo.genGraphInfo(self)
        finally:
            # Release the data files now rather than whenever the objects
            # happen to be garbage collected.
            for df in datfiles.values():
                df.close()


####################

def main():
    """Generate the graph data and web page files for all requested graphs.

    Graph files are taken from the command line and from the optional
    --graphlist file.  In that list a line of the form '# suite: NAME'
    starts a new suite; other lines starting with '#' are comments.

    Returns 0 on success and -1 if the output area cannot be set up, the
    graph list cannot be read, or a graph cannot be generated.  Support
    files that cannot be copied only produce a warning.
    """
    (options, args) = parser.parse_args()

    global debug, verbose
    debug = options.debug
    verbose = options.verbose

    perfdir = options.perfdir
    if perfdir is None:
        # default: a directory named after this host
        perfdir = './perfdat/'+os.uname()[1]

    outdir = options.outdir
    if outdir is None:
        outdir = perfdir+'/html'

    if options.startdate is not None:
        startdate = time.strptime(options.startdate,'%m/%d/%y')
    else:
        startdate = None

    if options.enddate is not None:
        enddate = time.strptime(options.enddate,'%m/%d/%y')
    else:
        enddate = time.localtime()

    # The --annotate option only exists when the annotate module imported.
    annotation_file = None
    if annotate:
        if options.annotation_file is not None:
            annotation_file = options.annotation_file
        elif 'CHPL_HOME' in os.environ:
            annotation_file = os.environ['CHPL_HOME'] + '/test/ANNOTATIONS.yaml'
        else:
            annotation_file = 'ANNOTATIONS.yaml'

    # This allows the graph webpage to have an alternate title. The default is
    # Chapel Performance Graphs, which is not always appropriate
    alttitle = options.alttitle

    graphInfo = GraphStuff(options.name, options.testdir, perfdir, outdir,
        startdate, enddate, options.g_reduce, options.g_display_bounds,
        alttitle, annotation_file)
    try:
        graphInfo.init()
    except (IOError, OSError):
        return -1

    # get the list of .graph files: first those from the command line ...
    lines = list(args)
    # ... then the .graph files from the specified file
    graphlist = options.graphlist
    if graphlist is not None:
        try:
            with open(graphlist, 'r') as f:
                lines += f.readlines()
        except IOError:
            sys.stderr.write('ERROR: Could not open graph file: %s\n'%(graphlist))
            return -1

    currSuite = None
    numSuites = 0
    numGraphfiles = 0
    # Generate graphs.  A graph file that is listed more than once is
    # processed each time it appears.
    for line in lines:
        l = line.strip()
        if l == '':
            continue

        if l[0] != '#':
            try:
                graphInfo.genGraphStuff(l, currSuite)
            except (ValueError, IOError, OSError) as e:
                sys.stderr.write('ERROR: Could not generate graphs from %s: %s\n'%(l, e))
                return -1
            numGraphfiles += 1
            continue

        # parse for suite name
        ls = l[1:].split(':')
        if len(ls) > 1 and ls[0].strip() == 'suite':
            currSuite = ls[1].strip()
            graphInfo.suites.append(currSuite)
            numSuites += 1
            if verbose:
                sys.stdout.write('suite: %s\n'%(currSuite))

    # Copy the index.html and support css and js files
    auxdir = os.path.dirname(os.path.realpath(__file__))+'/perf'
    # TODO point these to the right location
    dygraphDir = os.path.join(
        os.environ.get('CHPL_HOME', ''),
        'third-party',
        'dygraphs')
    supportFiles = [
        auxdir+'/index.html',
        auxdir+'/perfgraph.css',
        auxdir+'/perfgraph.js',
        os.path.join(dygraphDir, 'dygraph-combined.js'),
    ]
    for path in supportFiles:
        try:
            shutil.copy(path, outdir)
        except IOError:
            sys.stderr.write('WARNING: Could not copy %s\n'%(os.path.basename(path)))

    graphInfo.finish()

    sys.stdout.write('Read %d graph files in %d suites\n'%(numGraphfiles, numSuites))
    return 0

# Run only when executed as a script; main()'s result becomes the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)

