#!/data/bin/python
import string, glob, os, sys, re, getopt
import datetime, time, pdb, math
from os.path import *
import commands
import operator
import collections

JEPROF_VERSION = "5.0.1-101-g26b1c1398264dec25bf998f6bec21799ad4513da"
PPROF_VERSION = "2.0"

# External helper programs, keyed by logical tool name; the value is the
# command that is actually executed for that tool.
obj_tool_map = {
    'objdump': 'objdump',
    'nm': 'nm',
    'addr2line': 'addr2line',
    'c++filt': 'c++filt',
}

profile_marker = "profile"
heap_marker = "heap"

# Special routine name that should not have any symbols.
# Used as separator to parse "addr2line -i" output.
sep_symbol = '_fini'
sep_address = 0

# There is a pervasive dependency on the length (in hex characters,
# i.e., nibbles) of an address, distinguishing between 32-bit and
# 64-bit profiles.  To err on the safe side, default to 64-bit here:
address_length = 16

# A list of paths to search for shared object files
prefix_list = []

# Non-zero enables the output of DEBUG().
g_debug = 0

def DEBUG(str):
    """Print *str* with a "####DEBUG: " prefix when g_debug is set.

    Always returns 0, whether or not anything was printed.
    """
    if not g_debug:
        return 0
    print("####DEBUG: " + str)
    return 0
    

class JEPROF(object):
    def __init__(self):
        self.opt_help = 0
        self.opt_ver  = 0
        self.opt_text = 0
        self.opt_show_bytes = 0
        self.opt_interactive = 0
        self.opt_debug = 0
        self.opt_test  = 0
        self.opt_lib   = ""

        self.use_symbol_page = 0
        self.use_symbolized_profile = 0

        self.prog = "" 
        self.pfile_args    = []
        self.profile_files = []

        self.profile_type = ""
        self.opt_lib_prefix = ""

        self.tmpfile_sym = ""
        self.tmpfile_ps  = ""

        self.g_test_debug = 0
        # Not confirmed
        self.opt_inuse_space   = 0
        self.opt_inuse_objects = 0
        self.opt_alloc_space   = 0
        self.opt_alloc_objects = 0
        self.opt_thread        = 0

    def usage_string(self, s):
        if s:
            print(s)
        print("\nUsage:\njeprof [options] <program> <profiles>")
        print("   <program>  is a binary file name.")
        print("   <profiles> is a profile file name.")
        print("\nOptions:")
        print("   --help              This message")
        print("   --version           Version information")
        print("   --show_bytes        Display space in bytes")
        print("   --text              Generate text report\n")

    def version_string(self):
        """Print the jeprof/pprof version banner and licence notice."""
        banner = (
            "jeprof (part of jemalloc %s)"%JEPROF_VERSION,
            "based on pprof (part of gperftools %s)"%PPROF_VERSION,
            "\nCopyright 1998-2007 Google Inc.\n",
            "This is BSD licensed software; see the source for copying conditions",
            "and license information.",
            "There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A",
            "PARTICULAR PURPOSE.",
        )
        for text in banner:
            print(text)

    ''' Borrowed from shellescape module'''
    def quote(self, s):
        """Return a shell-escaped version of the string *s*.

        An empty string becomes ''.  A string made only of characters that
        are safe for the shell is returned unchanged.  Anything else is
        wrapped in single quotes, with embedded single quotes spliced in
        through double quotes.
        """
        if not s:
            return "''"

        # The class must be negated: we are looking for the first character
        # that is NOT on the safe whitelist.
        if re.search(r'[^a-zA-Z0-9_@%+=:,./-]', s) is None:
            return s

        # use single quotes, and put single quotes into double quotes
        # the string $'b is then quoted as '$'"'"'b'
        return "'" + s.replace("'", "'\"'\"'") + "'"

    def ListSplice(self, l, start, finish):
        """Return a copy of *l* with the slice [start:finish) removed.

        The input sequence is not modified.
        """
        # Indented with spaces like the rest of the class; a tab here is a
        # TabError on Python 3.
        return l[:start] + l[finish:]

    def IsProfileURL(self, filename):
        """Return 0 if *filename* exists on disk (and say so), otherwise 1.

        Anything that is not an existing local path is treated as a URL.
        """
        if not os.path.exists(filename):
            return 1
        print("Using local file %s"%filename)
        return 0

    def IsSymbolizedProfileFile(self, filename):
        """Placeholder: always reports 0, *filename* is not inspected."""
        return 0
       
    def ConfigureObjTools(self, progname):
        """Placeholder: does nothing with *progname* and returns 0."""
        return 0

    def ParamParser(self):
        try:
            opts, args = getopt.getopt(sys.argv[1:], "hvst", ["help", "version", "show_bytes", "text"])
        except getopt.GetoptError:
            self.usage_string("\nInvalid options\n")
            sys.exit()

        for opt, value in opts:
            if opt in ("-h", "--help"):
                self.usage_string()
                sys.exit()

            if opt in ("-v", "--version"):
                self.version_string()
                sys.exit()

            if opt in ("-s", "--show_bytes"):
                self.opt_show_bytes = 1

            if opt in ("-t", "--text"):
                self.opt_text = 1
                self.opt_interactive = 0

        if len(args) != 2:
            print("program or profile is missed!\n")
            sys.exit()

        if self.IsProfileURL(args[0]):
            self.use_symbol_page = 1
        elif self.IsSymbolizedProfileFile(args[0]):
            self.use_symbolized_profile = 1

        if self.use_symbol_page or self.use_symbolized_profile:
            print("line %s\n"%str(sys._getframe().f_lineno))
        else:
            self.prog = args[0]
            DEBUG("prog name %s"%self.prog)

        # Parse profile file/location arguments
        for pro in args[1:]:
            DEBUG("%s:%s: %s\n"%(sys._getframe().f_back.f_code.co_name,str(sys._getframe().f_lineno),pro))
            self.pfile_args.append(pro)

        if self.use_symbol_page:
            pass
        elif not self.use_symbolized_profile:
            self.ConfigureObjTools(self.prog)

    def FetchDynamicProfile(self, profile_name):
        if not self.IsProfileURL(profile_name):
            return profile_name
        else:
            #parse profile based on url, not supported yet
            pass


    def FetchDynamicProfiles(self):
        if len(self.pfile_args) == 1:
            self.profile_files.append(self.FetchDynamicProfile(self.pfile_args[0]))
        else:
            pass
            #multiple profiles, Not supported yet

    def ReadProfileHeader(self, fname):
        """Return the first line of *fname*, including its line terminator.

        An empty file yields "" (the value ReadProfile checks for) instead
        of raising IndexError.  Only the first line is read, not the whole
        file.
        """
        with open(fname, 'r') as f:
            first_line = f.readline()
        DEBUG("%s:%s: %s\n"%(sys._getframe().f_back.f_code.co_name,str(sys._getframe().f_lineno),first_line))

        return first_line

    def HeapProfileIndex(self):
        index = 1
        
        if self.opt_inuse_space:
            index = 1
        elif self.opt_inuse_objects:
            index = 0
        elif self.opt_alloc_space:
            index = 3
        elif self.opt_alloc_objects:
            index = 2

        return index

    def AdjustSamples(self, adj, alg, n1, s1, n2, s2):
        """Scale sampled (count, bytes) pairs up to estimated real totals.

        adj    -- sample adjustment (sampling rate); 0 means no adjustment
        alg    -- sampling algorithm; 2 selects "remote-heap version 2",
                  any other value selects "remote-heap version 1"
        n1, s1 -- first count/bytes pair
        n2, s2 -- second count/bytes pair

        Returns [n1, s1, n2, s2] after adjustment.  A pair whose count is
        zero is left untouched.
        """
        if not adj:
            # No sampling rate given: nothing to scale by.
            return [n1, s1, n2, s2]

        if alg == 2:
            # Remote-heap version 2
            # The sampling frequency is the rate of a Poisson process.
            # This means that the probability of sampling an allocation of
            # size X with sampling rate Y is 1 - exp(-X/Y)
            if n1:
                ratio = ((s1 * 1.0) / n1) / adj
                scale_factor = 1 / (1 - math.exp(-ratio))
                n1 *= scale_factor
                s1 *= scale_factor

            if n2:
                ratio = ((s2 * 1.0) / n2) / adj
                scale_factor = 1 / (1 - math.exp(-ratio))
                n2 *= scale_factor
                s2 *= scale_factor
        else:
            # Remote-heap version 1
            # Only pairs whose average size is below the sampling rate are
            # scaled; zero counts and zero ratios are skipped so that no
            # division by zero can occur.
            if n1:
                ratio = ((s1 * 1.0) / n1) / adj
                if 0 < ratio < 1:
                    n1 /= ratio
                    s1 /= ratio

            if n2:
                ratio = ((s2 * 1.0) / n2) / adj
                if 0 < ratio < 1:
                    n2 /= ratio
                    s2 /= ratio

        return [n1, s1, n2, s2]

    # Get entry from profile; zero if not present
    def GetEntry(self, profile, k):
        """Return profile[k], or 0 when *k* is not present in *profile*."""
        # dict.get() works on both Python 2 and 3; has_key() is gone in 3.
        return profile.get(k, 0)

    # Add entry to specified profile
    def AddEntry(self, profile, k, count):
        """Add *count* to profile[k], creating the entry at 0 if missing.

        *profile* is modified in place; nothing is returned.
        """
        # dict.get() works on both Python 2 and 3; has_key() is gone in 3.
        profile[k] = profile.get(k, 0) + count


    # Add a stack of entries to specified profile, and add them to the $pcs list
    def AddEntries(self, profile, pcs, stack, count):
        """Record one stack sample in *profile* and note its pcs in *pcs*.

        *stack* is a whitespace-separated list of addresses.  Each address
        is normalised with HexExtend and marked in *pcs* (value 1); the
        newline-joined normalised addresses form the key under which
        *count* is accumulated in *profile*.
        """
        frames = []
        for raw_addr in stack.split():
            extended = self.HexExtend(raw_addr)
            pcs[extended] = 1
            frames.append(extended)
        key = "\n".join(frames)
        self.AddEntry(profile, key, count)

    # Given a hex value in the form "0x1abcd" or "1abcd", return either
    # "0001abcd" or "000000000001abcd", depending on the current (global)
    # address length.
    def HexExtend(self, addr):
        """Zero-pad a hex address to the global address_length.

        An optional "0x" prefix and any leading zeros are dropped first.
        If what remains is longer than address_length a warning is printed
        and the process exits.
        """
        digits = re.sub("^(0x)?0*", '', addr)
        if len(digits) > address_length:
            print("Warning: address %s is longer than address length %d"%(digits, address_length))
            sys.exit(0)

        return digits.zfill(address_length)

    def HexStrToInt(self, s):
        """Parse a hex string to an int; empty or None input gives 0.

        A string that starts with 'x' (the tail of a sliced "0x..." value)
        has every 'x' removed before parsing.
        """
        if not s:
            return 0
        digits = s.replace('x', '') if s[0] == 'x' else s
        return int(digits, 16)

    def HexStrAdjust(self, s):
        """Turn a bare "0x" prefix into "0"; return anything else as is."""
        return s[:-1] if s == "0x" else s

    def FixCallerAddresses(self, stack):
        """Subtract one from every address in *stack* except the first.

        *stack* is a whitespace-separated list of hex addresses.  The
        result is joined with a whitespace character taken from the input
        (the last one found).  A stack with a single address, or an empty
        stack, contains no delimiter and is handled without error.
        """
        m = re.match(r".*(\s+).*", stack)
        # With fewer than two addresses there may be no whitespace at all;
        # the delimiter is then irrelevant, so fall back to a space.
        delimiter = m.group(1) if m else " "

        addrs = stack.split()
        fixedaddrs = addrs[:1]
        for addr in addrs[1:]:
            fixedaddrs.append(self.AddressSub(addr, "0x1"))

        return delimiter.join(fixedaddrs)

    def ReadMappedLibraries(self, profile_name):
        """Return the text that follows the "MAPPED_LIBRARIES:" line.

        Everything after the first line starting with that marker is
        returned as one string with carriage returns removed.  The result
        is "" when the marker never appears.
        """
        with open(profile_name, 'r') as f:
            # Skip ahead to (and past) the marker line ...
            for line in f:
                if line.startswith("MAPPED_LIBRARIES:"):
                    break
            # ... then collect whatever is left in the file.
            return "".join(rest.replace('\r', '') for rest in f)

    def ReadMemoryMap(self, line):
        """Placeholder: print an alert and return an empty mapping string."""
        print("Alert: This is a NULL API!")
        empty_mapping = ""
        return empty_mapping

    ##### Symbol extraction #####
    
    # Aggressively search the lib_prefix values for the given library
    # If all else fails, just return the name of the library unmodified.
    # If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so"
    # it will search the following locations in this order, until it finds a file:
    #   /my/path/lib/dir/mylib.so
    #   /other/path/lib/dir/mylib.so
    #   /my/path/dir/mylib.so
    #   /other/path/dir/mylib.so
    #   /my/path/mylib.so
    #   /other/path/mylib.so
    #   /lib/dir/mylib.so              (returned as last resort)

    def FindLibrary(self, f):
        """Return the path to use for library *f*.

        The prefix search described in the comment above is not implemented
        yet, so the name is currently returned unmodified.
        """
        # Search for the library as described above
        # Need to be further implemented later
        return f

    # Return path to library with debugging symbols.
    # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
    def DebuggingLibrary(self, lib):
        """Return the /usr/lib/debug copy of *lib*, or "" if there is none.

        Only absolute paths are considered.  Both "/usr/lib/debug<lib>" and
        the same path with a ".debug" suffix are tried, in that order.
        """
        if not lib.startswith("/"):
            return ""
        base = "/usr/lib/debug" + lib
        for candidate in (base, base + ".debug"):
            if os.path.exists(candidate):
                return candidate
        return ""

    def ParseTextSectionHeaderFromOtool(self, lib):
        """Placeholder for otool-based parsing; always returns None."""
        return None

    def ShellEscape(self, objd, arg, lib):
        escaped_words = []
        
        escaped_word = objd.format(self.quote(objd))
        escaped_words.append(escaped_word)

        escaped_word = arg.format(self.quote(arg))
        escaped_words.append(escaped_word)

        escaped_word = lib.format(self.quote(lib))
        escaped_words.append(escaped_word)

        return " ".join(escaped_words)


    # Parse text section header of a library using objdump
    def ParseTextSectionHeaderFromObjdump(self, lib):
        """Read the .text section header of *lib* from "objdump -h".

        Returns a dict with 'size', 'vma' and 'file_offset' (hex strings
        taken from the objdump table).  When no .text row is found the
        defaults 0 / '0' / '0' are returned.  When objdump reports status
        256 its output is printed and {} is returned.
        """
        size, vma, file_offset = 0, '0', '0'

        # Get objdump output from the library file to figure out how to
        # map between mapped addresses and addresses in the library.
        cmd = self.ShellEscape(obj_tool_map['objdump'], "-h", lib)
        status, output = commands.getstatusoutput(cmd)
        if status == 256:
            print(output)
            return {}

        for raw in output.splitlines(True):
            # Drop carriage returns (windows-looking lines) and split the
            # row into its whitespace-separated columns.
            # Idx Name          Size      VMA       LMA       File off  Algn
            #  10 .text         00104b2c  420156f0  420156f0  000156f0  2**4
            # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file
            # offset may still be 8.  But AddressSub below will still handle that.
            columns = raw.replace('\r', '').split()
            if len(columns) >= 6 and columns[1] == '.text':
                size, vma, file_offset = columns[2], columns[3], columns[5]
                break

        # NOTE(review): size starts out as 0, so this check never fires as
        # written -- confirm whether "no .text section" should return None.
        if size is None:
            return None

        return {'size': size, 'vma': vma, 'file_offset': file_offset}

    def ParseTextSectionHeader(self, lib):
        """Return the .text section header of *lib*.

        otool is tried first when it is configured in obj_tool_map; if it
        is absent or yields nothing, objdump is used.  The result is
        whatever the chosen parser returns.
        """
        # obj_tool_map("otool") is only defined if we're in a Mach-O environment
        # ("in" instead of has_key(), which no longer exists on Python 3).
        if 'otool' in obj_tool_map:
            r = self.ParseTextSectionHeaderFromOtool(lib)
            if r:
                return r

        return self.ParseTextSectionHeaderFromObjdump(lib)

    # Subtract two hex addresses of length $address_length
    def AddressSub(self, addr1, addr2):
        """Return addr1 - addr2 as a zero-padded hex string.

        Both operands are hex strings, with or without a "0x" prefix; an
        empty string counts as 0.  The result has address_length lowercase
        hex digits and wraps around modulo 2**(4 * address_length), so it
        is well defined for both 32-bit (8 digit) and 64-bit (16 digit)
        profiles.
        """
        # Python integers are arbitrary precision, so the subtraction can
        # be done in one step; the modulo supplies the wraparound and makes
        # borrow handling between chunks unnecessary.
        modulus = 1 << (4 * address_length)
        diff = (self.HexStrToInt(addr1) - self.HexStrToInt(addr2)) % modulus
        return "%0*x" % (address_length, diff)

    def AddressAdd(self, addr1, addr2):
        """Return addr1 + addr2 as a zero-padded hex string.

        Both operands are hex strings, with or without a "0x" prefix; an
        empty string counts as 0.  The result has address_length lowercase
        hex digits and wraps around modulo 2**(4 * address_length), so it
        is well defined for both 32-bit (8 digit) and 64-bit (16 digit)
        profiles.  With both opt_debug and opt_test set, the operands and
        the result are printed.
        """
        trace = self.opt_debug and self.opt_test
        if trace:
            print("AddressAdd %s + %s = "%(addr1, addr2))

        # Python integers are arbitrary precision, so the addition can be
        # done in one step; the modulo supplies the wraparound and makes
        # carry handling between chunks unnecessary.
        modulus = 1 << (4 * address_length)
        total = (self.HexStrToInt(addr1) + self.HexStrToInt(addr2)) % modulus
        r = "%0*x" % (address_length, total)

        if trace:
            print(r)

        return r

    # dump into a list of libraries
    def ParseLibraries(self, prog, mapping, pcs):
        """Turn a memory-map dump into a list of library address ranges.

        prog    -- path of the main program
        mapping -- text of the mapped-libraries section, one map per line
        pcs     -- dict whose keys are the program counters seen in samples

        Returns a list of [lib, start, finish, offset] entries, one for
        every recognised map line, followed by a final entry for the main
        program that covers 0..max pc.  Libraries whose text section header
        cannot be read are skipped with a message.  Returns None when
        symbols come from a symbol page.
        """
        if self.use_symbol_page:
            # NOTE(review): bare return (None) kept for compatibility with
            # existing callers; confirm whether an empty list is wanted.
            return

        result = []  # list in list
        buildvar = ""
        zero_offset = self.HexExtend("0")

        # Compare map entries against the absolute path of the program.
        tmpfile = os.path.split(prog)[1]
        prog = os.path.dirname(os.path.abspath(prog)) + '/' + tmpfile

        # Compiled once instead of once per map line.
        build_re = re.compile(r"^\s*build=(.*)$")
        build_var_re = re.compile(r"\$build\b")
        full_line_re = re.compile(
            r"^([a-f0-9]+)-([a-f0-9]+)\s+..x.\s+([a-f0-9]+)\s+\S+:\S+\s+\d+\s+"
            r"(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$",
            re.IGNORECASE)
        cooked_re = re.compile(r"^\s*([a-f0-9]+)-([a-f0-9]+):\s*(\S+\.so(\.\d+)*)")
        exe_re = re.compile(
            r"^([a-f0-9]+)-([a-f0-9]+)\s+..x.\s+([a-f0-9]+)\s+\S+:\S+\s+\d+\s+(\S+)$",
            re.IGNORECASE)
        freebsd_re = re.compile(
            r"^(0x[a-f0-9]+)\s(0x[a-f0-9]+)\s\d+\s\d+\s0x[a-f0-9]+\sr-x\s\d+\s\d+\s0x\d+\s"
            r"(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)")

        for l in mapping.split("\n"):
            build_match = build_re.match(l)
            if build_match:
                buildvar = build_match.group(1)

            m1 = full_line_re.match(l)
            m2 = cooked_re.match(l)
            m3 = exe_re.match(l)
            m4 = freebsd_re.match(l)
            if m1:
                # Full line. Example:
                #   40000000-40015000 r-xp 00000000 03:01 12845071   /lib/ld-2.3.2.so
                start = self.HexExtend(m1.group(1))
                finish = self.HexExtend(m1.group(2))
                offset = self.HexExtend(m1.group(3))
                lib = m1.group(4).replace("\\", "/")
            elif m2:
                # Cooked line from DumpAddressMap.  Example:
                #   40000000-40015000: /lib/ld-2.3.2.so
                start = self.HexExtend(m2.group(1))
                finish = self.HexExtend(m2.group(2))
                offset = zero_offset
                lib = m2.group(3)
            elif m3 and (m3.group(4) == prog):
                # PIEs and address space randomization do not play well with our
                # default assumption that main executable is at lowest
                # addresses. So we're detecting main executable in
                # /proc/self/maps as well.
                start = self.HexExtend(m3.group(1))
                finish = self.HexExtend(m3.group(2))
                offset = self.HexExtend(m3.group(3))
                lib = m3.group(4).replace("\\", "/")
            elif m4:
                # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in
                # function procfs_doprocmap (sys/fs/procfs/procfs_map.c)
                #
                # Example:
                # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.so.1 NCH -1
                start = self.HexExtend(m4.group(1))
                finish = self.HexExtend(m4.group(2))
                offset = zero_offset
                lib = self.FindLibrary(m4.group(5))
            else:
                continue

            # Expand "$build" variable if available.  The expansion applies
            # to the library path; a function is used as replacement so that
            # backslashes in the build value are inserted literally.
            lib = build_var_re.sub(lambda _m: buildvar, lib)

            lib = self.FindLibrary(lib)
            # The offset only needs adjusting when symbols are not taken
            # from a debugging copy of the library.
            if not self.DebuggingLibrary(lib):
                text = self.ParseTextSectionHeader(lib)
                if text:
                    vma_offset = self.AddressSub(text['vma'], text['file_offset'])
                    offset = self.AddressAdd(offset, vma_offset)
                else:
                    print("!!! skip lib %s"%lib)
                    continue

            if self.opt_debug:
                print(start, finish, offset, lib)

            result.append([lib, start, finish, offset])

        # Append special entry for additional library (not relocated)
        if self.opt_lib != "":
            print("Not implemented, the process is not supposed to be here!")

        # Append special entry for the main program.  This covers
        # 0..max_pc_value_seen, so that we assume pc values not found in one
        # of the library ranges will be treated as coming from the main
        # program binary.
        min_pc = self.HexExtend("0")
        max_pc = min_pc              # find the maximal PC value in any sample
        for key in pcs.keys():
            if pcs[key]:
                extended = self.HexExtend(key)
                if extended > max_pc:
                    max_pc = extended

        result.append([prog, min_pc, max_pc, zero_offset])
        return result

    def ReadThreadedHeapProfile(self, prog, prof, header):
        header = header.strip('\n')
        index = self.HeapProfileIndex()
        sampling_algorithm = 0
        sample_adjustment  = 0
        
        pattern = re.compile("^heap_v2/(\d+)")
        m = pattern.match(header)
        if m:
            ptype = "_v2"
            sampling_algorithm = 2
            sample_adjustment = int(m.group(1))

        if ptype != "_v2" or not sample_adjustment:
            print("Threaded heap profiles require v2 sampling with a sample rate\n")
            sys.exit(0)
        
        profile = {} 
        thread_profiles = {} 
        pcs = {} 
        stack = ""
        mapping  = ""

        with open(prof, 'r') as f:
            lines = f.readlines()
            for line in lines:
                #DEBUG("%s:%s: %s\n"%(sys._getframe().f_back.f_code.co_name, str(sys._getframe().f_lineno), line))
                line = line.replace('\r', '')
                #line = line.replace('\n', '')
                if line.startswith("MAPPED_LIBRARIES:"):
                    mapping = mapping + self.ReadMappedLibraries(prof)
                    break

                if line.startswith("--- Memory map:"):
                    mapping = mapping + self.ReadMemoryMap(prof)
                    break

                # Read entry of the form:
                # @ a1 a2 ... an
                #   t*: <count1>: <bytes1> [<count2>: <bytes2>]
                #   t1: <count1>: <bytes1> [<count2>: <bytes2>]
                #     ...
                #   tn: <count1>: <bytes1> [<count2>: <bytes2>]
                header_space = re.compile('^\s*')
                tail_space   = re.compile('\s*$')
                if header_space.match(line):
                    line = header_space.sub('', line)
                if tail_space.match(line):
                    line = tail_space.sub('', line)

                stack_pattern = re.compile('^@\s+(.*)$') 
                m = stack_pattern.match(line)
                if m:
                    stack = m.group(1)
                    continue

                thread_pattern = re.compile('\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$')
                m = thread_pattern.match(line)
                if m:
                    if stack == "":
                        # Still in the header, so this is just a per-thread summary.
                        continue
                    thread = m.group(2)
                    n1, s1, n2, s2 = int(m.group(3)), int(m.group(4)), int(m.group(5)), int(m.group(6))
                    counts = self.AdjustSamples(sample_adjustment, sampling_algorithm,
                                                n1, s1, n2, s2)
                    if thread == "*":
                        self.AddEntries(profile, pcs, self.FixCallerAddresses(stack), counts[index])
                    else:
                        if not thread_profiles.has_key(thread):
                            thread_profiles[thread] = {}
                        self.AddEntries(thread_profiles[thread], pcs, self.FixCallerAddresses(stack), counts[index])

        r = {}
        r['version'] = "heap"
        r['period']  = 1
        r['profile'] = profile
        r['threads'] = thread_profiles
        r['libs']    = self.ParseLibraries(prog, mapping, pcs)
        r['pcs']     = pcs
        return r


    def ReadProfile(self, prog, fname):
        """Read the profile in *fname* for program *prog*.

        The first line of the file selects the parser.  Only heap profiles
        are handled; for them the result of ReadThreadedHeapProfile is
        returned and self.profile_type is set to 'heap'.  Any other header
        yields an empty dict.  An empty profile terminates the process.
        """
        result = {}

        header = self.ReadProfileHeader(fname)
        if header == "":
            print("Profile is empty.\n")
            sys.exit()

        if header.startswith("heap"):
            self.profile_type = 'heap'
            # Parse the file whose header was just read, for the program
            # that was passed in, rather than whatever the instance holds.
            result = self.ReadThreadedHeapProfile(prog, fname, header)
        else:
            DEBUG("%s:%s: Not supposed to be here!\n"%(sys._getframe().f_back.f_code.co_name,str(sys._getframe().f_lineno)))

        # if we got symbols along with the profile, return those as well
        # TODO: maybe sometime :)

        return result

    def TwoLayerListSort(self, l):
        """Sort the list of sequences *l* in place by each item's element 1."""
        l.sort(key=lambda row: row[1])

    # Run $nm_command and get all the resulting procedure boundaries whose
    # names match "$regexp" and returns them in a hashtable mapping from
    # procedure name to a two-element vector of [start address, end address]
    def GetProcedureBoundariesViaNm(self, escaped_nm_command, regexp):
        """Run an nm command and collect the boundaries of matching routines.

        escaped_nm_command -- complete shell command line to execute
        regexp             -- pattern a routine name must match (re.match)

        Returns an ordered mapping from "name<start address>" to
        [start address, end address], both normalised with HexExtend.  As
        a side effect the global sep_address is set when the separator
        symbol is seen.  Status 256 from the command prints its output and
        exits.
        """
        global sep_address
        boundaries = collections.OrderedDict()

        status, output = commands.getstatusoutput(escaped_nm_command)
        if status == 256:
            print(output)
            sys.exit(0)

        prev_start = "0"
        pending = ""     # routine queued up, waiting for its end address
        for raw in output.splitlines(True):
            # Full line example:
            # 0000000000000000 a _begin
            # 0000000000000000 r __ehdr_start
            # 0000000000000000 A GLIBC_2.2.5
            # Carriage returns are dropped so windows-looking lines parse too.
            parsed = re.match("^\s*([0-9a-f]+) (.) (..*)", raw.replace('\r', ''))
            if not parsed:
                continue
            start_val, ptype, this_routine = parsed.groups()

            # Several symbols can share one address (zero-length variables,
            # weak aliases).  The first one seen is queued and only recorded
            # once a line with a different address arrives.  Until then a
            # 'T'/'t' entry at the same address replaces the queued name;
            # every other duplicate is ignored.
            if start_val == prev_start:
                if re.match("t", ptype, re.IGNORECASE):
                    # We are the 'T' symbol at this address, replace previous symbol.
                    pending = this_routine
                continue

            if this_routine == sep_symbol:
                sep_address = self.HexExtend(start_val)

            # Tag this routine with the starting address in case the image
            # has multiple occurrences of this routine.  The syntax
            # resembles template parameters, which ShortFunctionName()
            # strips out again.
            tagged = this_routine + "<" + start_val + ">"

            # The new address closes the range of the queued routine.
            if pending and re.match(regexp, pending):
                boundaries[pending] = [self.HexExtend(prev_start), self.HexExtend(start_val)]

            prev_start = start_val
            pending = tagged

        # Handle the last line in the nm output.  Unfortunately, we don't know
        # how big this last symbol is, because we don't know how big the file
        # is.  For now, we just give it a size of 0.
        if pending and re.match(regexp, pending):
            boundaries[pending] = [self.HexExtend(prev_start), self.HexExtend(prev_start)]

        return boundaries


    # Gets the procedure boundaries for all routines in "$image" whose names
    # match "$regexp" and returns them in a hashtable mapping from procedure
    # name to a two-element vector of [start address, end address].
    # Will return an empty map if nm is not installed or not working properly.
    def GetProcedureBoundaries(self, image, regexp):
        """Return the procedure boundaries of *image* for matching routines.

        image  -- path of the binary or library
        regexp -- pattern a routine name must match

        Several nm invocations are tried in turn and the first non-empty
        symbol table wins.  Returns a mapping from routine name to
        [start address, end address], an empty dict when nothing could be
        read, or None when an "nm_pdb" tool is configured (not supported).
        """
        # If $image doesn't start with /, then put ./ in front of it.  This works
        # around an obnoxious bug in our probing of nm -f behavior.
        # "nm -f $image" is supposed to fail on GNU nm, but if:
        #
        # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
        # b. you have a.out in your current directory (a not uncommon occurrence)
        #
        # then "nm -f $image" succeeds because -f only looks at the first letter of
        # the argument, which looks valid because it's [BbSsPp], and then since
        # there's no image provided, it looks for a.out and finds it.
        #
        # Prefixing makes sure that $image starts with . or /, forcing the -f
        # parsing to fail since . and / are not valid formats.
        if image and not image.startswith('/'):
            image = './' + image

        # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
        debugging = self.DebuggingLibrary(image)
        if debugging:
            image = debugging

        nm = obj_tool_map['nm']
        cppfilt = obj_tool_map['c++filt']
        # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
        # binary doesn't support --demangle.  In addition, for OS X we need
        # to use the -f flag to get 'flat' nm output (otherwise we don't sort
        # properly and get incorrect results).  Unfortunately, GNU nm uses -f
        # in an incompatible way.  So first we test whether our nm supports
        # --demangle and -f.
        demangle_flag = ""
        cppfilt_flag = ""

        to_devnull = " >/dev/null 2>&1"  # Need to consider windows situation
        # Probe against the real image, not a file literally named "image".
        if os.system(self.ShellEscape(nm, "--demangle", image) + to_devnull) == 0:
            # In this mode, we do "nm --demangle <foo>"
            demangle_flag = "--demangle"
            cppfilt_flag = ""
        elif os.system(self.ShellEscape(cppfilt, "", image) + to_devnull) == 0:
            # In this mode, we do "nm <foo> | c++filt"
            cppfilt_flag = " | " + self.ShellEscape(cppfilt, "", "")

        flatten_flag = ""
        if os.system(self.ShellEscape(nm, "-f", image) + to_devnull) == 0:
            flatten_flag = "-f"

        # Flags are separated by spaces so that nm sees each one as its own
        # option; unset flags are left out.
        nm_flags = " ".join(flag for flag in ("-n", flatten_flag, demangle_flag) if flag)

        # Finally, in the case $image isn't a debug library, we try again with
        # -D to at least get *exported* symbols.  If we can't use --demangle,
        # we use c++filt instead, if it exists on this system.
        nm_commands = [
            self.ShellEscape(nm, nm_flags, image) + " 2>/dev/null " + cppfilt_flag,
            self.ShellEscape(nm, "-D " + nm_flags, image) + " 2>/dev/null " + cppfilt_flag,
            self.ShellEscape("6nm", image, "") + " 2>/dev/null | sort",
        ]

        # If the executable is an MS Windows PDB-format executable, we'll
        # have set up obj_tool_map("nm_pdb").  In this case, we actually
        # want to use both unix nm and windows-specific nm_pdb, since
        # PDB-format executables can apparently include dwarf .o files.
        if 'nm_pdb' in obj_tool_map:
            print("%s: Not supposed to be here!"%str(sys._getframe().f_lineno))
            return None

        for nm_command in nm_commands:
            symbol_table = self.GetProcedureBoundariesViaNm(nm_command, regexp)
            if symbol_table:
                return symbol_table

        return {}

    def ShortFunctionName(self, function):
        """Return a shortened form of a (demangled) function name.

        Removes parenthesized argument lists (plus an optional trailing
        ``const``), template argument lists, and everything up to the last
        whitespace that precedes a ``name::`` qualifier.

        Args:
            function: function name string.

        Returns:
            The shortened name.
        """
        f = function

        # Argument types.  One pass only removes the innermost "(...)"
        # groups, so repeat until nothing changes; otherwise a nested list
        # such as "f(int (*)(char))" is left half-stripped.
        while True:
            stripped = re.sub(r"\([^()]*\)(\s*const)?", "", f)
            if stripped == f:
                break
            f = stripped

        # Remove template arguments, innermost first, until none are left
        while True:
            stripped = re.sub(r"<[^<>]*>", "", f)
            if stripped == f:
                break
            f = stripped

        # Remove leading type, keeping the "name::" qualifier that follows it
        return re.sub(r"^.*\s+(\w+::)", r"\1", f)

    def MapSymbolsWithNM(self, image, offset, pclist, symbols):
        symbol_table = collections.OrderedDict() 
        # Get nm output sorted by increasing address
        symbol_table = self.GetProcedureBoundaries(image, ".")
        if symbol_table:
            pass
        else:
            return 0

        sort_symbol = sorted(symbol_table.items(), key=lambda x: x[1][0])
        names = [items[0] for items in sort_symbol]
        if len(names) < 0:
            # No symbols: just use addresses
            for pc in pclist:
                pcstr = "0x" + pc
                symbols[pc] = [pcstr, "?", pcstr]
            return 0

        # Sort addresses so we can do a join against nm output
        index = 0
        fullname = names[0]
        name = self.ShortFunctionName(fullname)
        pclist.sort()
        
        for pc in pclist:
            # Adjust for mapped offset
            mpc = self.AddressSub(pc, offset)
            while index < (len(names) - 1) and mpc >= symbol_table[fullname][1]:
                index += 1
                fullname = names[index]
                name = self.ShortFunctionName(fullname)

            if mpc < symbol_table[fullname][1]:
                symbols[pc] = [name, "?", fullname]
            else:
                pcstr = "0x" + pc
                symbols[pc] = [pcstr, "?", pcstr]

        return 1

    def MapToSymbols(self, image, offset, pclist, symbols):
        """Symbolize the addresses in pclist with addr2line and record them.

        Args:
            image: path of the image handed to ``addr2line -e``.
            offset: value subtracted from every pc (via AddressSub) before it
                is written to the addr2line input file.
            pclist: list of pc strings (also handed to MapSymbolsWithNM()).
            symbols: dict updated in place.  For each pc the value is a flat
                list of (short name, file:line, full name) triples; entries
                are prepended so that callers come before callees.

        Returns:
            None.

        Side effects:
            Resets the module-level ``sep_address``, overwrites the file
            named by ``self.tmpfile_sym`` and runs external commands.  When
            the addr2line command returns status 256 its output is printed
            and the process exits.
        """
        debug = 0
        global sep_address

        # Ignore empty binaries
        if len(pclist) == 0:
            return

        addr2line = obj_tool_map['addr2line']
        cmd = self.ShellEscape(addr2line, "-f -C -e", image)

        # If "addr2line" isn't installed on the system at all, just use
        # nm to get what info we can (function names, but not line numbers).
        # TODO: Not needed so far

        # "addr2line -i" can produce a variable number of lines per input
        # address, with no separator that allows us to tell when data for
        # the next address starts.  So we find the address for a special
        # symbol (_fini) and interleave this address between all real
        # addresses passed to addr2line.  The name of this special symbol
        # can then be used as a separator.
        sep_address = 0   # May be filled in by MapSymbolsWithNM()
        nm_symbols = collections.OrderedDict()
        self.MapSymbolsWithNM(image, offset, pclist, nm_symbols)
        if sep_address:
            # Only add " -i" to addr2line if the binary supports it.
            # addr2line --help returns 0, but not if it sees an unknown flag first.
            if os.system(cmd + " -i --help >/dev/null 2>&1") == 0:
                cmd += " -i"
            else:
                sep_address = None  # no need for sep_address if we don't support -i

        # Make file with all PC values with intervening 'sep_address' so
        # that we can reliably detect the end of inlined function list
        with open(self.tmpfile_sym, mode='w') as f:
            for pc in pclist:
                f.write(self.AddressSub(pc, offset) + "\n")
                if sep_address:
                    f.write(sep_address + "\n")

        escaped_tmpfile = self.ShellEscape(self.tmpfile_sym, '', '')
        symbolize_cmd = cmd + " <" + escaped_tmpfile

        if debug:
            # Dump the input file and the raw addr2line output.  This runs
            # after the "with" block so the file is flushed and closed.
            print("---- %s ---\n" % image)
            print("----\n")
            os.system("cat " + escaped_tmpfile)
            print("----\n")
            os.system(symbolize_cmd)
            print("----\n")

        status, lines = commands.getstatusoutput(symbolize_cmd)
        if status == 256:
            # NOTE(review): this reports a failure of the command on stdout
            # and exits with status 0; left unchanged for compatibility,
            # but a non-zero exit status looks more appropriate.
            print(lines)
            sys.exit(0)

        eol = re.compile('\r?\n$')
        res_list = lines.splitlines(True)
        count = 0   # Index in pclist

        # The output is consumed as (function, file:line) pairs of lines.  A
        # trailing unpaired line cannot be interpreted and is ignored.
        for pair in range(len(res_list) // 2):
            # Read fullfunction and filelineinfo from next pair of lines
            fullfunction = eol.sub('', res_list[2 * pair])
            filelinenum = eol.sub('', res_list[2 * pair + 1])

            if sep_address and (fullfunction == sep_symbol):
                # Terminate the current (possibly inlined) list of functions
                count += 1
                continue

            if count >= len(pclist):
                # More output than addresses were written: nothing to attach it to
                break

            # turn windows-style paths into unix-style paths
            filelinenum = filelinenum.replace('\\', '/')

            pcstr = pclist[count]
            function = self.ShortFunctionName(fullfunction)
            if pcstr in nm_symbols:
                nms = nm_symbols[pcstr]
                if fullfunction == '??':
                    # nm found a symbol for us.
                    function = nms[0]
                    fullfunction = nms[2]
                elif nms[2].startswith(function):
                    # MapSymbolsWithNM tags each routine with its starting address,
                    # useful in case the image has multiple occurrences of this
                    # routine.  (It uses a syntax that resembles template parameters,
                    # that are automatically stripped out by ShortFunctionName().)
                    # addr2line does not provide the same information.  So we check
                    # if nm disambiguated our symbol, and if so take the annotated
                    # (nm) version of the routine-name.  TODO(csilvers): this won't
                    # catch overloaded, inlined symbols, which nm doesn't see.
                    # Better would be to do a check similar to nm's, in this fn.
                    # The startswith() test is a sanity check that it's the right fn.
                    function = nms[0]
                    fullfunction = nms[2]

            # Prepend to accumulated symbols for pcstr
            # (so that caller comes before callee)
            sym = symbols.setdefault(pcstr, [])
            sym[0:0] = [function, filelinenum, fullfunction]

            if not sep_address:
                # Inlining is off, so this entry ends immediately
                count += 1


    # Extract symbols for all PC values found in profile
    def ExtractSymbols(self, libs, pcset):
        symbols = collections.OrderedDict() 

        # Map each PC value to the containing library.  To make this faster,
        # we sort libraries by their starting pc value (highest first), and
        # advance through the libraries as we advance the pc.  Sometimes the
        # addresses of libraries may overlap with the addresses of the main
        # binary, so to make sure the libraries 'win', we iterate over the
        # libraries in reverse order (which assumes the binary doesn't start
        # in the middle of a library, which seems a fair assumption).
        pcs = pcset.keys()
        pcs.sort()
        self.TwoLayerListSort(libs)
        libs.reverse()
        for l in libs:
            libname, start, finish, offset = l[0], l[1], l[2], l[3]

            # Use debug library if it exists
            debug_libname = self.DebuggingLibrary(libname)
            if debug_libname:
                libname = debug_libname

            # Get list of pcs that belong in this library.
            contained = []
            start_pc_index = 0
            finish_pc_index = 0

            # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
            for i in range(len(pcs)-1, -1, -1):
                if pcs[i] <= finish:
                    finish_pc_index = i + 1
                    break

            # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
            for i in range(finish_pc_index - 1, -1, -1):
                if pcs[i] < start:
                    start_pc_index = i + 1
                    break;
            # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
            # in case there are overlaps in libraries and the main binary.
            contained = pcs[start_pc_index:finish_pc_index]
	    pcs = self.ListSplice(pcs, start_pc_index, finish_pc_index)

            self.MapToSymbols(libname, self.AddressSub(start, offset), contained, symbols)
            
        return symbols

    def MergeSymbols(self):
        """Placeholder: merging of symbol maps is not implemented; returns None."""
        return None

    def FilterAndPrint(self, profile, symbols, libs):
        """Filter and reduce a profile, then print it as text.

        Args:
            profile: mapping from stack key to count.
            symbols: symbol table passed on to the filtering, reduction and
                printing steps.
            libs: accepted for interface compatibility; not used here.

        Returns:
            None.  Output is written with print().
        """
        # The grand total is taken from the profile before any filtering
        grand_total = self.TotalProfile(profile)

        # Drop uninteresting stack items
        filtered = self.RemoveUninterestingFrames(symbols, profile)

        # The call extraction result is not consumed by the text output
        self.ExtractCalls(symbols, filtered)

        # Reduce profiles to required output granularity, and also clean
        # each stack trace so a given entry exists at most once.
        reduced = self.ReduceProfile(symbols, filtered)

        # Derived profiles
        flat = self.FlatProfile(reduced)
        cumulative = self.CumulativeProfile(reduced)

        # NOTE: Currently only supports text mode output.
        if grand_total != 0:
            print("Total: %s %s"%(self.Unparse(grand_total), self.Units()))
        self.PrintText(symbols, flat, cumulative, -1)

    # Get total count in profile
    def TotalProfile(self, profile):
        """Return the sum of all counts in profile (0 for an empty profile)."""
        return sum(profile[key] for key in profile)

    def FilterFrames(self, symbols, profile):
        """Return profile unchanged; no frame filtering is applied here."""
        unfiltered = profile
        return unfiltered

    def RemoveUninterestingFrames(self, symbols, profile):
        # List of function names to skip
        skip = {}
        skip_regexp = re.compile('NOMATCH')

        # if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
        if 1:
            for name in ('calloc',
                         'cfree',
                         'malloc',
                         '__libc_malloc',
                         '__libc_calloc',
                         '__libc_realloc',
                         'newImpl',
                         'void* newImpl',
                         'free',
                         'memalign',
                         'posix_memalign',
                         'aligned_alloc',
                         'pvalloc',
                         'valloc',
                         'realloc',
                         'mallocx',
                         'rallocx',
                         'xallocx',
                         'dallocx',
                         'sdallocx',
                         'tc_calloc',
                         'tc_cfree',
                         'tc_malloc',
                         'tc_free',
                         'tc_memalign',
                         'tc_posix_memalign',
                         'tc_pvalloc',
                         'tc_valloc',
                         'tc_realloc',
                         'tc_new',
                         'tc_delete',
                         'tc_newarray',
                         'tc_deletearray',
                         'tc_new_nothrow',
                         'tc_newarray_nothrow',
                         'do_malloc',
                         '::do_malloc',   # new name -- got moved to an unnamed ns
                         '::do_malloc_or_cpp_alloc',
                         'DoSampledAllocation',
                         'simple_alloc::allocate',
                         '__malloc_alloc_template::allocate',
                         '__builtin_delete',
                         '__builtin_new',
                         '__builtin_vec_delete',
                         '__builtin_vec_new',
                         'operator new',
                         'operator new[]',
                         # The entry to our memory-allocation routines on OS X
                         'malloc_zone_malloc',
                         'malloc_zone_calloc',
                         'malloc_zone_valloc',
                         'malloc_zone_realloc',
                         'malloc_zone_memalign',
                         'malloc_zone_free',
                         # These mark the beginning/end of our custom sections
                         '__start_google_malloc',
                         '__stop_google_malloc',
                         '__start_malloc_hook',
                         '__stop_malloc_hook') :
                skip[name] = 1
                skip['_' + name] = 1   # Mach (OS X) adds a _ prefix to everything
            # TODO: Remove TCMalloc once everything has been
            # moved into the tcmalloc:: namespace and we have flushed
            # old code out of the system.
            skip_regexp = re.compile('TCMalloc|^tcmalloc::')

        result = {}
        for k in profile:
            count = profile[k]
            addrs = k.split('\n');
            path = []
            for a in addrs:
                if symbols.has_key(a):
                    func = symbols[a][0]
                    if skip.has_key(func) or skip_regexp.match(func):
                        # Throw away the portion of the backtrace seen so far, under the
                        # assumption that previous frames were for functions internal to the
                        # allocator.
                        path = []
                        continue
                path.append(a)
            reduced_path = '\n'.join(path)
            self.AddEntry(result, reduced_path, count)

        result = self.FilterFrames(symbols, result)
        return result

    def ExtractCalls(self, symbols, profile):
        """Placeholder: call extraction is not implemented; returns None."""
        return None

    # Generate flattened profile:
    # If count is charged to stack [a,b,c,d], in generated profile,
    # it will be charged to [a]
    def FlatProfile(self, profile):
        result = {};
        for k in profile:
            count = profile[k]
            addrs = k.split('\n');
            if len(addrs) >= 0 :
                self.AddEntry(result, addrs[0], count)
        return result

    # Generate cumulative profile:
    # If count is charged to stack [a,b,c,d], in generated profile,
    # it will be charged to [a], [b], [c], [d]
    def CumulativeProfile(self, profile):
        result = {}
        for k in profile:
            count = profile[k]
            addrs = k.split('\n')
            for a in addrs:
                self.AddEntry(result, a, count)
        return result

    # Provides a map from fullname to shortname for cases where the
    # shortname is ambiguous.  The symlist has both the fullname and
    # shortname for all symbols, which is usually fine, but sometimes --
    # such as overloaded functions -- two different fullnames can map to
    # the same shortname.  In that case, we use the address of the
    # function to disambiguate the two.  This function fills in a map that
    # maps fullnames to modified shortnames in such cases.  If a fullname
    # is not present in the map, the 'normal' shortname provided by the
    # symlist is the appropriate one to use.
    def FillFullnameToShortnameMap(self, symbols, fullname_to_shortname_map):
        """Record disambiguated short names for colliding full names.

        Only full names ending in an ``<hex address>`` tag are considered.
        When two different such full names share a short name, each gets the
        entry ``shortname@address`` (leading zeros of the address removed).

        Args:
            symbols: mapping whose values are symbol lists; element [0] is
                read as the short name and element [2] as the full name.
            fullname_to_shortname_map: dict updated in place; existing
                entries are left untouched.

        Returns:
            None.
        """
        shortnames_seen_once = {}
        shortnames_seen_more_than_once = set()

        # Both patterns are anchored to the END of the name, so they have to
        # be applied with search(); match() would only try position 0.
        address_suffix = re.compile('<[0-9a-fA-F]+>$')
        for symlist in symbols.values():
            # TODO(csilvers): deal with inlined symbols too.
            shortname = symlist[0]
            fullname = symlist[2]

            if not address_suffix.search(fullname):
                # fullname doesn't end in an address; the only collisions we
                # care about are when addresses differ
                continue
            if shortname in shortnames_seen_once and \
               shortnames_seen_once[shortname] != fullname:
                shortnames_seen_more_than_once.add(shortname)
            else:
                shortnames_seen_once[shortname] = fullname

        want_regexp = re.compile('<0*([^>]*)>$')
        for symlist in symbols.values():
            shortname = symlist[0]
            fullname = symlist[2]
            # TODO(csilvers): take in a list of addresses we care about, and only
            # store in the map if symlist[1] is in that list.  Saves space.
            if fullname in fullname_to_shortname_map:
                continue
            if shortname in shortnames_seen_more_than_once:
                result = want_regexp.search(fullname)
                if result:
                    fullname_to_shortname_map[fullname] = shortname + '@' + result.group(1)

    # Translate a stack of addresses into a stack of symbols
    def TranslateStack(self, symbols, fullname_to_shortname_map, k):
        """Translate a stack of addresses into a stack of symbol names.

        Args:
            symbols: mapping from address to a flat list of
                (short name, file:line, full name) triples.
            fullname_to_shortname_map: mapping from full name to the short
                name that should be used instead of the one in symbols.
            k: newline-joined stack of addresses.

        Returns:
            A list of function names.  Addresses missing from symbols are
            emitted as-is; entries taken from any triple other than the
            first one of an address get an " (inline)" suffix.
        """
        result = []

        for a in k.split('\n'):
            # Skip large addresses since they sometimes show up as fake entries on RH9
            if len(a) > 8 and a > '7fffffffffffffff':
                continue

            symlist = symbols[a] if a in symbols else [a, '', a]

            # We can have a sequence of symbols for a particular entry
            # (more than one symbol in the case of inlining).  Callers
            # come before callees in symlist, so walk backwards since
            # the translated stack should contain callees before callers.
            for j in range(len(symlist) - 1, 1, -3):
                func = symlist[j - 2]
                fullfunc = symlist[j]
                if fullfunc in fullname_to_shortname_map:
                    func = fullname_to_shortname_map[fullfunc]
                if j > 2:
                    func = func + ' (inline)'

                result.append(func)

        return result


    # Reduce profile to granularity given by user
    def ReduceProfile(self, symbols, profile):
        result = {}
        fullname_to_shortname_map = {}
        self.FillFullnameToShortnameMap(symbols, fullname_to_shortname_map)

        for k in profile:
            count = profile[k]
            translated = self.TranslateStack(symbols, fullname_to_shortname_map, k)
            path = []
            seen = {'' : 1} # So that empty keys are skipped
            for e in translated:
                # To avoid double-counting due to recursion, skip a stack-trace
                # entry if it has already been seen
                if not seen.has_key(e):
                    seen[e] = 1
                    path.append(e)

            reduced_path = '\n'.join(path)
            self.AddEntry(result, reduced_path, count)

        return result


    # Generate percent string for a number and a total 
    def Percent(self, num, tot):
        """Format num as a percentage of tot.

        Args:
            num: numerator.
            tot: total the percentage is relative to.

        Returns:
            A string such as "12.5%".  When tot is 0 the result is "nan" for
            num == 0, "+inf" for positive num and "-inf" for negative num.
        """
        if tot != 0:
            return '%.1f%%' % (num * 100.0 / tot)
        if num == 0:
            return 'nan'
        return '+inf' if num > 0 else '-inf'

    # Generate pretty-printed form of number
    def Unparse(self, num):
        if self.opt_show_bytes:
            return "%d" % num
        else:
            return "%.1f" % (num / 1048576.0)

    # Return output units 
    def Units(self):
        if self.opt_show_bytes:
            return "B"
        else:
            return "MB"
              

    # Print text output
    def PrintText(self, symbols, flat_profile, cum_profile, line_limit):
        total = self.TotalProfile(flat_profile)

        s = flat_profile

        running_sum = 0
        lines = 0

        # sort key by descending value then by itself with same value.
        for i in sorted(cum_profile.items(), cmp=lambda x,y:cmp(self.GetEntry(s, y[0]), self.GetEntry(s, x[0])) or cmp(x[0], y[0])):
            k = i[0]
            f = self.GetEntry(flat_profile, k)
            c = self.GetEntry(cum_profile, k)
            running_sum += f

            sym = k
            if symbols.has_key(k):
                sym = symbols[k][0] + ' ' + symbols[k][1]

            if f != 0 or c != 0:
                print("%8s %6s %6s %8s %6s %s" % (self.Unparse(f), self.Percent(f, total), self.Percent(running_sum, total), self.Unparse(c), self.Percent(c, total), sym))

            lines += 1
            if line_limit >= 0 and lines >= line_limit:
                break

    def cleanup(self):
        if os.path.exists(self.tmpfile_sym):
            os.unlink(self.tmpfile_sym)

    def MainProcess(self):
        """Run the whole tool: parse options, read the profile, print it.

        Sets self.tmpfile_sym / self.tmpfile_ps to pid-based names under
        /tmp, parses the parameters, fetches and reads the single supported
        profile, extracts symbols and prints the text report (unless
        self.opt_thread is set).  cleanup() always runs on the way out,
        including on errors and sys.exit().

        Raises:
            SystemExit: with status 1 when the number of profile files is
                not exactly one (merging several profiles is not supported).
        """
        self.tmpfile_sym = "/tmp/jeprof" + str(os.getpid()) + ".sym"
        self.tmpfile_ps  = "/tmp/jeprof" + str(os.getpid())

        try:
            self.ParamParser()

            # Fetch all profile data
            self.FetchDynamicProfiles()

            if len(self.profile_files) != 1:
                # Add additional profiles, if available.
                # reference to perl script
                # Not needed for us so far
                DEBUG("%s:%s: Not supposed to be here!\n"%(sys._getframe().f_back.f_code.co_name,str(sys._getframe().f_lineno)))
                # Without a profile there is nothing to symbolize or print;
                # fail explicitly instead of hitting unbound locals below.
                sys.stderr.write("jeprof: exactly one profile file is supported, got %d\n"
                                 % len(self.profile_files))
                sys.exit(1)

            # Read one profile, pick the last item on the list
            data = self.ReadProfile(self.prog, self.profile_files[0])
            profile = data['profile']
            pcs     = data['pcs']
            libs    = data['libs']   # Info about main program and shared libraries
            #TODO: Not needed sofar
            #symbol_map = self.MergeSymbols()

            # Collect symbols
            symbols = {}
            if self.use_symbolized_profile:
                pass
            elif self.use_symbol_page:
                pass
            else:
                symbols = self.ExtractSymbols(libs, pcs)

            if not self.opt_thread:
                self.FilterAndPrint(profile, symbols, libs)

            # TODO: per-thread show if needed
        finally:
            self.cleanup()

def main():
    """Create a JEPROF instance and run its MainProcess()."""
    JEPROF().MainProcess()

# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


