#!/usr/bin/env python3
# Copyright Pivotal 2014


import glob
import os
import re
import shutil
import stat
import sys
from optparse import OptionParser
from subprocess import Popen, PIPE, STDOUT


def _getPlatformInfo():
    """Write platform identification files into the current directory.

    When ``lsb_release`` is found on PATH its ``-a`` output is stored in
    ``lsb_release.out``; otherwise every file matching ``/etc/*release`` is
    copied.  ``/etc/gpdb-appliance-version`` is copied when it exists, and
    the output of ``uname -r`` is stored in ``uname.out``.

    Each child process is waited for before returning, so the output files
    are complete by the time the caller archives the directory.
    """
    lsb_release = which('lsb_release')
    if lsb_release is None:
        for release_file in glob.glob('/etc/*release'):
            shutil.copy(release_file, '.')
    else:
        with open('lsb_release.out', 'w') as out:
            try:
                # stderr is captured only to keep it off the terminal
                Popen([lsb_release, '-a'], stdout=out,
                      stderr=PIPE).communicate()
            except OSError:
                # best effort: platform details are optional extras
                pass

    if os.path.exists('/etc/gpdb-appliance-version'):
        shutil.copy('/etc/gpdb-appliance-version', '.')

    with open('uname.out', 'w') as out:
        try:
            Popen(['uname', '-r'], stdout=out).wait()
        except OSError:
            # best effort: leave an empty uname.out when uname is missing
            pass


def _fileSupportsParamOption(versionOutput):
    """Return True if `file --version` output reports version 5.21 or later.

    The version is compared numerically; comparing the raw strings would
    rank 'file-5.9' above 'file-5.21'.  Unparseable output yields False.
    """
    match = re.search(r'file-(\d+)\.(\d+)', versionOutput)
    if match is None:
        return False
    return (int(match.group(1)), int(match.group(2))) >= (5, 21)


def _getFileInfo(coreFile):
    """Run the `file` command on coreFile and return its decoded output.

    Raises:
        Exception: if the `file` command cannot be found on PATH.
    """
    file_cmd = which('file')
    if file_cmd is None:
        raise Exception("cannot find file command")

    probe = Popen([file_cmd, '--version'], stdout=PIPE, stderr=STDOUT)
    versionOutput = probe.communicate()[0].decode(errors='replace')
    # file allows setting parameters from the command line since version
    # 5.21, refer:
    # https://github.com/file/file/commit/6ce24f35cd4a43c4bdd249e8e0c4952c1f8eac67
    # Raise the number of ELF program sections processed for core files to
    # suppress the "too many program headers" output
    opts = [file_cmd]
    if _fileSupportsParamOption(versionOutput):
        opts += ['-P', 'elf_phnum=2048']
    opts += [coreFile]
    cmd = Popen(opts, stdout=PIPE)
    return cmd.communicate()[0].decode()


def _isCore(fileCmdOutput):
    if fileCmdOutput.find('LSB core file') is -1:
        return False
    return True


def _findBinary(fileCmdOutput):
    # execfn: '/path/to/postgres'
    field = str.find(fileCmdOutput, 'execfn')
    # if 'execfn' field is not found, search for 'from' field instead
    if field < 0:
        # from: /path/to/postgres
        # from: postgres: 5432, ...
        field = str.find(fileCmdOutput, 'from')
    # if 'from' field is still missing, search for any single-quoted string
    if field < 0:
        field = 0
    start = str.find(fileCmdOutput, "'", field) + 1
    end = str.find(fileCmdOutput, "'", start)
    if start <= 0 or end < 0:
        return None
    cmd = fileCmdOutput[start:end]
    # special characters can be correctly handled in abs format, no need to
    # remove them
    if os.path.isabs(cmd):
        return cmd
    # otherwise try to search with the process name, punctuations like ':'
    # should be removed
    cmd = cmd.split()[0].translate(None, str.punctuation)
    return which(cmd)


def _getLibraryListWithLDD(binary):
    """Return the shared libraries of `binary` as reported by ldd.

    The list starts with a fixed set of libgcc_s / libnss_files candidate
    paths, followed by every token that precedes a " (0x" load address in
    the ldd output.  When ldd is not on PATH, the dynamic loader
    /lib64/ld-linux-x86-64.so.2 is run with --list instead.
    """
    # We manually seed this with a few libraries that are missed
    # This may not be needed for all processes, but will round out the
    # postgres binary debugging
    # TODO: Look at ways to distinguish a 32 vs. 64 bit executable
    libraries = [
        # on centos
        '/lib64/libgcc_s.so.1',
        '/lib64/libnss_files.so.2',
        '/lib/libgcc_s.so.1',
        '/lib/libnss_files.so.2',

        # on ubuntu
        '/lib/x86_64-linux-gnu/libgcc_s.so.1',
        '/lib/x86_64-linux-gnu/libnss_files.so.2',
        '/usr/lib32/libgcc_s.so.1',
        '/lib32/libnss_files.so.2',
        '/usr/libx32/libgcc_s.so.1',
        '/libx32/libnss_files.so.2',
    ]

    ldd = which('ldd')
    if ldd:
        args = [ldd]
    else:
        # simulate ldd with ld-linux.so
        args = ['/lib64/ld-linux-x86-64.so.2', '--list']

    # the context manager closes the pipe and reaps the child on exit
    with Popen(args + [binary], stdout=PIPE) as proc:
        output = proc.communicate()[0].decode()

    entry = re.compile(r'(\S+) \(0x')
    for line in output.splitlines():
        match = entry.search(line)
        if match:
            libraries.append(match.group(1).strip())
    return libraries


def _getLibraryListWithGDB(coreFile, binary):
    """List the shared libraries gdb reports for a core file.

    Runs gdb in batch mode with `info sharedlibrary` against `binary` and
    `coreFile`, then parses its standard output.

    Returns:
        False when gdb is not found on PATH; otherwise a list holding, for
        each line after the table header that contains a path separator, the
        text from the first separator to the end of the line.
    """
    gdb_path = which('gdb')
    if gdb_path is None:
        return False

    # fix for issues with PYTHONPATH and PYTHONHOME: run gdb without them
    # (and without LD_LIBRARY_PATH)
    dropped = ('PYTHONHOME', 'PYTHONPATH', 'LD_LIBRARY_PATH')
    gdb_env = dict((name, value) for name, value in os.environ.items()
                   if name not in dropped)

    argv = [
        gdb_path,
        '--batch',                              # exit after processing options
        '--nx',                                 # do not read any .gdbinit files
        '--eval-command=info sharedlibrary',    # list shared libraries
        '-c', coreFile,
        binary,
    ]
    proc = Popen(argv, stdout=PIPE, stderr=PIPE, env=gdb_env)
    stdout_data = proc.communicate()[0]

    # Expected gdb output:
    #
    #     ...
    #     (gdb) info sharedlibrary
    #     From        To          Syms Read  Shared Object Library
    #     0x00001000  0x00001234  Yes (*)    /path/to/liba.so.1.0
    #     0x00002000  0x00002234  Yes (*)    /path/to/libb.so.1.0
    #     0x00003000  0x00003234  Yes        /path/to/libc.so.1.0
    #     (*): Shared library is missing debugging information.
    #
    # Skip everything up to and including the header line, then take the
    # path portion of each remaining line.
    remaining = iter(stdout_data.decode().splitlines())
    for text in remaining:
        if 'Shared Object Library' in text:
            break

    found = []
    for text in remaining:
        position = text.find(os.path.sep)
        if position != -1:
            found.append(text[position:])
    return found


def _copyFilePath(src, dst):
    srcDir = os.path.dirname(src)
    if srcDir.find('/') is 0:
        srcDir = srcDir[1:]
    dstDir = os.path.join(dst, srcDir)
    if not os.path.exists(dstDir):
        os.makedirs(dstDir)
    shutil.copy(src, dstDir)


def _generateGDBScript(b, c):
    with open('runGDB.sh', 'w') as f1:
        print('''\
#!/bin/bash
unset PYTHONHOME
unset PYTHONPATH
curDIR=`pwd`
/usr/bin/gdb \\
    --eval-command="set sysroot $curDIR" \\
    --eval-command="core {core}" \\
    {binary} \\
    "$@"
'''.format(core=c, binary=b), file=f1)
    os.chmod('runGDB.sh', 0o0755)


# This is taken from Python 3.3:
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Given a command, mode, and a PATH string, return the path which
    conforms to the given mode on the PATH, or None if there is no such
    file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path, given either as an os.pathsep-separated string or as an iterable
    of directories.

    """
    # Check that a given file can be accessed with the correct mode.
    # Additionally check that `file` is not a directory, as on Windows
    # directories pass the os.access check.
    def _access_check(fn, mode):
        return (os.path.exists(fn) and os.access(fn, mode)
                and not os.path.isdir(fn))

    # If we're given a path with a directory part, look it up directly rather
    # than referring to PATH directories. This includes checking relative to
    # the current directory, e.g. ./script
    if os.path.dirname(cmd):
        if _access_check(cmd, mode):
            return cmd
        return None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    if isinstance(path, str):
        # A caller-supplied PATH string must be split as well; iterating it
        # directly would walk it one character at a time.
        path = path.split(os.pathsep)
    else:
        # copy so the Windows branch below never mutates the caller's list
        path = list(path)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in path:
            path.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # See if the given file matches any of the expected path extensions.
        # This will allow us to short circuit when given "python.exe".
        # If it does match, only test that one, otherwise we have to try
        # others.
        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
            files = [cmd]
        else:
            files = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        files = [cmd]

    seen = set()
    for directory in path:
        normdir = os.path.normcase(directory)
        if normdir not in seen:
            seen.add(normdir)
            for thefile in files:
                name = os.path.join(directory, thefile)
                if _access_check(name, mode):
                    return name
    return None


def packCoreFile(coreFile, binary):
    """Bundle a core file, its binary and shared libraries into a tarball.

    A scratch directory `packcore-<core name>` is created in the current
    directory and filled with the core file, platform information, the
    binary, the libraries reported by gdb (or by ldd when gdb is not
    available) and a runGDB.sh helper.  It is then archived as
    `packcore-<core name>.tgz` and the scratch directory is removed.

    Raises:
        OSError: if the scratch directory already exists or a required file
            cannot be copied.
        Exception: if tar exits with a non-zero status.
    """
    # Resolve both inputs before changing directory so that relative paths
    # keep pointing at the right files.
    coreFile = os.path.abspath(coreFile)
    binary = os.path.abspath(binary)

    packDir = './packcore-' + os.path.basename(coreFile)
    packTarball = packDir + '.tgz'
    oldDir = os.getcwd()
    # absolute forms for cleanup, valid whatever the cwd is at that point
    packDirAbs = os.path.abspath(packDir)
    packTarballAbs = os.path.abspath(packTarball)

    os.mkdir(packDir)
    try:
        os.chdir(packDir)
        shutil.copy(coreFile, '.')
        _getPlatformInfo()
        shutil.copy(binary, '.')

        libraries = _getLibraryListWithGDB(coreFile, binary)

        # False (as opposed to an empty list) means gdb is not installed
        if libraries is False:
            libraries = _getLibraryListWithLDD(binary)

        for lib in libraries:
            try:
                _copyFilePath(lib, '.')
            except IOError:
                # best effort: listed libraries may not exist on this host
                continue

        _generateGDBScript(os.path.basename(binary),
                           os.path.basename(coreFile))
        os.chdir(oldDir)
        # relative names keep the archive members relative
        tar = Popen(['tar', 'zcf', packTarball, packDir])
        if tar.wait() != 0:
            raise Exception(
                'tar exited with status {}'.format(tar.returncode))
    except Exception:
        # do not leave a partial or corrupt tarball behind
        try:
            os.remove(packTarballAbs)
        except OSError:
            pass
        raise
    finally:
        os.chdir(oldDir)
        shutil.rmtree(packDirAbs, ignore_errors=True)


def parseArgs(argv=None):
    """Parse the command line and return an ``(options, args)`` tuple.

    Args:
        argv: list of argument strings to parse; when None, optparse falls
            back to ``sys.argv[1:]``.

    Returns:
        ``(options, args)`` where ``options.binary`` holds the value of
        -b/--binary (None when not given) and ``args`` is a one-element
        list containing the core file argument.

    Exactly one positional argument is required; otherwise
    ``parser.error()`` prints the usage and exits with status 2.
    """
    usage = '''%prog [options] core_file
This will create an archive with the core file and all required
libraries for analysis.  The preference is to use GDB so that we can
resolve dependencies for extensions.'''

    parser = OptionParser(version='%prog: $Revision: #1 $', usage=usage)
    parser.add_option(
        '-b', '--binary', action='store', type='string', dest='binary',
        metavar='PROGRAMME',
        help='The full path to the binary that created the core file.  '
             'Used when packcore cannot determine the source binary')
    (option, args) = parser.parse_args(argv)
    if len(args) != 1:
        # parser.error() never returns: it raises SystemExit(2)
        parser.error('Please specify a core file')
    return (option, args)


def main():
    """Command-line entry point: validate the inputs and pack the core file.

    Exits with status 0 on success, and with status 1 when the interpreter
    is too old, the argument is not a core file, or the binary that
    produced it cannot be located.
    """
    # Check python version: the script uses Python 3 only syntax and APIs
    if sys.version_info[0] < 3:
        sys.stderr.write('packcore requires python 3.  '
                         'Current version is:\n' + sys.version + '\n')
        sys.exit(1)

    (options, args) = parseArgs()

    coreFile = os.path.abspath(args[0])
    fileCmd = _getFileInfo(coreFile)
    if not _isCore(fileCmd):
        sys.stderr.write(args[0] + ' is not a valid core file\n')
        sys.exit(1)

    if options.binary:
        binary = which(options.binary)
    else:
        binary = _findBinary(fileCmd)

    if not binary:
        sys.stderr.write("Unable to find full path to binary for core file\n")
        sys.exit(1)

    # which() can return a relative path (e.g. for `-b ./postgres`); make it
    # absolute because packing changes the working directory.
    packCoreFile(coreFile, os.path.abspath(binary))
    sys.exit(0)


# Run the packager only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
