import datetime
import logging
import os
import sys
from pathlib import Path
from subprocess import PIPE, Popen, check_output
from tempfile import gettempdir, mkdtemp

import click
from advisor_logging import check, log, log_process, progress


@click.command()
# Required arguments
@click.option('--path', '-p', help='Absolute path to the Devito executable.',
              required=True)
# Optional arguments
@click.option('--exec-args', type=click.UNPROCESSED, default='',
              help='Arguments passed to the executable.')
@click.option('--output', '-o', help='A directory for storing profiling reports. '
                                     'The directory is created if it does not exist. '
                                     'If unspecified, reports are stored within '
                                     'a temporary directory.')
@click.option('--name', '-n', help='A unique name identifying the run. '
                                   'If unspecified, a name is generated joining '
                                   'the executable name with the options specified '
                                   'in --exec-args (if any).')
def run_with_advisor(path, output, name, exec_args):
    """
    Collect Intel Advisor `survey` and `tripcounts` data for a Devito script.

    The script is first run once on its own to warm up the JIT cache, then
    once per analysis under `numactl` and `advixe-cl`. The Advisor data and a
    log file are stored in a per-run project directory.
    """
    # NOTE: `check(False, ...)` is used below as the failure path; it is defined
    # in `advisor_logging` and presumably aborts the script -- confirm there.
    path = Path(path)
    check(path.is_file(), f'{path} not found')
    check(path.suffix == '.py', f'{path} not a Python file')

    # Generate a run name from the script name and its arguments (whitespace
    # removed) when the user did not provide one
    if name is None:
        name = path.stem
        if exec_args:
            name = f"{name}_{''.join(exec_args.split())}"

    # Create a directory to store the profiling report. `name` is always set at
    # this point, so the project directory is always `<output>/<name>`
    if output is None:
        output = Path(gettempdir()).joinpath('devito-profilings')
    else:
        output = Path(output)
    output = output.joinpath(name)
    output.mkdir(parents=True, exist_ok=True)

    # advixe-cl and icx should be available through Intel oneAPI
    # (tested with Intel oneAPI 2025.1)
    try:
        ret = check_output(['advixe-cl', '--version']).decode("utf-8")
        log(f"Found advixe-cl version: {ret.strip()}\n")
    except FileNotFoundError:
        check(False, "Error: Couldn't detect `advixe-cl` to run Intel Advisor."
              " Please source the Advisor environment.")

    try:
        ret = check_output(['icx', '--version']).decode("utf-8")
        log(f"Found icx version: {ret.strip()}\n")
    except FileNotFoundError:
        check(False, "Error: Couldn't detect Intel Compiler (icx)."
              " Please source the Intel oneAPI compilers.")

    # All good, Intel compiler and advisor are available
    os.environ['DEVITO_ARCH'] = 'icx'

    # Tell Devito to instrument the generated code for Advisor
    os.environ['DEVITO_PROFILING'] = 'advisor'

    # Devito Logging is disabled unless the user asks explicitly to see it,
    # i.e. a DEVITO_LOGGING value already present in the environment is kept
    os.environ.setdefault('DEVITO_LOGGING', 'WARNING')

    with progress('Setting up multi-threading environment with OpenMP'):
        # Roofline analyses are recommended with threading enabled
        os.environ['DEVITO_LANGUAGE'] = 'openmp'

        # Thread pinning is strongly recommended for reliable results.
        # This script is using numactl for this purpose. Users may want to set their
        # own pinning: https://hpc-wiki.info/hpc/Binding/Pinning
        try:
            ret = check_output(['numactl', '--show']).decode("utf-8")
            # Parse the `key: value` lines. Split on the first colon only and
            # skip lines without one, so unexpected lines cannot break parsing
            numa_info = {}
            for line in ret.splitlines():
                key, sep, value = line.partition(':')
                if sep:
                    numa_info[key.strip()] = value
            # One entry per node listed under `cpubind`; reported later as the
            # `--scale` value of the suggested roofline command
            n_sockets = len(numa_info['cpubind'].split())
        except FileNotFoundError:
            check(False, "Couldn't detect `numactl`, necessary for thread pinning.")
        except KeyError:
            check(False, "Couldn't find `cpubind` in the output of `numactl --show`.")

        # Prevent NumPy from using threads, which otherwise leads to a deadlock when
        # used in combination with Advisor. This issue has been described at:
        #   `software.intel.com/en-us/forums/intel-advisor-xe/topic/780506`
        # Note: we should rather sniff the BLAS library used by NumPy, and set the
        # appropriate env var only
        os.environ['OPENBLAS_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        # Note: `NumExpr`, used by NumPy, also employs threading, so we shall disable
        # it too via the corresponding env var. See:
        #   `stackoverflow.com/questions/17053671/python-how-do-you-stop-numpy-from-multithreading`  # noqa
        os.environ['NUMEXPR_NUM_THREADS'] = '1'

    # To build a roofline with Advisor, we need to run two analyses back to
    # back, `survey` and `tripcounts`.

    numactl_cmd = [
        'numactl',
        '--cpunodebind=0'
    ]
    advisor_cmd = [
        'advixe-cl',
        '-data-limit=500',
        '-project-dir', str(output),
        # Root directory where Devito stores the generated code
        f'-search-dir src:r={gettempdir()}',
    ]
    advisor_survey = [
        '-collect survey',
        # Avoids `https://software.intel.com/en-us/vtune-amplifier-help-error-message-stack-size-is-too-small`  # noqa
        '-run-pass-thru=--no-altstack',
        # Avoids 'VTune Amplifier may detect which timer source to use incorrectly
        # on Intel® Xeon® processor E5-XXXX processors (200287361)'
        '-run-pass-thru=-timestamp=sys',
        # Avoids `https://software.intel.com/en-us/forums/intel-vtune-amplifier-xe/topic/779309`  # noqa
        '-strategy ldconfig:notrace:notrace',
        # The generated code will enable/disable Advisor on a loop basis
        # according to the decorated pragmas
        '-start-paused',
    ]
    advisor_flops = [
        '--collect=tripcounts',
        # Cache simulation; for the integrated roofline model see
        # `https://software.intel.com/content/www/us/en/develop/articles/integrated-roofline-model-with-intel-advisor.html`  # noqa
        '--enable-cache-simulation',
        '--flop',
        '--stacks',
        '--collect=map',
        '-start-paused',
    ]
    py_cmd = [sys.executable, str(path)] + exec_args.split()

    # The `survey` and `tripcounts` collections are preceded by a "pure" Python
    # run, which warms up the JIT cache

    log(f'Starting Intel Advisor\'s `roofline` analysis for `{name}`')
    dt = datetime.datetime.now()

    # Set up a file logger that will track the output of the advisor profiling
    advixe_logger = logging.getLogger('run_advisor_logger')
    advixe_logger.setLevel(logging.INFO)

    advixe_formatter = logging.Formatter('%(asctime)s: %(message)s')
    logger_datetime = f'{dt.year}.{dt.month}.{dt.day}.{dt.hour}.{dt.minute}.{dt.second}'
    # A run name may contain path separators (e.g. when generated from
    # `--exec-args` holding a path); replace them in the log file name, which
    # would otherwise point into a directory that does not exist
    log_stem = name.replace('/', '_').replace(os.sep, '_')
    advixe_handler = logging.FileHandler(f'{output}/{log_stem}_{logger_datetime}.log')
    advixe_handler.setFormatter(advixe_formatter)
    advixe_logger.addHandler(advixe_handler)

    try:
        log(f"Project folder: {output}")
        log(f"Logging progress in: `{advixe_handler.baseFilename}`")

        with progress('Performing `cache warm-up` run'):
            try:
                p_warm_up = Popen(py_cmd, stdout=PIPE, stderr=PIPE)
                log_process(p_warm_up, advixe_logger)
            except OSError:
                check(False, 'Failed!')

        with progress('Performing `survey` analysis'):
            cmd = numactl_cmd + ['--'] + advisor_cmd + advisor_survey + ['--'] + py_cmd
            try:
                p_survey = Popen(cmd, stdout=PIPE, stderr=PIPE)
                log_process(p_survey, advixe_logger)
            except OSError:
                check(False, 'Failed!')

        with progress('Performing `tripcounts` analysis'):
            cmd = numactl_cmd + ['--'] + advisor_cmd + advisor_flops + ['--'] + py_cmd
            try:
                p_tripcounts = Popen(cmd, stdout=PIPE, stderr=PIPE)
                log_process(p_tripcounts, advixe_logger)
            except OSError:
                check(False, 'Failed!')
    finally:
        # Release the log file and detach the handler from the named logger, so
        # that another invocation in the same process does not write to it too
        advixe_logger.removeHandler(advixe_handler)
        advixe_handler.close()

    log(f'Storing `survey` and `tripcounts` data in `{output}`')
    log('To plot a roofline type: ')
    log(f'python3 roofline.py --name {name} --project {output} --scale {n_sockets}')

    log('\nTo open the roofline using advixe-gui: ')
    log(f'advixe-gui {output}')


# Script entry point. The command is invoked without arguments: the
# `click.command` decorator supplies the option values from the command line.
if __name__ == "__main__":
    run_with_advisor()
