#!/usr/bin/env python3
#  Copyright (c) 2015 - 2023, Intel Corporation
#  SPDX-License-Identifier: BSD-3-Clause

"""
Usage: test_license [-u update_dir] [repo_dir]
    repo_dir - The repository directory to test, defaults to '.'.
    update_dir - The directory containing updated license headers.

Assumes a repository has a sub-directory called copying_headers which
is populated with the files that describe the license headers. These
files are copying_headers/MANIFEST.* and copying_headers/header.*.

The MANIFEST.* sub-manifest files and the header.* license header
files have extensions based on the license tag that determines the
license header required for each of the files listed in the
sub-manifest.

Additionally there is a file called MANIFEST.EXEMPT which lists all
files that do not require license headers.

Currently no support for the -u option.
"""


import sys
import os
import re
import getopt
import itertools

# Version string written to stdout by the --version command line option.
__version__ = '0.0.1'

# Default maximum number of lines read from the top of each file when
# searching for its license header; used by License when no explicit
# search_length is given.
DEFAULT_SEARCH_LENGTH = 1024

class CommandLineError(Exception):
    """
    Error in the command line arguments.

    Attributes:
        message: Description of the problem (any object that can be
            formatted into a string).
        errno: Numeric error code for this error class, always 1.
    """
    def __init__(self, message):
        self.errno = 1
        self.message = message

    def __str__(self):
        # Point the user at the help text of the running program.
        program = sys.argv[0]
        return '{0}\nFor help run: {1} --help\n'.format(self.message, program)

class ManifestIncompleteError(Exception):
    """
    Files are missing from the license manifest.

    Attributes:
        paths: Sorted list of the offending paths converted to strings.
        errno: Numeric error code for this error class, always 2.
    """
    def __init__(self, paths):
        self.errno = 2
        self.paths = sorted(str(pp) for pp in paths)

    def __str__(self):
        # One path per line, each indented by four spaces.
        listing = '\n    '.join(self.paths)
        return 'Files missing from license manifest:\n    {0}'.format(listing)

class ManifestDoubleCountedError(Exception):
    """
    Files are listed in more than one license manifest.

    Attributes:
        paths: Sorted list of the offending paths converted to strings.
        errno: Numeric error code for this error class, always 2.
    """
    def __init__(self, paths):
        self.errno = 2
        self.paths = sorted(str(pp) for pp in paths)

    def __str__(self):
        # One path per line, each indented by four spaces.
        listing = '\n    '.join(self.paths)
        return 'Files accounted for in more than one license manifest:\n    {0}'.format(listing)

class MissingHeaderError(Exception):
    """
    A file does not contain its required license header.

    Attributes:
        path: The offending path converted to a string.
        errno: Numeric error code for this error class, always 3.
    """
    def __init__(self, path):
        self.errno = 3
        self.path = str(path)

    def __str__(self):
        template = 'File does not contain license header: "{0}"'
        return template.format(self.path)

class BadRepoError(Exception):
    """
    One or more files expected in the repository could not be found.

    Attributes:
        path: Description of the missing file(s) converted to a string.
        errno: Numeric error code for this error class, always 4.
    """
    def __init__(self, path):
        self.errno = 4
        self.path = str(path)

    def __str__(self):
        template = 'Unable to find file(s): {0}'
        return template.format(self.path)

class LicenseHeaderFormatError(Exception):
    """
    A license header file is not usable.

    Attributes:
        msg: Short description of the problem.
        path: Path of the offending header file, stored as given.
        errno: Numeric error code for this error class, always 5.
    """
    def __init__(self, msg, path):
        self.errno = 5
        self.path = path
        self.msg = msg

    def __str__(self):
        # Message on the first line, path on the second.
        template = '{0}:\n{1}'
        return template.format(self.msg, self.path)

class FilterBorg:
    """
    Base class whose instances all share one attribute dictionary.

    Every instance's __dict__ is rebound to the class-level
    _sharedState dictionary, so an attribute set through one instance
    is visible through all of them.
    """
    # Attribute storage common to all instances.
    _sharedState = {}

    def __init__(self):
        shared = self._sharedState
        self.__dict__ = shared

class Filter(FilterBorg):
    """
    Normalizes text so license headers can be compared regardless of
    comment decoration.

    Holds compiled regular expressions that strip comment and
    white-space characters from the beginning and end of each line
    (keeping the line endings) and that remove copyright years.  The
    expressions live in the state shared through FilterBorg, so they
    are compiled only by the first instance created.
    """
    def __init__(self):
        FilterBorg.__init__(self)
        if self.__dict__ != {}:
            # Shared state is already populated by an earlier instance.
            return
        # Optional white space, a run of comment punctuation, then
        # optional white space again.
        base_filter = r'[ \t\f\v]*[\[\]!@#$*/%;.\\"\(\)]*[ \t\f\v]*'
        self._Aleft_filter = re.compile(r'\A' + base_filter)
        self._left_filter = re.compile(os.linesep + base_filter)
        self._right_filter = re.compile(base_filter + os.linesep)
        self._Zright_filter = re.compile(base_filter + r'\Z')
        # Separator characters followed by a four digit year.
        date_filter = r'[,\- \t\(\)]*[12][90]\d\d\)?'
        self._crdate_filter = re.compile(r'([Cc]opyright)' + date_filter)
        self._cdate_filter = re.compile(r'(\([Cc]\))' + date_filter)

    def filter(self, head):
        """
        Return head with comment decoration and copyright years removed.

        Args:
            head: Text to normalize.

        Returns:
            The normalized text with leading and trailing white space
            stripped.
        """
        # Strip decoration at the start of the text, after each line
        # break, before each line break and at the end of the text.
        text = self._Aleft_filter.sub('', head)
        text = self._left_filter.sub(os.linesep, text)
        text = self._right_filter.sub(os.linesep, text)
        text = self._Zright_filter.sub('', text)
        # Each pass removes one year after the marker; repeat until the
        # text stops changing so that lists of years are fully removed.
        for date_re in (self._crdate_filter, self._cdate_filter):
            stripped = date_re.sub(r'\1', text)
            while stripped != text:
                text = stripped
                stripped = date_re.sub(r'\1', text)
        return text.strip()


class License(object):
    """
    A license header and the check that a file contains it.

    The header text is normalized with Filter when the object is
    created; check() normalizes the head of a file the same way and
    looks for the header text inside it.
    """
    def __init__(self, header_path, search_length=0):
        """
        Load and validate the license header stored in header_path.

        Args:
            header_path: Path to the file holding the license header.
            search_length: Maximum number of lines read from the top of
                each checked file.  A false value (the default 0)
                selects DEFAULT_SEARCH_LENGTH.

        Raises:
            BadRepoError: header_path cannot be read.
            LicenseHeaderFormatError: The header uses line endings that
                do not match the host system, search_length is not more
                than twice the number of newlines in the header, or the
                header is empty after filtering.
        """
        # Set number of lines to search for license
        if search_length:
            self.search_length = search_length
        else:
            self.search_length = DEFAULT_SEARCH_LENGTH

        # Read license header; the context manager closes the file.
        try:
            with open(header_path) as fid:
                lic_text = fid.read()
        except OSError:
            raise BadRepoError(header_path)

        # Check line endings
        # NOTE(review): the file is opened in text mode with default
        # newline handling, which presumably translates line endings
        # before these checks see them - confirm they can still fire.
        if not os.linesep in lic_text and '\n' in lic_text:
            raise(LicenseHeaderFormatError('Linux license on Windows system', header_path))
        if '\r' in lic_text and '\r' not in os.linesep:
            raise(LicenseHeaderFormatError('Windows license on Linux system', header_path))

        # Check search length: it must exceed twice the newline count
        # of the header.
        if not self.search_length > 2*lic_text.count('\n'):
            raise LicenseHeaderFormatError('search_length {0} too short'.format(self.search_length), header_path)

        # Create filtered license
        self.lic = Filter().filter(lic_text)

        # Check filtered license length
        if len(self.lic) == 0:
            raise LicenseHeaderFormatError('Empty license', header_path)

    def check(self, path):
        """
        Verify that the file at path contains the license header.

        Only the first search_length lines of the file are examined.

        Args:
            path: Path of the file to check.

        Raises:
            BadRepoError: path cannot be read.
            MissingHeaderError: The filtered header text is not found
                in the filtered head of the file.
        """
        # Read the head of the file
        try:
            with open(path) as fid:
                head = list(itertools.islice(fid, self.search_length))
        except OSError:
            raise BadRepoError(path)
        head = ''.join(head)
        # Filter head
        head = Filter().filter(head)
        # Look for filtered license text
        if self.lic not in head:
            raise MissingHeaderError(path)


class Repo(object):
    """
    A repository whose files are checked for license headers.

    The repository is expected to hold a MANIFEST file and a
    copying_headers directory with header.<tag> license headers and
    MANIFEST.<tag> sub-manifests.
    """
    def __init__(self, repo_dir, do_ignore_service):
        """
        Discover the license tags and sub-manifests of a repository.

        Args:
            repo_dir: Path to the root of the repository.
            do_ignore_service: When true, manifest entries starting
                with 'service/' are ignored.

        Raises:
            BadRepoError: The copying_headers directory cannot be
                listed.
        """
        self.repo_dir = repo_dir
        self.do_ignore_service = do_ignore_service
        self.copying = os.path.join(repo_dir, 'copying_headers')
        self.default_manifest = 'MANIFEST.BSD3-intel'
        try:
            listing = os.listdir(self.copying)
        except OSError:
            raise BadRepoError('{0}/*'.format(self.copying))

        # Derive list of tags from copying_headers/header.* file
        # extensions.  Only the leading 'header.' is removed so that a
        # tag which itself contains 'header.' is kept intact.
        header_prefix = 'header.'
        self.tag_list = [name[len(header_prefix):]
                         for name in listing if name.startswith(header_prefix)]

        # Create a list of paths to partial manifest files that are not the default
        self.manifest_list = [os.path.join(self.copying, name)
                              for name in listing
                              if name.startswith('MANIFEST.') and
                              name != self.default_manifest]

    def check(self):
        """
        Run the manifest check followed by the license header check.

        Raises:
            Whatever check_manifest() and check_license() raise.
        """
        self.check_manifest()
        self.check_license()

    def parse_manifest(self, manifest):
        """
        Read a manifest file and return its lines as a list.

        Args:
            manifest: Path of the manifest file.

        Returns:
            List of lines from the file, without entries starting with
            'service/' when do_ignore_service is set.

        Raises:
            OSError: The manifest cannot be read.
        """
        with open(manifest) as fid:
            lines = fid.read().strip().splitlines()
        return [nn for nn in lines
                if not (self.do_ignore_service and nn.startswith('service/'))]

    def check_manifest(self):
        """
        Check that the partial manifests contain the shipping manifest
        and that the shipping manifest includes a license.

        Files of the shipping manifest that appear in no partial
        manifest are written to the default manifest in
        copying_headers.  Every path listed in MANIFEST.EXEMPT must
        exist relative to the repository directory.

        Raises:
            BadRepoError: A manifest cannot be read or a path listed in
                MANIFEST.EXEMPT does not exist.
            ManifestIncompleteError: The shipping manifest lists no
                license file.
            ManifestDoubleCountedError: A file is listed in more than
                one partial manifest.
        """
        manifest = os.path.join(self.repo_dir, 'MANIFEST')
        try:
            full = self.parse_manifest(manifest)
        except OSError:
            raise BadRepoError(manifest)
        full = set(full)
        license_names = set(('LICENSE', 'LICENSE.txt', 'LICENSE.TXT',
                             'COPYING', 'COPYING.txt', 'COPYING.TXT'))
        if not full.intersection(license_names):
            license_names.add('__AT LEAST ONE REQUIRED__')
            raise ManifestIncompleteError(license_names)

        # Accumulate the files claimed by the partial manifests and
        # reject any file claimed by a manifest read earlier.
        part = set()
        for path in self.manifest_list:
            try:
                extension = self.parse_manifest(path)
            except OSError:
                raise BadRepoError(path)
            repeat_files = part.intersection(extension)
            if repeat_files:
                raise ManifestDoubleCountedError(repeat_files)
            part.update(extension)

        # Create default manifest for Intel BSD3 clause license
        if not full.issubset(part):
            default_manifest = os.path.join(self.repo_dir,
                                            'copying_headers',
                                            self.default_manifest)
            # parse_manifest() has already dropped 'service/' entries
            # from full when do_ignore_service is set.
            with open(default_manifest, 'w') as fid:
                for ff in sorted(full.difference(part)):
                    fid.write('{}\n'.format(ff))

        manifest = os.path.join(self.repo_dir, 'copying_headers', 'MANIFEST.EXEMPT')
        try:
            all_paths = self.parse_manifest(manifest)
        except OSError:
            raise BadRepoError(manifest)
        # Resolve exempt paths against the repository directory, as
        # check_license() does, rather than the working directory.
        bad_paths = [path.strip() for path in all_paths
                     if not os.path.exists(os.path.join(self.repo_dir,
                                                        path.strip()))]
        if bad_paths:
            raise BadRepoError(', '.join(['"{}"'.format(pp) for pp in bad_paths]))

    def check_license(self):
        """
        Check all of the tagged files for license headers.

        Raises:
            BadRepoError: A sub-manifest, header or listed file cannot
                be read.
            MissingHeaderError: A listed file lacks its license header.
            LicenseHeaderFormatError: A license header file is not
                usable.
        """
        for tag in self.tag_list:
            header_path = os.path.join(self.copying, 'header.{0}'.format(tag))
            manifest = os.path.join(self.copying, 'MANIFEST.{0}'.format(tag))
            try:
                paths = self.parse_manifest(manifest)
            except OSError:
                raise BadRepoError(manifest)
            lic = License(header_path)
            for path in paths:
                lic.check(os.path.join(self.repo_dir, path))

    def update(self, update_dir):
        """
        Update headers and license files based on the set of license
        headers in the update_dir/header.* that match tags in the
        repository.

        Raises:
            NotImplementedError: Always; updating is not supported yet.
        """
        raise NotImplementedError("To be implemented")


def main():
    """
    Parse the command line and run the license test.

    Options:
        -u update_dir        Update headers from update_dir (not
                             implemented).
        -h, --help           Print the module documentation and return.
        --version            Print the version and return.
        -i, --ignore-service Ignore manifest entries under 'service/'.

    The optional positional argument is the repository directory; '.'
    is used unless exactly one positional argument is given.

    Returns:
        An empty string after printing help or version information,
        otherwise the message reporting that the test passed.

    Raises:
        CommandLineError: The command line cannot be parsed.
        Any error raised by Repo while checking the repository.
    """
    try:
        # -h and -i are plain switches; only -u takes an argument.
        opts, args = getopt.gnu_getopt(sys.argv[1:], 'u:hi',
                                       ['help', 'version', 'ignore-service'])
    except getopt.GetoptError as err:
        raise CommandLineError(err)
    update_dir = ''
    do_ignore_service = False
    for flag, val in opts:
        if flag == '-u':
            update_dir = val
        elif flag in ('--help', '-h'):
            sys.stdout.write('{}\n'.format(__doc__))
            return ''
        elif flag == '--version':
            sys.stdout.write('{}\n'.format(__version__))
            return ''
        elif flag in ('--ignore-service', '-i'):
            do_ignore_service = True
        else:
            raise CommandLineError('Parsing command line, unknown flag {0}\n'.format(flag))

    # Fall back to the current directory unless exactly one
    # repository directory was given.
    if len(args) != 1:
        repo_dir = '.'
    else:
        repo_dir = args[0]

    repo = Repo(repo_dir, do_ignore_service)
    if not update_dir:
        repo.check()
    else:
        repo.update(update_dir)
    return '[ PASSED ] license test'

if __name__ == "__main__":
    # Print the result of main() on success.  Errors known to this
    # script are reported on stderr and their errno attribute becomes
    # the process exit status; any other exception propagates.
    try:
        status_line = main()
        sys.stdout.write('{}\n'.format(status_line))
    except (CommandLineError, ManifestIncompleteError, MissingHeaderError,
            BadRepoError, ManifestDoubleCountedError, LicenseHeaderFormatError) as err:
        report = 'ERROR: {0}\n\n'.format(err)
        sys.stderr.write(report)
        sys.exit(err.errno)
