#!/usr/bin/python

# debtags-fetch - Keep debtags source data up to date
#
# Copyright (C) 2006--2012  Enrico Zini <enrico@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import sys
import re
import os
import os.path
import shutil
import urllib2
import urlparse
import gzip
import subprocess
import tempfile
import logging

# Module-wide logger, named after the program as it was invoked
log = logging.getLogger(sys.argv[0])

# Main configuration file listing the tag sources
CONFIG = '/etc/debtags/sources.list'
# Directory of additional configuration files, read after CONFIG
CONFIGDIR = '/etc/debtags/sources.list.d'
# Directory where the fetched tag and vocabulary data is stored
OUTPUTDIR = '/var/lib/debtags'

# TODO: allow using more than one config file
# TODO: allow overriding $OUTPUTDIR with parameters read from the command line
# TODO: allow overriding everything for tests

class atomic_writer(object):
    """
    Atomically write to a file

    Data is written to a temporary file created in the same directory as
    fname; when the 'with' block ends without errors, the temporary file is
    given the requested permissions and renamed over fname. If the block
    raises, or if committing fails, the temporary file is removed and fname
    is left untouched.

    fname: pathname of the file to create or replace
    mode:  permissions to give to the final file
    sync:  if True, fdatasync() the data to disk before renaming
    """
    def __init__(self, fname, mode=0o644, sync=True):
        self.fname = fname
        self.mode = mode
        self.sync = sync
        dirname = os.path.dirname(self.fname)
        # The temporary file must live in the target directory, so that
        # rename() stays on one file system. delete=False because removal is
        # handled explicitly in __exit__: once the file has been renamed,
        # there is nothing left at the temporary name to delete on close.
        self.outfd = tempfile.NamedTemporaryFile(dir=dirname, delete=False)

    def __enter__(self):
        """
        Return the file object to write the new contents to
        """
        return self.outfd

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Commit the file if the block succeeded, discard it otherwise.

        Exceptions raised in the block, or while committing, are propagated.
        """
        committed = False
        try:
            if exc_type is None:
                self.outfd.flush()
                if self.sync:
                    os.fdatasync(self.outfd.fileno())
                os.fchmod(self.outfd.fileno(), self.mode)
                os.rename(self.outfd.name, self.fname)
                committed = True
        finally:
            try:
                self.outfd.close()
            finally:
                if not committed:
                    self._discard()
        return False

    def _discard(self):
        """
        Remove the temporary file, if it is still there
        """
        try:
            os.unlink(self.outfd.name)
        except OSError:
            # Best effort cleanup: failing here must not hide the error that
            # made us discard the file in the first place
            pass

class Source(object):
    """
    Base class for tag sources

    Subclasses set self.dst (the pathname prefix of the saved files),
    implement __str__ to return the source as written in the configuration,
    and implement fetch() to download or copy the data.
    """
    # True if fetching this source does not require network access
    LOCAL = True

    def dump(self):
        """
        Print this source and the place where its data is saved
        """
        # A single parenthesised argument prints the same line whether this
        # is parsed as a print statement or as a call to print()
        print("tag %s  # save to %s" % (str(self), self.dst))

    def infd_to_gzoutfd(self, infd, outfd, sfx):
        """
        Compress everything readable from infd into outfd.

        infd:  file object to read the uncompressed data from
        outfd: file object that receives the gzip stream; it is not closed
        sfx:   suffix appended to self.dst to build the file name recorded
               in the gzip header
        """
        gzoutfd = gzip.GzipFile(self.dst + sfx, "w", 9, outfd)
        try:
            shutil.copyfileobj(infd, gzoutfd)
        finally:
            # Closing writes the gzip trailer, and leaves outfd open
            gzoutfd.close()

    def copy_and_compress(self, fname, sfx):
        """
        Atomically save a gzip-compressed copy of fname as self.dst + sfx + ".gz"
        """
        with atomic_writer(self.dst + sfx + ".gz") as outfd:
            # Binary mode: the contents are compressed as they are
            with open(fname, "rb") as infd:
                self.infd_to_gzoutfd(infd, outfd, sfx)

class AptSource(Source):
    """
    Tag source that extracts the tags from the package records known to apt
    """
    # Separator between the entries of a Tag field
    re_xtags_split = re.compile(r",\s+")
    # Entry with a brace-compressed group of suffixes: prefix{a,b,c}
    re_xtags_compressed = re.compile(r"^(.+){(.+)}$")

    def __init__(self, dst):
        self.dst = os.path.join(OUTPUTDIR, dst)

    def __str__(self):
        return "apt://"

    def apt_normalise(self, tags):
        """
        Return tags with every prefix{a,b} entry expanded to prefixa, prefixb
        """
        expanded = []
        for entry in self.re_xtags_split.split(tags):
            found = self.re_xtags_compressed.match(entry)
            if found is None:
                expanded.append(entry)
                continue
            prefix = found.group(1)
            expanded.extend(prefix + alt for alt in found.group(2).split(","))
        return ", ".join(item.strip(",") for item in expanded)

    def fetch(self):
        """
        Save the tags found in the output of apt-cache dumpavail, then save a
        compressed copy of the system vocabulary.

        Returns True. Raises RuntimeError if apt-cache exits with an error.
        """
        from debian import deb822

        log.info("Fetching tags from apt...")
        # FIXME: KeyboardInterrupt makes a mess here. Try it out and enjoy the senseless backtraces.
        # See http://stackoverflow.com/questions/4606942/why-cant-i-handle-a-keyboardinterrupt-in-python
        # If you have an idea for a patch, bring it on!  Me, I just feel like crying.
        with atomic_writer(self.dst + ".tag.gz") as outfd:
            gzoutfd = gzip.GzipFile(self.dst + ".tag", "w", 9, outfd)
            try:
                proc = subprocess.Popen(["apt-cache", "dumpavail"], stdout=subprocess.PIPE)
                # Set only once all of apt-cache's output has been consumed
                completed = False
                try:
                    for pkg in deb822.Deb822.iter_paragraphs(proc.stdout):
                        name = pkg["Package"]
                        tags = pkg.get("Tag", None)
                        if tags is None:
                            # Package without tags: nothing to save
                            continue
                        gzoutfd.write("%s: %s" % (name, self.apt_normalise(tags)))
                        gzoutfd.write("\n")
                    completed = True
                finally:
                    if not completed:
                        # Something went wrong while reading: do not leave
                        # apt-cache running
                        proc.kill()
                        proc.wait()
                    elif proc.wait() != 0:
                        raise RuntimeError("apt-cache dumpavail returned error code %d" % proc.returncode)
            finally:
                gzoutfd.close()

        vocsrc = "/usr/share/debtags/vocabulary"
        log.info("Fetching vocabulary from %s...", vocsrc)
        self.copy_and_compress(vocsrc, ".voc")

        return True

class FileSource(Source):
    """
    Tag source that copies tags and vocabulary from a local directory
    """
    def __init__(self, basedir, dst):
        self.basedir = basedir
        self.dst = os.path.join(OUTPUTDIR, dst)

    def __str__(self):
        return "file:" + self.basedir

    def _install(self, basename, sfx, progress_msg):
        """
        Save basename from self.basedir as self.dst + sfx + ".gz".

        An already compressed basename.gz is preferred and copied as it is;
        otherwise the plain file is compressed while copying.

        Returns False if neither file exists, True otherwise.
        """
        plain = os.path.join(self.basedir, basename)
        packed = plain + ".gz"

        if os.path.exists(packed):
            log.info(progress_msg, packed)
            with atomic_writer(self.dst + sfx + ".gz") as outfd:
                with open(packed) as infd:
                    shutil.copyfileobj(infd, outfd)
            return True

        if os.path.exists(plain):
            log.info(progress_msg, plain)
            self.copy_and_compress(plain, sfx)
            return True

        return False

    def fetch(self):
        """
        Copy tags first, then vocabulary, to the output directory.

        Returns True on success, False (after logging an error) as soon as
        one of the two cannot be found.
        """
        if not self._install("tags-current", ".tag", "Fetching tags from %s..."):
            log.error("Cannot find tags-current.gz or tags-current in %s", self.basedir)
            return False

        if not self._install("vocabulary", ".voc", "Fetching vocabulary from %s..."):
            log.error("Cannot find vocabulary.gz or vocabulary in %s", self.basedir)
            return False

        return True

class HttpSource(Source):
    """
    Tag source that downloads vocabulary.gz and tags-current.gz from a
    remote URL
    """
    # Fetching requires network access: without this override the class
    # inherits LOCAL = True from Source, so "islocal" would report success and
    # --local would still download remote sources
    LOCAL = False

    def __init__(self, url, dst):
        self.dst = os.path.join(OUTPUTDIR, dst)
        self.url = url

    def __str__(self):
        return self.url

    def _download(self, url, sfx):
        """
        Atomically save the contents of url as self.dst + sfx
        """
        with atomic_writer(self.dst + sfx) as outfd:
            infd = urllib2.urlopen(url)
            try:
                shutil.copyfileobj(infd, outfd)
            finally:
                infd.close()

    def fetch(self):
        """
        Download vocabulary first, then tags, to the output directory.

        Returns True. Download errors are propagated as raised by urllib2.
        """
        urlvoc = urlparse.urljoin(self.url, "vocabulary.gz")
        log.info("Fetching vocabulary from %s...", urlvoc)
        self._download(urlvoc, ".voc.gz")

        urltag = urlparse.urljoin(self.url, "tags-current.gz")
        log.info("Fetching tags from %s...", urltag)
        self._download(urltag, ".tag.gz")

        return True

class Config(object):
    """
    List of tag sources, as read from the debtags configuration files
    """
    # Blank lines and comments
    re_emptyline = re.compile(r"^\s*(?:#|$)")
    # "tags <source>" lines; the source is captured without surrounding spaces
    re_tagline = re.compile(r"^\s*tags\s+(.+?)\s*$")
    # Sources that are downloaded
    re_remote = re.compile(r"^(?:http|ftp)://")
    # Characters that are replaced when a source is turned into a file name
    re_mangle = re.compile(r"[^A-Za-z0-9._-]")

    def __init__(self):
        # Array of Source objects
        self.sources = []

    def _mangle_src(self, src):
        """
        Return src with every character outside A-Z a-z 0-9 . _ - replaced by
        a dash, so that it can be used as part of a file name
        """
        return self.re_mangle.sub("-", src)

    def read(self, fname):
        """
        Read one config file and append its sources to self.sources

        Lines that are not 'tags' lines are skipped; 'tags' lines with an
        unsupported source are skipped with a warning.
        """
        with open(fname) as fd:
            # Count lines from 1, so that the line number shown in warnings
            # is the one that editors show
            for lineno, line in enumerate(fd, 1):
                # Skip empty lines and comments
                if self.re_emptyline.match(line):
                    continue

                # We are only interested in 'tags' lines
                mo = self.re_tagline.match(line)
                if not mo:
                    continue

                src = mo.group(1)

                if src == "apt://":
                    self.sources.append(AptSource("debtags-fetch-apt"))
                elif self.re_remote.match(src):
                    self.sources.append(HttpSource(src, "debtags-fetch-" + self._mangle_src(src)))
                elif src.startswith("file:") and len(src) > 5:
                    # Remove file: and excess leading slashes
                    src = src[5:]
                    if src[0] == "/":
                        src = "/" + src.lstrip("/")
                    self.sources.append(FileSource(src, "debtags-fetch-" + self._mangle_src(src)))
                else:
                    log.warning("%s:%d: ignoring unsupported tag source %s",
                                fname, lineno, src)

    def read_all(self):
        """
        Read all config files and append their sources to self.sources

        CONFIG is read first, then the files in CONFIGDIR, if it exists, in
        sorted order. Backup files (ending with ~), files left around by dpkg
        (with .dpkg- in their name) and subdirectories are skipped.
        """
        self.read(CONFIG)
        if not os.path.isdir(CONFIGDIR):
            return
        # os.listdir returns names in arbitrary order: sort them so that the
        # order of the sources does not change from one run to the next
        for fn in sorted(os.listdir(CONFIGDIR)):
            if ".dpkg-" in fn or fn.endswith("~"):
                continue
            pathname = os.path.join(CONFIGDIR, fn)
            if os.path.isdir(pathname):
                continue
            self.read(pathname)


if __name__ == "__main__":
    import sys
    from optparse import OptionParser

    VERSION = "1.8"

    class Parser(OptionParser):
        """
        Option parser that prints the whole help text after a usage error
        """
        def error(self, msg):
            sys.stderr.write("%s: error: %s\n\n" % (self.get_prog_name(), msg))
            self.print_help(sys.stderr)
            sys.exit(2)

    parser = Parser(
        usage="usage: %prog [options] [islocal|dump|update]",
        version="%prog " + VERSION,
        description="Fetches data from Debtags sources.\n"
                    "%prog islocal  exits with success if all sources are local.\n"
                    "%prog dump     dumps the source configuration.\n"
                    "%prog update   fetches all sources, saving them to " + OUTPUTDIR + "\n")
    parser.add_option("--local", action="store_true",
                      help="do not download tags: only read local sources.")
    parser.add_option("-q", "--quiet", action="store_true",
                      help="quiet mode: only output errors.")
    parser.add_option("-v", "--verbose", action="store_true",
                      help="verbose mode: output progress and non-essential information.")
    (opts, args) = parser.parse_args()

    # Pick the log level: --quiet wins over --verbose, warnings are the default
    if opts.quiet:
        loglevel = logging.ERROR
    elif opts.verbose:
        loglevel = logging.INFO
    else:
        loglevel = logging.WARNING

    #FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
    FORMAT = "%(message)s"
    logging.basicConfig(level=loglevel, stream=sys.stderr, format=FORMAT)

    if not args:
        parser.error("please provide a command: update, dump or islocal.")

    command = args[0]

    if command == "islocal":
        # Succeed only if every configured source is local
        config = Config()
        config.read_all()
        if all(src.LOCAL for src in config.sources):
            sys.exit(0)
        sys.exit(1)
    elif command == "dump":
        # Show the configured sources
        config = Config()
        config.read_all()
        for src in config.sources:
            src.dump()
        sys.exit(0)
    elif command == "update":
        # Refuse to go on if the output directory cannot be written to
        if not os.access(OUTPUTDIR, os.W_OK):
            if os.path.exists(OUTPUTDIR):
                log.error("I do not have permission to write to %s", OUTPUTDIR)
            else:
                log.error("Output directory %s does not exist", OUTPUTDIR)
            sys.exit(1)

        config = Config()
        config.read_all()

        # Remove the data fetched previously; this also takes care of the
        # sources that have been removed from sources.list
        # TODO: only remember them for deletion, and delete them on commit.
        stale = [fn for fn in os.listdir(OUTPUTDIR) if fn.startswith("debtags-fetch-")]
        for fn in stale:
            os.unlink(os.path.join(OUTPUTDIR, fn))

        # Fetch every source, stopping at the first one that fails
        for src in config.sources:
            if opts.local and not src.LOCAL:
                log.info("Skipping %s since --local is used", str(src))
                continue
            if not src.fetch():
                sys.exit(1)

        sys.exit(0)
    else:
        parser.error("the command should be: update, dump or islocal.")
