# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os
import shutil
import unittest
import StringIO
import tarfile
import HTMLParser
import urlparse
import urllib

from cuddlefish.docs import generate
from cuddlefish.tests import env_root

INITIAL_FILESET = [ ["static-files", "base.html"], \
                    ["dev-guide", "index.html"], \
                    ["modules", "sdk", "aardvark-feeder.html"], \
                    ["modules", "sdk", "anteater", "anteater.html"]]

EXTENDED_FILESET = [ ["static-files", "base.html"], \
                     ["dev-guide", "extra.html"], \
                     ["dev-guide", "index.html"], \
                     ["modules", "sdk", "aardvark-feeder.html"], \
                     ["modules", "sdk", "anteater", "anteater.html"]]

EXTRAFILE = ["dev-guide", "extra.html"]

def get_test_root():
    return os.path.join(env_root, "python-lib", "cuddlefish", "tests", "static-files")

def get_sdk_docs_root():
    return os.path.join(get_test_root(), "sdk-docs")

def get_base_url_path():
    return os.path.join(get_sdk_docs_root(), "doc")

def url_from_path(path):
    path = path.lstrip("/")
    return "file://" + "/" + "/".join(path.split(os.sep)) + "/"

def get_base_url():
    return url_from_path(get_base_url_path())

class Link_Checker(HTMLParser.HTMLParser):
    def __init__(self, tester, filename, base_url):
        HTMLParser.HTMLParser.__init__(self)
        self.tester = tester
        self.filename = filename
        self.base_url = base_url
        self.errors = []

    def handle_starttag(self, tag, attrs):
        link = self.find_link(attrs)
        if link:
            self.validate_link(link)

    def handle_startendtag(self, tag, attrs):
        link = self.find_link(attrs)
        if link:
            self.validate_link(link)

    def find_link(self, attrs):
        attrs = dict(attrs)
        href = attrs.get('href', '')
        if href:
            parsed = urlparse.urlparse(href)
            if not parsed.scheme:
                return href
        src = attrs.get('src', '')
        if src:
            parsed = urlparse.urlparse(src)
            if not parsed.scheme:
                return src

    def validate_link(self, link):
        parsed = urlparse.urlparse(link)
        # there should not be any file:// URLs
        self.tester.assertNotEqual(parsed.scheme, "file")
        # any other absolute URLs will not be checked
        if parsed.scheme:
            return
        current_path_as_url = url_from_path(os.path.dirname(self.filename))
        # otherwise try to open the file at: baseurl + path
        absolute_url = current_path_as_url + parsed.path
        try:
            urllib.urlopen(absolute_url)
        except IOError:
            self.errors.append(self.filename + "\n " + absolute_url)

class Generate_Docs_Tests(unittest.TestCase):
    def test_generate_static_docs(self):
        def cleanup():
            shutil.rmtree(get_base_url_path())
            tgz.close()
            os.remove(tar_filename)
            generate.clean_generated_docs(os.path.join(env_root, "doc"))
        # make sure we start clean
        if os.path.exists(get_base_url_path()):
            shutil.rmtree(get_base_url_path())
        # generate a doc tarball, and extract it
        base_url = get_base_url()
        tar_filename = generate.generate_static_docs(env_root)
        tgz = tarfile.open(tar_filename)
        tgz.extractall(get_sdk_docs_root())
        broken_links = []
        # get each HTML file...
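        # Walk the extracted docs tree and run the link checker over every
        # generated HTML page (pages under static-files are excluded); any
        # relative links that fail to resolve are collected in broken_links
        # and reported together at the end.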
        for root, subFolders, filenames in os.walk(get_sdk_docs_root()):
            for filename in filenames:
                if not filename.endswith(".html"):
                    continue
                if root.endswith("static-files"):
                    continue
                filename = os.path.join(root, filename)
                # ...and feed it to the link checker
                linkChecker = Link_Checker(self, filename, base_url)
                linkChecker.feed(open(filename, "r").read())
                broken_links.extend(linkChecker.errors)
        if broken_links:
            print
            print "The following links are broken:"
            for broken_link in sorted(broken_links):
                print " " + broken_link
            cleanup()
            self.fail("%d links are broken" % len(broken_links))
        cleanup()

    def test_generate_docs(self):
        test_root = get_test_root()
        docs_root = os.path.join(test_root, "doc")
        generate.clean_generated_docs(docs_root)
        new_digest = self.check_generate_regenerate_cycle(test_root, INITIAL_FILESET)
        # touching an MD file under sdk **does** cause a regenerate
        os.utime(os.path.join(test_root, "doc", "module-source", "sdk", "aardvark-feeder.md"), None)
        new_digest = self.check_generate_regenerate_cycle(test_root, INITIAL_FILESET, new_digest)
        # touching a non MD file under sdk **does not** cause a regenerate
        os.utime(os.path.join(test_root, "doc", "module-source", "sdk", "not_a_doc.js"), None)
        self.check_generate_is_skipped(test_root, INITIAL_FILESET, new_digest)
        # touching a non MD file under static-files **does not** cause a regenerate
        os.utime(os.path.join(docs_root, "static-files", "another.html"), None)
        self.check_generate_is_skipped(test_root, INITIAL_FILESET, new_digest)
        # touching an MD file under dev-guide **does** cause a regenerate
        os.utime(os.path.join(docs_root, "dev-guide-source", "index.md"), None)
        new_digest = self.check_generate_regenerate_cycle(test_root, INITIAL_FILESET, new_digest)
        # adding a file **does** cause a regenerate
        open(os.path.join(docs_root, "dev-guide-source", "extra.md"), "w").write("some content")
        new_digest = self.check_generate_regenerate_cycle(test_root, EXTENDED_FILESET, new_digest)
        # deleting a file **does** cause a regenerate
        os.remove(os.path.join(docs_root, "dev-guide-source", "extra.md"))
        new_digest = self.check_generate_regenerate_cycle(test_root, INITIAL_FILESET, new_digest)
        # remove the files
        generate.clean_generated_docs(docs_root)

    def check_generate_is_skipped(self, test_root, files_to_expect, initial_digest):
        generate.generate_docs(test_root, stdout=StringIO.StringIO())
        docs_root = os.path.join(test_root, "doc")
        for file_to_expect in files_to_expect:
            self.assertTrue(os.path.exists(os.path.join(docs_root, *file_to_expect)))
        self.assertTrue(initial_digest == open(os.path.join(docs_root, "status.md5"), "r").read())

    def check_generate_regenerate_cycle(self, test_root, files_to_expect, initial_digest=None):
        # test that if we generate, files are getting generated
        generate.generate_docs(test_root, stdout=StringIO.StringIO())
        docs_root = os.path.join(test_root, "doc")
        for file_to_expect in files_to_expect:
            self.assertTrue(os.path.exists(os.path.join(docs_root, *file_to_expect)),
                            os.path.join(docs_root, *file_to_expect) + " not found")
        if initial_digest:
            self.assertTrue(initial_digest != open(os.path.join(docs_root, "status.md5"), "r").read())
        # and that if we regenerate, nothing changes...
        new_digest = open(os.path.join(docs_root, "status.md5"), "r").read()
        self.check_generate_is_skipped(test_root, files_to_expect, new_digest)
        return new_digest

if __name__ == '__main__':
    unittest.main()