import argparse
import getpass
import json
import os
import pip
import re
import subprocess
import sys

# Third-party dependencies are imported defensively so that a missing package
# can be reported (and optionally installed) instead of failing with a bare
# import traceback.
lacking = []
try:
    import requests
except ModuleNotFoundError:
    lacking.append('requests')
try:
    from rdflib import Graph, URIRef
except ModuleNotFoundError:
    lacking.append('rdflib')

if lacking:
    # Keep asking until the user picks one of the two offered options.
    choice = ''
    while choice not in ('1', '2'):
        print("\nYour python installation lacks some dependencies (" + ", ".join(lacking) + ").")
        print("You can install them on your own or we can try to install them for you. which option do you prefer?\n")
        print("1. Install manually")
        print("2. Try to install automatically")
        choice = input().strip()
    print("\n")
    if choice == '1':
        for i in lacking:
            print("You are missing the '%s' library. You should be able to install it with `pip install %s` or from your operating system package (e.g. python3-%s under debian/ubuntu or python-%s under fedora/redhat/centos)\n" % (i, i, i, i))
        # The rest of the script cannot run without the missing modules, so
        # stop here instead of failing later with a NameError.
        sys.exit(1)
    else:
        for i in lacking:
            # Install with the interpreter running this script so the package
            # lands in the matching environment; a failed installation raises
            # subprocess.CalledProcessError.
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', i])
        print("\nInstallation successful - please run the script again")
        # The modules were not importable when this process started, so a
        # fresh run is required.
        sys.exit(0)

# Command line interface. The parsed result is stored in the module-level
# `args` namespace which the rest of the script reads (and later extends with
# an `auth` attribute).
parser = argparse.ArgumentParser()
parser.add_argument('--user', help='User name (for downloading restricted-access resources)')
parser.add_argument('--pswd', help='User password (for downloading restricted-access resources)')
parser.add_argument('--maxDepth', default=-1, type=int, help='Maximum recursion depth (-1 means do not limit the recursion depth)')
# The three switches below default to None (falsy) and become True when given.
parser.add_argument('--flat', action='store_const', const=True, help='Do not create directory structure (download all resources to the `targetDir`)')
parser.add_argument('--batch', action='store_const', const=True, help='Do not ask for user input (e.g. for the user name and password)')
parser.add_argument('--overwrite', action='store_const', const=True, help='Should existing files and directories be overwritten?')
parser.add_argument('--targetDir', default='.', help='Directory to store downloaded resources')
parser.add_argument('--matchUrl', nargs='*', default=[], help='Explicit list of allowed resource URLs')
parser.add_argument('--skipUrl', nargs='*', default=[], help='Explicit list of resource URLs to be skipped')
# The default must be a list of strings; it is used when no URL is given.
parser.add_argument('url', nargs='*', help='Resource URLs to be downloaded', default=['https://arche.acdh.oeaw.ac.at/api/12095'])
args = parser.parse_args()

# Cache of already fetched repository schemas, keyed by repository base URL.
repoSchemas = {}
def getSchema(resUrl):
    """Return the schema of the repository a resource belongs to.

    The repository base URL is obtained by stripping the trailing digits from
    `resUrl`. The schema is read from the `schema` key of the JSON returned
    by the repository's `/describe` endpoint and cached in `repoSchemas`, so
    each repository is queried at most once.

    Raises:
        Exception: when the `/describe` request does not return HTTP 200.
    """
    baseUrl = re.sub('[0-9]+$', '', resUrl)
    try:
        return repoSchemas[baseUrl]
    except KeyError:
        pass  # not cached yet - fetch it below

    describeResp = requests.get(baseUrl + '/describe', headers={'Accept': 'application/json'})
    if describeResp.status_code != 200:
        raise Exception("Couldn't read " + baseUrl + " repository configuration")
    schema = json.loads(describeResp.text)['schema']
    repoSchemas[baseUrl] = schema
    return schema

def resolveUrl(location):
    """Follow HTTP redirects by hand and return the final resource URL.

    Starting from `location`, a HEAD request is issued for each hop and the
    `Location` response header is followed until a response without that
    header is received. If that final response has status 200, the URL of the
    final hop is returned with a trailing `/metadata` removed.

    Parameters:
        location: URL to start from; a falsy value yields None.

    Returns:
        The resolved URL, or None when the chain does not end with HTTP 200
        or when a redirect loop is detected.
    """
    # Local import (with a Python 2 fallback) keeps the function self-contained.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    url = None
    visited = set()
    while location:
        urltmp = location
        if urltmp in visited:
            # A redirect cycle would otherwise loop forever.
            return None
        visited.add(urltmp)
        resp = requests.head(urltmp)
        location = resp.headers.get('location')
        if location is None:
            if resp.status_code == 200:
                url = re.sub('/metadata$', '', urltmp)
        elif location:
            # The Location header may be relative - resolve it against the
            # URL that produced it (absolute targets are kept as they are).
            location = urljoin(urltmp, location)
    return url

def getMetadata(url, schema):
    """Read the file name and the related resources of a resource.

    Requests `<url>/metadata` as n-triples, asking for the property named by
    `schema['fileName']` on the resource and the property named by
    `schema['parent']` on its relatives, and parses the response into an
    rdflib graph.

    Returns:
        A `(filename, children)` tuple where `filename` is the value of the
        file name property of `url` (None when absent) and `children` is a
        list of the distinct subjects of all triples using the parent
        property (presumably the resources whose parent is `url`).
    """
    fileNameProp = schema['fileName']
    parentProp = schema['parent']
    query = {
        'resourceProperties[0]': fileNameProp,
        'relativesProperties[0]': parentProp
    }
    reqHeaders = {'Accept': 'application/n-triples', 'X-METADATA-READ-MODE': '1_0_0_0'}
    resp = requests.get(url + '/metadata', params=query, headers=reqHeaders)

    graph = Graph()
    graph.parse(data=resp.text, format='nt')
    filename = graph.value(URIRef(url), URIRef(fileNameProp), None, default=None, any=True)
    # A set removes duplicated subjects before the list is returned.
    children = {str(subject) for subject, _, _ in graph.triples((None, URIRef(parentProp), None))}
    return (filename, list(children))

def readInput(msg, private):
    """Prompt the user with `msg` and return the entered text.

    When `private` is true the value is read with `getpass.getpass` (no
    echo); otherwise `raw_input` is used when it exists and `input` is the
    fallback.
    """
    if private:
        reader = getpass.getpass
    else:
        try:
            reader = raw_input
        except NameError:
            # raw_input is not defined - use input instead
            reader = input
    return reader(msg)

def download(res, args):
    """Process one queued resource: save its binary or queue its children.

    Parameters:
        res: dict with the keys `url` (resource URL), `path` (directory the
            resource should be stored in) and `depth` (recursion depth).
        args: parsed command line namespace. Reads `auth`, `batch`, `flat`,
            `overwrite` and `maxDepth`; `auth` is set here when the user is
            asked for credentials.

    Returns:
        A list of dicts shaped like `res` describing the resources to be
        processed next (empty when there is nothing to add).

    Behaviour by response (redirects are not followed):
        * 401/403 without credentials and outside batch mode - ask for a user
          name and password, store them in `args.auth` and retry.
        * 302 with a `Location` header - no binary content: create a
          directory named after the file name (unless `--flat`) and queue the
          children if the depth limit allows it.
        * 200 without a `Location` header - write the response body to a file.
        * anything else - report the failure.
    """
    dirname = res['path']
    schema = getSchema(res['url'])
    (filename, children) = getMetadata(res['url'], schema)
    resp = requests.get(res['url'], allow_redirects=False, stream=True, auth=args.auth)
    # With stream=True the connection is only released once the body is
    # consumed or the response is closed, so close it on every path.
    try:
        hasBinary = resp.headers.get('location') is None
        if filename is None and (hasBinary or res['depth'] > 0):
            print("No file name found  for " + res['url'] + "- using the resource id instead")
            filename = re.sub('^.*/', '', res['url'])

        toDwnld = []
        if (resp.status_code == 401 or resp.status_code == 403) and args.auth is None and not args.batch:
            # Release the connection before prompting and retrying.
            resp.close()
            args.auth = requests.auth.HTTPBasicAuth(
                readInput('A restricted access resource encountered, please provide a username: ', False),
                readInput('and a password: ', True)
            )
            toDwnld = download(res, args)
        elif resp.status_code == 302 and not hasBinary:
            if filename and not args.flat:
                dirname = os.path.join(dirname, filename)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                elif not args.overwrite:
                    print("Aborting - " + dirname + " already exists")
                    return []
            if len(children) > 0 and (res['depth'] < args.maxDepth or args.maxDepth == -1):
                print('Adding ' + str(len(children)) + ' children of ' + res['url'] + ' to the download queue')
                toDwnld = [{'url': x, 'path': dirname, 'depth': res['depth'] + 1} for x in children]
        elif resp.status_code == 200 and hasBinary:
            # NOTE(review): filename comes from the repository metadata and is
            # joined into the path unchecked - confirm it can't contain path
            # separators or be absolute.
            path = os.path.join(dirname, filename)
            # NOTE(review): an existing directory at `path` passes this check
            # and open() below will then fail - confirm the intent.
            if not os.path.exists(path) or os.path.isdir(path) or args.overwrite:
                print('Downloading ' + res['url'] + ' as ' + path)
                with open(path, 'wb') as of:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:
                            of.write(chunk)
            else:
                print("Aborting - " + path + " already exists")
        else:
            sc = resp.status_code
            if sc == 401:
                sc = 'Access Denied'
            print("Failed to download " + res['url'] + " with code " + str(sc))
    finally:
        resp.close()

    return toDwnld

# Seed the work stack with the resolved versions of the requested URLs.
stack = []
for i in args.url:
    ir = resolveUrl(i)
    if not ir:
        print("Can't resolve " + i)
        continue
    stack.append({'url': ir, 'path': args.targetDir, 'depth': 0})
    # Called for its caching/validation side effect - raises when the
    # repository configuration can't be read.
    getSchema(ir)

# Credentials are used only when both the user name and the password are given.
if args.user and args.pswd:
    args.auth = requests.auth.HTTPBasicAuth(args.user, args.pswd)
else:
    args.auth = None

if not os.path.exists(args.targetDir):
    os.makedirs(args.targetDir)

# Process the stack until it is empty; `downloaded` prevents processing the
# same URL twice.
downloaded = set()
while stack:
    res = stack.pop()
    resUrl = res['url']
    skip = (
        resUrl in downloaded
        or (args.matchUrl and resUrl not in args.matchUrl)
        or resUrl in args.skipUrl
        or (args.maxDepth >= 0 and res['depth'] > args.maxDepth)
    )
    if skip:
        continue
    stack.extend(download(res, args))
    downloaded.add(resUrl)
print('Download ended')