"""ask for an input file (.xlsx) and an output file (.pdf) and downloads and unite every invoice""" import sys import os import subprocess import shutil import tempfile import requests import requests_ntlm import openpyxl import PyPDF2 import logger def get_invoices_info(input_file_path: str) -> dict: """extract invoices IDs and URLs from xlsx input file""" xlsx_file = openpyxl.load_workbook(input_file_path) sheet = xlsx_file.active invoices = dict() for i in range(1, sheet.max_row+1): invoice_id = sheet["I"+str(i)].value if invoice_id is not None and "CCSR" in invoice_id: invoice_id = invoice_id.replace("/", "-") invoice_url = sheet["BG"+str(i)].hyperlink.target invoice = { "id": invoice_id, "url": invoice_url, "path": None, "good": None, } invoices[invoice_id] = invoice return invoices def open_file(file_path): """open a file with the default software""" if sys.platform == "win32": os.startfile(file_path) # pylint: disable=maybe-no-member else: opener = "open" if sys.platform == "darwin" else "xdg-open" subprocess.call([opener, file_path]) def download_invoices(input_file_path: str, output_file_path: str, username: str, password: str): """download invoices from CCSR""" invoices = get_invoices_info(input_file_path) session = requests.Session() session.auth = requests_ntlm.HttpNtlmAuth("sr\\"+username, password) logger.downloader_logger.info("Inizio download fatture dal portale CCSR") tmp_dir = tempfile.mkdtemp() invoices_count = len(invoices) downloaded_count = 0 for invoice_id, invoice in invoices.items(): resp = session.get(invoice["url"]) if resp.status_code == 200: with open(tmp_dir+"/"+invoice_id+".pdf", "wb") as output_file: output_file.write(resp.content) invoice["path"] = output_file.name print(invoice["path"]) try: PyPDF2.PdfFileReader(open(invoice["path"], "rb")) except (PyPDF2.utils.PdfReadError, OSError): logger.downloader_logger.error("fattura %s corrotta!", invoice_id) invoice["good"] = False else: downloaded_count += 1 logger.downloader_logger.info("%d/%d scaricata fattura %s in %s", downloaded_count, invoices_count, invoice_id, invoice["path"]) invoice["good"] = True else: logger.downloader_logger.error("impossibile scaricare fattura %s: %d", invoice_id, resp.status_code) invoice["good"] = False merger = PyPDF2.PdfFileMerger() for invoice in invoices.values(): if invoice["good"]: merger.append(PyPDF2.PdfFileReader(open(invoice["path"], "rb"))) merger.write(output_file_path) open_file(output_file_path) shutil.rmtree(tmp_dir, ignore_errors=True) logger.downloader_logger.info("Download terminato. Il pdf contenente le fatture si trova in %s", output_file_path)