mirror of
https://github.com/Noettore/fattureCCSR.git
synced 2025-10-15 03:36:39 +02:00
76 lines
2.8 KiB
Python
76 lines
2.8 KiB
Python
"""ask for an input file (.xlsx) and an output file (.pdf) and downloads and unite every invoice"""
|
|
|
|
import shutil
|
|
import tempfile
|
|
import requests
|
|
import requests_ntlm
|
|
import openpyxl
|
|
import PyPDF2
|
|
|
|
import logger
|
|
|
|
def get_invoices_info(input_file_path: str) -> dict:
|
|
"""extract invoices IDs and URLs from xlsx input file"""
|
|
xlsx_file = openpyxl.load_workbook(input_file_path)
|
|
sheet = xlsx_file.active
|
|
invoices = dict()
|
|
|
|
for i in range(1, sheet.max_row+1):
|
|
invoice_id = sheet["I"+str(i)].value
|
|
if invoice_id is not None and "CCSR" in invoice_id:
|
|
invoice_id = invoice_id.replace("/", "-")
|
|
invoice_url = sheet["BG"+str(i)].hyperlink.target
|
|
invoice = {
|
|
"id": invoice_id,
|
|
"url": invoice_url,
|
|
"path": None,
|
|
"good": None,
|
|
}
|
|
invoices[invoice_id] = invoice
|
|
|
|
return invoices
|
|
|
|
|
|
def download_invoices(input_file_path: str, output_file_path: str, username: str, password: str):
|
|
"""download invoices from CCSR"""
|
|
invoices = get_invoices_info(input_file_path)
|
|
|
|
session = requests.Session()
|
|
session.auth = requests_ntlm.HttpNtlmAuth("sr\\"+username, password)
|
|
logger.downloader_logger.info("Inizio download fatture dal portale CCSR")
|
|
|
|
tmp_dir = tempfile.mkdtemp()
|
|
|
|
invoices_count = len(invoices)
|
|
processed_count = 0
|
|
|
|
for invoice_id, invoice in invoices.items():
|
|
resp = session.get(invoice["url"])
|
|
processed_count += 1
|
|
if resp.status_code == 200:
|
|
with open(tmp_dir+"/"+invoice_id+".pdf", "wb") as output_file:
|
|
output_file.write(resp.content)
|
|
invoice["path"] = output_file.name
|
|
print(invoice["path"])
|
|
try:
|
|
PyPDF2.PdfFileReader(open(invoice["path"], "rb"))
|
|
except (PyPDF2.utils.PdfReadError, OSError):
|
|
logger.downloader_logger.error("%d/%d fattura %s corrotta!", processed_count, invoices_count, invoice_id)
|
|
invoice["good"] = False
|
|
else:
|
|
logger.downloader_logger.info("%d/%d scaricata fattura %s in %s", processed_count, invoices_count, invoice_id, invoice["path"])
|
|
invoice["good"] = True
|
|
else:
|
|
logger.downloader_logger.error("%d/%d impossibile scaricare fattura %s: %d", processed_count, invoices_count, invoice_id, resp.status_code)
|
|
invoice["good"] = False
|
|
|
|
merger = PyPDF2.PdfFileMerger()
|
|
for invoice in invoices.values():
|
|
if invoice["good"]:
|
|
merger.append(PyPDF2.PdfFileReader(open(invoice["path"], "rb")))
|
|
merger.write(output_file_path)
|
|
|
|
shutil.rmtree(tmp_dir, ignore_errors=True)
|
|
|
|
logger.downloader_logger.info("Download terminato. Il pdf contenente le fatture si trova in %s", output_file_path)
|