import errno
import logging
import os
import shutil
import stat
import subprocess
import tarfile
import time
import zipfile
from io import BytesIO
from socket import error as SocketError # NOQA: N812
from urllib.request import urlopen

logger = logging.getLogger(__name__)


def call(*args):
    """Log a terminal command, then invoke it as a subprocess.

    Returns the command's decoded output if it exits successfully.
    """
    logger.debug(" ".join(args))
    try:
        return subprocess.check_output(args).decode('utf8')
    except subprocess.CalledProcessError as e:
        logger.critical("%s exited with return code %i" % (e.cmd, e.returncode))
        logger.critical(e.output)
        raise
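
# A minimal usage sketch (hypothetical command): run a command and capture
# its output; on failure, CalledProcessError is logged and then re-raised.
#
#     head = call("git", "rev-parse", "HEAD").strip()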


def seekable(fileobj):
    """Attempt to use file.seek on the given file, with fallbacks."""
    try:
        fileobj.seek(fileobj.tell())
    except Exception:
        # Not seekable (e.g. a raw network stream): buffer it in memory.
        return BytesIO(fileobj.read())
    else:
        return fileobj
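
# A minimal usage sketch (hypothetical URL): HTTP responses are not seekable,
# so seekable() transparently buffers them for tarfile/zipfile, which both
# need random access.
#
#     archive = seekable(urlopen("https://example.com/archive.tar.gz"))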


def untar(fileobj, dest="."):
    """Extract tar archive."""
    logger.debug("untar")
    fileobj = seekable(fileobj)
    with tarfile.open(fileobj=fileobj) as tar_data:
        tar_data.extractall(path=dest)


def unzip(fileobj, dest=None, limit=None):
    """Extract zip archive."""
    logger.debug("unzip")
    fileobj = seekable(fileobj)
    with zipfile.ZipFile(fileobj) as zip_data:
        for info in zip_data.infolist():
            if limit is not None and info.filename not in limit:
                continue
            extracted = zip_data.extract(info, path=dest)
            # The upper 16 bits of external_attr hold the Unix mode; keep
            # only the permission bits (rwxrwxrwx).
            perm = info.external_attr >> 16 & 0x1FF
            if perm:
                # external_attr is zero for archives built without Unix
                # metadata; skip chmod rather than clearing all permissions.
                os.chmod(extracted, perm)
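
# Worked example (assumed values): a regular file stored on Unix with mode
# 0o755 carries st_mode 0o100755 in the upper 16 bits of external_attr, so
# (0o100755 << 16) >> 16 & 0x1FF == 0o755, i.e. the file-type bits are
# stripped and only the permission bits remain.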


def get(url):
    """Issue a GET request to the given URL and return the response."""
    # Imported here rather than at module level, so importing this module
    # does not require requests to be installed.
    import requests

    logger.debug("GET %s" % url)
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    return resp


def get_download_to_descriptor(fd, url, max_retries=5):
    """Download a URL in chunks and save it to a file descriptor (truncating it).

    Doesn't close the descriptor, but flushes it on success.
    Retries the download on ECONNRESET, up to max_retries times.

    This function is meant to download big files directly to disk without
    caching the whole file in memory.
    """
    if max_retries < 1:
        max_retries = 1
    wait = 2
    for current_retry in range(1, max_retries + 1):
        try:
            logger.info("Downloading %s Try %d/%d" % (url, current_retry, max_retries))
            resp = urlopen(url)
            # We may get here on a retry; truncate fd before writing.
            fd.seek(0)
            fd.truncate(0)
            while True:
                chunk = resp.read(16 * 1024)
                if not chunk:
                    break  # Download finished
                fd.write(chunk)
            fd.flush()
            # Success
            return
        except SocketError as e:
            if current_retry < max_retries and e.errno == errno.ECONNRESET:
                # Retry with exponential backoff.
                logger.error("Connection reset by peer. Retrying after %ds..." % wait)
                time.sleep(wait)
                wait *= 2
            else:
                # Out of retries, or an error we don't handle.
                raise
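
# A minimal usage sketch (hypothetical URL and paths): stream an archive to
# disk, then extract it from the same descriptor.
#
#     with open("/tmp/archive.tar.gz", "w+b") as fd:
#         get_download_to_descriptor(fd, "https://example.com/archive.tar.gz")
#         fd.seek(0)
#         untar(fd, dest="/tmp/out")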


def rmtree(path):
    # This works around two issues:
    # 1. Cannot delete read-only files owned by us (e.g. files extracted
    #    from tarballs).
    # 2. On Windows, we sometimes just need to retry in case the file handle
    #    hasn't been fully released (a common issue).
    def handle_remove_readonly(func, path, exc):
        excvalue = exc[1]
        if func in (os.rmdir, os.remove, os.unlink) and excvalue.errno == errno.EACCES:
            # Make the entry writable, then retry the operation that failed.
            os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # 0o777
            func(path)
        else:
            raise
    return shutil.rmtree(path, onerror=handle_remove_readonly)


def sha256sum(file_path):
    """Compute the SHA256 hash of a file."""
    from hashlib import sha256

    digest = sha256()
    with open(file_path, 'rb') as f:
        # Read in 4 KiB chunks so large files are not loaded into memory.
        for chunk in iter(lambda: f.read(4096), b''):
            digest.update(chunk)
    return digest.hexdigest()
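
# A minimal usage sketch (hypothetical path and checksum): verify a
# downloaded file against an expected digest.
#
#     if sha256sum("/tmp/archive.tar.gz") != expected_sha256:
#         raise ValueError("checksum mismatch")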