Source code for sofar.update_conventions

import contextlib
import os
import shutil
import re
import glob
import json
import requests
from bs4 import BeautifulSoup
from tempfile import TemporaryDirectory


def update_conventions(conventions_path=None, assume_yes=False):
    """
    Update SOFA conventions.

    SOFA conventions define what data is stored in a SOFA file and how it is
    stored. Updating makes sure that sofar is using the latest conventions.
    This is done in three steps:

    1. Download the official SOFA conventions as csv files from
       https://www.sofaconventions.org/conventions/ and
       https://www.sofaconventions.org/conventions/deprecated/.
    2. Notify which conventions would be added or updated.
    3. Convert the csv files to json files to be read by sofar.

    The csv and json files are stored at sofar/sofa_conventions/conventions.
    Sofar works only on the json files. To get a list of all currently
    available SOFA conventions and their paths see
    :py:func:`~sofar.list_conventions`.

    .. note::
        If the official conventions contain errors, calling this function
        might break sofar. If this is the case, sofar must be re-installed,
        e.g., by running ``pip install --force-reinstall sofar``. Be sure
        that you want to do this.

    Parameters
    ----------
    conventions_path : str, optional
        Path to the folder where the conventions are saved. The default is
        ``None``, which saves the conventions inside the sofar package.
        Conventions saved under a different path cannot be used by sofar.
        This parameter was added mostly for testing and debugging.
    assume_yes : bool, optional
        ``False``
            Updating the conventions must be confirmed by typing "y".
        ``True``
            The conventions are updated without confirmation.

        The default is ``False``.
    """
    # urls for parsing and downloading the convention files
    urls = ("https://www.sofaconventions.org/conventions/",
            "https://www.sofaconventions.org/conventions/deprecated/")
    ext = 'csv'

    print(f"Reading SOFA conventions from {urls[0]} ...")

    # get file names of conventions from sofaconventions.org
    page = requests.get(urls[0]).text
    soup = BeautifulSoup(page, 'html.parser')
    standardized = [os.path.split(node.get('href'))[1]
                    for node in soup.find_all('a')
                    if node.get('href').endswith(ext)]
    page = requests.get(urls[1]).text
    soup = BeautifulSoup(page, 'html.parser')
    deprecated = [os.path.split(node.get('href'))[1]
                  for node in soup.find_all('a')
                  if node.get('href').endswith(ext)]
    conventions = standardized + deprecated

    # directory handling
    if conventions_path is None:
        conventions_path = os.path.join(
            os.path.dirname(__file__), "sofa_conventions", "conventions")
    if not os.path.isdir(conventions_path):
        os.mkdir(conventions_path)
    if not os.path.isdir(os.path.join(conventions_path, "deprecated")):
        os.mkdir(os.path.join(conventions_path, "deprecated"))

    # loop and download conventions to a temporary directory if they changed
    updated = False
    update = []
    deprecate = []
    with TemporaryDirectory() as temp:
        os.mkdir(os.path.join(temp, 'deprecated'))

        for convention in conventions:

            # exclude these conventions
            if convention.startswith(("General_", "GeneralString_")):
                continue

            # get filename and url
            is_standardized = convention in standardized
            standardized_csv = os.path.join(conventions_path, convention)
            deprecated_csv = os.path.join(
                conventions_path, "deprecated", convention)
            url = (f"{urls[0]}{convention}" if is_standardized
                   else f"{urls[1]}{convention}")

            # download SOFA convention definition
            data = requests.get(url)
            # remove windows style line breaks and trailing tabs
            data = data.content.replace(b"\r\n", b"\n").replace(
                b"\t\n", b"\n")

            # check if the convention needs to be added or updated
            if is_standardized and not os.path.isfile(standardized_csv):
                # add new standardized convention
                updated = True
                with open(os.path.join(temp, convention), "wb") as file:
                    file.write(data)
                print(f"- add convention: {convention[:-4]}")
                update.append(convention)
            elif is_standardized and os.path.isfile(standardized_csv):
                # check for update of a standardized convention
                with open(standardized_csv, "rb") as file:
                    data_current = b"".join(file.readlines())
                data_current = data_current.replace(
                    b"\r\n", b"\n").replace(b"\t\n", b"\n")
                if data_current != data:
                    updated = True
                    with open(os.path.join(temp, convention), "wb") as file:
                        file.write(data)
                    print(f"- update convention: {convention[:-4]}")
                    update.append(convention)
            elif not is_standardized and os.path.isfile(standardized_csv):
                # deprecate standardized convention
                updated = True
                with open(os.path.join(temp, 'deprecated', convention),
                          "wb") as file:
                    file.write(data)
                print(f"- deprecate convention: {convention[:-4]}")
                deprecate.append(convention)
            elif not is_standardized and os.path.isfile(deprecated_csv):
                # check for update of a deprecated convention
                with open(deprecated_csv, "rb") as file:
                    data_current = b"".join(file.readlines())
                data_current = data_current.replace(
                    b"\r\n", b"\n").replace(b"\t\n", b"\n")
                if data_current != data:
                    updated = True
                    with open(os.path.join(temp, 'deprecated', convention),
                              "wb") as file:
                        file.write(data)
                    print(
                        f"- update deprecated convention: {convention[:-4]}")
                    update.append(os.path.join('deprecated', convention))
            elif not is_standardized and not os.path.isfile(deprecated_csv):
                # add new deprecation
                updated = True
                with open(os.path.join(temp, 'deprecated', convention),
                          "wb") as file:
                    file.write(data)
                print(f"- add deprecated convention: {convention[:-4]}")
                update.append(os.path.join('deprecated', convention))

        if updated and not assume_yes:
            # these lines were only tested manually, because writing an
            # automated test that copes with keyboard input is not worth it
            print(("\nDo you want to update the conventions above? (y/n)\n"
                   "Read the documentation before continuing. "
                   "If updating breaks sofar it has to be re-installed"))
            response = input()
            if response != "y":
                print("\nUpdating the conventions was canceled.")
                return

        if updated:
            for convention in update:
                shutil.copy(
                    os.path.join(temp, convention),
                    os.path.join(conventions_path, convention))
            for convention in deprecate:
                os.remove(os.path.join(conventions_path, convention))
                os.remove(os.path.join(
                    conventions_path, f"{convention[:-3]}json"))
                shutil.copy(
                    os.path.join(temp, 'deprecated', convention),
                    os.path.join(conventions_path, 'deprecated', convention))

            # compile json files from csv files
            _compile_conventions(conventions_path)
            print("... done.")
        else:
            print("... conventions already up to date.")
def _compile_conventions(conventions_path=None):
    """
    Compile SOFA conventions (json files) from the source conventions
    (csv files from the SOFAtoolbox), i.e., only do step 3 from
    `update_conventions`. This is a helper function for debugging and
    developing and might break sofar.

    Parameters
    ----------
    conventions_path : str, optional
        Path to the ``conventions`` folder containing csv and json files.
        The default ``None`` uses the default location inside the sofar
        package.
    """
    # directory handling
    if conventions_path is None:
        conventions_path = os.path.join(
            os.path.dirname(__file__), "sofa_conventions", "conventions")
    if not os.path.isdir(conventions_path):
        raise ValueError(f"{conventions_path} does not exist")

    # get list of source conventions
    csv_files = glob.glob(os.path.join(conventions_path, "*.csv")) + \
        glob.glob(os.path.join(conventions_path, "deprecated", "*.csv"))

    for csv_file in csv_files:
        # convert SOFA convention from csv to json
        convention_dict = _convention_csv2dict(csv_file)
        with open(f"{csv_file[:-3]}json", 'w') as file:
            json.dump(convention_dict, file, indent=4)


def _convention_csv2dict(file: str):
    """
    Read a SOFA convention as csv file from the official Matlab/Octave API
    for SOFA (SOFAtoolbox) and convert it to a Python dictionary. The
    dictionary can be written for example to a json file using

    import json

    with open(filename, 'w') as file:
        json.dump(dictionary, file, indent=4)

    Parameters
    ----------
    file : str
        filename of the SOFA convention

    Returns
    -------
    convention : dict
        SOFA convention as nested dictionary. Each attribute is a sub
        dictionary with the keys `default`, `flags`, `dimensions`, `type`,
        and `comment`.
    """
    # read the file
    # (encoding should be changed to utf-8 after the SOFA conventions repo is
    # clean.)
    # TODO: add explicit test for this function that checks the output
    with open(file, 'r', encoding="windows-1252") as fid:
        lines = fid.readlines()

    # write into dict
    convention = {}
    for idl, line in enumerate(lines):

        try:
            # separate by tabs
            line = line.strip().split("\t")
            # parse the line entry by entry
            for idc, cell in enumerate(line):
                # detect empty cells and remove leading/trailing white spaces
                cell = None if cell.replace(' ', '') == '' else cell.strip()
                # nothing to do for empty cells
                if cell is None:
                    line[idc] = cell
                    continue
                # parse text cells that do not contain arrays
                if cell[0] != '[':
                    # check for numbers
                    with contextlib.suppress(ValueError):
                        cell = float(cell) if '.' in cell else int(cell)

                    line[idc] = cell
                    continue

                # parse array cell
                # remove brackets
                cell = cell[1:-1]

                if ';' not in cell:
                    # get rid of white spaces and collapse double commas
                    cell = cell.strip()
                    cell = cell.replace(' ', ',')
                    cell = cell.replace(',,', ',')
                    # create flat list of integers and floats
                    numbers = cell.split(',')
                    cell = [float(n) if '.' in n else int(n)
                            for n in numbers]
                else:
                    # create a nested list of integers and floats
                    # separate multidimensional arrays
                    cell = cell.split(';')
                    cell_nd = [None] * len(cell)
                    for idx, cc in enumerate(cell):
                        # get rid of white spaces and collapse double commas
                        cc = cc.strip()
                        cc = cc.replace(' ', ',')
                        cc = cc.replace(',,', ',')
                        numbers = cc.split(',')
                        cell_nd[idx] = [float(n) if '.' in n else int(n)
                                        for n in numbers]

                    cell = cell_nd

                # write parsed cell to line
                line[idc] = cell

            # first line contains field names
            if idl == 0:
                fields = line[1:]
                continue

            # add blank comment if it does not exist
            if len(line) == 5:
                line.append("")
            # convert empty defaults from None to ""
            if line[1] is None:
                line[1] = ""

            # make sure some unusual default values are converted for json
            if line[1] == "permute([0 0 0 1 0 0; 0 0 0 1 0 0], [3 1 2]);":
                # Field Data.SOS in SimpleFreeFieldHRSOS and
                # SimpleFreeFieldSOS
                line[1] = [[[0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0]]]
            if line[1] == "permute([0 0 0 1 0 0], [3 1 2]);":
                # Field Data.SOS in GeneralSOS
                line[1] = [[[0, 0, 0, 1, 0, 0]]]
            if line[1] == "{''}":
                line[1] = ['']

            # convert versions to strings
            if "Version" in line[0] and not isinstance(line[1], str):
                line[1] = str(float(line[1]))

            # write the parsed line to the convention dict
            convention[line[0]] = {}
            for ff, field in enumerate(fields):
                convention[line[0]][field.lower()] = line[ff + 1]
        except Exception as error:
            raise ValueError((
                f"Failed to parse line {idl}, entry {idc} in: "
                f"{file}: \n{line}\n")) from error

    # reorder the fields to be nicer to read and understand
    # 1. move everything to the end that is not GLOBAL
    keys = list(convention.keys())
    for key in keys:
        if "GLOBAL" not in key:
            convention[key] = convention.pop(key)
    # 2. move Data entries to the end
    for key in keys:
        if key.startswith("Data"):
            convention[key] = convention.pop(key)

    return convention


def _check_congruency(save_dir=None, branch="master"):
    """
    SOFA conventions are stored in two different places - is this a good
    idea? They should be identical, but let's find out.

    Prints warnings about incongruent conventions.

    Parameters
    ----------
    save_dir : str, optional
        Directory to save diverging conventions for further inspection.
        The default ``None`` does not save anything.
    branch : str, optional
        Branch of the SOFAtoolbox repository to check against. The default
        is ``"master"``.
    """
    # urls for checking which conventions exist
    urls_check = ["https://www.sofaconventions.org/conventions/",
                  ("https://github.com/sofacoustics/SOFAtoolbox/tree/"
                   f"{branch}/SOFAtoolbox/conventions/")]
    # urls for loading the convention files
    urls_load = ["https://www.sofaconventions.org/conventions/",
                 ("https://raw.githubusercontent.com/sofacoustics/"
                  f"SOFAtoolbox/{branch}/SOFAtoolbox/conventions/")]
    subdirs = ["sofaconventions", "sofatoolbox"]

    # check save_dir
    if save_dir is not None:
        if not os.path.isdir(save_dir):
            raise ValueError(f"{save_dir} does not exist")
        for subdir in subdirs:
            if not os.path.isdir(os.path.join(save_dir, subdir)):
                os.makedirs(os.path.join(save_dir, subdir))

    # get file names of conventions from sofaconventions.org
    url = urls_check[0]
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')
    sofaconventions = [os.path.split(node.get('href'))[1]
                       for node in soup.find_all('a')
                       if node.get('href').endswith(".csv")]
    if not sofaconventions:
        raise ValueError(f"Did not find any conventions at {url}")

    # get file names of conventions from github
    url = urls_check[1]
    page = requests.get(url).text
    sofatoolbox = re.findall(
        r'"SOFAtoolbox/conventions/([^"]+\.csv)"', page)
    if not sofatoolbox:
        raise ValueError(f"Did not find any conventions at {url}")

    # check if the lists are identical. Remove items not contained in both
    # lists (iterate over copies to avoid skipping items while removing)
    report = ""
    for convention in list(sofaconventions):
        if convention.startswith(("General_", "GeneralString_")):
            sofaconventions.remove(convention)
        elif convention not in sofatoolbox:
            sofaconventions.remove(convention)
            report += f"- {convention} is missing in SOFAtoolbox\n"
    for convention in list(sofatoolbox):
        if convention.startswith(("General_", "GeneralString_")):
            sofatoolbox.remove(convention)
        elif convention not in sofaconventions:
            sofatoolbox.remove(convention)
            report += f"- {convention} is missing on sofaconventions.org\n"

    # loop and download conventions to check if they are identical
    for convention in sofaconventions:

        # download SOFA convention definitions from both sources
        data = [requests.get(url + convention) for url in urls_load]
        # remove trailing tabs and windows style line breaks
        data = [d.content.replace(b"\r\n", b"\n").replace(b"\t\n", b"\n")
                for d in data]

        # check for equality
        if data[0] != data[1]:
            report += f"- {convention} differs across platforms\n"

            # save diverging files
            if save_dir is not None:
                for subdir, d in zip(subdirs, data):
                    filename = os.path.join(save_dir, subdir, convention)
                    with open(filename, "wb") as file:
                        file.write(d)

    if report:
        print("Diverging conventions across platforms:\n" + report)
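

# Usage sketch (illustrative addition, not part of the original module).
# Shows the csv-to-json round trip performed by ``_compile_conventions``
# for a single file. The convention file name and the commented entry are
# assumptions; only the key names (default, flags, dimensions, type,
# comment) come from the ``_convention_csv2dict`` docstring above.
def _example_csv2dict_roundtrip():  # pragma: no cover
    convention = _convention_csv2dict("SimpleFreeFieldHRIR.csv")

    # each entry is a sub-dictionary with the keys documented above, e.g.,
    # convention["GLOBAL:Conventions"] might look like
    # {"default": "SOFA", "flags": "rm", "dimensions": None,
    #  "type": "attribute", "comment": ""}

    # write the dictionary to json, as _compile_conventions does
    with open("SimpleFreeFieldHRIR.json", "w") as file:
        json.dump(convention, file, indent=4)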