Source code for snowex_db.utilities

"""
Module for miscellaneous utility functions that don't warrant a separate
module but are still useful across the code base.
"""

import datetime
import logging
from os import walk
from os.path import getctime, join
import re
from pathlib import Path
import coloredlogs

# Time zone name -> two-letter state abbreviations located in that zone.
# get_timezone_from_site_id looks up the first two characters of a site id
# in these lists.
state_tz_map = {
    'US/Pacific': ['CA', 'NV', 'WA'],
    'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
}


def get_logger(name, debug=True, ext_logger=None):
    """
    Retrieve a logger with colored logs installed on it.

    Args:
        name: Name of the logger; only used when ext_logger is None
        debug: Boolean, True installs level DEBUG, False installs level INFO
        ext_logger: Optional existing logger object to install colored logs
                    on instead of fetching a logger by name

    Returns:
        log: Logger object with colored logs installed
    """
    log_format = '%(name)s %(levelname)s %(message)s'

    # Reuse the caller's logger when one is handed in
    log = logging.getLogger(name) if ext_logger is None else ext_logger
    level = 'DEBUG' if debug else 'INFO'

    coloredlogs.install(fmt=log_format, level=level, logger=log)
    return log


def read_n_lines(f, nlines):
    """
    Open a file and read at most nlines lines from it, avoiding reading the
    entire file. Useful for reading headers.

    Args:
        f: filename to open
        nlines: number of lines to read in

    Returns:
        lines: list of the first nlines lines of the file (line endings
               kept). Shorter if the file has fewer lines, empty when
               nlines is zero or negative.
    """
    lines = []

    # The with-block closes the file, so no explicit close() is needed
    with open(f, 'r') as fp:
        for index, line in enumerate(fp):
            if index >= nlines:
                break
            lines.append(line)

    return lines


def find_files(directory, ext, pattern):
    """
    Recursively find filenames using the extension and a substring pattern.

    Args:
        directory: Directory to search (all sub directories are walked)
        ext: File extension to search for, without the leading dot
        pattern: Substring to search for in the file name (the name of the
                 file inside its directory, extension included)

    Returns:
        files: List of paths (directory joined with file name) of every
               matching file, in the order os.walk yields them
    """
    # Match on the dotted suffix so a file with no extension whose whole name
    # happens to equal ext (e.g. a file called "csv") is not picked up.
    suffix = '.' + ext

    files = []
    for root, _dirs, names in walk(directory):
        files.extend(
            join(root, name)
            for name in names
            if name.endswith(suffix) and pattern in name
        )

    return files


def find_kw_in_lines(kw, lines, addon_str=' = '):
    """
    Return the index of the first string in a list that contains a keyword.

    A line matches when, after stripping '#' characters from both of its
    ends, it contains the keyword followed by addon_str and its first
    character equals the first character of that search string.

    Args:
        kw: Keyword to find in a line
        lines: List of strings to search for the keyword
        addon_str: String to append to your key word to help filter

    Returns:
        i: Integer of the index of the first line containing the kw,
           -1 when no line matches (including when lines is empty)
    """
    # The search string does not depend on the line, so build it once
    target = ('{}' + addon_str).format(kw)

    for i, line in enumerate(lines):
        uncommented = line.strip('#')

        # Slices instead of [0] so an empty search string cannot raise
        if target in uncommented and uncommented[:1] == target[:1]:
            # Return right away so a hit on the last line is not mistaken
            # for "ran off the end of the list"
            return i

    # No match
    return -1


def assign_default_kwargs(object, kwargs, defaults, leave=None):
    """
    Assign keyword arguments to class attributes. If a key in the defaults
    is not in the kwargs then its value becomes the value in the defaults.
    Any key found in the defaults is removed from the returned kwargs copy
    unless it is listed in leave. The kwargs passed in are not modified.

    Args:
        object: Object to assign the keys in defaults to as attributes
                (the name shadows the builtin but is kept so keyword
                callers keep working)
        kwargs: Dictionary of keyword arguments provided
        defaults: Dictionary of all class related arguments that are
                  assigned as attributes
        leave: Optional list of attributes to leave in mod_kwargs,
               None means leave nothing

    Returns:
        mod_kwargs: Copy of kwargs with all keys in the defaults removed
                    from it, except those named in leave
    """
    # None instead of a mutable [] default, which is shared between calls
    keep = () if leave is None else leave

    mod_kwargs = kwargs.copy()

    # Loop over all the defaults
    for key, default in defaults.items():
        if key in kwargs:
            # The key was provided, so use it
            value = kwargs[key]

            # Drop it so the kwargs can be passed on for other use, unless
            # it was requested to be left
            if key not in keep:
                del mod_kwargs[key]
        else:
            # Fall back to the value from the defaults
            value = default

        # Assign it as an attribute
        setattr(object, key, value)

    return mod_kwargs


def get_file_creation_date(file):
    """
    Return the date of the file's ctime as reported by the system. Useful
    for assuming a date accessed of data for NSIDC citation.

    Args:
        file: Path of the file to inspect

    Returns:
        result: A datetime.date object (local time) built from
                os.path.getctime of the file
    """
    # NOTE(review): ctime is platform dependent and may be the last metadata
    # change rather than the creation time - confirm this is acceptable
    timestamp = getctime(file)
    stamped = datetime.datetime.fromtimestamp(timestamp)
    return stamped.date()


def get_site_id_from_filename(filename: str, regex: str) -> str:
    """
    Get the site ID based on the site code in the filename from the pit
    files.

    Args:
        filename: Path or name of the file; only the final path component
                  is matched
        regex: Pattern matched against the start of the file name, its
               first capture group is the site ID

    Returns:
        code: Text captured by group 1 of the regex

    Raises:
        RuntimeError: If the regex does not match the file name
    """
    basename = Path(filename).name
    found = re.match(regex, basename)

    if found is None:
        raise RuntimeError(f"No site ID found for {filename}")

    return found.group(1)


def get_timezone_from_site_id(site_id: str) -> str:
    """
    Get the timezone based on the site id.

    Args:
        site_id: Site ID whose first two characters are looked up as a
                 state abbreviation in state_tz_map

    Returns:
        tz: Name of the first timezone in state_tz_map that lists the
            state abbreviation

    Raises:
        IndexError: If no timezone lists the state abbreviation
    """
    abbrev = site_id[0:2]

    for tz, states in state_tz_map.items():
        if abbrev in states:
            return tz

    # IndexError is what the previous list-index lookup raised on a miss, so
    # the type is kept for existing callers; only the message is new.
    raise IndexError(
        f"No timezone found for state {abbrev!r} from site id {site_id!r}"
    )