Source code for snowex_db.utilities
"""
Module for storing misc. type functions that don't warrant a separate module
but to provide some use in the code set.
"""
import datetime
import logging
from os import walk
from os.path import getctime, join
import re
from pathlib import Path
import coloredlogs
# Timezone name -> list of two-letter state abbreviations assigned to it
state_tz_map = {
    'US/Pacific': ['CA', 'NV', 'WA'],
    'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
}
[docs]
def get_logger(name, debug=True, ext_logger=None):
    """
    Retrieve a logger with colored logs installed on it.

    Args:
        name: Name of the logger, used when no external logger is given
        debug: Boolean; True installs at the DEBUG level, False at INFO
        ext_logger: Optional logger object to install colored logs on
                    instead of looking one up by name

    Returns:
        log: Logger object with colored logs installed
    """
    # Use the caller's logger when provided, otherwise look one up by name
    log = logging.getLogger(name) if ext_logger is None else ext_logger
    level = 'DEBUG' if debug else 'INFO'

    coloredlogs.install(
        fmt='%(name)s %(levelname)s %(message)s',
        level=level,
        logger=log,
    )
    return log
[docs]
def read_n_lines(f, nlines):
    """
    Opens a file and reads at most nlines lines from it to avoid reading the
    entire file. Useful for reading headers.

    Args:
        f: filename to open
        nlines: number of lines to read in

    Returns:
        lines: list of the first lines of the file (line endings kept). It
               is shorter than nlines when the file has fewer lines
    """
    lines = []

    # The context manager closes the file; no explicit close() is required
    with open(f, 'r') as fp:
        for i, line in enumerate(fp):
            # Stop as soon as the requested number of lines was collected
            if i >= nlines:
                break
            lines.append(line)

    return lines
[docs]
def find_files(directory, ext, pattern):
    """
    Finds filenames using the extension and a substring pattern. The
    directory is walked, so files in subdirectories are found as well.

    Args:
        directory: Directory to search
        ext: File extension to search for, without the leading dot. Only
             the text after the last dot of the filename is compared
        pattern: Substring to search for in the file basename

    Returns:
        files: List of matching filenames, each joined with the directory
               it was found in
    """
    files = []

    for root, _, names in walk(directory):
        for name in names:
            # Require an actual dot so an extensionless file whose whole
            # name equals ext (e.g. a file named "csv") is not matched
            _, dot, suffix = name.rpartition('.')

            if dot and suffix == ext and pattern in name:
                files.append(join(root, name))

    return files
[docs]
def find_kw_in_lines(kw, lines, addon_str=' = '):
    """
    Returns the index of the first string in a list that has a kw in it.

    The search string is the keyword followed by addon_str. A line matches
    when, after stripping '#' characters from both of its ends, it contains
    the search string and begins with the same character as the search
    string.

    Args:
        kw: Keyword to find in a line
        lines: List of strings to search for the keyword
        addon_str: String to append to your key word to help filter

    Return:
        i: Integer of the index of the first line containing a kw. -1 when
           no line matches, including when lines is empty
    """
    # The search string does not depend on the line, so build it once
    search = ('{}' + addon_str).format(kw)

    for i, line in enumerate(lines):
        uncommented = line.strip('#')

        if search in uncommented and search[0] == uncommented[0]:
            # Return right away so that a match on the last line is not
            # mistaken for having run through the list without a match
            return i

    # No match
    return -1
[docs]
def assign_default_kwargs(object, kwargs, defaults, leave=None):
    """
    Assign keyword arguments to class attributes. If a key in the default
    is not in the kwargs then its value becomes the value in the default.
    Any key found in the defaults is removed from the returned copy of the
    kwargs unless it is listed in leave. The kwargs passed in are not
    modified.

    Args:
        object: Object to assign the keys in defaults to as attributes
        kwargs: Dictionary of keyword arguments provided
        defaults: Dictionary of all class related arguments that are
                  assigned as attributes
        leave: List of attributes to leave in mod_kwargs. None (the
               default) leaves none of them

    Returns:
        mod_kwargs: Copy of kwargs with all keys in the defaults removed
                    from it, except those listed in leave
    """
    # Avoid a mutable default argument; None stands for "leave nothing"
    if leave is None:
        leave = ()

    mod_kwargs = kwargs.copy()

    # Loop over all the defaults
    for k, default in defaults.items():
        # Use the provided value when there is one, else the default
        setattr(object, k, kwargs.get(k, default))

        # Drop it so the remaining kwargs can be passed on for other use
        # unless it was requested to be left
        if k not in leave:
            mod_kwargs.pop(k, None)

    return mod_kwargs
[docs]
def get_file_creation_date(file):
    """
    Returns the date of the file's ctime as reported by the system. Useful
    for assuming a date accessed of data for NSIDC citation.

    NOTE: os.path.getctime is documented to report the creation time on
    some systems (like Windows) and the time of the last metadata change on
    others (like Unix).

    Args:
        file: Path of the file to look up

    Returns:
        result: A datetime.date object (local time) of when the file was
                created according to the system
    """
    timestamp = getctime(file)
    created = datetime.datetime.fromtimestamp(timestamp)
    return created.date()
[docs]
def get_site_id_from_filename(filename: str, regex: str) -> str:
    """
    Get the site ID based on the site code in the filename from the pit files

    Args:
        filename: Path of the file. Only its final component (the file
                  name) is matched against the pattern
        regex: Regular expression matched from the start of the file name.
               Its first group captures the site code

    Returns:
        The text captured by the first group of the pattern

    Raises:
        RuntimeError: If the pattern does not match the file name
    """
    basename = Path(filename).name
    found = re.match(regex, basename)

    if not found:
        raise RuntimeError(f"No site ID found for {filename}")

    return found.group(1)
[docs]
def get_timezone_from_site_id(site_id: str) -> str:
    """
    Get the timezone based on the site id

    Args:
        site_id: Site id whose first two characters are looked up as a
                 state abbreviation in state_tz_map

    Returns:
        tz: The first timezone name in state_tz_map that lists the
            abbreviation

    Raises:
        IndexError: If no timezone in state_tz_map lists the abbreviation
    """
    abbrev = site_id[0:2]

    for tz, states in state_tz_map.items():
        if abbrev in states:
            return tz

    # Keep the exception type of the former empty-list lookup so existing
    # handlers still work, but say what was actually missing
    raise IndexError(
        f"No timezone found for state '{abbrev}' from site id '{site_id}'"
    )