Source code for snowex_db.point_metadata
import logging
from typing import Tuple, Union
from insitupy.campaigns.snowex.snowex_metadata import SnowExMetaDataParser
from insitupy.profiles.metadata import ProfileMetaData
# Module-level logger used by the parser below. getLogger() is called without
# a name, so this is the root logger rather than a module-specific one.
LOG = logging.getLogger()
[docs]
class PointSnowExMetadataParser(SnowExMetaDataParser):
    """
    Extend the parser to update the extended variables
    """

    def find_header_info(self, filename):
        """
        Read the header portion of a file and identify its columns.

        Every line above the column header line is treated as header
        metadata. Those lines are joined and split on ``#`` so that entries
        spread over several physical lines are recombined.

        As a side effect, the units parsed from the column header line are
        merged into ``self._units_map`` (parsed units take precedence over
        the existing entries).

        Args:
            filename: Path to a csv containing # leading lines with site
                details. Anything accepted by ``str()`` that yields a path
                (e.g. ``str`` or ``pathlib.Path``).

        Returns:
            tuple: **str_data** - List of stripped header strings, one per
                   ``#`` separated entry found above the column header
                   **columns** - Columns returned by ``_parse_columns``
                   **columns_map** - Column map returned by
                   ``_parse_columns``
                   **header_pos** - Index of the columns header for skiprows
                   in read_csv
        """
        filename = str(filename)
        # The context manager closes the file; no explicit close() is needed
        with open(filename, encoding='latin') as fp:
            lines = fp.readlines()

        # The second returned value (header indicator) is not needed here
        header_pos, _ = self._find_header_position(lines)

        # Strip out unhelpful string segments that break parsing
        strip_out = ["(hh:mm, local, MST)"]
        header_line = lines[header_pos]
        for so in strip_out:
            header_line = header_line.replace(so, "")

        # identify columns, map columns, and units map
        columns, columns_map, units_map = self._parse_columns(header_line)

        # Combine with user defined units map
        self._units_map = {**self._units_map, **units_map}

        LOG.debug(
            f'Column Data found to be {len(columns)} columns based on'
            f' Line {header_pos}'
        )

        # Only parse what we know is the header, cleaned of line returns
        header_lines = [ln.strip() for ln in lines[:header_pos]]

        # Join all data and split on header separator
        # This handles combining split lines
        str_data = " ".join(header_lines).split('#')
        # Empty segments are dropped before stripping (matches prior output)
        str_data = [ln.strip() for ln in str_data if ln]

        return str_data, columns, columns_map, header_pos

    def parse(self, filename: str) -> Tuple[
        Union[ProfileMetaData, None], list, dict, int
    ]:
        """
        Parse the file and return a metadata object.

        We can override these methods as needed to parse the different
        metadata.

        This populates ``self._rough_obj`` from the header lines returned by
        ``find_header_info``.

        Args:
            filename: (str) Full path to the file with the header info to
                parse

        Returns:
            Tuple:
                metadata object or None (when the header holds no
                metadata), column list, columns map, position of header in
                file
        """
        (
            meta_lines, columns, columns_map, header_position
        ) = self.find_header_info(filename)
        self._rough_obj = self._preparse_meta(meta_lines)

        # We do not have header metadata for point files
        if not self.rough_obj:
            LOG.debug(
                "No metadata found in the file header, "
                "using default no extra metadata"
            )
            metadata = None
        else:
            # In the case we have metadata (like for a perimeter file)
            LOG.debug(
                f"Metadata found in the file header: {self.rough_obj}"
            )
            metadata = ProfileMetaData(
                site_name=self.parse_id(),
                date_time=self.parse_date_time(),
                latitude=self.parse_latitude(),
                longitude=self.parse_longitude(),
                utm_epsg=str(self.parse_utm_epsg()),
                campaign_name=self.parse_campaign_name(),
                flags=self.parse_flags(),
                observers=self.parse_observers()
            )

        return metadata, columns, columns_map, header_position