Commit bc962109 authored by xiang's avatar xiang
Browse files

Merge branch 'staging' into 'master'

Staging

See merge request ceotr/app_common!21
parents cceac27e 415b295d
ERDDAP dataset configuration editor
errdap_dataset_xml helps you edit an ERDDAP dataset configuration easily from Python!
usage:
dataset_xml_parser reads an ERDDAP dataset XML file and returns its content as a str
ERDDAPDatasetXMLEditor helps you edit the ERDDAP dataset XML
An ERDDAP configuration contains 4 sections:
header, database behaviour, dataset global attributes, and variable attributes
change header:
set_header
get_header
database behaviour:
set_attr
get_all_attr
remove_attr(name)
dataset global attribute:
get_all_added_attr()
set_added_attr(name, text)
remove_added_attr(name)
variables attribute:
remove_data_variable(source_name)
edit_data_variable_destination_name(source_name, new_destination_name)
edit_data_variable_data_type(source_name, new_data_type)
set_data_variable_add_attribute(source_name, attr_name, new_attr_text)
remove_data_variable_add_attribute(source_name, attr_name)
\ No newline at end of file
from .dataset_xml_editor import ERDDAPDatasetXMLEditor
from .erddap_dataset_configure_parser import dataset_xml_parser
from xml.etree.ElementTree import (
fromstring,
ElementTree,
Element,
tostring
)
from .dataset_xml_editor_base import ERDDAPDatasetXMLEditorBase
class ERDDAPDatasetXMLEditor(ERDDAPDatasetXMLEditorBase):
    """In-memory editor for a single ERDDAP ``<dataset>`` XML block.

    The XML is parsed once in ``__init__``; every method then reads or
    mutates the parsed tree. Use ``to_string`` or ``write`` to get the
    result back out.

    The direct children of the root are treated as three groups:

    * plain settings (any tag other than ``addAttributes``/``dataVariable``),
    * the top-level ``<addAttributes>`` section holding ``<att name=...>``,
    * ``<dataVariable>`` elements, addressed by their ``<sourceName>`` text.
    """

    def __init__(self, xml_str):
        """Parse ``xml_str`` (the text of one ``<dataset>`` element)."""
        self._tree = ElementTree(fromstring(xml_str))
        self._root = self._tree.getroot()
        # Position of the top-level <addAttributes> child as of the most
        # recent lookup. It is refreshed on every lookup and never trusted
        # as a cache, because inserting/removing siblings shifts positions.
        self._add_attr_index = None

    # ------------------------------------------------------------------
    # header (attributes of the root <dataset> element)
    # ------------------------------------------------------------------
    def set_header(self, name, content):
        """Set attribute ``name`` of the root element to ``content``."""
        self._set_header(self._root, name, content)

    def get_header(self):
        """Return the attribute mapping of the root element."""
        return self._get_header(self._root)

    # ------------------------------------------------------------------
    # plain settings (direct children of the root)
    # ------------------------------------------------------------------
    def get_all_attr(self):
        """Return ``{tag: text}`` for every plain setting child.

        ``addAttributes`` and ``dataVariable`` children are skipped. If a
        tag occurs more than once, the last occurrence wins.
        """
        return {
            child.tag: child.text
            for child in self._root
            if child.tag not in ("addAttributes", "dataVariable")
        }

    def set_attr(self, tag, text):
        """Set the text of the first child called ``tag``, creating it if absent.

        An ``int`` value is converted to ``str``. A newly created element is
        inserted as the first child of the root.
        """
        if type(text) is int:
            text = str(text)
        for child in self._root:
            if child.tag == tag:
                child.text = text
                break
        else:
            new_element = Element(tag)
            new_element.text = text
            self._root.insert(0, new_element)

    def remove_attr(self, name):
        """Remove the first child matching ``name``; do nothing if absent."""
        attr = self._root.find(name)
        # find() returns None for a missing tag, and Element.remove() does
        # not accept None, so guard instead of crashing.
        if attr is not None:
            self._root.remove(attr)

    # ------------------------------------------------------------------
    # dataset-level <addAttributes>
    # ------------------------------------------------------------------
    def get_all_added_attr(self):
        """Return ``{name: text}`` for the dataset-level ``<att>`` elements.

        Returns an empty dict when there is no top-level ``<addAttributes>``.
        Elements without a ``name`` attribute are skipped.
        """
        section = self._get_added_attr_section()
        if section is None:
            return {}
        return {
            child.attrib["name"]: child.text
            for child in section
            if "name" in child.attrib
        }

    def _get_element_by_tag_generator(self, tag):
        """Yield ``(index, child)`` for each direct root child named ``tag``."""
        for index, child in enumerate(self._root):
            if child.tag == tag:
                yield index, child

    def _get_added_attr_section(self):
        """Return the top-level ``<addAttributes>`` element, or ``None``.

        The root is rescanned on every call. A remembered index would go
        stale as soon as ``set_attr`` inserts a new first child or
        ``remove_attr`` deletes an earlier sibling.
        """
        found = None
        found_index = None
        # No early exit: if several sections exist, the last one is used,
        # which is what the first lookup has always returned.
        for index, child in enumerate(self._root):
            if child.tag == "addAttributes":
                found_index = index
                found = child
        self._add_attr_index = found_index
        return found

    def _added_new_element_to_attr_section(self, name, text, added_attr_element):
        """Append a new ``<att name=name>text</att>`` to ``added_attr_element``."""
        new_element = Element("att", attrib={'name': name})
        new_element.text = text
        new_element.tail = '\n\n '
        added_attr_element.append(new_element)

    def _text_update_for_added_attr(self, name, text, child):
        """Set ``child.text`` if ``child`` is the ``<att>`` called ``name``.

        Returns ``True`` when the element matched and was updated,
        ``False`` otherwise.
        """
        if child.attrib.get("name") == name:
            child.text = text
            return True
        return False

    def set_added_attr(self, name, text):
        """Update the dataset-level attribute ``name``, adding it if absent.

        Does nothing when the dataset has no top-level ``<addAttributes>``.
        """
        added_attr_element = self._get_added_attr_section()
        # Compare with None explicitly: an Element with no children is
        # falsy, which would wrongly skip an empty <addAttributes/>.
        if added_attr_element is None:
            return
        for child in added_attr_element:
            if self._text_update_for_added_attr(name, text, child):
                break
        else:
            self._added_new_element_to_attr_section(name, text, added_attr_element)

    def remove_added_attr(self, name):
        """Remove the dataset-level attribute ``name``; do nothing if absent."""
        added_attr_element = self._get_added_attr_section()
        if added_attr_element is None:
            return
        for child in added_attr_element:
            if child.attrib.get("name") == name:
                added_attr_element.remove(child)
                break

    # ------------------------------------------------------------------
    # output
    # ------------------------------------------------------------------
    def write(self, path):
        """Write the current tree to ``path``."""
        self._tree.write(path)

    def to_string(self):
        """Return the current tree serialized as a unicode XML string."""
        return tostring(self._root, encoding='unicode', method='xml')

    # ------------------------------------------------------------------
    # <dataVariable> elements
    # ------------------------------------------------------------------
    def _find_data_variable_by_source_name(self, source_name):
        """Return ``(index, element)`` of the matching ``<dataVariable>``.

        Only the first ``<sourceName>`` child of each variable is compared.
        Returns ``(None, None)`` when no variable matches.
        """
        for index, child in self._get_element_by_tag_generator("dataVariable"):
            for sub in child:
                if sub.tag == "sourceName":
                    if sub.text == source_name:
                        return index, child
                    # First <sourceName> differs: move on to next variable.
                    break
        return None, None

    def remove_data_variable(self, source_name):
        """Remove the variable called ``source_name``; do nothing if absent."""
        _, child = self._find_data_variable_by_source_name(source_name)
        if child is not None:
            self._root.remove(child)

    def edit_data_variable_destination_name(self, source_name, new_destination_name):
        """Set ``<destinationName>`` of the variable called ``source_name``.

        A missing variable is not handled here: the ``None`` lookup result
        is passed straight to the base-class helper.
        """
        _, child = self._find_data_variable_by_source_name(source_name)
        self.sub_element_text_change_by_tag(child, "destinationName", new_destination_name)

    def edit_data_variable_data_type(self, source_name, new_data_type):
        """Set ``<dataType>`` of the variable called ``source_name``.

        A missing variable is not handled here: the ``None`` lookup result
        is passed straight to the base-class helper.
        """
        _, child = self._find_data_variable_by_source_name(source_name)
        self.sub_element_text_change_by_tag(child, "dataType", new_data_type)

    def set_data_variable_add_attribute(self, source_name, attr_name, new_attr_text):
        """Update or add ``<att name=attr_name>`` on one variable.

        A missing variable is not handled here: the ``None`` lookup result
        is passed straight to the base-class helpers.
        """
        _, child = self._find_data_variable_by_source_name(source_name)
        add_attribute_element = self._get_data_variable_add_attributes(child)
        self.set_add_attribute(add_attribute_element, attr_name, new_attr_text)

    def remove_data_variable_add_attribute(self, source_name, attr_name):
        """Remove ``<att name=attr_name>`` from one variable.

        Does nothing when the variable, its ``<addAttributes>`` section or
        the attribute itself does not exist.
        """
        _, child = self._find_data_variable_by_source_name(source_name)
        if child is None:
            return
        add_attribute_element = self._get_data_variable_add_attributes(child)
        if add_attribute_element is None:
            return
        for sub in add_attribute_element:
            if sub.attrib.get("name") == attr_name:
                add_attribute_element.remove(sub)
                break
from xml.etree.ElementTree import (
fromstring,
ElementTree,
Element,
tostring
)
class ERDDAPDatasetXMLEditorBase:
    """Element-level helpers for editing ERDDAP dataset XML.

    None of the helpers keep state; each works on the element passed in.
    """

    @staticmethod
    def _get_data_variable_add_attributes(data_variable_element):
        """Return the first ``<addAttributes>`` child, or ``None`` if absent."""
        for sub in data_variable_element:
            if sub.tag == "addAttributes":
                return sub
        return None

    @staticmethod
    def create_new_att_element(name, text):
        """Build a detached ``<att name=name>text</att>`` element."""
        new_element = Element("att", attrib={'name': name})
        new_element.text = text
        new_element.tail = '\n\n '
        return new_element

    @staticmethod
    def update_att_element_text(ele, name, text):
        """Set ``ele.text`` to ``text`` if ``ele`` carries ``name="<name>"``.

        Returns ``True`` when the element matched and was updated, ``False``
        otherwise. An element without a ``name`` attribute never matches;
        it used to raise ``KeyError`` and abort the caller's scan.
        """
        if ele.attrib.get("name") == name:
            ele.text = text
            return True
        return False

    def set_add_attribute(self, ele, name, new_attr_text):
        """Update ``<att name=name>`` inside ``ele``, appending it if absent.

        ``ele`` is the ``<addAttributes>`` container. Only the first
        matching child is updated.
        """
        for sub in ele:
            if self.update_att_element_text(sub, name, new_attr_text):
                break
        else:
            # No child matched (or ele is empty): add a new attribute.
            ele.append(self.create_new_att_element(name, new_attr_text))

    @staticmethod
    def _set_header(ele, name, content):
        """Set XML attribute ``name`` on ``ele``; ``int`` values become ``str``."""
        # Exact type check on purpose: only plain ints are converted.
        if type(content) is int:
            content = str(content)
        ele.set(name, content)

    @staticmethod
    def _get_header(ele):
        """Return the attribute mapping of ``ele`` (the live dict, not a copy)."""
        return ele.attrib

    @staticmethod
    def sub_element_text_change_by_tag(ele, tag, new_text):
        """Set the text of every direct child of ``ele`` whose tag is ``tag``."""
        for sub in ele:
            if sub.tag == tag:
                sub.text = new_text
......@@ -5,22 +5,30 @@ DATASET_XML = 1
AFTER_DATASET_XML = 3
# Compiled once at import time. Raw strings keep ``\s`` from being read as
# an (invalid) string escape sequence.
_DATASET_HEADER_RE = re.compile(
    r'<dataset\s*((type=.*)|(datasetID=.*)|(active=("true"|"false"))\s*){3}>'
)
_DATASET_END_RE = re.compile(r"</dataset>")


def header_matcher(input):
    """Return a match object if ``input`` starts with a ``<dataset ...>`` tag.

    The tag must carry ``type``, ``datasetID`` and ``active`` (``"true"`` or
    ``"false"``) in any order. Matching is anchored at the first character,
    so a line with leading whitespace does not match. Returns ``None`` when
    there is no match.
    """
    return _DATASET_HEADER_RE.match(input)


def end_matcher(input):
    """Return a match object if ``input`` starts with ``</dataset>``, else ``None``."""
    return _DATASET_END_RE.match(input)


def dataset_xml_parser(file_path):
    """Return the text of the first ``<dataset>`` block found in ``file_path``.

    Lines are copied from the first line accepted by ``header_matcher``
    through the first following line accepted by ``end_matcher``, both
    included. Every copied line is prefixed with ``"\\n"`` and keeps its own
    line ending. Returns ``""`` when no header line is found.

    Relies on the module-level section constants ``BEFORE_DATASET_XML`` and
    ``DATASET_XML`` (presumably defined at the top of this module, outside
    the visible hunk -- confirm).
    """
    content_section = BEFORE_DATASET_XML
    pieces = []
    with open(file_path, 'r') as f:
        for line in f:
            if content_section == BEFORE_DATASET_XML and header_matcher(line):
                content_section = DATASET_XML
            if content_section == DATASET_XML:
                pieces.append("\n" + line)
                if end_matcher(line):
                    # Nothing after the closing tag is ever collected, so
                    # stop reading here.
                    break
    return "".join(pieces)
......@@ -5,3 +5,6 @@ This module provide file handler which and send command to local server or remot
from .file_system_manager import FileSystemManager
fsm = FileSystemManager()
# !!! Not finished yet -- do not use this module.
# The unconditional raise below makes importing this module fail with
# NotImplementedError, right after the FileSystemManager instance above
# has been created.
raise NotImplementedError
<dataset active="true" datasetID="1557432033_2426023_0a86_8c8c_12d2" type="EDDTableFromNcFiles">
<reloadEveryNMinutes>40320</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/path/to/files/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex />
<postExtractRegex />
<extractRegex />
<columnNameForExtract />
<sortedColumnSourceName>time</sortedColumnSourceName>
<sortFilesBySourceNames>time</sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<addAttributes>
<att name="_NCProperties">null</att>
<att name="cdm_data_type">TrajectoryProfile</att>
<att name="cdm_profile_variables">???</att>
<att name="cdm_trajectory_variables">???</att>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="creator_name">OTN</att>
<att name="creator_type">institution</att>
</addAttributes>
<dataVariable>
<sourceName>m_water_depth</sourceName>
<destinationName>something</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
<dataVariable>
<sourceName>sci_water_pressure</sourceName>
<destinationName>sci_water_pressure</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="colorBarMaximum" type="double">5000.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
</dataset>
\ No newline at end of file
<dataset active="true" datasetID="1557432033_2426023_0a86_8c8c_12d2" type="EDDTableFromNcFiles">
<reloadEveryNMinutes>40320</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/path/to/files/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex />
<postExtractRegex />
<extractRegex />
<columnNameForExtract />
<sortedColumnSourceName>time</sortedColumnSourceName>
<sortFilesBySourceNames>time</sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<addAttributes>
<att name="_NCProperties">null</att>
<att name="cdm_data_type">TrajectoryProfile</att>
<att name="cdm_profile_variables">???</att>
<att name="cdm_trajectory_variables">???</att>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="creator_name">OTN</att>
<att name="creator_type">institution</att>
</addAttributes>
<dataVariable>
<sourceName>m_water_depth</sourceName>
<destinationName>m_water_depth</destinationName>
<dataType>int</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
<dataVariable>
<sourceName>sci_water_pressure</sourceName>
<destinationName>sci_water_pressure</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="colorBarMaximum" type="double">5000.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
</dataset>
\ No newline at end of file
<dataset active="true" datasetID="1557432033_2426023_0a86_8c8c_12d2" type="EDDTableFromNcFiles">
<reloadEveryNMinutes>40320</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/path/to/files/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex />
<postExtractRegex />
<extractRegex />
<columnNameForExtract />
<sortedColumnSourceName>time</sortedColumnSourceName>
<sortFilesBySourceNames>time</sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<addAttributes>
<att name="_NCProperties">null</att>
<att name="cdm_data_type">TrajectoryProfile</att>
<att name="cdm_profile_variables">example</att>
<att name="cdm_trajectory_variables">example</att>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="creator_type">institution</att>
<att name="test">example</att>
</addAttributes>
<dataVariable>
<sourceName>m_water_depth</sourceName>
<destinationName>m_water_depth</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
<dataVariable>
<sourceName>sci_water_pressure</sourceName>
<destinationName>sci_water_pressure</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="colorBarMaximum" type="double">5000.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
</dataset>
\ No newline at end of file
<dataset active="true" datasetID="1557432033_2426023_0a86_8c8c_12d2" type="EDDTableFromNcFiles">
<reloadEveryNMinutes>40320</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/path/to/files/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex />
<postExtractRegex />
<extractRegex />
<columnNameForExtract />
<sortedColumnSourceName>time</sortedColumnSourceName>
<sortFilesBySourceNames>time</sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<addAttributes>
<att name="_NCProperties">null</att>
<att name="cdm_data_type">TrajectoryProfile</att>
<att name="cdm_profile_variables">???</att>
<att name="cdm_trajectory_variables">???</att>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="creator_name">OTN</att>
<att name="creator_type">institution</att>
</addAttributes>
<dataVariable>
<sourceName>m_water_depth</sourceName>
<destinationName>m_water_depth</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
<dataVariable>
<sourceName>sci_water_pressure</sourceName>
<destinationName>sci_water_pressure</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="colorBarMaximum" type="double">5000.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
</dataset>
\ No newline at end of file
<dataset active="true" datasetID="1557432033_2426023_0a86_8c8c_12d2" type="EDDTableFromNcFiles">
<reloadEveryNMinutes>40320</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/path/to/files/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex />
<postExtractRegex />
<extractRegex />
<columnNameForExtract />
<sortedColumnSourceName>time</sortedColumnSourceName>
<sortFilesBySourceNames>time</sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<addAttributes>
<att name="_NCProperties">null</att>
<att name="cdm_data_type">TrajectoryProfile</att>
<att name="cdm_profile_variables">???</att>
<att name="cdm_trajectory_variables">???</att>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="creator_name">OTN</att>
<att name="creator_type">institution</att>
</addAttributes>
<dataVariable>
<sourceName>sci_water_pressure</sourceName>
<destinationName>sci_water_pressure</destinationName>
<dataType>double</dataType>
<addAttributes>
<att name="_ChunkSizes">null</att>
<att name="colorBarMaximum" type="double">5000.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="resolution">null</att>
</addAttributes>
</dataVariable>
</dataset>
\ No newline at end of file
<dataset active="true" datasetID="1557432033_2426023_0a86_8c8c_12d2" type="EDDTableFromNcFiles">
<reloadEveryNMinutes>40320</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/path/to/files/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex />
<postExtractRegex />
<extractRegex />
<columnNameForExtract />
<sortedColumnSourceName>time</sortedColumnSourceName>
<sortFilesBySourceNames>time</sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<addAttributes>
<att name="_NCProperties">null</att>
<att name="cdm_data_type">TrajectoryProfile</att>
<att name="cdm_profile_variables">???</att>
<att name="cdm_trajectory_variables">???</att>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="creator_name">OTN</att>
<att name="creator_type">institution</att>
</addAttributes>
<dataVariable>