Changed parsing from beautifulsoup to lxml #232

Draft
wants to merge 11 commits into master
added 'A13': 'Withdrawn' to docstatus
jvanelteren committed Jan 5, 2023
commit f2aaa89499faaee142e9d7c8058bc9d0ca6e4be1
3 changes: 2 additions & 1 deletion entsoe/entsoe.py
@@ -130,6 +130,7 @@ def _base_request(self, params: Dict, start: pd.Timestamp,
if response.headers.get('content-type', '') == 'application/xml':
if 'No matching data found' in response.text:
raise NoMatchingDataError
+print('response received')
return response

@staticmethod
@@ -1652,7 +1653,7 @@ def query_procured_balancing_capacity(
response = super(EntsoePandasClient, self).query_procured_balancing_capacity(
country_code=area, start=start, end=end,
process_type=process_type, type_marketagreement_type=type_marketagreement_type)
-df = parse_procured_balancing_capacity(response.content, area.tz)
+df = parse_procured_balancing_capacity(response, area.tz)
df = df.tz_convert(area.tz)
df = df.truncate(before=start, after=end)
return df
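The substantive change in this file is that query_procured_balancing_capacity now hands the whole requests.Response to the parser instead of its .content bytes. A minimal sketch of what that call pattern implies, assuming (the parser body is not shown in this diff) that the lxml-based parse_procured_balancing_capacity unwraps the bytes itself:

from io import BytesIO
from lxml import etree

def parse_procured_balancing_capacity(response, tz):
    # Assumed behaviour, for illustration only: take the Response object and
    # feed its raw XML bytes to lxml; the real parser then builds a DataFrame
    # localised to tz.
    tree = etree.parse(BytesIO(response.content))
    return tree.getroot()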
1 change: 1 addition & 0 deletions entsoe/mappings.py
@@ -178,6 +178,7 @@ def code(self):
'A02': 'Final',
'A05': 'Active',
'A09': 'Cancelled',
+'A13': 'Withdrawn',
'X01': 'Estimated'}

BSNTYPE = {'A29': 'Already allocated capacity (AAC)',
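The new entry lets document status code A13 resolve to a readable label like the other ENTSO-E codes. A minimal lookup sketch, assuming DOCSTATUS is the module-level dict this hunk edits in entsoe/mappings.py:

from entsoe.mappings import DOCSTATUS

# Translate a docStatus <value> code from the XML into its human-readable label.
print(DOCSTATUS.get('A13', 'Unknown'))  # -> 'Withdrawn'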
18 changes: 10 additions & 8 deletions entsoe/parsers.py
@@ -17,6 +17,7 @@


def find(element, tag):
+print(tag)
return next(element.iter('{*}'+tag)).text

def findall(element, tag):
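find (and presumably findall, whose body is collapsed here) resolves a tag regardless of its XML namespace by using lxml's '{*}' wildcard with Element.iter. A self-contained sketch of the same technique; the sample document and namespace URI are invented for illustration:

from lxml import etree

# Namespaced sample document.
xml = b'<doc xmlns="urn:example"><createdDateTime>2023-01-05T00:00:00Z</createdDateTime></doc>'
root = etree.fromstring(xml)

# '{*}tag' matches the tag in any namespace, so callers never need the URI.
print(next(root.iter('{*}createdDateTime')).text)  # 2023-01-05T00:00:00Z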
@@ -449,7 +450,7 @@ def gen_frames(archive):
for f in arc.infolist():
if f.filename.endswith('xml'):
#TODO this should generate bytes not xml text
-frame = parse_imbalance_prices(xml_text=arc.read(f))
+frame = parse_imbalance_prices(xml_bytes=arc.read(f))
yield frame

frames = gen_frames(zip_contents)
@@ -637,8 +638,8 @@ def _parse_generation_timeseries(element, per_plant: bool = False, include_eic:
series.index = _parse_datetimeindex(element)

# Check if there is a psrtype, if so, get it.
-_psrtype = findall(element, 'psrType')
-if _psrtype is not None:
+_psrtype = list(findall(element, 'psrType'))
+if _psrtype:
psrtype = find(element, 'psrType')
else:
psrtype = None
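The added list() call matters because an lxml iterator is never None. Assuming findall simply returns element.iter('{*}' + tag), as find above does, the old "is not None" test passed even when no psrType element existed, whereas the BeautifulSoup find_all it presumably replaces returned a list. Materializing the iterator restores a working emptiness check; a short sketch of the pitfall:

from lxml import etree

root = etree.fromstring(b'<TimeSeries xmlns="urn:example"><quantity>5</quantity></TimeSeries>')

lazy = root.iter('{*}psrType')   # an iterator object, even though nothing matches
print(lazy is not None)          # True  -> the old check cannot detect "no psrType"
print(bool(list(lazy)))          # False -> the new list()-based check can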
@@ -866,7 +867,7 @@ def _unavailability_gen_ts(element) -> list:

#TODO
HEADERS_UNAVAIL_TRANSM = ['created_doc_time',
-'docstatus',
+'docStatus',
'businesstype',
'in_domain',
'out_domain',
@@ -949,7 +950,7 @@ def _available_period(timeseries) -> list:
def _outage_parser(xml_file: bytes, headers, ts_func) -> pd.DataFrame:
# xml_text = xml_file.decode()
# soup = bs4.BeautifulSoup(xml_text, 'html.parser')
-element = etree.iterparse(BytesIO(xml_file))
+element = etree.parse(BytesIO(xml_file))



@@ -960,10 +961,11 @@ def _outage_parser(xml_file: bytes, headers, ts_func) -> pd.DataFrame:
creation_date = pd.Timestamp(find(element, 'createdDateTime'))
except AttributeError:
creation_date = ""

-try:

+value = list(findall(element, 'value'))
+if value:
docstatus = DOCSTATUS[find(element, 'value')]
-except AttributeError:
+else:
docstatus = None
d = list()
series = _extract_timeseries(xml_file)
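The switch from etree.iterparse to etree.parse is what makes the find/findall helpers at the top of parsers.py usable here: iterparse returns an incremental iterator of (event, element) pairs with no iter() of its own, while parse returns an ElementTree whose iter() those helpers walk. A minimal sketch of the difference; the sample XML is invented for illustration:

from io import BytesIO
from lxml import etree

xml_file = (b'<Unavailability_MarketDocument xmlns="urn:example">'
            b'<createdDateTime>2023-01-05T00:00:00Z</createdDateTime>'
            b'</Unavailability_MarketDocument>')

element = etree.parse(BytesIO(xml_file))      # ElementTree: supports .iter()
print(next(element.iter('{*}createdDateTime')).text)

events = etree.iterparse(BytesIO(xml_file))   # just an iterator of (event, element) pairs
# events cannot be searched with .iter(); the parsed elements only become
# available by consuming the events, so the helpers above would fail on it.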