From 528c37ea9c8fb538172b2f76e86ce8d3ccbdf4ec Mon Sep 17 00:00:00 2001 From: Matt Swain Date: Tue, 1 Nov 2016 21:09:25 +0000 Subject: [PATCH] Fix absolute reference links with anchor --- chemdataextractor/reader/markup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chemdataextractor/reader/markup.py b/chemdataextractor/reader/markup.py index 49c28f5..b92b103 100644 --- a/chemdataextractor/reader/markup.py +++ b/chemdataextractor/reader/markup.py @@ -164,9 +164,8 @@ def _parse_table_footnotes(self, fns, refs, specials): def _parse_reference(self, el): """Return reference ID from href or text content.""" - if el.get('href', '').startswith('#'): - # TODO: Get anchor from href with absolute url, e.g. http://pubs.acs.org/doi/full/10.1021/acs.jmedchem.6b01195#cor1 - return [el.get('href')[1:]] + if '#' in el.get('href', ''): + return [el.get('href').split('#', 1)[1]] elif 'rid' in el.attrib: return [el.attrib['rid']] elif 'idref' in el.attrib: