Skip to content

Fix for Issue #724 #1080

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ def add(self, triple):
assert isinstance(o, Node), "Object %s must be an rdflib term" % (o,)
self.__store.add((s, p, o), self, quoted=False)


def addN(self, quads):
"""Add a sequence of triple with context"""

Expand Down Expand Up @@ -989,6 +990,7 @@ def parse(
location=None,
file=None,
data=None,
ignore_errors=False,
**args
):
"""
Expand Down Expand Up @@ -1055,7 +1057,7 @@ def parse(
>>> os.remove(file_name)

"""

errorFlag = ignore_errors
source = create_input_source(
source=source,
publicID=publicID,
Expand All @@ -1072,14 +1074,15 @@ def parse(
format = "application/rdf+xml"
parser = plugin.get(format, Parser)()
try:
parser.parse(source, self, **args)
parser.parse(source, self, ignore_errors=errorFlag,**args )
finally:
if source.auto_close:
source.close()
return self

def load(self, source, publicID=None, format="xml", ignore_errors=False):
    """Parse ``source`` into this object by delegating to ``self.parse``.

    :param source: forwarded unchanged as the first positional argument.
    :param publicID: forwarded unchanged as the second positional argument.
    :param format: forwarded as the third positional argument; defaults
        to ``"xml"``.
    :param ignore_errors: forwarded to ``parse`` as a keyword argument.
        The default ``False`` keeps the strict behaviour.

    Nothing is returned; whatever ``parse`` returns is discarded.
    """
    # Single definition: the earlier three-argument variant was shadowed by
    # this one anyway, and the flag needs no intermediate alias.
    self.parse(source, publicID, format, ignore_errors=ignore_errors)

def query(
self,
Expand Down Expand Up @@ -1516,6 +1519,7 @@ def parse(
location=None,
file=None,
data=None,
ignore_errors = False,
**args
):
"""
Expand All @@ -1529,7 +1533,7 @@ def parse(
The graph into which the source was parsed. In the case of n3
it returns the root context.
"""

errorFlag = ignore_errors
source = create_input_source(
source=source,
publicID=publicID,
Expand All @@ -1545,7 +1549,7 @@ def parse(

context = Graph(store=self.store, identifier=g_id)
context.remove((None, None, None)) # hmm ?
context.parse(source, publicID=publicID, format=format, **args)
context.parse(source, publicID=publicID, format=format, ignore_errors=errorFlag,**args)
return context

def __reduce__(self):
Expand Down Expand Up @@ -1700,10 +1704,12 @@ def parse(
location=None,
file=None,
data=None,
ignore_errors=False,
**args
):
errorFlag = ignore_errors
c = ConjunctiveGraph.parse(
self, source, publicID, format, location, file, data, **args
self, source, publicID, format, location, file, data, ignore_errors=errorFlag,**args
)
self.graph(c)
return c
Expand Down Expand Up @@ -1762,7 +1768,7 @@ def add(self, triple):

def addN(self, quads):
"""Add a sequence of triple with context"""

self.store.addN(
(s, p, o, c)
for s, p, o, c in quads
Expand Down Expand Up @@ -1998,7 +2004,7 @@ def namespaces(self):
def absolutize(self, uri, defrag=1):
raise UnSupportedAggregateOperation()

def parse(self, source, publicID=None, format="xml", **args):
def parse(self, source, publicID=None, format="xml", ignore_errors=False,**args):
raise ModificationException()

def n3(self):
Expand Down
2 changes: 1 addition & 1 deletion rdflib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Parser(object):
def __init__(self):
pass

def parse(self, source, sink, ignore_errors=False, **kwargs):
    """Base parse hook; this implementation deliberately does nothing.

    :param source: the input to parse (unused here).
    :param sink: the destination for parsed data (unused here).
    :param ignore_errors: lenient-parsing flag (unused here); accepted so
        that callers can pass it to any parser uniformly.
    :param kwargs: extra parser-specific options, accepted and ignored so
        that a caller forwarding additional keyword arguments does not
        fail on this no-op base implementation.
    :return: ``None``.
    """


Expand Down
26 changes: 18 additions & 8 deletions rdflib/plugins/parsers/notation3.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@

from uuid import uuid4

import logging
log = logging.getLogger(__name__) #For logging the statements that could not be parsed


from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph
from rdflib.compat import long_type
Expand Down Expand Up @@ -351,6 +355,7 @@ def __init__(
genPrefix="",
why=None,
turtle=False,
ignore_errors=False,
):
""" note: namespace names should *not* end in # ;
the # will get added during qname processing """
Expand All @@ -363,7 +368,7 @@ def __init__(
self._store = store
if genPrefix:
store.setGenPrefix(genPrefix) # pass it on

self.errorFlag = ignore_errors
self._thisDoc = thisDoc
self.lines = 0 # for error handling
self.startOfLine = 0 # For calculating character number
Expand Down Expand Up @@ -1637,7 +1642,11 @@ def UEscape(self, argstr, i, startline):
return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U")

def BadSyntax(self, argstr, i, msg):
    """Report a syntax error found at offset ``i`` of ``argstr``.

    When ``self.errorFlag`` is false, raise the ``BadSyntax`` exception
    built from the current document, the line counter, the input text,
    the offset and ``msg``. When it is true, log the message at INFO
    level (line reported as ``self.lines + 1``) and return ``None``.

    NOTE(review): in the lenient case control returns to the caller, which
    was presumably written on the assumption that this call always
    raises -- confirm that every call site copes with a normal return.
    """
    if not self.errorFlag:
        raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg)
    # Lazy %-style arguments: the string is only built if INFO is enabled.
    log.info("Bad Syntax at line %i: %s", self.lines + 1, msg)



# If we are going to do operators then they should generate
Expand Down Expand Up @@ -1884,8 +1893,8 @@ class TurtleParser(Parser):
def __init__(self):
pass

def parse(self, source, graph, encoding="utf-8", turtle=True):

def parse(self, source, graph, encoding="utf-8", turtle=True , ignore_errors=False):
errorFlag = ignore_errors
if encoding not in [None, "utf-8"]:
raise Exception(
("N3/Turtle files are always utf-8 encoded, ", "I was passed: %s")
Expand All @@ -1895,7 +1904,7 @@ def parse(self, source, graph, encoding="utf-8", turtle=True):
sink = RDFSink(graph)

baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
p = SinkParser(sink, baseURI=baseURI, turtle=turtle)
p = SinkParser(sink, baseURI=baseURI, turtle=turtle , ignore_errors=errorFlag)

p.loadStream(source.getByteStream())

Expand All @@ -1915,7 +1924,8 @@ class N3Parser(TurtleParser):
def __init__(self):
pass

def parse(self, source, graph, encoding="utf-8"):
def parse(self, source, graph, encoding="utf-8" , ignore_errors = False):
errorFlag=ignore_errors
# we're currently being handed a Graph, not a ConjunctiveGraph
assert graph.store.context_aware # is this implied by formula_aware
assert graph.store.formula_aware
Expand All @@ -1926,7 +1936,7 @@ def parse(self, source, graph, encoding="utf-8"):
# TODO: update N3Processor so that it can use conj_graph as the sink
conj_graph.namespace_manager = graph.namespace_manager

TurtleParser.parse(self, source, conj_graph, encoding, turtle=False)
TurtleParser.parse(self, source, conj_graph, encoding, turtle=False , ignore_errors=errorFlag)


def _test(): # pragma: no cover
Expand All @@ -1945,7 +1955,7 @@ def main(): # pragma: no cover
sink = RDFSink(g)
base_uri = "file://" + os.path.join(os.getcwd(), sys.argv[1])

p = SinkParser(sink, baseURI=base_uri)
p = SinkParser(sink, baseURI=base_uri , ignore_errors=False)
p._bindings[""] = p._baseURI + "#"
p.startDoc()

Expand Down
11 changes: 9 additions & 2 deletions rdflib/plugins/parsers/nquads.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,14 @@

__all__ = ["NQuadsParser"]

import logging
log = logging.getLogger(__name__) #For logging the statements that could not be parsed


class NQuadsParser(NTriplesParser):
def parse(self, inputsource, sink, **kwargs):
def parse(self, inputsource, sink, ignore_errors=False , **kwargs ):
"""Parse f as an N-Triples file."""
errorFlag=ignore_errors
assert sink.store.context_aware, (
"NQuadsParser must be given" " a context aware store."
)
Expand All @@ -63,7 +67,10 @@ def parse(self, inputsource, sink, **kwargs):
try:
self.parseline()
except ParseError as msg:
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
if not errorFlag:
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
else:
log.info("Invalid line (%s):\n%r" % (msg, __line))

return self.sink

Expand Down
5 changes: 3 additions & 2 deletions rdflib/plugins/parsers/nt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ class NTParser(Parser):
def __init__(self):
super(NTParser, self).__init__()

def parse(self, source, sink, baseURI=None, ignore_errors=False):
    """Run the byte stream of ``source`` through an ``NTriplesParser``.

    :param source: input source; only ``getByteStream()`` is used.
    :param sink: wrapped in an ``NTSink`` and handed to the parser.
    :param baseURI: accepted for interface compatibility; not used here.
    :param ignore_errors: forwarded to ``NTriplesParser.parse``.

    The stream is closed when parsing finishes, including when the
    parser raises.
    """
    f = source.getByteStream()  # TODO getCharacterStream?
    try:
        # Parse exactly once, with the caller's error-handling preference.
        NTriplesParser(NTSink(sink)).parse(f, ignore_errors=ignore_errors)
    finally:
        f.close()
18 changes: 14 additions & 4 deletions rdflib/plugins/parsers/ntriples.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
import re
import codecs

import logging
log = logging.getLogger(__name__) #For logging the statements that could not be parsed


from rdflib.term import URIRef as URI
from rdflib.term import BNode as bNode
from rdflib.term import Literal
Expand Down Expand Up @@ -130,8 +134,9 @@ def __init__(self, sink=None):
else:
self.sink = Sink()

def parse(self, f):
def parse(self, f , ignore_errors=False):
"""Parse f as an N-Triples file."""
errorFlag = ignore_errors
if not hasattr(f, "read"):
raise ParseError("Item to parse must be a file-like object.")

Expand All @@ -147,17 +152,22 @@ def parse(self, f):
try:
self.parseline()
except ParseError:
raise ParseError("Invalid line: %r" % self.line)
if not errorFlag:
raise ParseError("Invalid line: %r" % self.line)
else :
log.info("Invalid line: %r" % self.line)
continue
return self.sink

def parsestring(self, s, ignore_errors=False):
    """Parse the string ``s`` as N-Triples by delegating to ``self.parse``.

    :param s: the text to parse; must be a ``str`` instance.
    :param ignore_errors: forwarded to ``self.parse``.
    :raises ParseError: if ``s`` is not a ``str`` instance.

    Nothing is returned; the value returned by ``self.parse`` is discarded.
    """
    if not isinstance(s, str):
        raise ParseError("Item to parse must be a string instance.")
    # Seed the buffer directly; a freshly constructed BytesIO is already
    # positioned at offset 0, so no write()/seek(0) pair is needed.
    f = BytesIO(cast_bytes(s))
    # Parse exactly once: a second pass would only see an exhausted buffer.
    self.parse(f, ignore_errors=ignore_errors)

def readline(self):
"""Read an N-Triples line from buffered input."""
Expand Down
21 changes: 14 additions & 7 deletions rdflib/plugins/parsers/rdfxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from xml.sax import make_parser
from xml.sax.handler import ErrorHandler
from xml.sax.saxutils import handler, quoteattr, escape
import logging
log = logging.getLogger(__name__) #For logging the statements that could not be parsed


from urllib.parse import urldefrag, urljoin
Expand Down Expand Up @@ -115,9 +117,10 @@ def next_li(self):


class RDFXMLHandler(handler.ContentHandler):
def __init__(self, store):
def __init__(self, store , ignore_errors=False):
self.store = store
self.preserve_bnode_ids = False
self.errorFlag = ignore_errors
self.reset()

def reset(self):
Expand Down Expand Up @@ -208,8 +211,10 @@ def error(self, message):
locator.getLineNumber(),
locator.getColumnNumber(),
)
raise ParserError(info + message)

if not self.errorFlag:
raise ParserError(info + message)
else:
log.info(info + message)
def get_current(self):
return self.stack[-2]

Expand Down Expand Up @@ -561,7 +566,8 @@ def literal_element_end(self, name, qname):
self.parent.object += self.current.object + end


def create_parser(target, store):
def create_parser(target, store , ignore_errors=False):
errorFlag = ignore_errors
parser = make_parser()
try:
# Workaround for bug in expatreader.py. Needed when
Expand All @@ -570,7 +576,7 @@ def create_parser(target, store):
except AttributeError:
pass # Not present in Jython (at least)
parser.setFeature(handler.feature_namespaces, 1)
rdfxml = RDFXMLHandler(store)
rdfxml = RDFXMLHandler(store , ignore_errors = errorFlag)
rdfxml.setDocumentLocator(target)
# rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
parser.setContentHandler(rdfxml)
Expand All @@ -582,8 +588,9 @@ class RDFXMLParser(Parser):
def __init__(self):
pass

def parse(self, source, sink, **args):
self._parser = create_parser(source, sink)
def parse(self, source, sink , ignore_errors=False , **args):
errorFlag = ignore_errors
self._parser = create_parser(source, sink , ignore_errors=errorFlag)
content_handler = self._parser.getContentHandler()
preserve_bnode_ids = args.get("preserve_bnode_ids", None)
if preserve_bnode_ids is not None:
Expand Down
6 changes: 3 additions & 3 deletions rdflib/plugins/parsers/trig.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ class TrigParser(Parser):
def __init__(self):
pass

def parse(self, source, graph, encoding="utf-8"):

def parse(self, source, graph, encoding="utf-8" , ignore_errors=False):
errorFlag = ignore_errors
if encoding not in [None, "utf-8"]:
raise Exception(
("TriG files are always utf-8 encoded, ", "I was passed: %s") % encoding
Expand All @@ -151,7 +151,7 @@ def parse(self, source, graph, encoding="utf-8"):
baseURI = conj_graph.absolutize(
source.getPublicId() or source.getSystemId() or ""
)
p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)
p = TrigSinkParser(sink, baseURI=baseURI, turtle=True , ignore_errors=errorFlag)

p.loadStream(source.getByteStream())

Expand Down
Loading