Skip to content

Commit

Permalink
#8 Work in progress:
Browse files Browse the repository at this point in the history
-Check XML schemas during parsing
  • Loading branch information
FABallemand committed Aug 21, 2023
1 parent e9d5371 commit c51888d
Show file tree
Hide file tree
Showing 6 changed files with 1,073 additions and 11 deletions.
56 changes: 48 additions & 8 deletions ezgpx/gpx_parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
from typing import Optional, Union
import logging
from datetime import datetime
import xml.etree.ElementTree as ET
import xmlschema

from ..gpx_elements import Bounds, Copyright, Email, Extensions, Gpx, Link, Metadata, Person, Point, PointSegment, Route, TrackSegment, Track, WayPoint

Expand All @@ -13,14 +15,16 @@ class Parser():
GPX file parser.
"""

def __init__(self, file_path: str = "") -> None:
def __init__(self, file_path: Optional[str] = None) -> None:
"""
initialize Parser instance.
Args:
file_path (str, optional): Path to the file to parse. Defaults to "".
file_path (str, optional): Path to the file to parse. Defaults to None.
"""
self.file_path: str = file_path
if not os.path.exists(self.file_path):
logging.warning("File path does not exist")

self.gpx_tree: ET.ElementTree = None
self.gpx_root: ET.Element = None
Expand All @@ -39,11 +43,39 @@ def __init__(self, file_path: str = "") -> None:

self.gpx: Gpx = Gpx()

if self.file_path != "":
if self.file_path is not None:
self.parse()

def check_schema(self):
pass
def check_schema(self, extensions_schema: bool = False) -> bool:
    """
    Check the parsed file against its XML schema(s).

    Args:
        extensions_schema (bool, optional): Toggle extensions schema verification. Requires internet connection and is not guaranteed to work. Defaults to False.

    Returns:
        bool: True if the file follows XML schemas, or if no bundled schema matches the GPX version (validation is then skipped and an error is logged).
    """
    if extensions_schema:
        # Validate against every ".xsd" entry found in self.gpx.xsi_schema_location.
        for gpx_schema in self.gpx.xsi_schema_location:
            if not gpx_schema.endswith(".xsd"):
                continue
            # Diagnostic output goes through logging rather than stdout.
            logging.debug("schema = %s", gpx_schema)
            schema = xmlschema.XMLSchema(gpx_schema)
            if not schema.is_valid(self.file_path):
                logging.error(f"File does not follow {gpx_schema}")
                return False
        # Every listed schema accepted the file (or none was listed).
        return True

    # Bundled schemas live in "<package>/schemas", one directory above the
    # directory containing this module; resolving from __file__ keeps the
    # lookup independent of the caller's current working directory.
    package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    schema_folders = {"1.1": "gpx_1_1", "1.0": "gpx_1_0"}
    folder = schema_folders.get(self.gpx.version)
    if folder is None:
        # No bundled schema for this version: best effort, do not reject the file.
        logging.error("Unable to check XML schema")
        return True

    schema = xmlschema.XMLSchema(os.path.join(package_dir, "schemas", folder, "gpx.xsd"))
    return schema.is_valid(self.file_path)

def find_precision(self, number: str) -> int:
"""
Expand Down Expand Up @@ -623,18 +655,20 @@ def parse_root_extensions(self):
extensions = self.gpx_root.find("topo:extensions", self.name_space)
self.gpx.extensions = self.parse_extensions(extensions)

def parse(self, file_path: str = "") -> Gpx:
def parse(self, file_path: Optional[str] = None, check_schema: bool = True, extensions_schema: bool = False) -> Gpx:
"""
Parse GPX file.
Args:
file_path (str, optional): Path to the file to parse. Defaults to "".
file_path (str, optional): Path to the file to parse. Defaults to None.
check_schema (bool, optional): Toggle schema verification. Defaults to True.
extensions_schema (bool, optional): Toggle extensions schema verification. Requires internet connection and is not guaranteed to work. Defaults to False.
Returns:
Gpx: Gpx instance.
"""
# File
if file_path != "":
if file_path is not None and os.path.exists(file_path):
self.file_path = file_path
elif self.file_path == "":
logging.error("No GPX file to parse.")
Expand All @@ -655,6 +689,12 @@ def parse(self, file_path: str = "") -> Gpx:
logging.error("Unable to parse properties in GPX file.")
raise

# Check XML schema
if check_schema:
if not self.check_schema(extensions_schema):
logging.error("Invalid GPX file (does not follow XML schema).")
raise

# Find precisions
self.find_precisions()

Expand Down
231 changes: 231 additions & 0 deletions ezgpx/schemas/gpx_1_0/gpx.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- GPX.xsd version 1.0 - For more information on GPX and this schema, visit http://www.topografix.com/gpx.asp -->
<xsd:schema
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:gpx="http://www.topografix.com/GPX/1/0"
targetNamespace="http://www.topografix.com/GPX/1/0"
elementFormDefault="qualified">

<!-- Main GPX definition -->

<xsd:element name="gpx">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="name" type="xsd:string" minOccurs="0"/> <!-- GPX file name -->
<xsd:element name="desc" type="xsd:string" minOccurs="0"/> <!-- GPX file description -->
<xsd:element name="author" type="xsd:string" minOccurs="0"/> <!-- GPX file author -->
<xsd:element name="email" type="gpx:emailType" minOccurs="0"/> <!-- GPX file author email -->
<xsd:element name="url" type="xsd:anyURI" minOccurs="0"/> <!-- GPX file URL -->
<xsd:element name="urlname" type="xsd:string" minOccurs="0"/>
<xsd:element name="time" type="xsd:dateTime" minOccurs="0"/> <!-- GPX file creation time -->
<xsd:element name="keywords" type="xsd:string" minOccurs="0"/> <!-- GPX file keywords -->
<xsd:element name="bounds" type="gpx:boundsType" minOccurs="0"/> <!-- GPX file bounding rect -->
<xsd:element name="wpt" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence> <!-- elements must appear in this order -->
<!-- Position info -->
<xsd:element name="ele" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="time" type="xsd:dateTime" minOccurs="0"/>
<xsd:element name="magvar" type="gpx:degreesType" minOccurs="0"/>
<xsd:element name="geoidheight" type="xsd:decimal" minOccurs="0"/>

<!-- Description info -->
<xsd:element name="name" type="xsd:string" minOccurs="0"/>
<xsd:element name="cmt" type="xsd:string" minOccurs="0"/>
<xsd:element name="desc" type="xsd:string" minOccurs="0"/>
<xsd:element name="src" type="xsd:string" minOccurs="0"/>
<xsd:element name="url" type="xsd:anyURI" minOccurs="0"/>
<xsd:element name="urlname" type="xsd:string" minOccurs="0"/>
<xsd:element name="sym" type="xsd:string" minOccurs="0"/>
<xsd:element name="type" type="xsd:string" minOccurs="0"/>

<!-- Accuracy info -->
<xsd:element name="fix" type="gpx:fixType" minOccurs="0"/>
<xsd:element name="sat" type="xsd:nonNegativeInteger" minOccurs="0"/>
<xsd:element name="hdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="vdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="pdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="ageofdgpsdata" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="dgpsid" type="gpx:dgpsStationType" minOccurs="0"/>

<!-- you can add your own privately defined wpt elements at the end of the wpt -->
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
<xsd:attribute name="lat" type="gpx:latitudeType" use="required"/>
<xsd:attribute name="lon" type="gpx:longitudeType" use="required"/>
</xsd:complexType>
</xsd:element>
<xsd:element name="rte" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="name" type="xsd:string" minOccurs="0"/>
<xsd:element name="cmt" type="xsd:string" minOccurs="0"/>
<xsd:element name="desc" type="xsd:string" minOccurs="0"/>
<xsd:element name="src" type="xsd:string" minOccurs="0"/> <!-- the source of this data: "Garmin eTrex", "Map", etc -->
<xsd:element name="url" type="xsd:anyURI" minOccurs="0"/>
<xsd:element name="urlname" type="xsd:string" minOccurs="0"/>
<xsd:element name="number" type="xsd:nonNegativeInteger" minOccurs="0"/> <!-- GPS track number -->
<!-- <xsd:element name="type" type="xsd:string" minOccurs="0"/> PROPOSED -->
<!-- you can add your own privately defined rte elements at the end of the rte -->
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
<xsd:element name="rtept" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence> <!-- elements must appear in this order -->

<!-- Position info -->
<xsd:element name="ele" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="time" type="xsd:dateTime" minOccurs="0"/>
<xsd:element name="magvar" type="gpx:degreesType" minOccurs="0"/>
<xsd:element name="geoidheight" type="xsd:decimal" minOccurs="0"/>

<!-- Description info -->
<xsd:element name="name" type="xsd:string" minOccurs="0"/>
<xsd:element name="cmt" type="xsd:string" minOccurs="0"/>
<xsd:element name="desc" type="xsd:string" minOccurs="0"/>
<xsd:element name="src" type="xsd:string" minOccurs="0"/>
<xsd:element name="url" type="xsd:anyURI" minOccurs="0"/>
<xsd:element name="urlname" type="xsd:string" minOccurs="0"/>
<xsd:element name="sym" type="xsd:string" minOccurs="0"/>
<xsd:element name="type" type="xsd:string" minOccurs="0"/>

<!-- Accuracy info -->
<xsd:element name="fix" type="gpx:fixType" minOccurs="0"/>
<xsd:element name="sat" type="xsd:nonNegativeInteger" minOccurs="0"/>
<xsd:element name="hdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="vdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="pdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="ageofdgpsdata" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="dgpsid" type="gpx:dgpsStationType" minOccurs="0"/>

<!-- you can add your own privately defined rtept elements at the end of the rtept -->
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
<xsd:attribute name="lat" type="gpx:latitudeType" use="required"/>
<xsd:attribute name="lon" type="gpx:longitudeType" use="required"/>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="trk" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="name" type="xsd:string" minOccurs="0"/>
<xsd:element name="cmt" type="xsd:string" minOccurs="0"/>
<xsd:element name="desc" type="xsd:string" minOccurs="0"/>
<xsd:element name="src" type="xsd:string" minOccurs="0"/> <!-- the source of this data: "Garmin eTrex", "Map", etc -->
<xsd:element name="url" type="xsd:anyURI" minOccurs="0"/>
<xsd:element name="urlname" type="xsd:string" minOccurs="0"/>
<xsd:element name="number" type="xsd:nonNegativeInteger" minOccurs="0"/> <!-- GPS track number -->
<!-- <xsd:element name="type" type="xsd:string" minOccurs="0"/> PROPOSED -->
<!-- you can add your own privately defined trk elements at the end of the trk -->
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
<xsd:element name="trkseg" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence> <!-- elements must appear in this order -->
<xsd:element name="trkpt" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence> <!-- elements must appear in this order -->

<!-- Position info -->
<xsd:element name="ele" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="time" type="xsd:dateTime" minOccurs="0"/>
<xsd:element name="course" type="gpx:degreesType" minOccurs="0"/>
<xsd:element name="speed" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="magvar" type="gpx:degreesType" minOccurs="0"/>
<xsd:element name="geoidheight" type="xsd:decimal" minOccurs="0"/>

<!-- Description info -->
<xsd:element name="name" type="xsd:string" minOccurs="0"/>
<xsd:element name="cmt" type="xsd:string" minOccurs="0"/>
<xsd:element name="desc" type="xsd:string" minOccurs="0"/>
<xsd:element name="src" type="xsd:string" minOccurs="0"/>
<xsd:element name="url" type="xsd:anyURI" minOccurs="0"/>
<xsd:element name="urlname" type="xsd:string" minOccurs="0"/>
<xsd:element name="sym" type="xsd:string" minOccurs="0"/>
<xsd:element name="type" type="xsd:string" minOccurs="0"/>

<!-- Accuracy info -->
<xsd:element name="fix" type="gpx:fixType" minOccurs="0"/>
<xsd:element name="sat" type="xsd:nonNegativeInteger" minOccurs="0"/>
<xsd:element name="hdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="vdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="pdop" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="ageofdgpsdata" type="xsd:decimal" minOccurs="0"/>
<xsd:element name="dgpsid" type="gpx:dgpsStationType" minOccurs="0"/>

<!-- you can add your own privately defined trkpt elements at the end of the trkpt -->
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
<xsd:attribute name="lat" type="gpx:latitudeType" use="required"/>
<xsd:attribute name="lon" type="gpx:longitudeType" use="required"/>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<!-- you can add your own privately defined elements at the end of the GPX file -->
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
<xsd:attribute name="version" type="xsd:string" use="required" fixed="1.0"/> <!-- version 1.0 -->
<xsd:attribute name="creator" type="xsd:string" use="required"/>
</xsd:complexType>
</xsd:element>

<!-- Other types used by GPX -->

<xsd:simpleType name="latitudeType"> <!-- decimal restricted to the inclusive range -90.0 to 90.0; type of the "lat", "minlat" and "maxlat" attributes -->
<xsd:restriction base="xsd:decimal">
<xsd:minInclusive value="-90.0"/>
<xsd:maxInclusive value="90.0"/>
</xsd:restriction>
</xsd:simpleType>

<xsd:simpleType name="longitudeType"> <!-- decimal restricted to the inclusive range -180.0 to 180.0; type of the "lon", "minlon" and "maxlon" attributes -->
<xsd:restriction base="xsd:decimal">
<xsd:minInclusive value="-180.0"/>
<xsd:maxInclusive value="180.0"/>
</xsd:restriction>
</xsd:simpleType>

<xsd:simpleType name="degreesType"> <!-- for bearing, heading, course. Units are degrees, true. Decimal restricted to the inclusive range 0.0 to 360.0; type of the "magvar" and "course" elements -->
<xsd:restriction base="xsd:decimal">
<xsd:minInclusive value="0.0"/>
<xsd:maxInclusive value="360.0"/>
</xsd:restriction>
</xsd:simpleType>

<xsd:simpleType name="fixType"> <!-- closed set of string values allowed in the "fix" element -->
<xsd:restriction base="xsd:string">
<xsd:enumeration value="none"/> <!-- none means GPS had no fix. To signify "the fix info is unknown", leave out the <fix> tag entirely -->
<xsd:enumeration value="2d"/>
<xsd:enumeration value="3d"/>
<xsd:enumeration value="dgps"/>
<xsd:enumeration value="pps"/> <!-- military signal used -->
</xsd:restriction>
</xsd:simpleType>

<xsd:simpleType name="dgpsStationType"> <!-- integer restricted to the inclusive range 0 to 1023; type of the "dgpsid" element -->
<xsd:restriction base="xsd:integer">
<xsd:minInclusive value="0"/>
<xsd:maxInclusive value="1023"/>
</xsd:restriction>
</xsd:simpleType>

<xsd:complexType name="boundsType"> <!-- bounding rect for data in file; type of the "bounds" element. It has no child elements, and all four corner attributes are required -->
<xsd:attribute name="minlat" type="gpx:latitudeType" use="required"/>
<xsd:attribute name="minlon" type="gpx:longitudeType" use="required"/>
<xsd:attribute name="maxlat" type="gpx:latitudeType" use="required"/>
<xsd:attribute name="maxlon" type="gpx:longitudeType" use="required"/>
</xsd:complexType>

<xsd:simpleType name="emailType"> <!-- string shaped like local@domain; type of the "email" element -->
<xsd:restriction base="xsd:string">
<!-- Local part: one or more dot-separated groups of Unicode letters (\p{L}) or underscores.
     Domain part: at least two such groups separated by dots.
     Digits and hyphens are not in the character class, so addresses containing them fail validation. -->
<xsd:pattern value="[\p{L}_]+(\.[\p{L}_]+)*@[\p{L}_]+(\.[\p{L}_]+)+"/>
</xsd:restriction>
</xsd:simpleType>

</xsd:schema>
Loading

0 comments on commit c51888d

Please sign in to comment.