@@ -17,7 +17,6 @@ import (
1717
// xmlMIMERegex reports whether a Content-Type value names an XML payload.
// The match is case-insensitive and unanchored. It accepts "text/xml" and
// "text/wbxml", as well as application/, image/, message/ and model/ types
// whose subtype is "xml"/"wbxml" or ends in it after an optional prefix such
// as "rss+" (for example "application/rss+xml" or "image/svg+xml").
var xmlMIMERegex = regexp.MustCompile(`(?i)((application|image|message|model)/((\w|\.|-)+\+?)?|text/)(wb)?xml`)
1919
20-
2120// LoadURL loads the XML document from the specified URL.
2221func LoadURL (url string ) (* Node , error ) {
2322 resp , err := http .Get (url )
@@ -41,7 +40,7 @@ func Parse(r io.Reader) (*Node, error) {
4140func ParseWithOptions (r io.Reader , options ParserOptions ) (* Node , error ) {
4241 var data []byte
4342 var lineStarts []int
44-
43+
4544 // If line numbers are requested, read all data for position tracking
4645 if options .WithLineNumbers {
4746 var err error
@@ -50,7 +49,7 @@ func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
5049 return nil , err
5150 }
5251 r = bytes .NewReader (data )
53-
52+
5453 // Pre-calculate line starts
5554 lineStarts = []int {0 }
5655 for i , b := range data {
@@ -90,7 +89,7 @@ func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
9089 data : data ,
9190 lineStarts : lineStarts ,
9291 }
93-
92+
9493 err = annotator .annotateLineNumbers (p .doc )
9594 if err != nil {
9695 return nil , err
@@ -115,8 +114,8 @@ type parser struct {
115114 reader * cachedReader // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA.
116115 once sync.Once
117116 space2prefix map [string ]* xmlnsPrefix
118- currentLine int // Track current line number during parsing
119- lastProcessedPos int // Track how much cached data we've already processed for line counting
117+ currentLine int // Track current line number during parsing
118+ lastProcessedPos int // Track how much cached data we've already processed for line counting
120119}
121120
122121type xmlnsPrefix struct {
@@ -144,14 +143,14 @@ func createParser(r io.Reader) *parser {
144143// updateLineNumber scans only new cached data for newlines to update current line position
145144func (p * parser ) updateLineNumber () {
146145 cached := p .reader .CacheWithLimit (- 1 ) // Get all cached data
147-
146+
148147 // Only process data we haven't seen before
149148 for i := p .lastProcessedPos ; i < len (cached ); i ++ {
150149 if cached [i ] == '\n' {
151150 p .currentLine ++
152151 }
153152 }
154-
153+
155154 // Update our position to avoid reprocessing this data
156155 p .lastProcessedPos = len (cached )
157156}
@@ -166,10 +165,10 @@ func (p *parser) parse() (*Node, error) {
166165 p .reader .StartCaching ()
167166 tok , err := p .decoder .Token ()
168167 p .reader .StopCaching ()
169-
168+
170169 // Update line number based on processed content
171170 p .updateLineNumber ()
172-
171+
173172 if err != nil {
174173 return nil , err
175174 }
@@ -335,7 +334,7 @@ func (p *parser) parse() (*Node, error) {
335334 AddSibling (p .prev .Parent , node )
336335 }
337336 case xml.ProcInst : // Processing Instruction
338- if p .prev .Type != DeclarationNode {
337+ if ! ( p .prev .Type == DeclarationNode || p . prev . Type == ProcessingInstruction ) {
339338 p .level ++
340339 }
341340 node := & Node {Type : DeclarationNode , Data : tok .Target , level : p .level , LineNumber : p .currentLine }
@@ -346,6 +345,10 @@ func (p *parser) parse() (*Node, error) {
346345 AddAttr (node , pair [:i ], strings .Trim (pair [i + 1 :], `"'` ))
347346 }
348347 }
348+ if tok .Target != "xml" {
349+ node .Type = ProcessingInstruction
350+ node .ProcInst = & ProcInstData {Target : tok .Target , Inst : strings .TrimSpace (string (tok .Inst ))}
351+ }
349352 if p .level == p .prev .level {
350353 AddSibling (p .prev , node )
351354 } else if p .level > p .prev .level {
@@ -502,11 +505,11 @@ func (p *lineNumberAnnotator) getLineForPosition(pos int) int {
502505 if pos < 0 {
503506 return 1
504507 }
505-
508+
506509 line := 1
507510 for i , start := range p .lineStarts {
508511 if pos < start {
509- return i // i is the line number (1-based because lineStarts[0] = 0 for line 1)
512+ return i // i is the line number (1-based because lineStarts[0] = 0 for line 1)
510513 }
511514 line = i + 1
512515 }
@@ -538,7 +541,7 @@ func (p *lineNumberAnnotator) annotateNodesByPosition(node *Node) {
538541 if node == nil {
539542 return
540543 }
541-
544+
542545 // Annotate current node if not already done
543546 if node .LineNumber == 0 {
544547 switch node .Type {
@@ -547,19 +550,17 @@ func (p *lineNumberAnnotator) annotateNodesByPosition(node *Node) {
547550 case CommentNode :
548551 node .LineNumber = p .findCommentPosition (node .Data )
549552 case DeclarationNode :
550- if node .Data == "xml" {
551- node .LineNumber = p .findDeclarationLine ()
552- } else {
553- node .LineNumber = p .findProcessingInstructionPosition (node .Data )
554- }
553+ node .LineNumber = p .findDeclarationLine ()
554+ case ProcessingInstruction :
555+ node .LineNumber = p .findProcessingInstructionPosition (node .Data )
555556 case TextNode , CharDataNode :
556557 text := strings .TrimSpace (node .Data )
557558 if text != "" {
558559 node .LineNumber = p .findTextPosition (text )
559560 }
560561 }
561562 }
562-
563+
563564 // Recursively annotate children
564565 for child := node .FirstChild ; child != nil ; child = child .NextSibling {
565566 p .annotateNodesByPosition (child )
@@ -568,10 +569,10 @@ func (p *lineNumberAnnotator) annotateNodesByPosition(node *Node) {
568569
// positionTracker carries the bookkeeping used while nodes are matched back
// to their location in the raw document during a traversal.
type positionTracker struct {
	// currentPos is a cursor into the document.
	// NOTE(review): no use of this field is visible in this part of the
	// file; confirm how (or whether) it is maintained.
	currentPos int

	// Occurrence counters, one map per node kind. Each is keyed by the
	// string being searched for (element name, comment content, or text)
	// and records how many times that key has been requested so far, so
	// the caller can look up the N-th occurrence in the source.
	elementCounts, commentCounts, textCounts map[string]int
}
576577
577578// findElementPosition finds the line number for the next occurrence of an element
@@ -583,7 +584,7 @@ func (p *lineNumberAnnotator) findElementPosition(name string) int {
583584 textCounts : make (map [string ]int ),
584585 }
585586 }
586-
587+
587588 p .tracker .elementCounts [name ]++
588589 return p .findNthElementOccurrence (name , p .tracker .elementCounts [name ])
589590}
@@ -593,17 +594,17 @@ func (p *lineNumberAnnotator) findNthElementOccurrence(name string, n int) int {
593594 count := 0
594595 pos := 0
595596 dataStr := string (p .data )
596-
597+
597598 // Look for both prefixed and non-prefixed versions
598599 patterns := []string {
599- fmt .Sprintf ("<%s" , name ), // <name
600- fmt .Sprintf (":%s" , name ), // prefix:name
600+ fmt .Sprintf ("<%s" , name ), // <name
601+ fmt .Sprintf (":%s" , name ), // prefix:name
601602 }
602-
603+
603604 for {
604605 earliestPos := len (p .data )
605606 foundPattern := ""
606-
607+
607608 // Find the earliest occurrence of any pattern
608609 for _ , pattern := range patterns {
609610 foundPos := strings .Index (dataStr [pos :], pattern )
@@ -615,15 +616,15 @@ func (p *lineNumberAnnotator) findNthElementOccurrence(name string, n int) int {
615616 }
616617 }
617618 }
618-
619+
619620 if earliestPos == len (p .data ) {
620621 break // No more occurrences found
621622 }
622-
623+
623624 // Validate the match
624625 nextCharPos := earliestPos + len (foundPattern )
625626 isValidMatch := false
626-
627+
627628 if foundPattern [0 ] == '<' {
628629 // Direct element match like <name
629630 if nextCharPos < len (p .data ) {
@@ -654,7 +655,7 @@ func (p *lineNumberAnnotator) findNthElementOccurrence(name string, n int) int {
654655 }
655656 }
656657 }
657-
658+
658659 if isValidMatch {
659660 count ++
660661 if count == n {
@@ -669,10 +670,10 @@ func (p *lineNumberAnnotator) findNthElementOccurrence(name string, n int) int {
669670 return p .getLineForPosition (earliestPos )
670671 }
671672 }
672-
673+
673674 pos = earliestPos + 1
674675 }
675-
676+
676677 return 1
677678}
678679
@@ -685,7 +686,7 @@ func (p *lineNumberAnnotator) findCommentPosition(content string) int {
685686 textCounts : make (map [string ]int ),
686687 }
687688 }
688-
689+
689690 p .tracker .commentCounts [content ]++
690691 return p .findNthCommentOccurrence (content , p .tracker .commentCounts [content ])
691692}
@@ -695,7 +696,7 @@ func (p *lineNumberAnnotator) findNthCommentOccurrence(content string, n int) in
695696 pattern := fmt .Sprintf ("<!--%s-->" , content )
696697 count := 0
697698 pos := 0
698-
699+
699700 for {
700701 foundPos := strings .Index (string (p .data [pos :]), pattern )
701702 if foundPos < 0 {
@@ -730,7 +731,7 @@ func (p *lineNumberAnnotator) findTextPosition(text string) int {
730731 textCounts : make (map [string ]int ),
731732 }
732733 }
733-
734+
734735 p .tracker .textCounts [text ]++
735736 return p .findNthTextOccurrence (text , p .tracker .textCounts [text ])
736737}
@@ -739,7 +740,7 @@ func (p *lineNumberAnnotator) findTextPosition(text string) int {
739740func (p * lineNumberAnnotator ) findNthTextOccurrence (text string , n int ) int {
740741 count := 0
741742 pos := 0
742-
743+
743744 for {
744745 foundPos := strings .Index (string (p .data [pos :]), text )
745746 if foundPos < 0 {
@@ -765,16 +766,14 @@ func (p *lineNumberAnnotator) findProcessingInstructionPosition(target string) i
765766 return 1
766767}
767768
768-
769-
770769// LoadURLWithLineNumbers loads the XML document from the specified URL with line number annotations.
771770func LoadURLWithLineNumbers (url string ) (* Node , error ) {
772771 resp , err := http .Get (url )
773772 if err != nil {
774773 return nil , err
775774 }
776775 defer resp .Body .Close ()
777-
776+
778777 if xmlMIMERegex .MatchString (resp .Header .Get ("Content-Type" )) {
779778 return ParseWithOptions (resp .Body , ParserOptions {WithLineNumbers : true })
780779 }
0 commit comments