@@ -22,6 +22,7 @@ state type reader = state obj {
2222 fn get_mark_chpos ( ) -> uint ;
2323 fn get_interner ( ) -> @interner:: interner [ str ] ;
2424 fn get_chpos ( ) -> uint ;
25+ fn get_col ( ) -> uint ;
2526 fn get_filemap ( ) -> codemap:: filemap ;
2627 fn err ( str m) ;
2728} ;
@@ -33,6 +34,7 @@ fn new_reader(session sess, io::reader rdr,
3334 state obj reader ( session sess,
3435 str file,
3536 uint len,
37+ mutable uint col,
3638 mutable uint pos,
3739 mutable char ch,
3840 mutable uint mark_chpos,
@@ -68,9 +70,11 @@ fn new_reader(session sess, io::reader rdr,
6870
6971 fn bump ( ) {
7072 if ( pos < len) {
73+ col += 1 u;
7174 chpos += 1 u;
7275 if ( ch == '\n' ) {
7376 codemap:: next_line ( fm, chpos) ;
77+ col = 0 u;
7478 }
7579 auto next = str:: char_range_at ( file, pos) ;
7680 pos = next. _1 ;
@@ -82,6 +86,10 @@ fn new_reader(session sess, io::reader rdr,
8286
// Returns the reader's `itr` value (declared type: @interner::interner[str]).
8387 fn get_interner ( ) -> @interner:: interner [ str ] { ret itr; }
8488
// Returns the reader's `col` field. `bump` adds one to it on every advance
// and sets it back to 0 when the character being left behind is '\n'.
89+ fn get_col ( ) -> uint {
90+ ret col;
91+ }
92+
// Returns the reader's `fm` value, the codemap::filemap that `bump` passes to
// codemap::next_line whenever it steps over a '\n'.
8593 fn get_filemap ( ) -> codemap:: filemap {
8694 ret fm;
8795 }
@@ -92,7 +100,8 @@ fn new_reader(session sess, io::reader rdr,
92100 }
93101 auto file = str:: unsafe_from_bytes ( rdr. read_whole_stream ( ) ) ;
94102 let vec[ str] strs = [ ] ;
95- auto rd = reader ( sess, file, str:: byte_len ( file) , 0 u, -1 as char ,
103+ auto rd = reader ( sess, file, str:: byte_len ( file) , 0 u, 0 u,
104+ -1 as char ,
96105 filemap. start_pos , filemap. start_pos ,
97106 strs, filemap, itr) ;
98107 rd. init ( ) ;
@@ -155,7 +164,7 @@ fn is_whitespace(char c) -> bool {
155164 ret c == ' ' || c == '\t' || c == '\r' || c == '\n' ;
156165}
157166
158- fn consume_any_whitespace ( & reader rdr) {
167+ fn consume_whitespace_and_comments ( & reader rdr) {
159168 while ( is_whitespace ( rdr. curr ( ) ) ) {
160169 rdr. bump ( ) ;
161170 }
@@ -170,7 +179,7 @@ fn consume_any_line_comment(&reader rdr) {
170179 rdr. bump ( ) ;
171180 }
172181 // Restart whitespace munch.
173- be consume_any_whitespace ( rdr) ;
182+ be consume_whitespace_and_comments ( rdr) ;
174183 }
175184 case ( '*' ) {
176185 rdr. bump ( ) ;
@@ -207,7 +216,7 @@ fn consume_block_comment(&reader rdr) {
207216 }
208217 }
209218 // restart whitespace munch.
210- be consume_any_whitespace ( rdr) ;
219+ be consume_whitespace_and_comments ( rdr) ;
211220}
212221
213222fn digits_to_string ( str s) -> int {
@@ -430,7 +439,7 @@ fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
430439fn next_token ( & reader rdr) -> token:: token {
431440 auto accum_str = "" ;
432441
433- consume_any_whitespace ( rdr) ;
442+ consume_whitespace_and_comments ( rdr) ;
434443
435444 if ( rdr. is_eof ( ) ) { ret token:: EOF ; }
436445
@@ -720,70 +729,161 @@ fn next_token(&reader rdr) -> token::token {
720729 fail;
721730}
722731
723- tag cmnt_ {
724- cmnt_line( str) ;
725- cmnt_block ( vec[ str] ) ;
732+
// Placement of a gathered comment relative to surrounding code.
// read_line_comments and read_block_comment choose `trailing` or `isolated`
// from their `code_to_the_left` argument; read_block_comment switches to
// `mixed` when a one-line block comment is followed by more text on its line.
733+ tag cmnt_style {
734+ isolated; // No code on either side of each line of the comment
735+ trailing; // Code exists to the left of the comment
736+ mixed; // Code before /* foo */ and after the comment
726737}
727738
728- type cmnt = rec ( cmnt_ val , uint pos , bool space_after ) ;
// A gathered comment: its placement style, its text split into lines, and the
// value of rdr.get_chpos() taken just before the comment was read.
739+ type cmnt = rec ( cmnt_style style , vec [ str ] lines , uint pos ) ;
729740
730- fn consume_whitespace ( & reader rdr) -> uint {
731- auto lines = 0 u;
732- while ( is_whitespace ( rdr. curr ( ) ) ) {
733- if ( rdr. curr ( ) == '\n' ) { lines += 1 u; }
// Reads characters from `rdr` up to the next '\n' or end of input and returns
// them as a string. The terminating '\n' is consumed but not included in the
// result.
741+ fn read_to_eol ( & reader rdr) -> str {
742+ auto val = "" ;
// Accumulate everything on the current line.
743+ while ( rdr. curr ( ) != '\n' && !rdr. is_eof ( ) ) {
744+ str:: push_char ( val, rdr. curr ( ) ) ;
745+ rdr. bump ( ) ;
746+ }
// Step over the newline; if there is none, the loop must have stopped at EOF.
747+ if ( rdr. curr ( ) == '\n' ) {
734748 rdr. bump ( ) ;
749+ } else {
750+ assert rdr. is_eof ( ) ;
735751 }
736- ret lines ;
752+ ret val ;
737753}
738754
739- fn read_line_comment ( & reader rdr) -> cmnt {
740- auto p = rdr. get_chpos ( ) ;
741- rdr. bump ( ) ; rdr. bump ( ) ;
742- while ( rdr. curr ( ) == ' ' ) { rdr. bump ( ) ; }
743- auto val = "" ;
744- while ( rdr. curr ( ) != '\n' && !rdr. is_eof ( ) ) {
745- str:: push_char ( val, rdr. curr ( ) ) ;
// Reads the rest of the current line and returns it, including the leading
// "//". The caller must already be positioned on "//": the assert indexes the
// first two bytes of the line without checking its length first.
755+ fn read_one_line_comment ( & reader rdr) -> str {
756+ auto val = read_to_eol ( rdr) ;
757+ assert val. ( 0 ) == ( '/' as u8 ) && val. ( 1 ) == ( '/' as u8 ) ;
758+ ret val;
759+ }
760+
// Advances `rdr` past any run of whitespace (as defined by is_whitespace,
// which includes '\n'), stopping at the first other character or at EOF.
761+ fn consume_whitespace ( & reader rdr) {
762+ while ( is_whitespace ( rdr. curr ( ) ) && !rdr. is_eof ( ) ) {
746763 rdr. bump ( ) ;
747764 }
748- ret rec( val=cmnt_line ( val) ,
749- pos=p,
750- space_after=consume_whitespace ( rdr) > 1 u) ;
751765}
752766
753- fn read_block_comment ( & reader rdr) -> cmnt {
767+
// Advances `rdr` past whitespace on the current line only: it stops at '\n',
// at the first non-whitespace character, or at EOF, whichever comes first.
768+ fn consume_non_eol_whitespace ( & reader rdr) {
769+ while ( is_whitespace ( rdr. curr ( ) ) &&
770+ rdr. curr ( ) != '\n' && !rdr. is_eof ( ) ) {
771+ rdr. bump ( ) ;
772+ }
773+ }
774+
775+
// Reads a run of consecutive "//" comment lines into a single cmnt.
// `code_to_the_left` selects the style: `trailing` when true, `isolated`
// otherwise. The returned `pos` is the chpos at which the run started.
776+ fn read_line_comments ( & reader rdr, bool code_to_the_left ) -> cmnt {
777+ log ">>> line comments" ;
754778 auto p = rdr. get_chpos ( ) ;
755- rdr. bump ( ) ; rdr. bump ( ) ;
756- while ( rdr. curr ( ) == ' ' ) { rdr. bump ( ) ; }
757779 let vec[ str] lines = [ ] ;
758- auto val = "" ;
759- auto level = 1 ;
760- while ( true ) {
761- if ( rdr. curr ( ) == '\n' ) {
762- vec:: push[ str] ( lines, val) ;
763- val = "" ;
764- consume_whitespace ( rdr) ;
// Each iteration takes one whole "//" line (newline included), then skips the
// indentation of the following line. A blank line or any non-comment text
// therefore ends the run.
780+ while ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) {
781+ lines += [ read_one_line_comment ( rdr) ] ;
782+ consume_non_eol_whitespace ( rdr) ;
783+ }
784+ log "<<< line comments" ;
785+ ret rec( style = if ( code_to_the_left) { trailing } else { isolated } ,
786+ lines = lines,
787+ pos=p) ;
788+ }
789+
// Returns true when every byte of `s` at indices [begin, end) that actually
// exists in `s` is whitespace according to is_whitespace.
//
// The scan is bounded by the byte length of `s` as well as by `end`, so an
// `end` past the end of the string (for example a comment column wider than a
// blank line) never indexes out of range; positions beyond the string count
// as whitespace. An empty range, including begin >= end, yields true.
790+ fn all_whitespace ( & str s, uint begin , uint end) -> bool {
791+ let uint i = begin;
792+ while ( i < end && i < str:: byte_len ( s) ) {
793+ if ( !is_whitespace ( s. ( i) as char ) ) {
794+ ret false ;
795+ }
796+ i += 1 u;
797+ }
798+ ret true;
799+ }
800+
// Appends one line of a block comment to `lines`, removing up to `col` bytes
// of leading indentation when those bytes are all whitespace.
//   - prefix [0, col) is whitespace and s is longer than col: push s[col..]
//   - prefix [0, col) is whitespace and s is not longer than col: push ""
//   - otherwise: push s unchanged
// NOTE(review): all_whitespace is called with end = col before s's length is
// compared against col, so the safety of this call for lines shorter than col
// depends on all_whitespace bounding its own scan -- confirm.
801+ fn trim_whitespace_prefix_and_push_line ( & mutable vec[ str] lines ,
802+ & str s, uint col) {
803+ auto s1;
804+ if ( all_whitespace ( s, 0 u, col) ) {
805+ if ( col < str:: byte_len ( s) ) {
806+ s1 = str:: slice ( s, col, str:: byte_len ( s) ) ;
765807 } else {
766- if ( rdr. curr ( ) == '*' && rdr. next ( ) == '/' ) {
767- level -= 1 ;
768- if ( level == 0 ) {
769- rdr. bump ( ) ; rdr. bump ( ) ;
770- vec:: push[ str] ( lines, val) ;
771- break ;
772- }
773- } else if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
774- level += 1 ;
775- }
776- str:: push_char ( val, rdr. curr ( ) ) ;
777- rdr. bump ( ) ;
808+ s1 = "" ;
778809 }
810+ } else {
811+ s1 = s;
812+ }
813+ log "pushing line: " + s1;
814+ lines += [ s1] ;
815+ }
816+
// Reads one (possibly nested) "/* ... */" comment into a cmnt. The caller
// must be positioned on the opening "/*".
//   - `pos` is the chpos at the opening delimiter.
//   - `lines` holds the comment text, delimiters included, one entry per
//     source line, each passed through trim_whitespace_prefix_and_push_line
//     with the column at which the comment opened.
//   - `style` is `trailing`/`isolated` per `code_to_the_left`, or `mixed` for
//     a one-line comment followed by more text on the same line.
// Reports an error and fails if EOF is reached before the comment closes.
817+ fn read_block_comment ( & reader rdr,
818+ bool code_to_the_left ) -> cmnt {
819+ log ">>> block comment" ;
820+ auto p = rdr. get_chpos ( ) ;
821+ let vec[ str] lines = [ ] ;
// Column of the opening "/*"; used later as the indentation to strip.
822+ let uint col = rdr. get_col ( ) ;
// Skip the two characters of the opening delimiter; they are re-added below
// as the initial contents of curr_line.
823+ rdr. bump ( ) ;
824+ rdr. bump ( ) ;
825+ auto curr_line = "/*" ;
// Nesting depth: incremented on each "/*", decremented on each "*/".
826+ let int level = 1 ;
827+ while ( level > 0 ) {
828+ log #fmt( "=== block comment level %d" , level) ;
779829 if ( rdr. is_eof ( ) ) {
780- rdr. err ( "Unexpected end of file in block comment" ) ;
830+ rdr. err ( "unterminated block comment" ) ;
781831 fail;
782832 }
833+ if ( rdr. curr ( ) == '\n' ) {
// End of a source line: flush the accumulated text and start a new line.
834+ trim_whitespace_prefix_and_push_line ( lines, curr_line, col) ;
835+ curr_line = "" ;
836+ rdr. bump ( ) ;
837+ } else {
// The current character is always recorded; for a two-character delimiter
// the second character is appended explicitly after both are consumed.
838+ str:: push_char ( curr_line, rdr. curr ( ) ) ;
839+ if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
840+ rdr. bump ( ) ;
841+ rdr. bump ( ) ;
842+ curr_line += "*" ;
843+ level += 1 ;
844+ } else {
845+ if ( rdr. curr ( ) == '*' && rdr. next ( ) == '/' ) {
846+ rdr. bump ( ) ;
847+ rdr. bump ( ) ;
848+ curr_line += "/" ;
849+ level -= 1 ;
850+ } else {
851+ rdr. bump ( ) ;
852+ }
853+ }
854+ }
783855 }
784- ret rec( val=cmnt_block ( lines) ,
785- pos=p,
786- space_after=consume_whitespace ( rdr) > 1 u) ;
// Flush the final line, the one that holds the closing "*/".
856+ if ( str:: byte_len ( curr_line) != 0 u) {
857+ trim_whitespace_prefix_and_push_line ( lines, curr_line, col) ;
858+ }
859+
860+ auto style = if ( code_to_the_left) { trailing } else { isolated } ;
// Look past trailing blanks: anything other than a newline or EOF after a
// single-line comment means the comment sits in the middle of other text.
861+ consume_non_eol_whitespace ( rdr) ;
862+ if ( !rdr. is_eof ( ) &&
863+ rdr. curr ( ) != '\n' &&
864+ vec:: len ( lines) == 1 u) {
865+ style = mixed;
866+ }
867+ log "<<< block comment" ;
868+ ret rec( style = style, lines = lines, pos=p) ;
869+ }
870+
// True when the reader's current and next characters form a comment opener,
// either "//" or "/*". Only curr() and next() are called here; nothing is
// bumped.
871+ fn peeking_at_comment ( & reader rdr) -> bool {
872+ ret ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) ||
873+ ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) ;
874+ }
875+
// Reads the comment at the reader's current position and appends it to
// `comments`: a run of "//" lines via read_line_comments, or a "/* */" comment
// via read_block_comment. `code_to_the_left` is forwarded unchanged.
// Fails if the reader is not positioned on a comment opener, so callers
// should check peeking_at_comment first.
876+ fn consume_comment ( & reader rdr, bool code_to_the_left ,
877+ & mutable vec[ cmnt] comments ) {
878+ log ">>> consume comment" ;
879+ if ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) {
880+ vec:: push[ cmnt] ( comments,
881+ read_line_comments ( rdr, code_to_the_left) ) ;
882+ } else if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
883+ vec:: push[ cmnt] ( comments,
884+ read_block_comment ( rdr, code_to_the_left) ) ;
885+ } else { fail; }
886+ log "<<< consume comment" ;
787887}
788888
// Collects every comment read from `rdr` into a vec[cmnt], in the order the
// comments are encountered. (The statements that construct `rdr` sit between
// the signature and the hunk below and are not visible in this excerpt.)
//
// Each outer iteration gathers the comments that precede the next token and
// then discards that token with next_token.
789889fn gather_comments ( session sess, str path ) -> vec[ cmnt ] {
@@ -793,17 +893,22 @@ fn gather_comments(session sess, str path) -> vec[cmnt] {
793893 let vec[ cmnt] comments = [ ] ;
794894 while ( !rdr. is_eof ( ) ) {
795895 while ( true ) {
796- consume_whitespace ( rdr) ;
797- if ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) {
798- vec:: push[ cmnt] ( comments, read_line_comment ( rdr) ) ;
799- } else if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
800- vec:: push[ cmnt] ( comments, read_block_comment ( rdr) ) ;
801- } else { break ; }
// Assume code precedes the comment until a line break shows otherwise.
896+ auto code_to_the_left = true ;
897+ consume_non_eol_whitespace ( rdr) ;
// consume_non_eol_whitespace leaves the reader ON the newline, if there is
// one, so the current character must be tested. Testing next() here missed
// every comment that followed a single newline; those comments were then
// silently dropped by next_token, which skips comments itself.
898+ if ( rdr. curr ( ) == '\n' ) {
899+ code_to_the_left = false ;
900+ consume_whitespace ( rdr) ;
901+ }
// Gather every comment that sits before the next token.
902+ while ( peeking_at_comment ( rdr) ) {
903+ consume_comment ( rdr, code_to_the_left, comments) ;
904+ consume_whitespace ( rdr) ;
905+ }
906+ break ;
802907 }
803908 next_token ( rdr) ;
804909 }
805910 ret comments;
806- }
911+ }
807912
808913
809914//
0 commit comments