@@ -13,13 +13,21 @@ def reviewify
13
13
strt , brk , ul , ol , li = false , false , false , false , false
14
14
chars = [ "?" , "," , "." , ":" , ";" , "..." , "''" , "'" ]
15
15
sent = sentences [ i ] = sent . join ( " " )
16
- sent = sentences [ i ] = sent . gsub ( /(``[^'']+$)/ ) { |s | "#{ $1} \" " }
16
+ if sentences [ i + 1 ] and sentences [ i + 1 ] . join ( " " ) . index ( /<\/ p>\s +<p>/ )
17
+ sent = sentences [ i ] = sent . gsub ( /([``|''][^'']+$)/ ) { |s | "#{ $1} \" " }
18
+ end
17
19
sent = sentences [ i ] = sent . gsub ( "`` " , ' "' ) . gsub ( "--" , "—" )
18
20
chars . each { |chr | sent = sentences [ i ] = sent . gsub ( " " + chr , chr ) }
19
21
if sent . index ( /<\/ p>\s +<p>/ )
20
22
sent = sentences [ i ] = sent . gsub ( /<\/ p>\s +<p>/ ) { |s | "" }
21
23
brk = true
22
24
end
25
+ if sent . index ( "<em>" ) and !sent . index ( "</em>" )
26
+ sent = sentences [ i ] = sent + "</em>"
27
+ end
28
+ if ( sent . index ( "</em>" ) and sent . index ( "<em>" ) and sent . index ( "</em>" ) < sent . index ( "<em>" ) ) or ( sent . index ( "</em>" ) and !sent . index ( "<em>" ) )
29
+ sent = sentences [ i ] = sent . gsub ( /(<\/ em>.*?\s )/ , "" )
30
+ end
23
31
if sent . index ( "<p>" )
24
32
sent = sentences [ i ] = sent . gsub ( "<p>" , "" )
25
33
strt = true
@@ -48,10 +56,14 @@ def reviewify
48
56
end
49
57
end
50
58
glob = HTMLEntities . new . decode ( sentences . compact . join ( " " ) )
59
+ glob = glob . gsub ( " n't" , "n't" )
60
+ glob = glob . gsub ( /<em> (.*?) <\/ em>/ ) { |s | "<em>#{ $1} </em>" }
61
+ glob = glob . gsub ( " !" , "!" )
51
62
glob = glob . gsub ( /<a class="sentence" href="#" id="sent\- \d ">\. <\/ a>/ ) { |s | "" } . gsub ( "</p>." , "</p>" )
52
63
glob = glob . gsub ( "< span style =" , "<span class=\" heading\" style=" ) . gsub ( "< \\ \/ span >." , "</span>" ) . gsub ( "< br >" , "<br>" ) . gsub ( "'' >" , "\" >" )
53
64
glob = glob . gsub ( "-LRB- " , "(" ) . gsub ( " -RRB-" , ")" )
54
65
glob = glob . gsub ( "</p></a>" , "</a></p>" )
66
+ glob = glob . gsub ( ",'' " , ", \" " )
55
67
return { :content => glob , :n_sentences => "0" * ( sentences . length + 1 ) }
56
68
end
57
69
end
0 commit comments