Skip to content

Commit

Permalink
Hopefully Fixes nlplab#832, Closes nlplab#1009 (double spaces)
Browse files Browse the repository at this point in the history
  • Loading branch information
Goran Topic committed Apr 26, 2013
1 parent 6dbfcb5 commit d357d4c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 9 deletions.
11 changes: 10 additions & 1 deletion client/src/visualizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1220,8 +1220,17 @@ var Visualizer = (function($, window, undefined) {
'space characters'
, 'warning', 15]]]);
}
var startPos = text.getStartPositionOfChar(firstChar).x;
var lastChar = fragment.to - fragment.chunk.from - 1;

// Adjust for XML whitespace (#832, #1009)
var textUpToFirstChar = fragment.chunk.text.substring(0, firstChar);
var textUpToLastChar = fragment.chunk.text.substring(0, lastChar);
var textUpToFirstCharUnspaced = textUpToFirstChar.replace(/\s\s+/g, ' ');
var textUpToLastCharUnspaced = textUpToLastChar.replace(/\s\s+/g, ' ');
firstChar -= textUpToFirstChar.length - textUpToFirstCharUnspaced.length;
lastChar -= textUpToLastChar.length - textUpToLastCharUnspaced.length;

var startPos = text.getStartPositionOfChar(firstChar).x;
var endPos = (lastChar < 0)
? startPos
: text.getEndPositionOfChar(lastChar).x;
Expand Down
8 changes: 0 additions & 8 deletions server/src/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,14 +629,6 @@ def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
Messager.error('Error reading text file: nonstandard encoding or binary?', -1)
raise UnableToReadTextFile(txt_file_path)

# TODO XXX huge hack, sorry: the client currently crashes on
# Chrome for two or more consecutive spaces, so replace every
# second one with a literal non-breaking space. Note that this is
# just for the client display -- server-side storage is not affected.
# NOTE: it might be possible to fix this in a principled way by
# having xml:space="preserve" on the relevant elements.
text = text.replace("  ", ' '+unichr(0x00A0))

j_dic['text'] = text

from logging import info as log_info
Expand Down

0 comments on commit d357d4c

Please sign in to comment.