handle surrogate pairs

CodeMirror and JavaScript use UTF-16 code unit offsets,
but the Jupyter protocol expects Unicode *character* offsets,
so we need to translate back and forth.
pull/2509/head
Min RK 9 years ago
parent e04c210fbd
commit a5e64e10b7

@ -1017,6 +1017,36 @@ define([
}
};
// JavaScript stores text as UTF-16, and string indices count "code units".
// High-codepoint characters are stored as "surrogate pairs",
// which occupy two indices in the JavaScript string.
// We need to translate cursor_pos in the protocol (in characters)
// to and from JS offsets (with surrogate pairs taking two spots).
/**
 * Convert a JavaScript string offset (UTF-16 code units) into a
 * character offset, where each surrogate pair counts as one character.
 *
 * @param {number} js_idx - offset into `text` in UTF-16 code units
 * @param {string} text - the string the offset refers to
 * @returns {number} the offset with every surrogate pair that starts
 *     before `js_idx` counted once instead of twice
 */
function js_idx_to_char_idx (js_idx, text) {
    var char_idx = js_idx;
    // `i + 1 < text.length` guarantees there is a following code unit to inspect.
    for (var i = 0; i + 1 < text.length && i < js_idx; i++) {
        var char_code = text.charCodeAt(i);
        // high (leading) surrogate?
        if (char_code >= 0xD800 && char_code <= 0xDBFF) {
            var next_char_code = text.charCodeAt(i + 1);
            // Only a high surrogate followed by a low (trailing) surrogate
            // forms a pair; a lone high surrogate occupies a single index
            // and must not shift the result.
            if (next_char_code >= 0xDC00 && next_char_code <= 0xDFFF) {
                char_idx -= 1;
                // skip the low half of the pair
                i++;
            }
        }
    }
    return char_idx;
}
/**
 * Convert a character offset (each surrogate pair counted as one
 * character) into a JavaScript string offset in UTF-16 code units.
 *
 * @param {number} char_idx - offset into `text` in characters
 * @param {string} text - the string the offset refers to
 * @returns {number} the offset with one extra index added for every
 *     surrogate pair that precedes it
 */
function char_idx_to_js_idx (char_idx, text) {
    var js_idx = char_idx;
    // js_idx grows as pairs are found, so the loop bound moves with it.
    // `i + 1 < text.length` guarantees there is a following code unit to inspect.
    for (var i = 0; i + 1 < text.length && i < js_idx; i++) {
        var char_code = text.charCodeAt(i);
        // high (leading) surrogate?
        if (char_code >= 0xD800 && char_code <= 0xDBFF) {
            var next_char_code = text.charCodeAt(i + 1);
            // Only a high surrogate followed by a low (trailing) surrogate
            // forms a pair; a lone high surrogate occupies a single index
            // and must not shift the result.
            if (next_char_code >= 0xDC00 && next_char_code <= 0xDFFF) {
                js_idx += 1;
                // skip the low half of the pair
                i++;
            }
        }
    }
    return js_idx;
}
// Test if a drag'n'drop event contains a file (as opposed to an HTML
// element/text from the document)
var dnd_contain_file = function(event) {
@ -1112,6 +1142,8 @@ define([
format_datetime: format_datetime,
datetime_sort_helper: datetime_sort_helper,
dnd_contain_file: dnd_contain_file,
js_idx_to_char_idx: js_idx_to_char_idx,
char_idx_to_js_idx: char_idx_to_js_idx,
_ansispan:_ansispan,
change_favicon: change_favicon
};

@ -153,6 +153,8 @@ define([
// one kernel completion came back, finish_completing will be called with the results
// we fork here and directly call finish completing if kernel is busy
var cursor_pos = this.editor.indexFromPos(cur);
var text = this.editor.getValue();
cursor_pos = utils.js_idx_to_char_idx(cursor_pos, text);
if (this.skip_kernel_completion) {
this.finish_completing({ content: {
matches: [],
@ -160,7 +162,7 @@ define([
cursor_end: cursor_pos,
}});
} else {
this.cell.kernel.complete(this.editor.getValue(), cursor_pos,
this.cell.kernel.complete(text, cursor_pos,
$.proxy(this.finish_completing, this)
);
}
@ -175,6 +177,7 @@ define([
var start = content.cursor_start;
var end = content.cursor_end;
var matches = content.matches;
console.log(content);
var cur = this.editor.getCursor();
if (end === null) {
@ -187,7 +190,13 @@ define([
} else if (start < 0) {
start = end + start;
}
} else {
// handle surrogate pairs
var text = this.editor.getValue();
end = utils.char_idx_to_js_idx(end, text);
start = utils.char_idx_to_js_idx(start, text);
}
var results = CodeMirror.contextHint(this.editor);
var filtered_results = [];
//remove results from context completion

Loading…
Cancel
Save