-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
support saving and restoring non-ascii group and dataset names, like …
…JSONLab
- Loading branch information
Showing
4 changed files
with
191 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
function newname = decodevarname(name,varargin)
%
%    newname = decodevarname(name)
%    newname = decodevarname(name,'UnpackHex',0)
%
%    Decode a hex-encoded variable name (from encodevarname) and restore
%    its original form
%
%    This function is sensitive to the default charset
%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
%    to set the encoding to UTF-8 before calling this function.
%
%    author: Qianqian Fang (q.fang <at> neu.edu)
%
%    input:
%        name: a string output from encodevarname, in which an encoded
%              leading character appears as "x0xHH_" and any other encoded
%              character appears as "_0xHH_", where the hex key HH stores
%              the ascii (or Unicode) value of the character.
%        varargin: optional settings, passed unchanged to jsonopt. The
%              value looked up under 'UnpackHex' (default 1) enables the
%              decoding; when it is false, name is returned unchanged.
%
%    output:
%        newname: the restored original string; name itself is returned
%              when it contains no "0xHH_" sequence
%
%    example:
%        decodevarname('x0x5F_a')   % returns _a
%        decodevarname('a_')   % returns a_ as it is a valid variable name
%        decodevarname('x0xE58F98__0xE9878F_')   % returns '变量'
%
%    this file is part of EazyH5 Toolbox: https://github.com/fangq/eazyh5
%
%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/eazyh5 for details
%

isunpack=jsonopt('UnpackHex',1,varargin{:});
newname=name;

% cheap pre-check: nothing to do when no hex key is present at all
if(isempty(regexp(name,'0x([0-9a-fA-F]+)_','once')))
    return
end
if(~isunpack)
    return
end

% an encoded character is either "x0xHH_" at the very start or "_0xHH_"
hexpat='(^x|_){1}0x([0-9a-fA-F]+)_';

if(exist('native2unicode','builtin'))
    % dynamic-expression replacement: every hex key ($2) is converted back
    % to text by the local function hex2unicode
    newname=regexprep(name,hexpat,'${hex2unicode($2)}');
    return
end

% fallback when native2unicode is not a builtin: locate every encoded
% token once (start and end index) and rebuild the string by hand
[pos,pend]=regexp(name,hexpat,'start','end');
if(isempty(pos))
    return;
end
pos0=[0 pend(:)'];   % pos0(i) is the end of the previous token (0 for the first)
newname='';
for i=1:length(pos)
    % literal text before token i, then the decoded character; the hex
    % digits sit between the 3-character prefix ("x0x"/"_0x") and the
    % trailing "_"
    newname=[newname name(pos0(i)+1:pos(i)-1) char(hex2dec(name(pos(i)+3:pend(i)-1)))];
end
% append whatever follows the last token; the slice is empty when the
% last token ends the string, so no guard is needed
newname=[newname name(pend(end)+1:length(name))];
|
||
function str=hex2unicode(hexstr)
%
%    str = hex2unicode(hexstr)
%
%    Convert a string of hex digits into text: the digits are read as one
%    unsigned integer, split into bytes, and the bytes are passed to
%    native2unicode.
%
%    input:
%        hexstr: a string of hexadecimal digits (the HH part of an
%                "x0xHH_" or "_0xHH_" key)
%
%    output:
%        str: the text returned by native2unicode for those bytes
%
codeval=hex2dec(hexstr);

% choose the narrowest unsigned integer class whose range contains codeval
intclass={'uint8','uint16','uint32','uint64'};
binhit=histc(codeval,[0 2^8 2^16 2^32 2^64]);
rawbytes=typecast(cast(codeval,intclass{binhit~=0}),'uint8');

% typecast returns the bytes in host byte order; the trimming and flip
% below presume the least-significant byte comes first (little-endian
% host -- TODO confirm for other platforms). Bytes after the last nonzero
% byte are dropped, then the order is reversed before decoding.
lastused=find(rawbytes,1,'last');
str=native2unicode(fliplr(rawbytes(:,1:lastused)));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
function str = encodevarname(str,varargin)
%
%    newname = encodevarname(name)
%
%    Encode an invalid variable name using a hex-format for bi-directional
%    conversions.
%
%    This function is sensitive to the default charset
%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
%    to set the encoding to UTF-8 before calling this function.
%
%    author: Qianqian Fang (q.fang <at> neu.edu)
%
%    input:
%        name: a string, can be either a valid or invalid variable name
%        varargin: accepted for interface compatibility; not used here
%
%    output:
%        newname: a valid variable name obtained by converting a leading
%                 character that is not an ASCII letter into "x0xHH_" and
%                 every other character outside [0-9A-Za-z_] into
%                 "_0xHH_", where HH is the hex form of the character's
%                 bytes from unicode2native (or of its numeric character
%                 code when unicode2native is not a builtin).
%
%                 the encoded name CAN be longer than 63, i.e. the
%                 maximum variable name length specified by namelengthmax;
%                 if one uses the output of this function as a struct or
%                 variable name, the name will be truncated at 63. Please
%                 consider using the name as a containers.Map key, which
%                 does not have such limit.
%
%    example:
%        encodevarname('_a')   % returns x0x5F_a
%        encodevarname('a_')   % returns a_ as it is a valid variable name
%        encodevarname('变量') % returns 'x0xE58F98__0xE9878F_'
%
%    this file is part of EazyH5 Toolbox: https://github.com/fangq/eazyh5
%
%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/eazyh5 for details
%

% step 1: a name must start with a letter; encode any other leading
% character with the special "x0xHH_" prefix form
if(~isempty(regexp(str,'^[^A-Za-z]','once')))
    if(exist('unicode2native','builtin'))
        str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
    else
        str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end));
    end
end

% nothing more to do when the (possibly prefixed) name is already valid
if(isvarname(str))
    return;
end

% step 2: encode every remaining character outside [0-9A-Za-z_]
if(exist('unicode2native','builtin'))
    str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
else
    % fallback when unicode2native is not a builtin: rebuild the string
    % by hand, replacing each offending character by its hex code
    cpos=regexp(str,'[^0-9A-Za-z_]');
    if(isempty(cpos))
        return;
    end
    str0=str;
    pos0=[0 cpos(:)'];   % pos0(i) is the position of the previous encoded char
    str='';
    for i=1:length(cpos)
        % literal text before character i, then its "_0xHH_" form
        str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)];
    end
    % append the text after the last encoded character, measured on the
    % ORIGINAL string; the slice is empty when that character was last
    str=[str str0(cpos(end)+1:length(str0))];
end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters