Skip to content

Commit

Permalink
support saving and restoring non-ascii group and dataset names, like …
Browse files Browse the repository at this point in the history
…JSONLab
  • Loading branch information
fangq committed Sep 30, 2019
1 parent 1486603 commit bb762a8
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 31 deletions.
64 changes: 64 additions & 0 deletions decodevarname.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
function newname = decodevarname(name,varargin)
%
%    newname = decodevarname(name)
%    newname = decodevarname(name,'UnpackHex',0)
%
%    Decode a hex-encoded variable name (from encodevarname) and restore
%    its original form
%
%    This function is sensitive to the default charset
%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
%    to set the encoding to UTF-8 before calling this function.
%
%    author: Qianqian Fang (q.fang <at> neu.edu)
%
%    input:
%        name: a string output from encodevarname, which converts the leading non-ascii
%              letter into "x0xHH_" and non-ascii letters into "_0xHH_"
%              format, where hex key HH stores the ascii (or Unicode) value
%              of the character.
%        varargin: optional settings forwarded to jsonopt; the only key
%              read here is 'UnpackHex' (default 1). When it is 0 the
%              input is returned unchanged.
%
%    output:
%        newname: the restored original string
%
%    example:
%        decodevarname('x0x5F_a')   % returns _a
%        decodevarname('a_')   % returns a_ as it is a valid variable name
%        decodevarname('x0xE58F98__0xE9878F_')   % returns '变量'
%
%    this file is part of EazyH5 Toolbox: https://github.com/fangq/eazyh5
%
%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/eazyh5 for details
%

isunpack=jsonopt('UnpackHex',1,varargin{:});
newname=name;

% an escape is "x0x<hex>_" at the very start of the name, or "_0x<hex>_" anywhere
pattern='(^x|_){1}0x([0-9a-fA-F]+)_';

% nothing to decode: return the input untouched
if(isempty(regexp(name,pattern,'once')))
    return
end

if(isunpack)
    if(exist('native2unicode','builtin'))
        % the dynamic expression calls the local function hex2unicode on
        % the hex digits captured by the 2nd token
        newname=regexprep(name,pattern,'${hex2unicode($2)}');
    else
        % manual fallback when native2unicode is not a builtin: each hex
        % value is mapped to a single char via char(hex2dec(...))
        [pos,pend]=regexp(name,pattern);   % default outputs: match starts, match ends
        newname='';
        prev=0;                            % end index of the previous match
        for i=1:length(pos)
            % literal text between matches, followed by the decoded char;
            % the hex digits start 3 chars after the match start ("x0x" or "_0x")
            % and stop right before the closing "_"
            newname=[newname name(prev+1:pos(i)-1) char(hex2dec(name(pos(i)+3:pend(i)-1)))];
            prev=pend(i);
        end
        % append whatever follows the last match (empty if the match ends the name)
        newname=[newname name(prev+1:end)];
    end
end

function str=hex2unicode(hexstr)
%
%    str = hex2unicode(hexstr)
%
%    Convert a string of hex digits into the byte sequence it spells
%    (most-significant byte first) and decode those bytes with
%    native2unicode using the default character set.
%
%    input:
%        hexstr: a char vector of hex digits, e.g. '5F' or 'E58F98'
%
%    output:
%        str: the decoded character(s); hex strings that are all zeros
%             decode to char(0)
%

hexstr=hexstr(:).';
if(isempty(hexstr))
    str='';
    return;
end

% left-pad to an even number of digits so the string splits into whole bytes
if(mod(length(hexstr),2))
    hexstr=['0' hexstr];
end

% one row per byte; parsing the digits directly avoids going through a
% double (precision loss above 2^53) and avoids any dependence on the
% host byte order
bytes=uint8(hex2dec(reshape(hexstr,2,[]).')).';

% drop leading (high-order) zero bytes, but keep a single 0 byte so that a
% zero value is not decoded to an empty string
firstnz=find(bytes,1,'first');
if(isempty(firstnz))
    bytes=uint8(0);
else
    bytes=bytes(firstnz:end);
end

str=native2unicode(bytes);
67 changes: 67 additions & 0 deletions encodevarname.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
function str = encodevarname(str,varargin)
%
%    newname = encodevarname(name)
%
%    Encode an invalid variable name using a hex-format for bi-directional
%    conversions.
%
%    This function is sensitive to the default charset
%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
%    to set the encoding to UTF-8 before calling this function.
%
%    author: Qianqian Fang (q.fang <at> neu.edu)
%
%    input:
%        name: a string, can be either a valid or invalid variable name
%
%    output:
%        newname: a valid variable name by converting the leading non-ascii
%              letter into "x0xHH_" and non-ascii letters into "_0xHH_"
%              format, where HH is the ascii (or Unicode) value of the
%              character.
%
%              the encoded variable name can be longer than 63, i.e.
%              the maximum variable name specified by namelengthmax; if
%              one uses the output of this function as a struct or variable
%              name, the name will be truncated at 63. Please consider using
%              the name as a containers.Map key, which does not have such
%              limit.
%
%    example:
%        encodevarname('_a')   % returns x0x5F_a
%        encodevarname('a_')   % returns a_ as it is a valid variable name
%        encodevarname('变量')   % returns 'x0xE58F98__0xE9878F_'
%
%    this file is part of EazyH5 Toolbox: https://github.com/fangq/eazyh5
%
%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/eazyh5 for details
%

% step 1: a name must start with an ASCII letter; escape any other leading
% character as "x0x<hex>_"
if(~isempty(regexp(str,'^[^A-Za-z]','once')))
    if(exist('unicode2native','builtin'))
        str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
    else
        % fallback: use the numeric value of the first char directly
        str=['x0x' sprintf('%X',double(str(1))) '_' str(2:end)];
    end
end

% already a valid variable name: nothing more to escape
if(isvarname(str))
    return;
end

% step 2: escape every remaining character outside [0-9A-Za-z_] as "_0x<hex>_"
if(exist('unicode2native','builtin'))
    str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
else
    cpos=regexp(str,'[^0-9A-Za-z_]');
    if(isempty(cpos))
        return;
    end
    str0=str;      % untouched copy of the input to slice from
    str='';
    prev=0;        % index of the previously escaped character
    for i=1:length(cpos)
        % literal text since the previous escape, then the escaped char
        str=[str str0(prev+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)];
        prev=cpos(i);
    end
    % append whatever follows the last escaped character (may be empty)
    str=[str str0(prev+1:end)];
end
end
72 changes: 42 additions & 30 deletions loadh5.m
Original file line number Diff line number Diff line change
@@ -1,35 +1,42 @@
function varargout=loadh5(filename, path, varargin)
%
% [data, meta] = loadh5(filename)
% [data, meta] = loadh5(root_id)
% [data, meta] = loadh5(filename, rootpath)
% [data, meta] = loadh5(filename, rootpath,'param1',value1,'param2',value2,...)
% [data, meta] = loadh5(filename)
% [data, meta] = loadh5(root_id)
% [data, meta] = loadh5(filename, rootpath)
% [data, meta] = loadh5(filename, rootpath, options)
% [data, meta] = loadh5(filename, rootpath, 'Param1',value1, 'Param2',value2,...)
%
% Load data in an HDF5 file to a MATLAB structure.
% Load data in an HDF5 file to a MATLAB structure.
%
% Author: Pauli Virtanen <pav at iki.fi>
% author: Qianqian Fang (q.fang <at> neu.edu)
%
% Updated by Qianqian Fang <q.fang at neu.edu>
% - reading attributes and return as 2nd output 'meta'
% - handle arbitrary matlab object saved by saveh5.m
% - support Real/Imag composite record for complex arrays
% input
% filename
% Name of the file to load data from
% root_id: an HDF5 handle (of type 'H5ML.id' in MATLAB)
% rootpath : (optional)
% Root path to read part of the HDF5 file to load
% options: (optional) a struct or Param/value pairs for user specified options
% Order: 'creation' - creation order (default), or 'alphabet' - alphabetic
% PackHex: [1|0]: convert invalid characters in the group/dataset
% names to 0x[hex code] by calling encodevarname.m;
% if set to 0, call genvarname
%
% input
% filename
% Name of the file to load data from
% root_id: an HDF5 handle (of type 'H5ML.id' in MATLAB)
% rootpath : optional
% Root path to read part of the HDF5 file to load
% param/value: acceptable optional parameters include
% 'order': 'creation' - creation order, or 'alphabet' - alphabetic
% output
% data: a structure (array) or cell (array)
% meta: optional output to store the attributes stored in the file
%
% output
% data: a structure (array) or cell (array)
% meta: optional output to store the attributes stored in the file
% example:
% a={rand(2), struct('va',1,'vb','string'), 1+2i};
% saveh5(a,'test.h5');
% a2=loadh5('test.h5')
% a2=regrouph5(a2)
% isequaln(a,a2.a)
%
% This file is part of EazyH5 Toolbox: https://github.com/fangq/eazyh5
% This function was adapted from h5load.m by Pauli Virtanen <pav at iki.fi>
% This file is part of EazyH5 Toolbox: https://github.com/fangq/eazyh5
%
% License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/eazyh5 for details
% License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/eazyh5 for details
%

opt=varargin2struct(varargin{:});
Expand Down Expand Up @@ -100,6 +107,8 @@
status=0;
attr=struct();

encodename=jsonopt('PackHex',1,inputdata.opt);

try
data=inputdata.data;
meta=inputdata.meta;
Expand All @@ -121,7 +130,11 @@
H5G.close(group_loc);
rethrow(ME);
end
name=genvarname(name);
if(encodename)
name=encodevarname(name);
else
name=genvarname(name);
end
data.(name) = sub_data;
meta.(name) = sub_meta;

Expand All @@ -141,7 +154,11 @@
end

sub_data = fix_data(sub_data, attr);
name=genvarname(name);
if(encodename)
name=encodevarname(name);
else
name=genvarname(name);
end
data.(name) = sub_data;
meta.(name) = attr;
end
Expand Down Expand Up @@ -194,11 +211,6 @@
end
end

% if isnumeric(data) && ndims(data) > 1
% % permute dimensions
% data = permute(data, fliplr(1:ndims(data)));
% end

%--------------------------------------------------------------------------
function [status, dataout]= getattribute(loc_id,attr_name,info,datain)
status=0;
Expand Down
19 changes: 18 additions & 1 deletion saveh5.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ function saveh5(data, fname, varargin)
% input:
% data: a structure (array) or cell (array) to be stored.
% fname: the output HDF5 (.h5) file name
% options: (optional) Param/value pairs for user specified options
% options: (optional) a struct or Param/value pairs for user specified options
% RootName: the HDF5 path of the root object. If not given, the
% actual variable name for the data input will be used as
% the root object. The value shall not include '/'.
% UnpackHex [1|0]: convert the 0x[hex code] in variable names
% back to Unicode string using decodevarname.m
% Compression: ['deflate'|''] - use zlib-deflate method
% to compress data array
% CompressArraySize: [100|int]: only to compress an array if the
Expand All @@ -24,6 +26,7 @@ function saveh5(data, fname, varargin)
% compression level
% Chunk: a size vector or empty - breaking a large array into
% small chunks of size specified by this parameter
%
% example:
% a=struct('a',rand(5),'b','string','c',true,'d',2+3i,'e',{'test',[],1:5});
% saveh5(a,'test.h5');
Expand Down Expand Up @@ -119,6 +122,9 @@ function saveh5(data, fname, varargin)
indexed = H5ML.get_constant_value('H5P_CRT_ORDER_INDEXED');
order = bitor(tracked,indexed);
H5P.set_link_creation_order(gcpl,order);
if(jsonopt('UnpackHex',1,varargin{:}))
name=decodevarname(name);
end
try
handle=H5G.create(handle, name, pd,gcpl,pd);
isnew=1;
Expand Down Expand Up @@ -147,6 +153,9 @@ function saveh5(data, fname, varargin)
order = bitor(tracked,indexed);
H5P.set_link_creation_order(gcpl,order);
try
if(jsonopt('UnpackHex',1,varargin{:}))
name=decodevarname(name);
end
handle=H5G.create(handle, name, pd,gcpl,pd);
isnew=1;
catch
Expand Down Expand Up @@ -200,6 +209,10 @@ function saveh5(data, fname, varargin)
end
end

if(jsonopt('UnpackHex',1,varargin{:}))
name=decodevarname(name);
end

if(isreal(item))
if(issparse(item))
idx=find(item);
Expand Down Expand Up @@ -277,6 +290,10 @@ function saveh5(data, fname, varargin)
function oid=any2h5(name, item,handle,level,varargin)
pd = 'H5P_DEFAULT';

if(jsonopt('UnpackHex',1,varargin{:}))
name=decodevarname(name);
end

rawdata=getByteStreamFromArray(item); % use undocumented matlab function
oid=H5D.create(handle,name,H5T.copy('H5T_STD_U8LE'),H5S.create_simple(ndims(rawdata), size(rawdata),size(rawdata)),pd);
H5D.write(oid,'H5ML_DEFAULT','H5S_ALL','H5S_ALL',pd,rawdata);
Expand Down

0 comments on commit bb762a8

Please sign in to comment.