Skip to content

Commit

Permalink
[feat] add jsoncache to handle _DataLink_ download cache, rename json…
Browse files Browse the repository at this point in the history
…path
  • Loading branch information
fangq committed Mar 20, 2024
1 parent 4f2edeb commit a599e71
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 30 deletions.
3 changes: 2 additions & 1 deletion Contents.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
% encodevarname - newname = encodevarname(name)
% fast_match_bracket - [endpos, maxlevel] = fast_match_bracket(key,pos,startpos,brackets)
% filterjsonmmap - mmap=filterjsonmmap(mmap, patterns, isinclude)
% getfromjsonpath - obj=getfromjsonpath(root, jsonpath)
% jsoncache - [cachepath, filename]=jsoncache(hyperlink)
% jsonpath - obj=jsonpath(root, jsonpath)
% gzipdecode - output = gzipdecode(input)
% gzipencode - output = gzipencode(input)
% isoctavemesh - [isoctave verinfo]=isoctavemesh
Expand Down
42 changes: 21 additions & 21 deletions jdatadecode.m
Original file line number Diff line number Diff line change
Expand Up @@ -477,29 +477,29 @@
end
if (~isempty(ref.path))
uripath = [ref.proto ref.path];
[fpath, fname, fext] = fileparts(uripath);
opt.maxlinklevel = opt.maxlinklevel - 1;
switch (lower(fext))
case {'.json', '.jnii', '.jdt', '.jdat', '.jmsh', '.jnirs'}
newdata = loadjson(uripath, opt);
case {'.bjd', '.bnii', '.jdb', '.jbat', '.bmsh', '.bnirs', '.pmat'}
newdata = loadbj(uripath, opt, 'Base64', 0);
case {'.ubj'}
newdata = loadubjson(uripath, opt, 'Base64', 0);
case {'.msgpack'}
newdata = loadmsgpack(uripath, opt, 'Base64', 0);
case {'.h5', '.hdf5', '.snirf'} % this requires EasyH5 toolbox
newdata = loadh5(uripath, opt);
otherwise
% _DataLink_ url does not specify type, assuming JSON format
if (regexpi(datalink, '^\s*(http|https|ftp|file)://'))
newdata = loadjson(uripath, opt);
else
warning('_DataLink_ url is not supported');
end
[cachepath, filename] = jsoncache(uripath);
if (iscell(cachepath) && ~isempty(cachepath))
rawdata = webread(uripath);
fname = [cachepath{1} filesep filename];
fpath = fileparts(fname);
if (~exist(fpath, 'dir'))
mkdir(fpath);
end
fid = fopen(fname, 'wb');
if (fid == 0)
error('can not save URL to cache at path %s', fname);
end
fwrite(fid, uint8(rawdata));
fclose(fid);

opt.maxlinklevel = opt.maxlinklevel - 1;
newdata = loadjd(fname, opt);
elseif (~iscell(cachepath) && exist(cachepath, 'file'))
opt.maxlinklevel = opt.maxlinklevel - 1;
newdata = loadjd(cachepath, opt);
end
if (~isempty(ref.jsonpath))
newdata = getfromjsonpath(newdata, ref.jsonpath);
newdata = jsonpath(newdata, ref.jsonpath);
end
end
end
Expand Down
130 changes: 130 additions & 0 deletions jsoncache.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
function [cachepath, filename] = jsoncache(dbname, docname, filename, domain)
%
% cachepaths=jsoncache()
% [cachepath, filename]=jsoncache(hyperlink)
% [cachepath, tf]=jsoncache(filename)
% cachepath=jsoncache(dbname, docname, filename, domain)
%
% return the JSON cache folder where _DataLink_ hyperlinked data files are
% downloaded, or the full path of the cached file if it already exists
%
% author: Qianqian Fang (q.fang at neu.edu)
%
% input:
%    hyperlink: if a single input is provided, the function checks whether
%           it contains a URL scheme separator ('://'); if so, it tries to
%           extract the database name, document name and file name from
%           the query string using NeuroJSON's link format
%
%               https://neurojson.org/io/stat.cgi?db=..&doc=..&file=..&size=..
%
%           (the values following 'db=', 'doc=' and 'file=' are used); if
%           the link has no 'file=' field, the last segment of the link
%           is used as the file name. If the string does not contain
%           '://', it is treated as a local file path
%    dbname: the name of the NeuroJSON database (must exist)
%    docname: the name of the NeuroJSON dataset document (must exist)
%    filename: the name of the data file - may contain a relative folder
%    domain: optional, if not given, 'io' is used; otherwise, user can
%           specify customized domain name
%
% output:
%    cachepath: if the linked file is found in any of the cache folders
%           (or the single input is an existing local path), this returns
%           the full path of the found file as a string; otherwise, this
%           is a cell array listing the cache folders in the search
%           order, excluding the [pwd '/.neurojson'] folder
%    filename (tf): if a file is found, this returns true; otherwise,
%           this contains the extracted file name (empty if no file name
%           is available, e.g. when called with 0 or 2 inputs)
%
%    the cached data files will be searched in the following order
%
%    [pwd '/.neurojson']             | on all OSes
%    /home/USERNAME/.neurojson       | on all OSes (per-user)
%    /home/USERNAME/.cache/neurojson | if on Linux (per-user)
%    /var/cache/neurojson            | if on Linux (system wide)
%    /home/USERNAME/Library/neurojson| if on MacOS (per-user)
%    /Library/neurojson              | if on MacOS (system wide)
%    C:\ProgramData\neurojson        | if on Windows (system wide)
%
%    when dbname and docname are known, the subfolder
%    domain/dbname/docname is appended to each of the above folders
%
%    if a variable NEUROJSON_CACHE is found (via getvarfrom) in the
%    'caller' or 'base' workspace, its value is inserted at the front of
%    the search list, and the domain/dbname/docname subfolder is NOT
%    appended to any folder
%
%    when called with exactly 2 inputs, the unmodified default folder
%    list (including the pwd entry) is returned
%
% -- this function is part of iso2mesh toolbox (http://iso2mesh.sf.net)
%

% per-user base folder; when HOME is unset (common on Windows) fall back to
% USERPROFILE so that the per-user cache does not resolve to '\.neurojson'
% at the root of the current drive
pathname = getenv('HOME');
if (isempty(pathname) && ispc)
    pathname = getenv('USERPROFILE');
end

cachepath = {[pwd filesep '.neurojson'], [pathname filesep '.neurojson']};
if (ispc)
    cachepath{end + 1} = [getenv('PROGRAMDATA') filesep 'neurojson'];
elseif (ismac)
    cachepath{end + 1} = [pathname '/Library/neurojson'];
    cachepath{end + 1} = '/Library/neurojson';
else
    cachepath{end + 1} = [pathname '/.cache/neurojson'];
    cachepath{end + 1} = '/var/cache/neurojson';
end

% drop duplicated folders (e.g. when pwd is the home folder), keep the order
cachepath = unique(cachepath, 'stable');

if (nargin < 4)
    domain = 'io';
end

% make sure the 2nd output is always defined, so that [a,b]=jsoncache() and
% [a,b]=jsoncache(dbname,docname) do not fail with an unassigned output
if (nargin < 3)
    filename = '';
end

if (nargin == 1)
    link = dbname;
    if (isempty(regexp(link, '://', 'once')))
        % not a URL: treat the input as a local path and return it
        % directly if it exists
        filename = link;
        if (exist(filename, 'file'))
            cachepath = filename;
            filename = true;
            return
        end
    else
        if (~isempty(regexp(link, '^https*://neurojson.org/io/', 'once')))
            domain = 'io';
        end
        % pull db/doc/file values out of the URL query string; a missing
        % field becomes an empty string
        dbname = regexp(link, '(?<=db=)[^&]+', 'match');
        if (~isempty(dbname))
            dbname = dbname{1};
        else
            dbname = '';
        end
        docname = regexp(link, '(?<=doc=)[^&]+', 'match');
        if (~isempty(docname))
            docname = docname{1};
        else
            docname = '';
        end
        % NOTE(review): the 'file=' value is taken verbatim from the link
        % and later used as a relative path below the cache folder; it is
        % not sanitized here (e.g. '..' segments) - confirm that callers
        % only pass trusted links
        filename = regexp(link, '(?<=file=)[^&]+', 'match');
        if (~isempty(filename))
            filename = filename{1};
        else
            filename = '';
        end
    end
end

% user-defined cache folder, if any
p = getvarfrom({'caller', 'base'}, 'NEUROJSON_CACHE');

if (nargin == 0 || nargin == 1 || nargin >= 3)
    if (~isempty(p))
        cachepath = [{p}, cachepath{:}];
    elseif (exist('dbname', 'var') && exist('docname', 'var'))
        cachepath = cellfun(@(x) [x filesep domain filesep dbname filesep docname], cachepath, 'UniformOutput', false);
    end
    if (~isempty(filename))
        % return the first cached copy found, following the search order
        for i = 1:length(cachepath)
            if (exist([cachepath{i} filesep filename], 'file'))
                cachepath = [cachepath{i} filesep filename];
                filename = true;
                return
            end
        end
    elseif (exist('link', 'var'))
        % no 'file=' field in the link: use the last segment of the link
        [linkdir, fname, fext] = fileparts(link);
        filename = [fname fext];
    end
    % not found: remove the [pwd '/.neurojson'] entry from the returned
    % list; it sits at position 2 when NEUROJSON_CACHE was prepended
    if (~isempty(p))
        cachepath(2) = [];
    else
        cachepath(1) = [];
    end
end
2 changes: 1 addition & 1 deletion jsonget.m
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@

if (isstruct(fname) || iscell(fname) || isa(fname, 'table') || isa(fname, 'containers.Map'))
for i = 1:length(keylist)
json{end + 1} = getfromjsonpath(fname, keylist{i});
json{end + 1} = jsonpath(fname, keylist{i});
end
if (length(json) == 1)
json = json{1};
Expand Down
19 changes: 12 additions & 7 deletions getfromjsonpath.m → jsonpath.m
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
function obj = getfromjsonpath(root, jsonpath)
function obj = jsonpath(root, jsonpath)
%
% obj=getfromjsonpath(root, jsonpath)
% obj=jsonpath(root, jsonpath)
%
% Query and retrieve elements from matlab data structures using JSONPath
%
Expand All @@ -14,7 +14,7 @@
% obj: if the specified element exists, obj returns the result
%
% example:
% getfromjsonpath(struct('a',[1,2,3]), '$.a[1]') % returns 2
% jsonpath(struct('a',[1,2,3]), '$.a[1]') % returns 2
%
% license:
% BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
Expand All @@ -27,7 +27,7 @@
[pat, paths] = regexp(jsonpath, '(\.{0,2}[^\s\.]+)', 'match', 'tokens');
if (~isempty(pat) && ~isempty(paths))
for i = 1:length(paths)
[obj, isfound] = getonelevel(obj, paths{i}{1});
[obj, isfound] = getonelevel(obj, paths, i);
if (~isfound)
return
end
Expand All @@ -36,7 +36,12 @@

%% scan function

function [obj, isfound] = getonelevel(input, pathname)
function [obj, isfound] = getonelevel(input, paths, pathid)

pathname = paths{pathid};
if (iscell(pathname))
pathname = pathname{1};
end

deepscan = ~isempty(regexp(pathname, '^\.\.', 'once'));

Expand Down Expand Up @@ -78,7 +83,7 @@
end
items = fieldnames(input);
for idx = 1:length(items)
[val, isfound] = getonelevel(input.(items{idx}), ['..' pathname]);
[val, isfound] = getonelevel(input.(items{idx}), [paths{:} {['..' pathname]}], pathid + 1);
if (isfound)
if (~exist('obj', 'var'))
obj = {};
Expand All @@ -96,7 +101,7 @@
end
items = keys(input);
for idx = 1:length(items)
[val, isfound] = getonelevel(input(items{idx}), ['..' pathname]);
[val, isfound] = getonelevel(input(items{idx}), [paths{:} {['..' pathname]}], pathid + 1);
if (isfound)
if (~exist('obj', 'var'))
obj = {};
Expand Down

0 comments on commit a599e71

Please sign in to comment.