From a599e71055f0cb3b3bddac6a8562a5df452736a3 Mon Sep 17 00:00:00 2001
From: Qianqian Fang
Date: Wed, 20 Mar 2024 01:15:04 -0400
Subject: [PATCH] [feat] add jsoncache to handle _DataLink_ download cache, rename jsonpath

---
Reviewer notes (text between the '---' line and the first 'diff --git'
line is not part of the commit message):
 * jdatadecode.m: fopen signals failure by returning -1, so the guard on
   the cache file now tests fid < 0 instead of fid == 0.
 * jsoncache.m: the documented link format now shows the db=/doc=/file=
   query keys that the regular expressions in the function extract; the
   neurojson.org check uses 'https?' and an escaped dot; help-text typos
   are fixed and the NEUROJSON_CACHE description matches the code, which
   prepends the value to the search list.
 * Every change above is a same-line replacement on a '+' line, so hunk
   line counts and the diffstat below are unchanged.
 * Leading whitespace in this patch was reconstructed assuming 4-space
   indentation, and column alignment inside help text / Contents.m could
   not be recovered. TODO confirm against the target tree; if context
   lines do not match, apply with whitespace differences ignored.
 * NOTE(review): in jsonpath.m the deep-scan recursion appends
   ['..' pathname] to the END of paths but passes pathid + 1. That index
   selects the appended entry only when pathid equals the number of
   path segments; please confirm the behavior when a '..' segment is
   not the last one.

 Contents.m                      |   3 +-
 jdatadecode.m                   |  42 +++++------
 jsoncache.m                     | 130 ++++++++++++++++++++++++++++++++
 jsonget.m                       |   2 +-
 getfromjsonpath.m => jsonpath.m |  19 +++--
 5 files changed, 166 insertions(+), 30 deletions(-)
 create mode 100644 jsoncache.m
 rename getfromjsonpath.m => jsonpath.m (85%)

diff --git a/Contents.m b/Contents.m
index b1fc8f3..0096c6e 100644
--- a/Contents.m
+++ b/Contents.m
@@ -7,7 +7,8 @@
 % encodevarname - newname = encodevarname(name)
 % fast_match_bracket - [endpos, maxlevel] = fast_match_bracket(key,pos,startpos,brackets)
 % filterjsonmmap - mmap=filterjsonmmap(mmap, patterns, isinclude)
-% getfromjsonpath - obj=getfromjsonpath(root, jsonpath)
+% jsoncache - [cachepath, filename]=jsoncache(hyperlink)
+% jsonpath - obj=jsonpath(root, jsonpath)
 % gzipdecode - output = gzipdecode(input)
 % gzipencode - output = gzipencode(input)
 % isoctavemesh - [isoctave verinfo]=isoctavemesh
diff --git a/jdatadecode.m b/jdatadecode.m
index 1a6f6ab..59f81d5 100644
--- a/jdatadecode.m
+++ b/jdatadecode.m
@@ -477,29 +477,29 @@
                 end
                 if (~isempty(ref.path))
                     uripath = [ref.proto ref.path];
-                    [fpath, fname, fext] = fileparts(uripath);
-                    opt.maxlinklevel = opt.maxlinklevel - 1;
-                    switch (lower(fext))
-                        case {'.json', '.jnii', '.jdt', '.jdat', '.jmsh', '.jnirs'}
-                            newdata = loadjson(uripath, opt);
-                        case {'.bjd', '.bnii', '.jdb', '.jbat', '.bmsh', '.bnirs', '.pmat'}
-                            newdata = loadbj(uripath, opt, 'Base64', 0);
-                        case {'.ubj'}
-                            newdata = loadubjson(uripath, opt, 'Base64', 0);
-                        case {'.msgpack'}
-                            newdata = loadmsgpack(uripath, opt, 'Base64', 0);
-                        case {'.h5', '.hdf5', '.snirf'}  % this requires EasyH5 toolbox
-                            newdata = loadh5(uripath, opt);
-                        otherwise
-                            % _DataLink_ url does not specify type, assuming JSON format
-                            if (regexpi(datalink, '^\s*(http|https|ftp|file)://'))
-                                newdata = loadjson(uripath, opt);
-                            else
-                                warning('_DataLink_ url is not supported');
-                            end
+                    [cachepath, filename] = jsoncache(uripath);
+                    if (iscell(cachepath) && ~isempty(cachepath))
+                        rawdata = webread(uripath);
+                        fname = [cachepath{1} filesep filename];
+                        fpath = fileparts(fname);
+                        if (~exist(fpath, 'dir'))
+                            mkdir(fpath);
+                        end
+                        fid = fopen(fname, 'wb');
+                        if (fid < 0)
+                            error('can not save URL to cache at path %s', fname);
+                        end
+                        fwrite(fid, uint8(rawdata));
+                        fclose(fid);
+
+                        opt.maxlinklevel = opt.maxlinklevel - 1;
+                        newdata = loadjd(fname, opt);
+                    elseif (~iscell(cachepath) && exist(cachepath, 'file'))
+                        opt.maxlinklevel = opt.maxlinklevel - 1;
+                        newdata = loadjd(cachepath, opt);
                     end
                     if (~isempty(ref.jsonpath))
-                        newdata = getfromjsonpath(newdata, ref.jsonpath);
+                        newdata = jsonpath(newdata, ref.jsonpath);
                     end
                 end
             end
diff --git a/jsoncache.m b/jsoncache.m
new file mode 100644
index 0000000..0155c41
--- /dev/null
+++ b/jsoncache.m
@@ -0,0 +1,130 @@
+function [cachepath, filename] = jsoncache(dbname, docname, filename, domain)
+%
+% cachepaths=jsoncache()
+% [cachepath, filename]=jsoncache(hyperlink)
+% [cachepath, tf]=jsoncache(filename)
+% cachepath=jsoncache(dbname, docname, filename, domain)
+%
+% return the JSON cache folder where _DataLink_ hyperlinked data files are downloaded
+%
+% author: Qianqian Fang (q.fang at neu.edu)
+%
+% input:
+%    hyperlink: if a single input is provided, the function checks whether it is
+%            a hyperlink containing '://' (such as http:// or https://); if so, it
+%            tries to extract the database name, document name and file
+%            name using NeuroJSON's standard link format as
+%
+%            https://neurojson.org/io/stat.cgi?db=..&doc=..&file=..&size=..
+%
+%            if the string does not contain a link, it is treated as a
+%            local file path
+%    dbname: the name of the NeuroJSON database (must exist)
+%    docname: the name of the NeuroJSON dataset document (must exist)
+%    filename: the name of the data file - may contain a relative folder
+%    domain: optional, if not given, 'io' is used; otherwise, user can
+%            specify customized domain name
+%
+% output:
+%    cachepaths: if the linked file is found in any of the cache folders,
+%            this returns the full path of the found file as a string;
+%            otherwise, this stores a cell array listing the searched cache
+%            folders in the search order
+%    tf: if a file is found in the cache folder, this returns true;
+%            otherwise, this contains the extracted file name.
+%
+%    the cached data files will be searched in the following order
+%
+%    [pwd '/.neurojson']              | on all OSes
+%    /home/USERNAME/.neurojson        | on all OSes (per-user)
+%    /home/USERNAME/.cache/neurojson  | if on Linux (per-user)
+%    /var/cache/neurojson             | if on Linux (system wide)
+%    /home/USERNAME/Library/neurojson | if on MacOS (per-user)
+%    /Library/neurojson               | if on MacOS (system wide)
+%    C:\ProgramData\neurojson         | if on Windows (system wide)
+%
+%    if a variable NEUROJSON_CACHE is set in the 'caller' or 'base' workspace,
+%    it is searched before the above search paths
+%
+% -- this function is part of iso2mesh toolbox (http://iso2mesh.sf.net)
+%
+
+pathname = getenv('HOME');
+cachepath = {[pwd filesep '.neurojson'], [pathname filesep '.neurojson']};
+if (ispc)
+    cachepath{end + 1} = [getenv('PROGRAMDATA') filesep 'neurojson'];
+elseif (ismac)
+    cachepath{end + 1} = [pathname '/Library/neurojson'];
+    cachepath{end + 1} = '/Library/neurojson';
+else
+    cachepath{end + 1} = [pathname '/.cache/neurojson'];
+    cachepath{end + 1} = '/var/cache/neurojson';
+end
+
+cachepath = unique(cachepath, 'stable');
+
+if (nargin < 4)
+    domain = 'io';
+end
+
+if (nargin == 1)
+    link = dbname;
+    if (isempty(regexp(link, '://', 'once')))
+        filename = link;
+        if (exist(filename, 'file'))
+            cachepath = filename;
+            filename = true;
+            return
+        end
+    else
+        if (~isempty(regexp(link, '^https?://neurojson\.org/io/', 'once')))
+            domain = 'io';
+        end
+        dbname = regexp(link, '(?<=db=)[^&]+', 'match');
+        if (~isempty(dbname))
+            dbname = dbname{1};
+        else
+            dbname = '';
+        end
+        docname = regexp(link, '(?<=doc=)[^&]+', 'match');
+        if (~isempty(docname))
+            docname = docname{1};
+        else
+            docname = '';
+        end
+        filename = regexp(link, '(?<=file=)[^&]+', 'match');
+        if (~isempty(filename))
+            filename = filename{1};
+        else
+            filename = '';
+        end
+    end
+end
+
+p = getvarfrom({'caller', 'base'}, 'NEUROJSON_CACHE');
+
+if (nargin == 0 || nargin == 1 || nargin >= 3)
+    if (~isempty(p))
+        cachepath = [{p}, cachepath{:}];
+    elseif (exist('dbname', 'var') && exist('docname', 'var'))
+        cachepath = cellfun(@(x) [x filesep domain filesep dbname filesep docname], cachepath, 'UniformOutput', false);
+    end
+    if (exist('filename', 'var') && ~isempty(filename))
+        for i = 1:length(cachepath)
+            if (exist([cachepath{i} filesep filename], 'file'))
+                cachepath = [cachepath{i} filesep filename];
+                filename = true;
+                return
+            end
+        end
+    elseif (exist('link', 'var'))
+        [pathname, fname, fext] = fileparts(link);
+        filename = [fname fext];
+    end
+    if (~isempty(p))
+        cachepath(2) = [];
+    else
+        cachepath(1) = [];
+    end
+    return
+end
diff --git a/jsonget.m b/jsonget.m
index 2667a5d..f2a2c9b 100644
--- a/jsonget.m
+++ b/jsonget.m
@@ -63,7 +63,7 @@
 
 if (isstruct(fname) || iscell(fname) || isa(fname, 'table') || isa(fname, 'containers.Map'))
     for i = 1:length(keylist)
-        json{end + 1} = getfromjsonpath(fname, keylist{i});
+        json{end + 1} = jsonpath(fname, keylist{i});
     end
     if (length(json) == 1)
         json = json{1};
diff --git a/getfromjsonpath.m b/jsonpath.m
similarity index 85%
rename from getfromjsonpath.m
rename to jsonpath.m
index 128c5c1..b0f8f6b 100644
--- a/getfromjsonpath.m
+++ b/jsonpath.m
@@ -1,6 +1,6 @@
-function obj = getfromjsonpath(root, jsonpath)
+function obj = jsonpath(root, jsonpath)
 %
-% obj=getfromjsonpath(root, jsonpath)
+% obj=jsonpath(root, jsonpath)
 %
 % Query and retrieve elements from matlab data structures using JSONPath
 %
@@ -14,7 +14,7 @@
 % obj: if the specified element exist, obj returns the result
 %
 % example:
-% getfromjsonpath(struct('a',[1,2,3]), '$.a[1]') % returns 2
+% jsonpath(struct('a',[1,2,3]), '$.a[1]') % returns 2
 %
 % license:
 % BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
@@ -27,7 +27,7 @@
 [pat, paths] = regexp(jsonpath, '(\.{0,2}[^\s\.]+)', 'match', 'tokens');
 if (~isempty(pat) && ~isempty(paths))
     for i = 1:length(paths)
-        [obj, isfound] = getonelevel(obj, paths{i}{1});
+        [obj, isfound] = getonelevel(obj, paths, i);
         if (~isfound)
             return
         end
@@ -36,7 +36,12 @@
 
 %% scan function
 
-function [obj, isfound] = getonelevel(input, pathname)
+function [obj, isfound] = getonelevel(input, paths, pathid)
+
+pathname = paths{pathid};
+if (iscell(pathname))
+    pathname = pathname{1};
+end
 
 deepscan = ~isempty(regexp(pathname, '^\.\.', 'once'));
 
@@ -78,7 +83,7 @@
         end
         items = fieldnames(input);
         for idx = 1:length(items)
-            [val, isfound] = getonelevel(input.(items{idx}), ['..' pathname]);
+            [val, isfound] = getonelevel(input.(items{idx}), [paths{:} {['..' pathname]}], pathid + 1);
             if (isfound)
                 if (~exist('obj', 'var'))
                     obj = {};
@@ -96,7 +101,7 @@
         end
         items = keys(input);
         for idx = 1:length(items)
-            [val, isfound] = getonelevel(input(items{idx}), ['..' pathname]);
+            [val, isfound] = getonelevel(input(items{idx}), [paths{:} {['..' pathname]}], pathid + 1);
             if (isfound)
                 if (~exist('obj', 'var'))
                     obj = {};