From a599e71055f0cb3b3bddac6a8562a5df452736a3 Mon Sep 17 00:00:00 2001
From: Qianqian Fang <fangqq@gmail.com>
Date: Wed, 20 Mar 2024 01:15:04 -0400
Subject: [PATCH] [feat] add jsoncache to handle _DataLink_ download cache,
 rename jsonpath

---
 Contents.m                      |   3 +-
 jdatadecode.m                   |  42 +++++------
 jsoncache.m                     | 130 ++++++++++++++++++++++++++++++++
 jsonget.m                       |   2 +-
 getfromjsonpath.m => jsonpath.m |  19 +++--
 5 files changed, 166 insertions(+), 30 deletions(-)
 create mode 100644 jsoncache.m
 rename getfromjsonpath.m => jsonpath.m (85%)

diff --git a/Contents.m b/Contents.m
index b1fc8f3..0096c6e 100644
--- a/Contents.m
+++ b/Contents.m
@@ -7,7 +7,8 @@
 %   encodevarname      - newname = encodevarname(name)
 %   fast_match_bracket - [endpos, maxlevel] = fast_match_bracket(key,pos,startpos,brackets)
 %   filterjsonmmap     - mmap=filterjsonmmap(mmap, patterns, isinclude)
-%   getfromjsonpath    - obj=getfromjsonpath(root, jsonpath)
+%   jsoncache          - [cachepath, filename]=jsoncache(hyperlink)
+%   jsonpath           - obj=jsonpath(root, jsonpath)
 %   gzipdecode         - output = gzipdecode(input)
 %   gzipencode         - output = gzipencode(input)
 %   isoctavemesh       - [isoctave verinfo]=isoctavemesh
diff --git a/jdatadecode.m b/jdatadecode.m
index 1a6f6ab..59f81d5 100644
--- a/jdatadecode.m
+++ b/jdatadecode.m
@@ -477,29 +477,29 @@
         end
         if (~isempty(ref.path))
             uripath = [ref.proto ref.path];
-            [fpath, fname, fext] = fileparts(uripath);
-            opt.maxlinklevel = opt.maxlinklevel - 1;
-            switch (lower(fext))
-                case {'.json', '.jnii', '.jdt', '.jdat', '.jmsh', '.jnirs'}
-                    newdata = loadjson(uripath, opt);
-                case {'.bjd', '.bnii', '.jdb', '.jbat', '.bmsh', '.bnirs', '.pmat'}
-                    newdata = loadbj(uripath, opt, 'Base64', 0);
-                case {'.ubj'}
-                    newdata = loadubjson(uripath, opt, 'Base64', 0);
-                case {'.msgpack'}
-                    newdata = loadmsgpack(uripath, opt, 'Base64', 0);
-                case {'.h5', '.hdf5', '.snirf'}  % this requires EasyH5 toolbox
-                    newdata = loadh5(uripath, opt);
-                otherwise
-                    % _DataLink_ url does not specify type, assuming JSON format
-                    if (regexpi(datalink, '^\s*(http|https|ftp|file)://'))
-                        newdata = loadjson(uripath, opt);
-                    else
-                        warning('_DataLink_ url is not supported');
-                    end
+            [cachepath, filename] = jsoncache(uripath); % cell => not cached yet; char => path of an existing file
+            if (iscell(cachepath) && ~isempty(cachepath))
+                rawdata = webread(uripath);
+                fname = [cachepath{1} filesep filename]; % download into the first candidate cache folder
+                fpath = fileparts(fname);
+                if (~exist(fpath, 'dir'))
+                    mkdir(fpath);
+                end
+                fid = fopen(fname, 'wb');
+                if (fid < 0) % fopen signals failure with -1, never 0
+                    error('can not save URL to cache at path %s', fname);
+                end
+                fwrite(fid, uint8(rawdata));
+                fclose(fid);
+
+                opt.maxlinklevel = opt.maxlinklevel - 1;
+                newdata = loadjd(fname, opt);
+            elseif (~iscell(cachepath) && exist(cachepath, 'file'))
+                opt.maxlinklevel = opt.maxlinklevel - 1;
+                newdata = loadjd(cachepath, opt);
+            end
             if (~isempty(ref.jsonpath))
-                newdata = getfromjsonpath(newdata, ref.jsonpath);
+                newdata = jsonpath(newdata, ref.jsonpath);
             end
         end
     end
diff --git a/jsoncache.m b/jsoncache.m
new file mode 100644
index 0000000..0155c41
--- /dev/null
+++ b/jsoncache.m
@@ -0,0 +1,130 @@
+function [cachepath, filename] = jsoncache(dbname, docname, filename, domain)
+%
+% cachepaths=jsoncache()
+% [cachepath, filename]=jsoncache(hyperlink)
+% [cachepath, tf]=jsoncache(filename)
+% cachepath=jsoncache(dbname, docname, filename, domain)
+%
+% return the JSON cache folder where _DataLink_ hyperlinked data files are downloaded
+%
+% author: Qianqian Fang (q.fang at neu.edu)
+%
+% input:
+%    hyperlink: if a single input is provided, the function checks if it
+%               contains a protocol prefix such as http:// or https://; if so,
+%               it tries to extract the database name, document name and file
+%               name using NeuroJSON's standard link format as
+%
+%    https://neurojson.org/io/stat.cgi?db=..&doc=..&file=..&size=..
+%
+%               if the string does not contain a link, it is treated as a
+%               local file path
+%    dbname: the name of the NeuroJSON database (must exist)
+%    docname: the name of the NeuroJSON dataset document (must exist)
+%    filename: the name of the data file - may contain a relative folder
+%    domain: optional, if not given, 'io' is used; otherwise, user can
+%            specify customized domain name
+%
+% output:
+%    cachepath: if the linked file is found in any of the cache folders,
+%            this returns the full path of the found file as a string;
+%            otherwise, this stores a cell array listing the searched cache
+%            folders in the search order
+%    tf: if a file is found in the cache folder, this returns true;
+%            otherwise, this contains the extracted file name.
+%
+%    the cached data files will be searched in the following order
+%
+%    [pwd '/.neurojson']             | on all OSes
+%    /home/USERNAME/.neurojson       | on all OSes (per-user)
+%    /home/USERNAME/.cache/neurojson | if on Linux (per-user)
+%    /var/cache/neurojson            | if on Linux (system wide)
+%    /home/USERNAME/Library/neurojson| if on MacOS (per-user)
+%    /Library/neurojson              | if on MacOS (system wide)
+%    C:\ProgramData\neurojson        | if on Windows (system wide)
+%
+%    if a variable NEUROJSON_CACHE is provided (looked up via getvarfrom in
+%    'caller' and 'base'), that folder is searched ahead of the above paths
+%
+% -- this function is part of iso2mesh toolbox (http://iso2mesh.sf.net)
+%
+
+pathname = getenv('HOME'); % per-user root taken from the HOME environment variable
+cachepath = {[pwd filesep '.neurojson'], [pathname filesep '.neurojson']};
+if (ispc)
+    cachepath{end + 1} = [getenv('PROGRAMDATA') filesep 'neurojson'];
+elseif (ismac)
+    cachepath{end + 1} = [pathname '/Library/neurojson'];
+    cachepath{end + 1} = '/Library/neurojson';
+else
+    cachepath{end + 1} = [pathname '/.cache/neurojson'];
+    cachepath{end + 1} = '/var/cache/neurojson';
+end
+
+cachepath = unique(cachepath, 'stable'); % drop duplicated folders while keeping the search order
+
+if (nargin < 4)
+    domain = 'io';
+end
+
+if (nargin == 1)
+    link = dbname;
+    if (isempty(regexp(link, '://', 'once'))) % no protocol separator: treat the input as a local file path
+        filename = link;
+        if (exist(filename, 'file'))
+            cachepath = filename;
+            filename = true;
+            return
+        end
+    else
+        if (~isempty(regexp(link, '^https?://neurojson\.org/io/', 'once')))
+            domain = 'io';
+        end
+        dbname = regexp(link, '(?<=[?&]db=)[^&]+', 'match'); % anchor on ?/& so keys such as "mydb=" are not mistaken for "db="
+        if (~isempty(dbname))
+            dbname = dbname{1};
+        else
+            dbname = '';
+        end
+        docname = regexp(link, '(?<=[?&]doc=)[^&]+', 'match');
+        if (~isempty(docname))
+            docname = docname{1};
+        else
+            docname = '';
+        end
+        filename = regexp(link, '(?<=[?&]file=)[^&]+', 'match'); % anchored so that e.g. "profile=" does not match
+        if (~isempty(filename))
+            filename = filename{1};
+        else
+            filename = '';
+        end
+    end
+end
+
+p = getvarfrom({'caller', 'base'}, 'NEUROJSON_CACHE');
+
+if (nargin == 0 || nargin == 1 || nargin >= 3)
+    if (~isempty(p))
+        cachepath = [{p}, cachepath{:}]; % user-specified folder goes first; no domain/db/doc subfolder is appended here
+    elseif (exist('dbname', 'var') && exist('docname', 'var'))
+        cachepath = cellfun(@(x) [x filesep domain filesep dbname filesep docname], cachepath, 'UniformOutput', false);
+    end
+    if (exist('filename', 'var') && ~isempty(filename))
+        for i = 1:length(cachepath)
+            if (exist([cachepath{i} filesep filename], 'file'))
+                cachepath = [cachepath{i} filesep filename]; % cache hit: return the full file path as a string
+                filename = true;
+                return
+            end
+        end
+    elseif (exist('link', 'var'))
+        [pathname, fname, fext] = fileparts(link); % no file= key in the link: fall back to the last segment of the link
+        filename = [fname fext];
+    end
+    if (~isempty(p)) % cache miss: remove the [pwd '/.neurojson'] based entry from the returned folder list
+        cachepath(2) = [];
+    else
+        cachepath(1) = [];
+    end
+    return
+end
diff --git a/jsonget.m b/jsonget.m
index 2667a5d..f2a2c9b 100644
--- a/jsonget.m
+++ b/jsonget.m
@@ -63,7 +63,7 @@
 
 if (isstruct(fname) || iscell(fname) || isa(fname, 'table') || isa(fname, 'containers.Map'))
     for i = 1:length(keylist)
-        json{end + 1} = getfromjsonpath(fname, keylist{i});
+        json{end + 1} = jsonpath(fname, keylist{i});
     end
     if (length(json) == 1)
         json = json{1};
diff --git a/getfromjsonpath.m b/jsonpath.m
similarity index 85%
rename from getfromjsonpath.m
rename to jsonpath.m
index 128c5c1..b0f8f6b 100644
--- a/getfromjsonpath.m
+++ b/jsonpath.m
@@ -1,6 +1,6 @@
-function obj = getfromjsonpath(root, jsonpath)
+function obj = jsonpath(root, jsonpath)
 %
-%    obj=getfromjsonpath(root, jsonpath)
+%    obj=jsonpath(root, jsonpath)
 %
 %    Query and retrieve elements from matlab data structures using JSONPath
 %
@@ -14,7 +14,7 @@
 %        obj: if the specified element exist, obj returns the result
 %
 %    example:
-%        getfromjsonpath(struct('a',[1,2,3]), '$.a[1]')      % returns 2
+%        jsonpath(struct('a',[1,2,3]), '$.a[1]')      % returns 2
 %
 % license:
 %     BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
@@ -27,7 +27,7 @@
 [pat, paths] = regexp(jsonpath, '(\.{0,2}[^\s\.]+)', 'match', 'tokens');
 if (~isempty(pat) && ~isempty(paths))
     for i = 1:length(paths)
-        [obj, isfound] = getonelevel(obj, paths{i}{1});
+        [obj, isfound] = getonelevel(obj, paths, i);
         if (~isfound)
             return
         end
@@ -36,7 +36,12 @@
 
 %% scan function
 
-function [obj, isfound] = getonelevel(input, pathname)
+function [obj, isfound] = getonelevel(input, paths, pathid)
+
+pathname = paths{pathid};
+if (iscell(pathname))
+    pathname = pathname{1};
+end
 
 deepscan = ~isempty(regexp(pathname, '^\.\.', 'once'));
 
@@ -78,7 +83,7 @@
         end
         items = fieldnames(input);
         for idx = 1:length(items)
-            [val, isfound] = getonelevel(input.(items{idx}), ['..' pathname]);
+            [val, isfound] = getonelevel(input.(items{idx}), [paths{:} {['..' pathname]}], pathid + 1);
             if (isfound)
                 if (~exist('obj', 'var'))
                     obj = {};
@@ -96,7 +101,7 @@
         end
         items = keys(input);
         for idx = 1:length(items)
-            [val, isfound] = getonelevel(input(items{idx}), ['..' pathname]);
+            [val, isfound] = getonelevel(input(items{idx}), [paths{:} {['..' pathname]}], pathid + 1);
             if (isfound)
                 if (~exist('obj', 'var'))
                     obj = {};