emscripten-core · kripken · Jan 13, 2022 · Jan 13, 2022 · Jan 13, 2022 · Jan 13, 2022
diff --git a/emcc.py b/emcc.py
@@ -1135,7 +1135,7 @@ def run(args):
     logger.debug('stopping after linking to object file')
     return 0
 
-  phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs)
+  phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs, options)
 
   phase_link(linker_arguments, wasm_target)
 
@@ -2620,7 +2620,7 @@ def compile_source_file(i, input_file):
 
 
 @ToolchainProfiler.profile_block('calculate system libraries')
-def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs):
+def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs, options):
   extra_files_to_link = []
   # link in ports and system libraries, if necessary
   if not settings.SIDE_MODULE:
@@ -2630,6 +2630,19 @@ def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, new
   extra_files_to_link += system_libs.calculate(all_linker_inputs, newargs, forced=state.forced_stdlibs)
   linker_arguments.extend(extra_files_to_link)
 
+  if settings.WASMFS and options.embed_files:
+    # WasmFS file embedding works by emitting C code that contains the file
+    # data plus the code to set those files up. We add that as another linker
+    # input, like a system library, compiling it on the fly here.
+    temp_files = shared.configuration.get_temp_files()
+    temp_c = temp_files.get(suffix='.c').name
+    temp_o = unsuffixed(temp_c) + '.o'
+    temp_files.note(temp_o)
+    with open(temp_c, 'w') as f:
+      f.write(package_files(options))
+    shared.check_call([shared.CLANG_CC, temp_c, '-o', temp_o, '-c'] + get_cflags([]))
+    linker_arguments.append(temp_o)
+
 
 @ToolchainProfiler.profile_block('link')
 def phase_link(linker_arguments, wasm_target):
@@ -2707,33 +2720,43 @@ def phase_emscript(options, in_wasm, wasm_target, memfile):
   save_intermediate('original')
 
 
+def package_files(options, target='default'):  # Runs the file packager for the preload/embed/exclude options; returns the generated C source in wasmfs embed mode, otherwise merges the generated JS into options.pre_js.
+  logger.debug('setting up files')
+  file_args = ['--from-emcc', '--export-name=' + settings.EXPORT_NAME]  # flags that are always passed to the packager
+  if options.preload_files:
+    file_args.append('--preload')
+    file_args += options.preload_files
+  if options.embed_files:
+    file_args.append('--embed')
+    file_args += options.embed_files
+  if options.exclude_files:
+    file_args.append('--exclude')
+    file_args += options.exclude_files
+  if options.use_preload_cache:
+    file_args.append('--use-preload-cache')
+  if settings.LZ4:
+    file_args.append('--lz4')
+  if options.use_preload_plugins:
+    file_args.append('--use-preload-plugins')
+  if not settings.ENVIRONMENT_MAY_BE_NODE:
+    file_args.append('--no-node')
+  wasmfs_c = settings.WASMFS and options.embed_files  # C output is requested only when WASMFS is set and there are files to embed
+  if wasmfs_c:
+    file_args += ['--wasmfs-c']
+  file_code = shared.check_call([shared.FILE_PACKAGER, shared.replace_suffix(target, '.data')] + file_args, stdout=PIPE).stdout  # the generated code is read from the packager's stdout
+  if wasmfs_c:
+    return file_code  # C mode: hand the generated source back to the caller
+  else:
+    options.pre_js = js_manipulation.add_files_pre_js(options.pre_js, file_code)  # JS mode: nothing is returned; the code is folded into options.pre_js
+
+
 @ToolchainProfiler.profile_block('source transforms')
 def phase_source_transforms(options, target):
   global final_js
 
   # Embed and preload files
   if len(options.preload_files) or len(options.embed_files):
-    logger.debug('setting up files')
-    file_args = ['--from-emcc', '--export-name=' + settings.EXPORT_NAME]
-    if len(options.preload_files):
-      file_args.append('--preload')
-      file_args += options.preload_files
-    if len(options.embed_files):
-      file_args.append('--embed')
-      file_args += options.embed_files
-    if len(options.exclude_files):
-      file_args.append('--exclude')
-      file_args += options.exclude_files
-    if options.use_preload_cache:
-      file_args.append('--use-preload-cache')
-    if settings.LZ4:
-      file_args.append('--lz4')
-    if options.use_preload_plugins:
-      file_args.append('--use-preload-plugins')
-    if not settings.ENVIRONMENT_MAY_BE_NODE:
-      file_args.append('--no-node')
-    file_code = shared.check_call([shared.FILE_PACKAGER, shared.replace_suffix(target, '.data')] + file_args, stdout=PIPE).stdout
-    options.pre_js = js_manipulation.add_files_pre_js(options.pre_js, file_code)
+    package_files(options, target)
 
   # Apply pre and postjs files
   if final_js and (options.pre_js or options.post_js):

diff --git a/system/lib/wasmfs/file.h b/system/lib/wasmfs/file.h
@@ -130,6 +130,8 @@ class File : public std::enable_shared_from_this<File> {
   std::weak_ptr<File> parent;
 
   // This specifies which backend a file is associated with.
+  // TODO: Should this be a shared_ptr? Or do we assume backends are never
+  //       deallocated?
   backend_t backend;
 };
 

diff --git a/system/lib/wasmfs/wasmfs.cpp b/system/lib/wasmfs/wasmfs.cpp
@@ -53,12 +53,13 @@ std::shared_ptr<Directory> WasmFS::initRootDirectory() {
   return rootDirectory;
 }
 
-// Initialize files specified by the --preload-file option.
-// Set up directories and files in wasmFS$preloadedDirs and
-// wasmFS$preloadedFiles from JS. This function will be called before any file
-// operation to ensure any preloaded files are eagerly available for use.
-void WasmFS::preloadFiles() {
-  // Debug builds only: add check to ensure preloadFiles() is called once.
+// If files are embedded in the program, then this symbol is defined. We will
+// call it and it will set those files up.
+__attribute__((__weak__))
+extern "C" void __wasmfs_load_embedded();
+
+void WasmFS::loadInitialFiles() {
+  // Debug builds only: add check to ensure loadInitialFiles() is called once.
 #ifndef NDEBUG
   static std::atomic<int> timesCalled;
   timesCalled++;
@@ -71,6 +72,15 @@ void WasmFS::preloadFiles() {
   // Ensure that files are preloaded from the main thread.
   assert(emscripten_is_main_runtime_thread());
 
+  // First, handle embedded files, if there are any.
+  if (__wasmfs_load_embedded) {
+    __wasmfs_load_embedded();
+  }
+
+  // Handle preloaded files.
+  // Set up directories and files in wasmFS$preloadedDirs and
+  // wasmFS$preloadedFiles from JS. This function will be called before any file
+  // operation to ensure any preloaded files are eagerly available for use.
   auto numFiles = _wasmfs_get_num_preloaded_files();
   auto numDirs = _wasmfs_get_num_preloaded_dirs();
 

diff --git a/system/lib/wasmfs/wasmfs.h b/system/lib/wasmfs/wasmfs.h
@@ -33,15 +33,15 @@ class WasmFS {
   // dev/stderr. Refers to the same std streams in the open file table.
   std::shared_ptr<Directory> initRootDirectory();
 
-  // Initialize files specified by --preload-file option.
-  void preloadFiles();
+  // Initialize files specified by --preload-file and --embed-file options.
+  void loadInitialFiles();
 
 public:
   // Files will be preloaded in this constructor.
   // This global constructor has init_priority 100. Please see wasmfs.cpp.
   // The current working directory is initialized to the root directory.
   WasmFS() : rootDirectory(initRootDirectory()), cwd(rootDirectory) {
-    preloadFiles();
+    loadInitialFiles();
   }
 
   // This get method returns a locked file table.

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -528,6 +528,7 @@ def test_wasm32_unknown_emscripten(self):
     # No other configuration is supported, so always run this.
     self.do_runf(test_file('wasm32-unknown-emscripten.c'), '')
 
+  @also_with_wasmfs # tests file embedding
   def test_cube2md5(self):
     self.emcc_args += ['--embed-file', 'cube2md5.txt']
     shutil.copyfile(test_file('cube2md5.txt'), 'cube2md5.txt')
@@ -8953,7 +8954,7 @@ def setUp(self):
 
 simd2 = make_run('simd2', emcc_args=['-O2', '-msimd128'])
 bulkmem2 = make_run('bulkmem2', emcc_args=['-O2', '-mbulk-memory'])
-wasmfs = make_run('wasmfs', emcc_args=['-s', 'WASMFS'])
+wasmfs = make_run('wasmfs', emcc_args=['-s', 'WASMFS', '--profiling'])
 
 # SAFE_HEAP/STACK_OVERFLOW_CHECK
 core2s = make_run('core2s', emcc_args=['-O2'], settings={'SAFE_HEAP': 1})

diff --git a/tools/file_packager.py b/tools/file_packager.py
@@ -21,7 +21,7 @@
 
 Usage:
 
-  file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node]
+  file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--wasmfs-c]
 
   --preload  ,
   --embed    See emcc --help for more details on those options.
@@ -51,6 +51,11 @@
 
   --no-node Whether to support Node.js. By default we do, which emits some extra code.
 
+  --wasmfs-c Whether to emit C code for wasmfs. This only supports embedding
+             (as it literally embeds the data in the C). If you prefer
+             preloading, you can use that normally and wasmfs will interact
+             with the JS normally.
+
 Notes:
 
   * The file packager generates unix-style file paths. So if you are on windows and a file is accessed at
@@ -185,6 +190,7 @@ def main():
   lz4 = False
   use_preload_plugins = False
   support_node = True
+  wasmfs_c = False
 
   for arg in sys.argv[2:]:
     if arg == '--preload':
@@ -218,6 +224,9 @@ def main():
     elif arg == '--no-node':
       support_node = False
       leading = ''
+    elif arg == '--wasmfs-c':
+      wasmfs_c = True
+      leading = ''
     elif arg.startswith('--js-output'):
       jsoutput = arg.split('=', 1)[1] if '=' in arg else None
       leading = ''
@@ -380,6 +389,8 @@ def was_seen(name):
 
   metadata = {'files': []}
 
+  c_output = ''
+
   # Set up folders
   partial_dirs = []
   for file_ in data_files:
@@ -392,6 +403,7 @@ def was_seen(name):
         if partial not in partial_dirs:
           code += ('''Module['FS_createPath'](%s, %s, true, true);\n'''
                    % (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i])))
+          c_output += f'mkdir("{partial}", 0700);\n'
           partial_dirs.append(partial)
 
   if has_preloaded:
@@ -475,13 +487,35 @@ def was_seen(name):
     basename = os.path.basename(filename)
     if file_['mode'] == 'embed':
       # Embed
-      data = base64_encode(utils.read_binary(file_['srcpath']))
-      code += '''var fileData%d = '%s';\n''' % (counter, data)
-      code += ('''Module['FS_createDataFile']('%s', '%s', decodeBase64(fileData%d), true, true, false);\n'''
-               % (dirname, basename, counter))
+      binary = utils.read_binary(file_['srcpath'])
+      if not wasmfs_c:
+        # JS output
+        data = base64_encode(binary)
+        code += '''var fileData%d = '%s';\n''' % (counter, data)
+        code += ('''Module['FS_createDataFile']('%s', '%s', decodeBase64(fileData%d), true, true, false);\n'''
+                 % (dirname, basename, counter))
+      else:
+        # C output.
+        # convert the binary data into a C escaped string, \xAB for hex code AB
+        def escape_for_c(char):
+          if char < 16:
+            # a single hex digit: pad with a leading 0 so the escape always has two hex digits
+            return '\\x0' + hex(char)[-1:]
+          return '\\x' + hex(char)[-2:]  # for values 16..255 hex() already yields two digits
+        data = ''.join([escape_for_c(char) for char in binary])
+        # Parent directories are expected to exist already: mkdir() calls for them are emitted by the "Set up folders" step above.
+        c_output += f'''static const char fileData{counter}[] = "{data}";\n'''
+        c_output += f'''
+FILE* file{counter} = fopen("{dirname}" "/" "{basename}", "wb");
+fwrite(fileData{counter}, 1, {len(binary)}, file{counter});
+fclose(file{counter});
+'''
+
       counter += 1
     elif file_['mode'] == 'preload':
       # Preload
+      assert not wasmfs_c, 'wasmfs-c mode only supports embedding'
+
       counter += 1
 
       metadata_el = {
@@ -920,6 +954,18 @@ def was_seen(name):
   })();
   ''' % _metadata_template
 
+  if wasmfs_c:
+    ret = r'''
+#include <stdio.h>
+#include <sys/stat.h>
+
+void __wasmfs_load_embedded() {
+
+%s
+
+}
+''' % c_output
+
   if force or len(data_files):
     if jsoutput is None:
       print(ret)