diff --git a/lib/basisu/.gitrepo b/lib/basisu/.gitrepo
index e997a949e8..b55fb5cb1e 100644
--- a/lib/basisu/.gitrepo
+++ b/lib/basisu/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = https://github.com/BinomialLLC/basis_universal.git
 	branch = master
-	commit = 5337227c5d25ddda17d0510c4703b0f10f2b0b13
+	commit = 646a9f826131cb0b9e14b5e4740874808315f83a
 	parent = 02c43d5762c04d67643ec22831300691b610be4f
 	method = merge
 	cmdver = 0.4.3
diff --git a/lib/basisu/README.md b/lib/basisu/README.md
index 17a76cc512..2ef7a61d2f 100644
--- a/lib/basisu/README.md
+++ b/lib/basisu/README.md
@@ -50,6 +50,8 @@ The encoder optionally uses Zstandard's single source file compressor (in zstd/z
 
 The command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is named "basisu". Run basisu without any parameters for help. 
 
+The library and command line tool have no other 3rd party dependencies (that are not already in the repo), so it's pretty easy to build.
+
 To build basisu (without SSE 4.1 support - the default):
 
 ```
@@ -86,11 +88,11 @@ To compress a image to a higher quality UASTC .basis file:
 
 To compress a image to a higher quality UASTC .basis file with RDO post processing, so the .basis file is more compressible:
 
-`basisu -uastc -uastc_level 2 -uastc_rdo_q .75 x.png`
+`basisu -uastc -uastc_level 2 -uastc_rdo_l .75 x.png`
 
 -uastc_level X ranges from 0-4 and controls the UASTC encoder's performance vs. quality tradeoff. Level 0 is very fast, but low quality, level 2 is the default quality, while level 3 is the highest practical quality. Level 4 is impractically slow, but highest quality.
 
--uastc_rdo_q X controls the rate distortion stage's quality setting. The lower this value, the higher the quality, but the larger the compressed file size. Good values to try are between .2-3.0. The default is 1.0. RDO post-processing is currently pretty slow, but we'll be optimizing it over time.
+-uastc_rdo_l X controls the rate distortion stage's quality setting. The lower this value, the higher the quality, but the larger the compressed file size. Good values to try are between .2-3.0. The default is 1.0. RDO post-processing is currently pretty slow, but we'll be optimizing it over time.
 
 UASTC texture video is supported and has been tested. In RDO mode with 7zip LZMA, we've seen average bitrates between 1-2 bpp. ETC1S mode is recommended for texture video, which gets bitrates around .25-.3 bpp.
 
@@ -214,6 +216,69 @@ Both the transcoder and now the compressor (as of 12/17/2020) may be compiled us
 
 To enable compression support compile the JavaScript wrappers in `webgl/transcoding/basis_wrappers.cpp` with `BASISU_SUPPORT_ENCODING` set to 1. See the webgl/encoding directory. 
 
+### Low-level C++ encoder API
+
+You can call the encoder directly, instead of using the command line tool. We'll be adding documentation and some examples by the end of the year. For now, some important notes:
+
+First, ALWAYS call ```basisu::basisu_encoder_init()``` to initialize the library. Otherwise, you'll get undefined behavior or black textures.
+
+Create a job pool, fill in the ```basis_compressor_params``` struct, then call ```basisu::basis_compressor::init()```, then ```basisu::basis_compressor::process()```. Like this for UASTC:
+
+```
+bool test()
+{
+	basisu_encoder_init();
+
+	image img;
+	if (!load_image("test.png", img))
+	{
+		printf("Can't load image\n");
+		return false;
+	}
+
+	basis_compressor_params basisCompressorParams;
+
+	basisCompressorParams.m_source_images.push_back(img);
+	basisCompressorParams.m_perceptual = false;
+	basisCompressorParams.m_mip_srgb = false;
+
+	basisCompressorParams.m_write_output_basis_files = true;
+	basisCompressorParams.m_out_filename = "test.basis";
+
+	basisCompressorParams.m_uastc = true;
+	basisCompressorParams.m_rdo_uastc_multithreading = false;
+	basisCompressorParams.m_multithreading = false;
+	basisCompressorParams.m_debug = true;
+	basisCompressorParams.m_status_output = true;
+	basisCompressorParams.m_compute_stats = true;
+	
+	basisu::job_pool jpool(1);
+	basisCompressorParams.m_pJob_pool = &jpool;
+
+	basisu::basis_compressor basisCompressor;
+	basisu::enable_debug_printf(true);
+
+	bool ok = basisCompressor.init(basisCompressorParams);
+	if (ok)
+	{
+		basisu::basis_compressor::error_code result = basisCompressor.process();
+
+		if (result == basisu::basis_compressor::cECSuccess)
+			printf("Success\n");
+		else
+		{
+			printf("Failure\n");
+			ok = false;
+		}
+	}
+	else
+		printf("Failure\n");
+	return ok;
+}
+```
+
+The command line tool uses this API too, so you can always look at that to see what it does given a set of command line options.
+
 ### Repository Licensing with REUSE
 
 The repository has been updated to be compliant with the REUSE licenese
diff --git a/lib/basisu/basisu_tool.cpp b/lib/basisu/basisu_tool.cpp
index 2ab011518a..24479432a2 100644
--- a/lib/basisu/basisu_tool.cpp
+++ b/lib/basisu/basisu_tool.cpp
@@ -127,6 +127,7 @@ static void print_usage()
 		" -disable_hierarchical_endpoint_codebooks: Disable hierarchical endpoint codebook usage, slower but higher quality on some compression levels\n"
 		" -compare_ssim: Compute and display SSIM of image comparison (slow)\n"
 		" -bench: UASTC benchmark mode, for development only\n"
+		" -resample X Y: Resample all input textures to XxY pixels using a box filter\n"
 		" -resample_factor X: Resample all input textures by scale factor X using a box filter\n"
 		" -no_sse: Forbid all SSE instruction set usage\n"
 		" -validate_etc1s: Validate internal ETC1S compressor's data structures during compression (slower, intended for development).\n"
@@ -392,6 +393,13 @@ class command_line_params
 				
 				arg_count++;
 			}
+			else if (strcasecmp(pArg, "-resample") == 0)
+			{
+				REMAINING_ARGS_CHECK(2);
+				m_comp_params.m_resample_width = atoi(arg_v[arg_index + 1]);
+				m_comp_params.m_resample_height = atoi(arg_v[arg_index + 2]);
+				arg_count += 2;
+			}
 			else if (strcasecmp(pArg, "-resample_factor") == 0)
 			{
 				REMAINING_ARGS_CHECK(1);
diff --git a/lib/basisu/contrib/single_file_transcoder/basisu_transcoder-in.cpp b/lib/basisu/contrib/single_file_transcoder/basisu_transcoder-in.cpp
index 19d358951c..830cd35c99 100644
--- a/lib/basisu/contrib/single_file_transcoder/basisu_transcoder-in.cpp
+++ b/lib/basisu/contrib/single_file_transcoder/basisu_transcoder-in.cpp
@@ -1,17 +1,14 @@
 /**
  * Basis Universal single file library. Generated using:
  * \code
- *	./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -o basisu_transcoder.cpp basisu_transcoder-in.cpp
+ *	./combine.sh -r ../../transcoder -o basisu_transcoder.cpp basisu_transcoder-in.cpp
  * \endcode
- * 
- * \note The script above excludes the BC7 mode 6 tables, a choice reflected in
- * the build options.
  */
 
 /*
  * Transcoder build options for known platforms (iOS has ETC, ASTC and PVRTC;
  * Emscripten adds DXT to iOS's options; Android adds PVRTC2 to Emscripten's
- * options; other platforms build all except BC7 mode 6 and FXT1).
+ * options; other platforms build all except FXT1).
  * 
  * See https://github.com/BinomialLLC/basis_universal#shrinking-the-transcoders-compiled-size
  */
@@ -28,11 +25,14 @@
 	#ifndef __ANDROID__
 		#define BASISD_SUPPORT_PVRTC2 0
 	#endif
-#else
-	#define BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY 0
 #endif
 #define BASISD_SUPPORT_FXT1 0
 
+/*
+ * KTX2 support disabled.
+ */
+#define BASISD_SUPPORT_KTX2 0
+
 #include "basisu_transcoder.cpp"
 
 /**
diff --git a/lib/basisu/contrib/single_file_transcoder/create_transcoder.sh b/lib/basisu/contrib/single_file_transcoder/create_transcoder.sh
index 160f5fda1c..a041d2ada3 100755
--- a/lib/basisu/contrib/single_file_transcoder/create_transcoder.sh
+++ b/lib/basisu/contrib/single_file_transcoder/create_transcoder.sh
@@ -4,8 +4,7 @@
 OUT_FILE="tempbin"
 
 echo "Amalgamating files... this can take a while"
-echo "Note: basisu_transcoder_tables_bc7_m6.inc is excluded"
-./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -o basisu_transcoder.cpp basisu_transcoder-in.cpp
+./combine.sh -r ../../transcoder -o basisu_transcoder.cpp basisu_transcoder-in.cpp
 # Did combining work?
 if [ $? -ne 0 ]; then
   echo "Combine script: FAILED"
diff --git a/lib/basisu/contrib/single_file_transcoder/examples/emscripten.cpp b/lib/basisu/contrib/single_file_transcoder/examples/emscripten.cpp
index 740bd26874..e56ae6af49 100644
--- a/lib/basisu/contrib/single_file_transcoder/examples/emscripten.cpp
+++ b/lib/basisu/contrib/single_file_transcoder/examples/emscripten.cpp
@@ -5,8 +5,8 @@
  * \n
  * Compile using:
  * \code
- *	export CC_FLAGS="-std=c++11 -Wall -Wextra -Werror -Os -g0 -flto --llvm-lto 3 -fno-exceptions -fno-rtti -lGL -DNDEBUG=1"
- *	export EM_FLAGS="-s ENVIRONMENT=web -s WASM=1 --shell-file shell.html --closure 1"
+ *	export "CC_FLAGS=-std=c++11 -Wall -Wextra -Werror -Os -g0 -flto --llvm-lto 3 -fno-exceptions -fno-rtti -lGL -DNDEBUG=1"
+ *	export "EM_FLAGS=-s ENVIRONMENT=web -s WASM=1 --shell-file shell.html --closure 1"
  *	emcc $CC_FLAGS $EM_FLAGS -o out.html emscripten.cpp
  * \endcode
  * Alternatively include \c basisu_transcoder.h and compile \c
diff --git a/lib/basisu/encoder/apg_bmp.c b/lib/basisu/encoder/apg_bmp.c
index ef3d015e40..d342b20fc8 100644
--- a/lib/basisu/encoder/apg_bmp.c
+++ b/lib/basisu/encoder/apg_bmp.c
@@ -247,7 +247,7 @@ unsigned char* apg_bmp_read( const char* filename, int* w, int* h, unsigned int*
   }
 
   // allocate memory for the output pixels block. cast to size_t in case width and height are both the max of 65536 and n_dst_chans > 1
-  unsigned char* dst_img_ptr = malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans );
+  unsigned char* dst_img_ptr = (unsigned char*)malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans );
   if ( !dst_img_ptr ) {
     free( record.data );
     return NULL;
@@ -480,7 +480,7 @@ unsigned int apg_bmp_write( const char* filename, unsigned char* pixels_ptr, int
     dib_hdr.bitmask_b = 0x0000FF00;
   }
 
-  uint8_t* dst_pixels_ptr = malloc( dst_pixels_padded_sz );
+  uint8_t* dst_pixels_ptr = (uint8_t*)malloc( dst_pixels_padded_sz );
   if ( !dst_pixels_ptr ) { return 0; }
   {
     size_t dst_byte_idx = 0;
diff --git a/lib/basisu/encoder/basisu_bc7enc.cpp b/lib/basisu/encoder/basisu_bc7enc.cpp
index 06aa7eb8b1..22fdfa603f 100644
--- a/lib/basisu/encoder/basisu_bc7enc.cpp
+++ b/lib/basisu/encoder/basisu_bc7enc.cpp
@@ -174,9 +174,8 @@ static void astc_init()
 	} // range
 }
 
-static inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w)
+static inline uint32_t astc_interpolate_linear(uint32_t l, uint32_t h, uint32_t w)
 {
-	// This is for linear values, not sRGB.
 	l = (l << 8) | l;
 	h = (h << 8) | h;
 	uint32_t k = (l * (64 - w) + h * w + 32) >> 6;
@@ -230,7 +229,7 @@ void bc7enc_compress_block_init()
 			{
 				uint32_t high = (h << 4) | h;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -259,7 +258,7 @@ void bc7enc_compress_block_init()
 			{
 				uint32_t high = (h << 4) | h;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -288,7 +287,7 @@ void bc7enc_compress_block_init()
 			{
 				uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -317,7 +316,7 @@ void bc7enc_compress_block_init()
 			{
 				uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant;
 				
-				const int k = astc_interpolate(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -346,7 +345,7 @@ void bc7enc_compress_block_init()
 			{
 				uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -375,7 +374,7 @@ void bc7enc_compress_block_init()
 			{
 				uint32_t high = g_astc_sorted_order_unquant[11][h].m_unquant;
 
-				const int k = astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -650,7 +649,7 @@ static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_par
 		uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i];
 		uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i];
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
 	}
 	p.m_c[3] = 255;
 
@@ -689,7 +688,7 @@ static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compresso
 		uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i];
 		uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i];
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
 	}
 	
 	uint64_t total_err = 0;
@@ -728,7 +727,7 @@ static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_p
 		uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant;
 		uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant;
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
 	}
 	p.m_c[3] = 255;
 
@@ -768,7 +767,7 @@ static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_
 		uint32_t low = g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant;
 		uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant;
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
 	}
 	
 	uint64_t total_err = 0;
@@ -807,7 +806,7 @@ static uint64_t pack_astc_range11_5bit_to_one_color(const color_cell_compressor_
 		uint32_t low = g_astc_sorted_order_unquant[11][pResults->m_low_endpoint.m_c[i]].m_unquant;
 		uint32_t high = g_astc_sorted_order_unquant[11][pResults->m_high_endpoint.m_c[i]].m_unquant;
 
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
 	}
 
 	uint64_t total_err = 0;
@@ -863,7 +862,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8
 		for (uint32_t i = 1; i < (N - 1); i++)
 		{
 			for (uint32_t j = 0; j < nc; j++)
-				weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i]));
+				weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate_linear(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i]));
 		}
 	}
 	else
@@ -1300,7 +1299,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color
 	
 	for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++)
 		for (uint32_t c = 0; c < 4; c++)
-			colors[i].m_c[c] = (uint8_t)astc_interpolate(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]);
+			colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]);
 
 	uint64_t total_err = 0;
 	for (uint32_t p = 0; p < pParams->m_num_pixels; p++)
@@ -1815,10 +1814,10 @@ uint64_t color_cell_compression_est_astc(
 	weightedColors[num_weights - 1] = highColor;
 	for (uint32_t i = 1; i < (num_weights - 1); i++)
 	{
-		weightedColors[i].m_c[0] = (uint8_t)astc_interpolate(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]);
-		weightedColors[i].m_c[1] = (uint8_t)astc_interpolate(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]);
-		weightedColors[i].m_c[2] = (uint8_t)astc_interpolate(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]);
-		weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255;
+		weightedColors[i].m_c[0] = (uint8_t)astc_interpolate_linear(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]);
+		weightedColors[i].m_c[1] = (uint8_t)astc_interpolate_linear(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]);
+		weightedColors[i].m_c[2] = (uint8_t)astc_interpolate_linear(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]);
+		weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate_linear(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255;
 	}
 
 	// Compute dots and thresholds
diff --git a/lib/basisu/encoder/basisu_bc7enc.h b/lib/basisu/encoder/basisu_bc7enc.h
index 23469912e2..8d8b7888ca 100644
--- a/lib/basisu/encoder/basisu_bc7enc.h
+++ b/lib/basisu/encoder/basisu_bc7enc.h
@@ -12,6 +12,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#pragma once
 #include "basisu_enc.h"
 #include "../transcoder/basisu_transcoder_uastc.h"
 
diff --git a/lib/basisu/encoder/basisu_comp.cpp b/lib/basisu/encoder/basisu_comp.cpp
index 9a4a1c00be..10f96cec4a 100644
--- a/lib/basisu/encoder/basisu_comp.cpp
+++ b/lib/basisu/encoder/basisu_comp.cpp
@@ -145,6 +145,8 @@ namespace basisu
 			PRINT_BOOL_VALUE(m_rdo_uastc_favor_simpler_modes_in_rdo_mode)
 			PRINT_BOOL_VALUE(m_rdo_uastc_multithreading);
 
+			PRINT_INT_VALUE(m_resample_width);
+			PRINT_INT_VALUE(m_resample_height);
 			PRINT_FLOAT_VALUE(m_resample_factor);
 			debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 1 : 0);
 			if (m_params.m_pGlobal_codebooks)
@@ -465,7 +467,10 @@ namespace basisu
 					return false;
 				}
 
-				printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height());
+				if (m_params.m_status_output)
+				{
+					printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height());
+				}
 
 				// Optionally load another image and put a grayscale version of it into the alpha channel.
 				if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size()))
@@ -541,7 +546,19 @@ namespace basisu
 			file_image.resize(64, 64);
 #endif
 
-			if (m_params.m_resample_factor > 0.0f)
+			if (m_params.m_resample_width > 0 && m_params.m_resample_height > 0)
+			{
+				int new_width = basisu::minimum<int>(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
+				int new_height = basisu::minimum<int>(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
+
+				debug_printf("Resampling to %ix%i\n", new_width, new_height);
+
+				// TODO: A box filter - kaiser looks too sharp on video. Let the caller control this.
+				image temp_img(new_width, new_height);
+				image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");
+				temp_img.swap(file_image);
+			}
+			else if (m_params.m_resample_factor > 0.0f)
 			{
 				int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
 				int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
@@ -1413,7 +1430,10 @@ namespace basisu
 				return false;
 			}
 
-			printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str());
+			if (m_params.m_status_output)
+			{
+				printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str());
+			}
 		}
 
 		size_t comp_size = 0;
diff --git a/lib/basisu/encoder/basisu_comp.h b/lib/basisu/encoder/basisu_comp.h
index 748b872cb8..2c3af968f7 100644
--- a/lib/basisu/encoder/basisu_comp.h
+++ b/lib/basisu/encoder/basisu_comp.h
@@ -222,6 +222,8 @@ namespace basisu
 			m_rdo_uastc_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f),
 			m_rdo_uastc_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f),
 			m_rdo_uastc_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f),
+			m_resample_width(0, 1, 16384),
+			m_resample_height(0, 1, 16384),
 			m_resample_factor(0.0f, .00125f, 100.0f),
 			m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE),
 			m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX),
@@ -305,6 +307,8 @@ namespace basisu
 			m_rdo_uastc_favor_simpler_modes_in_rdo_mode.clear();
 			m_rdo_uastc_multithreading.clear();
 
+			m_resample_width.clear();
+			m_resample_height.clear();
 			m_resample_factor.clear();
 
 			m_pGlobal_codebooks = nullptr;
@@ -437,6 +441,8 @@ namespace basisu
 		bool_param<true> m_rdo_uastc_favor_simpler_modes_in_rdo_mode;
 		bool_param<true> m_rdo_uastc_multithreading;
 
+		param<int> m_resample_width;
+		param<int> m_resample_height;
 		param<float> m_resample_factor;
 		const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks;
 
diff --git a/lib/basisu/encoder/basisu_enc.cpp b/lib/basisu/encoder/basisu_enc.cpp
index f02fb62c11..daaf65badc 100644
--- a/lib/basisu/encoder/basisu_enc.cpp
+++ b/lib/basisu/encoder/basisu_enc.cpp
@@ -195,7 +195,7 @@ namespace basisu
 	{
 		QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));
 	}
-#elif defined(__APPLE__)
+#elif defined(__APPLE__) || defined(__OpenBSD__)
 #include <sys/time.h>
 	inline void query_counter(timer_ticks* pTicks)
 	{
@@ -1779,8 +1779,6 @@ namespace basisu
 			return nullptr;
 		}
 
-		const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel;
-
 		const uint8_t *pSrc = pBuf + sizeof(tga_header);
 		uint32_t bytes_remaining = buf_size - sizeof(tga_header);
 
diff --git a/lib/basisu/encoder/basisu_enc.h b/lib/basisu/encoder/basisu_enc.h
index 05c95cbc3b..0ce011452d 100644
--- a/lib/basisu/encoder/basisu_enc.h
+++ b/lib/basisu/encoder/basisu_enc.h
@@ -1634,6 +1634,14 @@ namespace basisu
 
 				if ((!l_weight) || (!r_weight))
 				{
+					l_children.resize(0);
+					new_l_child.set(0.0f);
+					l_ttsum = 0.0f;
+					l_weight = 0;
+					r_children.resize(0);
+					new_r_child.set(0.0f);
+					r_ttsum = 0.0f;
+					r_weight = 0;
 					TrainingVectorType firstVec;
 					for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
 					{
@@ -1660,7 +1668,7 @@ namespace basisu
 						}
 					}
 
-					if (!l_weight)
+					if ((!l_weight) || (!r_weight))
 						return false;
 				}
 
diff --git a/lib/basisu/encoder/basisu_resampler.cpp b/lib/basisu/encoder/basisu_resampler.cpp
index e193ce83ff..f4cedf0031 100644
--- a/lib/basisu/encoder/basisu_resampler.cpp
+++ b/lib/basisu/encoder/basisu_resampler.cpp
@@ -15,14 +15,6 @@
 #include "basisu_resampler.h"
 #include "basisu_resampler_filters.h"
 
-#ifndef max
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-#endif
-
-#ifndef min
-#define min(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
 #define RESAMPLER_DEBUG 0
 
 namespace basisu
diff --git a/lib/basisu/encoder/cppspmd_sse.h b/lib/basisu/encoder/cppspmd_sse.h
index b39cb82a5f..9a97eeb695 100644
--- a/lib/basisu/encoder/cppspmd_sse.h
+++ b/lib/basisu/encoder/cppspmd_sse.h
@@ -1327,33 +1327,15 @@ struct spmd_kernel
 	CPPSPMD_FORCE_INLINE float reduce_add(vfloat v)	
 	{ 
 		__m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask));
-
-//#if CPPSPMD_SSE2
-#if 1
-		// See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026
-		__m128 shuf   = _mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(2, 3, 0, 1));
-		__m128 sums   = _mm_add_ps(k3210, shuf);
-		shuf          = _mm_movehl_ps(shuf, sums);
-		sums          = _mm_add_ss(sums, shuf);
-		return _mm_cvtss_f32(sums);
-#else
-		// This is pretty slow.
-		__m128 a = _mm_hadd_ps(k3210, k3210);
-		__m128 b = _mm_hadd_ps(a, a);
-		return extractf_ps_x(b);
-#endif
+		__m128 temp = _mm_add_ps(_mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); // Horizontal sum, step 1: add the lane-reversed vector, so temp = {k0+k3, k1+k2, k2+k1, k3+k0}.
+		return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(temp, temp), temp)); // Step 2: movehl puts temp[2] in lane 0, add_ss adds temp[0]; result is (k1+k2)+(k0+k3). NOTE(review): this pairing differs from the previous (k0+k1)+(k2+k3), so float rounding may differ slightly.
 	}
-
+		
 	CPPSPMD_FORCE_INLINE int reduce_add(vint v)
 	{
 		__m128i k3210 = blendv_mask_epi32(_mm_setzero_si128(), v.m_value, m_exec.m_mask);
-
-		// See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026
-		__m128i shuf = _mm_shuffle_epi32(k3210, _MM_SHUFFLE(2, 3, 0, 1));
-		__m128i sums = _mm_add_epi32(k3210, shuf);
-		shuf = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(shuf), _mm_castsi128_ps(sums)));
-		sums = _mm_add_epi32(sums, shuf);
-		return extract_x(sums);
+		__m128i temp = _mm_add_epi32(_mm_shuffle_epi32(k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); // Horizontal sum, step 1: add the lane-reversed vector, so temp = {k0+k3, k1+k2, k2+k1, k3+k0}.
+		return extract_x(_mm_add_epi32(_mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(temp), _mm_castsi128_ps(temp))), temp)); // Step 2: movehl (via float casts, bit pattern unchanged) puts temp[2] in lane 0; lane 0 of the add is then the sum of all four lanes. extract_x presumably returns lane 0 - confirm.
 	}
 
 	#include "cppspmd_math_declares.h"
@@ -1686,6 +1668,12 @@ CPPSPMD_FORCE_INLINE vint uniform_shift_right_epi16(const vint& a, const vint& b
 CPPSPMD_FORCE_INLINE vint undefined_vint() { return vint{ _mm_undefined_si128() }; }
 CPPSPMD_FORCE_INLINE vfloat undefined_vfloat() { return vfloat{ _mm_undefined_ps() }; }
 
+CPPSPMD_FORCE_INLINE vint vint_lane_set(int v0, int v1, int v2, int v3) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } // Builds a vint from per-lane values listed lowest lane first (v0 -> lane 0); _mm_set_epi32 takes the highest element first, hence the reversed forwarding.
+CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set(float v0, float v1, float v2, float v3) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } // Float counterpart: v0 ends up in lane 0 and v3 in lane 3.
+
+CPPSPMD_FORCE_INLINE vint vint_lane_set_r(int v3, int v2, int v1, int v0) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } // Parameters are declared highest lane first (v3..v0), matching _mm_set_epi32's own argument order; v0 still lands in lane 0.
+CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set_r(float v3, float v2, float v1, float v0) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } // Float counterpart with the same highest-lane-first parameter order as _mm_set_ps.
+
 // control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int32's in each 128-bit lane.
 #define VINT_LANE_SHUFFLE_EPI32(a, control) vint(_mm_shuffle_epi32((a).m_value, control))
 
diff --git a/lib/basisu/transcoder/basisu_transcoder.cpp b/lib/basisu/transcoder/basisu_transcoder.cpp
index 29eb3c0d55..0b3733385d 100644
--- a/lib/basisu/transcoder/basisu_transcoder.cpp
+++ b/lib/basisu/transcoder/basisu_transcoder.cpp
@@ -10778,8 +10778,6 @@ namespace basist
 			return false;
 		}
 
-		const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
-
 		if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
 		{
 			BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
@@ -17336,7 +17334,6 @@ namespace basist
 		
 	bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
 	{
-		const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
 		const uint64_t comp_size = m_levels[level_index].m_byte_length;
 		
 		const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length;
@@ -17361,6 +17358,7 @@ namespace basist
 		if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
 		{
 #if BASISD_SUPPORT_KTX2_ZSTD
+			const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
 			size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size);
 			if (ZSTD_isError(actualUncompSize))
 			{