Update wavpack library source

8 years ago · 168fc5be6d
parent e0f36dac15
commit 168fc5be6d
48 changed files with 25302 additions and 9530 deletions
--- a/third_party/wavpack/include/Makefile.am
+++ b/third_party/wavpack/include/Makefile.am
@ -0,0 +1,5 @@
+wpinclude_HEADERS = wavpack.h
+wpincludedir = $(prefix)/include/wavpack
+
+MAINTAINERCLEANFILES = \
+	Makefile.in
--- a/third_party/wavpack/include/wavpack.h
+++ b/third_party/wavpack/include/wavpack.h
@ -1,7 +1,7 @@
 ////////////////////////////////////////////////////////////////////////////
 //                           **** WAVPACK ****                            //
 //                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                Copyright (c) 1998 - 2016 David Bryant.                 //
 //                          All Rights Reserved.                          //
 //      Distributed under the BSD Software License (see license.txt)      //
 ////////////////////////////////////////////////////////////////////////////
@ -16,10 +16,17 @@

 #include <sys/types.h>

-#if defined(_WIN32) && !defined(__MINGW32__)
-#include <stdint.h>
+#if defined(_MSC_VER) && _MSC_VER < 1600
+typedef unsigned __int64 uint64_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int8 uint8_t;
+typedef __int64 int64_t;
+typedef __int32 int32_t;
+typedef __int16 int16_t;
+typedef __int8  int8_t;
 #else
-#include <inttypes.h>
+#include <stdint.h>
 #endif

 // RIFF / wav header formats (these occur at the beginning of both wav files
@ -41,12 +48,12 @@ typedef struct {
 #define ChunkHeaderFormat "4L"

 typedef struct {
-    unsigned short FormatTag, NumChannels;
+    uint16_t FormatTag, NumChannels;
    uint32_t SampleRate, BytesPerSecond;
-    unsigned short BlockAlign, BitsPerSample;
-    unsigned short cbSize, ValidBitsPerSample;
+    uint16_t BlockAlign, BitsPerSample;
+    uint16_t cbSize, ValidBitsPerSample;
    int32_t ChannelMask;
-    unsigned short SubFormat;
+    uint16_t SubFormat;
    char GUID [14];
 } WaveHeader;

@ -62,13 +69,43 @@ typedef struct {
 typedef struct {
    char ckID [4];
    uint32_t ckSize;
-    short version;
-    unsigned char track_no, index_no;
+    int16_t version;
+    unsigned char block_index_u8;
+    unsigned char total_samples_u8;
    uint32_t total_samples, block_index, block_samples, flags, crc;
 } WavpackHeader;

 #define WavpackHeaderFormat "4LS2LLLLL"

+// Macros to access the 40-bit block_index field, which is split across two
+// members of the header: block_index holds the low 32 bits and block_index_u8
+// holds the upper 8 bits.
+
+// GET_BLOCK_INDEX(hdr) evaluates to the combined value as an int64_t.
+
+#define GET_BLOCK_INDEX(hdr) ( (int64_t) (hdr).block_index + ((int64_t) (hdr).block_index_u8 << 32) )
+
+// SET_BLOCK_INDEX(hdr,value) stores the low 32 bits of value in block_index and
+// the next 8 bits in block_index_u8 (anything above that is dropped by the cast
+// to unsigned char). value is evaluated once, but hdr is expanded several times,
+// so it should not have side effects. The expansion declares a local named "tmp",
+// so the value expression must not refer to a caller variable with that name.
+
+#define SET_BLOCK_INDEX(hdr,value) do { \
+    int64_t tmp = (value);              \
+    (hdr).block_index = (uint32_t) tmp; \
+    (hdr).block_index_u8 =              \
+        (unsigned char) (tmp >> 32);    \
+} while (0)
+
+// Macros to access the 40-bit total_samples field, which is complicated by the fact that
+// all 1's in the lower 32 bits indicates "unknown" (regardless of upper 8 bits)
+
+// GET_TOTAL_SAMPLES(hdr) evaluates to -1 when the lower 32 bits are all 1's. Otherwise it
+// combines total_samples (low 32 bits) with total_samples_u8 (upper 8 bits) and subtracts
+// total_samples_u8, which undoes the adjustment applied by SET_TOTAL_SAMPLES.
+
+#define GET_TOTAL_SAMPLES(hdr) ( ((hdr).total_samples == (uint32_t) -1) ? -1 : \
+    (int64_t) (hdr).total_samples + ((int64_t) (hdr).total_samples_u8 << 32) - (hdr).total_samples_u8 )
+
+// SET_TOTAL_SAMPLES(hdr,value) records a negative value as "unknown" by writing all 1's to
+// the lower 32 bits; total_samples_u8 is left untouched in that case. For any other value
+// it first adds value / 0xffffffff, so that the lower 32 bits of a known count never come
+// out as all 1's, and then splits the result across the two members. value is evaluated
+// once, but hdr is expanded several times, so it should not have side effects. The
+// expansion declares a local named "tmp", so the value expression must not refer to a
+// caller variable with that name.
+
+#define SET_TOTAL_SAMPLES(hdr,value) do {       \
+    int64_t tmp = (value);                      \
+    if (tmp < 0)                                \
+        (hdr).total_samples = (uint32_t) -1;    \
+    else {                                      \
+        tmp += (tmp / 0xffffffffLL);            \
+        (hdr).total_samples = (uint32_t) tmp;   \
+        (hdr).total_samples_u8 =                \
+            (unsigned char) (tmp >> 32);        \
+    }                                           \
+} while (0)
+
 // or-values for WavpackHeader.flags
 #define BYTES_STORED    3       // 1-4 bytes/sample
 #define MONO_FLAG       4       // not stereo
@ -95,17 +132,19 @@ typedef struct {
 #define SRATE_MASK      (0xfL << SRATE_LSB)

 #define FALSE_STEREO    0x40000000      // block is stereo, but data is mono
-
-#define IGNORED_FLAGS   0x18000000      // reserved, but ignore if encountered
 #define NEW_SHAPING     0x20000000      // use IIR filter for negative shaping
-#define UNKNOWN_FLAGS   0x80000000      // also reserved, but refuse decode if
-                                        //  encountered

 #define MONO_DATA (MONO_FLAG | FALSE_STEREO)

+// Introduced in WavPack 5.0:
+#define HAS_CHECKSUM    0x10000000      // block contains a trailing checksum
+#define DSD_FLAG        0x80000000      // block is encoded DSD (1-bit PCM)
+
+#define IGNORED_FLAGS   0x08000000      // reserved, but ignore if encountered
+#define UNKNOWN_FLAGS   0x00000000      // we no longer have any of these spares
+
 #define MIN_STREAM_VERS     0x402       // lowest stream version we'll decode
 #define MAX_STREAM_VERS     0x410       // highest stream version we'll decode or encode
-#define CUR_STREAM_VERS     0x407       // stream version we are writing now

 // These are the mask bit definitions for the metadata chunk id byte (see format.txt)

@ -131,11 +170,15 @@ typedef struct {

 #define ID_RIFF_HEADER          (ID_OPTIONAL_DATA | 0x1)
 #define ID_RIFF_TRAILER         (ID_OPTIONAL_DATA | 0x2)
-#define ID_REPLAY_GAIN          (ID_OPTIONAL_DATA | 0x3)    // never used (APEv2)
-#define ID_CUESHEET             (ID_OPTIONAL_DATA | 0x4)    // never used (APEv2)
+#define ID_ALT_HEADER           (ID_OPTIONAL_DATA | 0x3)
+#define ID_ALT_TRAILER          (ID_OPTIONAL_DATA | 0x4)
 #define ID_CONFIG_BLOCK         (ID_OPTIONAL_DATA | 0x5)
 #define ID_MD5_CHECKSUM         (ID_OPTIONAL_DATA | 0x6)
 #define ID_SAMPLE_RATE          (ID_OPTIONAL_DATA | 0x7)
+#define ID_ALT_EXTENSION        (ID_OPTIONAL_DATA | 0x8)
+#define ID_ALT_MD5_CHECKSUM     (ID_OPTIONAL_DATA | 0x9)
+#define ID_NEW_CONFIG_BLOCK     (ID_OPTIONAL_DATA | 0xa)
+#define ID_BLOCK_CHECKSUM       (ID_OPTIONAL_DATA | 0xf)

 ///////////////////////// WavPack Configuration ///////////////////////////////

@ -149,12 +192,13 @@ typedef struct {
    int qmode, flags, xmode, num_channels, float_norm_exp;
    int32_t block_samples, extra_flags, sample_rate, channel_mask;
    unsigned char md5_checksum [16], md5_read;
-    int num_tag_strings;
-    char **tag_strings;
+    int num_tag_strings;                // this field is not used
+    char **tag_strings;                 // this field is not used
 } WavpackConfig;

 #define CONFIG_HYBRID_FLAG      8       // hybrid mode
 #define CONFIG_JOINT_STEREO     0x10    // joint stereo
+#define CONFIG_CROSS_DECORR     0x20    // no-delay cross decorrelation
 #define CONFIG_HYBRID_SHAPE     0x40    // noise shape (hybrid mode only)
 #define CONFIG_FAST_FLAG        0x200   // fast mode
 #define CONFIG_HIGH_FLAG        0x800   // high quality mode
@ -166,6 +210,7 @@ typedef struct {
 #define CONFIG_CREATE_EXE       0x40000 // create executable
 #define CONFIG_CREATE_WVC       0x80000 // create correction file
 #define CONFIG_OPTIMIZE_WVC     0x100000 // maximize hybrid compression
+#define CONFIG_COMPATIBLE_WRITE 0x400000 // write files for decoders < 4.3
 #define CONFIG_CALC_NOISE       0x800000 // calc noise in hybrid mode
 #define CONFIG_EXTRA_MODE       0x2000000 // extra processing mode
 #define CONFIG_SKIP_WVX         0x4000000 // no wvx stream w/ floats & big ints
@ -174,6 +219,32 @@ typedef struct {
 #define CONFIG_PAIR_UNDEF_CHANS 0x20000000 // encode undefined channels in stereo pairs
 #define CONFIG_OPTIMIZE_MONO    0x80000000 // optimize for mono streams posing as stereo

+// The lower 8 bits of qmode indicate the use of new features in version 5 that (presently)
+// only apply to Core Audio Files (CAF) and DSD files, but could apply to other things too.
+// These flags are stored in the file and can be retrieved by a decoder that is aware of
+// them, but the individual bits are meaningless to the library. If ANY of these bits are
+// set then the MD5 sum is written with a new ID so that old decoders will not see it
+// (because these features will cause the MD5 sum to be different and fail).
+
+#define QMODE_BIG_ENDIAN        0x1     // big-endian data format (opposite of WAV format)
+#define QMODE_SIGNED_BYTES      0x2     // 8-bit audio data is signed (opposite of WAV format)
+#define QMODE_UNSIGNED_WORDS    0x4     // audio data (other than 8-bit) is unsigned (opposite of WAV format)
+#define QMODE_REORDERED_CHANS   0x8     // source channels were not Microsoft order, so they were reordered
+#define QMODE_DSD_LSB_FIRST     0x10    // DSD bytes, LSB first (most Sony .dsf files)
+#define QMODE_DSD_MSB_FIRST     0x20    // DSD bytes, MSB first (Philips .dff files)
+#define QMODE_DSD_IN_BLOCKS     0x40    // DSD data is blocked by channels (Sony .dsf only)
+#define QMODE_DSD_AUDIO         (QMODE_DSD_LSB_FIRST | QMODE_DSD_MSB_FIRST)
+
+// The rest of the qmode word is reserved for the private use of the command-line programs
+// and is ignored by the library (and not stored either). They really should not be defined
+// here, but I thought it would be a good idea to have all the definitions together.
+
+#define QMODE_ADOBE_MODE        0x100   // user specified Adobe mode
+#define QMODE_NO_STORE_WRAPPER  0x200   // user specified to not store audio file wrapper (RIFF, CAFF, etc.)
+#define QMODE_CHANS_UNASSIGNED  0x400   // user specified "..." in --channel-order option
+#define QMODE_IGNORE_LENGTH     0x800   // user specified to ignore length in file header
+#define QMODE_RAW_PCM           0x1000  // user specified raw PCM format (no header present)
+
 ////////////// Callbacks used for reading & writing WavPack streams //////////

 typedef struct {
@ -189,18 +260,40 @@ typedef struct {
    int32_t (*write_bytes)(void *id, void *data, int32_t bcount);
 } WavpackStreamReader;

+// Extended version of structure for handling large files and added
+// functionality for truncating and closing files
+
+typedef struct {
+    int32_t (*read_bytes)(void *id, void *data, int32_t bcount);
+    int32_t (*write_bytes)(void *id, void *data, int32_t bcount);
+    int64_t (*get_pos)(void *id);                               // new signature for large files
+    int (*set_pos_abs)(void *id, int64_t pos);                  // new signature for large files
+    int (*set_pos_rel)(void *id, int64_t delta, int mode);      // new signature for large files
+    int (*push_back_byte)(void *id, int c);
+    int64_t (*get_length)(void *id);                            // new signature for large files
+    int (*can_seek)(void *id);
+    int (*truncate_here)(void *id);                             // new function to truncate file at current position
+    int (*close)(void *id);                                     // new function to close file
+} WavpackStreamReader64;
+
 typedef int (*WavpackBlockOutput)(void *id, void *data, int32_t bcount);

 //////////////////////////// function prototypes /////////////////////////////

-// Note: See wputils.c sourcecode for descriptions for using these functions.
-
 typedef void WavpackContext;

 #ifdef __cplusplus
 extern "C" {
 #endif

+#define MAX_WAVPACK_SAMPLES ((1LL << 40) - 257)
+
+WavpackContext *WavpackOpenRawDecoder (
+    void *main_data, int32_t main_size,
+    void *corr_data, int32_t corr_size,
+    int16_t version, char *error, int flags, int norm_offset);
+
+WavpackContext *WavpackOpenFileInputEx64 (WavpackStreamReader64 *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset);
 WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset);
 WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset);

@ -212,6 +305,16 @@ WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int f
 #define OPEN_STREAMING  0x20    // "streaming" mode blindly unpacks blocks
                                // w/o regard to header file position info
 #define OPEN_EDIT_TAGS  0x40    // allow editing of tags
+#define OPEN_FILE_UTF8  0x80    // assume filenames are UTF-8 encoded, not ANSI (Windows only)
+
+// new for version 5
+
+#define OPEN_DSD_NATIVE 0x100   // open DSD files as bitstreams
+                                // (returned as 8-bit "samples" stored in 32-bit words)
+#define OPEN_DSD_AS_PCM 0x200   // open DSD files as 24-bit PCM (decimated 8x)
+#define OPEN_ALT_TYPES  0x400   // application is aware of alternate file types & qmode
+                                // (just affects retrieving wrappers & MD5 checksums)
+#define OPEN_NO_CHECKSUM 0x800  // don't verify block checksums before decoding

 int WavpackGetMode (WavpackContext *wpc);

@ -230,16 +333,25 @@ int WavpackGetMode (WavpackContext *wpc);
 #define MODE_XMODE      0x7000  // mask for extra level (1-6, 0=unknown)
 #define MODE_DNS        0x8000

+int WavpackVerifySingleBlock (unsigned char *buffer, int verify_checksum);
+int WavpackGetQualifyMode (WavpackContext *wpc);
 char *WavpackGetErrorMessage (WavpackContext *wpc);
 int WavpackGetVersion (WavpackContext *wpc);
+char *WavpackGetFileExtension (WavpackContext *wpc);
+unsigned char WavpackGetFileFormat (WavpackContext *wpc);
 uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples);
 uint32_t WavpackGetNumSamples (WavpackContext *wpc);
+int64_t WavpackGetNumSamples64 (WavpackContext *wpc);
+uint32_t WavpackGetNumSamplesInFrame (WavpackContext *wpc);
 uint32_t WavpackGetSampleIndex (WavpackContext *wpc);
+int64_t WavpackGetSampleIndex64 (WavpackContext *wpc);
 int WavpackGetNumErrors (WavpackContext *wpc);
 int WavpackLossyBlocks (WavpackContext *wpc);
 int WavpackSeekSample (WavpackContext *wpc, uint32_t sample);
+int WavpackSeekSample64 (WavpackContext *wpc, int64_t sample);
 WavpackContext *WavpackCloseFile (WavpackContext *wpc);
 uint32_t WavpackGetSampleRate (WavpackContext *wpc);
+uint32_t WavpackGetNativeSampleRate (WavpackContext *wpc);
 int WavpackGetBitsPerSample (WavpackContext *wpc);
 int WavpackGetBytesPerSample (WavpackContext *wpc);
 int WavpackGetNumChannels (WavpackContext *wpc);
@ -247,12 +359,15 @@ int WavpackGetChannelMask (WavpackContext *wpc);
 int WavpackGetReducedChannels (WavpackContext *wpc);
 int WavpackGetFloatNormExp (WavpackContext *wpc);
 int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]);
+void WavpackGetChannelIdentities (WavpackContext *wpc, unsigned char *identities);
+uint32_t WavpackGetChannelLayout (WavpackContext *wpc, unsigned char *reorder);
 uint32_t WavpackGetWrapperBytes (WavpackContext *wpc);
 unsigned char *WavpackGetWrapperData (WavpackContext *wpc);
 void WavpackFreeWrapper (WavpackContext *wpc);
 void WavpackSeekTrailingWrapper (WavpackContext *wpc);
 double WavpackGetProgress (WavpackContext *wpc);
 uint32_t WavpackGetFileSize (WavpackContext *wpc);
+int64_t WavpackGetFileSize64 (WavpackContext *wpc);
 double WavpackGetRatio (WavpackContext *wpc);
 double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc);
 double WavpackGetInstantBitrate (WavpackContext *wpc);
@ -268,7 +383,17 @@ int WavpackDeleteTagItem (WavpackContext *wpc, const char *item);
 int WavpackWriteTag (WavpackContext *wpc);

 WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id);
+void WavpackSetFileInformation (WavpackContext *wpc, char *file_extension, unsigned char file_format);
+
+// Source file format identifiers.
+// NOTE(review): presumably the values passed as file_format to WavpackSetFileInformation()
+// and returned by WavpackGetFileFormat() -- confirm against the implementation.
+
+#define WP_FORMAT_WAV   0       // Microsoft RIFF, including BWF and RF64 variants
+#define WP_FORMAT_W64   1       // Sony Wave64
+#define WP_FORMAT_CAF   2       // Apple CoreAudio
+#define WP_FORMAT_DFF   3       // Philips DSDIFF
+#define WP_FORMAT_DSF   4       // Sony DSD Format
+
 int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples);
+int WavpackSetConfiguration64 (WavpackContext *wpc, WavpackConfig *config, int64_t total_samples, const unsigned char *chan_ids);
+int WavpackSetChannelLayout (WavpackContext *wpc, uint32_t layout_tag, const unsigned char *reorder);
 int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount);
 int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]);
 int WavpackPackInit (WavpackContext *wpc);
@ -282,6 +407,8 @@ void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp);

 void WavpackLittleEndianToNative (void *data, char *format);
 void WavpackNativeToLittleEndian (void *data, char *format);
+void WavpackBigEndianToNative (void *data, char *format);
+void WavpackNativeToBigEndian (void *data, char *format);

 uint32_t WavpackGetLibraryVersion (void);
 const char *WavpackGetLibraryVersionString (void);
--- a/third_party/wavpack/src/Makefile.am
+++ b/third_party/wavpack/src/Makefile.am
@ -0,0 +1,57 @@
+lib_LTLIBRARIES = libwavpack.la
+
+libwavpack_la_SOURCES = \
+	common_utils.c \
+	decorr_utils.c \
+	entropy_utils.c \
+	extra1.c \
+	extra2.c \
+	open_utils.c \
+	open_filename.c \
+	open_legacy.c \
+	open_raw.c \
+	pack.c \
+	pack_dns.c \
+	pack_floats.c \
+	pack_utils.c \
+	read_words.c \
+	tags.c \
+	tag_utils.c \
+	unpack.c \
+	unpack_floats.c \
+	unpack_seek.c \
+	unpack_utils.c \
+	write_words.c
+
+if ENABLE_LEGACY
+libwavpack_la_SOURCES += unpack3.c unpack3_open.c unpack3_seek.c
+endif
+
+if ENABLE_DSD
+libwavpack_la_SOURCES += pack_dsd.c unpack_dsd.c
+endif
+
+if ENABLE_X86ASM
+libwavpack_la_SOURCES += pack_x86.S unpack_x86.S
+endif
+
+if ENABLE_X64ASM
+libwavpack_la_SOURCES += pack_x64.S unpack_x64.S
+endif
+
+if ENABLE_ARMASM
+libwavpack_la_SOURCES += unpack_armv7.S
+endif
+
+noinst_HEADERS = \
+	decorr_tables.h \
+	unpack3.h \
+	wavpack_local.h \
+	wavpack_version.h
+
+libwavpack_la_CFLAGS = $(AM_CFLAGS)
+libwavpack_la_LIBADD = $(AM_LDADD) $(LIBM)
+libwavpack_la_LDFLAGS = -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) -export-symbols-regex '^Wavpack.*$$' -no-undefined
+
+MAINTAINERCLEANFILES = \
+	Makefile.in
--- a/third_party/wavpack/src/bits.c
+++ b/third_party/wavpack/src/bits.c
@ -1,274 +0,0 @@
-////////////////////////////////////////////////////////////////////////////
-//                           **** WAVPACK ****                            //
-//                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
-//                          All Rights Reserved.                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-// bits.c
-
-// This module provides utilities to support the BitStream structure which is
-// used to read and write all WavPack audio data streams. It also contains a
-// wrapper for the stream I/O functions and a set of functions dealing with
-// endian-ness, both for enhancing portability. Finally, a debug wrapper for
-// the malloc() system is provided.
-
-#include "wavpack_local.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <sys/stat.h>
-
-#if defined(WIN32)
-#include <io.h>
-#else
-#if defined(__OS2__)
-#include <io.h>
-#endif
-#include <unistd.h>
-#endif
-
-////////////////////////// Bitstream functions ////////////////////////////////
-
-#if !defined(NO_UNPACK) || defined(INFO_ONLY)
-
-// Open the specified BitStream and associate with the specified buffer.
-
-static void bs_read (Bitstream *bs);
-
-void bs_open_read (Bitstream *bs, void *buffer_start, void *buffer_end)
-{
-    bs->error = bs->sr = bs->bc = 0;
-    bs->ptr = (bs->buf = buffer_start) - 1;
-    bs->end = buffer_end;
-    bs->wrap = bs_read;
-}
-
-// This function is only called from the getbit() and getbits() macros when
-// the BitStream has been exhausted and more data is required. Sinve these
-// bistreams no longer access files, this function simple sets an error and
-// resets the buffer.
-
-static void bs_read (Bitstream *bs)
-{
-    bs->ptr = bs->buf - 1;
-    bs->error = 1;
-}
-
-// This function is called to close the bitstream. It returns the number of
-// full bytes actually read as bits.
-
-uint32_t bs_close_read (Bitstream *bs)
-{
-    uint32_t bytes_read;
-
-    if (bs->bc < sizeof (*(bs->ptr)) * 8)
-        bs->ptr++;
-
-    bytes_read = (uint32_t)(bs->ptr - bs->buf) * sizeof (*(bs->ptr));
-
-    if (!(bytes_read & 1))
-        ++bytes_read;
-
-    CLEAR (*bs);
-    return bytes_read;
-}
-
-#endif
-
-#ifndef NO_PACK
-
-// Open the specified BitStream using the specified buffer pointers. It is
-// assumed that enough buffer space has been allocated for all data that will
-// be written, otherwise an error will be generated.
-
-static void bs_write (Bitstream *bs);
-
-void bs_open_write (Bitstream *bs, void *buffer_start, void *buffer_end)
-{
-    bs->error = bs->sr = bs->bc = 0;
-    bs->ptr = bs->buf = buffer_start;
-    bs->end = buffer_end;
-    bs->wrap = bs_write;
-}
-
-// This function is only called from the putbit() and putbits() macros when
-// the buffer is full, which is now flagged as an error.
-
-static void bs_write (Bitstream *bs)
-{
-    bs->ptr = bs->buf;
-    bs->error = 1;
-}
-
-// This function forces a flushing write of the specified BitStream, and
-// returns the total number of bytes written into the buffer.
-
-uint32_t bs_close_write (Bitstream *bs)
-{
-    uint32_t bytes_written;
-
-    if (bs->error)
-        return (uint32_t) -1;
-
-    while (1) {
-        while (bs->bc)
-            putbit_1 (bs);
-
-        bytes_written = (uint32_t)(bs->ptr - bs->buf) * sizeof (*(bs->ptr));
-
-        if (bytes_written & 1) {
-            putbit_1 (bs);
-        }
-        else
-            break;
-    };
-
-    CLEAR (*bs);
-    return bytes_written;
-}
-
-#endif
-
-/////////////////////// Endian Correction Routines ////////////////////////////
-
-void little_endian_to_native (void *data, char *format)
-{
-    unsigned char *cp = (unsigned char *) data;
-    int32_t temp;
-
-    while (*format) {
-        switch (*format) {
-            case 'L':
-                temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24);
-                * (int32_t *) cp = temp;
-                cp += 4;
-                break;
-
-            case 'S':
-                temp = cp [0] + (cp [1] << 8);
-                * (short *) cp = (short) temp;
-                cp += 2;
-                break;
-
-            default:
-                if (isdigit (*format))
-                    cp += *format - '0';
-
-                break;
-        }
-
-        format++;
-    }
-}
-
-void native_to_little_endian (void *data, char *format)
-{
-    unsigned char *cp = (unsigned char *) data;
-    int32_t temp;
-
-    while (*format) {
-        switch (*format) {
-            case 'L':
-                temp = * (int32_t *) cp;
-                *cp++ = (unsigned char) temp;
-                *cp++ = (unsigned char) (temp >> 8);
-                *cp++ = (unsigned char) (temp >> 16);
-                *cp++ = (unsigned char) (temp >> 24);
-                break;
-
-            case 'S':
-                temp = * (short *) cp;
-                *cp++ = (unsigned char) temp;
-                *cp++ = (unsigned char) (temp >> 8);
-                break;
-
-            default:
-                if (isdigit (*format))
-                    cp += *format - '0';
-
-                break;
-        }
-
-        format++;
-    }
-}
-
-////////////////////////// Debug Wrapper for Malloc ///////////////////////////
-
-#ifdef DEBUG_ALLOC
-
-void *vptrs [512];
-
-static void *add_ptr (void *ptr)
-{
-    int i;
-
-    for (i = 0; i < 512; ++i)
-        if (!vptrs [i]) {
-            vptrs [i] = ptr;
-            break;
-        }
-
-    if (i == 512)
-        error_line ("too many mallocs!");
-
-    return ptr;
-}
-
-static void *del_ptr (void *ptr)
-{
-    int i;
-
-    for (i = 0; i < 512; ++i)
-        if (vptrs [i] == ptr) {
-            vptrs [i] = NULL;
-            break;
-        }
-
-    if (i == 512)
-        error_line ("free invalid ptr!");
-
-    return ptr;
-}
-
-void *malloc_db (uint32_t size)
-{
-    if (size)
-        return add_ptr (malloc (size));
-    else
-        return NULL;
-}
-
-void free_db (void *ptr)
-{
-    if (ptr)
-        free (del_ptr (ptr));
-}
-
-void *realloc_db (void *ptr, uint32_t size)
-{
-    if (ptr && size)
-        return add_ptr (realloc (del_ptr (ptr), size));
-    else if (size)
-        return malloc_db (size);
-    else
-        free_db (ptr);
-
-    return NULL;
-}
-
-int32_t dump_alloc (void)
-{
-    int i, j;
-
-    for (j = i = 0; i < 512; ++i)
-        if (vptrs [i])
-            j++;
-
-    return j;
-}
-
-#endif
--- a/third_party/wavpack/src/common_utils.c
+++ b/third_party/wavpack/src/common_utils.c
@ -0,0 +1,771 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// common_utils.c
+
+// This module provides a lot of the trivial WavPack API functions and several
+// functions that are common to both reading and writing WavPack files (like
+// WavpackCloseFile()). Functions here are restricted to those that have few
+// external dependencies and this is done so that applications that statically
+// link to the WavPack library (like the command-line utilities on Windows)
+// do not need to include the entire library image if they only use a subset
+// of it. This module will be loaded for ANY WavPack application.
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "wavpack_local.h"
+
+#ifndef LIBWAVPACK_VERSION_STRING
+#include "wavpack_version.h"
+#endif
+
+///////////////////////////// local table storage ////////////////////////////
+
+// Table of 15 sample rate values in ascending order.
+// NOTE(review): presumably these are the standard rates (in Hz) selected by the SRATE
+// bits of the block header flags -- confirm against the code that indexes this table.
+
+const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050,
+    24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 };
+
+///////////////////////////// executable code ////////////////////////////////
+
+// This function obtains general information about an open input file and
+// returns a mask with the following bit values:
+
+// MODE_WVC:  a .wvc file has been found and will be used for lossless
+// MODE_LOSSLESS:  file is lossless (either pure or hybrid)
+// MODE_HYBRID:  file is hybrid mode (either lossy or lossless)
+// MODE_FLOAT:  audio data is 32-bit ieee floating point
+// MODE_VALID_TAG:  file contains a valid ID3v1 or APEv2 tag
+// MODE_HIGH:  file was created in "high" mode (information only)
+// MODE_FAST:  file was created in "fast" mode (information only)
+// MODE_EXTRA:  file was created using "extra" mode (information only)
+// MODE_APETAG:  file contains a valid APEv2 tag
+// MODE_SFX:  file was created as a "self-extracting" executable
+// MODE_VERY_HIGH:  file was created in the "very high" mode (or in
+//                  the "high" mode prior to 4.4)
+// MODE_MD5:  file contains an MD5 checksum
+// MODE_XMODE:  level used for extra mode (1-6, 0=unknown)
+// MODE_DNS:  dynamic noise shaping
+
+int WavpackGetMode (WavpackContext *wpc)
+{
+    int result = 0;
+
+    // with no context there is nothing to report
+    if (!wpc)
+        return result;
+
+    // hybrid files are flagged as such; otherwise anything not marked lossy is lossless
+    if (wpc->config.flags & CONFIG_HYBRID_FLAG)
+        result |= MODE_HYBRID;
+    else if (!(wpc->config.flags & CONFIG_LOSSY_MODE))
+        result |= MODE_LOSSLESS;
+
+    // a correction file also means lossless...
+    if (wpc->wvc_flag)
+        result |= (MODE_LOSSLESS | MODE_WVC);
+
+    // ...but any lossy blocks recorded in the context cancel the lossless claim
+    if (wpc->lossy_blocks)
+        result &= ~MODE_LOSSLESS;
+
+    if (wpc->config.flags & CONFIG_FLOAT_DATA)
+        result |= MODE_FLOAT;
+
+    // "very high" always implies "high"; plain "high" is also reported as
+    // "very high" when the first stream's header version is below 0x405
+    if (wpc->config.flags & CONFIG_VERY_HIGH_FLAG)
+        result |= (MODE_HIGH | MODE_VERY_HIGH);
+    else if (wpc->config.flags & CONFIG_HIGH_FLAG) {
+        result |= MODE_HIGH;
+
+        if (wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.version < 0x405)
+            result |= MODE_VERY_HIGH;
+    }
+
+    if (wpc->config.flags & CONFIG_FAST_FLAG)
+        result |= MODE_FAST;
+
+    // the extra-mode level is shifted up to bit 12 alongside the MODE_EXTRA flag
+    if (wpc->config.flags & CONFIG_EXTRA_MODE)
+        result |= (MODE_EXTRA | (wpc->config.xmode << 12));
+
+    if (wpc->config.flags & CONFIG_CREATE_EXE)
+        result |= MODE_SFX;
+
+    if (wpc->config.flags & CONFIG_MD5_CHECKSUM)
+        result |= MODE_MD5;
+
+    // dynamic noise shaping is only reported for hybrid files whose first
+    // stream has a header version of at least 0x407
+    if (wpc->config.flags & CONFIG_HYBRID_FLAG)
+        if (wpc->config.flags & CONFIG_DYNAMIC_SHAPING)
+            if (wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.version >= 0x407)
+                result |= MODE_DNS;
+
+#ifndef NO_TAGS
+    if (valid_tag (&wpc->m_tag)) {
+        result |= MODE_VALID_TAG;
+
+        if (valid_tag (&wpc->m_tag) == 'A')
+            result |= MODE_APETAG;
+    }
+#endif
+
+    // the low byte of qmode is passed through in bits 16-23
+    result |= (wpc->config.qmode << 16) & 0xFF0000;
+
+    return result;
+}
+
+// This function obtains information about specific file features that were
+// added for version 5.0, specifically qualifications added to support CAF
+// and DSD files. Except for indicating the presence of DSD data, these
+// bits are meant to simply indicate the format of the data in the original
+// source file and do NOT indicate how the library will return the data to
+// the application (which is always the same). This means that in general an
+// application that simply wants to play or process the audio data need not
+// be concerned about these. If the file is DSD audio, then either of the
+// QMODE_DSD_LSB_FIRST or QMODE_DSD_MSB_FIRST bits will be set (but the
+// DSD audio is always returned to the caller MSB first).
+
+// QMODE_BIG_ENDIAN        0x1     // big-endian data format (opposite of WAV format)
+// QMODE_SIGNED_BYTES      0x2     // 8-bit audio data is signed (opposite of WAV format)
+// QMODE_UNSIGNED_WORDS    0x4     // audio data (other than 8-bit) is unsigned (opposite of WAV format)
+// QMODE_REORDERED_CHANS   0x8     // source channels were not Microsoft order, so they were reordered
+// QMODE_DSD_LSB_FIRST     0x10    // DSD bytes, LSB first (most Sony .dsf files)
+// QMODE_DSD_MSB_FIRST     0x20    // DSD bytes, MSB first (Philips .dff files)
+// QMODE_DSD_IN_BLOCKS     0x40    // DSD data is blocked by channels (Sony .dsf only)
+
+// Returns the low 8 bits of the context's qmode word, or 0 (no qualifications)
+// when no context is supplied.
+
+int WavpackGetQualifyMode (WavpackContext *wpc)
+{
+    // tolerate a NULL context, as the other query functions in this module do
+    if (!wpc)
+        return 0;
+
+    return wpc->config.qmode & 0xFF;
+}
+
+// This function returns a pointer to a string describing the last error
+// generated by WavPack.
+
+// Returns the context's error_message buffer, or NULL when no context is
+// supplied (callers must check before printing).
+
+char *WavpackGetErrorMessage (WavpackContext *wpc)
+{
+    // tolerate a NULL context, as the other query functions in this module do
+    if (!wpc)
+        return NULL;
+
+    return wpc->error_message;
+}
+
+// Get total number of samples contained in the WavPack file, or -1 if unknown
+
+uint32_t WavpackGetNumSamples (WavpackContext *wpc)
+{
+    // 32-bit interface: ask the 64-bit query and narrow its answer
+    int64_t total = WavpackGetNumSamples64 (wpc);
+
+    return (uint32_t) total;
+}
+
+int64_t WavpackGetNumSamples64 (WavpackContext *wpc)
+{
+    // no context means the count is unknown
+    if (!wpc)
+        return -1;
+
+    return wpc->total_samples;
+}
+
+// Get the current sample index position, or -1 if unknown
+
+uint32_t WavpackGetSampleIndex (WavpackContext *wpc)
+{
+    // 32-bit interface: ask the 64-bit query and narrow its answer
+    int64_t position = WavpackGetSampleIndex64 (wpc);
+
+    return (uint32_t) position;
+}
+
+int64_t WavpackGetSampleIndex64 (WavpackContext *wpc)
+{
+    // no context: position is unknown
+    if (!wpc)
+        return -1;
+
+#ifdef ENABLE_LEGACY
+    // a legacy (stream3) file takes precedence when legacy support is built in
+    if (wpc->stream3)
+        return get_sample_index3 (wpc);
+#endif
+
+    // otherwise the first stream, if there is one, carries the position
+    if (wpc->streams && wpc->streams [0])
+        return wpc->streams [0]->sample_index;
+
+    return -1;
+}
+
+// Get the number of errors encountered so far
+
+int WavpackGetNumErrors (WavpackContext *wpc)
+{
+    // no context, no errors to report
+    if (!wpc)
+        return 0;
+
+    return wpc->crc_errors;
+}
+
+// return TRUE if any uncorrected lossy blocks were actually written or read
+
+int WavpackLossyBlocks (WavpackContext *wpc)
+{
+    // no context, nothing lossy to report
+    if (!wpc)
+        return 0;
+
+    return wpc->lossy_blocks;
+}
+
+// Calculate the progress through the file as a double from 0.0 (for begin)
+// to 1.0 (for done). A return value of -1.0 indicates that the progress is
+// unknown: the context is NULL, the total sample count is unknown (-1) or
+// zero, or the current sample index cannot be determined.
+
+double WavpackGetProgress (WavpackContext *wpc)
+{
+    int64_t sample_index;
+
+    if (!wpc || wpc->total_samples == -1 || wpc->total_samples == 0)
+        return -1.0;
+
+    // WavpackGetSampleIndex64() reports -1 when no position is available; pass that on
+    // as "unknown" instead of dividing it into a tiny negative fraction
+
+    sample_index = WavpackGetSampleIndex64 (wpc);
+
+    if (sample_index == -1)
+        return -1.0;
+
+    return (double) sample_index / wpc->total_samples;
+}
+
+// Return the total size of the WavPack file(s) in bytes (filelen plus
+// file2len). This 32-bit variant casts the sum to uint32_t, so totals that
+// do not fit in 32 bits are truncated. Returns 0 if wpc is NULL.
+
+uint32_t WavpackGetFileSize (WavpackContext *wpc)
+{
+    return (uint32_t) (wpc ? wpc->filelen + wpc->file2len : 0);
+}
+
+// Return the total size of the WavPack file(s) in bytes as a 64-bit value
+// (filelen plus file2len), or 0 if wpc is NULL.
+
+int64_t WavpackGetFileSize64 (WavpackContext *wpc)
+{
+    return wpc ? wpc->filelen + wpc->file2len : 0;
+}
+
+// Compute the compression ratio: the size of the WavPack file(s) divided by
+// the size of the original audio data (total samples * channels * bytes per
+// sample). The result is greater than 0.0 and usually below 1.0; a result
+// above 1.0 means the file grew ("negative" compression). 0.0 is returned
+// when the ratio cannot be determined (NULL context, unknown sample count,
+// no file length, or either size below one byte).
+
+double WavpackGetRatio (WavpackContext *wpc)
+{
+    double unpacked_bytes, packed_bytes;
+
+    if (!wpc || wpc->total_samples == -1 || !wpc->filelen)
+        return 0.0;
+
+    unpacked_bytes = (double) wpc->total_samples * wpc->config.num_channels *
+        wpc->config.bytes_per_sample;
+
+    packed_bytes = (double) wpc->filelen + wpc->file2len;
+
+    return (unpacked_bytes >= 1.0 && packed_bytes >= 1.0) ? packed_bytes / unpacked_bytes : 0.0;
+}
+
+// Compute the average bitrate of the WavPack file in bits per second, or 0.0
+// if it cannot be determined (NULL context, unknown sample count, no file
+// length, a duration under 0.1 seconds, or less than one byte of data). When
+// count_wvc is non-zero the size of any attendant .wvc file is included.
+
+double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc)
+{
+    double seconds, bytes;
+
+    if (!wpc || wpc->total_samples == -1 || !wpc->filelen)
+        return 0.0;
+
+    seconds = (double) wpc->total_samples / WavpackGetSampleRate (wpc);
+    bytes = (double) wpc->filelen + (count_wvc ? wpc->file2len : 0);
+
+    if (seconds >= 0.1 && bytes >= 1.0)
+        return bytes * 8.0 / seconds;
+
+    return 0.0;
+}
+
+// Compute the bitrate of the current WavPack block in bits per second. This
+// is suitable for an "instant" bitrate display and changes roughly 1 to 4
+// times per second. When a legacy (stream3) decoder is present the average
+// bitrate (including any .wvc file) is returned instead. A result of 0.0
+// means the bitrate cannot be determined.
+
+double WavpackGetInstantBitrate (WavpackContext *wpc)
+{
+    double block_seconds, block_bytes = 0;
+    int stream_index;
+
+    if (!wpc)
+        return 0.0;
+
+    if (wpc->stream3)
+        return WavpackGetAverageBitrate (wpc, TRUE);
+
+    if (!wpc->streams || !wpc->streams [0] || !wpc->streams [0]->wphdr.block_samples)
+        return 0.0;
+
+    block_seconds = (double) wpc->streams [0]->wphdr.block_samples / WavpackGetSampleRate (wpc);
+
+    // add up the chunk sizes recorded in the headers of every buffered block (both the
+    // main and the second buffer of each stream)
+
+    for (stream_index = 0; stream_index < wpc->num_streams; stream_index++) {
+        if (wpc->streams [stream_index]->blockbuff)
+            block_bytes += ((WavpackHeader *) wpc->streams [stream_index]->blockbuff)->ckSize;
+
+        if (wpc->streams [stream_index]->block2buff)
+            block_bytes += ((WavpackHeader *) wpc->streams [stream_index]->block2buff)->ckSize;
+    }
+
+    if (block_seconds > 0.0 && block_bytes >= 1.0)
+        return block_bytes * 8.0 / block_seconds;
+
+    return 0.0;
+}
+
+// Retrieve the Core Audio File channel layout. Many CAF layouts do not follow the Microsoft
+// channel ordering that WavPack requires internally (at least for the channels present in
+// the "channel mask"), so besides the layout tag this function can also hand back the
+// reordering string (if one was stored in the file), which lets an unpacker put the channels
+// back into the specified layout when it wants to restore the CAF order. The channel count
+// of the layout is the low 8 bits of the layout word (and should probably match the number
+// of channels in the file); when "reorder" is non-NULL it must have room for that many
+// bytes. All reordering is done outside of this library, and when reordering was done the
+// appropriate qmode bit will be set.
+//
+// Note: Normally an application would not call this unless it specifically wanted to
+// restore a non-standard channel order (to check an MD5, for example) or obtain the Core
+// Audio channel layout ID. For simple decoding for playback the channel_mask provides all
+// the information required unless non-Microsoft channels are involved, in which case
+// WavpackGetChannelIdentities() provides the identities of the other channels (if known).
+
+uint32_t WavpackGetChannelLayout (WavpackContext *wpc, unsigned char *reorder)
+{
+    // copy the reordering string only if the caller asked for it, one is stored, and the
+    // layout declares a non-zero channel count
+
+    if (reorder && wpc->channel_reordering && (wpc->channel_layout & 0xff))
+        memcpy (reorder, wpc->channel_reordering, wpc->channel_layout & 0xff);
+
+    return wpc->channel_layout;
+}
+
+// Report the identity of EVERY channel in the file. The standard Microsoft channels come
+// first, in order, numbered 1-18; any non-Microsoft channels follow (in any order) with
+// values from 33-254. The value 0x00 is invalid and 0xFF marks an "unknown" or "unassigned"
+// channel. The result is NULL terminated, so the caller must provide room for the number of
+// channels indicated by WavpackGetNumChannels(), plus one.
+//
+// The identities are given in the actual order of the channels in the WavPack file (i.e.,
+// the order returned by WavpackUnpackSamples()). If the file includes a "reordering" string
+// because the source was not in Microsoft order, that is NOT taken into account here; it
+// only matters for MD5 verification or when the original order/file must be restored (like
+// wvunpack does).
+
+void WavpackGetChannelIdentities (WavpackContext *wpc, unsigned char *identities)
+{
+    int remaining = wpc->config.num_channels, bit_number = 1;
+    uint32_t mask = wpc->config.channel_mask;
+    unsigned char *extra = wpc->channel_identities;
+    unsigned char *dst = identities;
+
+    for (; remaining; --remaining) {
+        if (mask) {
+            // advance to the next set bit of the mask; its 1-based position is the identity
+            for (; !(mask & 1); mask >>= 1)
+                bit_number++;
+
+            *dst++ = bit_number++;
+            mask >>= 1;
+        }
+        else if (extra && *extra)       // mask exhausted: use the stored identities
+            *dst++ = *extra++;
+        else                            // nothing stored (or string ended): unknown
+            *dst++ = 0xff;
+    }
+
+    *dst = 0;
+}
+
+// For local use only. Install a callback to be executed when WavpackCloseFile() is called,
+// usually used to dump some statistics accumulated during encode or decode. The function
+// pointer is stored in wpc->close_callback, replacing whatever was stored there before.
+// Note that wpc is dereferenced without a NULL check.
+
+void install_close_callback (WavpackContext *wpc, void cb_func (void *wpc))
+{
+    wpc->close_callback = cb_func;
+}
+
+// Close the specified WavPack file and release all resources used by it.
+// The installed close callback (if any) runs first, while the context is
+// still fully intact. Passing NULL is harmless. Always returns NULL so the
+// caller can clear its pointer in the same statement.
+
+WavpackContext *WavpackCloseFile (WavpackContext *wpc)
+{
+    if (!wpc)
+        return NULL;
+
+    if (wpc->close_callback)
+        wpc->close_callback (wpc);
+
+    if (wpc->streams) {
+        // free_streams() releases everything except the default stream [0] itself
+        free_streams (wpc);
+
+        if (wpc->streams [0])
+            free (wpc->streams [0]);
+
+        free (wpc->streams);
+    }
+
+#ifdef ENABLE_LEGACY
+    if (wpc->stream3)
+        free_stream3 (wpc);
+#endif
+
+    if (wpc->reader && wpc->reader->close && wpc->wv_in)
+        wpc->reader->close (wpc->wv_in);
+
+    if (wpc->reader && wpc->reader->close && wpc->wvc_in)
+        wpc->reader->close (wpc->wvc_in);
+
+    WavpackFreeWrapper (wpc);
+
+    // NOTE(review): channel_identities is released here on the assumption that, like
+    // channel_reordering, it is heap-allocated and owned by the context - confirm
+    // against the code that assigns it
+
+    if (wpc->channel_identities)
+        free (wpc->channel_identities);
+
+    if (wpc->channel_reordering)
+        free (wpc->channel_reordering);
+
+#ifndef NO_TAGS
+    free_tag (&wpc->m_tag);
+#endif
+
+#ifdef ENABLE_DSD
+    if (wpc->decimation_context)
+        decimate_dsd_destroy (wpc->decimation_context);
+#endif
+
+    free (wpc);
+
+    return NULL;
+}
+
+// These routines are used to access (and free) header and trailer data that
+// was retrieved from the Wavpack file. The header will be available before
+// the samples are decoded and the trailer will be available after all samples
+// have been read.
+
+// Returns the number of wrapper bytes currently held (wpc->wrapper_bytes), or
+// 0 if wpc is NULL.
+
+uint32_t WavpackGetWrapperBytes (WavpackContext *wpc)
+{
+    return wpc ? wpc->wrapper_bytes : 0;
+}
+
+// Returns the context's wrapper data pointer (wpc->wrapper_data) without
+// copying it, or NULL if wpc is NULL. WavpackFreeWrapper() frees this buffer,
+// so the pointer must not be used after that call.
+
+unsigned char *WavpackGetWrapperData (WavpackContext *wpc)
+{
+    return wpc ? wpc->wrapper_data : NULL;
+}
+
+// Release the wrapper data held by the context (if any) and reset the stored
+// pointer and byte count. Does nothing for a NULL context or when no wrapper
+// data is present.
+
+void WavpackFreeWrapper (WavpackContext *wpc)
+{
+    if (!wpc || !wpc->wrapper_data)
+        return;
+
+    free (wpc->wrapper_data);
+    wpc->wrapper_bytes = 0;
+    wpc->wrapper_data = NULL;
+}
+
+// Returns the sample rate of the specified WavPack file. When the context
+// has a non-zero dsd_multiplier the configured rate is scaled by it. A NULL
+// context yields 44100.
+
+uint32_t WavpackGetSampleRate (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 44100;
+
+    if (wpc->dsd_multiplier)
+        return wpc->config.sample_rate * wpc->dsd_multiplier;
+
+    return wpc->config.sample_rate;
+}
+
+// Returns the native sample rate of the specified WavPack file. For DSD
+// files this is the native rate (configured rate * dsd_multiplier * 8)
+// rather than the "byte" rate that is used for seeking, duration, etc., and
+// it would generally be used just for user facing reports. A NULL context
+// yields 44100.
+
+uint32_t WavpackGetNativeSampleRate (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 44100;
+
+    if (wpc->dsd_multiplier)
+        return wpc->config.sample_rate * wpc->dsd_multiplier * 8;
+
+    return wpc->config.sample_rate;
+}
+
+// Returns the number of channels of the specified WavPack file. Note that
+// this is the actual number of channels contained in the file even if the
+// OPEN_2CH_MAX flag was specified when the file was opened. Returns 2 if
+// wpc is NULL.
+
+int WavpackGetNumChannels (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.num_channels : 2;
+}
+
+// Returns the standard Microsoft channel mask for the specified WavPack
+// file. A value of zero indicates that there is no speaker assignment
+// information. Zero is also returned if wpc is NULL.
+
+int WavpackGetChannelMask (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.channel_mask : 0;
+}
+
+// Return the normalization value for floating point data (valid only
+// if floating point data is present). A value of 127 indicates that
+// the floating point range is +/- 1.0. Higher values indicate a
+// larger floating point range. Note that wpc is dereferenced without
+// a NULL check.
+
+int WavpackGetFloatNormExp (WavpackContext *wpc)
+{
+    return wpc->config.float_norm_exp;
+}
+
+// Returns the actual number of valid bits per sample contained in the
+// original file, which may or may not be a multiple of 8. Floating data
+// always has 32 bits, integers may be from 1 to 32 bits each. When this
+// value is not a multiple of 8, then the "extra" bits are located in the
+// LSBs of the results. That is, values are right justified when unpacked
+// into ints, but are left justified in the number of bytes used by the
+// original data. Returns 16 if wpc is NULL.
+
+int WavpackGetBitsPerSample (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.bits_per_sample : 16;
+}
+
+// Returns the number of bytes used for each sample (1 to 4) in the original
+// file. This is required information for the user of this module because the
+// audio data is returned in the LOWER bytes of the long buffer and must be
+// left-shifted 8, 16, or 24 bits if normalized longs are required. Returns 2
+// if wpc is NULL.
+
+int WavpackGetBytesPerSample (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.bytes_per_sample : 2;
+}
+
+// When the file was opened with the OPEN_2CH_MAX flag, this reports the
+// number of channels actually decoded from the file (which may or may not be
+// fewer than the channels the file contains, but is always 1 or 2; normally
+// the front left and right channels of a multichannel file). When no reduced
+// channel count is recorded the file's full channel count is returned, and a
+// NULL context yields 2.
+
+int WavpackGetReducedChannels (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 2;
+
+    if (wpc->reduced_channels)
+        return wpc->reduced_channels;
+
+    return wpc->config.num_channels;
+}
+
+// Release the raw WavPack block buffers and the per-stream work areas of
+// every allocated stream, then discard all of the additional streams. The
+// default stream ([0]) itself is always kept around (only its buffers are
+// released) and it becomes the current stream again.
+
+void free_streams (WavpackContext *wpc)
+{
+    int index;
+
+    // walk from the last allocated stream down to stream 0; free() accepts NULL, so
+    // buffers that were never allocated need no special handling
+
+    for (index = wpc->num_streams; index--;) {
+        free (wpc->streams [index]->blockbuff);
+        wpc->streams [index]->blockbuff = NULL;
+
+        free (wpc->streams [index]->block2buff);
+        wpc->streams [index]->block2buff = NULL;
+
+        free (wpc->streams [index]->sample_buffer);
+        wpc->streams [index]->sample_buffer = NULL;
+
+        free (wpc->streams [index]->dc.shaping_data);
+        wpc->streams [index]->dc.shaping_data = NULL;
+
+#ifdef ENABLE_DSD
+        free (wpc->streams [index]->dsd.probabilities);
+        wpc->streams [index]->dsd.probabilities = NULL;
+
+        free (wpc->streams [index]->dsd.summed_probabilities);
+        wpc->streams [index]->dsd.summed_probabilities = NULL;
+
+        // the lookup table is an array of history_bins separately allocated entries
+        if (wpc->streams [index]->dsd.value_lookup) {
+            int bin;
+
+            for (bin = 0; bin < wpc->streams [index]->dsd.history_bins; ++bin)
+                free (wpc->streams [index]->dsd.value_lookup [bin]);
+
+            free (wpc->streams [index]->dsd.value_lookup);
+            wpc->streams [index]->dsd.value_lookup = NULL;
+        }
+
+        free (wpc->streams [index]->dsd.ptable);
+        wpc->streams [index]->dsd.ptable = NULL;
+#endif
+
+        // every stream except the default one is released entirely
+        if (index) {
+            free (wpc->streams [index]);
+            wpc->streams [index] = NULL;
+            wpc->num_streams--;
+        }
+    }
+
+    wpc->current_stream = 0;
+}
+
+// Adjust the exponent of each of the num_values values in the buffer by
+// delta_exp, in place. The buffer is accessed as an array of f32 and is
+// manipulated through the get_exponent / set_exponent / set_mantissa macros
+// (defined elsewhere). A delta_exp of zero leaves the buffer untouched.
+// NOTE(review): presumably f32 holds IEEE-754 single-precision bit patterns,
+// in which case exponent 255 with a zero mantissa encodes infinity - confirm
+// against the macro definitions.
+
+void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp)
+{
+    f32 *fvalues = (f32 *) values;
+    int exp;
+
+    if (!delta_exp)
+        return;
+
+    while (num_values--) {
+        // a zero exponent, or an adjusted exponent that would drop to zero or below,
+        // makes the whole value zero
+        if ((exp = get_exponent (*fvalues)) == 0 || exp + delta_exp <= 0)
+            *fvalues = 0;
+        // an exponent that is already 255, or reaches 255 or more after the adjustment
+        // (note that exp is updated inside this condition), is pinned to exponent 255
+        // with the mantissa cleared
+        else if (exp == 255 || (exp += delta_exp) >= 255) {
+            set_exponent (*fvalues, 255);
+            set_mantissa (*fvalues, 0);
+        }
+        // otherwise just store the adjusted exponent computed in the condition above
+        else
+            set_exponent (*fvalues, exp);
+
+        fvalues++;
+    }
+}
+
+// Convert a packed structure stored in little-endian byte order to the
+// native byte order, in place. The format string describes the layout, one
+// character per field: 'D' is an 8-byte value, 'L' a 4-byte value, 'S' a
+// 2-byte value, and a decimal digit skips that many bytes unchanged. Any
+// other character is ignored.
+//
+// The bytes are assembled with unsigned arithmetic (so no bit is ever shifted
+// into a sign bit) and stored with memcpy(), so the fields do not need to be
+// aligned for their type.
+
+void WavpackLittleEndianToNative (void *data, char *format)
+{
+    unsigned char *cp = (unsigned char *) data;
+    uint64_t wide;
+    uint32_t word;
+    uint16_t half;
+
+    while (*format) {
+        switch (*format) {
+            case 'D':
+                wide = cp [0] | ((uint64_t) cp [1] << 8) | ((uint64_t) cp [2] << 16) | ((uint64_t) cp [3] << 24) |
+                    ((uint64_t) cp [4] << 32) | ((uint64_t) cp [5] << 40) | ((uint64_t) cp [6] << 48) | ((uint64_t) cp [7] << 56);
+                memcpy (cp, &wide, sizeof (wide));
+                cp += 8;
+                break;
+
+            case 'L':
+                word = cp [0] | ((uint32_t) cp [1] << 8) | ((uint32_t) cp [2] << 16) | ((uint32_t) cp [3] << 24);
+                memcpy (cp, &word, sizeof (word));
+                cp += 4;
+                break;
+
+            case 'S':
+                half = (uint16_t) (cp [0] | (cp [1] << 8));
+                memcpy (cp, &half, sizeof (half));
+                cp += 2;
+                break;
+
+            default:
+                // the <ctype.h> functions require a value representable as unsigned char
+                if (isdigit ((unsigned char) *format))
+                    cp += *format - '0';
+
+                break;
+        }
+
+        format++;
+    }
+}
+
+// Convert a packed structure from the native byte order to little-endian
+// byte order, in place. The format string describes the layout, one
+// character per field: 'D' is an 8-byte value, 'L' a 4-byte value, 'S' a
+// 2-byte value, and a decimal digit skips that many bytes unchanged. Any
+// other character is ignored.
+//
+// Each field is loaded with memcpy() into an unsigned temporary, so the
+// fields do not need to be aligned and no negative value is ever shifted.
+
+void WavpackNativeToLittleEndian (void *data, char *format)
+{
+    unsigned char *cp = (unsigned char *) data;
+    uint64_t wide;
+    uint32_t word;
+    uint16_t half;
+
+    while (*format) {
+        switch (*format) {
+            case 'D':
+                memcpy (&wide, cp, sizeof (wide));
+                *cp++ = (unsigned char) wide;
+                *cp++ = (unsigned char) (wide >> 8);
+                *cp++ = (unsigned char) (wide >> 16);
+                *cp++ = (unsigned char) (wide >> 24);
+                *cp++ = (unsigned char) (wide >> 32);
+                *cp++ = (unsigned char) (wide >> 40);
+                *cp++ = (unsigned char) (wide >> 48);
+                *cp++ = (unsigned char) (wide >> 56);
+                break;
+
+            case 'L':
+                memcpy (&word, cp, sizeof (word));
+                *cp++ = (unsigned char) word;
+                *cp++ = (unsigned char) (word >> 8);
+                *cp++ = (unsigned char) (word >> 16);
+                *cp++ = (unsigned char) (word >> 24);
+                break;
+
+            case 'S':
+                memcpy (&half, cp, sizeof (half));
+                *cp++ = (unsigned char) half;
+                *cp++ = (unsigned char) (half >> 8);
+                break;
+
+            default:
+                // the <ctype.h> functions require a value representable as unsigned char
+                if (isdigit ((unsigned char) *format))
+                    cp += *format - '0';
+
+                break;
+        }
+
+        format++;
+    }
+}
+
+// Convert a packed structure stored in big-endian byte order to the native
+// byte order, in place. The format string describes the layout, one
+// character per field: 'D' is an 8-byte value, 'L' a 4-byte value, 'S' a
+// 2-byte value, and a decimal digit skips that many bytes unchanged. Any
+// other character is ignored.
+//
+// The bytes are assembled with unsigned arithmetic (so no bit is ever shifted
+// into a sign bit) and stored with memcpy(), so the fields do not need to be
+// aligned for their type.
+
+void WavpackBigEndianToNative (void *data, char *format)
+{
+    unsigned char *cp = (unsigned char *) data;
+    uint64_t wide;
+    uint32_t word;
+    uint16_t half;
+
+    while (*format) {
+        switch (*format) {
+            case 'D':
+                wide = cp [7] | ((uint64_t) cp [6] << 8) | ((uint64_t) cp [5] << 16) | ((uint64_t) cp [4] << 24) |
+                    ((uint64_t) cp [3] << 32) | ((uint64_t) cp [2] << 40) | ((uint64_t) cp [1] << 48) | ((uint64_t) cp [0] << 56);
+                memcpy (cp, &wide, sizeof (wide));
+                cp += 8;
+                break;
+
+            case 'L':
+                word = cp [3] | ((uint32_t) cp [2] << 8) | ((uint32_t) cp [1] << 16) | ((uint32_t) cp [0] << 24);
+                memcpy (cp, &word, sizeof (word));
+                cp += 4;
+                break;
+
+            case 'S':
+                half = (uint16_t) (cp [1] | (cp [0] << 8));
+                memcpy (cp, &half, sizeof (half));
+                cp += 2;
+                break;
+
+            default:
+                // the <ctype.h> functions require a value representable as unsigned char
+                if (isdigit ((unsigned char) *format))
+                    cp += *format - '0';
+
+                break;
+        }
+
+        format++;
+    }
+}
+
+// Convert a packed structure from the native byte order to big-endian byte
+// order, in place. The format string describes the layout, one character per
+// field: 'D' is an 8-byte value, 'L' a 4-byte value, 'S' a 2-byte value, and
+// a decimal digit skips that many bytes unchanged. Any other character is
+// ignored.
+//
+// Each field is loaded with memcpy() into an unsigned temporary, so the
+// fields do not need to be aligned and no negative value is ever shifted.
+
+void WavpackNativeToBigEndian (void *data, char *format)
+{
+    unsigned char *cp = (unsigned char *) data;
+    uint64_t wide;
+    uint32_t word;
+    uint16_t half;
+
+    while (*format) {
+        switch (*format) {
+            case 'D':
+                memcpy (&wide, cp, sizeof (wide));
+                *cp++ = (unsigned char) (wide >> 56);
+                *cp++ = (unsigned char) (wide >> 48);
+                *cp++ = (unsigned char) (wide >> 40);
+                *cp++ = (unsigned char) (wide >> 32);
+                *cp++ = (unsigned char) (wide >> 24);
+                *cp++ = (unsigned char) (wide >> 16);
+                *cp++ = (unsigned char) (wide >> 8);
+                *cp++ = (unsigned char) wide;
+                break;
+
+            case 'L':
+                memcpy (&word, cp, sizeof (word));
+                *cp++ = (unsigned char) (word >> 24);
+                *cp++ = (unsigned char) (word >> 16);
+                *cp++ = (unsigned char) (word >> 8);
+                *cp++ = (unsigned char) word;
+                break;
+
+            case 'S':
+                memcpy (&half, cp, sizeof (half));
+                *cp++ = (unsigned char) (half >> 8);
+                *cp++ = (unsigned char) half;
+                break;
+
+            default:
+                // the <ctype.h> functions require a value representable as unsigned char
+                if (isdigit ((unsigned char) *format))
+                    cp += *format - '0';
+
+                break;
+        }
+
+        format++;
+    }
+}
+
+// Returns the library version packed into a single integer: the major
+// version shifted left 16 bits, the minor version shifted left 8 bits and
+// the micro version in the lowest bits, all OR'ed together.
+
+uint32_t WavpackGetLibraryVersion (void)
+{
+    return (LIBWAVPACK_MAJOR<<16)
+          |(LIBWAVPACK_MINOR<<8)
+          |(LIBWAVPACK_MICRO<<0);
+}
+
+// Returns the library version as a string (the LIBWAVPACK_VERSION_STRING
+// constant).
+
+const char *WavpackGetLibraryVersionString (void)
+{
+    return LIBWAVPACK_VERSION_STRING;
+}
+
--- a/third_party/wavpack/src/decorr_tables.h
+++ b/third_party/wavpack/src/decorr_tables.h
--- a/third_party/wavpack/src/decorr_utils.c
+++ b/third_party/wavpack/src/decorr_utils.c
@ -0,0 +1,204 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// decorr_utils.c
+
+// This module contains the functions that process metadata blocks that are
+// specific to the decorrelator. These would be called any time a WavPack
+// block was parsed. These are in a module separate from the actual unpack
+// decorrelation code (unpack.c) so that if an application just wants to get
+// information from WavPack files (rather than actually decoding audio) then
+// less code needs to be linked.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// executable code ////////////////////////////////
+
+// Parse the decorrelation terms held in the given metadata block and store
+// them in the stream's decorr_passes array. Each metadata byte carries one
+// pass: the low 5 bits encode the term (biased by 5) and the top 3 bits the
+// delta (0 to 7, all values valid). Valid terms are -3 to 8 (excluding 0),
+// plus 17 & 18; other values are reserved and are rejected for now, as are
+// negative terms in mono data. Note that the terms are stored in the opposite
+// order in the decorr_passes array compared to packing. Returns TRUE on
+// success and FALSE for too many terms or an invalid term.
+
+int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    unsigned char *src = (unsigned char *)wpmd->data;
+    int remaining = wpmd->byte_length;
+    struct decorr_pass *dpp;
+
+    if (remaining > MAX_NTERMS)
+        return FALSE;
+
+    wps->num_terms = remaining;
+    dpp = wps->decorr_passes + remaining;
+
+    // fill the array from its last used entry back to the first
+    while (remaining--) {
+        unsigned char packed = *src++;
+
+        --dpp;
+        dpp->term = (int)(packed & 0x1f) - 5;
+        dpp->delta = (packed >> 5) & 0x7;
+
+        if (!dpp->term || dpp->term < -3 || dpp->term > 18)
+            return FALSE;
+
+        if (dpp->term > MAX_TERM && dpp->term < 17)
+            return FALSE;
+
+        if ((wps->wphdr.flags & MONO_DATA) && dpp->term < 0)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Parse the decorrelation weights held in the given metadata block and store
+// them in the stream's decorr_passes array. The weights range +/-1024, but
+// are rounded and truncated to fit in signed chars for metadata storage (and
+// are expanded again with restore_weight()). Stereo data carries a pair of
+// weights (A then B) per term, mono data a single weight. Weights are given
+// starting from the "last" term (first during encode); any terms without a
+// stored weight are left at zero. Returns FALSE if the block holds more
+// weights than there are terms.
+
+int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    const int stereo = !(wps->wphdr.flags & MONO_DATA);
+    int termcnt = stereo ? wpmd->byte_length / 2 : wpmd->byte_length;
+    char *src = (char *)wpmd->data;
+    int pass;
+
+    if (termcnt > wps->num_terms)
+        return FALSE;
+
+    for (pass = 0; pass < wps->num_terms; pass++) {
+        wps->decorr_passes [pass].weight_B = 0;
+        wps->decorr_passes [pass].weight_A = 0;
+    }
+
+    // hand out the stored weights from the last pass back toward the first
+    for (pass = wps->num_terms - 1; pass >= 0 && termcnt--; pass--) {
+        struct decorr_pass *dpp = wps->decorr_passes + pass;
+
+        dpp->weight_A = restore_weight (*src++);
+
+        if (stereo)
+            dpp->weight_B = restore_weight (*src++);
+    }
+
+    return TRUE;
+}
+
+// Read decorrelation samples from specified metadata block into the
+// decorr_passes array. The samples are signed 32-bit values, but are
+// converted to signed log2 values for storage in metadata. Values are
+// stored for both channels and are specified from the "last" term
+// (first during encode) with unspecified samples set to zero. The
+// number of samples stored varies with the actual term value, so
+// those must obviously come first in the metadata.
+//
+// Every stored value is a 16-bit little-endian quantity that is expanded
+// with wp_exp2s(). The function returns FALSE if the block ends in the
+// middle of a group of values, and otherwise returns TRUE only when the
+// parsing stopped exactly at the end of the block.
+
+int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    unsigned char *byteptr = (unsigned char *)wpmd->data;
+    unsigned char *endptr = byteptr + wpmd->byte_length;
+    struct decorr_pass *dpp;
+    int tcount;
+
+    // reset the sample history of every active pass (CLEAR presumably zero-fills its
+    // argument); this loop also leaves dpp pointing just past the last active pass
+    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
+        CLEAR (dpp->samples_A);
+        CLEAR (dpp->samples_B);
+    }
+
+    // for stream version 0x402 hybrid blocks, the data starts with one error value per
+    // channel (2 bytes for mono data, 4 bytes otherwise)
+    if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) {
+        if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr)
+            return FALSE;
+
+        wps->dc.error [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+        byteptr += 2;
+
+        if (!(wps->wphdr.flags & MONO_DATA)) {
+            wps->dc.error [1] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+            byteptr += 2;
+        }
+    }
+
+    // walk the passes from the last one back to the first for as long as data remains
+    while (dpp-- > wps->decorr_passes && byteptr < endptr)
+        // terms above MAX_TERM store two samples per channel
+        if (dpp->term > MAX_TERM) {
+            if (byteptr + (wps->wphdr.flags & MONO_DATA ? 4 : 8) > endptr)
+                return FALSE;
+
+            dpp->samples_A [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+            dpp->samples_A [1] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8)));
+            byteptr += 4;
+
+            if (!(wps->wphdr.flags & MONO_DATA)) {
+                dpp->samples_B [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+                dpp->samples_B [1] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8)));
+                byteptr += 4;
+            }
+        }
+        // negative terms store one sample for A and one for B (always 4 bytes; the mono
+        // flag is not consulted in this branch)
+        else if (dpp->term < 0) {
+            if (byteptr + 4 > endptr)
+                return FALSE;
+
+            dpp->samples_A [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+            dpp->samples_B [0] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8)));
+            byteptr += 4;
+        }
+        // the remaining terms store "term" samples per channel, with the A and B values
+        // interleaved when the data is not mono
+        else {
+            int m = 0, cnt = dpp->term;
+
+            while (cnt--) {
+                if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr)
+                    return FALSE;
+
+                dpp->samples_A [m] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+                byteptr += 2;
+
+                if (!(wps->wphdr.flags & MONO_DATA)) {
+                    dpp->samples_B [m] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+                    byteptr += 2;
+                }
+
+                m++;
+            }
+        }
+
+    // success only if the block was consumed exactly
+    return byteptr == endptr;
+}
+
+// Parse the noise-shaping information held in the given metadata block and
+// store it in the WavpackStream structure. A 2-byte block holds two shaping
+// weights (there must be two values even for mono streams), stored in the
+// same manner as decorrelation weights. A longer block holds, per channel, a
+// 16-bit error value and a 16-bit shaping accumulator value, optionally
+// followed by one 16-bit shaping delta per channel. These would normally be
+// read from the "correction" file and are used for lossless reconstruction
+// of hybrid data. Returns FALSE if the block is too short to be valid.
+
+int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    const int mono = (wps->wphdr.flags & MONO_DATA) != 0;
+    unsigned char *bytes, *tail;
+
+    if (wpmd->byte_length == 2) {
+        char *weights = (char *)wpmd->data;
+
+        wps->dc.shaping_acc [0] = (int32_t) restore_weight (weights [0]) << 16;
+        wps->dc.shaping_acc [1] = (int32_t) restore_weight (weights [1]) << 16;
+        return TRUE;
+    }
+
+    if (!(wpmd->byte_length >= (mono ? 4 : 8)))
+        return FALSE;
+
+    bytes = (unsigned char *)wpmd->data;
+
+    wps->dc.error [0] = wp_exp2s ((int16_t)(bytes [0] + (bytes [1] << 8)));
+    wps->dc.shaping_acc [0] = wp_exp2s ((int16_t)(bytes [2] + (bytes [3] << 8)));
+
+    if (!mono) {
+        wps->dc.error [1] = wp_exp2s ((int16_t)(bytes [4] + (bytes [5] << 8)));
+        wps->dc.shaping_acc [1] = wp_exp2s ((int16_t)(bytes [6] + (bytes [7] << 8)));
+    }
+
+    // the optional shaping deltas follow the per-channel values read above
+    tail = bytes + (mono ? 4 : 8);
+
+    if (wpmd->byte_length == (mono ? 6 : 12)) {
+        wps->dc.shaping_delta [0] = wp_exp2s ((int16_t)(tail [0] + (tail [1] << 8)));
+
+        if (!mono)
+            wps->dc.shaping_delta [1] = wp_exp2s ((int16_t)(tail [2] + (tail [3] << 8)));
+    }
+
+    return TRUE;
+}
--- a/third_party/wavpack/src/entropy_utils.c
+++ b/third_party/wavpack/src/entropy_utils.c
@ -0,0 +1,378 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// entropy_utils.c
+
+// This module contains the functions that process metadata blocks that are
+// specific to the entropy decoder; these would be called any time a WavPack
+// block was parsed. Additionally, it contains tables and functions that are
+// common to both entropy coding and decoding. These are in a module separate
+// from the actual entropy encoder (write_words.c) and decoder (read_words.c)
+// so that applications that only do a subset of the full WavPack reading
+// and writing can link with a subset of the library.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// local table storage ////////////////////////////
+
+// bitset [n] has only bit n set (n = 0 to 31). The constants are built from
+// unsigned operands because shifting a signed 1 into bit 31 overflows on
+// targets where "long" is only 32 bits wide.
+
+const uint32_t bitset [] = {
+    1UL << 0, 1UL << 1, 1UL << 2, 1UL << 3,
+    1UL << 4, 1UL << 5, 1UL << 6, 1UL << 7,
+    1UL << 8, 1UL << 9, 1UL << 10, 1UL << 11,
+    1UL << 12, 1UL << 13, 1UL << 14, 1UL << 15,
+    1UL << 16, 1UL << 17, 1UL << 18, 1UL << 19,
+    1UL << 20, 1UL << 21, 1UL << 22, 1UL << 23,
+    1UL << 24, 1UL << 25, 1UL << 26, 1UL << 27,
+    1UL << 28, 1UL << 29, 1UL << 30, 1UL << 31
+};
+
+// bitmask [n] has the n least significant bits set (n = 0 to 31), so the
+// final entry covers every bit except bit 31.
+
+const uint32_t bitmask [] = {
+    0x00000000, 0x00000001, 0x00000003, 0x00000007,
+    0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f,
+    0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff,
+    0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff,
+    0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff,
+    0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff,
+    0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff,
+    0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff
+};
+
+// nbits_table [i] is the number of bits needed to represent the byte value i,
+// i.e. the position of its highest set bit counted from 1 (0 for i == 0).
+// wp_log2() and log2buffer() index it with the most significant non-zero
+// byte of a value to find the value's total bit count.
+
+const char nbits_table [] = {
+    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,     // 0 - 15
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     // 16 - 31
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 32 - 47
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 48 - 63
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 64 - 79
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 80 - 95
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 96 - 111
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 112 - 127
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 128 - 143
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 144 - 159
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 160 - 175
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 176 - 191
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 192 - 207
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 208 - 223
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 224 - 239
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8      // 240 - 255
+};
+
+// Fractional part of the log2 values produced by wp_log2() and log2buffer().
+// The index is the 8 bits that follow the leading one bit of the value being
+// converted and the entry becomes the low 8 bits of the result. The entries
+// appear to be 256 * log2 (1 + index / 256), rounded (TODO confirm against
+// the table generator).
+
+static const unsigned char log2_table [] = {
+    0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15,
+    0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a,
+    0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e,
+    0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51,
+    0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63,
+    0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75,
+    0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
+    0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
+    0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2,
+    0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0,
+    0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce,
+    0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7,
+    0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4,
+    0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff
+};
+
+// Mantissa table used by wp_exp2s() to convert a log2 value back into an
+// integer. The index is the low 8 bits (the fractional part) of the log and
+// the entry supplies the 8 mantissa bits that follow an implied leading one
+// (wp_exp2s() ORs in 0x100). The entries appear to be
+// 256 * (2 ^ (index / 256) - 1), rounded (TODO confirm against the table
+// generator).
+
+static const unsigned char exp2_table [] = {
+    0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b,
+    0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16,
+    0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23,
+    0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d,
+    0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b,
+    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
+    0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a,
+    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,
+    0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+    0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4,
+    0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9,
+    0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff
+};
+
+///////////////////////////// executable code ////////////////////////////////
+
+// Read the median log2 values from the specified metadata structure, convert
+// them back to 32-bit unsigned values and store them. Every channel has three
+// medians, each stored as a 16-bit little-endian log2 value, so the block
+// must be exactly 6 bytes long for mono data or 12 bytes for stereo. Any
+// other length is an error: FALSE is returned and nothing is stored.
+
+int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    int num_chans = (wps->wphdr.flags & MONO_DATA) ? 1 : 2;
+    unsigned char *src = (unsigned char *)wpmd->data;
+    int chan, i;
+
+    if (wpmd->byte_length != num_chans * 6)
+        return FALSE;
+
+    // the medians are stored channel by channel, low byte first
+    for (chan = 0; chan < num_chans; ++chan)
+        for (i = 0; i < 3; ++i, src += 2)
+            wps->w.c [chan].median [i] = wp_exp2s (src [0] + (src [1] << 8));
+
+    return TRUE;
+}
+
+// Read the hybrid related values from the specified metadata structure,
+// convert them back to their internal formats and store them. The block
+// consists of groups of 16-bit little-endian values, one value per channel
+// in each group (so a group is 2 bytes for mono data and 4 bytes for stereo):
+//
+//   - slow_level as a log2 value (only present with HYBRID_BITRATE)
+//   - bitrate accumulator (stored in the upper 16 bits of bitrate_acc [])
+//   - bitrate delta as a signed log2 value (optional; when the group is
+//     absent both deltas are cleared)
+//
+// The extended profile stuff is not implemented yet, so FALSE is returned if
+// we get more data than we know what to do with, or if a group is truncated.
+// Otherwise TRUE is returned.
+
+int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    unsigned char *byteptr = (unsigned char *)wpmd->data;
+    unsigned char *endptr = byteptr + wpmd->byte_length;
+    int group_bytes = (wps->wphdr.flags & MONO_DATA) ? 2 : 4;
+
+    // bounds are checked on the remaining length so that no pointer beyond
+    // the end of the block is ever formed
+
+    if (wps->wphdr.flags & HYBRID_BITRATE) {
+        if (endptr - byteptr < group_bytes)
+            return FALSE;
+
+        wps->w.c [0].slow_level = wp_exp2s (byteptr [0] + (byteptr [1] << 8));
+        byteptr += 2;
+
+        if (!(wps->wphdr.flags & MONO_DATA)) {
+            wps->w.c [1].slow_level = wp_exp2s (byteptr [0] + (byteptr [1] << 8));
+            byteptr += 2;
+        }
+    }
+
+    if (endptr - byteptr < group_bytes)
+        return FALSE;
+
+    // shift as unsigned: a stored value of 0x8000 or more would overflow a
+    // signed 32-bit shift, which is undefined
+    wps->w.bitrate_acc [0] = (uint32_t)(byteptr [0] + (byteptr [1] << 8)) << 16;
+    byteptr += 2;
+
+    if (!(wps->wphdr.flags & MONO_DATA)) {
+        wps->w.bitrate_acc [1] = (uint32_t)(byteptr [0] + (byteptr [1] << 8)) << 16;
+        byteptr += 2;
+    }
+
+    if (byteptr < endptr) {
+        if (endptr - byteptr < group_bytes)
+            return FALSE;
+
+        wps->w.bitrate_delta [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+        byteptr += 2;
+
+        if (!(wps->wphdr.flags & MONO_DATA)) {
+            wps->w.bitrate_delta [1] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8)));
+            byteptr += 2;
+        }
+
+        // anything left over belongs to a profile we do not understand
+        if (byteptr < endptr)
+            return FALSE;
+    }
+    else
+        wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0;
+
+    return TRUE;
+}
+
+// This function is called during both encoding and decoding of hybrid data to
+// update the "error_limit" variable which determines the maximum sample error
+// allowed in the main bitstream. Every call first advances the bitrate
+// accumulator of each active channel by its delta (so the bitrates can be
+// changing from call to call). In the HYBRID_BITRATE mode (which is the only
+// one currently implemented) the limit is then calculated from the slow_level
+// values and the bitrates, with HYBRID_BALANCE redistributing the bitrate
+// between the two channels of stereo data. Otherwise the bitrate is converted
+// directly into the limit.
+
+void update_error_limit (WavpackStream *wps)
+{
+    int num_chans = (wps->wphdr.flags & MONO_DATA) ? 1 : 2;
+    int bitrate [2] = { 0, 0 }, slow_log [2] = { 0, 0 }, chan;
+
+    // step the accumulators (channel 0 only for mono data), keep the integer parts
+    for (chan = 0; chan < num_chans; ++chan)
+        bitrate [chan] = (wps->w.bitrate_acc [chan] += wps->w.bitrate_delta [chan]) >> 16;
+
+    if (!(wps->wphdr.flags & HYBRID_BITRATE)) {
+        for (chan = 0; chan < num_chans; ++chan)
+            wps->w.c [chan].error_limit = wp_exp2s (bitrate [chan]);
+
+        return;
+    }
+
+    for (chan = 0; chan < num_chans; ++chan)
+        slow_log [chan] = (wps->w.c [chan].slow_level + SLO) >> SLS;
+
+    if (num_chans == 2 && (wps->wphdr.flags & HYBRID_BALANCE)) {
+        int balance = (slow_log [1] - slow_log [0] + bitrate [1] + 1) >> 1;
+
+        if (balance > bitrate [0]) {
+            // everything goes to the second channel
+            bitrate [1] = bitrate [0] * 2;
+            bitrate [0] = 0;
+        }
+        else if (-balance > bitrate [0]) {
+            // everything goes to the first channel
+            bitrate [0] = bitrate [0] * 2;
+            bitrate [1] = 0;
+        }
+        else {
+            bitrate [1] = bitrate [0] + balance;
+            bitrate [0] = bitrate [0] - balance;
+        }
+    }
+
+    for (chan = 0; chan < num_chans; ++chan) {
+        int headroom = slow_log [chan] - bitrate [chan];
+
+        if (headroom > -0x100)
+            wps->w.c [chan].error_limit = wp_exp2s (headroom + 0x100);
+        else
+            wps->w.c [chan].error_limit = 0;
+    }
+}
+
+// The concept of a base 2 logarithm is used in many parts of WavPack. It is
+// a way of sufficiently accurately representing 32-bit signed and unsigned
+// values storing only 16 bits (actually fewer). It is also used in the hybrid
+// mode for quickly comparing the relative magnitude of large values (i.e.
+// division) and providing smooth exponentials using only addition.
+
+// These are not strict logarithms in that they become linear around zero and
+// can therefore represent both zero and negative values. They have 8 bits
+// of precision and in "roundtrip" conversions the total error never exceeds 1
+// part in 225 except for the cases of +/-115 and +/-195 (which error by 1).
+
+
+// This function returns the log2 for the specified 32-bit unsigned value.
+// The maximum value allowed is about 0xff800000 and returns 8447. The number
+// of significant bits in the (slightly adjusted) value forms the upper part
+// of the result, and the low 8 bits come from a log2_table [] lookup on the
+// 8 bits that follow the value's leading one bit.
+
+int FASTCALL wp_log2 (uint32_t avalue)
+{
+    uint32_t aligned;
+    int dbits;
+
+    avalue += avalue >> 9;
+
+    if (avalue < (1 << 8)) {
+        // small values are shifted up so the leading one lands on bit 8
+        dbits = nbits_table [avalue];
+        aligned = avalue << (9 - dbits);
+    }
+    else {
+        // find the top non-zero byte, then shift down so the leading one lands on bit 8
+        int byte_shift = (avalue < (1L << 16)) ? 8 : (avalue < (1L << 24)) ? 16 : 24;
+
+        dbits = nbits_table [avalue >> byte_shift] + byte_shift;
+        aligned = avalue >> (dbits - 9);
+    }
+
+    return (dbits << 8) + log2_table [aligned & 0xff];
+}
+
+// This function scans a buffer of 32-bit samples and accumulates the total
+// log2 value of the magnitudes of all the samples. This is useful for
+// determining maximum compression because the bitstream storage required for
+// entropy coding is proportional to the base 2 log of the samples. On some
+// platforms there is an assembly version of this.
+//
+// If "limit" is non-zero and the log2 of any sample reaches it, the scan is
+// abandoned and (uint32_t) -1 is returned. Samples whose adjusted magnitude
+// is below 256 are never compared against the limit.
+
+#if !defined(OPT_ASM_X86) && !defined(OPT_ASM_X64)
+
+uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit)
+{
+    uint32_t result = 0, avalue;
+    int dbits;
+
+    while (num_samples--) {
+        int32_t sample = *samples++;
+
+        // negate in unsigned arithmetic because abs() is undefined for the
+        // most negative value
+        avalue = (sample < 0) ? 0U - (uint32_t) sample : (uint32_t) sample;
+
+        // same conversion as wp_log2(), kept inline here
+        if ((avalue += avalue >> 9) < (1 << 8)) {
+            dbits = nbits_table [avalue];
+            result += (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff];
+        }
+        else {
+            if (avalue < (1L << 16))
+                dbits = nbits_table [avalue >> 8] + 8;
+            else if (avalue < (1L << 24))
+                dbits = nbits_table [avalue >> 16] + 16;
+            else
+                dbits = nbits_table [avalue >> 24] + 24;
+
+            // dbits is reused to hold the complete log2 of this sample
+            result += dbits = (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff];
+
+            if (limit && dbits >= limit)
+                return (uint32_t) -1;
+        }
+    }
+
+    return result;
+}
+
+#endif
+
+// This function returns the log2 for the specified 32-bit signed value.
+// All input values are valid and the return values are in the range of
+// +/- 8192. Negative inputs return the negated log2 of their magnitude.
+
+int wp_log2s (int32_t value)
+{
+    // the magnitude is formed in unsigned arithmetic so that the most
+    // negative input does not overflow when negated
+    return (value < 0) ? -wp_log2 (0U - (uint32_t) value) : wp_log2 (value);
+}
+
+// This function returns the original integer represented by the supplied
+// logarithm (at least within the provided accuracy). The log is signed,
+// but since a full 32-bit value is returned this can be used for unsigned
+// conversions as well (i.e. the input range is -8192 to +8447). A negative
+// log returns the negated value for its magnitude.
+//
+// Logs outside that range can arrive from corrupt metadata. They produce a
+// meaningless but well-defined result: the left shift count is reduced
+// modulo 32 and the magnitude is handled in unsigned arithmetic, so no
+// oversized shift or signed negation overflow can occur.
+
+int32_t wp_exp2s (int log)
+{
+    uint32_t alog = (log < 0) ? 0U - (unsigned int) log : (unsigned int) log;
+    uint32_t mantissa = exp2_table [alog & 0xff] | 0x100;    // 9 bits, implied leading one
+    uint32_t exponent = alog >> 8, value;
+
+    if (exponent <= 9)
+        value = mantissa >> (9 - exponent);
+    else
+        value = mantissa << ((exponent - 9) & 0x1f);
+
+    return (log < 0) ? (int32_t) (0U - value) : (int32_t) value;
+}
+
+// These two functions convert internal weights (which are normally +/-1024)
+// to and from an 8-bit signed character version for storage in metadata. The
+// weights are clipped here in the case that they are outside that range.
+
+// Convert an internal weight to its stored form: clip to +/-1024, trim
+// positive weights slightly so that +1024 fits into the signed byte, then
+// round to a multiple of 8 and drop the low 3 bits.
+
+signed char store_weight (int weight)
+{
+    int clipped = weight;
+
+    if (clipped > 1024)
+        clipped = 1024;
+
+    if (clipped < -1024)
+        clipped = -1024;
+
+    if (clipped > 0)
+        clipped -= (clipped + 64) >> 7;
+
+    return (clipped + 4) >> 3;
+}
+
+// Convert a stored 8-bit weight back to an internal weight: scale it by 8
+// and, for positive weights, add a small correction so that the largest
+// stored value (127) is restored as 1024. The result is in the range of
+// -1024 to +1024.
+
+int restore_weight (signed char weight)
+{
+    // multiply rather than shift because left-shifting a negative value is
+    // undefined
+    int result = weight * 8;
+
+    if (result > 0)
+        result += (result + 64) >> 7;
+
+    return result;
+}
--- a/third_party/wavpack/src/extra1.c
+++ b/third_party/wavpack/src/extra1.c
@ -1,7 +1,7 @@
 ////////////////////////////////////////////////////////////////////////////
 //                           **** WAVPACK ****                            //
 //                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
 //                          All Rights Reserved.                          //
 //      Distributed under the BSD Software License (see license.txt)      //
 ////////////////////////////////////////////////////////////////////////////
@ -10,28 +10,41 @@

 // This module handles the "extra" mode for mono files.

-#include "wavpack_local.h"
-
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <math.h>

-//#define USE_OVERHEAD
-#define LOG_LIMIT 6912
-//#define EXTRA_DUMP
+#include "wavpack_local.h"

-#ifdef DEBUG_ALLOC
-#define malloc malloc_db
-#define realloc realloc_db
-#define free free_db
-void *malloc_db (uint32_t size);
-void *realloc_db (void *ptr, uint32_t size);
-void free_db (void *ptr);
-int32_t dump_alloc (void);
+// This flag causes this module to take into account the size of the header
+// (which grows with more decorrelation passes) when making decisions about
+// adding additional passes (as opposed to just considering the resulting
+// magnitude of the residuals). With really long blocks it seems to actually
+// hurt compression (for reasons I cannot explain), but with short blocks it
+// works okay, so we're enabling it for now.
+
+#define USE_OVERHEAD
+
+// If the log2 value of any sample in a buffer being scanned exceeds this value,
+// we abandon that configuration. This prevents us from going down paths that
+// are wildly unstable.
+
+#define LOG_LIMIT 6912
+
+//#define EXTRA_DUMP        // dump generated filter data to error_line()
+
+#ifdef OPT_ASM_X86
+    #define PACK_DECORR_MONO_PASS_CONT pack_decorr_mono_pass_cont_x86
+#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__))
+    #define PACK_DECORR_MONO_PASS_CONT pack_decorr_mono_pass_cont_x64win
+#elif defined(OPT_ASM_X64)
+    #define PACK_DECORR_MONO_PASS_CONT pack_decorr_mono_pass_cont_x64
 #endif

-//////////////////////////////// local tables ///////////////////////////////
+#ifdef PACK_DECORR_MONO_PASS_CONT
+    void PACK_DECORR_MONO_PASS_CONT (int32_t *out_buffer, int32_t *in_buffer,  struct decorr_pass *dpp, int32_t sample_count);
+#endif

 typedef struct {
    int32_t *sampleptrs [MAX_NTERMS+2];
@ -42,13 +55,22 @@ typedef struct {

 static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_t num_samples, struct decorr_pass *dpp, int dir)
 {
+    int32_t cont_samples = 0;
    int m = 0, i;

+#ifdef PACK_DECORR_MONO_PASS_CONT
+    if (num_samples > 16 && dir > 0) {
+        int32_t pre_samples = (dpp->term > MAX_TERM) ? 2 : dpp->term;
+        cont_samples = num_samples - pre_samples;
+        num_samples = pre_samples;
+    }
+#endif
+
    dpp->sum_A = 0;

    if (dir < 0) {
-        out_samples += (num_samples - 1);
-        in_samples += (num_samples - 1);
+        out_samples += (num_samples + cont_samples - 1);
+        in_samples += (num_samples + cont_samples - 1);
        dir = -1;
    }
    else
@ -57,7 +79,7 @@ static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_
    dpp->weight_A = restore_weight (store_weight (dpp->weight_A));

    for (i = 0; i < 8; ++i)
-        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));
+        dpp->samples_A [i] = wp_exp2s (wp_log2s (dpp->samples_A [i]));

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
@ -108,6 +130,11 @@ static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_
            m = (m + 1) & (MAX_TERM - 1);
        }
    }
+
+#ifdef PACK_DECORR_MONO_PASS_CONT
+    if (cont_samples)
+        PACK_DECORR_MONO_PASS_CONT (out_samples, in_samples, dpp, cont_samples);
+#endif
 }

 static void reverse_mono_decorr (struct decorr_pass *dpp)
@ -224,7 +251,7 @@ static void recurse_mono (WavpackContext *wpc, WavpackExtraInfo *info, int depth
        info->dps [depth].term = term;
        info->dps [depth].delta = delta;
        decorr_mono_buffer (samples, outsamples, wps->wphdr.block_samples, info->dps, depth);
-        bits = log2buffer (outsamples, wps->wphdr.block_samples, info->log_limit);
+        bits = LOG2BUFFER (outsamples, wps->wphdr.block_samples, info->log_limit);

        if (bits != (uint32_t) -1)
            bits += log2overhead (info->dps [0].term, depth + 1);
@ -289,7 +316,7 @@ static void delta_mono (WavpackContext *wpc, WavpackExtraInfo *info)
            decorr_mono_buffer (info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, info->dps, i);
        }

-        bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit);
+        bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit);

        if (bits != (uint32_t) -1)
            bits += log2overhead (wps->decorr_passes [0].term, i);
@ -314,7 +341,7 @@ static void delta_mono (WavpackContext *wpc, WavpackExtraInfo *info)
            decorr_mono_buffer (info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, info->dps, i);
        }

-        bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit);
+        bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit);

        if (bits != (uint32_t) -1)
            bits += log2overhead (wps->decorr_passes [0].term, i);
@ -358,7 +385,7 @@ static void sort_mono (WavpackContext *wpc, WavpackExtraInfo *info)
            for (i = ri; i < info->nterms && wps->decorr_passes [i].term; ++i)
                decorr_mono_buffer (info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, info->dps, i);

-            bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit);
+            bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit);

            if (bits != (uint32_t) -1)
                bits += log2overhead (wps->decorr_passes [0].term, i);
@ -412,13 +439,13 @@ static void analyze_mono (WavpackContext *wpc, int32_t *samples, int do_samples)
    for (i = 0; i < info.nterms && info.dps [i].term; ++i)
        decorr_mono_pass (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1);

-    info.best_bits = log2buffer (info.sampleptrs [info.nterms], wps->wphdr.block_samples, 0) * 1;
+    info.best_bits = LOG2BUFFER (info.sampleptrs [info.nterms], wps->wphdr.block_samples, 0) * 1;
    info.best_bits += log2overhead (info.dps [0].term, i);
    memcpy (info.sampleptrs [info.nterms + 1], info.sampleptrs [i], wps->wphdr.block_samples * 4);

    if (wpc->config.extra_flags & EXTRA_BRANCHES)
        recurse_mono (wpc, &info, 0, (int) floor (wps->delta_decay + 0.5),
-            log2buffer (info.sampleptrs [0], wps->wphdr.block_samples, 0));
+            LOG2BUFFER (info.sampleptrs [0], wps->wphdr.block_samples, 0));

    if (wpc->config.extra_flags & EXTRA_SORT_FIRST)
        sort_mono (wpc, &info);
@ -500,6 +527,12 @@ void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do
    uint32_t best_size = (uint32_t) -1, size;
    int log_limit, pi, i;

+#ifdef SKIP_DECORRELATION
+    CLEAR (wps->decorr_passes);
+    wps->num_terms = 0;
+    return;
+#endif
+
    for (i = 0; i < num_samples; ++i)
        if (samples [i])
            break;
@ -571,7 +604,7 @@ void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do
        }

        wpds = &wps->decorr_specs [c];
-        nterms = (int) strlen (wpds->terms);
+        nterms = (int) strlen ((char *) wpds->terms);

        while (1) {
        memcpy (temp_buffer [0], noisy_buffer ? noisy_buffer : samples, buf_size);
@ -598,7 +631,7 @@ void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do
            decorr_mono_pass (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1);
        }

-        size = log2buffer (temp_buffer [j&1], num_samples, log_limit);
+        size = LOG2BUFFER (temp_buffer [j&1], num_samples, log_limit);

        if (size == (uint32_t) -1 && nterms)
            nterms >>= 1;
--- a/third_party/wavpack/src/extra2.c
+++ b/third_party/wavpack/src/extra2.c
@ -1,7 +1,7 @@
 ////////////////////////////////////////////////////////////////////////////
 //                           **** WAVPACK ****                            //
 //                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
 //               MMX optimizations (c) 2006 Joachim Henke                 //
 //                          All Rights Reserved.                          //
 //      Distributed under the BSD Software License (see license.txt)      //
@ -11,325 +11,74 @@

 // This module handles the "extra" mode for stereo files.

-#include "wavpack_local.h"
-
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <math.h>

-//#define USE_OVERHEAD
-#define LOG_LIMIT 6912
-//#define EXTRA_DUMP
+#include "wavpack_local.h"

-#ifdef DEBUG_ALLOC
-#define malloc malloc_db
-#define realloc realloc_db
-#define free free_db
-void *malloc_db (uint32_t size);
-void *realloc_db (void *ptr, uint32_t size);
-void free_db (void *ptr);
-int32_t dump_alloc (void);
+// This flag causes this module to take into account the size of the header
+// (which grows with more decorrelation passes) when making decisions about
+// adding additional passes (as opposed to just considering the resulting
+// magnitude of the residuals). With really long blocks it seems to actually
+// hurt compression (for reasons I cannot explain), but with short blocks it
+// works okay, so we're enabling it for now.
+
+#define USE_OVERHEAD
+
+// If the log2 value of any sample in a buffer being scanned exceeds this value,
+// we abandon that configuration. This prevents us from going down paths that
+// are wildly unstable.
+
+#define LOG_LIMIT 6912
+
+//#define EXTRA_DUMP        // dump generated filter data to error_line()
+
+#ifdef OPT_ASM_X86      // x86 build: "_x86" routines; AVAILABLE is a pack_cpu_has_feature_x86(CPU_FEATURE_MMX) call, not a constant
+    #define PACK_DECORR_STEREO_PASS_CONT pack_decorr_stereo_pass_cont_x86
+    #define PACK_DECORR_STEREO_PASS_CONT_REV pack_decorr_stereo_pass_cont_rev_x86
+    #define PACK_DECORR_STEREO_PASS_CONT_AVAILABLE pack_cpu_has_feature_x86(CPU_FEATURE_MMX)
+#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__))      // x64 build with _WIN64, __CYGWIN__ or __MINGW64__ defined: "_x64win" routines, AVAILABLE is constant 1
+    #define PACK_DECORR_STEREO_PASS_CONT pack_decorr_stereo_pass_cont_x64win
+    #define PACK_DECORR_STEREO_PASS_CONT_REV pack_decorr_stereo_pass_cont_rev_x64win
+    #define PACK_DECORR_STEREO_PASS_CONT_AVAILABLE 1
+#elif defined(OPT_ASM_X64)      // any other x64 build: "_x64" routines, AVAILABLE is constant 1; with neither OPT_ASM_* macro set nothing is defined here
+    #define PACK_DECORR_STEREO_PASS_CONT pack_decorr_stereo_pass_cont_x64
+    #define PACK_DECORR_STEREO_PASS_CONT_REV pack_decorr_stereo_pass_cont_rev_x64
+    #define PACK_DECORR_STEREO_PASS_CONT_AVAILABLE 1
 #endif

-//////////////////////////////// local tables ///////////////////////////////
+#ifdef PACK_DECORR_STEREO_PASS_CONT      // only when one of the OPT_ASM_* branches selected a routine pair
+    void PACK_DECORR_STEREO_PASS_CONT (struct decorr_pass *dpp, int32_t *in_buffer, int32_t *out_buffer, int32_t sample_count);      // called by decorr_stereo_pass() when dir is not negative
+    void PACK_DECORR_STEREO_PASS_CONT_REV (struct decorr_pass *dpp, int32_t *in_buffer, int32_t *out_buffer, int32_t sample_count);  // called by decorr_stereo_pass() when dir < 0
+#endif

 typedef struct {
    int32_t *sampleptrs [MAX_NTERMS+2];
    struct decorr_pass dps [MAX_NTERMS];
-    int nterms, log_limit, gt16bit;
+    int nterms, log_limit;
    uint32_t best_bits;
 } WavpackExtraInfo;

-#ifdef OPT_MMX
-
-static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32_t num_samples, struct decorr_pass *dpp, int dir)
-{
-    const __m64
-        delta = _mm_set1_pi32 (dpp->delta),
-        fill = _mm_set1_pi32 (0x7bff),
-        mask = _mm_set1_pi32 (0x7fff),
-        round = _mm_set1_pi32 (512),
-        zero = _mm_set1_pi32 (0);
-    __m64
-        sum_AB = zero,
-        weight_AB = _mm_set_pi32 (restore_weight (store_weight (dpp->weight_B)), restore_weight (store_weight (dpp->weight_A))),
-        left_right, sam_AB, tmp0, tmp1, samples_AB [MAX_TERM];
-    int k, m = 0;
-
-    if (dir < 0) {
-        out_samples += (num_samples - 1) * 2;
-        in_samples += (num_samples - 1) * 2;
-        dir = -2;
-    }
-    else
-        dir = 2;
-
-    for (k = 0; k < MAX_TERM; ++k) {
-        ((int32_t *) samples_AB) [k * 2] = exp2s (log2s (dpp->samples_A [k]));
-        ((int32_t *) samples_AB) [k * 2 + 1] = exp2s (log2s (dpp->samples_B [k]));
-    }
-
-    if (dpp->term > 0) {
-        if (dpp->term == 17) {
-            while (num_samples--) {
-                left_right = *(__m64 *) in_samples;
-                tmp0 = samples_AB [0];
-                sam_AB = _m_paddd (tmp0, tmp0);
-                sam_AB = _m_psubd (sam_AB, samples_AB [1]);
-                samples_AB [0] = left_right;
-                samples_AB [1] = tmp0;
-
-                tmp0 = _m_paddd (sam_AB, sam_AB);
-                tmp1 = _m_pand (sam_AB, mask);
-                tmp0 = _m_psrldi (tmp0, 16);
-                tmp1 = _m_pmaddwd (tmp1, weight_AB);
-                tmp0 = _m_pmaddwd (tmp0, weight_AB);
-                tmp1 = _m_paddd (tmp1, round);
-                tmp0 = _m_pslldi (tmp0, 5);
-                tmp1 = _m_psradi (tmp1, 10);
-                left_right = _m_psubd (left_right, tmp0);
-                left_right = _m_psubd (left_right, tmp1);
-
-                *(__m64 *) out_samples = left_right;
-
-                tmp0 = _m_pxor (sam_AB, left_right);
-                tmp0 = _m_psradi (tmp0, 31);
-                tmp1 = _m_pxor (delta, tmp0);
-                tmp1 = _m_psubd (tmp1, tmp0);
-                sam_AB = _m_pcmpeqd (sam_AB, zero);
-                tmp0 = _m_pcmpeqd (left_right, zero);
-                tmp0 = _m_por (tmp0, sam_AB);
-                tmp0 = _m_pandn (tmp0, tmp1);
-                weight_AB = _m_paddd (weight_AB, tmp0);
-
-                sum_AB = _m_paddd (sum_AB, weight_AB);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-        }
-        else if (dpp->term == 18) {
-            while (num_samples--) {
-                left_right = *(__m64 *) in_samples;
-                tmp0 = samples_AB [0];
-                sam_AB = _m_psubd (tmp0, samples_AB [1]);
-                sam_AB = _m_psradi (sam_AB, 1);
-                sam_AB = _m_paddd (sam_AB, tmp0);
-                samples_AB [0] = left_right;
-                samples_AB [1] = tmp0;
-
-                tmp0 = _m_paddd (sam_AB, sam_AB);
-                tmp1 = _m_pand (sam_AB, mask);
-                tmp0 = _m_psrldi (tmp0, 16);
-                tmp1 = _m_pmaddwd (tmp1, weight_AB);
-                tmp0 = _m_pmaddwd (tmp0, weight_AB);
-                tmp1 = _m_paddd (tmp1, round);
-                tmp0 = _m_pslldi (tmp0, 5);
-                tmp1 = _m_psradi (tmp1, 10);
-                left_right = _m_psubd (left_right, tmp0);
-                left_right = _m_psubd (left_right, tmp1);
-
-                *(__m64 *) out_samples = left_right;
-
-                tmp0 = _m_pxor (sam_AB, left_right);
-                tmp0 = _m_psradi (tmp0, 31);
-                tmp1 = _m_pxor (delta, tmp0);
-                tmp1 = _m_psubd (tmp1, tmp0);
-                sam_AB = _m_pcmpeqd (sam_AB, zero);
-                tmp0 = _m_pcmpeqd (left_right, zero);
-                tmp0 = _m_por (tmp0, sam_AB);
-                tmp0 = _m_pandn (tmp0, tmp1);
-                weight_AB = _m_paddd (weight_AB, tmp0);
-
-                sum_AB = _m_paddd (sum_AB, weight_AB);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-        }
-        else {
-            k = dpp->term & (MAX_TERM - 1);
-            while (num_samples--) {
-                left_right = *(__m64 *) in_samples;
-                sam_AB = samples_AB [m];
-                samples_AB [k] = left_right;
-
-                tmp0 = _m_paddd (sam_AB, sam_AB);
-                tmp1 = _m_pand (sam_AB, mask);
-                tmp0 = _m_psrldi (tmp0, 16);
-                tmp1 = _m_pmaddwd (tmp1, weight_AB);
-                tmp0 = _m_pmaddwd (tmp0, weight_AB);
-                tmp1 = _m_paddd (tmp1, round);
-                tmp0 = _m_pslldi (tmp0, 5);
-                tmp1 = _m_psradi (tmp1, 10);
-                left_right = _m_psubd (left_right, tmp0);
-                left_right = _m_psubd (left_right, tmp1);
-
-                *(__m64 *) out_samples = left_right;
-
-                tmp0 = _m_pxor (sam_AB, left_right);
-                tmp0 = _m_psradi (tmp0, 31);
-                tmp1 = _m_pxor (delta, tmp0);
-                tmp1 = _m_psubd (tmp1, tmp0);
-                sam_AB = _m_pcmpeqd (sam_AB, zero);
-                tmp0 = _m_pcmpeqd (left_right, zero);
-                tmp0 = _m_por (tmp0, sam_AB);
-                tmp0 = _m_pandn (tmp0, tmp1);
-                weight_AB = _m_paddd (weight_AB, tmp0);
-
-                sum_AB = _m_paddd (sum_AB, weight_AB);
-
-                in_samples += dir;
-                out_samples += dir;
-                k = (k + 1) & (MAX_TERM - 1);
-                m = (m + 1) & (MAX_TERM - 1);
-            }
-        }
-    }
-    else {
-        if (dpp->term == -1) {
-            while (num_samples--) {
-                left_right = *(__m64 *) in_samples;
-                sam_AB = samples_AB [0];
-                samples_AB [0] = _m_punpckhdq (left_right, sam_AB);
-                sam_AB = _m_punpckldq (sam_AB, left_right);
-
-                tmp0 = _m_paddd (sam_AB, sam_AB);
-                tmp1 = _m_pand (sam_AB, mask);
-                tmp0 = _m_psrldi (tmp0, 16);
-                tmp1 = _m_pmaddwd (tmp1, weight_AB);
-                tmp0 = _m_pmaddwd (tmp0, weight_AB);
-                tmp1 = _m_paddd (tmp1, round);
-                tmp0 = _m_pslldi (tmp0, 5);
-                tmp1 = _m_psradi (tmp1, 10);
-                left_right = _m_psubd (left_right, tmp0);
-                left_right = _m_psubd (left_right, tmp1);
-
-                *(__m64 *) out_samples = left_right;
-
-                tmp0 = _m_pcmpeqd (sam_AB, zero);
-                tmp1 = _m_pcmpeqd (left_right, zero);
-                tmp0 = _m_por (tmp0, tmp1);
-                tmp0 = _m_pandn (tmp0, delta);
-                sam_AB = _m_pxor (sam_AB, left_right);
-                sam_AB = _m_psradi (sam_AB, 31);
-                tmp1 = _m_psubd (fill, sam_AB);
-                weight_AB = _m_pxor (weight_AB, sam_AB);
-                weight_AB = _m_paddd (weight_AB, tmp1);
-                weight_AB = _m_paddsw (weight_AB, tmp0);
-                weight_AB = _m_psubd (weight_AB, tmp1);
-                weight_AB = _m_pxor (weight_AB, sam_AB);
-
-                sum_AB = _m_paddd (sum_AB, weight_AB);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-        }
-        else if (dpp->term == -2) {
-            while (num_samples--) {
-                left_right = *(__m64 *) in_samples;
-                sam_AB = samples_AB [0];
-                samples_AB [0] = _m_punpckldq (sam_AB, left_right);
-                sam_AB = _m_punpckhdq (left_right, sam_AB);
-
-                tmp0 = _m_paddd (sam_AB, sam_AB);
-                tmp1 = _m_pand (sam_AB, mask);
-                tmp0 = _m_psrldi (tmp0, 16);
-                tmp1 = _m_pmaddwd (tmp1, weight_AB);
-                tmp0 = _m_pmaddwd (tmp0, weight_AB);
-                tmp1 = _m_paddd (tmp1, round);
-                tmp0 = _m_pslldi (tmp0, 5);
-                tmp1 = _m_psradi (tmp1, 10);
-                left_right = _m_psubd (left_right, tmp0);
-                left_right = _m_psubd (left_right, tmp1);
-
-                *(__m64 *) out_samples = left_right;
-
-                tmp0 = _m_pcmpeqd (sam_AB, zero);
-                tmp1 = _m_pcmpeqd (left_right, zero);
-                tmp0 = _m_por (tmp0, tmp1);
-                tmp0 = _m_pandn (tmp0, delta);
-                sam_AB = _m_pxor (sam_AB, left_right);
-                sam_AB = _m_psradi (sam_AB, 31);
-                tmp1 = _m_psubd (fill, sam_AB);
-                weight_AB = _m_pxor (weight_AB, sam_AB);
-                weight_AB = _m_paddd (weight_AB, tmp1);
-                weight_AB = _m_paddsw (weight_AB, tmp0);
-                weight_AB = _m_psubd (weight_AB, tmp1);
-                weight_AB = _m_pxor (weight_AB, sam_AB);
-
-                sum_AB = _m_paddd (sum_AB, weight_AB);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-        }
-        else if (dpp->term == -3) {
-            while (num_samples--) {
-                left_right = *(__m64 *) in_samples;
-                sam_AB = samples_AB [0];
-                tmp0 = _m_punpckhdq (left_right, left_right);
-                samples_AB [0] = _m_punpckldq (tmp0, left_right);
-
-                tmp0 = _m_paddd (sam_AB, sam_AB);
-                tmp1 = _m_pand (sam_AB, mask);
-                tmp0 = _m_psrldi (tmp0, 16);
-                tmp1 = _m_pmaddwd (tmp1, weight_AB);
-                tmp0 = _m_pmaddwd (tmp0, weight_AB);
-                tmp1 = _m_paddd (tmp1, round);
-                tmp0 = _m_pslldi (tmp0, 5);
-                tmp1 = _m_psradi (tmp1, 10);
-                left_right = _m_psubd (left_right, tmp0);
-                left_right = _m_psubd (left_right, tmp1);
-
-                *(__m64 *) out_samples = left_right;
-
-                tmp0 = _m_pcmpeqd (sam_AB, zero);
-                tmp1 = _m_pcmpeqd (left_right, zero);
-                tmp0 = _m_por (tmp0, tmp1);
-                tmp0 = _m_pandn (tmp0, delta);
-                sam_AB = _m_pxor (sam_AB, left_right);
-                sam_AB = _m_psradi (sam_AB, 31);
-                tmp1 = _m_psubd (fill, sam_AB);
-                weight_AB = _m_pxor (weight_AB, sam_AB);
-                weight_AB = _m_paddd (weight_AB, tmp1);
-                weight_AB = _m_paddsw (weight_AB, tmp0);
-                weight_AB = _m_psubd (weight_AB, tmp1);
-                weight_AB = _m_pxor (weight_AB, sam_AB);
-
-                sum_AB = _m_paddd (sum_AB, weight_AB);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-        }
-    }
-    dpp->sum_A = ((int32_t *) &sum_AB) [0];
-    dpp->sum_B = ((int32_t *) &sum_AB) [1];
-    dpp->weight_A = ((int32_t *) &weight_AB) [0];
-    dpp->weight_B = ((int32_t *) &weight_AB) [1];
-
-    for (k = 0; k < MAX_TERM; ++k) {
-        dpp->samples_A [k] = ((int32_t *) samples_AB) [m * 2];
-        dpp->samples_B [k] = ((int32_t *) samples_AB) [m * 2 + 1];
-        m = (m + 1) & (MAX_TERM - 1);
-    }
-    _mm_empty ();
-}
-
-#else
-
 static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32_t num_samples, struct decorr_pass *dpp, int dir)
 {
+    int32_t cont_samples = 0;
    int m = 0, i;

+#ifdef PACK_DECORR_STEREO_PASS_CONT
+    if (num_samples > 16 && PACK_DECORR_STEREO_PASS_CONT_AVAILABLE) {
+        int32_t pre_samples = (dpp->term < 0 || dpp->term > MAX_TERM) ? 2 : dpp->term;
+        cont_samples = num_samples - pre_samples;
+        num_samples = pre_samples;
+    }
+#endif
+
    dpp->sum_A = dpp->sum_B = 0;

    if (dir < 0) {
-        out_samples += (num_samples - 1) * 2;
-        in_samples += (num_samples - 1) * 2;
+        out_samples += (num_samples + cont_samples - 1) * 2;
+        in_samples += (num_samples + cont_samples - 1) * 2;
        dir = -2;
    }
    else
@ -339,8 +88,8 @@ static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32
    dpp->weight_B = restore_weight (store_weight (dpp->weight_B));

    for (i = 0; i < 8; ++i) {
-        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));
-        dpp->samples_B [i] = exp2s (log2s (dpp->samples_B [i]));
+        dpp->samples_A [i] = wp_exp2s (wp_log2s (dpp->samples_A [i]));
+        dpp->samples_B [i] = wp_exp2s (wp_log2s (dpp->samples_B [i]));
    }

    switch (dpp->term) {
@ -511,184 +260,15 @@ static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32

            break;
    }
-}

+#ifdef PACK_DECORR_STEREO_PASS_CONT
+    if (cont_samples) {
+        if (dir < 0)
+            PACK_DECORR_STEREO_PASS_CONT_REV (dpp, in_samples, out_samples, cont_samples);
+        else
+            PACK_DECORR_STEREO_PASS_CONT (dpp, in_samples, out_samples, cont_samples);
+    }
 #endif
-
-static void decorr_stereo_pass_quick (int32_t *in_samples, int32_t *out_samples, int32_t num_samples, struct decorr_pass *dpp, int dir)
-{
-    int m = 0, i;
-
-    if (dir < 0) {
-        out_samples += (num_samples - 1) * 2;
-        in_samples += (num_samples - 1) * 2;
-        dir = -2;
-    }
-    else
-        dir = 2;
-
-    dpp->weight_A = restore_weight (store_weight (dpp->weight_A));
-    dpp->weight_B = restore_weight (store_weight (dpp->weight_B));
-
-    for (i = 0; i < 8; ++i) {
-        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));
-        dpp->samples_B [i] = exp2s (log2s (dpp->samples_B [i]));
-    }
-
-    switch (dpp->term) {
-
-        case 2:
-            while (num_samples--) {
-                int32_t sam, tmp;
-
-                sam = dpp->samples_A [0];
-                dpp->samples_A [0] = dpp->samples_A [1];
-                out_samples [0] = tmp = (dpp->samples_A [1] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam);
-                update_weight (dpp->weight_A, dpp->delta, sam, tmp);
-
-                sam = dpp->samples_B [0];
-                dpp->samples_B [0] = dpp->samples_B [1];
-                out_samples [1] = tmp = (dpp->samples_B [1] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam);
-                update_weight (dpp->weight_B, dpp->delta, sam, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-
-            break;
-
-        case 17:
-            while (num_samples--) {
-                int32_t sam, tmp;
-
-                sam = 2 * dpp->samples_A [0] - dpp->samples_A [1];
-                dpp->samples_A [1] = dpp->samples_A [0];
-                out_samples [0] = tmp = (dpp->samples_A [0] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam);
-                update_weight (dpp->weight_A, dpp->delta, sam, tmp);
-
-                sam = 2 * dpp->samples_B [0] - dpp->samples_B [1];
-                dpp->samples_B [1] = dpp->samples_B [0];
-                out_samples [1] = tmp = (dpp->samples_B [0] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam);
-                update_weight (dpp->weight_B, dpp->delta, sam, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-
-            break;
-
-        case 18:
-            while (num_samples--) {
-                int32_t sam, tmp;
-
-                sam = dpp->samples_A [0] + ((dpp->samples_A [0] - dpp->samples_A [1]) >> 1);
-                dpp->samples_A [1] = dpp->samples_A [0];
-                out_samples [0] = tmp = (dpp->samples_A [0] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam);
-                update_weight (dpp->weight_A, dpp->delta, sam, tmp);
-
-                sam = dpp->samples_B [0] + ((dpp->samples_B [0] - dpp->samples_B [1]) >> 1);
-                dpp->samples_B [1] = dpp->samples_B [0];
-                out_samples [1] = tmp = (dpp->samples_B [0] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam);
-                update_weight (dpp->weight_B, dpp->delta, sam, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-
-            break;
-
-        default: {
-            int k = dpp->term & (MAX_TERM - 1);
-
-            while (num_samples--) {
-                int32_t sam, tmp;
-
-                sam = dpp->samples_A [m];
-                out_samples [0] = tmp = (dpp->samples_A [k] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam);
-                update_weight (dpp->weight_A, dpp->delta, sam, tmp);
-
-                sam = dpp->samples_B [m];
-                out_samples [1] = tmp = (dpp->samples_B [k] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam);
-                update_weight (dpp->weight_B, dpp->delta, sam, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-                m = (m + 1) & (MAX_TERM - 1);
-                k = (k + 1) & (MAX_TERM - 1);
-            }
-
-            if (m) {
-                int32_t temp_A [MAX_TERM], temp_B [MAX_TERM];
-                int k;
-
-                memcpy (temp_A, dpp->samples_A, sizeof (dpp->samples_A));
-                memcpy (temp_B, dpp->samples_B, sizeof (dpp->samples_B));
-
-                for (k = 0; k < MAX_TERM; k++) {
-                    dpp->samples_A [k] = temp_A [m];
-                    dpp->samples_B [k] = temp_B [m];
-                    m = (m + 1) & (MAX_TERM - 1);
-                }
-            }
-
-            break;
-        }
-
-        case -1:
-            while (num_samples--) {
-                int32_t sam_A, sam_B, tmp;
-
-                sam_A = dpp->samples_A [0];
-                out_samples [0] = tmp = (sam_B = in_samples [0]) - apply_weight_i (dpp->weight_A, sam_A);
-                update_weight_clip (dpp->weight_A, dpp->delta, sam_A, tmp);
-
-                out_samples [1] = tmp = (dpp->samples_A [0] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam_B);
-                update_weight_clip (dpp->weight_B, dpp->delta, sam_B, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-
-            break;
-
-        case -2:
-            while (num_samples--) {
-                int32_t sam_A, sam_B, tmp;
-
-                sam_B = dpp->samples_B [0];
-                out_samples [1] = tmp = (sam_A = in_samples [1]) - apply_weight_i (dpp->weight_B, sam_B);
-                update_weight_clip (dpp->weight_B, dpp->delta, sam_B, tmp);
-
-                out_samples [0] = tmp = (dpp->samples_B [0] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam_A);
-                update_weight_clip (dpp->weight_A, dpp->delta, sam_A, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-
-            break;
-
-        case -3:
-            while (num_samples--) {
-                int32_t sam_A, sam_B, tmp;
-
-                sam_A = dpp->samples_A [0];
-                sam_B = dpp->samples_B [0];
-
-                dpp->samples_A [0] = tmp = in_samples [1];
-                out_samples [1] = tmp -= apply_weight_i (dpp->weight_B, sam_B);
-                update_weight_clip (dpp->weight_B, dpp->delta, sam_B, tmp);
-
-                dpp->samples_B [0] = tmp = in_samples [0];
-                out_samples [0] = tmp -= apply_weight_i (dpp->weight_A, sam_A);
-                update_weight_clip (dpp->weight_A, dpp->delta, sam_A, tmp);
-
-                in_samples += dir;
-                out_samples += dir;
-            }
-
-            break;
-    }
 }

 static void reverse_decorr (struct decorr_pass *dpp)
@ -788,10 +368,7 @@ static void decorr_stereo_buffer (WavpackExtraInfo *info, int32_t *samples, int3
 //    if (memcmp (dppi, &dp, sizeof (dp)))
 //      error_line ("decorr_passes don't match, delta = %d", delta);

-    if (info->gt16bit)
-        decorr_stereo_pass (samples, outsamples, num_samples, &dp, 1);
-    else
-        decorr_stereo_pass_quick (samples, outsamples, num_samples, &dp, 1);
+    decorr_stereo_pass (samples, outsamples, num_samples, &dp, 1);
 }

 static int log2overhead (int first_term, int num_terms)
@ -837,7 +414,7 @@ static void recurse_stereo (WavpackContext *wpc, WavpackExtraInfo *info, int dep
        info->dps [depth].term = term;
        info->dps [depth].delta = delta;
        decorr_stereo_buffer (info, samples, outsamples, wps->wphdr.block_samples, depth);
-        bits = log2buffer (outsamples, wps->wphdr.block_samples * 2, info->log_limit);
+        bits = LOG2BUFFER (outsamples, wps->wphdr.block_samples * 2, info->log_limit);

        if (bits != (uint32_t) -1)
            bits += log2overhead (info->dps [0].term, depth + 1);
@ -903,7 +480,7 @@ static void delta_stereo (WavpackContext *wpc, WavpackExtraInfo *info)
            decorr_stereo_buffer (info, info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, i);
        }

-        bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit);
+        bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit);

        if (bits != (uint32_t) -1)
            bits += log2overhead (wps->decorr_passes [0].term, i);
@ -928,7 +505,7 @@ static void delta_stereo (WavpackContext *wpc, WavpackExtraInfo *info)
            decorr_stereo_buffer (info, info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, i);
        }

-        bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit);
+        bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit);

        if (bits != (uint32_t) -1)
            bits += log2overhead (wps->decorr_passes [0].term, i);
@ -972,7 +549,7 @@ static void sort_stereo (WavpackContext *wpc, WavpackExtraInfo *info)
            for (i = ri; i < info->nterms && wps->decorr_passes [i].term; ++i)
                decorr_stereo_buffer (info, info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, i);

-            bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit);
+            bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit);

            if (bits != (uint32_t) -1)
                bits += log2overhead (wps->decorr_passes [0].term, i);
@ -1001,8 +578,6 @@ static void analyze_stereo (WavpackContext *wpc, int32_t *samples, int do_sample
    WavpackExtraInfo info;
    int i;

-    info.gt16bit = ((wps->wphdr.flags & MAG_MASK) >> MAG_LSB) >= 16;
-
 #ifdef LOG_LIMIT
    info.log_limit = (((wps->wphdr.flags & MAG_MASK) >> MAG_LSB) + 4) * 256;

@ -1026,18 +601,15 @@ static void analyze_stereo (WavpackContext *wpc, int32_t *samples, int do_sample
    memcpy (info.sampleptrs [0], samples, wps->wphdr.block_samples * 8);

    for (i = 0; i < info.nterms && info.dps [i].term; ++i)
-        if (info.gt16bit)
-            decorr_stereo_pass (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1);
-        else
-            decorr_stereo_pass_quick (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1);
+        decorr_stereo_pass (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1);

-    info.best_bits = log2buffer (info.sampleptrs [info.nterms], wps->wphdr.block_samples * 2, 0) * 1;
+    info.best_bits = LOG2BUFFER (info.sampleptrs [info.nterms], wps->wphdr.block_samples * 2, 0) * 1;
    info.best_bits += log2overhead (info.dps [0].term, i);
    memcpy (info.sampleptrs [info.nterms + 1], info.sampleptrs [i], wps->wphdr.block_samples * 8);

    if (wpc->config.extra_flags & EXTRA_BRANCHES)
        recurse_stereo (wpc, &info, 0, (int) floor (wps->delta_decay + 0.5),
-            log2buffer (info.sampleptrs [0], wps->wphdr.block_samples * 2, 0));
+            LOG2BUFFER (info.sampleptrs [0], wps->wphdr.block_samples * 2, 0));

    if (wpc->config.extra_flags & EXTRA_SORT_FIRST)
        sort_stereo (wpc, &info);
@ -1137,6 +709,12 @@ void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int
    uint32_t best_size = (uint32_t) -1, size;
    int log_limit, force_js = 0, force_ts = 0, pi, i;

+#ifdef SKIP_DECORRELATION
+    CLEAR (wps->decorr_passes);
+    wps->num_terms = 0;
+    return;
+#endif
+
    for (i = 0; i < num_samples * 2; ++i)
        if (samples [i])
            break;
@ -1216,7 +794,7 @@ void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int
        }

        wpds = &wps->decorr_specs [c];
-        nterms = (int) strlen (wpds->terms);
+        nterms = (int) strlen ((char *) wpds->terms);

        while (1) {
            if (force_js || (wpds->joint_stereo && !force_ts)) {
@ -1258,14 +836,10 @@ void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int
                    reverse_decorr (&temp_decorr_pass);

                memcpy (save_decorr_passes + j, &temp_decorr_pass, sizeof (struct decorr_pass));
-
-                if (((wps->wphdr.flags & MAG_MASK) >> MAG_LSB) >= 16)
-                    decorr_stereo_pass (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1);
-                else
-                    decorr_stereo_pass_quick (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1);
+                decorr_stereo_pass (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1);
            }

-            size = log2buffer (temp_buffer [j&1], num_samples * 2, log_limit);
+            size = LOG2BUFFER (temp_buffer [j&1], num_samples * 2, log_limit);

            if (size == (uint32_t) -1 && nterms)
                nterms >>= 1;
--- a/third_party/wavpack/src/float.c
+++ b/third_party/wavpack/src/float.c
@ -1,371 +0,0 @@
-////////////////////////////////////////////////////////////////////////////
-//                           **** WAVPACK ****                            //
-//                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
-//                          All Rights Reserved.                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-// float.c
-
-#include "wavpack_local.h"
-
-#include <stdlib.h>
-
-#ifdef DEBUG_ALLOC
-#define malloc malloc_db
-#define realloc realloc_db
-#define free free_db
-void *malloc_db (uint32_t size);
-void *realloc_db (void *ptr, uint32_t size);
-void free_db (void *ptr);
-int32_t dump_alloc (void);
-#endif
-
-#ifndef NO_PACK
-
-void write_float_info (WavpackStream *wps, WavpackMetadata *wpmd)
-{
-    char *byteptr;
-
-    byteptr = wpmd->data = malloc (4);
-    wpmd->id = ID_FLOAT_INFO;
-    *byteptr++ = wps->float_flags;
-    *byteptr++ = wps->float_shift;
-    *byteptr++ = wps->float_max_exp;
-    *byteptr++ = wps->float_norm_exp;
-    wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data);
-}
-
-int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values)
-{
-    int32_t shifted_ones = 0, shifted_zeros = 0, shifted_both = 0;
-    int32_t false_zeros = 0, neg_zeros = 0;
-    uint32_t ordata = 0, crc = 0xffffffff;
-    int32_t count, value, shift_count;
-    int max_exp = 0;
-    f32 *dp;
-
-    wps->float_shift = wps->float_flags = 0;
-
-    for (dp = values, count = num_values; count--; dp++) {
-        crc = crc * 27 + get_mantissa (*dp) * 9 + get_exponent (*dp) * 3 + get_sign (*dp);
-
-        if (get_exponent (*dp) > max_exp && get_exponent (*dp) < 255)
-            max_exp = get_exponent (*dp);
-    }
-
-    wps->crc_x = crc;
-
-    for (dp = values, count = num_values; count--; dp++) {
-        if (get_exponent (*dp) == 255) {
-            wps->float_flags |= FLOAT_EXCEPTIONS;
-            value = 0x1000000;
-            shift_count = 0;
-        }
-        else if (get_exponent (*dp)) {
-            shift_count = max_exp - get_exponent (*dp);
-            value = 0x800000 + get_mantissa (*dp);
-        }
-        else {
-            shift_count = max_exp ? max_exp - 1 : 0;
-            value = get_mantissa (*dp);
-
-//          if (get_mantissa (*dp))
-//              denormals++;
-        }
-
-        if (shift_count < 25)
-            value >>= shift_count;
-        else
-            value = 0;
-
-        if (!value) {
-            if (get_exponent (*dp) || get_mantissa (*dp))
-                ++false_zeros;
-            else if (get_sign (*dp))
-                ++neg_zeros;
-        }
-        else if (shift_count) {
-            int32_t mask = (1 << shift_count) - 1;
-
-            if (!(get_mantissa (*dp) & mask))
-                shifted_zeros++;
-            else if ((get_mantissa (*dp) & mask) == mask)
-                shifted_ones++;
-            else
-                shifted_both++;
-        }
-
-        ordata |= value;
-        * (int32_t *) dp = (get_sign (*dp)) ? -value : value;
-    }
-
-    wps->float_max_exp = max_exp;
-
-    if (shifted_both)
-        wps->float_flags |= FLOAT_SHIFT_SENT;
-    else if (shifted_ones && !shifted_zeros)
-        wps->float_flags |= FLOAT_SHIFT_ONES;
-    else if (shifted_ones && shifted_zeros)
-        wps->float_flags |= FLOAT_SHIFT_SAME;
-    else if (ordata && !(ordata & 1)) {
-        while (!(ordata & 1)) {
-            wps->float_shift++;
-            ordata >>= 1;
-        }
-
-        for (dp = values, count = num_values; count--; dp++)
-            * (int32_t *) dp >>= wps->float_shift;
-    }
-
-    wps->wphdr.flags &= ~MAG_MASK;
-
-    while (ordata) {
-        wps->wphdr.flags += 1 << MAG_LSB;
-        ordata >>= 1;
-    }
-
-    if (false_zeros || neg_zeros)
-        wps->float_flags |= FLOAT_ZEROS_SENT;
-
-    if (neg_zeros)
-        wps->float_flags |= FLOAT_NEG_ZEROS;
-
-//  error_line ("samples = %d, max exp = %d, pre-shift = %d, denormals = %d",
-//      num_values, max_exp, wps->float_shift, denormals);
-//  if (wps->float_flags & FLOAT_EXCEPTIONS)
-//      error_line ("exceptions!");
-//  error_line ("shifted ones/zeros/both = %d/%d/%d, true/neg/false zeros = %d/%d/%d",
-//      shifted_ones, shifted_zeros, shifted_both, true_zeros, neg_zeros, false_zeros);
-
-    return wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME);
-}
-
-void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values)
-{
-    int max_exp = wps->float_max_exp;
-    int32_t count, value, shift_count;
-    f32 *dp;
-
-    for (dp = values, count = num_values; count--; dp++) {
-        if (get_exponent (*dp) == 255) {
-            if (get_mantissa (*dp)) {
-                putbit_1 (&wps->wvxbits);
-                putbits (get_mantissa (*dp), 23, &wps->wvxbits);
-            }
-            else {
-                putbit_0 (&wps->wvxbits);
-            }
-
-            value = 0x1000000;
-            shift_count = 0;
-        }
-        else if (get_exponent (*dp)) {
-            shift_count = max_exp - get_exponent (*dp);
-            value = 0x800000 + get_mantissa (*dp);
-        }
-        else {
-            shift_count = max_exp ? max_exp - 1 : 0;
-            value = get_mantissa (*dp);
-        }
-
-        if (shift_count < 25)
-            value >>= shift_count;
-        else
-            value = 0;
-
-        if (!value) {
-            if (wps->float_flags & FLOAT_ZEROS_SENT) {
-                if (get_exponent (*dp) || get_mantissa (*dp)) {
-                    putbit_1 (&wps->wvxbits);
-                    putbits (get_mantissa (*dp), 23, &wps->wvxbits);
-
-                    if (max_exp >= 25) {
-                        putbits (get_exponent (*dp), 8, &wps->wvxbits);
-                    }
-
-                    putbit (get_sign (*dp), &wps->wvxbits);
-                }
-                else {
-                    putbit_0 (&wps->wvxbits);
-
-                    if (wps->float_flags & FLOAT_NEG_ZEROS)
-                        putbit (get_sign (*dp), &wps->wvxbits);
-                }
-            }
-        }
-        else if (shift_count) {
-            if (wps->float_flags & FLOAT_SHIFT_SENT) {
-                int32_t data = get_mantissa (*dp) & ((1 << shift_count) - 1);
-                putbits (data, shift_count, &wps->wvxbits);
-            }
-            else if (wps->float_flags & FLOAT_SHIFT_SAME) {
-                putbit (get_mantissa (*dp) & 1, &wps->wvxbits);
-            }
-        }
-    }
-}
-
-#endif
-
-#if !defined(NO_UNPACK) || defined(INFO_ONLY)
-
-int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd)
-{
-    int bytecnt = wpmd->byte_length;
-    char *byteptr = wpmd->data;
-
-    if (bytecnt != 4)
-        return FALSE;
-
-    wps->float_flags = *byteptr++;
-    wps->float_shift = *byteptr++;
-    wps->float_max_exp = *byteptr++;
-    wps->float_norm_exp = *byteptr;
-    return TRUE;
-}
-
-#endif
-
-#ifndef NO_UNPACK
-
-static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values);
-
-void float_values (WavpackStream *wps, int32_t *values, int32_t num_values)
-{
-    uint32_t crc = wps->crc_x;
-
-    if (!bs_is_open (&wps->wvxbits)) {
-        float_values_nowvx (wps, values, num_values);
-        return;
-    }
-
-    while (num_values--) {
-        int shift_count = 0, exp = wps->float_max_exp;
-        f32 outval = 0;
-        uint32_t temp;
-
-        if (*values == 0) {
-            if (wps->float_flags & FLOAT_ZEROS_SENT) {
-                if (getbit (&wps->wvxbits)) {
-                    getbits (&temp, 23, &wps->wvxbits);
-                    set_mantissa (outval, temp);
-
-                    if (exp >= 25) {
-                        getbits (&temp, 8, &wps->wvxbits);
-                        set_exponent (outval, temp);
-                    }
-
-                    set_sign (outval, getbit (&wps->wvxbits));
-                }
-                else if (wps->float_flags & FLOAT_NEG_ZEROS)
-                    set_sign (outval, getbit (&wps->wvxbits));
-            }
-        }
-        else {
-            *values <<= wps->float_shift;
-
-            if (*values < 0) {
-                *values = -*values;
-                set_sign (outval, 1);
-            }
-
-            if (*values == 0x1000000) {
-                if (getbit (&wps->wvxbits)) {
-                    getbits (&temp, 23, &wps->wvxbits);
-                    set_mantissa (outval, temp);
-                }
-
-                set_exponent (outval, 255);
-            }
-            else {
-                if (exp)
-                    while (!(*values & 0x800000) && --exp) {
-                        shift_count++;
-                        *values <<= 1;
-                    }
-
-                if (shift_count) {
-                    if ((wps->float_flags & FLOAT_SHIFT_ONES) ||
-                        ((wps->float_flags & FLOAT_SHIFT_SAME) && getbit (&wps->wvxbits)))
-                            *values |= ((1 << shift_count) - 1);
-                    else if (wps->float_flags & FLOAT_SHIFT_SENT) {
-                        getbits (&temp, shift_count, &wps->wvxbits);
-                        *values |= temp & ((1 << shift_count) - 1);
-                    }
-                }
-
-                set_mantissa (outval, *values);
-                set_exponent (outval, exp);
-            }
-        }
-
-        crc = crc * 27 + get_mantissa (outval) * 9 + get_exponent (outval) * 3 + get_sign (outval);
-        * (f32 *) values++ = outval;
-    }
-
-    wps->crc_x = crc;
-}
-
-static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values)
-{
-    while (num_values--) {
-        int shift_count = 0, exp = wps->float_max_exp;
-        f32 outval = 0;
-
-        if (*values) {
-            *values <<= wps->float_shift;
-
-            if (*values < 0) {
-                *values = -*values;
-                set_sign (outval, 1);
-            }
-
-            if (*values >= 0x1000000) {
-                while (*values & 0xf000000) {
-                    *values >>= 1;
-                    ++exp;
-                }
-            }
-            else if (exp) {
-                while (!(*values & 0x800000) && --exp) {
-                    shift_count++;
-                    *values <<= 1;
-                }
-
-                if (shift_count && (wps->float_flags & FLOAT_SHIFT_ONES))
-                    *values |= ((1 << shift_count) - 1);
-            }
-
-            set_mantissa (outval, *values);
-            set_exponent (outval, exp);
-        }
-
-        * (f32 *) values++ = outval;
-    }
-}
-
-#endif
-
-void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp)
-{
-    f32 *fvalues = (f32 *) values;
-    int exp;
-
-    if (!delta_exp)
-        return;
-
-    while (num_values--) {
-        if ((exp = get_exponent (*fvalues)) == 0 || exp + delta_exp <= 0)
-            *fvalues = 0;
-        else if (exp == 255 || (exp += delta_exp) >= 255) {
-            set_exponent (*fvalues, 255);
-            set_mantissa (*fvalues, 0);
-        }
-        else
-            set_exponent (*fvalues, exp);
-
-        fvalues++;
-    }
-}
--- a/third_party/wavpack/src/libwavpack.vcproj
+++ b/third_party/wavpack/src/libwavpack.vcproj
@ -0,0 +1,576 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="libwavpack"
+	ProjectGUID="{5CCCB9CF-0384-458F-BA08-72B73866840F}"
+	RootNamespace="libwavpack"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+		<DefaultToolFile
+			FileName="masm.rules"
+		/>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="MASM"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="MASM64"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;ENABLE_DSD"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="false"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="MASM"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="MASM64"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;ENABLE_DSD"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="false"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="MASM"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="MASM64"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="2"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;ENABLE_DSD;OPT_ASM_X86"
+				StringPooling="true"
+				ExceptionHandling="0"
+				RuntimeLibrary="0"
+				BufferSecurityCheck="false"
+				EnableFunctionLevelLinking="true"
+				DisableLanguageExtensions="false"
+				RuntimeTypeInfo="false"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="false"
+				DebugInformationFormat="0"
+				CompileAs="0"
+				OmitDefaultLibName="true"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				IgnoreAllDefaultLibraries="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="MASM"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="MASM64"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="2"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;ENABLE_DSD;OPT_ASM_X64"
+				StringPooling="true"
+				ExceptionHandling="0"
+				RuntimeLibrary="0"
+				BufferSecurityCheck="false"
+				EnableFunctionLevelLinking="true"
+				DisableLanguageExtensions="false"
+				RuntimeTypeInfo="false"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="false"
+				DebugInformationFormat="0"
+				CompileAs="0"
+				OmitDefaultLibName="true"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				IgnoreAllDefaultLibraries="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath=".\decorr_tables.h"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack3.h"
+				>
+			</File>
+			<File
+				RelativePath=".\wavpack_local.h"
+				>
+			</File>
+			<File
+				RelativePath=".\wavpack_version.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\common_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\decorr_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\entropy_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\extra1.c"
+				>
+			</File>
+			<File
+				RelativePath=".\extra2.c"
+				>
+			</File>
+			<File
+				RelativePath=".\open_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\open_filename.c"
+				>
+			</File>
+			<File
+				RelativePath=".\open_legacy.c"
+				>
+			</File>
+			<File
+				RelativePath=".\open_raw.c"
+				>
+			</File>
+			<File
+				RelativePath=".\pack.c"
+				>
+			</File>
+			<File
+				RelativePath=".\pack_dns.c"
+				>
+			</File>
+			<File
+				RelativePath=".\pack_floats.c"
+				>
+			</File>
+			<File
+				RelativePath=".\pack_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\pack_dsd.c"
+				>
+			</File>
+			<File
+				RelativePath=".\pack_x64.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\pack_x86.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM"
+						UseSafeExceptionHandlers="true"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="MASM"
+						UseSafeExceptionHandlers="true"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\read_words.c"
+				>
+			</File>
+			<File
+				RelativePath=".\tag_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\tags.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack3.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack3_open.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack3_seek.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack_floats.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack_seek.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack_utils.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack_dsd.c"
+				>
+			</File>
+			<File
+				RelativePath=".\unpack_x64.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\unpack_x86.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM"
+						UseSafeExceptionHandlers="true"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="MASM"
+						UseSafeExceptionHandlers="true"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="MASM64"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\write_words.c"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/third_party/wavpack/src/metadata.c
+++ b/third_party/wavpack/src/metadata.c
@ -1,313 +0,0 @@
-////////////////////////////////////////////////////////////////////////////
-//                           **** WAVPACK ****                            //
-//                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
-//                          All Rights Reserved.                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-// metadata.c
-
-// This module handles the metadata structure introduced in WavPack 4.0
-
-#include "wavpack_local.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef DEBUG_ALLOC
-#define malloc malloc_db
-#define realloc realloc_db
-#define free free_db
-void *malloc_db (uint32_t size);
-void *realloc_db (void *ptr, uint32_t size);
-void free_db (void *ptr);
-int32_t dump_alloc (void);
-#endif
-
-#if !defined(NO_UNPACK) || defined(INFO_ONLY)
-
-int read_metadata_buff (WavpackMetadata *wpmd, unsigned char *blockbuff, unsigned char **buffptr)
-{
-    WavpackHeader *wphdr = (WavpackHeader *) blockbuff;
-    unsigned char *buffend = blockbuff + wphdr->ckSize + 8;
-
-    if (buffend - *buffptr < 2)
-        return FALSE;
-
-    wpmd->id = *(*buffptr)++;
-    wpmd->byte_length = *(*buffptr)++ << 1;
-
-    if (wpmd->id & ID_LARGE) {
-        wpmd->id &= ~ID_LARGE;
-
-        if (buffend - *buffptr < 2)
-            return FALSE;
-
-        wpmd->byte_length += *(*buffptr)++ << 9;
-        wpmd->byte_length += *(*buffptr)++ << 17;
-    }
-
-    if (wpmd->id & ID_ODD_SIZE) {
-        wpmd->id &= ~ID_ODD_SIZE;
-        wpmd->byte_length--;
-    }
-
-    if (wpmd->byte_length) {
-        if (buffend - *buffptr < wpmd->byte_length + (wpmd->byte_length & 1)) {
-            wpmd->data = NULL;
-            return FALSE;
-        }
-
-        wpmd->data = *buffptr;
-        (*buffptr) += wpmd->byte_length + (wpmd->byte_length & 1);
-    }
-    else
-        wpmd->data = NULL;
-
-    return TRUE;
-}
-
-int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd)
-{
-    WavpackStream *wps = wpc->streams [wpc->current_stream];
-
-    switch (wpmd->id) {
-        case ID_DUMMY:
-            return TRUE;
-
-        case ID_DECORR_TERMS:
-            return read_decorr_terms (wps, wpmd);
-
-        case ID_DECORR_WEIGHTS:
-            return read_decorr_weights (wps, wpmd);
-
-        case ID_DECORR_SAMPLES:
-            return read_decorr_samples (wps, wpmd);
-
-        case ID_ENTROPY_VARS:
-            return read_entropy_vars (wps, wpmd);
-
-        case ID_HYBRID_PROFILE:
-            return read_hybrid_profile (wps, wpmd);
-
-        case ID_SHAPING_WEIGHTS:
-            return read_shaping_info (wps, wpmd);
-
-        case ID_FLOAT_INFO:
-            return read_float_info (wps, wpmd);
-
-        case ID_INT32_INFO:
-            return read_int32_info (wps, wpmd);
-
-        case ID_CHANNEL_INFO:
-            return read_channel_info (wpc, wpmd);
-
-        case ID_CONFIG_BLOCK:
-            return read_config_info (wpc, wpmd);
-
-        case ID_SAMPLE_RATE:
-            return read_sample_rate (wpc, wpmd);
-
-        case ID_WV_BITSTREAM:
-            return init_wv_bitstream (wps, wpmd);
-
-        case ID_WVC_BITSTREAM:
-            return init_wvc_bitstream (wps, wpmd);
-
-        case ID_WVX_BITSTREAM:
-            return init_wvx_bitstream (wps, wpmd);
-
-        case ID_RIFF_HEADER: case ID_RIFF_TRAILER:
-            return read_wrapper_data (wpc, wpmd);
-
-        case ID_MD5_CHECKSUM:
-            if (wpmd->byte_length == 16) {
-                memcpy (wpc->config.md5_checksum, wpmd->data, 16);
-                wpc->config.flags |= CONFIG_MD5_CHECKSUM;
-                wpc->config.md5_read = 1;
-            }
-
-            return TRUE;
-
-        default:
-            return (wpmd->id & ID_OPTIONAL_DATA) ? TRUE : FALSE;
-    }
-}
-
-#endif
-
-#ifndef NO_PACK
-
-int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end)
-{
-    uint32_t mdsize = wpmd->byte_length + (wpmd->byte_length & 1);
-    WavpackHeader *wphdr = (WavpackHeader *) buffer_start;
-
-    if (wpmd->byte_length & 1)
-        ((char *) wpmd->data) [wpmd->byte_length] = 0;
-
-    mdsize += (wpmd->byte_length > 510) ? 4 : 2;
-    buffer_start += wphdr->ckSize + 8;
-
-    if (buffer_start + mdsize >= buffer_end)
-        return FALSE;
-
-    buffer_start [0] = wpmd->id | (wpmd->byte_length & 1 ? ID_ODD_SIZE : 0);
-    buffer_start [1] = (wpmd->byte_length + 1) >> 1;
-
-    if (wpmd->byte_length > 510) {
-        buffer_start [0] |= ID_LARGE;
-        buffer_start [2] = (wpmd->byte_length + 1) >> 9;
-        buffer_start [3] = (wpmd->byte_length + 1) >> 17;
-    }
-
-    if (wpmd->data && wpmd->byte_length) {
-        if (wpmd->byte_length > 510) {
-            buffer_start [0] |= ID_LARGE;
-            buffer_start [2] = (wpmd->byte_length + 1) >> 9;
-            buffer_start [3] = (wpmd->byte_length + 1) >> 17;
-            memcpy (buffer_start + 4, wpmd->data, mdsize - 4);
-        }
-        else
-            memcpy (buffer_start + 2, wpmd->data, mdsize - 2);
-    }
-
-    wphdr->ckSize += mdsize;
-    return TRUE;
-}
-
-int add_to_metadata (WavpackContext *wpc, void *data, uint32_t bcount, unsigned char id)
-{
-    WavpackMetadata *mdp;
-    unsigned char *src = data;
-
-    while (bcount) {
-        if (wpc->metacount) {
-            uint32_t bc = bcount;
-
-            mdp = wpc->metadata + wpc->metacount - 1;
-
-            if (mdp->id == id) {
-                if (wpc->metabytes + bcount > 1000000)
-                    bc = 1000000 - wpc->metabytes;
-
-                mdp->data = realloc (mdp->data, mdp->byte_length + bc);
-                memcpy ((char *) mdp->data + mdp->byte_length, src, bc);
-                mdp->byte_length += bc;
-                wpc->metabytes += bc;
-                bcount -= bc;
-                src += bc;
-
-                if (wpc->metabytes >= 1000000 && !write_metadata_block (wpc))
-                    return FALSE;
-            }
-        }
-
-        if (bcount) {
-            wpc->metadata = realloc (wpc->metadata, (wpc->metacount + 1) * sizeof (WavpackMetadata));
-            mdp = wpc->metadata + wpc->metacount++;
-            mdp->byte_length = 0;
-            mdp->data = NULL;
-            mdp->id = id;
-        }
-    }
-
-    return TRUE;
-}
-
-static char *write_metadata (WavpackMetadata *wpmd, char *outdata)
-{
-    unsigned char id = wpmd->id, wordlen [3];
-
-    wordlen [0] = (wpmd->byte_length + 1) >> 1;
-    wordlen [1] = (wpmd->byte_length + 1) >> 9;
-    wordlen [2] = (wpmd->byte_length + 1) >> 17;
-
-    if (wpmd->byte_length & 1) {
-//      ((char *) wpmd->data) [wpmd->byte_length] = 0;
-        id |= ID_ODD_SIZE;
-    }
-
-    if (wordlen [1] || wordlen [2])
-        id |= ID_LARGE;
-
-    *outdata++ = id;
-    *outdata++ = wordlen [0];
-
-    if (id & ID_LARGE) {
-        *outdata++ = wordlen [1];
-        *outdata++ = wordlen [2];
-    }
-
-    if (wpmd->data && wpmd->byte_length) {
-        memcpy (outdata, wpmd->data, wpmd->byte_length);
-        outdata += wpmd->byte_length;
-
-        if (wpmd->byte_length & 1)
-            *outdata++ = 0;
-    }
-
-    return outdata;
-}
-
-int write_metadata_block (WavpackContext *wpc)
-{
-    char *block_buff, *block_ptr;
-    WavpackHeader *wphdr;
-
-    if (wpc->metacount) {
-        int metacount = wpc->metacount, block_size = sizeof (WavpackHeader);
-        WavpackMetadata *wpmdp = wpc->metadata;
-
-        while (metacount--) {
-            block_size += wpmdp->byte_length + (wpmdp->byte_length & 1);
-            block_size += (wpmdp->byte_length > 510) ? 4 : 2;
-            wpmdp++;
-        }
-
-        wphdr = (WavpackHeader *) (block_buff = malloc (block_size));
-
-        CLEAR (*wphdr);
-        memcpy (wphdr->ckID, "wvpk", 4);
-        wphdr->total_samples = wpc->total_samples;
-        wphdr->version = wpc->stream_version;
-        wphdr->ckSize = block_size - 8;
-        wphdr->block_samples = 0;
-
-        block_ptr = (char *)(wphdr + 1);
-
-        wpmdp = wpc->metadata;
-
-        while (wpc->metacount) {
-            block_ptr = write_metadata (wpmdp, block_ptr);
-            wpc->metabytes -= wpmdp->byte_length;
-            free_metadata (wpmdp++);
-            wpc->metacount--;
-        }
-
-        free (wpc->metadata);
-        wpc->metadata = NULL;
-        native_to_little_endian ((WavpackHeader *) block_buff, WavpackHeaderFormat);
-
-        if (!wpc->blockout (wpc->wv_out, block_buff, block_size)) {
-            free (block_buff);
-            strcpy (wpc->error_message, "can't write WavPack data, disk probably full!");
-            return FALSE;
-        }
-
-        free (block_buff);
-    }
-
-    return TRUE;
-}
-
-#endif
-
-void free_metadata (WavpackMetadata *wpmd)
-{
-    if (wpmd->data) {
-        free (wpmd->data);
-        wpmd->data = NULL;
-    }
-}
--- a/third_party/wavpack/src/open_filename.c
+++ b/third_party/wavpack/src/open_filename.c
@ -0,0 +1,304 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// open_filename.c
+
+// This module provides all the code required to open an existing WavPack
+// file, by filename, for reading. It does not contain the actual code to
+// unpack audio data and this was done so that programs that just want to
+// query WavPack files for information (like, for example, taggers) don't
+// need to link in a lot of unnecessary code.
+//
+// To allow opening files by filename, this code provides an interface
+// between the reader callback mechanism that WavPack uses internally and
+// the standard C stdio (FILE stream) library. Note that in applications
+// that do not require opening files by filename, this module can be
+// omitted (which might make building easier).
+//
+// For Unicode support on Windows, a flag has been added (OPEN_FILE_UTF8)
+// that forces the filename string to be assumed UTF-8 and converted to
+// a widechar string suitable for _wfopen(). Without this flag we revert
+// to the previous behavior of simply calling fopen() and hoping that the
+// local character set works. This is ignored on non-Windows platforms
+// (which is okay because they are probably UTF-8 anyway).
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <io.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#if (defined(__GNUC__) || defined(__sun)) && !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+#ifdef __OS2__
+#include <io.h>
+#endif
+
+#ifdef _WIN32
+#define fileno _fileno
+static FILE *fopen_utf8 (const char *filename_utf8, const char *mode_utf8);
+#if !defined(S_ISREG) && defined(S_IFMT) && defined(S_IFREG)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif
+#endif
+
+#ifdef HAVE_FSEEKO
+#define fseek fseeko
+#define ftell ftello
+#endif
+
+// Reader callback: pull up to "bcount" bytes from the stdio stream "id" into
+// "data" and report how many bytes fread() actually delivered.
+
+static int32_t read_bytes (void *id, void *data, int32_t bcount)
+{
+    FILE *stream = id;
+    size_t bytes_read = fread (data, 1, bcount, stream);
+
+    return (int32_t) bytes_read;
+}
+
+// Reader callback: report the current position of the stdio stream "id".
+// Windows builds use the explicit 64-bit tell; elsewhere ftell is used (which
+// is remapped to ftello above when HAVE_FSEEKO is defined).
+
+static int64_t get_pos (void *id)
+{
+    FILE *stream = id;
+
+#ifdef _WIN32
+    return _ftelli64 (stream);
+#else
+    return ftell (stream);
+#endif
+}
+
+// Reader callback: seek the stdio stream "id" to absolute offset "pos" and
+// hand back the result of the underlying seek call.
+
+static int set_pos_abs (void *id, int64_t pos)
+{
+    FILE *stream = id;
+
+#ifdef _WIN32
+    return _fseeki64 (stream, pos, SEEK_SET);
+#else
+    return fseek (stream, pos, SEEK_SET);
+#endif
+}
+
+// Reader callback: seek the stdio stream "id" by "delta" bytes relative to
+// the origin selected by "mode" (passed straight through as the seek whence
+// argument) and hand back the result of the underlying seek call.
+
+static int set_pos_rel (void *id, int64_t delta, int mode)
+{
+    FILE *stream = id;
+
+#ifdef _WIN32
+    return _fseeki64 (stream, delta, mode);
+#else
+    return fseek (stream, delta, mode);
+#endif
+}
+
+// Reader callback: push the byte "c" back onto the stdio stream "id" so the
+// next read sees it again; returns whatever ungetc() returns.
+
+static int push_back_byte (void *id, int c)
+{
+    FILE *stream = id;
+
+    return ungetc (c, stream);
+}
+
+#ifdef _WIN32
+
+// Reader callback (Windows): return the size in bytes of the file behind the
+// stdio stream "id", or 0 if the stream is NULL, has no valid OS handle, or
+// the size query fails.
+
+static int64_t get_length (void *id)
+{
+    LARGE_INTEGER file_size;
+    HANDLE os_handle;
+
+    if (!id)
+        return 0;
+
+    os_handle = (HANDLE) _get_osfhandle (_fileno ((FILE*) id));
+
+    if (os_handle == INVALID_HANDLE_VALUE)
+        return 0;
+
+    // the low half comes back as the return value, the high half via pointer
+    file_size.u.LowPart = GetFileSize (os_handle, &file_size.u.HighPart);
+
+    // INVALID_FILE_SIZE is also a legal low half, so the error code decides
+    if (file_size.u.LowPart == INVALID_FILE_SIZE && GetLastError () != NO_ERROR)
+        return 0;
+
+    return (int64_t) file_size.QuadPart;
+}
+
+#else
+
+// Reader callback (non-Windows): return the size in bytes of the file behind
+// the stdio stream "id". Anything that is not a regular file (or cannot be
+// stat'ed), as well as a NULL stream, reports a length of 0.
+
+static int64_t get_length (void *id)
+{
+    struct stat info;
+    FILE *stream = id;
+
+    if (stream == NULL)
+        return 0;
+
+    if (fstat (fileno (stream), &info) != 0)
+        return 0;
+
+    return S_ISREG (info.st_mode) ? info.st_size : 0;
+}
+
+#endif
+
+// Reader callback: a stream is considered seekable only when it is non-NULL,
+// can be stat'ed, and refers to a regular file. Returns 1 or 0.
+
+static int can_seek (void *id)
+{
+    struct stat info;
+    FILE *stream = id;
+
+    if (!stream || fstat (fileno (stream), &info))
+        return 0;
+
+    return S_ISREG (info.st_mode) ? 1 : 0;
+}
+
+// Reader callback: write "bcount" bytes from "data" to the stdio stream "id"
+// and report how many bytes fwrite() actually accepted.
+
+static int32_t write_bytes (void *id, void *data, int32_t bcount)
+{
+    FILE *stream = id;
+    size_t bytes_written = fwrite (data, 1, bcount, stream);
+
+    return (int32_t) bytes_written;
+}
+
+#ifdef _WIN32
+
+// Reader callback (Windows): cut the file off at the stream's current
+// position; returns the result of _chsize_s().
+
+static int truncate_here (void *id)
+{
+    FILE *stream = id;
+    int64_t here = _ftelli64 (stream);
+    int descriptor = fileno (stream);
+
+    return _chsize_s (descriptor, here);
+}
+
+#else
+
+// Reader callback (non-Windows): cut the file off at the stream's current
+// position; returns the result of ftruncate().
+
+static int truncate_here (void *id)
+{
+    FILE *stream = id;
+    off_t here = ftell (stream);
+    int descriptor = fileno (stream);
+
+    return ftruncate (descriptor, here);
+}
+
+#endif
+
+// Reader callback: close the stdio stream "id"; returns fclose()'s result.
+
+static int close_stream (void *id)
+{
+    FILE *stream = id;
+
+    return fclose (stream);
+}
+
+//  int32_t (*read_bytes)(void *id, void *data, int32_t bcount);
+//  int32_t (*write_bytes)(void *id, void *data, int32_t bcount);
+//  int64_t (*get_pos)(void *id);                               // new signature for large files
+//  int (*set_pos_abs)(void *id, int64_t pos);                  // new signature for large files
+//  int (*set_pos_rel)(void *id, int64_t delta, int mode);      // new signature for large files
+//  int (*push_back_byte)(void *id, int c);
+//  int64_t (*get_length)(void *id);                            // new signature for large files
+//  int (*can_seek)(void *id);
+//  int (*truncate_here)(void *id);                             // new function to truncate file at current position
+//  int (*close)(void *id);                                     // new function to close file
+
+// Callback table that lets the library drive a stdio FILE stream; entries
+// are positional and follow the slot order listed in the comment above.
+
+static WavpackStreamReader64 freader = {
+    read_bytes,         // read_bytes
+    write_bytes,        // write_bytes
+    get_pos,            // get_pos
+    set_pos_abs,        // set_pos_abs
+    set_pos_rel,        // set_pos_rel
+    push_back_byte,     // push_back_byte
+    get_length,         // get_length
+    can_seek,           // can_seek
+    truncate_here,      // truncate_here
+    close_stream        // close
+};
+
+// This function attempts to open the specified WavPack file for reading. If
+// this fails for any reason then an appropriate message is copied to "error"
+// (which must accept 80 characters) and NULL is returned, otherwise a
+// pointer to a WavpackContext structure is returned (which is used to call
+// all other functions in this module). A filename beginning with "-" is
+// assumed to be stdin. The "flags" argument has the following bit mask
+// values to specify details of the open operation:
+
+// OPEN_WVC:  attempt to open/read "correction" file
+// OPEN_TAGS:  attempt to read ID3v1 / APEv2 tags (requires seekable file)
+// OPEN_WRAPPER:  make audio wrapper available (i.e. RIFF) to caller
+// OPEN_2CH_MAX:  open only first stream of multichannel file (usually L/R)
+// OPEN_NORMALIZE:  normalize floating point data to +/- 1.0 (w/ offset exp)
+// OPEN_STREAMING:  blindly unpacks blocks w/o regard to header file position
+// OPEN_EDIT_TAGS:  allow editing of tags (file must be writable)
+// OPEN_FILE_UTF8:  assume infilename is UTF-8 encoded (Windows only)
+
+// Version 4.2 of the WavPack library adds the OPEN_STREAMING flag. This is
+// essentially a "raw" mode where the library will simply decode any blocks
+// fed it through the reader callback, regardless of where those blocks came
+// from in a stream. The only requirement is that complete WavPack blocks are
+// fed to the decoder (and this may require multiple blocks in multichannel
+// mode) and that complete blocks are decoded (even if all samples are not
+// actually required). All the blocks must contain the same number of channels
+// and bit resolution, and the correction data must be either present or not.
+// All other parameters may change from block to block (like lossy/lossless).
+// Obviously, in this mode any seeking must be performed by the application
+// (and again, decoding must start at the beginning of the block containing
+// the seek sample).
+
+WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset)
+{
+    const char *file_mode = (flags & OPEN_EDIT_TAGS) ? "r+b" : "rb";
+    FILE *(*fopen_func)(const char *, const char *) = fopen;
+    FILE *wv_id, *wvc_id = NULL;
+
+#ifdef _WIN32
+    if (flags & OPEN_FILE_UTF8)
+        fopen_func = fopen_utf8;
+#endif
+
+    // a leading '-' selects stdin, which must be switched to binary mode on
+    // platforms that distinguish text and binary streams
+
+    if (*infilename == '-') {
+        wv_id = stdin;
+#if defined(_WIN32)
+        _setmode (fileno (stdin), O_BINARY);
+#endif
+#if defined(__OS2__)
+        setmode (fileno (stdin), O_BINARY);
+#endif
+    }
+    else if ((wv_id = fopen_func (infilename, file_mode)) == NULL) {
+        if (error) strcpy (error, (flags & OPEN_EDIT_TAGS) ? "can't open file for editing" : "can't open file");
+        return NULL;
+    }
+
+    // the correction file has the same name with a 'c' appended (e.g. ".wvc");
+    // failing to open it is not an error, the stream is simply passed as NULL
+
+    if (wv_id != stdin && (flags & OPEN_WVC)) {
+        size_t name_len = strlen (infilename);
+        char *in2filename = malloc (name_len + 2);
+
+        // previously an allocation failure was passed straight to strcpy()
+        if (!in2filename) {
+            if (error) strcpy (error, "can't allocate memory for correction filename");
+            fclose (wv_id);
+            return NULL;
+        }
+
+        memcpy (in2filename, infilename, name_len);
+        in2filename [name_len] = 'c';
+        in2filename [name_len + 1] = '\0';
+        wvc_id = fopen_func (in2filename, "rb");
+        free (in2filename);
+    }
+
+    return WavpackOpenFileInputEx64 (&freader, wv_id, wvc_id, error, flags, norm_offset);
+}
+
+#ifdef _WIN32
+
+// The following code Copyright (c) 2004-2012 LoRd_MuldeR <mulder2@gmx.de>
+// (see cli/win32_unicode_support.c for full license)
+
+// Convert the NUL-terminated UTF-8 string "input" to a newly allocated
+// NUL-terminated wide-character string. Returns NULL if the required size
+// cannot be determined, the allocation fails, or the conversion fails; on
+// success the caller owns the buffer and must free() it.
+
+static wchar_t *utf8_to_utf16(const char *input)
+{
+	wchar_t *Buffer;
+	int BuffSize, Result;
+
+	// first pass only asks how many wide characters (including the NUL) are needed
+	BuffSize = MultiByteToWideChar(CP_UTF8, 0, input, -1, NULL, 0);
+	if(BuffSize <= 0)
+	{
+		return NULL;
+	}
+
+	Buffer = (wchar_t*) malloc(sizeof(wchar_t) * BuffSize);
+	if(!Buffer)
+	{
+		return NULL;
+	}
+
+	Result = MultiByteToWideChar(CP_UTF8, 0, input, -1, Buffer, BuffSize);
+	if((Result <= 0) || (Result > BuffSize))
+	{
+		// don't leak the buffer when the conversion itself fails
+		free(Buffer);
+		return NULL;
+	}
+
+	return Buffer;
+}
+
+
+// fopen() replacement for Windows that takes a UTF-8 filename and mode: both
+// strings are converted with utf8_to_utf16() and handed to _wfopen(). Returns
+// NULL if either conversion fails or the open itself fails.
+
+static FILE *fopen_utf8(const char *filename_utf8, const char *mode_utf8)
+{
+	wchar_t *wide_name = utf8_to_utf16(filename_utf8);
+	wchar_t *wide_mode = utf8_to_utf16(mode_utf8);
+	FILE *stream = (wide_name && wide_mode) ? _wfopen(wide_name, wide_mode) : NULL;
+
+	// free(NULL) is a no-op, so no guards are needed here
+	free(wide_name);
+	free(wide_mode);
+
+	return stream;
+}
+
+#endif
+
+
--- a/third_party/wavpack/src/open_legacy.c
+++ b/third_party/wavpack/src/open_legacy.c
@ -0,0 +1,114 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//                Copyright (c) 1998 - 2016 David Bryant.                 //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// open_legacy.c
+
+// This code provides an interface between the new reader callback mechanism that
+// WavPack uses internally and the old reader callback functions that did not
+// provide large file support.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+// Adapter state handed to the 64-bit reader callbacks in place of the
+// caller's own stream id: it pairs the legacy callback table with the id
+// those callbacks expect, so each trans_*() shim can forward the call.
+
+typedef struct {
+    WavpackStreamReader *reader;    // legacy callback table supplied by the caller
+    void *id;                       // caller's stream id, passed to every legacy callback
+} WavpackReaderTranslator;
+
+// Shim for read_bytes: unwrap the translator and forward the request
+// unchanged to the legacy reader.
+
+static int32_t trans_read_bytes (void *id, void *data, int32_t bcount)
+{
+    WavpackReaderTranslator *xlat = id;
+    WavpackStreamReader *legacy = xlat->reader;
+
+    return legacy->read_bytes (xlat->id, data, bcount);
+}
+
+// Shim for write_bytes: unwrap the translator and forward the request
+// unchanged to the legacy reader.
+
+static int32_t trans_write_bytes (void *id, void *data, int32_t bcount)
+{
+    WavpackReaderTranslator *xlat = id;
+    WavpackStreamReader *legacy = xlat->reader;
+
+    return legacy->write_bytes (xlat->id, data, bcount);
+}
+
+// Shim for get_pos: ask the legacy reader for the current position; the
+// result is converted to int64_t on return.
+
+static int64_t trans_get_pos (void *id)
+{
+    WavpackReaderTranslator *xlat = id;
+    WavpackStreamReader *legacy = xlat->reader;
+
+    return legacy->get_pos (xlat->id);
+}
+
+// Shim for set_pos_abs: the 64-bit position is narrowed to uint32_t before
+// being handed to the legacy reader.
+
+static int trans_set_pos_abs (void *id, int64_t pos)
+{
+    WavpackReaderTranslator *xlat = id;
+    uint32_t narrow_pos = (uint32_t) pos;
+
+    return xlat->reader->set_pos_abs (xlat->id, narrow_pos);
+}
+
+// Shim for set_pos_rel: the 64-bit delta is narrowed to int32_t before being
+// handed to the legacy reader; "mode" is passed through untouched.
+
+static int trans_set_pos_rel (void *id, int64_t delta, int mode)
+{
+    WavpackReaderTranslator *xlat = id;
+    int32_t narrow_delta = (int32_t) delta;
+
+    return xlat->reader->set_pos_rel (xlat->id, narrow_delta, mode);
+}
+
+// Shim for push_back_byte: unwrap the translator and forward the byte to the
+// legacy reader.
+
+static int trans_push_back_byte (void *id, int c)
+{
+    WavpackReaderTranslator *xlat = id;
+    WavpackStreamReader *legacy = xlat->reader;
+
+    return legacy->push_back_byte (xlat->id, c);
+}
+
+// Shim for get_length: ask the legacy reader for the stream length; the
+// result is converted to int64_t on return.
+
+static int64_t trans_get_length (void *id)
+{
+    WavpackReaderTranslator *xlat = id;
+    WavpackStreamReader *legacy = xlat->reader;
+
+    return legacy->get_length (xlat->id);
+}
+
+// Shim for can_seek: unwrap the translator and forward the query to the
+// legacy reader.
+
+static int trans_can_seek (void *id)
+{
+    WavpackReaderTranslator *xlat = id;
+    WavpackStreamReader *legacy = xlat->reader;
+
+    return legacy->can_seek (xlat->id);
+}
+
+// Close callback: releases only the translator allocated by
+// WavpackOpenFileInputEx(); the caller's underlying stream id is not touched.
+// Always returns 0.
+
+static int trans_close_stream (void *id)
+{
+    WavpackReaderTranslator *xlat = id;
+
+    free (xlat);
+    return 0;
+}
+
+// 64-bit callback table whose entries forward to a legacy reader through a
+// WavpackReaderTranslator; entries are positional.
+
+static WavpackStreamReader64 trans_reader = {
+    trans_read_bytes,       // read_bytes
+    trans_write_bytes,      // write_bytes
+    trans_get_pos,          // get_pos
+    trans_set_pos_abs,      // set_pos_abs
+    trans_set_pos_rel,      // set_pos_rel
+    trans_push_back_byte,   // push_back_byte
+    trans_get_length,       // get_length
+    trans_can_seek,         // can_seek
+    NULL,                   // truncate_here (no translation provided)
+    trans_close_stream      // close
+};
+
+// This function is identical to WavpackOpenFileInputEx64() except that instead
+// of providing the new 64-bit reader callbacks, the old reader callbacks are
+// utilized and a translation layer is employed. It is provided as a compatibility
+// function for existing applications. To ensure that streaming applications using
+// this function continue to work, the OPEN_NO_CHECKSUM flag is forced on when
+// the OPEN_STREAMING flag is set.
+
+WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset)
+{
+    WavpackReaderTranslator *trans_wv = NULL, *trans_wvc = NULL;
+
+    // this prevents existing streaming applications from failing if they try to pass
+    // in blocks that have been modified from the original (e.g., Matroska blocks)
+
+    if (flags & OPEN_STREAMING)
+        flags |= OPEN_NO_CHECKSUM;
+
+    // each non-NULL stream id is wrapped in its own translator, which pairs
+    // the legacy callbacks with the id they expect; a NULL id stays NULL
+
+    if (wv_id) {
+        trans_wv = (WavpackReaderTranslator *)malloc (sizeof (WavpackReaderTranslator));
+
+        // previously an allocation failure was dereferenced immediately
+        if (!trans_wv) {
+            if (error) strcpy (error, "can't allocate memory");
+            return NULL;
+        }
+
+        trans_wv->reader = reader;
+        trans_wv->id = wv_id;
+    }
+
+    if (wvc_id) {
+        trans_wvc = (WavpackReaderTranslator *)malloc (sizeof (WavpackReaderTranslator));
+
+        if (!trans_wvc) {
+            free (trans_wv);
+            if (error) strcpy (error, "can't allocate memory");
+            return NULL;
+        }
+
+        trans_wvc->reader = reader;
+        trans_wvc->id = wvc_id;
+    }
+
+    return WavpackOpenFileInputEx64 (&trans_reader, trans_wv, trans_wvc, error, flags, norm_offset);
+}
--- a/third_party/wavpack/src/open_raw.c
+++ b/third_party/wavpack/src/open_raw.c
@ -0,0 +1,315 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//                Copyright (c) 1998 - 2016 David Bryant.                 //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// open_raw.c
+
+// This code provides the ability to decode WavPack frames directly from
+// memory for use in a streaming application. It can handle full blocks
+// or the headerless block data provided by Matroska and the DirectShow
+// WavPack splitter. For information about how Matroska stores WavPack,
+// see: https://www.matroska.org/technical/specs/codecid/wavpack.html
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+// One contiguous run of bytes served by the raw reader.
+
+typedef struct {
+    unsigned char *sptr;            // start of the buffer (what gets freed, if owned)
+    unsigned char *dptr;            // next byte to hand out
+    unsigned char *eptr;            // one past the last byte
+    unsigned char free_required;    // non-zero if sptr must be free()'d on close
+} RawSegment;
+
+// Stream state for the raw reader: an ordered list of segments that are read
+// back to back, plus a one-byte push-back slot.
+
+typedef struct {
+    RawSegment *segments;           // array of num_segments entries
+    int num_segments;               // number of entries in "segments"
+    int curr_segment;               // index of the segment currently being read
+    unsigned char ungetc_char;      // byte stored by raw_push_back_byte()
+    unsigned char ungetc_flag;      // non-zero while ungetc_char is pending
+} WavpackRawContext;
+
+// Reader callback: copy up to "bcount" bytes into "data", taking a pending
+// pushed-back byte first and then walking the segment list in order. Returns
+// the number of bytes delivered, which is short only when the segments run out.
+
+static int32_t raw_read_bytes (void *id, void *data, int32_t bcount)
+{
+    WavpackRawContext *ctx = id;
+    unsigned char *dst = data;
+
+    while (bcount) {
+        RawSegment *seg;
+        int chunk;
+
+        // a pushed-back byte always goes out before any segment data
+        if (ctx->ungetc_flag) {
+            *dst++ = ctx->ungetc_char;
+            ctx->ungetc_flag = 0;
+            bcount--;
+            continue;
+        }
+
+        if (ctx->curr_segment >= ctx->num_segments)
+            break;
+
+        seg = ctx->segments + ctx->curr_segment;
+        chunk = (int)(seg->eptr - seg->dptr);
+
+        if (chunk > bcount)
+            chunk = bcount;
+
+        memcpy (dst, seg->dptr, chunk);
+        dst += chunk;
+        bcount -= chunk;
+        seg->dptr += chunk;
+
+        // move on once this segment has been fully consumed
+        if (seg->dptr == seg->eptr)
+            ctx->curr_segment++;
+    }
+
+    return (int32_t)(dst - (unsigned char *) data);
+}
+
+// Reader callback stub: the raw reader cannot write, so nothing is written
+// and 0 bytes are reported.
+
+static int32_t raw_write_bytes (void *id, void *data, int32_t bcount)
+{
+    (void) id;
+    (void) data;
+    (void) bcount;
+
+    return 0;
+}
+
+// Reader callback stub: the raw reader tracks no file position; always 0.
+
+static int64_t raw_get_pos (void *id)
+{
+    (void) id;
+
+    return 0;
+}
+
+// Reader callback stub: absolute seeks are ignored; always returns 0.
+
+static int raw_set_pos_abs (void *id, int64_t pos)
+{
+    (void) id;
+    (void) pos;
+
+    return 0;
+}
+
+// Reader callback stub: relative seeks are ignored; always returns 0.
+
+static int raw_set_pos_rel (void *id, int64_t delta, int mode)
+{
+    (void) id;
+    (void) delta;
+    (void) mode;
+
+    return 0;
+}
+
+// Reader callback: remember "c" (stored as an unsigned char) so that the next
+// raw_read_bytes() call returns it first. Only one byte can be pending; a
+// second push overwrites the first. Returns "c" unchanged.
+
+static int raw_push_back_byte (void *id, int c)
+{
+    WavpackRawContext *ctx = id;
+
+    ctx->ungetc_flag = 1;
+    ctx->ungetc_char = c;
+
+    return c;
+}
+
+// Reader callback stub: the raw reader reports no stream length; always 0.
+
+static int64_t raw_get_length (void *id)
+{
+    (void) id;
+
+    return 0;
+}
+
+// Reader callback stub: the raw reader is never seekable; always returns 0.
+
+static int raw_can_seek (void *id)
+{
+    (void) id;
+
+    return 0;
+}
+
+// Close callback: release a raw context together with its segment array and
+// every segment buffer flagged as owned (free_required). A NULL context is
+// accepted and ignored. Always returns 0.
+
+static int raw_close_stream (void *id)
+{
+    WavpackRawContext *ctx = id;
+    int n;
+
+    if (ctx == NULL)
+        return 0;
+
+    for (n = 0; n < ctx->num_segments; n++) {
+        RawSegment *seg = &ctx->segments [n];
+
+        if (seg->free_required && seg->sptr)
+            free (seg->sptr);
+    }
+
+    free (ctx->segments);       // free(NULL) is a no-op
+    free (ctx);
+
+    return 0;
+}
+
+// Callback table for decoding from in-memory segments; entries are
+// positional. Only reading, push-back and close do real work.
+
+static WavpackStreamReader64 raw_reader = {
+    raw_read_bytes,         // read_bytes
+    raw_write_bytes,        // write_bytes (stub)
+    raw_get_pos,            // get_pos (stub)
+    raw_set_pos_abs,        // set_pos_abs (stub)
+    raw_set_pos_rel,        // set_pos_rel (stub)
+    raw_push_back_byte,     // push_back_byte
+    raw_get_length,         // get_length (stub)
+    raw_can_seek,           // can_seek (stub)
+    NULL,                   // truncate_here (not provided)
+    raw_close_stream        // close
+};
+
+// Read a 32-bit little-endian value at *pp and advance *pp past it. The
+// bytes are widened to uint32_t before shifting so that a high bit in the
+// top byte cannot overflow a signed int.
+
+static uint32_t raw_read_le32 (unsigned char **pp)
+{
+    unsigned char *p = *pp;
+    uint32_t value = (uint32_t) p [0] | ((uint32_t) p [1] << 8) |
+        ((uint32_t) p [2] << 16) | ((uint32_t) p [3] << 24);
+
+    *pp = p + 4;
+    return value;
+}
+
+// Allocate and fill in the WavPack header that a headerless block is missing.
+// Returns NULL if the allocation fails; otherwise the caller owns the header.
+
+static WavpackHeader *raw_create_header (uint32_t block_size, uint32_t block_samples,
+    uint32_t wphdr_flags, uint32_t crc, int16_t version)
+{
+    WavpackHeader *wphdr = malloc (sizeof (WavpackHeader));
+
+    if (!wphdr)
+        return NULL;
+
+    memset (wphdr, 0, sizeof (WavpackHeader));
+    memcpy (wphdr->ckID, "wvpk", 4);
+    wphdr->ckSize = sizeof (WavpackHeader) - 8 + block_size;
+    SET_TOTAL_SAMPLES (*wphdr, block_samples);
+    wphdr->block_samples = block_samples;
+    wphdr->version = version;
+    wphdr->flags = wphdr_flags;
+    wphdr->crc = crc;
+    WavpackLittleEndianToNative (wphdr, WavpackHeaderFormat);
+
+    return wphdr;
+}
+
+// Append a (header, data) pair of segments to "rcxt". The header segment is
+// owned by the context afterwards (free_required set); the data segment just
+// points into the caller's buffer. Returns 1 on success, or 0 if the segment
+// array could not be grown, in which case "rcxt" is unchanged and "wphdr"
+// still belongs to the caller.
+
+static int raw_append_block (WavpackRawContext *rcxt, WavpackHeader *wphdr, unsigned char *data, uint32_t size)
+{
+    int index = rcxt->num_segments;
+    RawSegment *segments = realloc (rcxt->segments, sizeof (RawSegment) * (index + 2));
+
+    if (!segments)
+        return 0;
+
+    segments [index].dptr = segments [index].sptr = (unsigned char *) wphdr;
+    segments [index].eptr = segments [index].dptr + sizeof (WavpackHeader);
+    segments [index].free_required = 1;
+    segments [index + 1].dptr = segments [index + 1].sptr = data;
+    segments [index + 1].eptr = segments [index + 1].dptr + size;
+    segments [index + 1].free_required = 0;
+
+    rcxt->segments = segments;
+    rcxt->num_segments = index + 2;
+    return 1;
+}
+
+// Build a context with a single segment that points at caller-supplied data
+// (which already contains its own headers). Returns NULL on allocation failure.
+
+static WavpackRawContext *raw_create_single (void *data, int32_t size)
+{
+    WavpackRawContext *rcxt = calloc (1, sizeof (WavpackRawContext));
+
+    if (!rcxt)
+        return NULL;
+
+    rcxt->segments = malloc (sizeof (RawSegment));
+
+    if (!rcxt->segments) {
+        free (rcxt);
+        return NULL;
+    }
+
+    rcxt->num_segments = 1;
+    rcxt->segments [0].dptr = rcxt->segments [0].sptr = data;
+    rcxt->segments [0].eptr = rcxt->segments [0].dptr + size;
+    rcxt->segments [0].free_required = 0;
+    return rcxt;
+}
+
+// Common failure exit: store "message" in "error" (if provided), release both
+// contexts (either may be NULL) and return NULL.
+
+static WavpackContext *raw_open_failed (WavpackRawContext *raw_wv, WavpackRawContext *raw_wvc, char *error, const char *message)
+{
+    if (error) strcpy (error, message);
+    raw_close_stream (raw_wv);
+    raw_close_stream (raw_wvc);
+    return NULL;
+}
+
+// This function is similar to WavpackOpenFileInput() except that instead of
+// providing a filename to open, the caller provides pointers to buffered
+// WavPack frames (both standard and, optionally, correction data). It
+// decodes only a single frame. Note that in this context, a "frame" is a
+// collection of WavPack blocks that represent all the channels present. In
+// the case of mono or [most] stereo streams, this is the same thing, but
+// for multichannel streams each frame consists of several WavPack blocks
+// (which can contain only 1 or 2 channels).
+//
+// If the main data begins with "wvpk" it is used as is. Otherwise it is parsed
+// as headerless (Matroska-style) data and headers are synthesized using the
+// supplied "version". In that layout the main data starts with a 4-byte sample
+// count, and each block is then 4 bytes of flags, 4 bytes of CRC, a 4-byte
+// block size (present only when the first block is not flagged FINAL_BLOCK)
+// and the block data. Each correction block is 4 bytes of CRC, the optional
+// 4-byte size and the data. All values are little-endian.
+//
+// The block data is referenced in place rather than copied, so the caller's
+// buffers presumably must stay valid for as long as the returned context is
+// being read. On failure NULL is returned and, if "error" is not NULL, a
+// message is copied to it.
+
+WavpackContext *WavpackOpenRawDecoder (
+    void *main_data, int32_t main_size,
+    void *corr_data, int32_t corr_size,
+    int16_t version, char *error, int flags, int norm_offset)
+{
+    WavpackRawContext *raw_wv = NULL, *raw_wvc = NULL;
+
+    // if the WavPack data does not contain headers we assume Matroska-style storage
+    // and recreate the missing headers (the signature is only examined when there
+    // really are 4 bytes to look at, and a NULL buffer is never dereferenced)
+
+    if (main_data && (main_size < 4 || memcmp (main_data, "wvpk", 4))) {
+        uint32_t multiple_blocks = 0, block_size, block_samples = 0, wphdr_flags, crc;
+        uint32_t main_bytes = main_size, corr_bytes = corr_size;
+        unsigned char *mcp = main_data;
+        unsigned char *ccp = corr_data;
+
+        raw_wv = calloc (1, sizeof (WavpackRawContext));
+
+        if (!raw_wv)
+            return raw_open_failed (raw_wv, raw_wvc, error, "can't allocate memory!");
+
+        if (corr_data && corr_size) {
+            raw_wvc = calloc (1, sizeof (WavpackRawContext));
+
+            if (!raw_wvc)
+                return raw_open_failed (raw_wv, raw_wvc, error, "can't allocate memory!");
+        }
+
+        while (main_bytes >= 12) {
+            int first_block = !raw_wv->num_segments;
+            WavpackHeader *wphdr;
+
+            // only the first block is preceded by the sample count
+
+            if (first_block) {
+                block_samples = raw_read_le32 (&mcp);
+                main_bytes -= 4;
+            }
+
+            wphdr_flags = raw_read_le32 (&mcp);
+            main_bytes -= 4;
+
+            // if the first block does not have the FINAL_BLOCK flag set,
+            // then there are multiple blocks
+
+            if (first_block && !(wphdr_flags & FINAL_BLOCK))
+                multiple_blocks = 1;
+
+            crc = raw_read_le32 (&mcp);
+            main_bytes -= 4;
+
+            if (multiple_blocks) {
+                // the first block has already consumed 12 bytes at this point, so
+                // the size field itself may lie beyond the end of the data
+
+                if (main_bytes < 4)
+                    return raw_open_failed (raw_wv, raw_wvc, error, "main block overran available data!");
+
+                block_size = raw_read_le32 (&mcp);
+                main_bytes -= 4;
+            }
+            else
+                block_size = main_bytes;
+
+            if (block_size > main_bytes)
+                return raw_open_failed (raw_wv, raw_wvc, error, "main block overran available data!");
+
+            // the header is created only after validation so that the error
+            // returns above have nothing to leak
+
+            wphdr = raw_create_header (block_size, block_samples, wphdr_flags, crc, version);
+
+            if (!wphdr || !raw_append_block (raw_wv, wphdr, mcp, block_size)) {
+                free (wphdr);
+                return raw_open_failed (raw_wv, raw_wvc, error, "can't allocate memory!");
+            }
+
+            main_bytes -= block_size;
+            mcp += block_size;
+
+            // raw_wvc is known to be allocated here: corr_bytes can only be >= 4
+            // if corr_size was non-zero above
+
+            if (corr_data && corr_bytes >= 4) {
+                crc = raw_read_le32 (&ccp);
+                corr_bytes -= 4;
+
+                if (multiple_blocks) {
+                    if (corr_bytes < 4)
+                        return raw_open_failed (raw_wv, raw_wvc, error, "correction block overran available data!");
+
+                    block_size = raw_read_le32 (&ccp);
+                    corr_bytes -= 4;
+                }
+                else
+                    block_size = corr_bytes;
+
+                if (block_size > corr_bytes)
+                    return raw_open_failed (raw_wv, raw_wvc, error, "correction block overran available data!");
+
+                wphdr = raw_create_header (block_size, block_samples, wphdr_flags, crc, version);
+
+                if (!wphdr || !raw_append_block (raw_wvc, wphdr, ccp, block_size)) {
+                    free (wphdr);
+                    return raw_open_failed (raw_wv, raw_wvc, error, "can't allocate memory!");
+                }
+
+                corr_bytes -= block_size;
+                ccp += block_size;
+            }
+        }
+
+        if (main_bytes || (corr_data && corr_bytes))
+            return raw_open_failed (raw_wv, raw_wvc, error, "leftover multiblock data!");
+    }
+    else {      // the case of WavPack blocks with headers is much easier...
+        if (main_data) {
+            raw_wv = raw_create_single (main_data, main_size);
+
+            if (!raw_wv)
+                return raw_open_failed (raw_wv, raw_wvc, error, "can't allocate memory!");
+        }
+
+        if (corr_data && corr_size) {
+            raw_wvc = raw_create_single (corr_data, corr_size);
+
+            if (!raw_wvc)
+                return raw_open_failed (raw_wv, raw_wvc, error, "can't allocate memory!");
+        }
+    }
+
+    return WavpackOpenFileInputEx64 (&raw_reader, raw_wv, raw_wvc, error, flags | OPEN_STREAMING | OPEN_NO_CHECKSUM, norm_offset);
+}
+
+// Return the number of samples represented by the current (and in the raw case, only) frame.
+
+// Returns block_samples from the header of the first stream. If the context,
+// its stream array, or its first stream is missing, -1 converted to uint32_t
+// (i.e. 0xFFFFFFFF) is returned instead.
+
+uint32_t WavpackGetNumSamplesInFrame (WavpackContext *wpc)
+{
+    if (!wpc || !wpc->streams || !wpc->streams [0])
+        return (uint32_t) -1;
+
+    return wpc->streams [0]->wphdr.block_samples;
+}
+
--- a/third_party/wavpack/src/open_utils.c
+++ b/third_party/wavpack/src/open_utils.c
--- a/third_party/wavpack/src/pack.c
+++ b/third_party/wavpack/src/pack.c
--- a/third_party/wavpack/src/pack_dns.c
+++ b/third_party/wavpack/src/pack_dns.c
@ -0,0 +1,191 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// pack_dns.c
+
+// This module handles the implementation of "dynamic noise shaping" which is
+// designed to move the spectrum of the quantization noise introduced by lossy
+// compression up or down in frequency so that it is more likely to be masked
+// by the source material.
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "wavpack_local.h"
+
+static void best_floating_line (short *values, int num_values, double *initial_y, double *final_y, short *max_error);
+
+// Analyze the block of samples in "buffer" for the current stream and set up
+// the noise-shaping state in wps->dc. Per-sample shaping values derived from
+// the adaptive weights of the analysis pass are stored in dc.shaping_data.
+// When a correction file is in use (wpc->wvc_flag) those values are reduced
+// to a straight line, stored as dc.shaping_acc / dc.shaping_delta scaled by
+// 65536, and dc.shaping_array is cleared; otherwise dc.shaping_array points
+// at the per-sample data. If "shortening_allowed" is set and the line fits
+// too poorly, wps->wphdr.block_samples is reduced to a shorter leading
+// portion of the block that the line does fit.
+
+void dynamic_noise_shaping (WavpackContext *wpc, int32_t *buffer, int shortening_allowed)
+{
+    WavpackStream *wps = wpc->streams [wpc->current_stream];
+    int32_t sample_count = wps->wphdr.block_samples;
+    struct decorr_pass *ap = &wps->analysis_pass;
+    uint32_t flags = wps->wphdr.flags;
+    int32_t *bptr, temp, sam;
+    short *swptr;
+    int sc;
+
+    // when the stream has no decorrelation terms yet, walk the block from its
+    // end toward its start (predicting each sample from the two that follow
+    // it) to adapt the analysis weights before the forward pass below
+    // NOTE(review): presumably this is a warm-up for the first block -- confirm
+
+    if (!wps->num_terms && sample_count > 8) {
+        if (flags & MONO_DATA)
+            for (bptr = buffer + sample_count - 3, sc = sample_count - 2; sc--;) {
+                sam = (3 * bptr [1] - bptr [2]) >> 1;
+                temp = *bptr-- - apply_weight (ap->weight_A, sam);
+                update_weight (ap->weight_A, 2, sam, temp);
+            }
+        else
+            // interleaved stereo: the right (B) sample is visited first, then left (A)
+            for (bptr = buffer + (sample_count - 3) * 2 + 1, sc = sample_count - 2; sc--;) {
+                sam = (3 * bptr [2] - bptr [4]) >> 1;
+                temp = *bptr-- - apply_weight (ap->weight_B, sam);
+                update_weight (ap->weight_B, 2, sam, temp);
+                sam = (3 * bptr [2] - bptr [4]) >> 1;
+                temp = *bptr-- - apply_weight (ap->weight_A, sam);
+                update_weight (ap->weight_A, 2, sam, temp);
+            }
+    }
+
+    // forward pass over only those samples that do not yet have shaping data
+    // (dc.shaping_samples counts how many are already done): each sample is
+    // predicted from the previous two, the weight is adapted, and one shaping
+    // value per sample (per sample pair for stereo) is derived from the weight(s)
+
+    if (sample_count > wps->dc.shaping_samples) {
+        sc = sample_count - wps->dc.shaping_samples;
+        swptr = wps->dc.shaping_data + wps->dc.shaping_samples;
+        bptr = buffer + wps->dc.shaping_samples * ((flags & MONO_DATA) ? 1 : 2);
+
+        if (flags & MONO_DATA)
+            while (sc--) {
+                sam = (3 * ap->samples_A [0] - ap->samples_A [1]) >> 1;
+                temp = *bptr - apply_weight (ap->weight_A, sam);
+                update_weight (ap->weight_A, 2, sam, temp);
+                ap->samples_A [1] = ap->samples_A [0];
+                ap->samples_A [0] = *bptr++;
+                *swptr++ = (ap->weight_A < 256) ? 1024 : 1536 - ap->weight_A * 2;
+            }
+        else
+            while (sc--) {
+                sam = (3 * ap->samples_A [0] - ap->samples_A [1]) >> 1;
+                temp = *bptr - apply_weight (ap->weight_A, sam);
+                update_weight (ap->weight_A, 2, sam, temp);
+                ap->samples_A [1] = ap->samples_A [0];
+                ap->samples_A [0] = *bptr++;
+
+                sam = (3 * ap->samples_B [0] - ap->samples_B [1]) >> 1;
+                temp = *bptr - apply_weight (ap->weight_B, sam);
+                update_weight (ap->weight_B, 2, sam, temp);
+                ap->samples_B [1] = ap->samples_B [0];
+                ap->samples_B [0] = *bptr++;
+
+                *swptr++ = (ap->weight_A + ap->weight_B < 512) ? 1024 : 1536 - ap->weight_A - ap->weight_B;
+            }
+
+        wps->dc.shaping_samples = sample_count;
+    }
+
+    if (wpc->wvc_flag) {
+        // NOTE(review): divides by wpc->ave_block_samples -- assumes it is never 0 here
+        int max_allowed_error = 1000000 / wpc->ave_block_samples;
+        short max_error, trial_max_error;
+        double initial_y, final_y;
+
+        if (max_allowed_error < 128)
+            max_allowed_error = 128;
+
+        // fit a single line to the shaping data of the whole block
+        best_floating_line (wps->dc.shaping_data, sample_count, &initial_y, &final_y, &max_error);
+
+        if (shortening_allowed && max_error > max_allowed_error) {
+            int min_samples = 0, max_samples = sample_count, trial_count;
+            double trial_initial_y, trial_final_y;
+
+            // bisect on the sample count: min_samples is the longest length seen
+            // so far whose line fit is within the allowed error, max_samples the
+            // shortest length that was not
+
+            while (1) {
+                trial_count = (min_samples + max_samples) / 2;
+
+                best_floating_line (wps->dc.shaping_data, trial_count, &trial_initial_y,
+                    &trial_final_y, &trial_max_error);
+
+                if (trial_max_error < max_allowed_error) {
+                    max_error = trial_max_error;
+                    min_samples = trial_count;
+                    initial_y = trial_initial_y;
+                    final_y = trial_final_y;
+                }
+                else
+                    max_samples = trial_count;
+
+                // stop once the bracket has closed, or as soon as an acceptable
+                // length above 10000 samples has been found
+                if (min_samples > 10000 || max_samples - min_samples < 2)
+                    break;
+            }
+
+            sample_count = min_samples;
+        }
+
+        // clip the ends of the line to the range [-512, 1024]
+
+        if (initial_y < -512) initial_y = -512;
+        else if (initial_y > 1024) initial_y = 1024;
+
+        if (final_y < -512) final_y = -512;
+        else if (final_y > 1024) final_y = 1024;
+#if 0
+        error_line ("%.2f sec, sample count = %5d, max error = %3d, range = %5d, %5d, actual = %5d, %5d",
+            (double) wps->sample_index / wpc->config.sample_rate, sample_count, max_error,
+            (int) floor (initial_y), (int) floor (final_y),
+            wps->dc.shaping_data [0], wps->dc.shaping_data [sample_count-1]);
+#endif
+        // record the (possibly shortened) length in the block header
+        if (sample_count != wps->wphdr.block_samples)
+            wps->wphdr.block_samples = sample_count;
+
+        // NOTE(review): this test repeats the enclosing "if (wpc->wvc_flag)" and
+        // nothing in between modifies the flag, so the else branch below looks
+        // unreachable
+        if (wpc->wvc_flag) {
+            // the line is stored as a starting value and per-sample increment, both
+            // scaled by 65536 and rounded to the nearest integer
+            wps->dc.shaping_acc [0] = wps->dc.shaping_acc [1] = (int32_t) floor (initial_y * 65536.0 + 0.5);
+
+            // NOTE(review): divides by (sample_count - 1), which is zero for a
+            // one-sample block -- confirm callers never get here with one
+            wps->dc.shaping_delta [0] = wps->dc.shaping_delta [1] =
+                (int32_t) floor ((final_y - initial_y) / (sample_count - 1) * 65536.0 + 0.5);
+
+            wps->dc.shaping_array = NULL;
+        }
+        else
+            wps->dc.shaping_array = wps->dc.shaping_data;
+    }
+    else
+        wps->dc.shaping_array = wps->dc.shaping_data;
+}
+
+// Given an array of integer data (in shorts), find the linear function that most closely
+// represents it (based on minimum sum of absolute errors). This is returned as the double
+// precision initial & final Y values of the best-fit line. The function can also optionally
+// compute and return a maximum error value (as a short). Note that the ends of the resulting
+// line may fall way outside the range of input values, so some sort of clipping may be
+// needed.
+
+// Fit a straight line to "num_values" shorts. The line passes through the
+// mean of the data at the center index, and its slope comes from the
+// difference between the sums of the upper and lower halves (the middle
+// element of an odd-length array is split evenly between the two). The line's
+// Y values at the first and last index are written to "initial_y" and
+// "final_y", and the largest absolute deviation of any input from the line,
+// rounded to the nearest integer, to "max_error". Any of the three output
+// pointers may be NULL to skip that result.
+
+static void best_floating_line (short *values, int num_values, double *initial_y, double *final_y, short *max_error)
+{
+    double lower_sum = 0.0, upper_sum = 0.0;
+    double center_x, center_y, m;
+    int half = num_values >> 1;
+    int lo, hi, i;
+
+    // accumulate both halves, working inward from the two ends
+    for (lo = 0, hi = num_values - 1; lo < half; ++lo, --hi) {
+        upper_sum += values [hi];
+        lower_sum += values [lo];
+    }
+
+    if (num_values & 1) {
+        upper_sum += values [half] * 0.5;
+        lower_sum += values [half] * 0.5;
+    }
+
+    center_x = (num_values - 1) / 2.0;
+    center_y = (upper_sum + lower_sum) / num_values;
+    m = (upper_sum - lower_sum) / ((double) num_values * num_values) * 4.0;
+
+    if (initial_y)
+        *initial_y = center_y - m * center_x;
+
+    if (final_y)
+        *final_y = center_y + m * center_x;
+
+    if (!max_error)
+        return;
+
+    {
+        double worst = 0.0;
+
+        for (i = 0; i < num_values; ++i) {
+            double deviation = fabs (values [i] - (center_y + (i - center_x) * m));
+
+            if (deviation > worst)
+                worst = deviation;
+        }
+
+        *max_error = (short) floor (worst + 0.5);
+    }
+}
--- a/third_party/wavpack/src/pack_dsd.c
+++ b/third_party/wavpack/src/pack_dsd.c
@ -0,0 +1,669 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** DSDPACK ****                            //
+//         Lossless DSD (Direct Stream Digital) Audio Compressor          //
+//                Copyright (c) 2013 - 2016 David Bryant.                 //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// pack_dsd.c
+
+// This module actually handles the compression of the DSD audio data.
+
+#ifdef ENABLE_DSD
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// executable code ////////////////////////////////
+
+// Reset the DSD packer state for the context's current stream. This must be
+// called BEFORE any other function in this module. The only state touched here
+// is the stream's running sample index, which is set back to zero.
+
+void pack_dsd_init (WavpackContext *wpc)
+{
+    wpc->streams [wpc->current_stream]->sample_index = 0;
+}
+
+// Pack an entire block of DSD data (either mono or stereo) into a completed
+// WavPack block. Only the low 8 bits of each entry in "buffer" are used. Stereo
+// data in which both channels are identical is stored as mono (FALSE_STEREO).
+// It is assumed that there is sufficient space for the completed block at
+// "wps->blockbuff" and that "wps->blockend" points to the end of the available
+// space. Any unsent metadata is transmitted first, then required metadata for
+// this block is sent, and finally the DSD data is sent. The data is compressed
+// with the "fast" encoder or, when CONFIG_HIGH_FLAG is set, with whichever of
+// the "high" and "fast" encoders produces the smaller result; if the encoders
+// decline the block, the bytes are stored verbatim. A return value of FALSE
+// would indicate an error, although the current implementation always returns
+// TRUE. To find out how much data was written the caller must look at the
+// ckSize field of the written WavpackHeader, NOT the one in the WavpackStream.
+
+// Forward declarations of the two block encoders defined later in this file. Each
+// returns the number of bytes it wrote at "destination", or -1 if it did not
+// produce an acceptable encoding of the block.
+
+static int encode_buffer_high (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination);
+static int encode_buffer_fast (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination);
+
+// Build one complete DSD block for the current stream at wps->blockbuff from the
+// wps->wphdr.block_samples samples in "buffer" (only the low byte of each entry is
+// used). Every path visible here returns TRUE.
+
+int pack_dsd_block (WavpackContext *wpc, int32_t *buffer)
+{
+    WavpackStream *wps = wpc->streams [wpc->current_stream];
+    uint32_t flags = wps->wphdr.flags, mult = wpc->dsd_multiplier, data_count;
+    uint32_t sample_count = wps->wphdr.block_samples;
+    unsigned char *dsd_encoding, dsd_power = 0;
+    int32_t res;
+
+    // This code scans stereo data to check whether it can be stored as mono data
+    // (i.e., all L/R samples identical).
+
+    if (!(flags & MONO_FLAG)) {
+        int32_t *sptr, *dptr, i;
+
+        for (sptr = buffer, i = 0; i < (int32_t) sample_count; sptr += 2, i++)
+            if ((sptr [0] ^ sptr [1]) & 0xff)
+                break;
+
+        if (i == sample_count) {
+            wps->wphdr.flags = flags |= FALSE_STEREO;
+            dptr = buffer;
+            sptr = buffer;
+
+            // compact the interleaved pairs in place, keeping one entry of each pair
+            for (i = sample_count; i--; sptr++)
+                *dptr++ = *sptr++;
+        }
+        else
+            wps->wphdr.flags = flags &= ~FALSE_STEREO;
+    }
+
+    // start the block as a bare header; ckSize does not count the first 8 header bytes
+    wps->wphdr.ckSize = sizeof (WavpackHeader) - 8;
+    memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader));
+
+    // append any metadata queued on the context, releasing each item once copied
+    if (wpc->metacount) {
+        WavpackMetadata *wpmdp = wpc->metadata;
+
+        while (wpc->metacount) {
+            copy_metadata (wpmdp, wps->blockbuff, wps->blockend);
+            wpc->metabytes -= wpmdp->byte_length;
+            free_metadata (wpmdp++);
+            wpc->metacount--;
+        }
+
+        free (wpc->metadata);
+        wpc->metadata = NULL;
+    }
+
+    // a block with no samples carries only the header and the metadata copied above
+    if (!sample_count)
+        return TRUE;
+
+    send_general_metadata (wpc);
+    memcpy (&wps->wphdr, wps->blockbuff, sizeof (WavpackHeader));
+
+    // the DSD payload starts 4 bytes past the current end of the block, leaving room
+    // for the 4-byte metadata header (id + 3 size bytes) that is filled in below
+    dsd_encoding = wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 12;
+
+    // dsd_power = number of times dsd_multiplier can be halved before reaching zero,
+    // minus one (i.e. floor of log2 for a nonzero multiplier)
+    while (mult >>= 1)
+        dsd_power++;
+
+    *dsd_encoding++ = dsd_power;
+
+    if (wpc->config.flags & CONFIG_HIGH_FLAG) {
+        // the fast encoder is run first only to learn its size; the high encoder then
+        // overwrites the same destination
+        int fast_res = encode_buffer_fast (wps, buffer, sample_count, dsd_encoding);
+
+        res = encode_buffer_high (wps, buffer, sample_count, dsd_encoding);
+
+        // if fast succeeded and high either failed or came out larger, re-run fast to
+        // put its output back in the block
+        if ((fast_res != -1) && (res == -1 || res > fast_res))
+            res = encode_buffer_fast (wps, buffer, sample_count, dsd_encoding);
+    }
+    else
+        res = encode_buffer_fast (wps, buffer, sample_count, dsd_encoding);
+
+    // neither encoder produced output: store a mode byte of 0 followed by the low byte
+    // of every buffer entry, computing the block CRC over those bytes here
+    if (res == -1) {
+        int num_samples = sample_count * ((flags & MONO_DATA) ? 1 : 2);
+        uint32_t crc = 0xffffffff;
+
+        *dsd_encoding++ = 0;
+
+        // payload = dsd_power byte + mode byte + raw bytes
+        data_count = num_samples + 2;
+
+        while (num_samples--)
+            crc += (crc << 1) + (*dsd_encoding++ = *buffer++);
+
+        ((WavpackHeader *) wps->blockbuff)->crc = crc;
+    }
+    else
+        data_count = res + 1;       // encoder output plus the dsd_power byte
+
+    if (data_count) {
+        unsigned char *cptr = wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 8;
+
+        // an odd-sized payload is padded with one zero byte and flagged ID_ODD_SIZE
+        if (data_count & 1) {
+            cptr [data_count + 4] = 0;
+            *cptr++ = ID_DSD_BLOCK | ID_LARGE | ID_ODD_SIZE;
+            data_count++;
+        }
+        else
+            *cptr++ = ID_DSD_BLOCK | ID_LARGE;
+
+        // the size is written as a count of 2-byte units in three bytes, low byte first
+        *cptr++ = data_count >> 1;
+        *cptr++ = data_count >> 9;
+        *cptr++ = data_count >> 17;
+        ((WavpackHeader *) wps->blockbuff)->ckSize += data_count + 4;
+    }
+
+    wps->sample_index += sample_count;
+    return TRUE;
+}
+
+/*------------------------------------------------------------------------------------------------------------------------*/
+
+// DSD_BYTE_READY is true when the most significant bytes (bits 24-31) of "low" and
+// "high" are identical. The two commented-out definitions are alternative spellings
+// of the same test (for 32-bit unsigned operands).
+
+// #define DSD_BYTE_READY(low,high) (((low) >> 24) == ((high) >> 24))
+// #define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) >> 24))
+#define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) & 0xff000000))
+
+#define MAX_HISTORY_BITS    5
+#define MAX_PROBABILITY     0xa0    // set to 0xff to disable RLE encoding for probabilities table
+
+#if (MAX_PROBABILITY < 0xff)
+
+// Write "zeros" pending zero bytes at "out" as one or more run codes and return the
+// advanced output pointer. Each code is MAX_PROBABILITY plus a run length of at most
+// "run_limit".
+
+static unsigned char *rle_flush_zeros (unsigned char *out, int zeros, int run_limit)
+{
+    while (zeros) {
+        int run = zeros > run_limit ? run_limit : zeros;
+
+        *out++ = MAX_PROBABILITY + run;
+        zeros -= run;
+    }
+
+    return out;
+}
+
+// Run-length encode the "bcount" bytes at "src" into "destination". Non-zero bytes
+// are copied through unchanged, while each run of zero bytes is replaced by run codes
+// with values above MAX_PROBABILITY (so the source bytes are expected not to exceed
+// MAX_PROBABILITY). A single zero byte terminates the output. The return value is
+// the number of bytes written, including the terminator.
+
+static int rle_encode (unsigned char *src, int bcount, unsigned char *destination)
+{
+    const int run_limit = 0xff - MAX_PROBABILITY;
+    unsigned char *out = destination;
+    int pending_zeros = 0;
+
+    for (; bcount--; src++) {
+        if (!*src) {
+            pending_zeros++;
+            continue;
+        }
+
+        // a non-zero byte ends any zero run that was being collected
+        out = rle_flush_zeros (out, pending_zeros, run_limit);
+        pending_zeros = 0;
+        *out++ = *src;
+    }
+
+    // trailing zeros are flushed too, then the terminator is appended
+    out = rle_flush_zeros (out, pending_zeros, run_limit);
+    *out++ = 0;
+
+    return (int)(out - destination);
+}
+
+#endif
+
+// Convert a 256-entry histogram of hit counts into a table of byte-sized
+// probabilities ("probs") and their running totals ("prob_sums", where entry i is
+// the sum of probs [0] through probs [i]). Counts are scaled down when needed so
+// that no probability exceeds MAX_PROBABILITY, but any value that was hit at least
+// once keeps a probability of at least 1. An all-zero histogram produces all-zero
+// tables.
+
+static void calculate_probabilities (int hist [256], unsigned char probs [256], unsigned short prob_sums [256])
+{
+    int peak_hits = 0, divisor, largest, running_sum, bin;
+
+    for (bin = 0; bin < 256; ++bin)
+        if (hist [bin] > peak_hits)
+            peak_hits = hist [bin];
+
+    if (!peak_hits) {
+        memset (probs, 0, sizeof (*probs) * 256);
+        memset (prob_sums, 0, sizeof (*prob_sums) * 256);
+        return;
+    }
+
+    // A divisor of zero means the counts are used unscaled; otherwise it is an 8-bit
+    // fixed-point scale factor first chosen so the peak count maps to MAX_PROBABILITY.
+
+    divisor = (peak_hits > MAX_PROBABILITY) ?
+        ((peak_hits << 8) + (MAX_PROBABILITY >> 1)) / MAX_PROBABILITY : 0;
+
+    // Rounding can still push a scaled value over the limit, in which case the divisor
+    // is bumped and the whole table is rebuilt. (Dividing out a factor common to all
+    // the probabilities was also tried, but that case doesn't really happen often
+    // enough to make it worthwhile.)
+
+    for (;; divisor++) {
+        largest = running_sum = 0;
+
+        for (bin = 0; bin < 256; ++bin) {
+            int scaled = 0;
+
+            if (hist [bin]) {
+                if (!divisor)
+                    scaled = hist [bin];
+                else if (!(scaled = ((hist [bin] << 8) + (divisor >> 1)) / divisor))
+                    scaled = 1;
+
+                if (scaled > largest)
+                    largest = scaled;
+            }
+
+            running_sum += scaled;
+            prob_sums [bin] = running_sum;
+            probs [bin] = scaled;
+        }
+
+        if (largest <= MAX_PROBABILITY)
+            break;
+    }
+}
+
+// Encode a block of DSD bytes with the "fast" method. The low byte of each buffer
+// entry is coded with a range coder (a low/high interval narrowed by cumulative
+// probabilities) driven by static probability tables, one table per "history bin".
+// The bin is selected by the low bits of a previous byte: the preceding byte for
+// mono data, the byte two positions back for stereo data. "num_samples" is the
+// sample count per channel and is doubled here when the data is not mono.
+//
+// The output written at "destination" is a mode byte of 1, the history_bits value,
+// MAX_PROBABILITY, the probability tables (run-length encoded when MAX_PROBABILITY
+// is less than 0xff), and finally the coded data.
+//
+// Returns the number of bytes written, or -1 if the block has fewer than 280 bytes,
+// if the working tables cannot be allocated, or if the output reaches (input size
+// - 10) bytes. In the -1 cases "destination" may have been partially written. Once
+// the histogram pass has run, the block CRC is also stored into the WavpackHeader
+// at wps->blockbuff.
+
+static int encode_buffer_fast (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination)
+{
+    uint32_t flags = wps->wphdr.flags, crc = 0xffffffff;
+    unsigned int low = 0, high = 0xffffffff, mult;
+    unsigned short (*summed_probabilities) [256];
+    unsigned char (*probabilities) [256];
+    unsigned char *dp = destination, *ep;
+    int history_bins, bc, p0 = 0, p1 = 0;
+    int total_summed_probabilities = 0;
+    int (*histogram) [256];
+    int32_t *bp = buffer;
+    char history_bits;
+
+    if (!(flags & MONO_DATA))
+        num_samples *= 2;
+
+    // pick the number of history bits from the amount of data available to train on
+
+    if (num_samples < 280)
+        return -1;
+    else if (num_samples < 560)
+        history_bits = 0;
+    else if (num_samples < 1725)
+        history_bits = 1;
+    else if (num_samples < 5000)
+        history_bits = 2;
+    else if (num_samples < 14000)
+        history_bits = 3;
+    else if (num_samples < 28000)
+        history_bits = 4;
+    else if (num_samples < 76000)
+        history_bits = 5;
+    else if (num_samples < 130000)
+        history_bits = 6;
+    else if (num_samples < 300000)
+        history_bits = 7;
+    else
+        history_bits = 8;
+
+    if (history_bits > MAX_HISTORY_BITS)
+        history_bits = MAX_HISTORY_BITS;
+
+    history_bins = 1 << history_bits;
+    histogram = calloc (history_bins, sizeof (*histogram));
+    probabilities = malloc (sizeof (*probabilities) * history_bins);
+    summed_probabilities = malloc (sizeof (*summed_probabilities) * history_bins);
+
+    // If any table could not be allocated we report that the block was not encoded;
+    // the caller then falls back to another encoding and computes the CRC itself.
+
+    if (!histogram || !probabilities || !summed_probabilities) {
+        free (histogram);
+        free (probabilities);
+        free (summed_probabilities);
+        return -1;
+    }
+
+    bc = num_samples;
+
+    // first pass: compute the CRC and count how often each byte value occurs in each bin
+
+    if (flags & MONO_DATA)
+        while (bc--) {
+            crc += (crc << 1) + (*bp & 0xff);
+            histogram [p0] [*bp & 0xff]++;
+            p0 = *bp++ & (history_bins-1);
+        }
+    else
+        while (bc--) {
+            crc += (crc << 1) + (*bp & 0xff);
+            histogram [p0] [*bp & 0xff]++;
+            p0 = p1;
+            p1 = *bp++ & (history_bins-1);
+        }
+
+    for (p0 = 0; p0 < history_bins; p0++) {
+        calculate_probabilities (histogram [p0], probabilities [p0], summed_probabilities [p0]);
+        total_summed_probabilities += summed_probabilities [p0] [255];
+    }
+
+    ((WavpackHeader *) wps->blockbuff)->crc = crc;
+
+    // This code detects the case where the required value lookup tables grow silly big and cuts them back down. This would
+    // normally only happen with large blocks or poorly compressible data. The target is to guarantee that the total memory
+    // required for all three decode tables will be 2K bytes per history bin.
+
+    while (total_summed_probabilities > history_bins * 1280) {
+        int max_sum = 0, sum_values = 0, largest_bin = 0;
+
+        for (p0 = 0; p0 < history_bins; ++p0)
+            if (summed_probabilities [p0] [255] > max_sum) {
+                max_sum = summed_probabilities [p0] [255];
+                largest_bin = p0;
+            }
+
+        total_summed_probabilities -= max_sum;
+        p0 = largest_bin;
+
+        // halve (rounding up, so non-zero stays non-zero) every probability in the largest bin
+        for (p1 = 0; p1 < 256; ++p1)
+            summed_probabilities [p0] [p1] = sum_values += probabilities [p0] [p1] = (probabilities [p0] [p1] + 1) >> 1;
+
+        total_summed_probabilities += summed_probabilities [p0] [255];
+        // fprintf (stderr, "processed bin 0x%02x, bin: %d --> %d, new sum = %d\n",
+        //     p0, max_sum, summed_probabilities [p0] [255], total_summed_probabilities);
+    }
+
+    free (histogram);
+    bp = buffer;
+    bc = num_samples;
+    *dp++ = 1;
+    *dp++ = history_bits;
+    *dp++ = MAX_PROBABILITY;
+    ep = destination + num_samples - 10;
+
+#if (MAX_PROBABILITY < 0xff)
+    dp += rle_encode ((unsigned char *) probabilities, sizeof (*probabilities) * history_bins, dp);
+#else
+    memcpy (dp, probabilities, sizeof (*probabilities) * history_bins);
+    dp += sizeof (*probabilities) * history_bins;
+#endif
+
+    p0 = p1 = 0;
+
+    // second pass: code each byte, following the same bin sequence as the first pass
+
+    while (dp < ep && bc--) {
+
+        mult = (high - low) / summed_probabilities [p0] [255];
+
+        // if the interval has become too narrow for this bin's total, flush the
+        // pending bytes (which reopens the interval) and recompute the multiplier
+
+        if (!mult) {
+            high = low;
+
+            while (DSD_BYTE_READY (high, low)) {
+                *dp++ = high >> 24;
+                high = (high << 8) | 0xff;
+                low <<= 8;
+            }
+
+            mult = (high - low) / summed_probabilities [p0] [255];
+        }
+
+        if (*bp & 0xff)
+            low += summed_probabilities [p0] [(*bp & 0xff)-1] * mult;
+
+        high = low + probabilities [p0] [*bp & 0xff] * mult - 1;
+
+        while (DSD_BYTE_READY (high, low)) {
+            *dp++ = high >> 24;
+            high = (high << 8) | 0xff;
+            low <<= 8;
+        }
+
+        if (flags & MONO_DATA)
+            p0 = *bp++ & (history_bins-1);
+        else {
+            p0 = p1;
+            p1 = *bp++ & (history_bins-1);
+        }
+    }
+
+    // flush whatever is still held in the coder
+
+    high = low;
+
+    while (DSD_BYTE_READY (high, low)) {
+        *dp++ = high >> 24;
+        high = (high << 8) | 0xff;
+        low <<= 8;
+    }
+
+    free (summed_probabilities);
+    free (probabilities);
+
+    if (dp < ep)
+        return (int)(dp - destination);
+    else
+        return -1;
+}
+
+/*------------------------------------------------------------------------------------------------------------------------*/
+
+// Constants for the "high" mode encoder.
+
+// size of the adaptive probability table (PTABLE_BINS entries) and the mask used
+// to index it
+#define PTABLE_BITS 8
+#define PTABLE_BINS (1<<PTABLE_BITS)
+#define PTABLE_MASK (PTABLE_BINS-1)
+
+// initial rate passed to init_ptable() (and written to the stream) for the first block
+#define INITIAL_TERM (1536/PTABLE_BINS)
+
+// table entries are moved toward UP or DOWN (depending on the bit just coded) by
+// 1/(2^DECAY) of the remaining distance
+#define UP   0x010000fe
+#define DOWN 0x00010000
+#define DECAY 8
+
+// the filter values are fixed-point with VALUE_ONE as the scale; the table index
+// is taken from the predicted value shifted right by (PRECISION - PRECISION_USE)
+#define PRECISION 20
+#define VALUE_ONE (1 << PRECISION)
+#define PRECISION_USE 12
+
+// rate_s value passed to init_ptable() and written to every high-mode block
+#define RATE_S 20
+
+// Fill the PTABLE_BINS-entry probability table from the two rate parameters. The
+// table is built from both ends toward the middle: each entry in the lower half
+// gets the current level and its mirror entry in the upper half gets
+// (0x100ffff - level). The level starts at 0x808000, is decayed toward DOWN a
+// number of steps derived from rate_i before the first entry is stored, and is
+// decayed further after each pair (with the rate growing by rate_s/256 of itself
+// each time) for as long as it remains above 0x010000.
+
+static void init_ptable (int *table, int rate_i, int rate_s)
+{
+    int *front = table, *back = table + PTABLE_BINS - 1;
+    int level = 0x808000, rate = rate_i << 8, steps;
+
+    steps = (rate + 128) >> 8;
+
+    while (steps--)
+        level += (DOWN - level) >> DECAY;
+
+    while (front < back) {
+        *front++ = level;
+        *back-- = 0x100ffff - level;
+
+        // once the level has reached the floor the remaining entries all share it
+        if (level <= 0x010000)
+            continue;
+
+        rate += (rate * rate_s + 128) >> 8;
+        steps = (rate + 64) >> 7;
+
+        while (steps--)
+            level += (DOWN - level) >> DECAY;
+    }
+}
+
+// Return how far "ptable" is from the table that init_ptable() generates for the
+// given rate (with RATE_S), measured as the sum over all entries of the absolute
+// difference shifted right by 8.
+
+static int ptable_mismatch (int *ptable, int rate)
+{
+    int reference [PTABLE_BINS];
+    int total = 0, bin;
+
+    init_ptable (reference, rate, RATE_S);
+
+    for (bin = 0; bin < PTABLE_BINS; ++bin)
+        total += abs (ptable [bin] - reference [bin]) >> 8;
+
+    return total;
+}
+
+// Find the rate whose generated table best approximates the (adapted) table passed
+// in. Rates are tried upward from zero and the search stops at the first rate that
+// fails to improve on the best error so far; the rate just before that one is
+// returned. The passed table itself is not modified.
+
+static int normalize_ptable (int *ptable)
+{
+    int best_error = ptable_mismatch (ptable, 0);
+    int rate = 1, error;
+
+    while ((error = ptable_mismatch (ptable, rate)) < best_error) {
+        best_error = error;
+        rate++;
+    }
+
+    return rate - 1;
+}
+
+// Encode a block of DSD bytes with the "high" method. Each byte is coded one bit
+// at a time (MSB first, channels interleaved per bit for stereo) with a binary
+// range coder whose probability comes from an adaptive table (wps->dsd.ptable)
+// indexed by a per-channel predicted value computed from a set of filters
+// (wps->dsd.filters). "num_samples" is the sample count per channel.
+//
+// The output written at "destination" is a mode byte of 3, the table rate and
+// RATE_S, then for each channel the five filter values (reduced to 8 bits) and
+// the 16-bit factor, and finally the coded data. The filter state is rounded to
+// exactly what was written before coding begins.
+//
+// The table and filters persist in "wps" across calls and are updated by this
+// function even when the caller ends up discarding its output. They are freshly
+// initialized for the first block (sample_index of zero) and also whenever the
+// table has not been allocated yet, which happens if earlier blocks were too
+// small to be encoded here.
+//
+// Returns the number of bytes written, or -1 if the block has fewer than 280
+// bytes, if the table cannot be allocated, or if the output reaches (input size
+// - 10) bytes. In the -1 cases "destination" may have been partially written.
+// When coding runs, the block CRC is also stored into the WavpackHeader at
+// wps->blockbuff.
+
+static int encode_buffer_high (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination)
+{
+    int channel, stereo = (wps->wphdr.flags & MONO_DATA) ? 0 : 1;
+    uint32_t crc = 0xffffffff, high = 0xffffffff, low = 0;
+    unsigned char *dp = destination, *ep;
+    DSDfilters *sp;
+
+    if (num_samples * (stereo + 1) < 280)
+        return -1;
+
+    *dp++ = 3;
+    ep = destination + num_samples * (stereo + 1) - 10;
+
+    // The early return above can skip this initialization for the first block while
+    // the caller still advances sample_index, so a missing table must also trigger
+    // it (otherwise normalize_ptable() below would be handed a NULL pointer).
+
+    if (!wps->sample_index || !wps->dsd.ptable) {
+        if (!wps->dsd.ptable) {
+            wps->dsd.ptable = malloc (PTABLE_BINS * sizeof (*wps->dsd.ptable));
+
+            if (!wps->dsd.ptable)
+                return -1;
+        }
+
+        init_ptable (wps->dsd.ptable, INITIAL_TERM, RATE_S);
+
+        for (channel = 0; channel < 2; ++channel) {
+            sp = wps->dsd.filters + channel;
+
+            sp->filter1 = sp->filter2 = sp->filter3 = sp->filter4 = sp->filter5 = VALUE_ONE / 2;
+            sp->filter6 = sp->factor = 0;
+        }
+
+        *dp++ = INITIAL_TERM;
+        *dp++ = RATE_S;
+    }
+    else {
+        // replace the adapted table with the closest table that can be described
+        // by a single rate value, since the rate is all that gets stored
+        int rate = normalize_ptable (wps->dsd.ptable);
+        init_ptable (wps->dsd.ptable, rate, RATE_S);
+        *dp++ = rate;
+        *dp++ = RATE_S;
+    }
+
+    // store each channel's filter state, and round our own copy to what was stored
+
+    for (channel = 0; channel <= stereo; ++channel) {
+        sp = wps->dsd.filters + channel;
+
+        *dp = sp->filter1 >> (PRECISION - 8);
+        sp->filter1 = *dp++ << (PRECISION - 8);
+
+        *dp = sp->filter2 >> (PRECISION - 8);
+        sp->filter2 = *dp++ << (PRECISION - 8);
+
+        *dp = sp->filter3 >> (PRECISION - 8);
+        sp->filter3 = *dp++ << (PRECISION - 8);
+
+        *dp = sp->filter4 >> (PRECISION - 8);
+        sp->filter4 = *dp++ << (PRECISION - 8);
+
+        *dp = sp->filter5 >> (PRECISION - 8);
+        sp->filter5 = *dp++ << (PRECISION - 8);
+
+        *dp++ = sp->factor;
+        *dp++ = sp->factor >> 8;
+        sp->filter6 = 0;
+        sp->factor = (sp->factor << 16) >> 16;     // keep only the 16 bits stored, sign-extended
+    }
+
+    sp = wps->dsd.filters;
+
+    while (dp < ep && num_samples--) {
+        int bitcount = 8;
+
+        crc += (crc << 1) + (sp->byte = *buffer++ & 0xff);
+        sp [0].value = sp [0].filter1 - sp [0].filter5 + ((sp [0].filter6 * sp [0].factor) >> 2);
+
+        if (stereo) {
+            crc += (crc << 1) + (sp [1].byte = *buffer++ & 0xff);
+            sp [1].value = sp [1].filter1 - sp [1].filter5 + ((sp [1].filter6 * sp [1].factor) >> 2);
+        }
+
+        while (bitcount--) {
+            int32_t *pp = wps->dsd.ptable + ((sp [0].value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK);
+
+            // code the top bit of the left (or only) channel's byte and adapt the table entry used
+
+            if (sp [0].byte & 0x80) {
+                high = low + ((high - low) >> 8) * (*pp >> 16);
+                *pp += (UP - *pp) >> DECAY;
+                sp [0].filter0 = -1;
+            }
+            else {
+                low += 1 + ((high - low) >> 8) * (*pp >> 16);
+                *pp += (DOWN - *pp) >> DECAY;
+                sp [0].filter0 = 0;
+            }
+
+            while (DSD_BYTE_READY (high, low)) {
+                *dp++ = high >> 24;
+                high = (high << 8) | 0xff;
+                low <<= 8;
+            }
+
+            // update the filters with the bit just coded and form the next prediction
+
+            sp [0].value += sp [0].filter6 << 3;
+            sp [0].factor += (((sp [0].value ^ sp [0].filter0) >> 31) | 1) & ((sp [0].value ^ (sp [0].value - (sp [0].filter6 << 4))) >> 31);
+            sp [0].filter1 += ((sp [0].filter0 & VALUE_ONE) - sp [0].filter1) >> 6;
+            sp [0].filter2 += ((sp [0].filter0 & VALUE_ONE) - sp [0].filter2) >> 4;
+            sp [0].filter3 += (sp [0].filter2 - sp [0].filter3) >> 4;
+            sp [0].filter4 += (sp [0].filter3 - sp [0].filter4) >> 4;
+            sp [0].value = (sp [0].filter4 - sp [0].filter5) >> 4;
+            sp [0].filter5 += sp [0].value;
+            sp [0].filter6 += (sp [0].value - sp [0].filter6) >> 3;
+            sp [0].value = sp [0].filter1 - sp [0].filter5 + ((sp [0].filter6 * sp [0].factor) >> 2);
+            sp [0].byte <<= 1;
+
+            if (!stereo)
+                continue;
+
+            // same steps for the right channel
+
+            pp = wps->dsd.ptable + ((sp [1].value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK);
+
+            if (sp [1].byte & 0x80) {
+                high = low + ((high - low) >> 8) * (*pp >> 16);
+                *pp += (UP - *pp) >> DECAY;
+                sp [1].filter0 = -1;
+            }
+            else {
+                low += 1 + ((high - low) >> 8) * (*pp >> 16);
+                *pp += (DOWN - *pp) >> DECAY;
+                sp [1].filter0 = 0;
+            }
+
+            while (DSD_BYTE_READY (high, low)) {
+                *dp++ = high >> 24;
+                high = (high << 8) | 0xff;
+                low <<= 8;
+            }
+
+            sp [1].value += sp [1].filter6 << 3;
+            sp [1].factor += (((sp [1].value ^ sp [1].filter0) >> 31) | 1) & ((sp [1].value ^ (sp [1].value - (sp [1].filter6 << 4))) >> 31);
+            sp [1].filter1 += ((sp [1].filter0 & VALUE_ONE) - sp [1].filter1) >> 6;
+            sp [1].filter2 += ((sp [1].filter0 & VALUE_ONE) - sp [1].filter2) >> 4;
+            sp [1].filter3 += (sp [1].filter2 - sp [1].filter3) >> 4;
+            sp [1].filter4 += (sp [1].filter3 - sp [1].filter4) >> 4;
+            sp [1].value = (sp [1].filter4 - sp [1].filter5) >> 4;
+            sp [1].filter5 += sp [1].value;
+            sp [1].filter6 += (sp [1].value - sp [1].filter6) >> 3;
+            sp [1].value = sp [1].filter1 - sp [1].filter5 + ((sp [1].filter6 * sp [1].factor) >> 2);
+            sp [1].byte <<= 1;
+        }
+
+        // after every byte, pull each channel's factor slightly back toward zero
+
+        sp [0].factor -= (sp->factor + 512) >> 10;
+
+        if (stereo)
+            sp [1].factor -= (sp [1].factor + 512) >> 10;
+    }
+
+    ((WavpackHeader *) wps->blockbuff)->crc = crc;
+
+    // flush whatever is still held in the coder
+
+    high = low;
+
+    while (DSD_BYTE_READY (high, low)) {
+        *dp++ = high >> 24;
+        high = (high << 8) | 0xff;
+        low <<= 8;
+    }
+
+    if (dp < ep)
+        return (int)(dp - destination);
+    else
+        return -1;
+}
+
+#endif      // ENABLE_DSD
--- a/third_party/wavpack/src/pack_floats.c
+++ b/third_party/wavpack/src/pack_floats.c
@ -0,0 +1,270 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// pack_floats.c
+
+// This module deals with the compression of floating-point data. Note that no
+// floating point math is involved here...the values are only processed with
+// the macros that directly access the mantissa, exponent, and sign fields.
+// That's why we use the f32 type instead of the built-in float type.
+
+#include <stdlib.h>
+
+#include "wavpack_local.h"
+
+//#define DISPLAY_DIAGNOSTICS
+
+// Scan the provided buffer of floating-point values and (1) convert the
+// significant portion of the data to integers for compression using the
+// regular WavPack algorithms (which only operate on integers) and (2)
+// determine whether the data requires a second stream for lossless
+// storage (which will usually be the case except when the floating-point
+// data was originally integer data). The converted integers are returned
+// "in-place" and a non-zero return value indicates that a second stream
+// is required.
+//
+// "values" holds "num_values" entries that are overwritten with the signed
+// integers. The results of the scan are left in the stream: crc_x (CRC of
+// the original float data), float_flags, float_shift, float_max_exp, and
+// the magnitude field of wphdr.flags.
+
+int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values)
+{
+    int32_t shifted_ones = 0, shifted_zeros = 0, shifted_both = 0;
+    int32_t false_zeros = 0, neg_zeros = 0;
+#ifdef DISPLAY_DIAGNOSTICS
+    int32_t true_zeros = 0, denormals = 0, exceptions = 0;
+#endif
+    uint32_t ordata = 0, crc = 0xffffffff;
+    int32_t count, value, shift_count;
+    int max_mag = 0, max_exp = 0;
+    f32 *dp;
+
+    wps->float_shift = wps->float_flags = 0;
+
+    // First loop goes through all the data and (1) calculates the CRC and (2) finds the
+    // max magnitude that does not have an exponent of 255 (reserved for +/-inf and NaN).
+    for (dp = values, count = num_values; count--; dp++) {
+        crc = crc * 27 + get_mantissa (*dp) * 9 + get_exponent (*dp) * 3 + get_sign (*dp);
+
+        if (get_exponent (*dp) < 255 && get_magnitude (*dp) > max_mag)
+            max_mag = get_magnitude (*dp);
+    }
+
+    wps->crc_x = crc;
+
+    // round up the magnitude so that when we convert the floating-point values to integers,
+    // they will be (at most) just over 24-bits signed precision
+    if (get_exponent (max_mag))
+        max_exp = get_exponent (max_mag + 0x7F0000);
+
+    // Second loop converts every value to its integer form and classifies what is lost
+    // in the conversion.
+    for (dp = values, count = num_values; count--; dp++) {
+        // Exponent of 255 is reserved for +/-inf (mantissa = 0) or NaN (mantissa != 0).
+        // we use a value one greater than 24-bits unsigned for this.
+        if (get_exponent (*dp) == 255) {
+#ifdef DISPLAY_DIAGNOSTICS
+            exceptions++;
+#endif
+            wps->float_flags |= FLOAT_EXCEPTIONS;
+            value = 0x1000000;
+            shift_count = 0;
+        }
+        // This is the regular case. We generate a 24-bit unsigned value with the implied
+        // '1' MSB set and calculate a shift that will make it line up with the biggest
+        // samples in this block (although that shift would obviously shift out real data).
+        else if (get_exponent (*dp)) {
+            shift_count = max_exp - get_exponent (*dp);
+            value = 0x800000 + get_mantissa (*dp);
+        }
+        // Zero exponent means either +/- zero (mantissa = 0) or denormals (mantissa != 0).
+        // shift_count is set so that denormals (without an implied '1') will line up with
+        // regular values (with their implied '1' added at bit 23). Trust me. We don't care
+        // about the shift with zero.
+        else {
+            shift_count = max_exp ? max_exp - 1 : 0;
+            value = get_mantissa (*dp);
+
+#ifdef DISPLAY_DIAGNOSTICS
+            if (get_mantissa (*dp))
+                denormals++;
+#endif
+        }
+
+        if (shift_count < 25)
+            value >>= shift_count;      // perform the shift if there could be anything left
+        else
+            value = 0;                  // else just zero the value
+
+        // If we are going to encode an integer zero, then this might be a "false zero" which
+        // means that there are significant bits but they're completely shifted out, or a
+        // "negative zero" which is simply a floating point value that we have to encode
+        // (and converting it to a positive zero would be an error).
+        if (!value) {
+            if (get_exponent (*dp) || get_mantissa (*dp))
+                ++false_zeros;
+            else if (get_sign (*dp))
+                ++neg_zeros;
+#ifdef DISPLAY_DIAGNOSTICS
+            else
+                ++true_zeros;
+#endif
+        }
+        // If we are going to shift something (but not everything) out of our integer before
+        // encoding, then we generate a mask corresponding to the bits that will be shifted
+        // out and increment the counter for the 3 possible cases of (1) all zeros, (2) all
+        // ones, and (3) a mix of ones and zeros.
+        else if (shift_count) {
+            int32_t mask = (1 << shift_count) - 1;
+
+            if (!(get_mantissa (*dp) & mask))
+                shifted_zeros++;
+            else if ((get_mantissa (*dp) & mask) == mask)
+                shifted_ones++;
+            else
+                shifted_both++;
+        }
+
+        // "or" all the integer values together, and store the final integer with applied sign
+
+        ordata |= value;
+        * (int32_t *) dp = (get_sign (*dp)) ? -value : value;
+    }
+
+    wps->float_max_exp = max_exp;   // on decode, we use this to calculate actual exponent
+
+    // Now, based on our various counts, we determine the scheme required to encode the bits
+    // shifted out. Usually these will simply have to be sent literally, but in some rare cases
+    // we can get away with always assuming ones shifted out, or assuming all the bits shifted
+    // out in each value are the same (which means we only have to send a single bit).
+    if (shifted_both)
+        wps->float_flags |= FLOAT_SHIFT_SENT;
+    else if (shifted_ones && !shifted_zeros)
+        wps->float_flags |= FLOAT_SHIFT_ONES;
+    else if (shifted_ones && shifted_zeros)
+        wps->float_flags |= FLOAT_SHIFT_SAME;
+    // Another case is that we only shift out zeros (or maybe nothing), and in that case we
+    // check to see if our data actually has less than 24 or 25 bits of resolution, which means
+    // that we can reduce the magnitude of the integers we are encoding (which saves all those
+    // bits). The number of bits of reduced resolution is stored in float_shift.
+    else if (ordata && !(ordata & 1)) {
+        while (!(ordata & 1)) {
+            wps->float_shift++;
+            ordata >>= 1;
+        }
+
+        // here we shift out all those zeros in the integer data we will encode
+        for (dp = values, count = num_values; count--; dp++)
+            * (int32_t *) dp >>= wps->float_shift;
+    }
+
+    // Here we calculate the actual magnitude used by our integer data, although this is just
+    // used for informational purposes during encode/decode to possibly use faster math.
+
+    wps->wphdr.flags &= ~MAG_MASK;
+
+    // one increment of the magnitude field for every significant bit in the or'ed data
+    while (ordata) {
+        wps->wphdr.flags += 1 << MAG_LSB;
+        ordata >>= 1;
+    }
+
+    // Finally, we have to set some flags that guide how we encode various types of "zeros".
+    // If none of these are set (which is the most common situation), then every integer
+    // zero in the decoded data will simply become a floating-point zero.
+
+    if (false_zeros || neg_zeros)
+        wps->float_flags |= FLOAT_ZEROS_SENT;
+
+    if (neg_zeros)
+        wps->float_flags |= FLOAT_NEG_ZEROS;
+
+#ifdef DISPLAY_DIAGNOSTICS
+    {
+        int32_t *ip, min = 0x7fffffff, max = 0x80000000;
+        for (ip = (int32_t *) values, count = num_values; count--; ip++) {
+            if (*ip < min) min = *ip;
+            if (*ip > max) max = *ip;
+        }
+
+        fprintf (stderr, "integer range = %d to %d\n", min, max);
+    }
+
+    fprintf (stderr, "samples = %d, max exp = %d, pre-shift = %d, denormals = %d, exceptions = %d, max_mag = %x\n",
+        num_values, max_exp, wps->float_shift, denormals, exceptions, max_mag);
+    fprintf (stderr, "shifted ones/zeros/both = %d/%d/%d, true/neg/false zeros = %d/%d/%d\n",
+        shifted_ones, shifted_zeros, shifted_both, true_zeros, neg_zeros, false_zeros);
+#endif
+
+    // a second stream is needed if any of these flags ended up set (FLOAT_SHIFT_ONES and
+    // FLOAT_NEG_ZEROS alone do not appear in this mask)
+    return wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME);
+}
+
+// Given a buffer of float data, convert the data to integers (which is what the WavPack compression
+// algorithms require) and write the other data required for lossless compression (which includes
+// significant bits shifted out of the integers, plus information about +/- zeros and exceptions
+// like NaN and +/- infinities) into the wvxbits stream (which is assumed to be opened). Note that
+// for this to work correctly, scan_float_data() must have been called on the original data to set
+// the appropriate flags in float_flags and max_exp.
+//
+// The integer for each value is recomputed here exactly as in scan_float_data(), but only to
+// decide what must be written; the "values" buffer itself is not modified by this function.
+
+void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values)
+{
+    int max_exp = wps->float_max_exp;
+    int32_t count, value, shift_count;
+    f32 *dp;
+
+    for (dp = values, count = num_values; count--; dp++) {
+        // exponent of 255: a '1' bit plus the 23-bit mantissa when the mantissa is
+        // non-zero, or a single '0' bit when it is zero
+        if (get_exponent (*dp) == 255) {
+            if (get_mantissa (*dp)) {
+                putbit_1 (&wps->wvxbits);
+                putbits (get_mantissa (*dp), 23, &wps->wvxbits);
+            }
+            else {
+                putbit_0 (&wps->wvxbits);
+            }
+
+            value = 0x1000000;
+            shift_count = 0;
+        }
+        // regular value: mantissa with the implied '1' added, shifted to line up with max_exp
+        else if (get_exponent (*dp)) {
+            shift_count = max_exp - get_exponent (*dp);
+            value = 0x800000 + get_mantissa (*dp);
+        }
+        // zero exponent: mantissa only, with no implied '1'
+        else {
+            shift_count = max_exp ? max_exp - 1 : 0;
+            value = get_mantissa (*dp);
+        }
+
+        if (shift_count < 25)
+            value >>= shift_count;
+        else
+            value = 0;
+
+        // the integer is zero, so anything needed to rebuild the float goes here
+        // (but only if the scan determined that zeros need to be sent at all)
+        if (!value) {
+            if (wps->float_flags & FLOAT_ZEROS_SENT) {
+                // not a real zero: send '1', the mantissa, the exponent (only when
+                // max_exp is at least 25), and the sign
+                if (get_exponent (*dp) || get_mantissa (*dp)) {
+                    putbit_1 (&wps->wvxbits);
+                    putbits (get_mantissa (*dp), 23, &wps->wvxbits);
+
+                    if (max_exp >= 25) {
+                        putbits (get_exponent (*dp), 8, &wps->wvxbits);
+                    }
+
+                    putbit (get_sign (*dp), &wps->wvxbits);
+                }
+                // real zero: send '0', followed by the sign if negative zeros are present
+                else {
+                    putbit_0 (&wps->wvxbits);
+
+                    if (wps->float_flags & FLOAT_NEG_ZEROS)
+                        putbit (get_sign (*dp), &wps->wvxbits);
+                }
+            }
+        }
+        // some (but not all) bits were shifted out: send them literally, or send just
+        // the lowest mantissa bit when all the shifted-out bits of a value are the same;
+        // in the remaining cases nothing is written
+        else if (shift_count) {
+            if (wps->float_flags & FLOAT_SHIFT_SENT) {
+                int32_t data = get_mantissa (*dp) & ((1 << shift_count) - 1);
+                putbits (data, shift_count, &wps->wvxbits);
+            }
+            else if (wps->float_flags & FLOAT_SHIFT_SAME) {
+                putbit (get_mantissa (*dp) & 1, &wps->wvxbits);
+            }
+        }
+    }
+}
--- a/third_party/wavpack/src/pack_utils.c
+++ b/third_party/wavpack/src/pack_utils.c
--- a/third_party/wavpack/src/pack_x64.S
+++ b/third_party/wavpack/src/pack_x64.S
--- a/third_party/wavpack/src/pack_x64.asm
+++ b/third_party/wavpack/src/pack_x64.asm
--- a/third_party/wavpack/src/pack_x86.S
+++ b/third_party/wavpack/src/pack_x86.S
--- a/third_party/wavpack/src/pack_x86.asm
+++ b/third_party/wavpack/src/pack_x86.asm
--- a/third_party/wavpack/src/read_words.c
+++ b/third_party/wavpack/src/read_words.c
@ -0,0 +1,614 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// read_words.c
+
+// This module provides entropy word decoding functions using
+// a variation on the Rice method.  This was introduced in version 3.93
+// because it allows splitting the data into a "lossy" stream and a
+// "correction" stream in a very efficient manner and is therefore ideal
+// for the "hybrid" mode.  For 4.0, the efficiency of this method was
+// significantly improved by moving away from the normal Rice restriction of
+// using powers of two for the modulus divisions and now the method can be
+// used for both hybrid and pure lossless encoding.
+
+// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%),
+// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the
+// previous. Using standard Rice coding on this data would result in 1.4
+// bits per sample average (not counting sign bit). However, there is a
+// very simple encoding that is over 99% efficient with this data and
+// results in about 1.22 bits per sample.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+#if defined (HAVE___BUILTIN_CTZ) || defined (_WIN64)    // exactly one of the two run-counting strategies below gets selected
+#define USE_CTZ_OPTIMIZATION    // use ctz intrinsic (or Windows equivalent) to count trailing ones
+#else
+#define USE_NEXT8_OPTIMIZATION  // optimization using a table to count trailing ones
+#endif
+
+#define USE_BITMASK_TABLES      // use tables instead of shifting for certain masking operations (see read_code())
+
+///////////////////////////// local table storage ////////////////////////////
+
+#ifdef USE_NEXT8_OPTIMIZATION   // table is only needed by the byte-at-a-time run counter
+static const char ones_count_table [] = {   // entry [n] = number of consecutive 1 bits at the low end of byte n (8 for 0xff)
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8
+};
+#endif
+
+///////////////////////////// executable code ////////////////////////////////
+
+static uint32_t __inline read_code (Bitstream *bs, uint32_t maxcode);   // forward declaration; defined at the end of this file
+
+// Read the next word for channel "chan" (index into wps->w.c []) from the
+// bitstream "wvbits" and return the value. This function can be used for
+// hybrid or lossless streams, but since an optimized version is available for
+// lossless this function would normally be used for hybrid only. If non-NULL,
+// "correction" is zeroed on entry and receives the offset read from "wvcbits"
+// when that stream is open and an error limit is active. A return value of
+// WORD_EOF means the end of the bitstream was reached (all 1s) or an error.
+
+int32_t FASTCALL get_word (WavpackStream *wps, int chan, int32_t *correction)
+{
+    register struct entropy_data *c = wps->w.c + chan;     // entropy state of the selected channel
+    uint32_t ones_count, low, mid, high;
+    int32_t value;
+    int sign;
+
+    if (!wps->wvbits.ptr)       // bitstream has no data pointer: nothing to decode
+        return WORD_EOF;
+
+    if (correction)             // optional output, cleared up front
+        *correction = 0;
+
+    if (!(wps->w.c [0].median [0] & ~1) && !wps->w.holding_zero && !wps->w.holding_one && !(wps->w.c [1].median [0] & ~1)) {
+        uint32_t mask;
+        int cbits;
+
+        if (wps->w.zeros_acc) {             // a run of zeros is in progress (both median [0] < 2, nothing held)
+            if (--wps->w.zeros_acc) {       // run not finished yet: emit another zero
+                c->slow_level -= (c->slow_level + SLO) >> SLS;     // decay slow_level; nothing is added for a zero
+                return 0;
+            }
+        }
+        else {                              // no run pending: read a new run length
+            for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);     // count leading 1 bits (at most 33)
+
+            if (cbits == 33)                // 33 ones in a row is treated as end of stream
+                return WORD_EOF;
+
+            if (cbits < 2)                  // counts 0 and 1 are the run length itself
+                wps->w.zeros_acc = cbits;
+            else {                          // else cbits-1 literal bits follow (LSB first) under an implied top bit
+                for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1)
+                    if (getbit (&wps->wvbits))
+                        wps->w.zeros_acc |= mask;
+
+                wps->w.zeros_acc |= mask;
+            }
+
+            if (wps->w.zeros_acc) {         // nonzero run: reset both channels' medians and emit a zero
+                c->slow_level -= (c->slow_level + SLO) >> SLS;
+                CLEAR (wps->w.c [0].median);
+                CLEAR (wps->w.c [1].median);
+                return 0;
+            }
+        }
+    }
+
+    if (wps->w.holding_zero)    // a zero count was deferred by the previous call: no bits are read for it
+        ones_count = wps->w.holding_zero = 0;
+    else {                      // otherwise count the run of 1 bits using one of three methods
+#ifdef USE_CTZ_OPTIMIZATION
+        while (wps->wvbits.bc < LIMIT_ONES) {       // refill the shift register until it holds at least LIMIT_ONES bits
+            if (++(wps->wvbits.ptr) == wps->wvbits.end)
+                wps->wvbits.wrap (&wps->wvbits);
+
+            wps->wvbits.sr |= *(wps->wvbits.ptr) << wps->wvbits.bc;
+            wps->wvbits.bc += sizeof (*(wps->wvbits.ptr)) * 8;
+        }
+
+#ifdef _WIN32
+        { unsigned long res; _BitScanForward (&res, (unsigned long)~wps->wvbits.sr); ones_count = (uint32_t) res; }
+#else
+        ones_count = __builtin_ctz (~wps->wvbits.sr);      // trailing 1s of sr == trailing 0s of ~sr
+#endif
+
+        if (ones_count >= LIMIT_ONES) {     // run reaches the limit: consume it, then continue bit by bit
+            wps->wvbits.bc -= ones_count;
+            wps->wvbits.sr >>= ones_count;
+
+            for (; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);
+
+            if (ones_count == (LIMIT_ONES + 1))     // more than LIMIT_ONES ones is treated as end of stream
+                return WORD_EOF;
+
+            if (ones_count == LIMIT_ONES) {         // escape: an extended count follows, coded like a zero-run length
+                uint32_t mask;
+                int cbits;
+
+                for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
+
+                if (cbits == 33)
+                    return WORD_EOF;
+
+                if (cbits < 2)
+                    ones_count = cbits;
+                else {
+                    for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                        if (getbit (&wps->wvbits))
+                            ones_count |= mask;
+
+                    ones_count |= mask;
+                }
+
+                ones_count += LIMIT_ONES;           // extended count is relative to LIMIT_ONES
+            }
+        }
+        else {                              // common case: drop the ones and the terminating 0 bit
+            wps->wvbits.bc -= ones_count + 1;
+            wps->wvbits.sr >>= ones_count + 1;
+        }
+#elif defined (USE_NEXT8_OPTIMIZATION)
+        int next8;
+
+        if (wps->wvbits.bc < 8) {           // fewer than 8 bits buffered: pull in the next word
+            if (++(wps->wvbits.ptr) == wps->wvbits.end)
+                wps->wvbits.wrap (&wps->wvbits);
+
+            next8 = (wps->wvbits.sr |= *(wps->wvbits.ptr) << wps->wvbits.bc) & 0xff;
+            wps->wvbits.bc += sizeof (*(wps->wvbits.ptr)) * 8;
+        }
+        else
+            next8 = wps->wvbits.sr & 0xff;
+
+        if (next8 == 0xff) {                // all 8 are ones: consume them and keep counting bit by bit
+            wps->wvbits.bc -= 8;
+            wps->wvbits.sr >>= 8;
+
+            for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);
+
+            if (ones_count == (LIMIT_ONES + 1))     // more than LIMIT_ONES ones is treated as end of stream
+                return WORD_EOF;
+
+            if (ones_count == LIMIT_ONES) {         // escape: an extended count follows, coded like a zero-run length
+                uint32_t mask;
+                int cbits;
+
+                for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
+
+                if (cbits == 33)
+                    return WORD_EOF;
+
+                if (cbits < 2)
+                    ones_count = cbits;
+                else {
+                    for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                        if (getbit (&wps->wvbits))
+                            ones_count |= mask;
+
+                    ones_count |= mask;
+                }
+
+                ones_count += LIMIT_ONES;
+            }
+        }
+        else {                              // table gives the run of low-order ones; drop them plus the terminating 0
+            wps->wvbits.bc -= (ones_count = ones_count_table [next8]) + 1;
+            wps->wvbits.sr >>= ones_count + 1;
+        }
+#else
+        for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);    // plain bit-by-bit fallback
+
+        if (ones_count >= LIMIT_ONES) {
+            uint32_t mask;
+            int cbits;
+
+            if (ones_count == (LIMIT_ONES + 1))     // more than LIMIT_ONES ones is treated as end of stream
+                return WORD_EOF;
+
+            for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
+
+            if (cbits == 33)
+                return WORD_EOF;
+
+            if (cbits < 2)
+                ones_count = cbits;
+            else {
+                for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                    if (getbit (&wps->wvbits))
+                        ones_count |= mask;
+
+                ones_count |= mask;
+            }
+
+            ones_count += LIMIT_ONES;
+        }
+#endif
+
+        if (wps->w.holding_one) {           // a 1 carried over from the previous call adds one to the halved count
+            wps->w.holding_one = ones_count & 1;
+            ones_count = (ones_count >> 1) + 1;
+        }
+        else {                              // low bit of the raw count is carried to the next call, the rest is halved
+            wps->w.holding_one = ones_count & 1;
+            ones_count >>= 1;
+        }
+
+        wps->w.holding_zero = ~wps->w.holding_one & 1;     // holding_zero is the complement of holding_one
+    }
+
+    if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)  // hybrid mode: error limits are updated when channel 0 is decoded
+        update_error_limit (wps);
+
+    if (ones_count == 0) {      // pick the [low, high] zone; the *_MED macros are defined elsewhere (presumably act on c->median [])
+        low = 0;
+        high = GET_MED (0) - 1;
+        DEC_MED0 ();
+    }
+    else {
+        low = GET_MED (0);
+        INC_MED0 ();
+
+        if (ones_count == 1) {
+            high = low + GET_MED (1) - 1;
+            DEC_MED1 ();
+        }
+        else {
+            low += GET_MED (1);
+            INC_MED1 ();
+
+            if (ones_count == 2) {
+                high = low + GET_MED (2) - 1;
+                DEC_MED2 ();
+            }
+            else {              // every count beyond 2 skips one more zone of width GET_MED (2)
+                low += (ones_count - 2) * GET_MED (2);
+                high = low + GET_MED (2) - 1;
+                INC_MED2 ();
+            }
+        }
+    }
+
+    low &= 0x7fffffff;          // keep both bounds within 31 bits
+    high &= 0x7fffffff;
+
+    if (low > high)         // make sure high and low make sense
+        high = low;
+
+    mid = (high + low + 1) >> 1;
+
+    if (!c->error_limit)        // no error limit: read the exact offset within the zone
+        mid = read_code (&wps->wvbits, high - low) + low;
+    else while (high - low > c->error_limit) {      // else halve the zone per bit until it is no wider than error_limit
+        if (getbit (&wps->wvbits))
+            mid = (high + (low = mid) + 1) >> 1;
+        else
+            mid = ((high = mid - 1) + low + 1) >> 1;
+    }
+
+    sign = getbit (&wps->wvbits);       // sign bit follows the magnitude
+
+    if (bs_is_open (&wps->wvcbits) && c->error_limit) {     // exact value comes from the correction stream when it is open
+        value = read_code (&wps->wvcbits, high - low) + low;
+
+        if (correction)         // offset of the exact value from mid, negated when the sign bit is set
+            *correction = sign ? (mid - value) : (value - mid);
+    }
+
+    if (wps->wphdr.flags & HYBRID_BITRATE) {        // decay slow_level, then add wp_log2 of the decoded magnitude
+        c->slow_level -= (c->slow_level + SLO) >> SLS;
+        c->slow_level += wp_log2 (mid);
+    }
+
+    return sign ? ~mid : mid;   // values with the sign bit set are returned as the one's complement
+}
+
+// Lossless-only (error_limit == 0) version of get_word() that fills "buffer" with up to
+// "nsamples" mono samples or interleaved stereo pairs (MONO_DATA flag clear). Returns the
+// count of complete samples decoded; with no bitstream data the buffer is zero-filled.
+
+int32_t get_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsamples)
+{
+    struct entropy_data *c = wps->w.c;      // channel 0 state; re-selected per value for stereo
+    uint32_t ones_count, low, high;
+    Bitstream *bs = &wps->wvbits;
+    int32_t csamples;
+#ifdef USE_NEXT8_OPTIMIZATION
+    int32_t next8;
+#endif
+
+    if (nsamples && !bs->ptr) {             // no bitstream data pointer: zero-fill and report every sample
+        memset (buffer, 0, (wps->wphdr.flags & MONO_DATA) ? nsamples * 4 : nsamples * 8);  // 4 bytes per mono sample, 8 per stereo pair
+        return nsamples;
+    }
+
+    if (!(wps->wphdr.flags & MONO_DATA))    // from here on nsamples counts individual values (two per stereo sample)
+        nsamples *= 2;
+
+    for (csamples = 0; csamples < nsamples; ++csamples) {
+        if (!(wps->wphdr.flags & MONO_DATA))        // stereo: values alternate between the two channels
+            c = wps->w.c + (csamples & 1);
+
+        if (wps->w.holding_zero) {          // deferred zero count: value lies in the first zone, no unary prefix is read
+            wps->w.holding_zero = 0;
+            low = read_code (bs, GET_MED (0) - 1);
+            DEC_MED0 ();
+            buffer [csamples] = (getbit (bs)) ? ~low : low;    // sign bit; set means store the one's complement
+
+            if (++csamples == nsamples)     // the held value was the last one requested
+                break;
+
+            if (!(wps->wphdr.flags & MONO_DATA))
+                c = wps->w.c + (csamples & 1);
+        }
+
+        if (wps->w.c [0].median [0] < 2 && !wps->w.holding_one && wps->w.c [1].median [0] < 2) {
+            uint32_t mask;
+            int cbits;
+
+            if (wps->w.zeros_acc) {         // a run of zeros is in progress (both median [0] < 2, no held one)
+                if (--wps->w.zeros_acc) {   // run not finished yet: store another zero
+                    buffer [csamples] = 0;
+                    continue;
+                }
+            }
+            else {                          // no run pending: read a new run length
+                for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);   // count leading 1 bits (at most 33)
+
+                if (cbits == 33)            // 33 ones in a row is treated as end of stream: stop decoding
+                    break;
+
+                if (cbits < 2)              // counts 0 and 1 are the run length itself
+                    wps->w.zeros_acc = cbits;
+                else {                      // else cbits-1 literal bits follow (LSB first) under an implied top bit
+                    for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1)
+                        if (getbit (bs))
+                            wps->w.zeros_acc |= mask;
+
+                    wps->w.zeros_acc |= mask;
+                }
+
+                if (wps->w.zeros_acc) {     // nonzero run: reset both channels' medians and store a zero
+                    CLEAR (wps->w.c [0].median);
+                    CLEAR (wps->w.c [1].median);
+                    buffer [csamples] = 0;
+                    continue;
+                }
+            }
+        }
+
+#ifdef USE_CTZ_OPTIMIZATION
+        while (bs->bc < LIMIT_ONES) {       // refill the shift register until it holds at least LIMIT_ONES bits
+            if (++(bs->ptr) == bs->end)
+                bs->wrap (bs);
+
+            bs->sr |= *(bs->ptr) << bs->bc;
+            bs->bc += sizeof (*(bs->ptr)) * 8;
+        }
+
+#ifdef _WIN32
+        { unsigned long res; _BitScanForward (&res, (unsigned long)~wps->wvbits.sr); ones_count = (uint32_t) res; }
+#else
+        ones_count = __builtin_ctz (~wps->wvbits.sr);      // wps->wvbits is the same object as *bs; trailing 1s of sr
+#endif
+
+        if (ones_count >= LIMIT_ONES) {     // run reaches the limit: consume it, then continue bit by bit
+            bs->bc -= ones_count;
+            bs->sr >>= ones_count;
+
+            for (; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count);
+
+            if (ones_count == (LIMIT_ONES + 1))     // more than LIMIT_ONES ones is treated as end of stream
+                break;
+
+            if (ones_count == LIMIT_ONES) {         // escape: an extended count follows, coded like a zero-run length
+                uint32_t mask;
+                int cbits;
+
+                for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);
+
+                if (cbits == 33)
+                    break;
+
+                if (cbits < 2)
+                    ones_count = cbits;
+                else {
+                    for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                        if (getbit (bs))
+                            ones_count |= mask;
+
+                    ones_count |= mask;
+                }
+
+                ones_count += LIMIT_ONES;           // extended count is relative to LIMIT_ONES
+            }
+        }
+        else {                              // common case: drop the ones and the terminating 0 bit
+            bs->bc -= ones_count + 1;
+            bs->sr >>= ones_count + 1;
+        }
+#elif defined (USE_NEXT8_OPTIMIZATION)
+        if (bs->bc < 8) {                   // fewer than 8 bits buffered: pull in the next word
+            if (++(bs->ptr) == bs->end)
+                bs->wrap (bs);
+
+            next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff;
+            bs->bc += sizeof (*(bs->ptr)) * 8;
+        }
+        else
+            next8 = bs->sr & 0xff;
+
+        if (next8 == 0xff) {                // all 8 are ones: consume them and keep counting bit by bit
+            bs->bc -= 8;
+            bs->sr >>= 8;
+
+            for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count);
+
+            if (ones_count == (LIMIT_ONES + 1))     // more than LIMIT_ONES ones is treated as end of stream
+                break;
+
+            if (ones_count == LIMIT_ONES) {         // escape: an extended count follows, coded like a zero-run length
+                uint32_t mask;
+                int cbits;
+
+                for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);
+
+                if (cbits == 33)
+                    break;
+
+                if (cbits < 2)
+                    ones_count = cbits;
+                else {
+                    for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                        if (getbit (bs))
+                            ones_count |= mask;
+
+                    ones_count |= mask;
+                }
+
+                ones_count += LIMIT_ONES;
+            }
+        }
+        else {                              // table gives the run of low-order ones; drop them plus the terminating 0
+            bs->bc -= (ones_count = ones_count_table [next8]) + 1;
+            bs->sr >>= ones_count + 1;
+        }
+#else
+        for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count);  // plain bit-by-bit fallback
+
+        if (ones_count >= LIMIT_ONES) {
+            uint32_t mask;
+            int cbits;
+
+            if (ones_count == (LIMIT_ONES + 1))     // more than LIMIT_ONES ones is treated as end of stream
+                break;
+
+            for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);
+
+            if (cbits == 33)
+                break;
+
+            if (cbits < 2)
+                ones_count = cbits;
+            else {
+                for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                    if (getbit (bs))
+                        ones_count |= mask;
+
+                ones_count |= mask;
+            }
+
+            ones_count += LIMIT_ONES;
+        }
+#endif
+
+        low = wps->w.holding_one;               // low temporarily holds the 1 carried from the previous value
+        wps->w.holding_one = ones_count & 1;    // low bit of the raw count is carried to the next value
+        wps->w.holding_zero = ~ones_count & 1;  // and its complement defers a zero count
+        ones_count = (ones_count >> 1) + low;
+
+        if (ones_count == 0) {      // pick the [low, high] zone; the *_MED macros are defined elsewhere (presumably act on c->median [])
+            low = 0;
+            high = GET_MED (0) - 1;
+            DEC_MED0 ();
+        }
+        else {
+            low = GET_MED (0);
+            INC_MED0 ();
+
+            if (ones_count == 1) {
+                high = low + GET_MED (1) - 1;
+                DEC_MED1 ();
+            }
+            else {
+                low += GET_MED (1);
+                INC_MED1 ();
+
+                if (ones_count == 2) {
+                    high = low + GET_MED (2) - 1;
+                    DEC_MED2 ();
+                }
+                else {              // every count beyond 2 skips one more zone of width GET_MED (2)
+                    low += (ones_count - 2) * GET_MED (2);
+                    high = low + GET_MED (2) - 1;
+                    INC_MED2 ();
+                }
+            }
+        }
+
+        low += read_code (bs, high - low);      // exact offset within [low, high]
+        buffer [csamples] = (getbit (bs)) ? ~low : low;    // sign bit; set means store the one's complement
+    }
+
+    return (wps->wphdr.flags & MONO_DATA) ? csamples : (csamples / 2);     // convert the value count back to samples
+}
+
+// Read a single unsigned value from the specified bitstream with a value
+// from 0 to maxcode. If there are exactly a power of two number of possible
+// codes then this will read a fixed number of bits; otherwise it reads the
+// minimum number of bits and then determines whether another bit is needed
+// to define the code.
+//
+// With bitcount = count_bits (maxcode), the first "extras" codes are stored in
+// bitcount - 1 bits and the remaining ones in bitcount bits. count_bits(),
+// bitset [] and bitmask [] are defined elsewhere (presumably bit-length and
+// power-of-two / low-bit-mask tables).
+
+static uint32_t __inline read_code (Bitstream *bs, uint32_t maxcode)
+{
+    unsigned long local_sr;
+    uint32_t extras, code;
+    int bitcount;
+
+    // ranges of one or two codes need zero bits or a single bit
+    if (maxcode < 2)
+        return maxcode ? getbit (bs) : 0;
+
+    bitcount = count_bits (maxcode);
+#ifdef USE_BITMASK_TABLES
+    extras = bitset [bitcount] - maxcode - 1;
+#else
+    extras = ((uint32_t) 1 << bitcount) - maxcode - 1;
+#endif
+
+    local_sr = bs->sr;
+
+    // top up until at least bitcount bits are buffered; the shift is done in
+    // unsigned arithmetic because a signed long shift can overflow (undefined
+    // behavior) where long is only 32 bits wide
+    while (bs->bc < bitcount) {
+        if (++(bs->ptr) == bs->end)
+            bs->wrap (bs);
+
+        local_sr |= (unsigned long)*(bs->ptr) << bs->bc;
+        bs->bc += sizeof (*(bs->ptr)) * 8;
+    }
+
+    // codes below "extras" are complete in bitcount - 1 bits, the others take
+    // one more bit to tell two neighbouring codes apart
+#ifdef USE_BITMASK_TABLES
+    if ((code = local_sr & bitmask [bitcount - 1]) >= extras)
+#else
+    if ((code = local_sr & ((1 << (bitcount - 1)) - 1)) >= extras)
+#endif
+        code = (code << 1) - extras + ((local_sr >> (bitcount - 1)) & 1);
+    else
+        bitcount--;
+
+    // when long is narrower than 64 bits the last word read may not have fit
+    // into local_sr completely, so the leftover bits are taken from that word
+    if (sizeof (local_sr) < 8 && bs->bc > sizeof (local_sr) * 8) {
+        bs->bc -= bitcount;
+        bs->sr = *(bs->ptr) >> (sizeof (*(bs->ptr)) * 8 - bs->bc);
+    }
+    else {
+        bs->bc -= bitcount;
+        bs->sr = local_sr >> bitcount;
+    }
+
+    return code;
+}
--- a/third_party/wavpack/src/tag_utils.c
+++ b/third_party/wavpack/src/tag_utils.c
@ -0,0 +1,597 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// tag_utils.c
+
+// This module provides the high-level API for creating, reading and editing
+// APEv2 tags on WavPack files. Read-only support is also provided for ID3v1
+// tags, but their use is not recommended.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+#ifdef _WIN32                          // give the case-insensitive compare one name (stricmp) on every platform
+#define stricmp(x,y) _stricmp(x,y)
+#else
+#define stricmp strcasecmp
+#endif
+
+static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type);
+static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size);
+static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type);
+static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size);
+static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type);
+static int write_tag_blockout (WavpackContext *wpc);    // used by WavpackWriteTag() when wpc->blockout is set
+static int write_tag_reader (WavpackContext *wpc);      // used by WavpackWriteTag() otherwise (editing existing tags)
+static void tagcpy (char *dest, char *src, int tag_size);
+static int tagdata (char *src, int tag_size);
+
+//////////////////// Global functions part of external API /////////////////////////
+
+// Return the number of tag items in the specified file, found by probing
+// WavpackGetTagItemIndexed() with successive indexes until it reports 0.
+
+int WavpackGetNumTagItems (WavpackContext *wpc)
+{
+    int count;
+
+    for (count = 0; WavpackGetTagItemIndexed (wpc, count, NULL, 0) != 0; ++count)
+        ;
+
+    return count;
+}
+
+// Return the number of binary tag items in the specified file, found by probing
+// WavpackGetBinaryTagItemIndexed() with successive indexes until it reports 0.
+// This applies only to APEv2 tags and is a separate function so that the old
+// API is not broken.
+
+int WavpackGetNumBinaryTagItems (WavpackContext *wpc)
+{
+    int count;
+
+    for (count = 0; WavpackGetBinaryTagItemIndexed (wpc, count, NULL, 0) != 0; ++count)
+        ;
+
+    return count;
+}
+
+// Look up the text item named "item" in the file's APEv2 tag or, when there is
+// no APEv2 tag, in its ID3v1 tag. "size" is the space available at "value"; if
+// the item does not fit, the copy is cut short, ends in an ellipsis (...) and
+// is still terminated. The length of the string stored is returned (0 when no
+// matching item is found). With APEv2 tags that length is a byte count, which
+// can differ from the character count because of UTF-8, and a value may hold
+// several NULL separated strings (which is why the length is returned).
+// Passing a NULL "value" or a zero "size" copies nothing and returns only the
+// actual length of the value (not counting the terminating NULL), so that the
+// caller can allocate the memory beforehand.
+
+int WavpackGetTagItem (WavpackContext *wpc, const char *item, char *value, int size)
+{
+    M_Tag *tag = &wpc->m_tag;
+
+    // start from an empty string whenever there is somewhere to put one
+    if (value != NULL && size != 0)
+        value [0] = 0;
+
+    // an APEv2 tag takes priority over an ID3v1 tag
+    if (tag->ape_tag_hdr.ID [0] == 'A')
+        return get_ape_tag_item (tag, item, value, size, APE_TAG_TYPE_TEXT);
+
+    if (tag->id3_tag.tag_id [0] == 'T')
+        return get_id3_tag_item (tag, item, value, size);
+
+    return 0;
+}
+
+// Look up the binary item named "item" in the file's APEv2 tag (the only tag
+// type consulted here). "size" is the space available at "value". If the item
+// does not fit then nothing is copied and 0 is returned, otherwise the actual
+// size is returned. Passing a NULL "value" or a zero "size" returns only the
+// actual size of the value so that the caller can allocate the memory
+// beforehand.
+
+int WavpackGetBinaryTagItem (WavpackContext *wpc, const char *item, char *value, int size)
+{
+    M_Tag *tag = &wpc->m_tag;
+
+    // the first byte is cleared whenever an output buffer was supplied
+    if (value != NULL && size != 0)
+        value [0] = 0;
+
+    if (tag->ape_tag_hdr.ID [0] != 'A')
+        return 0;
+
+    return get_ape_tag_item (tag, item, value, size, APE_TAG_TYPE_BINARY);
+}
+
+// Look up the name of the tag item at position "index"; this is what an
+// application uses to walk all the items in the file's ID3v1 or APEv2 tag.
+// Only the item's name is obtained; WavpackGetTagItem() must still be called
+// to get the actual value. "size" is the space available at "item"; if the
+// name does not fit, the copy is cut short, ends in an ellipsis (...) and is
+// still terminated. The length of the string stored is returned (0 when no
+// item exists for the index). Passing a NULL "item" or a zero "size" copies
+// nothing and returns only the actual length of the name (not counting the
+// terminating NULL), so that the caller can allocate the memory beforehand.
+// For binary tag values use the otherwise identical
+// WavpackGetBinaryTagItemIndexed ().
+
+int WavpackGetTagItemIndexed (WavpackContext *wpc, int index, char *item, int size)
+{
+    M_Tag *tag = &wpc->m_tag;
+
+    // start from an empty string whenever there is somewhere to put one
+    if (item != NULL && size != 0)
+        item [0] = 0;
+
+    // an APEv2 tag takes priority over an ID3v1 tag
+    if (tag->ape_tag_hdr.ID [0] == 'A')
+        return get_ape_tag_item_indexed (tag, index, item, size, APE_TAG_TYPE_TEXT);
+
+    if (tag->id3_tag.tag_id [0] == 'T')
+        return get_id3_tag_item_indexed (tag, index, item, size);
+
+    return 0;
+}
+
+// Binary counterpart of WavpackGetTagItemIndexed(): obtains the name of the
+// binary item at position "index" in the file's APEv2 tag, and returns 0 when
+// the file has no APEv2 tag.
+
+int WavpackGetBinaryTagItemIndexed (WavpackContext *wpc, int index, char *item, int size)
+{
+    M_Tag *tag = &wpc->m_tag;
+
+    // start from an empty string whenever there is somewhere to put one
+    if (item != NULL && size != 0)
+        item [0] = 0;
+
+    if (tag->ape_tag_hdr.ID [0] != 'A')
+        return 0;
+
+    return get_ape_tag_item_indexed (tag, index, item, size, APE_TAG_TYPE_BINARY);
+}
+
+// These two functions are used to append APEv2 tags to WavPack files; one is
+// for text values (UTF-8 encoded) and the other is for binary values. If no tag
+// has been started, then an empty one will be allocated first. When finished,
+// use WavpackWriteTag() to write the completed tag to the file. The purpose of
+// the passed size parameter is obvious for binary values, but might not be for
+// text values. Keep in mind that APEv2 text values can have multiple values
+// that are NULL separated, so the size is required to know the extent of the
+// value (although the final terminating NULL is not included in the passed
+// size). If the specified item already exists, it will be replaced with the
+// new value. ID3v1 tags are not supported.
+
+int WavpackAppendTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize)
+{
+    int removed;
+
+    // remove every existing item with this name before adding the new one
+    do
+        removed = WavpackDeleteTagItem (wpc, item);
+    while (removed);
+
+    return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_TEXT);
+}
+
+int WavpackAppendBinaryTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize)
+{
+    int removed;
+
+    // remove every existing item with this name before adding the binary one
+    do
+        removed = WavpackDeleteTagItem (wpc, item);
+    while (removed);
+
+    return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_BINARY);
+}
+
+// Delete the specified tag item from the APEv2 tag on the specified WavPack file
+// (fields cannot be deleted from ID3v1 tags). The name is compared without
+// regard to case and only the first matching item is removed: the items after
+// it are moved down and the header's length and item count are adjusted. A
+// return value of TRUE (1) indicates that the item was found and successfully
+// deleted; 0 is returned when there is no APEv2 tag, no matching item, or the
+// tag data is found to be malformed before a match.
+
+int WavpackDeleteTagItem (WavpackContext *wpc, const char *item)
+{
+    M_Tag *m_tag = &wpc->m_tag;
+
+    if (m_tag->ape_tag_hdr.ID [0] == 'A') {
+        unsigned char *p = m_tag->ape_tag_data;
+        unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr);
+        int i;
+
+        // stop as soon as there is no room left for the 8-byte size/flags prefix
+        // plus at least one more byte, so a bad item_count cannot run past the data
+        for (i = 0; i < m_tag->ape_tag_hdr.item_count && q - p > 8; ++i) {
+            int vsize, isize;
+
+            // 32-bit little-endian value size, assembled unsigned so that a high
+            // byte >= 0x80 cannot overflow a signed shift
+            vsize = (int32_t)(p[0] | ((uint32_t) p[1] << 8) | ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24)); p += 8;   // skip flags because we don't need them
+
+            // measure the item name, testing the bound before touching the byte
+            for (isize = 0; p + isize < q && p[isize]; ++isize);
+
+            // reject sizes that cannot fit in what remains of the tag data
+            if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q)
+                break;
+
+            if (isize && vsize && !stricmp (item, (char *) p)) {
+                unsigned char *d = p - 8;       // start of this item, prefix included
+
+                p += isize + vsize + 1;         // start of the following item
+
+                while (p < q)                   // close the gap
+                    *d++ = *p++;
+
+                m_tag->ape_tag_hdr.length = (int32_t)(d - m_tag->ape_tag_data) + sizeof (APE_Tag_Hdr);
+                m_tag->ape_tag_hdr.item_count--;
+                return 1;
+            }
+            else
+                p += isize + vsize + 1;
+        }
+    }
+
+    return 0;
+}
+
+// Once an APEv2 tag has been created with WavpackAppendTagItem(), this function
+// is used to write the completed tag to the end of the WavPack file. Note that
+// this function uses the same "blockout" function that is used to write
+// regular WavPack blocks, although that's where the similarity ends. It is also
+// used to write tags that have been edited on existing files.
+
+int WavpackWriteTag (WavpackContext *wpc)
+{
+    // a blockout function means a fresh WavPack file is being created; without
+    // one we are editing the tags of an existing file (OPEN_EDIT_TAGS)
+    return wpc->blockout ? write_tag_blockout (wpc) : write_tag_reader (wpc);
+}
+
+////////////////////////// local static functions /////////////////////////////
+
+// Search the APEv2 tag data for the first item whose name matches "item"
+// (compared without regard to case) and whose type, taken from bits 1-2 of the
+// item flags, equals "type". Each item is a 32-bit little-endian value size, a
+// 32-bit little-endian flags word, the zero-terminated name and then the value.
+//
+// Returns the value size when "value" is NULL or "size" is zero. Otherwise:
+//   binary items: copied whole and their size returned, or 0 if they don't fit
+//   text items:   copied and terminated if they fit in size - 1 bytes; else cut
+//                 to size - 1 bytes ending in "..." (needs size >= 4, else 0)
+// Returns 0 when no item matches or the tag data is found to be malformed.
+
+static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type)
+{
+    unsigned char *p = m_tag->ape_tag_data;
+    unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr);
+    int i;
+
+    for (i = 0; i < m_tag->ape_tag_hdr.item_count && q - p > 8; ++i) {
+        int vsize, flags, isize;
+
+        // both fields are assembled unsigned so that a high byte >= 0x80 cannot
+        // overflow a signed shift
+        vsize = (int32_t)(p[0] | ((uint32_t) p[1] << 8) | ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24)); p += 4;
+        flags = (int32_t)(p[0] | ((uint32_t) p[1] << 8) | ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24)); p += 4;
+
+        // measure the item name, testing the bound before touching the byte
+        for (isize = 0; p + isize < q && p[isize]; ++isize);
+
+        // reject sizes that cannot fit in what remains of the tag data
+        if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q)
+            break;
+
+        if (isize && vsize && !stricmp (item, (char *) p) && ((flags & 6) >> 1) == type) {
+
+            if (!value || !size)            // caller only wants the size
+                return vsize;
+
+            if (type == APE_TAG_TYPE_BINARY) {
+                if (vsize <= size) {        // binary data is never truncated
+                    memcpy (value, p + isize + 1, vsize);
+                    return vsize;
+                }
+                else
+                    return 0;
+            }
+            else if (vsize < size) {        // text fits with room for the terminator
+                memcpy (value, p + isize + 1, vsize);
+                value [vsize] = 0;
+                return vsize;
+            }
+            else if (size >= 4) {           // truncate and mark with an ellipsis
+                memcpy (value, p + isize + 1, size - 1);
+                value [size - 4] = value [size - 3] = value [size - 2] = '.';
+                value [size - 1] = 0;
+                return size - 1;
+            }
+            else
+                return 0;
+        }
+        else
+            p += isize + vsize + 1;
+    }
+
+    return 0;
+}
+
+// Fetch one of the fixed ID3v1 fields by name (compared without regard to
+// case): "title", "artist", "album", "year", "comment" or "track". A track
+// number is reported only when byte 29 of the comment field is nonzero and
+// byte 28 is zero (the ID3v1.1 convention). The text fields are extracted
+// with tagcpy(), which is defined elsewhere in this file.
+//
+// Returns the string length when "value" is NULL or "size" is zero. Otherwise
+// the string is copied if it fits in size - 1 characters, or cut to size - 1
+// characters ending in "..." when size >= 4 (0 is returned for smaller sizes).
+// Returns 0 for any other item name.
+
+static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size)
+{
+    char lvalue [64];
+    int len;
+
+    lvalue [0] = 0;
+
+    if (!stricmp (item, "title"))
+        tagcpy (lvalue, m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title));
+    else if (!stricmp (item, "artist"))
+        tagcpy (lvalue, m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist));
+    else if (!stricmp (item, "album"))
+        tagcpy (lvalue, m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album));
+    else if (!stricmp (item, "year"))
+        tagcpy (lvalue, m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year));
+    else if (!stricmp (item, "comment"))
+        tagcpy (lvalue, m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment));
+    else if (!stricmp (item, "track") && m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28])
+        // the track is a raw byte; go through unsigned char so that tracks
+        // 128-255 are not printed as negative numbers where char is signed
+        sprintf (lvalue, "%d", (unsigned char) m_tag->id3_tag.comment [29]);
+    else
+        return 0;
+
+    len = (int) strlen (lvalue);
+
+    if (!value || !size)            // caller only wants the length
+        return len;
+
+    if (len < size) {               // fits with room for the terminator
+        strcpy (value, lvalue);
+        return len;
+    }
+    else if (size >= 4) {           // truncate and mark with an ellipsis
+        strncpy (value, lvalue, size - 1);
+        value [size - 4] = value [size - 3] = value [size - 2] = '.';
+        value [size - 1] = 0;
+        return size - 1;
+    }
+    else
+        return 0;
+}
+
+// Return the key (item name) of the index'th APEv2 item whose type matches
+// "type". Items are walked in stored order; each one is laid out as a 4-byte
+// little-endian value size, 4-byte little-endian flags, the NUL-terminated
+// key, and then the value bytes. The item type is taken from bits 1-2 of the
+// flags. Items with an empty key or empty value are skipped.
+//
+//   item / size - destination buffer and its capacity in bytes. If "item" is
+//                 NULL or "size" is 0 only the key length is returned.
+//
+// Returns the number of characters stored (not counting the terminating NUL),
+// or 0 if there is no such item or the tag data is malformed. A key that does
+// not fit is truncated with a trailing "..." (requires size >= 4, else 0).
+
+static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type)
+{
+    unsigned char *p = m_tag->ape_tag_data;
+    unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr);
+    int i;
+
+    for (i = 0; i < m_tag->ape_tag_hdr.item_count && index >= 0 && q - p > 8; ++i) {
+        int vsize, flags, isize;
+
+        // assemble the top byte as unsigned so that values >= 0x80 do not
+        // overflow a signed int during the shift
+
+        vsize = (int) (p[0] + (p[1] << 8) + (p[2] << 16) + ((uint32_t) p[3] << 24)); p += 4;
+        flags = (int) (p[0] + (p[1] << 8) + (p[2] << 16) + ((uint32_t) p[3] << 24)); p += 4;
+
+        // find the end of the key, checking the bound BEFORE reading the byte
+
+        for (isize = 0; p + isize < q && p[isize]; ++isize);
+
+        // reject items that claim more data than remains; compare lengths
+        // rather than forming a pointer that may lie beyond the buffer
+
+        if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || vsize > q - p - isize - 1)
+            break;
+
+        if (isize && vsize && ((flags & 6) >> 1) == type && !index--) {
+
+            if (!item || !size)
+                return isize;
+
+            if (isize < size) {
+                memcpy (item, p, isize);
+                item [isize] = 0;
+                return isize;
+            }
+            else if (size >= 4) {
+                memcpy (item, p, size - 1);
+                item [size - 4] = item [size - 3] = item [size - 2] = '.';
+                item [size - 1] = 0;
+                return size - 1;
+            }
+            else
+                return 0;
+        }
+        else
+            p += isize + vsize + 1;
+    }
+
+    return 0;
+}
+
+// Return the name of the index'th ID3v1 field that actually contains data.
+// Fields are considered in the fixed order Title, Artist, Album, Year,
+// Comment, Track; fields without data do not consume an index. If "item" is
+// NULL or "size" is 0 only the name length is returned. A name that does not
+// fit in "size" bytes is truncated with a trailing "..." (size >= 4 needed,
+// otherwise 0 is returned). Returns 0 when no field corresponds to "index".
+
+static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size)
+{
+    const char *name = NULL;
+    int name_len;
+
+    // each populated field uses up one index; the first one that brings
+    // the index to zero is the one we report
+
+    if (tagdata (m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title)) && !index--)
+        name = "Title";
+
+    if (!name && tagdata (m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist)) && !index--)
+        name = "Artist";
+
+    if (!name && tagdata (m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album)) && !index--)
+        name = "Album";
+
+    if (!name && tagdata (m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year)) && !index--)
+        name = "Year";
+
+    if (!name && tagdata (m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment)) && !index--)
+        name = "Comment";
+
+    if (!name && m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28] && !index--)
+        name = "Track";
+
+    if (!name)
+        return 0;
+
+    name_len = (int) strlen (name);
+
+    if (!item || !size)
+        return name_len;
+
+    if (name_len >= size) {
+        if (size < 4)
+            return 0;
+
+        memcpy (item, name, size - 1);
+        item [size - 4] = '.';
+        item [size - 3] = '.';
+        item [size - 2] = '.';
+        item [size - 1] = 0;
+        return size - 1;
+    }
+
+    memcpy (item, name, name_len + 1);      // includes the terminating NUL
+    return name_len;
+}
+
+// Append one item (key "item", "vsize" bytes at "value", of the given APE
+// item type) to the in-memory APEv2 tag, creating an empty tag first if none
+// exists yet. The stored item is 8 bytes of little-endian size and flags, the
+// key with its terminating NUL, and the value bytes (vsize + isize + 9 total).
+//
+// Returns TRUE on success. Returns FALSE, leaving the existing tag untouched,
+// if the context holds a non-APEv2 tag, "vsize" is negative, the resulting
+// tag would exceed APE_TAG_MAX_LENGTH, or memory cannot be allocated; in the
+// last three cases an explanation is stored in wpc->error_message.
+
+static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type)
+{
+    M_Tag *m_tag = &wpc->m_tag;
+    int isize = (int) strlen (item);
+
+    if (!m_tag->ape_tag_hdr.ID [0]) {
+        strncpy (m_tag->ape_tag_hdr.ID, "APETAGEX", sizeof (m_tag->ape_tag_hdr.ID));
+        m_tag->ape_tag_hdr.version = 2000;
+        m_tag->ape_tag_hdr.length = sizeof (m_tag->ape_tag_hdr);
+        m_tag->ape_tag_hdr.item_count = 0;
+        m_tag->ape_tag_hdr.flags = APE_TAG_CONTAINS_HEADER;  // we will include header on tags we originate
+    }
+
+    if (m_tag->ape_tag_hdr.ID [0] == 'A') {
+        int new_item_len, flags = type << 1;
+        unsigned char *p;
+
+        if (vsize < 0) {
+            strcpy (wpc->error_message, "APEv2 tag item has invalid size!");
+            return FALSE;
+        }
+
+        // test the individual sizes first so the sum below cannot overflow
+
+        if (vsize > APE_TAG_MAX_LENGTH || isize > APE_TAG_MAX_LENGTH ||
+            m_tag->ape_tag_hdr.length + vsize + isize + 9 > APE_TAG_MAX_LENGTH) {
+                strcpy (wpc->error_message, "APEv2 tag exceeds maximum allowed length!");
+                return FALSE;
+        }
+
+        new_item_len = vsize + isize + 9;
+
+        // grow the buffer through a temporary so that a failed realloc() neither
+        // loses the existing data nor leaves the header describing an item that
+        // was never stored
+
+        p = (unsigned char*)realloc (m_tag->ape_tag_data, m_tag->ape_tag_hdr.length + new_item_len);
+
+        if (!p) {
+            strcpy (wpc->error_message, "can't allocate memory for APEv2 tag!");
+            return FALSE;
+        }
+
+        m_tag->ape_tag_data = p;
+        m_tag->ape_tag_hdr.item_count++;
+        m_tag->ape_tag_hdr.length += new_item_len;
+
+        // the new item goes right after the existing item data
+
+        p += m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr) - new_item_len;
+
+        *p++ = (unsigned char) vsize;
+        *p++ = (unsigned char) (vsize >> 8);
+        *p++ = (unsigned char) (vsize >> 16);
+        *p++ = (unsigned char) (vsize >> 24);
+
+        *p++ = (unsigned char) flags;
+        *p++ = (unsigned char) (flags >> 8);
+        *p++ = (unsigned char) (flags >> 16);
+        *p++ = (unsigned char) (flags >> 24);
+
+        strcpy ((char *) p, item);
+        p += isize + 1;
+        memcpy (p, value, vsize);
+
+        return TRUE;
+    }
+    else
+        return FALSE;
+}
+
+// Append the stored APEv2 tag to the file being created using the "blockout"
+// function callback. The tag is emitted as an optional header (only when the
+// APE_TAG_CONTAINS_HEADER flag is set), the item data, and the footer. Nothing
+// is written when there is no APEv2 tag or it has no items.
+//
+// Returns the (boolean) result of the writes; writing stops at the first
+// failure so that an early error cannot be hidden by a later successful write.
+// On failure an explanation is stored in wpc->error_message.
+
+static int write_tag_blockout (WavpackContext *wpc)
+{
+    M_Tag *m_tag = &wpc->m_tag;
+    int result = TRUE;
+
+    if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count) {
+
+        // only write header if it's specified in the flags
+
+        if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) {
+            m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER;
+            WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+            result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr));
+            WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+        }
+
+        if (result && m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr))
+            result = wpc->blockout (wpc->wv_out, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr));
+
+        // the in-memory copy always ends up flagged as footer, even on failure
+
+        m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER;    // this is NOT header
+
+        if (result) {
+            WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+            result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr));
+            WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+        }
+    }
+
+    if (!result)
+        strcpy (wpc->error_message, "can't write WavPack data, disk probably full!");
+
+    return result;
+}
+
+// Write the [potentially] edited tag to the existing WavPack file using the
+// reader callback functions. The write position is set to m_tag->tag_file_pos
+// relative to the end of the file (presumably where the existing tag begins).
+// If the new tag is smaller than the space between that position and the end
+// of the file, the difference is either filled with leading zero bytes (when
+// the reader has no truncate_here callback) or cut off after the tag has been
+// written (when it has one).
+//
+// Returns TRUE on success. Returns FALSE with an explanation stored in
+// wpc->error_message if editing is not possible (tag at the beginning of the
+// file, unseekable input, file not opened with OPEN_EDIT_TAGS) or if any
+// seek, write or truncate operation fails; writing stops at the first failure.
+
+static int write_tag_reader (WavpackContext *wpc)
+{
+    M_Tag *m_tag = &wpc->m_tag;
+    int32_t tag_size = 0;
+    int result;
+
+    // before we write an edited (or new) tag into an existing file, make sure it's safe and possible
+
+    if (m_tag->tag_begins_file) {
+        strcpy (wpc->error_message, "can't edit tags located at the beginning of files!");
+        return FALSE;
+    }
+
+    if (!wpc->reader->can_seek (wpc->wv_in)) {
+        strcpy (wpc->error_message, "can't edit tags on pipes or unseekable files!");
+        return FALSE;
+    }
+
+    if (!(wpc->open_flags & OPEN_EDIT_TAGS)) {
+        strcpy (wpc->error_message, "can't edit tags without OPEN_EDIT_TAGS flag!");
+        return FALSE;
+    }
+
+    if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count &&
+        m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr))
+            tag_size = m_tag->ape_tag_hdr.length;
+
+    // only write header if it's specified in the flags
+
+    if (tag_size && (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER))
+        tag_size += sizeof (m_tag->ape_tag_hdr);
+
+    result = !wpc->reader->set_pos_rel (wpc->wv_in, m_tag->tag_file_pos, SEEK_END);
+
+    // without a truncate callback the file cannot shrink, so pad in front of the new tag
+
+    if (result && tag_size < -m_tag->tag_file_pos && !wpc->reader->truncate_here) {
+        int nullcnt = (int) (-m_tag->tag_file_pos - tag_size);
+        char zero [1] = { 0 };
+
+        while (result && nullcnt--)
+            result = (wpc->reader->write_bytes (wpc->wv_in, zero, 1) == 1);
+    }
+
+    if (result && tag_size) {
+        int32_t data_size = (int32_t) (m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr));
+
+        if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) {
+            m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER;
+            WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+            result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr));
+            WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+        }
+
+        // the item data write must be checked against the item data size, not the header size
+
+        if (result)
+            result = (wpc->reader->write_bytes (wpc->wv_in, m_tag->ape_tag_data, data_size) == data_size);
+
+        m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER;    // this is NOT header
+
+        if (result) {
+            WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+            result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr));
+            WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+        }
+    }
+
+    if (result && tag_size < -m_tag->tag_file_pos && wpc->reader->truncate_here)
+        result = !wpc->reader->truncate_here (wpc->wv_in);
+
+    if (!result)
+        strcpy (wpc->error_message, "can't write WavPack data, disk probably full!");
+
+    return result;
+}
+
+// Copy the specified ID3v1 tag value (with specified field size) from the
+// source pointer to the destination, eliminating leading spaces and trailing
+// spaces and nulls. Copying also stops at the first null inside the field.
+// The destination is always NUL-terminated and must have room for
+// tag_size + 1 bytes. If the last byte of the field is non-zero but the byte
+// before it is zero, the last byte is not treated as text (this is how the
+// comment field carries a track number).
+
+static void tagcpy (char *dest, char *src, int tag_size)
+{
+    int first = 0, last = tag_size - 1;
+
+    // indices are used (rather than pointers) so that nothing outside of
+    // src [0 .. tag_size-1] is ever read or even addressed
+
+    if (last > 0 && src [last] && !src [last - 1])
+        last--;
+
+    while (first <= last)
+        if (src [first] == ' ')
+            ++first;
+        else if (!src [last] || src [last] == ' ')
+            --last;
+        else
+            break;
+
+    // check the bound before reading the byte
+
+    while (first <= last && src [first])
+        *dest++ = src [first++];
+
+    *dest = 0;
+}
+
+// Report whether the specified ID3v1 field (with specified field size) holds
+// any text once leading spaces and trailing spaces and nulls are ignored.
+// Returns 1 if it does and 0 if not. If the last byte of the field is
+// non-zero but the byte before it is zero, the last byte is not treated as
+// text (this is how the comment field carries a track number).
+
+static int tagdata (char *src, int tag_size)
+{
+    int first = 0, last = tag_size - 1;
+
+    // indices are used (rather than pointers) so that nothing outside of
+    // src [0 .. tag_size-1] is ever read or even addressed
+
+    if (last > 0 && src [last] && !src [last - 1])
+        last--;
+
+    while (first <= last)
+        if (src [first] == ' ')
+            ++first;
+        else if (!src [last] || src [last] == ' ')
+            --last;
+        else
+            break;
+
+    // check the bound before reading the byte
+
+    return (first <= last && src [first]);
+}
--- a/third_party/wavpack/src/tags.c
+++ b/third_party/wavpack/src/tags.c
@ -1,247 +1,23 @@
 ////////////////////////////////////////////////////////////////////////////
 //                           **** WAVPACK ****                            //
 //                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2009 Conifer Software.               //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
 //                          All Rights Reserved.                          //
 //      Distributed under the BSD Software License (see license.txt)      //
 ////////////////////////////////////////////////////////////////////////////

 // tags.c

-// This module provides support for reading and writing metadata tags.
+// This module provides support for reading metadata tags (either ID3v1 or
+// APEv2) from WavPack files. No actual creation or manipulation of the tags
+// is done in this module; this is just internal code to load the tags into
+// memory. The high-level API functions are in the tag_utils.c module.

 #include <stdlib.h>
 #include <string.h>

 #include "wavpack_local.h"

-#ifdef WIN32
-#define stricmp(x,y) _stricmp(x,y)
-#define fileno _fileno
-#else
-#define stricmp strcasecmp
-#endif
-
-#ifdef DEBUG_ALLOC
-#define malloc malloc_db
-#define realloc realloc_db
-#define free free_db
-void *malloc_db (uint32_t size);
-void *realloc_db (void *ptr, uint32_t size);
-void free_db (void *ptr);
-int32_t dump_alloc (void);
-#endif
-
-#ifndef NO_TAGS
-
-static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type);
-static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size);
-static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type);
-static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size);
-static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type);
-static int write_tag_blockout (WavpackContext *wpc);
-static int write_tag_reader (WavpackContext *wpc);
-static void tagcpy (char *dest, char *src, int tag_size);
-static int tagdata (char *src, int tag_size);
-
-//////////////////// Global functions part of external API /////////////////////////
-
-// Count and return the total number of tag items in the specified file.
-
-int WavpackGetNumTagItems (WavpackContext *wpc)
-{
-    int i = 0;
-
-    while (WavpackGetTagItemIndexed (wpc, i, NULL, 0))
-        ++i;
-
-    return i;
-}
-
-// Count and return the total number of binary tag items in the specified file. This applies
-// only to APEv2 tags and was implemented as a separate function to avoid breaking the old API.
-
-int WavpackGetNumBinaryTagItems (WavpackContext *wpc)
-{
-    int i = 0;
-
-    while (WavpackGetBinaryTagItemIndexed (wpc, i, NULL, 0))
-        ++i;
-
-    return i;
-}
-
-// Attempt to get the specified item from the specified file's ID3v1 or APEv2
-// tag. The "size" parameter specifies the amount of space available at "value",
-// if the desired item will not fit in this space then ellipses (...) will
-// be appended and the string terminated. Only text data are supported. The
-// actual length of the string is returned (or 0 if no matching value found).
-// Note that with APEv2 tags the length might not be the same as the number of
-// characters because UTF-8 encoding is used. Also, APEv2 tags can have multiple
-// (NULL separated) strings for a single value (this is why the length is
-// returned). If this function is called with a NULL "value" pointer (or a
-// zero "length") then only the actual length of the value data is returned
-// (not counting the terminating NULL). This can be used to determine the
-// actual memory to be allocated beforehand.
-
-int WavpackGetTagItem (WavpackContext *wpc, const char *item, char *value, int size)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-
-    if (value && size)
-        *value = 0;
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A')
-        return get_ape_tag_item (m_tag, item, value, size, APE_TAG_TYPE_TEXT);
-    else if (m_tag->id3_tag.tag_id [0] == 'T')
-        return get_id3_tag_item (m_tag, item, value, size);
-    else
-        return 0;
-}
-
-// Attempt to get the specified binary item from the specified file's APEv2
-// tag. The "size" parameter specifies the amount of space available at "value".
-// If the desired item will not fit in this space then nothing will be copied
-// and 0 will be returned, otherwise the actual size will be returned. If this
-// function is called with a NULL "value" pointer (or a zero "length") then only
-// the actual length of the value data is returned and can be used to determine
-// the actual memory to be allocated beforehand.
-
-int WavpackGetBinaryTagItem (WavpackContext *wpc, const char *item, char *value, int size)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-
-    if (value && size)
-        *value = 0;
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A')
-        return get_ape_tag_item (m_tag, item, value, size, APE_TAG_TYPE_BINARY);
-    else
-        return 0;
-}
-
-// This function looks up the tag item name by index and is used when the
-// application wants to access all the items in the file's ID3v1 or APEv2 tag.
-// Note that this function accesses only the item's name; WavpackGetTagItem()
-// still must be called to get the actual value. The "size" parameter specifies
-// the amount of space available at "item", if the desired item will not fit in
-// this space then ellipses (...) will be appended and the string terminated.
-// The actual length of the string is returned (or 0 if no item exists for
-// index). If this function is called with a NULL "value" pointer (or a
-// zero "length") then only the actual length of the item name is returned
-// (not counting the terminating NULL). This can be used to determine the
-// actual memory to be allocated beforehand. For binary tag values use the
-// otherwise identical WavpackGetBinaryTagItemIndexed ();
-
-int WavpackGetTagItemIndexed (WavpackContext *wpc, int index, char *item, int size)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-
-    if (item && size)
-        *item = 0;
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A')
-        return get_ape_tag_item_indexed (m_tag, index, item, size, APE_TAG_TYPE_TEXT);
-    else if (m_tag->id3_tag.tag_id [0] == 'T')
-        return get_id3_tag_item_indexed (m_tag, index, item, size);
-    else
-        return 0;
-}
-
-int WavpackGetBinaryTagItemIndexed (WavpackContext *wpc, int index, char *item, int size)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-
-    if (item && size)
-        *item = 0;
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A')
-        return get_ape_tag_item_indexed (m_tag, index, item, size, APE_TAG_TYPE_BINARY);
-    else
-        return 0;
-}
-
-// These two functions are used to append APEv2 tags to WavPack files; one is
-// for text values (UTF-8 encoded) and the other is for binary values. If no tag
-// has been started, then an empty one will be allocated first. When finished,
-// use WavpackWriteTag() to write the completed tag to the file. The purpose of
-// the passed size parameter is obvious for binary values, but might not be for
-// text values. Keep in mind that APEv2 text values can have multiple values
-// that are NULL separated, so the size is required to know the extent of the
-// value (although the final terminating NULL is not included in the passed
-// size). If the specified item already exists, it will be replaced with the
-// new value. ID3v1 tags are not supported.
-
-int WavpackAppendTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize)
-{
-    while (WavpackDeleteTagItem (wpc, item));
-    return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_TEXT);
-}
-
-int WavpackAppendBinaryTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize)
-{
-    while (WavpackDeleteTagItem (wpc, item));
-    return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_BINARY);
-}
-
-// Delete the specified tag item from the APEv2 tag on the specified WavPack file
-// (fields cannot be deleted from ID3v1 tags). A return value of TRUE indicates
-// that the item was found and successfully deleted.
-
-int WavpackDeleteTagItem (WavpackContext *wpc, const char *item)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A') {
-        unsigned char *p = m_tag->ape_tag_data;
-        unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr);
-        int i;
-
-        for (i = 0; i < m_tag->ape_tag_hdr.item_count; ++i) {
-            int vsize, isize;
-
-            vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 8;   // skip flags because we don't need them
-            for (isize = 0; p[isize] && p + isize < q; ++isize);
-
-            if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q)
-                break;
-
-            if (isize && vsize && !stricmp (item, (char *) p)) {
-                unsigned char *d = p - 8;
-
-                p += isize + vsize + 1;
-
-                while (p < q)
-                    *d++ = *p++;
-
-                m_tag->ape_tag_hdr.length = (int32_t)(d - m_tag->ape_tag_data) + sizeof (APE_Tag_Hdr);
-                m_tag->ape_tag_hdr.item_count--;
-                return 1;
-            }
-            else
-                p += isize + vsize + 1;
-        }
-    }
-
-    return 0;
-}
-
-// Once a APEv2 tag has been created with WavpackAppendTag(), this function is
-// used to write the completed tag to the end of the WavPack file. Note that
-// this function uses the same "blockout" function that is used to write
-// regular WavPack blocks, although that's where the similarity ends. It is also
-// used to write tags that have been edited on existing files.
-
-int WavpackWriteTag (WavpackContext *wpc)
-{
-    if (wpc->blockout)      // this is the case for creating fresh WavPack files
-        return write_tag_blockout (wpc);
-    else                    // otherwise we are editing existing tags (OPEN_EDIT_TAGS)
-        return write_tag_reader (wpc);
-}
-
-//////// Utility functions provided to other modules (but not part of lib API) /////////
-
 // This function attempts to load an ID3v1 or APEv2 tag from the specified
 // file into the specified M_Tag structure. The ID3 tag fits in completely,
 // but an APEv2 tag is variable length and so space must be allocated here
@ -278,12 +54,12 @@ int load_tag (WavpackContext *wpc)
        if (wpc->reader->read_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (APE_Tag_Hdr)) == sizeof (APE_Tag_Hdr) &&
            !strncmp (m_tag->ape_tag_hdr.ID, "APETAGEX", 8)) {

-                little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+                WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);

                if (m_tag->ape_tag_hdr.version == 2000 && m_tag->ape_tag_hdr.item_count &&
                    m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr) &&
                    m_tag->ape_tag_hdr.length <= APE_TAG_MAX_LENGTH &&
-                    (m_tag->ape_tag_data = malloc (m_tag->ape_tag_hdr.length)) != NULL) {
+                    (m_tag->ape_tag_data = (unsigned char *)malloc (m_tag->ape_tag_hdr.length)) != NULL) {

                        ape_tag_items = m_tag->ape_tag_hdr.item_count;
                        ape_tag_length = m_tag->ape_tag_hdr.length;
@ -315,7 +91,7 @@ int load_tag (WavpackContext *wpc)
                                        return FALSE;       // something's wrong...
                                }

-                                little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
+                                WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);

                                if (m_tag->ape_tag_hdr.version != 2000 || m_tag->ape_tag_hdr.item_count != ape_tag_items ||
                                    m_tag->ape_tag_hdr.length != ape_tag_length) {
@ -401,366 +177,3 @@ void free_tag (M_Tag *m_tag)
        m_tag->ape_tag_data = NULL;
    }
 }
-
-////////////////////////// local static functions /////////////////////////////
-
-static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type)
-{
-    unsigned char *p = m_tag->ape_tag_data;
-    unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr);
-    int i;
-
-    for (i = 0; i < m_tag->ape_tag_hdr.item_count && q - p > 8; ++i) {
-        int vsize, flags, isize;
-
-        vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4;
-        flags = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4;
-        for (isize = 0; p[isize] && p + isize < q; ++isize);
-
-        if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q)
-            break;
-
-        if (isize && vsize && !stricmp (item, (char *) p) && ((flags & 6) >> 1) == type) {
-
-            if (!value || !size)
-                return vsize;
-
-            if (type == APE_TAG_TYPE_BINARY) {
-                if (vsize <= size) {
-                    memcpy (value, p + isize + 1, vsize);
-                    return vsize;
-                }
-                else
-                    return 0;
-            }
-            else if (vsize < size) {
-                memcpy (value, p + isize + 1, vsize);
-                value [vsize] = 0;
-                return vsize;
-            }
-            else if (size >= 4) {
-                memcpy (value, p + isize + 1, size - 1);
-                value [size - 4] = value [size - 3] = value [size - 2] = '.';
-                value [size - 1] = 0;
-                return size - 1;
-            }
-            else
-                return 0;
-        }
-        else
-            p += isize + vsize + 1;
-    }
-
-    return 0;
-}
-
-static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size)
-{
-    char lvalue [64];
-    int len;
-
-    lvalue [0] = 0;
-
-    if (!stricmp (item, "title"))
-        tagcpy (lvalue, m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title));
-    else if (!stricmp (item, "artist"))
-        tagcpy (lvalue, m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist));
-    else if (!stricmp (item, "album"))
-        tagcpy (lvalue, m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album));
-    else if (!stricmp (item, "year"))
-        tagcpy (lvalue, m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year));
-    else if (!stricmp (item, "comment"))
-        tagcpy (lvalue, m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment));
-    else if (!stricmp (item, "track") && m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28])
-        sprintf (lvalue, "%d", m_tag->id3_tag.comment [29]);
-    else
-        return 0;
-
-    len = (int) strlen (lvalue);
-
-    if (!value || !size)
-        return len;
-
-    if (len < size) {
-        strcpy (value, lvalue);
-        return len;
-    }
-    else if (size >= 4) {
-        strncpy (value, lvalue, size - 1);
-        value [size - 4] = value [size - 3] = value [size - 2] = '.';
-        value [size - 1] = 0;
-        return size - 1;
-    }
-    else
-        return 0;
-}
-
-static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type)
-{
-    unsigned char *p = m_tag->ape_tag_data;
-    unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr);
-    int i;
-
-    for (i = 0; i < m_tag->ape_tag_hdr.item_count && index >= 0 && q - p > 8; ++i) {
-        int vsize, flags, isize;
-
-        vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4;
-        flags = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4;
-        for (isize = 0; p[isize] && p + isize < q; ++isize);
-
-        if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q)
-            break;
-
-        if (isize && vsize && ((flags & 6) >> 1) == type && !index--) {
-
-            if (!item || !size)
-                return isize;
-
-            if (isize < size) {
-                memcpy (item, p, isize);
-                item [isize] = 0;
-                return isize;
-            }
-            else if (size >= 4) {
-                memcpy (item, p, size - 1);
-                item [size - 4] = item [size - 3] = item [size - 2] = '.';
-                item [size - 1] = 0;
-                return size - 1;
-            }
-            else
-                return 0;
-        }
-        else
-            p += isize + vsize + 1;
-    }
-
-    return 0;
-}
-
-static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size)
-{
-    char lvalue [16];
-    int len;
-
-    lvalue [0] = 0;
-
-    if (tagdata (m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title)) && !index--)
-        strcpy (lvalue, "Title");
-    else if (tagdata (m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist)) && !index--)
-        strcpy (lvalue, "Artist");
-    else if (tagdata (m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album)) && !index--)
-        strcpy (lvalue, "Album");
-    else if (tagdata (m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year)) && !index--)
-        strcpy (lvalue, "Year");
-    else if (tagdata (m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment)) && !index--)
-        strcpy (lvalue, "Comment");
-    else if (m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28] && !index--)
-        strcpy (lvalue, "Track");
-    else
-        return 0;
-
-    len = (int) strlen (lvalue);
-
-    if (!item || !size)
-        return len;
-
-    if (len < size) {
-        strcpy (item, lvalue);
-        return len;
-    }
-    else if (size >= 4) {
-        strncpy (item, lvalue, size - 1);
-        item [size - 4] = item [size - 3] = item [size - 2] = '.';
-        item [size - 1] = 0;
-        return size - 1;
-    }
-    else
-        return 0;
-}
-
-static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-    int isize = (int) strlen (item);
-
-    if (!m_tag->ape_tag_hdr.ID [0]) {
-        strncpy (m_tag->ape_tag_hdr.ID, "APETAGEX", sizeof (m_tag->ape_tag_hdr.ID));
-        m_tag->ape_tag_hdr.version = 2000;
-        m_tag->ape_tag_hdr.length = sizeof (m_tag->ape_tag_hdr);
-        m_tag->ape_tag_hdr.item_count = 0;
-        m_tag->ape_tag_hdr.flags = APE_TAG_CONTAINS_HEADER;  // we will include header on tags we originate
-    }
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A') {
-        int new_item_len = vsize + isize + 9, flags = type << 1;
-        unsigned char *p;
-
-        if (m_tag->ape_tag_hdr.length + new_item_len > APE_TAG_MAX_LENGTH) {
-            strcpy (wpc->error_message, "APEv2 tag exceeds maximum allowed length!");
-            return FALSE;
-        }
-
-        m_tag->ape_tag_hdr.item_count++;
-        m_tag->ape_tag_hdr.length += new_item_len;
-        p = m_tag->ape_tag_data = realloc (m_tag->ape_tag_data, m_tag->ape_tag_hdr.length);
-        p += m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr) - new_item_len;
-
-        *p++ = (unsigned char) vsize;
-        *p++ = (unsigned char) (vsize >> 8);
-        *p++ = (unsigned char) (vsize >> 16);
-        *p++ = (unsigned char) (vsize >> 24);
-
-        *p++ = (unsigned char) flags;
-        *p++ = (unsigned char) (flags >> 8);
-        *p++ = (unsigned char) (flags >> 16);
-        *p++ = (unsigned char) (flags >> 24);
-
-        strcpy ((char *) p, item);
-        p += isize + 1;
-        memcpy (p, value, vsize);
-
-        return TRUE;
-    }
-    else
-        return FALSE;
-}
-
-static int write_tag_blockout (WavpackContext *wpc)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-    int result = TRUE;
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count) {
-
-        // only write header if it's specified in the flags
-
-        if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) {
-            m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER;
-            native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-            result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr));
-            little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-        }
-
-        if (m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr))
-            result = wpc->blockout (wpc->wv_out, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr));
-
-        m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER;    // this is NOT header
-        native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-        result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr));
-        little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-    }
-
-    if (!result)
-        strcpy (wpc->error_message, "can't write WavPack data, disk probably full!");
-
-    return result;
-}
-
-static int write_tag_reader (WavpackContext *wpc)
-{
-    M_Tag *m_tag = &wpc->m_tag;
-    int32_t tag_size = 0;
-    int result;
-
-    // before we write an edited (or new) tag into an existing file, make sure it's safe and possible
-
-    if (m_tag->tag_begins_file) {
-        strcpy (wpc->error_message, "can't edit tags located at the beginning of files!");
-        return FALSE;
-    }
-
-    if (!wpc->reader->can_seek (wpc->wv_in)) {
-        strcpy (wpc->error_message, "can't edit tags on pipes or unseekable files!");
-        return FALSE;
-    }
-
-    if (!(wpc->open_flags & OPEN_EDIT_TAGS)) {
-        strcpy (wpc->error_message, "can't edit tags without OPEN_EDIT_TAGS flag!");
-        return FALSE;
-    }
-
-    if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count &&
-        m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr))
-            tag_size = m_tag->ape_tag_hdr.length;
-
-    // only write header if it's specified in the flags
-
-    if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER)
-        tag_size += sizeof (m_tag->ape_tag_hdr);
-
-    result = !wpc->reader->set_pos_rel (wpc->wv_in, m_tag->tag_file_pos, SEEK_END);
-
-    if (result && tag_size < -m_tag->tag_file_pos) {
-        int nullcnt = -m_tag->tag_file_pos - tag_size;
-        char zero [1] = { 0 };
-
-        while (nullcnt--)
-            wpc->reader->write_bytes (wpc->wv_in, &zero, 1);
-    }
-
-    if (result && tag_size) {
-        if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) {
-            m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER;
-            native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-            result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr));
-            little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-        }
-
-        result = (wpc->reader->write_bytes (wpc->wv_in, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr));
-        m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER;    // this is NOT header
-        native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-        result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr));
-        little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format);
-    }
-
-    if (!result)
-        strcpy (wpc->error_message, "can't write WavPack data, disk probably full!");
-
-    return result;
-}
-
-// Copy the specified ID3v1 tag value (with specified field size) from the
-// source pointer to the destination, eliminating leading spaces and trailing
-// spaces and nulls.
-
-static void tagcpy (char *dest, char *src, int tag_size)
-{
-    char *s1 = src, *s2 = src + tag_size - 1;
-
-    if (*s2 && !s2 [-1])
-        s2--;
-
-    while (s1 <= s2)
-        if (*s1 == ' ')
-            ++s1;
-        else if (!*s2 || *s2 == ' ')
-            --s2;
-        else
-            break;
-
-    while (*s1 && s1 <= s2)
-        *dest++ = *s1++;
-
-    *dest = 0;
-}
-
-static int tagdata (char *src, int tag_size)
-{
-    char *s1 = src, *s2 = src + tag_size - 1;
-
-    if (*s2 && !s2 [-1])
-        s2--;
-
-    while (s1 <= s2)
-        if (*s1 == ' ')
-            ++s1;
-        else if (!*s2 || *s2 == ' ')
-            --s2;
-        else
-            break;
-
-    return (*s1 && s1 <= s2);
-}
-
-#endif
-
--- a/third_party/wavpack/src/unpack.c
+++ b/third_party/wavpack/src/unpack.c
--- a/third_party/wavpack/src/unpack3.c
+++ b/third_party/wavpack/src/unpack3.c
@ -1,7 +1,7 @@
 ////////////////////////////////////////////////////////////////////////////
 //                           **** WAVPACK ****                            //
 //                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
 //                          All Rights Reserved.                          //
 //      Distributed under the BSD Software License (see license.txt)      //
 ////////////////////////////////////////////////////////////////////////////
@ -12,387 +12,24 @@
 // not including "raw" files. As these modes are all obsolete and are no
 // longer written, this code will not be fully documented other than the
 // global functions. However, full documentation is provided in the version
-// 3.97 source code.
+// 3.97 source code. Note that this module does only the low-level sample
+// unpacking; the actual opening of the file (and obtaining information
+// from it) is handled in the unpack3_open.c module.
+
+#ifdef ENABLE_LEGACY

 #include <stdlib.h>
-#include <stdio.h>
 #include <string.h>
-#include <math.h>

 #include "wavpack_local.h"
 #include "unpack3.h"

 #define ATTEMPT_ERROR_MUTING

-#ifdef DEBUG_ALLOC
-#define malloc malloc_db
-#define realloc realloc_db
-#define free free_db
-void *malloc_db (uint32_t size);
-void *realloc_db (void *ptr, uint32_t size);
-void free_db (void *ptr);
-int32_t dump_alloc (void);
-#endif
-
-static void unpack_init3 (WavpackStream3 *wps);
-static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader *reader, void *id);
-static void bs_close_read3 (Bitstream3 *bs);
-#ifndef NO_SEEKING
-static void bs_restore3 (Bitstream3 *bs);
-#endif
-
-// This provides an extension to the WavpackOpenFileRead () function contained
-// in the wputils.c module. It is assumed that an 'R' had been read as the
-// first character of the file/stream (indicating a non-raw pre version 4.0
-// WavPack file) and had been pushed back onto the stream (or simply seeked
-// back to).
-
-WavpackContext *open_file3 (WavpackContext *wpc, char *error)
-{
-    RiffChunkHeader RiffChunkHeader;
-    ChunkHeader ChunkHeader;
-    WavpackHeader3 wphdr;
-    WavpackStream3 *wps;
-    WaveHeader3 wavhdr;
-
-    CLEAR (wavhdr);
-    wpc->stream3 = wps = (WavpackStream3 *) malloc (sizeof (WavpackStream3));
-    CLEAR (*wps);
-
-    if (wpc->reader->read_bytes (wpc->wv_in, &RiffChunkHeader, sizeof (RiffChunkHeader)) !=
-        sizeof (RiffChunkHeader)) {
-            if (error) strcpy (error, "not a valid WavPack file!");
-            return WavpackCloseFile (wpc);
-    }
-
-    if (!strncmp (RiffChunkHeader.ckID, "RIFF", 4) && !strncmp (RiffChunkHeader.formType, "WAVE", 4)) {
-
-        if (wpc->open_flags & OPEN_WRAPPER) {
-            wpc->wrapper_data = malloc (wpc->wrapper_bytes = sizeof (RiffChunkHeader));
-            memcpy (wpc->wrapper_data, &RiffChunkHeader, sizeof (RiffChunkHeader));
-        }
-
-    // If the first chunk is a wave RIFF header, then read the various chunks
-    // until we get to the "data" chunk (and WavPack header should follow). If
-    // the first chunk is not a RIFF, then we assume a "raw" WavPack file and
-    // the WavPack header must be first.
-
-        while (1) {
-
-            if (wpc->reader->read_bytes (wpc->wv_in, &ChunkHeader, sizeof (ChunkHeader)) !=
-                sizeof (ChunkHeader)) {
-                    if (error) strcpy (error, "not a valid WavPack file!");
-                    return WavpackCloseFile (wpc);
-            }
-            else {
-                if (wpc->open_flags & OPEN_WRAPPER) {
-                    wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + sizeof (ChunkHeader));
-                    memcpy (wpc->wrapper_data + wpc->wrapper_bytes, &ChunkHeader, sizeof (ChunkHeader));
-                    wpc->wrapper_bytes += sizeof (ChunkHeader);
-                }
-
-                little_endian_to_native (&ChunkHeader, ChunkHeaderFormat);
-
-                if (!strncmp (ChunkHeader.ckID, "fmt ", 4)) {
-
-                    if (ChunkHeader.ckSize < sizeof (wavhdr) ||
-                        wpc->reader->read_bytes (wpc->wv_in, &wavhdr, sizeof (wavhdr)) != sizeof (wavhdr)) {
-                            if (error) strcpy (error, "not a valid WavPack file!");
-                            return WavpackCloseFile (wpc);
-                    }
-                    else if (wpc->open_flags & OPEN_WRAPPER) {
-                        wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + sizeof (wavhdr));
-                        memcpy (wpc->wrapper_data + wpc->wrapper_bytes, &wavhdr, sizeof (wavhdr));
-                        wpc->wrapper_bytes += sizeof (wavhdr);
-                    }
-
-                    little_endian_to_native (&wavhdr, WaveHeader3Format);
-
-                    if (ChunkHeader.ckSize > sizeof (wavhdr)) {
-                        uint32_t bytes_to_skip = (ChunkHeader.ckSize + 1 - sizeof (wavhdr)) & ~1L;
-
-                        if (bytes_to_skip > 1024 * 1024) {
-                            if (error) strcpy (error, "not a valid WavPack file!");
-                            return WavpackCloseFile (wpc);
-                        }
-
-                        if (wpc->open_flags & OPEN_WRAPPER) {
-                            wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes_to_skip);
-                            wpc->reader->read_bytes (wpc->wv_in, wpc->wrapper_data + wpc->wrapper_bytes, bytes_to_skip);
-                            wpc->wrapper_bytes += bytes_to_skip;
-                        }
-                        else {
-                            unsigned char *temp = malloc (bytes_to_skip);
-                            wpc->reader->read_bytes (wpc->wv_in, temp, bytes_to_skip);
-                            free (temp);
-                        }
-                    }
-                }
-                else if (!strncmp (ChunkHeader.ckID, "data", 4))
-                    break;
-                else if ((ChunkHeader.ckSize + 1) & ~1L) {
-                    uint32_t bytes_to_skip = (ChunkHeader.ckSize + 1) & ~1L;
-
-                    if (bytes_to_skip > 1024 * 1024) {
-                        if (error) strcpy (error, "not a valid WavPack file!");
-                        return WavpackCloseFile (wpc);
-                    }
-
-                    if (wpc->open_flags & OPEN_WRAPPER) {
-                        wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes_to_skip);
-                        wpc->reader->read_bytes (wpc->wv_in, wpc->wrapper_data + wpc->wrapper_bytes, bytes_to_skip);
-                        wpc->wrapper_bytes += bytes_to_skip;
-                    }
-                    else {
-                        unsigned char *temp = malloc (bytes_to_skip);
-                        wpc->reader->read_bytes (wpc->wv_in, temp, bytes_to_skip);
-                        free (temp);
-                    }
-                }
-            }
-        }
-    }
-    else {
-        if (error) strcpy (error, "not a valid WavPack file!");
-        return WavpackCloseFile (wpc);
-    }
-
-    if (wavhdr.FormatTag != 1 || !wavhdr.NumChannels || wavhdr.NumChannels > 2 ||
-        !wavhdr.SampleRate || wavhdr.BitsPerSample < 16 || wavhdr.BitsPerSample > 24 ||
-        wavhdr.BlockAlign / wavhdr.NumChannels > 3 || wavhdr.BlockAlign % wavhdr.NumChannels ||
-        wavhdr.BlockAlign / wavhdr.NumChannels < (wavhdr.BitsPerSample + 7) / 8) {
-            if (error) strcpy (error, "not a valid WavPack file!");
-            return WavpackCloseFile (wpc);
-    }
-
-    wpc->total_samples = ChunkHeader.ckSize / wavhdr.NumChannels /
-        ((wavhdr.BitsPerSample > 16) ? 3 : 2);
-
-    if (wpc->reader->read_bytes (wpc->wv_in, &wphdr, 10) != 10) {
-        if (error) strcpy (error, "not a valid WavPack file!");
-        return WavpackCloseFile (wpc);
-    }
-
-    if (((char *) &wphdr) [8] == 2 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10, 2) != 2)) {
-        if (error) strcpy (error, "not a valid WavPack file!");
-        return WavpackCloseFile (wpc);
-    }
-    else if (((char *) &wphdr) [8] == 3 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10,
-        sizeof (wphdr) - 10) != sizeof (wphdr) - 10)) {
-            if (error) strcpy (error, "not a valid WavPack file!");
-            return WavpackCloseFile (wpc);
-    }
-
-    little_endian_to_native (&wphdr, WavpackHeader3Format);
-
-    // make sure this is a version we know about
-
-    if (strncmp (wphdr.ckID, "wvpk", 4) || wphdr.version < 1 || wphdr.version > 3) {
-        if (error) strcpy (error, "not a valid WavPack file!");
-        return WavpackCloseFile (wpc);
-    }
-
-    // Because I ran out of flag bits in the WavPack header, an amazingly ugly
-    // kludge was forced upon me! This code takes care of preparing the flags
-    // field for internal use and checking for unknown formats we can't decode
-
-    if (wphdr.version == 3) {
-
-        if (wphdr.flags & EXTREME_DECORR) {
-
-            if ((wphdr.flags & NOT_STORED_FLAGS) ||
-                ((wphdr.bits) &&
-                (((wphdr.flags & NEW_HIGH_FLAG) &&
-                (wphdr.flags & (FAST_FLAG | HIGH_FLAG))) ||
-                (wphdr.flags & CROSS_DECORR)))) {
-                    if (error) strcpy (error, "not a valid WavPack file!");
-                    return WavpackCloseFile (wpc);
-            }
-
-            if (wphdr.flags & CANCEL_EXTREME)
-                wphdr.flags &= ~(EXTREME_DECORR | CANCEL_EXTREME);
-        }
-        else
-            wphdr.flags &= ~CROSS_DECORR;
-    }
-
-    // check to see if we should look for a "correction" file, and if so try
-    // to open it for reading, then set WVC_FLAG accordingly
-
-    if (wpc->wvc_in && wphdr.version == 3 && wphdr.bits && (wphdr.flags & NEW_HIGH_FLAG)) {
-        wpc->file2len = wpc->reader->get_length (wpc->wvc_in);
-        wphdr.flags |= WVC_FLAG;
-        wpc->wvc_flag = TRUE;
-    }
-    else
-        wphdr.flags &= ~WVC_FLAG;
-
-    // check WavPack version to handle special requirements of versions
-    // before 3.0 that had smaller headers
-
-    if (wphdr.version < 3) {
-        wphdr.total_samples = wpc->total_samples;
-        wphdr.flags = wavhdr.NumChannels == 1 ? MONO_FLAG : 0;
-        wphdr.shift = 16 - wavhdr.BitsPerSample;
-
-        if (wphdr.version == 1)
-            wphdr.bits = 0;
-    }
-
-    wpc->config.sample_rate = wavhdr.SampleRate;
-    wpc->config.num_channels = wavhdr.NumChannels;
-    wpc->config.channel_mask = 5 - wavhdr.NumChannels;
-
-    if (wphdr.flags & MONO_FLAG)
-        wpc->config.flags |= CONFIG_MONO_FLAG;
-
-    if (wphdr.flags & EXTREME_DECORR)
-        wpc->config.flags |= CONFIG_HIGH_FLAG;
-
-    if (wphdr.bits) {
-        if (wphdr.flags & NEW_HIGH_FLAG)
-            wpc->config.flags |= CONFIG_HYBRID_FLAG;
-        else
-            wpc->config.flags |= CONFIG_LOSSY_MODE;
-    }
-    else if (!(wphdr.flags & HIGH_FLAG))
-        wpc->config.flags |= CONFIG_FAST_FLAG;
-
-    wpc->config.bytes_per_sample = (wphdr.flags & BYTES_3) ? 3 : 2;
-    wpc->config.bits_per_sample = wavhdr.BitsPerSample;
-
-    memcpy (&wps->wphdr, &wphdr, sizeof (wphdr));
-    wps->wvbits.bufsiz = wps->wvcbits.bufsiz = 1024 * 1024;
-    return wpc;
-}
-
-// return currently decoded sample index
-
-uint32_t get_sample_index3 (WavpackContext *wpc)
-{
-    WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3;
-
-    return (wps) ? wps->sample_index : (uint32_t) -1;
-}
-
-int get_version3 (WavpackContext *wpc)
-{
-    WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3;
-
-    return (wps) ? wps->wphdr.version : 0;
-}
-
-void free_stream3 (WavpackContext *wpc)
-{
-    WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3;
-
-    if (wps) {
-#ifndef NO_SEEKING
-        if (wps->unpack_data)
-            free (wps->unpack_data);
-#endif
-        if (wps->wphdr.flags & WVC_FLAG)
-            bs_close_read3 (&wps->wvcbits);
-
-        bs_close_read3 (&wps->wvbits);
-
-        free (wps);
-    }
-}
-
-static void bs_read3 (Bitstream3 *bs)
-{
-    uint32_t bytes_read;
-
-    bytes_read = bs->reader->read_bytes (bs->id, bs->buf, bs->bufsiz);
-    bs->end = bs->buf + bytes_read;
-    bs->fpos += bytes_read;
-
-    if (bs->end == bs->buf) {
-        memset (bs->buf, -1, bs->bufsiz);
-        bs->end += bs->bufsiz;
-    }
-
-    bs->ptr = bs->buf;
-}
-
-// Open the specified BitStream and associate with the specified file. The
-// "bufsiz" field of the structure must be preset with the desired buffer
-// size and the file's read pointer must be set to where the desired bit
-// data is located.  A return value of TRUE indicates an error in
-// allocating buffer space.
-
-static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader *reader, void *id)
-{
-    bs->fpos = (bs->reader = reader)->get_pos (bs->id = id);
-
-    if (!bs->buf)
-        bs->buf = (unsigned char *) malloc (bs->bufsiz);
-
-    bs->end = bs->buf + bs->bufsiz;
-    bs->ptr = bs->end - 1;
-    bs->sr = bs->bc = 0;
-    bs->error = bs->buf ? 0 : 1;
-    bs->wrap = bs_read3;
-    return bs->error;
-}
-
-#ifndef NO_SEEKING
-
-// This function is called after a call to unpack_restore() has restored
-// the BitStream structure to a previous state and causes any required data
-// to be read from the file. This function is NOT supported for overlapped
-// operation.
-
-static void bs_restore3 (Bitstream3 *bs)
-{
-    uint32_t bytes_to_read = (uint32_t)(bs->end - bs->ptr - 1), bytes_read;
-
-    bs->reader->set_pos_abs (bs->id, bs->fpos - bytes_to_read);
-
-    if (bytes_to_read > 0) {
-
-        bytes_read = bs->reader->read_bytes (bs->id, bs->ptr + 1, bytes_to_read);
-
-        if (bytes_to_read != bytes_read)
-            bs->end = bs->ptr + 1 + bytes_read;
-    }
-}
-
-#endif
-
-// This function is called to release any resources used by the BitStream
-// and position the file pointer to the first byte past the read bits.
-
-static void bs_close_read3 (Bitstream3 *bs)
-{
-    if (bs->buf) {
-        free (bs->buf);
-        CLEAR (*bs);
-    }
-}
-
-static uint32_t bs_unused_bytes (Bitstream3 *bs)
-{
-    if (bs->bc < 8) {
-        bs->bc += 8;
-        bs->ptr++;
-    }
-
-    return (uint32_t)(bs->end - bs->ptr);
-}
-
-static unsigned char *bs_unused_data (Bitstream3 *bs)
-{
-    if (bs->bc < 8) {
-        bs->bc += 8;
-        bs->ptr++;
-    }
-
-    return bs->ptr;
-}
-
-#ifndef NO_UNPACK
+static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader64 *reader, void *id);
+static uint32_t bs_unused_bytes (Bitstream3 *bs);
+static unsigned char *bs_unused_data (Bitstream3 *bs);
+static void init_words3 (WavpackStream3 *wps);

 //////////////////////////////// local macros /////////////////////////////////

@ -426,13 +63,13 @@ static const signed char extreme_terms [] = { 1,1,1,2,4,-1,1,2,3,6,-2,8,5,7,4,1,
 static const signed char default_terms [] = { 1,1,1,-1,2,1,-2 };
 static const signed char simple_terms []  = { 1,1,1,1 };

+///////////////////////////// executable code ////////////////////////////////
+
 // This function initializes everything required to unpack WavPack
 // bitstreams and must be called before any unpacking is performed. Note
 // that the (WavpackHeader3 *) in the WavpackStream3 struct must be valid.

-static void init_words3 (WavpackStream3 *wps);
-
-static void unpack_init3 (WavpackStream3 *wps)
+void unpack_init3 (WavpackStream3 *wps)
 {
    int flags = wps->wphdr.flags;
    struct decorr_pass *dpp;
@ -461,9 +98,6 @@ static void unpack_init3 (WavpackStream3 *wps)

 #ifndef NO_SEEKING

-#define SAVE(destin, item) { memcpy (destin, &item, sizeof (item)); destin = (char *) destin + sizeof (item); }
-#define RESTORE(item, source) { memcpy (&item, source, sizeof (item)); source = (char *) source + sizeof (item); }
-
 // This function returns the size (in bytes) required to save the unpacking
 // context. Note that the (WavpackHeader3 *) in the WavpackStream3 struct
 // must be valid.
@ -504,7 +138,7 @@ static int unpack_size (WavpackStream3 *wps)
    }

    if (flags & (HIGH_FLAG | NEW_HIGH_FLAG))
-        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
+        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
            if (dpp->term > 0) {
                byte_sum += sizeof (dpp->samples_A [0]) * dpp->term;
                byte_sum += sizeof (dpp->weight_A);
@ -518,6 +152,7 @@ static int unpack_size (WavpackStream3 *wps)
                byte_sum += sizeof (dpp->samples_A [0]) + sizeof (dpp->samples_B [0]);
                byte_sum += sizeof (dpp->weight_A) + sizeof (dpp->weight_B);
            }
+        }

    return byte_sum;
 }
@ -572,7 +207,7 @@ static void *unpack_save (WavpackStream3 *wps, void *destin)
    }

    if (flags & (HIGH_FLAG | NEW_HIGH_FLAG))
-        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
+        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
            if (dpp->term > 0) {
                int count = dpp->term;
                int index = wps->dc.m;
@ -602,177 +237,11 @@ static void *unpack_save (WavpackStream3 *wps, void *destin)
                SAVE (destin, dpp->samples_A [0]);
                SAVE (destin, dpp->samples_B [0]);
            }
+        }

    return destin;
 }

-// This function restores the unpacking context from the specified pointer
-// and returns the updated pointer. After this call, unpack_samples() will
-// continue where it left off immediately before unpack_save() was called.
-// If the WavPack files and bitstreams might have been closed and reopened,
-// then the "keep_resources" flag should be set to avoid using the "old"
-// resources that were originally saved (and are probably now invalid).
-
-static void *unpack_restore (WavpackStream3 *wps, void *source, int keep_resources)
-{
-    int flags = wps->wphdr.flags, tcount;
-    struct decorr_pass *dpp;
-    FILE *temp_file;
-    unsigned char *temp_buf;
-
-    unpack_init3 (wps);
-    temp_file = wps->wvbits.id;
-    temp_buf = wps->wvbits.buf;
-    RESTORE (wps->wvbits, source);
-
-    if (keep_resources) {
-        wps->wvbits.id = temp_file;
-        wps->wvbits.ptr += temp_buf - wps->wvbits.buf;
-        wps->wvbits.end += temp_buf - wps->wvbits.buf;
-        wps->wvbits.buf = temp_buf;
-    }
-
-    bs_restore3 (&wps->wvbits);
-
-    if (flags & WVC_FLAG) {
-        temp_file = wps->wvcbits.id;
-        temp_buf = wps->wvcbits.buf;
-        RESTORE (wps->wvcbits, source);
-
-        if (keep_resources) {
-            wps->wvcbits.id = temp_file;
-            wps->wvcbits.ptr += temp_buf - wps->wvcbits.buf;
-            wps->wvcbits.end += temp_buf - wps->wvcbits.buf;
-            wps->wvcbits.buf = temp_buf;
-        }
-
-        bs_restore3 (&wps->wvcbits);
-    }
-
-    if (wps->wphdr.version == 3) {
-        if (wps->wphdr.bits) {
-            RESTORE (wps->w4, source);
-        }
-        else {
-            RESTORE (wps->w1, source);
-        }
-
-        RESTORE (wps->w3, source);
-        RESTORE (wps->dc.crc, source);
-    }
-    else
-        RESTORE (wps->w2, source);
-
-    if (wps->wphdr.bits) {
-        RESTORE (wps->dc.error, source);
-    }
-    else {
-        RESTORE (wps->dc.sum_level, source);
-        RESTORE (wps->dc.left_level, source);
-        RESTORE (wps->dc.right_level, source);
-        RESTORE (wps->dc.diff_level, source);
-    }
-
-    if (flags & OVER_20) {
-        RESTORE (wps->dc.last_extra_bits, source);
-        RESTORE (wps->dc.extra_bits_count, source);
-    }
-
-    if (!(flags & EXTREME_DECORR)) {
-        RESTORE (wps->dc.sample, source);
-        RESTORE (wps->dc.weight, source);
-    }
-
-    if (flags & (HIGH_FLAG | NEW_HIGH_FLAG))
-        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
-            if (dpp->term > 0) {
-                int count = dpp->term;
-                int index = wps->dc.m;
-
-                RESTORE (dpp->weight_A, source);
-
-                while (count--) {
-                    RESTORE (dpp->samples_A [index], source);
-                    index = (index + 1) & (MAX_TERM - 1);
-                }
-
-                if (!(flags & MONO_FLAG)) {
-                    count = dpp->term;
-                    index = wps->dc.m;
-
-                    RESTORE (dpp->weight_B, source);
-
-                    while (count--) {
-                        RESTORE (dpp->samples_B [index], source);
-                        index = (index + 1) & (MAX_TERM - 1);
-                    }
-                }
-            }
-            else {
-                RESTORE (dpp->weight_A, source);
-                RESTORE (dpp->weight_B, source);
-                RESTORE (dpp->samples_A [0], source);
-                RESTORE (dpp->samples_B [0], source);
-            }
-
-    return source;
-}
-
-// This is an extension for WavpackSeekSample (). Note that because WavPack
-// files created prior to version 4.0 are not inherently seekable, this
-// function could take a long time if a forward seek is requested to an
-// area that has not been played (or seeked through) yet.
-
-
-int seek_sample3 (WavpackContext *wpc, uint32_t desired_index)
-{
-    int points_index = desired_index / ((wpc->total_samples >> 8) + 1);
-    WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3;
-
-    if (desired_index >= wpc->total_samples)
-        return FALSE;
-
-    while (points_index)
-        if (wps->index_points [points_index].saved &&
-            wps->index_points [points_index].sample_index <= desired_index)
-                break;
-        else
-            points_index--;
-
-    if (wps->index_points [points_index].saved)
-        if (wps->index_points [points_index].sample_index > wps->sample_index ||
-            wps->sample_index > desired_index) {
-                wps->sample_index = wps->index_points [points_index].sample_index;
-                unpack_restore (wps, wps->unpack_data + points_index * wps->unpack_size, TRUE);
-        }
-
-    if (desired_index > wps->sample_index) {
-        int32_t *buffer = (int32_t *) malloc (1024 * (wps->wphdr.flags & MONO_FLAG ? 4 : 8));
-        uint32_t samples_to_skip = desired_index - wps->sample_index;
-
-        while (1) {
-            if (samples_to_skip > 1024) {
-                if (unpack_samples3 (wpc, buffer, 1024) == 1024)
-                    samples_to_skip -= 1024;
-                else
-                    break;
-            }
-            else {
-                samples_to_skip -= unpack_samples3 (wpc, buffer, samples_to_skip);
-                break;
-            }
-        }
-
-        free (buffer);
-
-        if (samples_to_skip)
-            return FALSE;
-    }
-
-    return TRUE;
-}
-
-
 #endif

 // This monster actually unpacks the WavPack bitstream(s) into the specified
@ -797,7 +266,7 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c
    WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3;
    int shift = wps->wphdr.shift, flags = wps->wphdr.flags, min_weight = 0, m = wps->dc.m, tcount;
 #ifndef NO_SEEKING
-    int points_index = wps->sample_index / ((wpc->total_samples >> 8) + 1);
+    int points_index = wps->sample_index / (((uint32_t) wpc->total_samples >> 8) + 1);
 #endif
    int32_t min_value, max_value, min_shifted, max_shifted;
    int32_t correction [2], crc = wps->dc.crc;
@ -820,7 +289,7 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c
 #endif

    if (wps->sample_index + sample_count > wpc->total_samples)
-        sample_count = wpc->total_samples - wps->sample_index;
+        sample_count = (uint32_t) (wpc->total_samples - wps->sample_index);

    if (!sample_count)
        return 0;
@ -1705,22 +1174,22 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c
            wpc->crc_errors++;

        if (wpc->open_flags & OPEN_WRAPPER) {
-            unsigned char *temp = malloc (1024);
+            unsigned char *temp = (unsigned char *)malloc (1024);
            uint32_t bcount;

            if (bs_unused_bytes (&wps->wvbits)) {
-                wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bs_unused_bytes (&wps->wvbits));
+                wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + bs_unused_bytes (&wps->wvbits));
                memcpy (wpc->wrapper_data + wpc->wrapper_bytes, bs_unused_data (&wps->wvbits), bs_unused_bytes (&wps->wvbits));
                wpc->wrapper_bytes += bs_unused_bytes (&wps->wvbits);
            }

            while (1) {
-                bcount = wpc->reader->read_bytes (wpc->wv_in, temp, sizeof (temp));
+                bcount = wpc->reader->read_bytes (wpc->wv_in, temp, 1024);

                if (!bcount)
                    break;

-                wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bcount);
+                wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + bcount);
                memcpy (wpc->wrapper_data + wpc->wrapper_bytes, temp, bcount);
                wpc->wrapper_bytes += bcount;
            }
@ -1733,7 +1202,7 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c
                for (c = 0; c < 16 && wpc->wrapper_data [c] == 0xff; ++c);

                if (c == 16) {
-                    memcpy (wpc->wrapper_data, wpc->wrapper_data + 16, wpc->wrapper_bytes - 16);
+                    memmove (wpc->wrapper_data, wpc->wrapper_data + 16, wpc->wrapper_bytes - 16);
                    wpc->wrapper_bytes -= 16;
                }
                else {
@ -1753,12 +1222,6 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c
    return i;
 }

-///////////////////////////// local table storage ////////////////////////////
-
-extern const uint32_t bitset [];
-extern const uint32_t bitmask [];
-extern const char nbits_table [];
-
 // This function initializes everything required to receive words with this
 // module and must be called BEFORE any other function in this module.

@ -1775,18 +1238,6 @@ static void init_words3 (WavpackStream3 *wps)
        wps->w4.bitrate = (wps->wphdr.bits / 2) - 768;
 }

-// This macro counts the number of bits that are required to specify the
-// unsigned 32-bit value, counting from the LSB to the most significant bit
-// that is set. Return range is 0 - 32.
-
-#define count_bits(av) ( \
- (av) < (1 << 8) ? nbits_table [av] : \
-  ( \
-   (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \
-   ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \
-  ) \
-)
-
 static int32_t FASTCALL get_word1 (WavpackStream3 *wps, int chan)
 {
    uint32_t tmp1, tmp2, avalue;
@ -2033,7 +1484,7 @@ static int32_t FASTCALL get_word3 (WavpackStream3 *wps, int chan)
    }
 }

-static int FASTCALL _log2 (uint32_t avalue);
+static int FASTCALL wp3_log2 (uint32_t avalue);

 static int32_t FASTCALL get_word4 (WavpackStream3 *wps, int chan, int32_t *correction)
 {
@ -2076,22 +1527,22 @@ static int32_t FASTCALL get_word4 (WavpackStream3 *wps, int chan, int32_t *corre
        int slow_log_0, slow_log_1, balance;

        if (wps->wphdr.flags & MONO_FLAG) {
-            wps->w4.bits_acc [0] += wps->w4.bitrate + _log2 (wps->w4.fast_level [0]) - _log2 (wps->w4.slow_level [0]) + (3 << 8);
+            wps->w4.bits_acc [0] += wps->w4.bitrate + wp3_log2 (wps->w4.fast_level [0]) - wp3_log2 (wps->w4.slow_level [0]) + (3 << 8);

            if (wps->w4.bits_acc [0] < 0)
                wps->w4.bits_acc [0] = 0;
        }
        else {
-            slow_log_0 = _log2 (wps->w4.slow_level [0]);
-            slow_log_1 = _log2 (wps->w4.slow_level [1]);
+            slow_log_0 = wp3_log2 (wps->w4.slow_level [0]);
+            slow_log_1 = wp3_log2 (wps->w4.slow_level [1]);

            if (wps->wphdr.flags & JOINT_STEREO)
                balance = (slow_log_1 - slow_log_0 + 257) >> 1;
            else
                balance = (slow_log_1 - slow_log_0 + 1) >> 1;

-            wps->w4.bits_acc [0] += wps->w4.bitrate - balance + _log2 (wps->w4.fast_level [0]) - slow_log_0 + (3 << 8);
-            wps->w4.bits_acc [1] += wps->w4.bitrate + balance + _log2 (wps->w4.fast_level [1]) - slow_log_1 + (3 << 8);
+            wps->w4.bits_acc [0] += wps->w4.bitrate - balance + wp3_log2 (wps->w4.fast_level [0]) - slow_log_0 + (3 << 8);
+            wps->w4.bits_acc [1] += wps->w4.bitrate + balance + wp3_log2 (wps->w4.fast_level [1]) - slow_log_1 + (3 << 8);

            if (wps->w4.bits_acc [0] + wps->w4.bits_acc [1] < 0)
                wps->w4.bits_acc [0] = wps->w4.bits_acc [1] = 0;
@ -2171,7 +1622,7 @@ static int32_t FASTCALL get_word4 (WavpackStream3 *wps, int chan, int32_t *corre
 // fraction) from the supplied value. Using logarithms makes comparing
 // signal level values and calculating fractional bitrates much easier.

-static int FASTCALL _log2 (uint32_t avalue)
+static int FASTCALL wp3_log2 (uint32_t avalue)
 {
    int dbits;

@ -2191,5 +1642,62 @@ static int FASTCALL _log2 (uint32_t avalue)
    }
 }

-#endif
+static void bs_read3 (Bitstream3 *bs)
+{
+    uint32_t bytes_read;
+
+    bytes_read = bs->reader->read_bytes (bs->id, bs->buf, bs->bufsiz);
+    bs->end = bs->buf + bytes_read;
+    bs->fpos += bytes_read;
+
+    if (bs->end == bs->buf) {
+        memset (bs->buf, -1, bs->bufsiz);
+        bs->end += bs->bufsiz;
+    }
+
+    bs->ptr = bs->buf;
+}
+
+// Open the specified BitStream and associate with the specified file. The
+// "bufsiz" field of the structure must be preset with the desired buffer
+// size and the file's read pointer must be set to where the desired bit
+// data is located.  A return value of TRUE indicates an error in
+// allocating buffer space.
+
+static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader64 *reader, void *id)
+{
+    bs->fpos = (bs->reader = reader)->get_pos (bs->id = id);
+
+    if (!bs->buf)
+        bs->buf = (unsigned char *) malloc (bs->bufsiz);
+
+    bs->end = bs->buf + bs->bufsiz;
+    bs->ptr = bs->end - 1;
+    bs->sr = bs->bc = 0;
+    bs->error = bs->buf ? 0 : 1;
+    bs->wrap = bs_read3;
+    return bs->error;
+}
+
+static uint32_t bs_unused_bytes (Bitstream3 *bs)
+{
+    if (bs->bc < 8) {
+        bs->bc += 8;
+        bs->ptr++;
+    }
+
+    return (uint32_t)(bs->end - bs->ptr);
+}
+
+static unsigned char *bs_unused_data (Bitstream3 *bs)
+{
+    if (bs->bc < 8) {
+        bs->bc += 8;
+        bs->ptr++;
+    }
+
+    return bs->ptr;
+}
+
+#endif  // ENABLE_LEGACY

--- a/third_party/wavpack/src/unpack3.h
+++ b/third_party/wavpack/src/unpack3.h
@ -12,9 +12,9 @@
 // decoding old (versions 1, 2 & 3) WavPack files.

 typedef struct {
-    unsigned short FormatTag, NumChannels;
+    uint16_t FormatTag, NumChannels;
    uint32_t SampleRate, BytesPerSecond;
-    unsigned short BlockAlign, BitsPerSample;
+    uint16_t BlockAlign, BitsPerSample;
 } WaveHeader3;

 #define WaveHeader3Format "SSLLSS"
@ -22,9 +22,9 @@ typedef struct {
 typedef struct {
    char ckID [4];
    int32_t ckSize;
-    short version;
-    short bits;                 // added for version 2.00
-    short flags, shift;         // added for version 3.00
+    int16_t version;
+    int16_t bits;                 // added for version 2.00
+    int16_t flags, shift;         // added for version 3.00
    int32_t total_samples, crc, crc2;
    char extension [4], extra_bc, extras [3];
 } WavpackHeader3;
@ -62,8 +62,9 @@ typedef struct {
 typedef struct bs3 {
    void (*wrap)(struct bs3 *bs);
    unsigned char *buf, *end, *ptr;
-    uint32_t bufsiz, fpos, sr;
-    WavpackStreamReader *reader;
+    uint32_t bufsiz, sr;
+    int64_t fpos;
+    WavpackStreamReader64 *reader;
    int error, bc;
    void *id;
 } Bitstream3;
@ -111,3 +112,8 @@ typedef struct {
        int bits_acc [2], bitrate;
    } w4;
 } WavpackStream3;
+
+// SAVE copies the raw bytes of "item" to the memory at "destin" and then
+// advances "destin" past them; RESTORE is the inverse, copying sizeof (item)
+// bytes from "source" into "item" and advancing "source". Both macros expand
+// to a braced block (not a single expression) and mention their arguments
+// more than once, so the arguments must be free of side effects and the
+// pointer argument must be a modifiable lvalue.
+
+#define SAVE(destin, item) { memcpy (destin, &item, sizeof (item)); destin = (char *) destin + sizeof (item); }
+#define RESTORE(item, source) { memcpy (&item, source, sizeof (item)); source = (char *) source + sizeof (item); }
+
+void unpack_init3 (WavpackStream3 *wps);
--- a/third_party/wavpack/src/unpack3_open.c
+++ b/third_party/wavpack/src/unpack3_open.c
@ -0,0 +1,289 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack3_open.c
+
+// This module provides an extension to the open_utils.c module for handling
+// WavPack files prior to version 4.0, not including "raw" files. As these
+// modes are all obsolete and are no longer written, this code will not be
+// fully documented other than the global functions. However, full documentation
+// is provided in the version 3.97 source code. Note that this module only
+// provides the functionality of opening the files and obtaining information
+// from them; the actual audio decoding is located in the unpack3.c module.
+
+#ifdef ENABLE_LEGACY
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+#include "unpack3.h"
+
+#define ATTEMPT_ERROR_MUTING
+
+// Store "message" in the caller's error buffer (when one was supplied) and
+// close the context. The return value is whatever WavpackCloseFile ()
+// returns, so failure paths can simply "return fail_open3 (...)".
+
+static WavpackContext *fail_open3 (WavpackContext *wpc, char *error, const char *message)
+{
+    if (error)
+        strcpy (error, message);
+
+    return WavpackCloseFile (wpc);
+}
+
+// Append "bytes" bytes found at "data" to the RIFF wrapper accumulated in
+// the context. Returns TRUE on success; returns FALSE, leaving the existing
+// wrapper pointer and length untouched, if the wrapper cannot be grown.
+
+static int append_wrapper3 (WavpackContext *wpc, const void *data, uint32_t bytes)
+{
+    unsigned char *grown = (unsigned char *) realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes);
+
+    if (!grown)
+        return FALSE;
+
+    memcpy (grown + wpc->wrapper_bytes, data, bytes);
+    wpc->wrapper_data = grown;
+    wpc->wrapper_bytes += bytes;
+    return TRUE;
+}
+
+// Consume "bytes" bytes of chunk payload from the main input. When the file
+// was opened with OPEN_WRAPPER the bytes are read straight onto the end of
+// the wrapper; otherwise they are read into a scratch buffer and discarded.
+// Returns FALSE on allocation failure or if fewer bytes could be read.
+
+static int skip_chunk_data3 (WavpackContext *wpc, uint32_t bytes)
+{
+    unsigned char *scratch;
+    int complete;
+
+    if (wpc->open_flags & OPEN_WRAPPER) {
+        unsigned char *grown = (unsigned char *) realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes);
+
+        if (!grown)
+            return FALSE;
+
+        wpc->wrapper_data = grown;
+
+        // only count the bytes as part of the wrapper once they really arrived
+        if (wpc->reader->read_bytes (wpc->wv_in, grown + wpc->wrapper_bytes, bytes) != (int32_t) bytes)
+            return FALSE;
+
+        wpc->wrapper_bytes += bytes;
+        return TRUE;
+    }
+
+    scratch = (unsigned char *) malloc (bytes);
+
+    if (!scratch)
+        return FALSE;
+
+    complete = wpc->reader->read_bytes (wpc->wv_in, scratch, bytes) == (int32_t) bytes;
+    free (scratch);
+    return complete;
+}
+
+// This provides an extension to the WavpackOpenFileRead () function contained
+// in the wputils.c module. It is assumed that an 'R' had been read as the
+// first character of the file/stream (indicating a non-raw pre version 4.0
+// WavPack file) and had been pushed back onto the stream (or simply seeked
+// back to).
+//
+// On success the context is filled in from the RIFF "fmt " chunk and the
+// legacy WavPack header and "wpc" is returned. On any failure (malformed
+// file, short read, or failed allocation) a message is copied to "error" if
+// it is non-NULL, and the result of WavpackCloseFile (wpc) is returned.
+
+WavpackContext *open_file3 (WavpackContext *wpc, char *error)
+{
+    const char *invalid = "not a valid WavPack file!";
+    RiffChunkHeader riff_hdr;
+    ChunkHeader chunk_hdr;
+    WavpackHeader3 wphdr;
+    WavpackStream3 *wps;
+    WaveHeader3 wavhdr;
+
+    // wphdr is only partially read for version 1 and 2 files but is later
+    // copied in full, so start both headers from a known (zeroed) state
+    CLEAR (wavhdr);
+    CLEAR (wphdr);
+    wpc->stream3 = wps = (WavpackStream3 *) malloc (sizeof (WavpackStream3));
+
+    if (!wps)
+        return fail_open3 (wpc, error, "can't allocate memory!");
+
+    CLEAR (*wps);
+
+    if (wpc->reader->read_bytes (wpc->wv_in, &riff_hdr, sizeof (riff_hdr)) != sizeof (riff_hdr))
+        return fail_open3 (wpc, error, invalid);
+
+    // only files wrapped in a RIFF / WAVE header are accepted here
+    if (strncmp (riff_hdr.ckID, "RIFF", 4) || strncmp (riff_hdr.formType, "WAVE", 4))
+        return fail_open3 (wpc, error, invalid);
+
+    if (wpc->open_flags & OPEN_WRAPPER) {
+        wpc->wrapper_data = (unsigned char *) malloc (wpc->wrapper_bytes = sizeof (riff_hdr));
+
+        if (!wpc->wrapper_data) {
+            wpc->wrapper_bytes = 0;
+            return fail_open3 (wpc, error, invalid);
+        }
+
+        memcpy (wpc->wrapper_data, &riff_hdr, sizeof (riff_hdr));
+    }
+
+    // Read the various chunks until we get to the "data" chunk (the WavPack
+    // header should follow it). The "fmt " chunk fills in wavhdr; any other
+    // chunk is skipped (and kept in the wrapper if that was requested).
+
+    while (1) {
+        if (wpc->reader->read_bytes (wpc->wv_in, &chunk_hdr, sizeof (chunk_hdr)) != sizeof (chunk_hdr))
+            return fail_open3 (wpc, error, invalid);
+
+        // the wrapper keeps the header exactly as stored in the file, so it
+        // is appended before the endian conversion below
+        if ((wpc->open_flags & OPEN_WRAPPER) && !append_wrapper3 (wpc, &chunk_hdr, sizeof (chunk_hdr)))
+            return fail_open3 (wpc, error, invalid);
+
+        WavpackLittleEndianToNative (&chunk_hdr, ChunkHeaderFormat);
+
+        if (!strncmp (chunk_hdr.ckID, "fmt ", 4)) {
+
+            if (chunk_hdr.ckSize < sizeof (wavhdr) ||
+                wpc->reader->read_bytes (wpc->wv_in, &wavhdr, sizeof (wavhdr)) != sizeof (wavhdr))
+                    return fail_open3 (wpc, error, invalid);
+
+            if ((wpc->open_flags & OPEN_WRAPPER) && !append_wrapper3 (wpc, &wavhdr, sizeof (wavhdr)))
+                return fail_open3 (wpc, error, invalid);
+
+            WavpackLittleEndianToNative (&wavhdr, WaveHeader3Format);
+
+            // anything in the chunk beyond the fields we use is skipped,
+            // rounded up to an even byte count and limited to 1 MB
+            if (chunk_hdr.ckSize > sizeof (wavhdr)) {
+                uint32_t bytes_to_skip = (chunk_hdr.ckSize + 1 - sizeof (wavhdr)) & ~1L;
+
+                if (bytes_to_skip > 1024 * 1024 || !skip_chunk_data3 (wpc, bytes_to_skip))
+                    return fail_open3 (wpc, error, invalid);
+            }
+        }
+        else if (!strncmp (chunk_hdr.ckID, "data", 4))
+            break;
+        else if ((chunk_hdr.ckSize + 1) & ~1L) {
+            uint32_t bytes_to_skip = (chunk_hdr.ckSize + 1) & ~1L;
+
+            if (bytes_to_skip > 1024 * 1024 || !skip_chunk_data3 (wpc, bytes_to_skip))
+                return fail_open3 (wpc, error, invalid);
+        }
+    }
+
+    // NumChannels is tested for zero first so the divisions that follow in
+    // the same expression are never reached with a zero divisor
+
+    if (wavhdr.FormatTag != 1 || !wavhdr.NumChannels || wavhdr.NumChannels > 2 ||
+        !wavhdr.SampleRate || wavhdr.BitsPerSample < 16 || wavhdr.BitsPerSample > 24 ||
+        wavhdr.BlockAlign / wavhdr.NumChannels > 3 || wavhdr.BlockAlign % wavhdr.NumChannels ||
+        wavhdr.BlockAlign / wavhdr.NumChannels < (wavhdr.BitsPerSample + 7) / 8)
+            return fail_open3 (wpc, error, invalid);
+
+    // chunk_hdr now describes the "data" chunk
+
+    wpc->total_samples = chunk_hdr.ckSize / wavhdr.NumChannels /
+        ((wavhdr.BitsPerSample > 16) ? 3 : 2);
+
+    // the first 10 bytes are always read; the byte at offset 8 then selects
+    // how much more of the header to read (2 more bytes when it is 2, the
+    // rest of the structure when it is 3)
+
+    if (wpc->reader->read_bytes (wpc->wv_in, &wphdr, 10) != 10)
+        return fail_open3 (wpc, error, invalid);
+
+    if (((char *) &wphdr) [8] == 2 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10, 2) != 2))
+        return fail_open3 (wpc, error, invalid);
+    else if (((char *) &wphdr) [8] == 3 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10,
+        sizeof (wphdr) - 10) != sizeof (wphdr) - 10))
+            return fail_open3 (wpc, error, invalid);
+
+    WavpackLittleEndianToNative (&wphdr, WavpackHeader3Format);
+
+    // make sure this is a version we know about
+
+    if (strncmp (wphdr.ckID, "wvpk", 4) || wphdr.version < 1 || wphdr.version > 3)
+        return fail_open3 (wpc, error, invalid);
+
+    // Because I ran out of flag bits in the WavPack header, an amazingly ugly
+    // kludge was forced upon me! This code takes care of preparing the flags
+    // field for internal use and checking for unknown formats we can't decode
+
+    if (wphdr.version == 3) {
+
+        if (wphdr.flags & EXTREME_DECORR) {
+
+            if ((wphdr.flags & NOT_STORED_FLAGS) ||
+                ((wphdr.bits) &&
+                (((wphdr.flags & NEW_HIGH_FLAG) &&
+                (wphdr.flags & (FAST_FLAG | HIGH_FLAG))) ||
+                (wphdr.flags & CROSS_DECORR))))
+                    return fail_open3 (wpc, error, invalid);
+
+            if (wphdr.flags & CANCEL_EXTREME)
+                wphdr.flags &= ~(EXTREME_DECORR | CANCEL_EXTREME);
+        }
+        else
+            wphdr.flags &= ~CROSS_DECORR;
+    }
+
+    // check to see if we should look for a "correction" file, and if so try
+    // to open it for reading, then set WVC_FLAG accordingly
+
+    if (wpc->wvc_in && wphdr.version == 3 && wphdr.bits && (wphdr.flags & NEW_HIGH_FLAG)) {
+        wpc->file2len = wpc->reader->get_length (wpc->wvc_in);
+        wphdr.flags |= WVC_FLAG;
+        wpc->wvc_flag = TRUE;
+    }
+    else
+        wphdr.flags &= ~WVC_FLAG;
+
+    // check WavPack version to handle special requirements of versions
+    // before 3.0 that had smaller headers
+
+    if (wphdr.version < 3) {
+        wphdr.total_samples = (int32_t) wpc->total_samples;
+        wphdr.flags = wavhdr.NumChannels == 1 ? MONO_FLAG : 0;
+        wphdr.shift = 16 - wavhdr.BitsPerSample;
+
+        if (wphdr.version == 1)
+            wphdr.bits = 0;
+    }
+
+    wpc->config.sample_rate = wavhdr.SampleRate;
+    wpc->config.num_channels = wavhdr.NumChannels;
+    wpc->config.channel_mask = 5 - wavhdr.NumChannels;
+
+    if (wphdr.flags & MONO_FLAG)
+        wpc->config.flags |= CONFIG_MONO_FLAG;
+
+    if (wphdr.flags & EXTREME_DECORR)
+        wpc->config.flags |= CONFIG_HIGH_FLAG;
+
+    if (wphdr.bits) {
+        if (wphdr.flags & NEW_HIGH_FLAG)
+            wpc->config.flags |= CONFIG_HYBRID_FLAG;
+        else
+            wpc->config.flags |= CONFIG_LOSSY_MODE;
+    }
+    else if (!(wphdr.flags & HIGH_FLAG))
+        wpc->config.flags |= CONFIG_FAST_FLAG;
+
+    wpc->config.bytes_per_sample = (wphdr.flags & BYTES_3) ? 3 : 2;
+    wpc->config.bits_per_sample = wavhdr.BitsPerSample;
+
+    memcpy (&wps->wphdr, &wphdr, sizeof (wphdr));
+    wps->wvbits.bufsiz = wps->wvcbits.bufsiz = 1024 * 1024;
+    return wpc;
+}
+
+// Return the currently decoded sample index of the legacy stream, or
+// (uint32_t) -1 if the context has no legacy stream attached.
+
+uint32_t get_sample_index3 (WavpackContext *wpc)
+{
+    WavpackStream3 *legacy = (WavpackStream3 *) wpc->stream3;
+
+    if (!legacy)
+        return (uint32_t) -1;
+
+    return legacy->sample_index;
+}
+
+// Return the version field of the legacy stream's stored header, or 0 if
+// the context has no legacy stream attached.
+
+int get_version3 (WavpackContext *wpc)
+{
+    WavpackStream3 *legacy = (WavpackStream3 *) wpc->stream3;
+
+    if (!legacy)
+        return 0;
+
+    return legacy->wphdr.version;
+}
+
+// Release the legacy stream attached to the context together with the
+// buffers it owns. Does nothing when no legacy stream is attached. Note
+// that wpc->stream3 itself is not modified here.
+
+void free_stream3 (WavpackContext *wpc)
+{
+    WavpackStream3 *legacy = (WavpackStream3 *) wpc->stream3;
+
+    if (!legacy)
+        return;
+
+    // free (NULL) is a no-op, so the buffers need no individual NULL tests
+
+#ifndef NO_SEEKING
+    free (legacy->unpack_data);
+#endif
+
+    // the correction bitstream buffer is only released when WVC_FLAG is set
+    if (legacy->wphdr.flags & WVC_FLAG)
+        free (legacy->wvcbits.buf);
+
+    free (legacy->wvbits.buf);
+    free (legacy);
+}
+
+#endif  // ENABLE_LEGACY
--- a/third_party/wavpack/src/unpack3_seek.c
+++ b/third_party/wavpack/src/unpack3_seek.c
@ -0,0 +1,212 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack3_seek.c
+
+// This module provides seeking support for WavPack files prior to version 4.0.
+
+#ifdef ENABLE_LEGACY
+#ifndef NO_SEEKING
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+#include "unpack3.h"
+
+static void *unpack_restore (WavpackStream3 *wps, void *source, int keep_resources);
+static void bs_restore3 (Bitstream3 *bs);
+
+// This is an extension for WavpackSeekSample (). Note that because WavPack
+// files created prior to version 4.0 are not inherently seekable, this
+// function could take a long time if a forward seek is requested to an
+// area that has not been played (or seeked through) yet.
+
+// Seek the legacy stream so that the next sample decoded is "desired_index".
+// Returns TRUE on success. Returns FALSE if there is no legacy stream, the
+// index is not below wpc->total_samples, the scratch buffer cannot be
+// allocated, or decoding stops before the requested sample is reached.
+
+int seek_sample3 (WavpackContext *wpc, uint32_t desired_index)
+{
+    int points_index = desired_index / (((uint32_t) wpc->total_samples >> 8) + 1);
+    WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3;
+
+    // same NULL tolerance as get_sample_index3 () and get_version3 ()
+    if (!wps)
+        return FALSE;
+
+    if (desired_index >= wpc->total_samples)
+        return FALSE;
+
+    // walk back to the nearest saved index point at or before the target
+    // (entry 0 is the fallback and is used even if the test below fails)
+
+    while (points_index)
+        if (wps->index_points [points_index].saved &&
+            wps->index_points [points_index].sample_index <= desired_index)
+                break;
+        else
+            points_index--;
+
+    // restore from that index point only if it helps: i.e. it lies ahead of
+    // the current position, or the current position is already past the target
+
+    if (wps->index_points [points_index].saved)
+        if (wps->index_points [points_index].sample_index > wps->sample_index ||
+            wps->sample_index > desired_index) {
+                wps->sample_index = wps->index_points [points_index].sample_index;
+                unpack_restore (wps, wps->unpack_data + points_index * wps->unpack_size, TRUE);
+        }
+
+    // decode and discard samples, at most 1024 at a time, up to the target
+
+    if (desired_index > wps->sample_index) {
+        int32_t *buffer = (int32_t *) malloc (1024 * (wps->wphdr.flags & MONO_FLAG ? 4 : 8));
+        uint32_t samples_to_skip = desired_index - wps->sample_index;
+
+        // without a scratch buffer there is nowhere to decode into
+        if (!buffer)
+            return FALSE;
+
+        while (1) {
+            if (samples_to_skip > 1024) {
+                if (unpack_samples3 (wpc, buffer, 1024) == 1024)
+                    samples_to_skip -= 1024;
+                else
+                    break;
+            }
+            else {
+                samples_to_skip -= unpack_samples3 (wpc, buffer, samples_to_skip);
+                break;
+            }
+        }
+
+        free (buffer);
+
+        if (samples_to_skip)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// This function restores the unpacking context from the specified pointer
+// and returns the updated pointer. After this call, unpack_samples() will
+// continue where it left off immediately before unpack_save() was called.
+// If the WavPack files and bitstreams might have been closed and reopened,
+// then the "keep_resources" flag should be set to avoid using the "old"
+// resources that were originally saved (and are probably now invalid).
+
+static void *unpack_restore (WavpackStream3 *wps, void *source, int keep_resources)
+{
+    int flags = wps->wphdr.flags, tcount;
+    struct decorr_pass *dpp;
+    void *saved_id;                 // Bitstream3.id is an opaque void * handle
+    unsigned char *saved_buf;
+
+    unpack_init3 (wps);
+
+    // The current reader handle and buffer are captured before the saved
+    // Bitstream3 image overwrites them. With keep_resources set they are put
+    // back, and ptr / end are rebased from the saved buffer address onto the
+    // current buffer.
+
+    saved_id = wps->wvbits.id;
+    saved_buf = wps->wvbits.buf;
+    RESTORE (wps->wvbits, source);
+
+    if (keep_resources) {
+        wps->wvbits.id = saved_id;
+        wps->wvbits.ptr += saved_buf - wps->wvbits.buf;
+        wps->wvbits.end += saved_buf - wps->wvbits.buf;
+        wps->wvbits.buf = saved_buf;
+    }
+
+    bs_restore3 (&wps->wvbits);
+
+    // same treatment for the correction bitstream, when one is in use
+
+    if (flags & WVC_FLAG) {
+        saved_id = wps->wvcbits.id;
+        saved_buf = wps->wvcbits.buf;
+        RESTORE (wps->wvcbits, source);
+
+        if (keep_resources) {
+            wps->wvcbits.id = saved_id;
+            wps->wvcbits.ptr += saved_buf - wps->wvcbits.buf;
+            wps->wvcbits.end += saved_buf - wps->wvcbits.buf;
+            wps->wvcbits.buf = saved_buf;
+        }
+
+        bs_restore3 (&wps->wvcbits);
+    }
+
+    // From here on, which items are read (and in what order) depends on the
+    // header version, bits and flags; each RESTORE advances "source", so the
+    // sequence below must not be reordered.
+
+    if (wps->wphdr.version == 3) {
+        if (wps->wphdr.bits) {
+            RESTORE (wps->w4, source);
+        }
+        else {
+            RESTORE (wps->w1, source);
+        }
+
+        RESTORE (wps->w3, source);
+        RESTORE (wps->dc.crc, source);
+    }
+    else
+        RESTORE (wps->w2, source);
+
+    if (wps->wphdr.bits) {
+        RESTORE (wps->dc.error, source);
+    }
+    else {
+        RESTORE (wps->dc.sum_level, source);
+        RESTORE (wps->dc.left_level, source);
+        RESTORE (wps->dc.right_level, source);
+        RESTORE (wps->dc.diff_level, source);
+    }
+
+    if (flags & OVER_20) {
+        RESTORE (wps->dc.last_extra_bits, source);
+        RESTORE (wps->dc.extra_bits_count, source);
+    }
+
+    if (!(flags & EXTREME_DECORR)) {
+        RESTORE (wps->dc.sample, source);
+        RESTORE (wps->dc.weight, source);
+    }
+
+    // per-pass decorrelation state: for positive terms, "term" history
+    // samples are restored per channel starting at index dc.m and wrapping
+    // at MAX_TERM; otherwise one sample per channel is restored
+
+    if (flags & (HIGH_FLAG | NEW_HIGH_FLAG))
+        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
+            if (dpp->term > 0) {
+                int count = dpp->term;
+                int index = wps->dc.m;
+
+                RESTORE (dpp->weight_A, source);
+
+                while (count--) {
+                    RESTORE (dpp->samples_A [index], source);
+                    index = (index + 1) & (MAX_TERM - 1);
+                }
+
+                if (!(flags & MONO_FLAG)) {
+                    count = dpp->term;
+                    index = wps->dc.m;
+
+                    RESTORE (dpp->weight_B, source);
+
+                    while (count--) {
+                        RESTORE (dpp->samples_B [index], source);
+                        index = (index + 1) & (MAX_TERM - 1);
+                    }
+                }
+            }
+            else {
+                RESTORE (dpp->weight_A, source);
+                RESTORE (dpp->weight_B, source);
+                RESTORE (dpp->samples_A [0], source);
+                RESTORE (dpp->samples_B [0], source);
+            }
+        }
+
+    return source;
+}
+
+// This function is called after a call to unpack_restore() has restored
+// the BitStream structure to a previous state and causes any required data
+// to be read from the file. This function is NOT supported for overlapped
+// operation.
+
+static void bs_restore3 (Bitstream3 *bs)
+{
+    // number of bytes lying between ptr and end in the restored state
+    uint32_t wanted = (uint32_t)(bs->end - bs->ptr - 1);
+    uint32_t got;
+
+    // rewind the stream from the saved position by that many bytes
+    bs->reader->set_pos_abs (bs->id, bs->fpos - wanted);
+
+    if (wanted == 0)
+        return;
+
+    got = bs->reader->read_bytes (bs->id, bs->ptr + 1, wanted);
+
+    // a short read pulls "end" in to just past the bytes actually obtained
+    if (got != wanted)
+        bs->end = bs->ptr + 1 + got;
+}
+
+#endif      // NO_SEEKING
+#endif      // ENABLE_LEGACY
--- a/third_party/wavpack/src/unpack_armv7.S
+++ b/third_party/wavpack/src/unpack_armv7.S
@ -0,0 +1,887 @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@                           **** WAVPACK ****                            @@
+@@                  Hybrid Lossless Wavefile Compressor                   @@
+@@              Copyright (c) 1998 - 2015 Conifer Software.               @@
+@@                          All Rights Reserved.                          @@
+@@      Distributed under the BSD Software License (see license.txt)      @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+        .text
+        .align
+        .global         unpack_decorr_stereo_pass_cont_armv7
+        .global         unpack_decorr_mono_pass_cont_armv7
+
+/* This is an assembly optimized version of the following WavPack function:
+ *
+ * void decorr_stereo_pass_cont (struct decorr_pass *dpp,
+ *                               int32_t *buffer,
+ *                               int32_t sample_count,
+ *                               int32_t long_math);
+ *
+ * It performs a single pass of stereo decorrelation on the provided buffer.
+ * Note that this version of the function requires that up to 8 previous stereo
+ * samples are visible and correct. In other words, it ignores the "samples_*"
+ * fields in the decorr_pass structure and gets the history data directly
+ * from the buffer. It does, however, return the appropriate history samples
+ * to the decorr_pass structure before returning.
+ *
+ * This should work on all ARM architectures. This version of the code
+ * checks the magnitude of the decorrelation sample with a pair of shifts
+ * to avoid possible overflow (and therefore ignores the "long_math" arg).
+ * Previously I used the SSAT instruction for this, but then discovered that
+ * SSAT is not universally available (although on the armv7 I'm testing on
+ * it is slightly faster than the shifts).
+ *
+ * A mono version follows below. 
+ */
+
+/*
+ * on entry:
+ *
+ * r0 = struct decorr_pass *dpp
+ * r1 = int32_t *buffer
+ * r2 = int32_t sample_count
+ * r3 = int32_t long_math
+ */
+
+unpack_decorr_stereo_pass_cont_armv7:
+
+        stmfd   sp!, {r4 - r8, r10, r11, lr}
+
+        mov     r5, r0                  @ r5 = dpp
+        mov     r11, #512               @ r11 = 512 for rounding
+        ldr     r6, [r0, #4]            @ r6 = dpp->delta
+        ldr     r4, [r0, #8]            @ r4 = dpp->weight_A
+        ldr     r0, [r0, #12]           @ r0 = dpp->weight_B
+        cmp     r2, #0                  @ exit if no samples to process
+        beq     common_exit
+
+        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
+        ldr     r2, [r5, #0]            @ r2 = dpp->term
+        cmp     r2, #0
+        bmi     minus_term
+
+        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
+        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
+        ldr     r8, [r1, #-8]
+        ldr     r3, [r1, #-4]
+        cmp     r2, #17
+        beq     term_17_loop
+        cmp     r2, #18
+        beq     term_18_loop
+        cmp     r2, #2
+        beq     term_2_loop
+        b       term_default_loop       @ else handle default (1-8, except 2)
+
+minus_term:
+        mov     r10, #1024              @ r10 = -1024 for weight clipping
+        rsb     r10, r10, #0            @  (only used for negative terms)
+        cmn     r2, #1
+        beq     term_minus_1
+        cmn     r2, #2
+        beq     term_minus_2
+        cmn     r2, #3
+        beq     term_minus_3
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 17 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_17_loop:
+        rsb     ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S117
+        cmp     ip, #0
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        b       S118
+
+S117:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, ip
+        add     r8, r2, r8, lsl #22
+        add     r8, r8, r11, lsr #10
+        mov     r11, #512
+
+S118:   strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0
+        beq     S325
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+
+S325:   rsb     ip, r10, r3, asl #1     @ do same thing for right channel
+        mov     r10, r3
+        ldr     r2, [r1], #4
+        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S119
+        cmp     ip, #0
+        mla     r3, ip, r0, r11
+        add     r3, r2, r3, asr #10
+        b       S120
+
+S119:   mov     r3, #0
+        smlal   r11, r3, r0, ip
+        add     r3, r2, r3, lsl #22
+        add     r3, r3, r11, lsr #10
+        mov     r11, #512
+
+S120:   strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     S329
+        teq     ip, r2
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+S329:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_17_loop
+        b       store_1718              @ common exit for terms 17 & 18
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 18 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_18_loop:
+        sub     ip, r8, lr              @ decorr value =
+        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
+        add     ip, r8, ip, asr #1
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S121
+        cmp     ip, #0
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        b       S122
+
+S121:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, ip
+        add     r8, r2, r8, lsl #22
+        add     r8, r8, r11, lsr #10
+        mov     r11, #512
+
+S122:   strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0
+        beq     S337
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+
+S337:   sub     ip, r3, r10             @ do same thing for right channel
+        mov     r10, r3
+        add     ip, r3, ip, asr #1
+        ldr     r2, [r1], #4
+        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S123
+        cmp     ip, #0
+        mla     r3, ip, r0, r11
+        add     r3, r2, r3, asr #10
+        b       S124
+
+S123:   mov     r3, #0
+        smlal   r11, r3, r0, ip
+        add     r3, r2, r3, lsl #22
+        add     r3, r3, r11, lsr #10
+        mov     r11, #512
+
+S124:   strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     S341
+        teq     ip, r2
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+S341:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_18_loop
+
+/* common exit for terms 17 & 18 */
+
+store_1718:
+        str     r3, [r5, #48]           @ store sample history into struct
+        str     r8, [r5, #16]
+        str     r10, [r5, #52]
+        str     lr, [r5, #20]
+        b       common_exit             @ and return
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 2 condition
+ * (note that this case can be handled by the default term handler (1-8), but
+ * this special case is faster because it doesn't have to read memory twice)
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_2_loop:
+        mov     ip, lr                  @ get decorrelation value
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S125
+        cmp     ip, #0
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        b       S126
+
+S125:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, ip
+        add     r8, r2, r8, lsl #22
+        add     r8, r8, r11, lsr #10
+        mov     r11, #512
+
+S126:   strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0
+        beq     S225
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+
+S225:   mov     ip, r10                 @ do same thing for right channel
+        mov     r10, r3
+        ldr     r2, [r1], #4
+        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S127
+        cmp     ip, #0
+        mla     r3, ip, r0, r11
+        add     r3, r2, r3, asr #10
+        b       S128
+
+S127:   mov     r3, #0
+        smlal   r11, r3, r0, ip
+        add     r3, r2, r3, lsl #22
+        add     r3, r3, r11, lsr #10
+        mov     r11, #512
+
+S128:   strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     S229
+        teq     ip, r2
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+S229:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_2_loop
+        b       default_term_exit       @ this exit updates all dpp->samples
+
+/*
+ ******************************************************************************
+ * Loop to handle default term condition
+ *
+ * r0 = dpp->weight_B           r8 = result accumulator
+ * r1 = bptr                    r9 =
+ * r2 = dpp->term               r10 =
+ * r3 = decorrelation value     r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_default_loop:
+        ldr     ip, [r1]                @ get original sample
+        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term
+        mov     r8, r3, lsl #11         @ check magnitude by shifting left then right
+        cmp     r3, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S135
+        cmp     r3, #0
+        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
+        add     r8, ip, r8, asr #10     @  shift and add to new sample
+        b       S136
+
+S135:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, r3
+        add     r8, ip, r8, lsl #22
+        add     r8, r8, r11, lsr #10
+        mov     r11, #512
+
+S136:   str     r8, [r1], #4            @ store update sample
+        cmpne   ip, #0
+        beq     S350
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+
+S350:   ldr     ip, [r1]                @ do the same thing for right channel
+        ldr     r3, [r1, -r2, asl #3]
+        mov     r8, r3, lsl #11         @ check magnitude by shifting left then right
+        cmp     r3, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S137
+        cmp     r3, #0
+        mla     r8, r3, r0, r11
+        add     r8, ip, r8, asr #10
+        b       S138
+
+S137:   mov     r8, #0
+        smlal   r11, r8, r0, r3
+        add     r8, ip, r8, lsl #22
+        add     r8, r8, r11, lsr #10
+        mov     r11, #512
+
+S138:   str     r8, [r1], #4
+        cmpne   ip, #0
+        beq     S354
+        teq     ip, r3
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+S354:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_default_loop
+
+/*
+ * This exit is used by terms 1-8 to store the previous "term" samples (up to 8)
+ * into the decorr pass structure history
+ */
+
+default_term_exit:
+        ldr     r2, [r5, #0]            @ r2 = dpp->term
+
+S358:   sub     r2, r2, #1
+        sub     r1, r1, #8
+        ldr     r3, [r1, #4]            @ get right sample and store in dpp->samples_B [r2]
+        add     r6, r5, #48
+        str     r3, [r6, r2, asl #2]
+        ldr     r3, [r1, #0]            @ get left sample and store in dpp->samples_A [r2]
+        add     r6, r5, #16
+        str     r3, [r6, r2, asl #2]
+        cmp     r2, #0
+        bne     S358
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -1 condition
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 =
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_1:
+        ldr     r3, [r1, #-4]
+
+term_minus_1_loop:
+        ldr     ip, [r1]                @ for left channel the decorrelation value
+                                        @  is the previous right sample (in r3)
+        mov     lr, r3, lsl #11         @ check magnitude by shifting left then right
+        cmp     r3, lr, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S142
+        cmp     r3, #0
+        mla     r2, r3, r4, r11
+        add     lr, ip, r2, asr #10
+        b       S143
+
+S142:   mov     lr, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, lr, r4, r3
+        add     lr, ip, lr, lsl #22
+        add     lr, lr, r11, lsr #10
+        mov     r11, #512
+
+S143:   str     lr, [r1], #8
+        cmpne   ip, #0
+        beq     S361
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+
+S361:   ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
+                                        @  is the just updated left sample (in lr)
+        mov     r3, lr, lsl #11         @ check magnitude by shifting left then right
+        cmp     lr, r3, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S144
+        cmp     lr, #0
+        mla     r3, lr, r0, r11
+        add     r3, r2, r3, asr #10
+        b       S145
+
+S144:   mov     r3, #0
+        smlal   r11, r3, r0, lr
+        add     r3, r2, r3, lsl #22
+        add     r3, r3, r11, lsr #10
+        mov     r11, #512
+
+S145:   strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     S369
+        teq     r2, lr
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+
+S369:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_1_loop
+
+        str     r3, [r5, #16]           @ else store right sample and exit
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -2 condition
+ * (note that the channels are processed in the reverse order here)
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 =
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous left sample    r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated right sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_2:
+        ldr     r3, [r1, #-8]
+
+term_minus_2_loop:
+        ldr     ip, [r1, #4]            @ for right channel the decorrelation value
+                                        @  is the previous left sample (in r3)
+        mov     lr, r3, lsl #11         @ check magnitude by shifting left then right
+        cmp     r3, lr, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S146
+        cmp     r3, #0
+        mla     r2, r3, r0, r11
+        add     lr, ip, r2, asr #10
+        b       S147
+
+S146:   mov     lr, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, lr, r0, r3
+        add     lr, ip, lr, lsl #22
+        add     lr, lr, r11, lsr #10
+        mov     r11, #512
+
+S147:   strne   lr, [r1, #4]
+        cmpne   ip, #0
+        beq     S380
+        teq     ip, r3                  @ update weight based on signs
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+
+S380:   ldr     r2, [r1, #0]            @ for left channel the decorrelation value
+                                        @  is the just updated right sample (in lr)
+        mov     r3, lr, lsl #11         @ check magnitude by shifting left then right
+        cmp     lr, r3, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S148
+        cmp     lr, #0
+        mla     r3, lr, r4, r11
+        add     r3, r2, r3, asr #10
+        b       S149
+
+S148:   mov     r3, #0
+        smlal   r11, r3, r4, lr
+        add     r3, r2, r3, lsl #22
+        add     r3, r3, r11, lsr #10
+        mov     r11, #512
+
+S149:   str     r3, [r1], #8
+        cmpne   r2, #0
+        beq     S388
+        teq     r2, lr
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+
+S388:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_2_loop
+
+        str     r3, [r5, #48]           @ else store left channel and exit
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -3 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current left sample     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = intermediate result
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_3:
+        ldr     r3, [r1, #-4]           @ load previous samples
+        ldr     r8, [r1, #-8]
+
+term_minus_3_loop:
+        ldr     ip, [r1]
+        mov     r2, r3, lsl #11         @ check magnitude by shifting left then right
+        cmp     r3, r2, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S160
+        cmp     r3, #0
+        mla     r2, r3, r4, r11
+        add     r2, ip, r2, asr #10
+        b       S161
+
+S160:   mov     r2, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r2, r4, r3
+        add     r2, ip, r2, lsl #22
+        add     r2, r2, r11, lsr #10
+        mov     r11, #512
+
+S161:   str     r2, [r1], #4
+        cmpne   ip, #0
+        beq     S399
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+
+S399:   mov     ip, r8                  @ ip = previous left we use now
+        mov     r8, r2                  @ r8 = current left we use next time
+        ldr     r2, [r1], #4
+        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S162
+        cmp     ip, #0
+        mla     r3, ip, r0, r11
+        add     r3, r2, r3, asr #10
+        b       S163
+
+S162:   mov     r3, #0
+        smlal   r11, r3, r0, ip
+        add     r3, r2, r3, lsl #22
+        add     r3, r3, r11, lsr #10
+        mov     r11, #512
+
+S163:   strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     S407
+        teq     ip, r2
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+
+S407:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_3_loop
+
+        str     r3, [r5, #16]           @ else store previous samples & exit
+        str     r8, [r5, #48]
+
+/*
+ * Before finally exiting we must store weights back for next time
+ */
+
+common_exit:
+        str     r4, [r5, #8]
+        str     r0, [r5, #12]
+        ldmfd   sp!, {r4 - r8, r10, r11, pc}
+
+
+
+/* This is a mono version of the function above. It does not handle negative terms.
+ *
+ * void decorr_mono_pass_cont (struct decorr_pass *dpp,
+ *                             int32_t *buffer,
+ *                             int32_t sample_count,
+ *                             int32_t long_math);
+ * on entry:
+ *
+ * r0 = struct decorr_pass *dpp
+ * r1 = int32_t *buffer
+ * r2 = int32_t sample_count
+ * r3 = int32_t long_math
+ */
+
+unpack_decorr_mono_pass_cont_armv7:
+
+        stmfd   sp!, {r4 - r8, r11, lr} @ save the registers used here plus the return address
+
+        mov     r5, r0                  @ r5 = dpp
+        mov     r11, #512               @ r11 = 512 for rounding
+        ldr     r6, [r0, #4]            @ r6 = dpp->delta
+        ldr     r4, [r0, #8]            @ r4 = dpp->weight_A
+        cmp     r2, #0                  @ exit if no samples to process
+        beq     mono_common_exit
+
+        add     r7, r1, r2, asl #2      @ r7 = buffer ending position
+        ldr     r2, [r5, #0]            @ r2 = dpp->term
+
+        ldr     lr, [r1, #-8]           @ load 2 sample history from buffer
+        ldr     r8, [r1, #-4]
+        cmp     r2, #17                 @ dispatch on the term value
+        beq     mono_term_17_loop
+        cmp     r2, #18
+        beq     mono_term_18_loop
+        cmp     r2, #2
+        beq     mono_term_2_loop
+        b       mono_term_default_loop  @ else handle default (1-8, except 2)
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 17 condition
+ *
+ * r0 =                         r8 = previous sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 =
+ * r3 =                         r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+mono_term_17_loop:
+        rsb     ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S717
+        cmp     ip, #0                  @ set flags for the conditional store below (mla leaves them alone)
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        b       S718
+
+S717:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, ip         @ r8:r11 = (weight * decorr value) + 512
+        add     r8, r2, r8, lsl #22     @ sample + (high word << 22)
+        add     r8, r8, r11, lsr #10    @  + (low word >> 10), i.e. 64-bit sum >> 10
+        mov     r11, #512               @ restore rounding constant clobbered by smlal
+
+S718:   strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0
+        beq     S129                    @ no weight update if decorr value or sample is zero
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight -= delta
+        addpl   r4, r4, r6              @ signs match: weight += delta
+
+S129:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     mono_term_17_loop
+        b       mono_store_1718         @ common exit for terms 17 & 18
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 18 condition
+ *
+ * r0 =                         r8 = previous sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 =
+ * r3 =                         r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+mono_term_18_loop:
+        sub     ip, r8, lr              @ decorr value =
+        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
+        add     ip, r8, ip, asr #1
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S817
+        cmp     ip, #0                  @ set flags for the conditional store below (mla leaves them alone)
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        b       S818
+
+S817:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, ip         @ r8:r11 = (weight * decorr value) + 512
+        add     r8, r2, r8, lsl #22     @ sample + (high word << 22)
+        add     r8, r8, r11, lsr #10    @  + (low word >> 10), i.e. 64-bit sum >> 10
+        mov     r11, #512               @ restore rounding constant clobbered by smlal
+
+S818:   strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0
+        beq     S141                    @ no weight update if decorr value or sample is zero
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight -= delta
+        addpl   r4, r4, r6              @ signs match: weight += delta
+
+S141:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     mono_term_18_loop
+
+/* common exit for terms 17 & 18 */
+
+mono_store_1718:
+        str     r8, [r5, #16]           @ store sample history into struct
+        str     lr, [r5, #20]
+        b       mono_common_exit        @ and return
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 2 condition
+ * (note that this case can be handled by the default term handler (1-8), but
+ * this special case is faster because it doesn't have to read memory twice)
+ *
+ * r0 =                         r8 = previous sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 =
+ * r3 =                         r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+mono_term_2_loop:
+        mov     ip, lr                  @ get decorrelation value
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
+        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S917
+        cmp     ip, #0                  @ set flags for the conditional store below (mla leaves them alone)
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        b       S918
+
+S917:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, ip         @ r8:r11 = (weight * decorr value) + 512
+        add     r8, r2, r8, lsl #22     @ sample + (high word << 22)
+        add     r8, r8, r11, lsr #10    @  + (low word >> 10), i.e. 64-bit sum >> 10
+        mov     r11, #512               @ restore rounding constant clobbered by smlal
+
+S918:   strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0
+        beq     S029                    @ no weight update if decorr value or sample is zero
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight -= delta
+        addpl   r4, r4, r6              @ signs match: weight += delta
+
+S029:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     mono_term_2_loop
+        b       mono_default_term_exit  @ this exit updates all dpp->samples
+
+/*
+ ******************************************************************************
+ * Loop to handle default term condition
+ *
+ * r0 =                         r8 = result accumulator
+ * r1 = bptr                    r9 =
+ * r2 = dpp->term               r10 =
+ * r3 = decorrelation value     r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+mono_term_default_loop:
+        ldr     ip, [r1]                @ get original sample
+        ldr     r3, [r1, -r2, asl #2]   @ get decorrelation value based on term
+        mov     r8, r3, lsl #11         @ check magnitude by shifting left then right
+        cmp     r3, r8, asr #11         @  and comparing, branch to 64-bit math if different
+        bne     S617
+        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
+        add     r8, ip, r8, asr #10     @  shift and add to new sample
+        b       S618
+
+S617:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
+        smlal   r11, r8, r4, r3         @ r8:r11 = (weight * decorr value) + 512
+        add     r8, ip, r8, lsl #22     @ sample + (high word << 22)
+        add     r8, r8, r11, lsr #10    @  + (low word >> 10), i.e. 64-bit sum >> 10
+        mov     r11, #512               @ restore rounding constant clobbered by smlal
+
+S618:   str     r8, [r1], #4            @ store update sample
+        cmp     r3, #0                  @ no weight update if decorr value
+        cmpne   ip, #0                  @  or original sample is zero
+        beq     S154
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight -= delta
+        addpl   r4, r4, r6              @ signs match: weight += delta
+
+S154:   cmp     r7, r1                  @ loop back if more samples to do
+        bhi     mono_term_default_loop
+
+/*
+ * This exit is used by terms 1-8 to store the previous "term" samples (up to 8)
+ * into the decorr pass structure history
+ */
+
+mono_default_term_exit:
+        ldr     r2, [r5, #0]            @ r2 = dpp->term
+
+S158:   sub     r2, r2, #1              @ step back one history slot
+        sub     r1, r1, #4              @  and one sample in the buffer
+        ldr     r3, [r1, #0]            @ get sample and store in dpp->samples_A [r2]
+        add     r6, r5, #16             @ r6 = history base address (delta no longer needed)
+        str     r3, [r6, r2, asl #2]
+        cmp     r2, #0                  @ repeat until slot 0 has been written
+        bne     S158
+        b       mono_common_exit
+
+/*
+ * Before finally exiting we must store weight back for next time
+ */
+
+mono_common_exit:
+        str     r4, [r5, #8]            @ dpp->weight_A = updated weight
+        ldmfd   sp!, {r4 - r8, r11, pc} @ restore registers and return
+
+#ifdef __ELF__
+        .section .note.GNU-stack,"",%progbits
+#endif
+
--- a/third_party/wavpack/src/unpack_dsd.c
+++ b/third_party/wavpack/src/unpack_dsd.c
@ -0,0 +1,616 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** DSDPACK ****                            //
+//         Lossless DSD (Direct Stream Digital) Audio Compressor          //
+//                Copyright (c) 2013 - 2016 David Bryant.                 //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack_dsd.c
+
+// This module actually handles the uncompression of the DSD audio data.
+
+#ifdef ENABLE_DSD
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// executable code ////////////////////////////////
+
+// This function initializes the main range-encoded data for DSD audio samples
+
+static int init_dsd_block_fast (WavpackStream *wps, WavpackMetadata *wpmd);
+static int init_dsd_block_high (WavpackStream *wps, WavpackMetadata *wpmd);
+static int decode_fast (WavpackStream *wps, int32_t *output, int sample_count);
+static int decode_high (WavpackStream *wps, int32_t *output, int sample_count);
+
+// Parse the two-byte header of a DSD metadata sub-block and prepare the stream
+// for decoding. The first byte is the base-2 exponent of the DSD multiplier and
+// the second byte is the encoding mode:
+//
+//   0 - raw DSD bytes; the remaining payload must be exactly one byte per
+//       sample per channel
+//   1 - "fast" mode, set up by init_dsd_block_fast()
+//   3 - "high" mode, set up by init_dsd_block_high()
+//
+// Returns TRUE when the block is ready to decode, otherwise FALSE (payload too
+// short, exponent out of range, unknown mode, or mode-specific setup failed).
+
+int init_dsd_block (WavpackContext *wpc, WavpackMetadata *wpmd)
+{
+    WavpackStream *wps = wpc->streams [wpc->current_stream];
+
+    if (wpmd->byte_length < 2)
+        return FALSE;
+
+    wps->dsd.byteptr = (unsigned char *)wpmd->data;
+    wps->dsd.endptr = wps->dsd.byteptr + wpmd->byte_length;
+
+    // the exponent comes straight from the file and a shift count of 32 or more
+    // is undefined behavior, so anything that cannot be represented is rejected
+
+    if (*wps->dsd.byteptr > 31)
+        return FALSE;
+
+    wpc->dsd_multiplier = 1U << *wps->dsd.byteptr++;
+    wps->dsd.mode = *wps->dsd.byteptr++;
+
+    if (!wps->dsd.mode) {
+        if (wps->dsd.endptr - wps->dsd.byteptr != wps->wphdr.block_samples * (wps->wphdr.flags & MONO_DATA ? 1 : 2)) {
+            return FALSE;
+        }
+
+        wps->dsd.ready = 1;
+        return TRUE;
+    }
+
+    if (wps->dsd.mode == 1)
+        return init_dsd_block_fast (wps, wpmd);
+    else if (wps->dsd.mode == 3)
+        return init_dsd_block_high (wps, wpmd);
+    else
+        return FALSE;
+}
+
+// Decode up to "sample_count" DSD samples from the current block of the current
+// stream into "buffer", one byte value per int32_t. The request is first trimmed
+// to what is left in the block. If the stream is in (or enters) the mute-error
+// state the output is filled with 0x55 instead of decoded data. For blocks
+// flagged FALSE_STEREO the decoded mono data is duplicated in place into
+// interleaved left/right pairs. The stream's sample index is advanced and the
+// (possibly trimmed) sample count is returned.
+
+int32_t unpack_dsd_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count)
+{
+    WavpackStream *wps = wpc->streams [wpc->current_stream];
+    uint32_t flags = wps->wphdr.flags;
+
+    // trim the request to the end of the block; the second comparison protects
+    // against the first sum having overflowed
+
+    if (wps->sample_index + sample_count > GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples &&
+        GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index < sample_count)
+            sample_count = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index);
+
+    // a position ahead of us, or a request bigger than the whole block, cannot be decoded
+
+    if (wps->sample_index < GET_BLOCK_INDEX (wps->wphdr) || sample_count > wps->wphdr.block_samples)
+        wps->mute_error = TRUE;
+
+    if (!wps->mute_error) {
+        switch (wps->dsd.mode) {
+            case 0: {
+                // raw mode: copy bytes straight through, limited to what the block holds
+                int bytes_wanted = sample_count * ((flags & MONO_DATA) ? 1 : 2);
+                int32_t *dst = buffer;
+
+                if (bytes_wanted > wps->dsd.endptr - wps->dsd.byteptr)
+                    bytes_wanted = (int)(wps->dsd.endptr - wps->dsd.byteptr);
+
+                while (bytes_wanted--) {
+                    int32_t dsd_byte = *wps->dsd.byteptr++;
+
+                    *dst++ = dsd_byte;
+                    wps->crc += (wps->crc << 1) + dsd_byte;
+                }
+
+                break;
+            }
+
+            case 1:
+                if (!decode_fast (wps, buffer, sample_count))
+                    wps->mute_error = TRUE;
+
+                break;
+
+            default:
+                if (!decode_high (wps, buffer, sample_count))
+                    wps->mute_error = TRUE;
+
+                break;
+        }
+    }
+
+    if (wps->mute_error) {
+        // fill the output with the 0x55 pattern, one value per output channel
+        int samples_to_null = sample_count;
+
+        if (!(wpc->reduced_channels == 1 || wpc->config.num_channels == 1 || (flags & MONO_FLAG)))
+            samples_to_null = sample_count * 2;
+
+        for (; samples_to_null; samples_to_null--)
+            *buffer++ = 0x55;
+    }
+    else if (flags & FALSE_STEREO) {
+        // expand mono to stereo in place, working backward so nothing is overwritten early
+        int32_t *dst = buffer + sample_count * 2;
+        int32_t *src = buffer + sample_count;
+        int32_t remaining;
+
+        for (remaining = sample_count; remaining--;) {
+            int32_t mono_value = *--src;
+
+            *--dst = mono_value;
+            *--dst = mono_value;
+        }
+    }
+
+    wps->sample_index += sample_count;
+
+    return sample_count;
+}
+
+/*------------------------------------------------------------------------------------------------------------------------*/
+
+// #define DSD_BYTE_READY(low,high) (((low) >> 24) == ((high) >> 24))
+// #define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) >> 24))
+#define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) & 0xff000000))
+#define MAX_HISTORY_BITS    5
+
+// Set up "fast" mode decoding for a DSD block. The payload at wps->dsd.byteptr
+// holds, in order: the number of history bits (at most MAX_HISTORY_BITS), the
+// maximum probability value, the probability tables (256 entries per history
+// bin, run-length coded when the maximum probability is below 0xff, otherwise
+// stored verbatim), and the first four bytes of range-coded data.
+//
+// For every bin a running-sum table and a value lookup table (one entry per
+// unit of probability) are built from the probabilities. Returns TRUE when the
+// decoder is ready, or FALSE on truncated/invalid data or allocation failure.
+//
+// NOTE(review): tables are allocated unconditionally here and are left in the
+// stream structure on failure; presumably they are released elsewhere - confirm.
+
+static int init_dsd_block_fast (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    unsigned char history_bits, max_probability;
+    int total_summed_probabilities = 0, table_bytes, bin, i;
+
+    if (wps->dsd.byteptr == wps->dsd.endptr)
+        return FALSE;
+
+    history_bits = *wps->dsd.byteptr++;
+
+    if (wps->dsd.byteptr == wps->dsd.endptr || history_bits > MAX_HISTORY_BITS)
+        return FALSE;
+
+    wps->dsd.history_bins = 1 << history_bits;
+    table_bytes = (int) sizeof (*wps->dsd.probabilities) * wps->dsd.history_bins;
+
+    wps->dsd.value_lookup = (unsigned char **)malloc (sizeof (*wps->dsd.value_lookup) * wps->dsd.history_bins);
+    wps->dsd.summed_probabilities = (int16_t (*)[256])malloc (sizeof (*wps->dsd.summed_probabilities) * wps->dsd.history_bins);
+    wps->dsd.probabilities = (unsigned char (*)[256])malloc (table_bytes);
+
+    // clear the lookup pointers first so that any later failure leaves only
+    // NULL or valid entries behind
+
+    if (wps->dsd.value_lookup)
+        memset (wps->dsd.value_lookup, 0, sizeof (*wps->dsd.value_lookup) * wps->dsd.history_bins);
+
+    if (!wps->dsd.value_lookup || !wps->dsd.summed_probabilities || !wps->dsd.probabilities)
+        return FALSE;
+
+    max_probability = *wps->dsd.byteptr++;
+
+    if (max_probability < 0xff) {
+        // run-length coded: codes above max_probability are runs of zeros, a
+        // zero code terminates the table, anything else is a literal probability
+        unsigned char *outptr = (unsigned char *) wps->dsd.probabilities;
+        unsigned char *outend = outptr + table_bytes;
+
+        while (outptr < outend && wps->dsd.byteptr < wps->dsd.endptr) {
+            int code = *wps->dsd.byteptr++;
+
+            if (code > max_probability) {
+                int zcount = code - max_probability;
+
+                while (outptr < outend && zcount--)
+                    *outptr++ = 0;
+            }
+            else if (code)
+                *outptr++ = code;
+            else
+                break;
+        }
+
+        // the table must be completely filled and followed by a zero terminator
+        if (outptr < outend || (wps->dsd.byteptr < wps->dsd.endptr && *wps->dsd.byteptr++))
+            return FALSE;
+    }
+    else if (wps->dsd.endptr - wps->dsd.byteptr > table_bytes) {
+        memcpy (wps->dsd.probabilities, wps->dsd.byteptr, table_bytes);
+        wps->dsd.byteptr += table_bytes;
+    }
+    else
+        return FALSE;
+
+    for (bin = 0; bin < wps->dsd.history_bins; ++bin) {
+        int32_t sum_values = 0;
+        unsigned char *vp;
+
+        for (i = 0; i < 256; ++i) {
+            sum_values += wps->dsd.probabilities [bin] [i];
+            wps->dsd.summed_probabilities [bin] [i] = (int16_t) sum_values;
+        }
+
+        if (sum_values) {
+            total_summed_probabilities += sum_values;
+
+            // a bin total that does not fit the 16-bit summed table cannot be
+            // decoded, and the overall total is bounded before allocating
+
+            if (sum_values > 32767 || total_summed_probabilities > wps->dsd.history_bins * 1280)
+                return FALSE;
+
+            vp = wps->dsd.value_lookup [bin] = (unsigned char *)malloc (sum_values);
+
+            if (!vp)
+                return FALSE;
+
+            for (i = 0; i < 256; i++) {
+                int c = wps->dsd.probabilities [bin] [i];
+
+                while (c--)
+                    *vp++ = i;
+            }
+        }
+    }
+
+    if (wps->dsd.endptr - wps->dsd.byteptr < 4)
+        return FALSE;
+
+    // prime the range decoder with the first 32 bits, most significant byte first
+
+    for (i = 4; i--;)
+        wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++;
+
+    wps->dsd.p0 = wps->dsd.p1 = 0;
+    wps->dsd.low = 0; wps->dsd.high = 0xffffffff;
+    wps->dsd.ready = 1;
+
+    return TRUE;
+}
+
+// Range-decode "sample_count" samples (twice that many values unless the block
+// is MONO_DATA) of "fast" mode DSD data into "output", using the tables built
+// by init_dsd_block_fast(). The history bin for each value is selected from the
+// previously decoded value of the same channel. Every decoded value is folded
+// into wps->crc. Returns sample_count on success, or 0 if the data is found to
+// be inconsistent with the tables.
+
+static int decode_fast (WavpackStream *wps, int32_t *output, int sample_count)
+{
+    const int mono = (wps->wphdr.flags & MONO_DATA) != 0;
+    int remaining = mono ? sample_count : sample_count * 2;
+
+    for (; remaining; --remaining) {
+        int bin_total = wps->dsd.summed_probabilities [wps->dsd.p0] [255];
+        int mult, index, code;
+
+        if (bin_total == 0)
+            return 0;
+
+        mult = (wps->dsd.high - wps->dsd.low) / bin_total;
+
+        if (mult == 0) {
+            // the range has collapsed: fetch a fresh 32-bit value when there
+            // is one and start over with the full range
+            if (wps->dsd.endptr - wps->dsd.byteptr >= 4) {
+                int nbytes = 4;
+
+                while (nbytes--)
+                    wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++;
+            }
+
+            wps->dsd.low = 0;
+            wps->dsd.high = 0xffffffff;
+            mult = wps->dsd.high / bin_total;
+
+            if (mult == 0)
+                return 0;
+        }
+
+        index = (wps->dsd.value - wps->dsd.low) / mult;
+
+        if (index >= bin_total)
+            return 0;
+
+        code = wps->dsd.value_lookup [wps->dsd.p0] [index];
+        *output++ = code;
+
+        // narrow the range to the slice that belongs to the decoded value
+        if (code != 0)
+            wps->dsd.low += wps->dsd.summed_probabilities [wps->dsd.p0] [code-1] * mult;
+
+        wps->dsd.high = wps->dsd.low + wps->dsd.probabilities [wps->dsd.p0] [code] * mult - 1;
+        wps->crc += (wps->crc << 1) + code;
+
+        // select the history bin for the next value (channels alternate in stereo)
+        if (mono)
+            wps->dsd.p0 = code & (wps->dsd.history_bins-1);
+        else {
+            wps->dsd.p0 = wps->dsd.p1;
+            wps->dsd.p1 = code & (wps->dsd.history_bins-1);
+        }
+
+        // shift out top bytes that low and high agree on, pulling in new data
+        while (DSD_BYTE_READY (wps->dsd.high, wps->dsd.low) && wps->dsd.byteptr < wps->dsd.endptr) {
+            wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++;
+            wps->dsd.high = (wps->dsd.high << 8) | 0xff;
+            wps->dsd.low <<= 8;
+        }
+    }
+
+    return sample_count;
+}
+
+/*------------------------------------------------------------------------------------------------------------------------*/
+
+#define PTABLE_BITS 8
+#define PTABLE_BINS (1<<PTABLE_BITS)
+#define PTABLE_MASK (PTABLE_BINS-1)
+
+#define UP   0x010000fe
+#define DOWN 0x00010000
+#define DECAY 8
+
+#define PRECISION 20
+#define VALUE_ONE (1 << PRECISION)
+#define PRECISION_USE 12
+
+#define RATE_S 20
+
+// Fill the PTABLE_BINS-entry probability table. Starting from 0x808000, the
+// value is stepped toward DOWN (each step moves it by 1/2^DECAY of the remaining
+// distance). The lower half of the table receives the successive values and the
+// upper half receives their mirror image (0x100ffff - value). "rate_i" sets the
+// initial number of steps and "rate_s" controls how quickly the step count grows
+// from one entry to the next; stepping stops once the value reaches 0x010000.
+
+static void init_ptable (int *table, int rate_i, int rate_s)
+{
+    int rate = rate_i << 8;
+    int value = 0x808000;
+    int steps, lo, hi;
+
+    steps = (rate + 128) >> 8;
+
+    while (steps--)
+        value += (DOWN - value) >> DECAY;
+
+    for (lo = 0, hi = PTABLE_BINS - 1; lo < PTABLE_BINS/2; ++lo, --hi) {
+        table [lo] = value;
+        table [hi] = 0x100ffff - value;
+
+        if (value <= 0x010000)
+            continue;
+
+        rate += (rate * rate_s + 128) >> 8;
+        steps = (rate + 64) >> 7;
+
+        while (steps--)
+            value += (DOWN - value) >> DECAY;
+    }
+}
+
+// Prepare the stream for decoding a "high" mode DSD block. Reads the two
+// rate bytes, allocates and builds the probability table, loads the filter
+// state for one (MONO_DATA) or two channels, and primes the range decoder
+// with the first four coded bytes. Returns TRUE on success, or FALSE if the
+// remaining block data is too short, rate_s is not RATE_S, or the table
+// cannot be allocated. The "wpmd" argument is not referenced here.
+
+static int init_dsd_block_high (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    uint32_t flags = wps->wphdr.flags;
+    int channel, rate_i, rate_s, i;
+
+    // 2 rate bytes + 7 filter bytes per channel + 4 bytes to prime the decoder
+    if (wps->dsd.endptr - wps->dsd.byteptr < ((flags & MONO_DATA) ? 13 : 20))
+        return FALSE;
+
+    rate_i = *wps->dsd.byteptr++;
+    rate_s = *wps->dsd.byteptr++;
+
+    if (rate_s != RATE_S)
+        return FALSE;
+
+    wps->dsd.ptable = (int32_t *)malloc (PTABLE_BINS * sizeof (*wps->dsd.ptable));
+
+    // init_ptable() writes every entry, so it must never be given a NULL table
+    if (!wps->dsd.ptable)
+        return FALSE;
+
+    init_ptable (wps->dsd.ptable, rate_i, rate_s);
+
+    for (channel = 0; channel < ((flags & MONO_DATA) ? 1 : 2); ++channel) {
+        DSDfilters *sp = wps->dsd.filters + channel;
+
+        // five filter bytes, each scaled up to PRECISION bits
+        sp->filter1 = *wps->dsd.byteptr++ << (PRECISION - 8);
+        sp->filter2 = *wps->dsd.byteptr++ << (PRECISION - 8);
+        sp->filter3 = *wps->dsd.byteptr++ << (PRECISION - 8);
+        sp->filter4 = *wps->dsd.byteptr++ << (PRECISION - 8);
+        sp->filter5 = *wps->dsd.byteptr++ << (PRECISION - 8);
+        sp->filter6 = 0;
+
+        // 16-bit little-endian factor, sign extended by the shift pair
+        sp->factor = *wps->dsd.byteptr++ & 0xff;
+        sp->factor |= (*wps->dsd.byteptr++ << 8) & 0xff00;
+        sp->factor = (sp->factor << 16) >> 16;
+    }
+
+    wps->dsd.high = 0xffffffff;
+    wps->dsd.low = 0x0;
+
+    // load the first four coded bytes, most significant first
+    for (i = 4; i--;)
+        wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++;
+
+    wps->dsd.ready = 1;
+
+    return TRUE;
+}
+
+// Decode one bit for a single channel: select the probability entry from
+// the channel's current prediction, split the coder range with it, adapt
+// the entry, refill the coder from the block data while bytes are ready,
+// and finally run the decoded bit through the channel's filter chain to
+// produce the prediction for the next bit.
+
+static void decode_high_bit (WavpackStream *wps, DSDfilters *ch)
+{
+    int32_t *pp = wps->dsd.ptable + ((ch->value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK);
+    uint32_t split = wps->dsd.low + ((wps->dsd.high - wps->dsd.low) >> 8) * (*pp >> 16);
+
+    if (wps->dsd.value <= split) {
+        wps->dsd.high = split;
+        *pp += (UP - *pp) >> DECAY;
+        ch->filter0 = -1;
+    }
+    else {
+        wps->dsd.low = split + 1;
+        *pp += (DOWN - *pp) >> DECAY;
+        ch->filter0 = 0;
+    }
+
+    while (DSD_BYTE_READY (wps->dsd.high, wps->dsd.low) && wps->dsd.byteptr < wps->dsd.endptr) {
+        wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++;
+        wps->dsd.high = (wps->dsd.high << 8) | 0xff;
+        wps->dsd.low <<= 8;
+    }
+
+    ch->value += ch->filter6 << 3;
+    ch->byte = (ch->byte << 1) | (ch->filter0 & 1);
+    ch->factor += (((ch->value ^ ch->filter0) >> 31) | 1) & ((ch->value ^ (ch->value - (ch->filter6 << 4))) >> 31);
+    ch->filter1 += ((ch->filter0 & VALUE_ONE) - ch->filter1) >> 6;
+    ch->filter2 += ((ch->filter0 & VALUE_ONE) - ch->filter2) >> 4;
+    ch->filter3 += (ch->filter2 - ch->filter3) >> 4;
+    ch->filter4 += (ch->filter3 - ch->filter4) >> 4;
+    ch->value = (ch->filter4 - ch->filter5) >> 4;
+    ch->filter5 += ch->value;
+    ch->filter6 += (ch->value - ch->filter6) >> 3;
+    ch->value = ch->filter1 - ch->filter5 + ((ch->filter6 * ch->factor) >> 2);
+}
+
+// Decode "sample_count" DSD bytes per channel (one channel for MONO_DATA,
+// otherwise two) into "output", one byte per int32_t with the channels
+// interleaved. The bits of the two channels alternate in the coded data,
+// channel 0 first. Every output byte is folded into wps->crc. Always
+// returns "sample_count".
+
+static int decode_high (WavpackStream *wps, int32_t *output, int sample_count)
+{
+    int num_chans = (wps->wphdr.flags & MONO_DATA) ? 1 : 2, remaining, chan, bit;
+    DSDfilters *filters = wps->dsd.filters;
+
+    for (remaining = sample_count; remaining--;) {
+
+        // refresh each channel's prediction before decoding its next byte
+        for (chan = 0; chan < num_chans; ++chan)
+            filters [chan].value = filters [chan].filter1 - filters [chan].filter5 +
+                ((filters [chan].filter6 * filters [chan].factor) >> 2);
+
+        for (bit = 0; bit < 8; ++bit)
+            for (chan = 0; chan < num_chans; ++chan)
+                decode_high_bit (wps, filters + chan);
+
+        for (chan = 0; chan < num_chans; ++chan) {
+            int32_t decoded = filters [chan].byte & 0xff;
+
+            *output++ = decoded;
+            wps->crc += (wps->crc << 1) + decoded;
+            filters [chan].factor -= (filters [chan].factor + 512) >> 10;
+        }
+    }
+
+    return sample_count;
+}
+
+/*------------------------------------------------------------------------------------------------------------------------*/
+
+#if 0       // disabled: the 80 term table below is compiled out in favor of the 56 term one
+
+// 80 term DSD decimation filter
+// < 1 dB down at 20 kHz
+// > 108 dB stopband attenuation (fs/16)
+
+static const int32_t decm_filter [] = {
+    4, 17, 56, 147, 336, 693, 1320, 2359,
+    4003, 6502, 10170, 15392, 22623, 32389, 45275, 61920,
+    82994, 109174, 141119, 179431, 224621, 277068, 336983, 404373,
+    479004, 560384, 647741, 740025, 835917, 933849, 1032042, 1128551,
+    1221329, 1308290, 1387386, 1456680, 1514425, 1559128, 1589610, 1605059,
+    1605059, 1589610, 1559128, 1514425, 1456680, 1387386, 1308290, 1221329,
+    1128551, 1032042, 933849, 835917, 740025, 647741, 560384, 479004,
+    404373, 336983, 277068, 224621, 179431, 141119, 109174, 82994,
+    61920, 45275, 32389, 22623, 15392, 10170, 6502, 4003,
+    2359, 1320, 693, 336, 147, 56, 17, 4,
+};
+
+#define NUM_FILTER_TERMS 80     // must match the number of entries in the table above
+
+#else       // active configuration
+
+// 56 term decimation filter
+// < 0.5 dB down at 20 kHz
+// > 100 dB stopband attenuation (fs/12)
+
+static const int32_t decm_filter [] = {
+    4, 17, 56, 147, 336, 692, 1315, 2337,
+    3926, 6281, 9631, 14216, 20275, 28021, 37619, 49155,
+    62616, 77870, 94649, 112551, 131049, 149507, 167220, 183448,
+    197472, 208636, 216402, 220385, 220385, 216402, 208636, 197472,
+    183448, 167220, 149507, 131049, 112551, 94649, 77870, 62616,
+    49155, 37619, 28021, 20275, 14216, 9631, 6281, 3926,
+    2337, 1315, 692, 336, 147, 56, 17, 4,
+};
+
+#define NUM_FILTER_TERMS 56     // must match the number of entries in the table above
+
+#endif
+
+#define HISTORY_BYTES ((NUM_FILTER_TERMS+7)/8)  // input bytes needed to cover every term at 8 terms (bits) per byte
+
+typedef struct {
+    unsigned char delay [HISTORY_BYTES];    // recent input bytes of one channel; decimate_dsd_run() shifts toward index 0 and stores the newest byte last
+} DecimationChannel;
+
+typedef struct {
+    int32_t conv_tables [HISTORY_BYTES] [256];  // for each history byte position, the summed filter contribution of every possible byte value
+    DecimationChannel *chans;                   // num_channels history buffers, allocated in decimate_dsd_init()
+    int num_channels;                           // channel count passed to decimate_dsd_init()
+} DecimationContext;
+
+// Create a decimation context for "num_channels" channels. The filter terms
+// are scaled so that their sum maps to ((1 << 23) - 1) * 16 and are then
+// folded into one 256-entry lookup table per history byte: each table entry
+// adds a term where the matching bit of the byte value is set (most
+// significant bit first) and subtracts it where the bit is clear. The
+// channel histories are reset before returning. Returns NULL if either
+// allocation fails.
+
+void *decimate_dsd_init (int num_channels)
+{
+    DecimationContext *ctx = (DecimationContext *)calloc (1, sizeof (DecimationContext));
+    double total = 0.0, scale;
+    int term, pattern;
+
+    if (!ctx)
+        return NULL;
+
+    ctx->num_channels = num_channels;
+    ctx->chans = (DecimationChannel *)malloc (num_channels * sizeof (DecimationChannel));
+
+    if (!ctx->chans) {
+        free (ctx);
+        return NULL;
+    }
+
+    for (term = 0; term < NUM_FILTER_TERMS; ++term)
+        total += decm_filter [term];
+
+    scale = ((1 << 23) - 1) / total * 16.0;
+
+    for (term = 0; term < NUM_FILTER_TERMS; ++term) {
+        int scaled = (int) floor (decm_filter [term] * scale + 0.5);
+        int32_t *row = ctx->conv_tables [term >> 3];
+        int bit = 0x80 >> (term & 0x7);
+
+        // terms that round to zero contribute nothing to the tables
+        if (!scaled)
+            continue;
+
+        for (pattern = 0; pattern < 256; ++pattern)
+            row [pattern] += (pattern & bit) ? scaled : -scaled;
+    }
+
+    decimate_dsd_reset (ctx);
+
+    return ctx;
+}
+
+// Reset the input history of every channel in the context to the byte
+// pattern 0x55. A NULL context is ignored.
+
+void decimate_dsd_reset (void *decimate_context)
+{
+    DecimationContext *ctx = (DecimationContext *) decimate_context;
+    int chan;
+
+    if (!ctx)
+        return;
+
+    for (chan = 0; chan < ctx->num_channels; ++chan)
+        memset (ctx->chans [chan].delay, 0x55, sizeof (ctx->chans [chan].delay));
+}
+
+// Run the decimation filter in place over "num_samples" interleaved frames
+// in "samples". Each input value is stored as one byte at the end of its
+// channel's history (after the history is shifted down by one byte), and is
+// replaced by the sum of the lookup table entries for every history byte,
+// shifted right by 4. A NULL context is ignored.
+
+void decimate_dsd_run (void *decimate_context, int32_t *samples, int num_samples)
+{
+    DecimationContext *ctx = (DecimationContext *) decimate_context;
+    int chan = 0, tap;
+
+    if (!ctx)
+        return;
+
+    while (num_samples) {
+        unsigned char *history = ctx->chans [chan].delay;
+        int sum = 0;
+
+        // drop the oldest byte and append the new one (truncated to 8 bits)
+        memmove (history, history + 1, HISTORY_BYTES - 1);
+        history [HISTORY_BYTES - 1] = *samples;
+
+        // constant trip count, so this is trivially unrollable by the compiler
+        for (tap = 0; tap < HISTORY_BYTES; ++tap)
+            sum += ctx->conv_tables [tap] [history [tap]];
+
+        *samples++ = sum >> 4;
+
+        // a frame is complete once every channel has been processed
+        if (++chan == ctx->num_channels) {
+            num_samples--;
+            chan = 0;
+        }
+    }
+}
+
+// Release a context created by decimate_dsd_init(), including its channel
+// history array. A NULL context is ignored.
+
+void decimate_dsd_destroy (void *decimate_context)
+{
+    DecimationContext *ctx = (DecimationContext *) decimate_context;
+
+    if (ctx) {
+        free (ctx->chans);      // free (NULL) is a no-op, so no guard is needed
+        free (ctx);
+    }
+}
+
+#endif      // ENABLE_DSD
--- a/third_party/wavpack/src/unpack_floats.c
+++ b/third_party/wavpack/src/unpack_floats.c
@ -0,0 +1,134 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack_floats.c
+
+// This module deals with the restoration of floating-point data. Note that no
+// floating point math is involved here...the values are only processed with
+// the macros that directly access the mantissa, exponent, and sign fields.
+// That's why we use the f32 type instead of the built-in float type.
+
+#include <stdlib.h>
+
+#include "wavpack_local.h"
+
+static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values);
+
+void float_values (WavpackStream *wps, int32_t *values, int32_t num_values)
+{   /* Converts the "num_values" decoded integers at "values" into f32 bit
+     * patterns in place, reading any extra bits that are needed from the
+     * wps->wvxbits stream, and folds every result into wps->crc_x. When
+     * that stream is not open the work is handed to float_values_nowvx()
+     * and wps->crc_x is left untouched.
+     */
+    uint32_t crc = wps->crc_x;
+
+    if (!bs_is_open (&wps->wvxbits)) {
+        float_values_nowvx (wps, values, num_values);
+        return;
+    }
+
+    while (num_values--) {
+        int shift_count = 0, exp = wps->float_max_exp;
+        f32 outval = 0;
+        uint32_t temp;
+
+        if (*values == 0) {     // a zero integer: the result stays 0 unless the extra stream supplies fields for it
+            if (wps->float_flags & FLOAT_ZEROS_SENT) {
+                if (getbit (&wps->wvxbits)) {   // flag bit set: explicit mantissa, optional exponent, then sign follow
+                    getbits (&temp, 23, &wps->wvxbits);
+                    set_mantissa (outval, temp);
+
+                    if (exp >= 25) {    // exponent bits are only present when float_max_exp is at least 25
+                        getbits (&temp, 8, &wps->wvxbits);
+                        set_exponent (outval, temp);
+                    }
+
+                    set_sign (outval, getbit (&wps->wvxbits));
+                }
+                else if (wps->float_flags & FLOAT_NEG_ZEROS)    // flag bit clear: only a sign bit is stored
+                    set_sign (outval, getbit (&wps->wvxbits));
+            }
+        }
+        else {
+            *values <<= wps->float_shift;
+
+            if (*values < 0) {  // continue with the magnitude and record the sign separately
+                *values = -*values;
+                set_sign (outval, 1);
+            }
+
+            if (*values == 0x1000000) {     // this magnitude is given exponent 255 (presumably Inf / NaN; confirm against the encoder)
+                if (getbit (&wps->wvxbits)) {
+                    getbits (&temp, 23, &wps->wvxbits);
+                    set_mantissa (outval, temp);
+                }
+
+                set_exponent (outval, 255);
+            }
+            else {
+                if (exp)
+                    while (!(*values & 0x800000) && --exp) {    // shift left until bit 23 is set or the exponent reaches zero
+                        shift_count++;
+                        *values <<= 1;
+                    }
+
+                if (shift_count) {  // fill the vacated low bits: all ones, or actual bits read from the stream
+                    if ((wps->float_flags & FLOAT_SHIFT_ONES) ||
+                        ((wps->float_flags & FLOAT_SHIFT_SAME) && getbit (&wps->wvxbits)))
+                            *values |= ((1 << shift_count) - 1);
+                    else if (wps->float_flags & FLOAT_SHIFT_SENT) {
+                        getbits (&temp, shift_count, &wps->wvxbits);
+                        *values |= temp & ((1 << shift_count) - 1);
+                    }
+                }
+
+                set_mantissa (outval, *values);
+                set_exponent (outval, exp);
+            }
+        }
+
+        crc = crc * 27 + get_mantissa (outval) * 9 + get_exponent (outval) * 3 + get_sign (outval);
+        * (f32 *) values++ = outval;    // overwrite the integer with the assembled f32 value
+    }
+
+    wps->crc_x = crc;
+}
+
+// Version of float_values() used when no extra bitstream is open, so each
+// result is assembled from the integer alone. The "num_values" integers at
+// "values" are replaced in place by f32 bit patterns; zero stays zero.
+
+static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values)
+{
+    for (; num_values--; ++values) {
+        int exp = wps->float_max_exp, shifts = 0;
+        int32_t mag = *values;
+        f32 result = 0;
+
+        if (mag) {
+            mag <<= wps->float_shift;
+
+            // continue with the magnitude and record the sign separately
+            if (mag < 0) {
+                mag = -mag;
+                set_sign (result, 1);
+            }
+
+            if (mag >= 0x1000000) {
+                // too large: shift down while any of bits 24-27 remain set
+                for (; mag & 0xf000000; mag >>= 1)
+                    ++exp;
+            }
+            else if (exp) {
+                // too small: shift up until bit 23 is set or the exponent hits zero
+                while (!(mag & 0x800000) && --exp) {
+                    shifts++;
+                    mag <<= 1;
+                }
+
+                if (shifts && (wps->float_flags & FLOAT_SHIFT_ONES))
+                    mag |= ((1 << shifts) - 1);
+            }
+
+            set_mantissa (result, mag);
+            set_exponent (result, exp);
+        }
+
+        * (f32 *) values = result;
+    }
+}
--- a/third_party/wavpack/src/unpack_seek.c
+++ b/third_party/wavpack/src/unpack_seek.c
@ -0,0 +1,375 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack_seek.c
+
+// This module provides the high-level API for unpacking audio data from
+// a specific sample index (i.e., seeking).
+
+#ifndef NO_SEEKING
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// executable code ////////////////////////////////
+
+static int64_t find_sample (WavpackContext *wpc, void *infile, int64_t header_pos, int64_t sample);
+
+// Seek to the specified sample index, returning TRUE on success. Note that
+// files generated with version 4.0 or newer will seek almost immediately.
+// Older files can take quite long if required to seek through unplayed
+// portions of the file, but will create a seek map so that reverse seeks
+// (or forward seeks to already scanned areas) will be very fast. After a
+// FALSE return the file should not be accessed again (other than to close
+// it); this is a fatal error.
+
+int WavpackSeekSample (WavpackContext *wpc, uint32_t sample)
+{
+    // widen the 32-bit index and let the 64-bit entry point do the work
+    int64_t sample64 = sample;
+
+    return WavpackSeekSample64 (wpc, sample64);
+}
+
+// 64-bit version of WavpackSeekSample(). Positions the decoder so that the
+// next sample unpacked is "sample" and returns TRUE. Returns FALSE when the
+// seek is not possible (unknown length, index at or past the end, input that
+// cannot seek, streaming mode, no stream available) or when a header search,
+// block read, stream initialization or memory allocation fails. When DSD
+// decimation is active the seek lands up to 16 samples early, and those
+// samples are decoded and discarded so the decimator has context.
+
+int WavpackSeekSample64 (WavpackContext *wpc, int64_t sample)
+{
+    WavpackStream *wps = wpc->streams ? wpc->streams [wpc->current_stream = 0] : NULL;
+    uint32_t bcount, samples_to_skip, samples_to_decode = 0;
+    int32_t *buffer;
+
+    if (wpc->total_samples == -1 || sample >= wpc->total_samples ||
+        !wpc->reader->can_seek (wpc->wv_in) || (wpc->open_flags & OPEN_STREAMING) ||
+        (wpc->wvc_flag && !wpc->reader->can_seek (wpc->wvc_in)))
+            return FALSE;
+
+#ifdef ENABLE_LEGACY
+    if (wpc->stream3)
+        return seek_sample3 (wpc, (uint32_t) sample);
+#endif
+
+    // everything below works on the first stream, so there has to be one
+    if (!wps)
+        return FALSE;
+
+#ifdef ENABLE_DSD
+    if (wpc->decimation_context) {      // the decimation code needs some context to be sample accurate
+        if (sample < 16) {
+            samples_to_decode = (uint32_t) sample;
+            sample = 0;
+        }
+        else {
+            samples_to_decode = 16;
+            sample -= 16;
+        }
+    }
+#endif
+
+    // if the target is not inside the block we already hold, locate the right block in the file(s)
+    if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || sample < GET_BLOCK_INDEX (wps->wphdr) ||
+        sample >= GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) {
+
+            free_streams (wpc);
+            wpc->filepos = find_sample (wpc, wpc->wv_in, wpc->filepos, sample);
+
+            if (wpc->filepos == -1)
+                return FALSE;
+
+            if (wpc->wvc_flag) {
+                wpc->file2pos = find_sample (wpc, wpc->wvc_in, 0, sample);
+
+                if (wpc->file2pos == -1)
+                    return FALSE;
+            }
+    }
+
+    // (re)load the first stream's block, and its correction block if one is in use
+    if (!wps->blockbuff) {
+        wpc->reader->set_pos_abs (wpc->wv_in, wpc->filepos);
+        wpc->reader->read_bytes (wpc->wv_in, &wps->wphdr, sizeof (WavpackHeader));
+        WavpackLittleEndianToNative (&wps->wphdr, WavpackHeaderFormat);
+        wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8);
+
+        if (!wps->blockbuff) {
+            free_streams (wpc);
+            return FALSE;
+        }
+
+        memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader));
+
+        if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + sizeof (WavpackHeader), wps->wphdr.ckSize - 24) !=
+            wps->wphdr.ckSize - 24) {
+                free_streams (wpc);
+                return FALSE;
+        }
+
+        // render corrupt blocks harmless
+        if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) {
+            wps->wphdr.ckSize = sizeof (WavpackHeader) - 8;
+            wps->wphdr.block_samples = 0;
+            memcpy (wps->blockbuff, &wps->wphdr, 32);
+        }
+
+        SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index);
+        memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader));
+        wps->init_done = FALSE;
+
+        if (wpc->wvc_flag) {
+            wpc->reader->set_pos_abs (wpc->wvc_in, wpc->file2pos);
+            wpc->reader->read_bytes (wpc->wvc_in, &wps->wphdr, sizeof (WavpackHeader));
+            WavpackLittleEndianToNative (&wps->wphdr, WavpackHeaderFormat);
+            wps->block2buff = (unsigned char *)malloc (wps->wphdr.ckSize + 8);
+
+            if (!wps->block2buff) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader));
+
+            if (wpc->reader->read_bytes (wpc->wvc_in, wps->block2buff + sizeof (WavpackHeader), wps->wphdr.ckSize - 24) !=
+                wps->wphdr.ckSize - 24) {
+                    free_streams (wpc);
+                    return FALSE;
+            }
+
+            // render corrupt blocks harmless
+            if (!WavpackVerifySingleBlock (wps->block2buff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) {
+                wps->wphdr.ckSize = sizeof (WavpackHeader) - 8;
+                wps->wphdr.block_samples = 0;
+                memcpy (wps->block2buff, &wps->wphdr, 32);
+            }
+
+            SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index);
+            memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader));
+        }
+
+        if (!wps->init_done && !unpack_init (wpc)) {
+            free_streams (wpc);
+            return FALSE;
+        }
+
+        wps->init_done = TRUE;
+    }
+
+    // walk (or read in) the remaining streams of a multichannel file until the final block
+    while (!wpc->reduced_channels && !(wps->wphdr.flags & FINAL_BLOCK)) {
+        if (++wpc->current_stream == wpc->num_streams) {
+            WavpackStream **new_streams;
+
+            if (wpc->num_streams == wpc->max_streams) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            // grow through a temporary so the existing array survives a failed realloc
+            new_streams = (WavpackStream **)realloc (wpc->streams, (wpc->num_streams + 1) * sizeof (wpc->streams [0]));
+
+            if (!new_streams) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            wpc->streams = new_streams;
+            wps = (WavpackStream *)malloc (sizeof (WavpackStream));
+
+            if (!wps) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            wpc->streams [wpc->num_streams++] = wps;
+            CLEAR (*wps);
+            bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr);
+
+            if (bcount == (uint32_t) -1) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8);
+
+            if (!wps->blockbuff) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            memcpy (wps->blockbuff, &wps->wphdr, 32);
+
+            if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) !=
+                wps->wphdr.ckSize - 24) {
+                    free_streams (wpc);
+                    return FALSE;
+            }
+
+            // render corrupt blocks harmless
+            if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) {
+                wps->wphdr.ckSize = sizeof (WavpackHeader) - 8;
+                wps->wphdr.block_samples = 0;
+                memcpy (wps->blockbuff, &wps->wphdr, 32);
+            }
+
+            wps->init_done = FALSE;
+
+            if (wpc->wvc_flag && !read_wvc_block (wpc)) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            if (!wps->init_done && !unpack_init (wpc)) {
+                free_streams (wpc);
+                return FALSE;
+            }
+
+            wps->init_done = TRUE;
+        }
+        else
+            wps = wpc->streams [wpc->current_stream];
+    }
+
+    // a backward seek inside the current block means decoding must restart from the block's beginning
+    if (sample < wps->sample_index) {
+        for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++)
+            if (!unpack_init (wpc))
+                return FALSE;
+            else
+                wpc->streams [wpc->current_stream]->init_done = TRUE;
+    }
+
+    samples_to_skip = (uint32_t) (sample - wps->sample_index);
+
+    if (samples_to_skip > 131072) {
+        free_streams (wpc);
+        return FALSE;
+    }
+
+    // decode and discard the samples between the block start and the target
+    if (samples_to_skip) {
+        buffer = (int32_t *)malloc (samples_to_skip * 8);
+
+        // the unpackers write into this scratch buffer, so it must exist
+        if (!buffer) {
+            free_streams (wpc);
+            return FALSE;
+        }
+
+        for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++)
+#ifdef ENABLE_DSD
+            if (wpc->streams [wpc->current_stream]->wphdr.flags & DSD_FLAG)
+                unpack_dsd_samples (wpc, buffer, samples_to_skip);
+            else
+#endif
+                unpack_samples (wpc, buffer, samples_to_skip);
+
+        free (buffer);
+    }
+
+    wpc->current_stream = 0;
+
+#ifdef ENABLE_DSD
+    if (wpc->decimation_context)
+        decimate_dsd_reset (wpc->decimation_context);
+
+    // prime the decimator with the samples we deliberately landed short of
+    if (samples_to_decode) {
+        buffer = (int32_t *)malloc (samples_to_decode * wpc->config.num_channels * 4);
+
+        if (buffer) {
+            WavpackUnpackSamples (wpc, buffer, samples_to_decode);
+            free (buffer);
+        }
+    }
+#endif
+
+    return TRUE;
+}
+
+// Find a valid WavPack header, searching either from the current file position
+// (or from the specified position if not -1) and store it (endian corrected)
+// at the specified pointer. The return value is the exact file position of the
+// header, although we may have actually read past it. Because this function
+// is used for seeking to a specific audio sample, it only considers blocks
+// that contain audio samples for the initial stream to be valid.
+
+#define BUFSIZE 4096
+
+static int64_t find_header (WavpackStreamReader64 *reader, void *id, int64_t filepos, WavpackHeader *wphdr)
+{
+    unsigned char *buffer = (unsigned char *)malloc (BUFSIZE), *sp = buffer, *ep = buffer;
+
+    // without a scan buffer nothing can be searched
+    if (!buffer)
+        return -1;
+
+    // "filepos" is 64 bits wide, so the "no seek" sentinel has to be compared at that width
+    if (filepos != (int64_t) -1 && reader->set_pos_abs (id, filepos)) {
+        free (buffer);
+        return -1;
+    }
+
+    while (1) {
+        int bleft;
+
+        if (sp < ep) {
+            // keep the unscanned tail and refill behind it
+            bleft = (int)(ep - sp);
+            memcpy (buffer, sp, bleft);
+            ep -= (sp - buffer);
+            sp = buffer;
+        }
+        else {
+            // a skip over block data may have moved past the buffered bytes
+            if (sp > ep)
+                if (reader->set_pos_rel (id, (int32_t)(sp - ep), SEEK_CUR)) {
+                    free (buffer);
+                    return -1;
+                }
+
+            sp = ep = buffer;
+            bleft = 0;
+        }
+
+        ep += reader->read_bytes (id, ep, BUFSIZE - bleft);
+
+        // fewer than 32 bytes left cannot hold a complete header
+        if (ep - sp < 32) {
+            free (buffer);
+            return -1;
+        }
+
+        // look for the "wvpk" tag followed by plausible header field values
+        while (sp + 32 <= ep)
+            if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' &&
+                !(*++sp & 1) && sp [2] < 16 && !sp [3] && (sp [2] || sp [1] || *sp >= 24) && sp [5] == 4 &&
+                sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff) && sp [18] < 3 && !sp [19]) {
+                    memcpy (wphdr, sp - 4, sizeof (*wphdr));
+                    WavpackLittleEndianToNative (wphdr, WavpackHeaderFormat);
+
+                    // only initial blocks that carry audio are valid seek targets
+                    if (wphdr->block_samples && (wphdr->flags & INITIAL_BLOCK)) {
+                        free (buffer);
+                        return reader->get_pos (id) - (ep - sp + 4);
+                    }
+
+                    // otherwise jump over most of this block's data before searching on
+                    if (wphdr->ckSize > 1024)
+                        sp += wphdr->ckSize - 1024;
+            }
+    }
+}
+
+// Find the WavPack block that contains the specified sample. If "header_pos"
+// is zero, then no information is assumed except the total number of samples
+// in the file and its size in bytes. If "header_pos" is non-zero then we
+// assume that it is the file position of the valid header image contained in
+// the first stream and we can limit our search to either the portion above
+// or below that point. If a .wvc file is being used, then this must be called
+// for that file also.
+
+static int64_t find_sample (WavpackContext *wpc, void *infile, int64_t header_pos, int64_t sample)
+{   /* Interpolating search: the file position of the target is estimated
+     * from the bytes-per-sample ratio between a lower and an upper bound,
+     * find_header() is run from that estimate, and whichever bound the
+     * header found replaces is tightened. Returns the file position of the
+     * block containing "sample", or -1 on failure. Note that the header of
+     * the current stream (wps->wphdr) is overwritten by each header found.
+     */
+    WavpackStream *wps = wpc->streams [wpc->current_stream];
+    int64_t file_pos1 = 0, file_pos2 = wpc->reader->get_length (infile);    // lower / upper bound as file positions
+    int64_t sample_pos1 = 0, sample_pos2 = wpc->total_samples;              // lower / upper bound as sample indexes
+    double ratio = 0.96;    // fraction of the estimated distance to jump; reduced whenever a probe fails
+    int file_skip = 0;      // set once a probe lands on the lower bound's own header again
+
+    if (sample >= wpc->total_samples)
+        return -1;
+
+    if (header_pos && wps->wphdr.block_samples) {   // use the known header to halve the search range up front
+        if (GET_BLOCK_INDEX (wps->wphdr) > sample) {
+            sample_pos2 = GET_BLOCK_INDEX (wps->wphdr);
+            file_pos2 = header_pos;
+        }
+        else if (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples <= sample) {
+            sample_pos1 = GET_BLOCK_INDEX (wps->wphdr);
+            file_pos1 = header_pos;
+        }
+        else
+            return header_pos;  // the known block already contains the sample
+    }
+
+    while (1) {
+        double bytes_per_sample;
+        int64_t seek_pos;
+
+        bytes_per_sample = (double) file_pos2 - file_pos1;
+        bytes_per_sample /= sample_pos2 - sample_pos1;
+        seek_pos = file_pos1 + (file_skip ? 32 : 0);    // 32 bytes steps past the header at the lower bound
+        seek_pos += (int64_t)(bytes_per_sample * (sample - sample_pos1) * ratio);
+        seek_pos = find_header (wpc->reader, infile, seek_pos, &wps->wphdr);
+
+        if (seek_pos != (int64_t) -1)
+            SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index);
+
+        if (seek_pos == (int64_t) -1 || seek_pos >= file_pos2) {    // nothing usable found below the upper bound
+            if (ratio > 0.0) {
+                if ((ratio -= 0.24) < 0.0)  // probe closer to the lower bound next time
+                    ratio = 0.0;
+            }
+            else
+                return -1;  // already probing from the lower bound itself, so give up
+        }
+        else if (GET_BLOCK_INDEX (wps->wphdr) > sample) {   // found block is past the target: new upper bound
+            sample_pos2 = GET_BLOCK_INDEX (wps->wphdr);
+            file_pos2 = seek_pos;
+        }
+        else if (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples <= sample) {   // found block ends before the target
+
+            if (seek_pos == file_pos1)
+                file_skip = 1;
+            else {
+                sample_pos1 = GET_BLOCK_INDEX (wps->wphdr);
+                file_pos1 = seek_pos;
+            }
+        }
+        else
+            return seek_pos;    // found block contains the target
+    }
+}
+
+#endif
+
--- a/third_party/wavpack/src/unpack_utils.c
+++ b/third_party/wavpack/src/unpack_utils.c
@ -0,0 +1,411 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack_utils.c
+
+// This module provides the high-level API for unpacking audio data from
+// WavPack files. It manages the buffers used to interleave the data passed
+// back to the application from the individual streams. The actual audio
+// stream decompression is handled in the unpack.c module.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// executable code ////////////////////////////////
+
+// Unpack the specified number of samples from the current file position.
+// Note that "samples" here refers to "complete" samples, which would be
+// 2 longs for stereo files or even more for multichannel files, so the
+// required memory at "buffer" is 4 * samples * num_channels bytes. The
+// audio data is returned right-justified in 32-bit longs in the endian
+// mode native to the executing processor. So, if the original data was
+// 16-bit, then the values returned would be +/-32k. Floating point data
+// can also be returned if the source was floating point data (and this
+// can be optionally normalized to +/-1.0 by using the appropriate flag
+// in the call to WavpackOpenFileInput ()). The actual number of samples
+// unpacked is returned, which should be equal to the number requested unless
+// the end of file is encountered or an error occurs. After all samples have
+// been unpacked then 0 will be returned.
+//
+// Parameters:
+//   wpc      context of the file being decoded
+//   buffer   destination for the interleaved 32-bit samples
+//   samples  number of complete samples requested
+//
+// Recoverable problems (corrupt blocks, unexpected block indexes, missing
+// channels, CRC failures) are counted in wpc->crc_errors and the affected
+// output is replaced with silence (0, or 0x55 for DSD blocks). Unrecoverable
+// read problems store a message in wpc->error_message and end the decode
+// early. If no stream has been allocated in the context, 0 is returned.
+
+uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples)
+{
+    WavpackStream *wps = wpc->streams ? wpc->streams [wpc->current_stream = 0] : NULL;
+    int num_channels = wpc->config.num_channels, file_done = FALSE;
+    uint32_t bcount, samples_unpacked = 0, samples_to_unpack;
+    int32_t *bptr = buffer;
+
+#ifdef ENABLE_LEGACY
+    if (wpc->stream3)
+        return unpack_samples3 (wpc, buffer, samples);
+#endif
+
+    // no stream has been allocated, so there is nothing we can decode
+
+    if (!wps)
+        return 0;
+
+    while (samples) {
+
+        // if the current block has no audio, or it's not the first block of a multichannel
+        // sequence, or the sample we're on is past the last sample in this block...we need
+        // to free up the streams and read the next block
+
+        if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) ||
+            wps->sample_index >= GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) {
+
+                int64_t nexthdrpos;
+
+                if (wpc->wrapper_bytes >= MAX_WRAPPER_BYTES)
+                    break;
+
+                free_streams (wpc);
+                nexthdrpos = wpc->reader->get_pos (wpc->wv_in);
+                bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr);
+
+                if (bcount == (uint32_t) -1)
+                    break;
+
+                wpc->filepos = nexthdrpos + bcount;
+
+                // allocate the memory for the entire raw block and read it in
+
+                wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8);
+
+                if (!wps->blockbuff)
+                    break;
+
+                memcpy (wps->blockbuff, &wps->wphdr, 32);
+
+                if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) !=
+                    wps->wphdr.ckSize - 24) {
+                        strcpy (wpc->error_message, "can't read all of last block!");
+                        wps->wphdr.block_samples = 0;
+                        wps->wphdr.ckSize = 24;
+                        break;
+                }
+
+                // render corrupt blocks harmless
+                if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) {
+                    wps->wphdr.ckSize = sizeof (WavpackHeader) - 8;
+                    wps->wphdr.block_samples = 0;
+                    memcpy (wps->blockbuff, &wps->wphdr, 32);
+                }
+
+                // potentially adjusting block_index must be done AFTER verifying block
+
+                if (wpc->open_flags & OPEN_STREAMING)
+                    SET_BLOCK_INDEX (wps->wphdr, wps->sample_index = 0);
+                else
+                    SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index);
+
+                memcpy (wps->blockbuff, &wps->wphdr, 32);
+                wps->init_done = FALSE;     // we have not yet called unpack_init() for this block
+
+                // if this block has audio, but not the sample index we were expecting, flag an error
+
+                if (wps->wphdr.block_samples && wps->sample_index != GET_BLOCK_INDEX (wps->wphdr))
+                    wpc->crc_errors++;
+
+                // if this block has audio, and we're in hybrid lossless mode, read the matching wvc block
+
+                if (wps->wphdr.block_samples && wpc->wvc_flag)
+                    read_wvc_block (wpc);
+
+                // if the block does NOT have any audio, call unpack_init() to process non-audio stuff
+
+                if (!wps->wphdr.block_samples) {
+                    if (!wps->init_done && !unpack_init (wpc))
+                        wpc->crc_errors++;
+
+                    wps->init_done = TRUE;
+                }
+        }
+
+        // if the current block has no audio, or it's not the first block of a multichannel
+        // sequence, or the sample we're on is past the last sample in this block...we need
+        // to loop back and read the next block
+
+        if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) ||
+            wps->sample_index >= GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples)
+                continue;
+
+        // There seems to be some missing data, like a block was corrupted or something.
+        // If it's not too much data, just fill in with silence here and loop back.
+
+        if (wps->sample_index < GET_BLOCK_INDEX (wps->wphdr)) {
+            int32_t zvalue = (wps->wphdr.flags & DSD_FLAG) ? 0x55 : 0;
+
+            samples_to_unpack = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) - wps->sample_index);
+
+            if (!samples_to_unpack || samples_to_unpack > 262144) {
+                strcpy (wpc->error_message, "discontinuity found, aborting file!");
+                wps->wphdr.block_samples = 0;
+                wps->wphdr.ckSize = 24;
+                break;
+            }
+
+            if (samples_to_unpack > samples)
+                samples_to_unpack = samples;
+
+            wps->sample_index += samples_to_unpack;
+            samples_unpacked += samples_to_unpack;
+            samples -= samples_to_unpack;
+
+            samples_to_unpack *= (wpc->reduced_channels ? wpc->reduced_channels : num_channels);
+
+            while (samples_to_unpack--)
+                *bptr++ = zvalue;
+
+            continue;
+        }
+
+        // calculate number of samples to process from this block, then initialize the decoder for
+        // this block if we haven't already
+
+        samples_to_unpack = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index);
+
+        if (samples_to_unpack > samples)
+            samples_to_unpack = samples;
+
+        if (!wps->init_done && !unpack_init (wpc))
+            wpc->crc_errors++;
+
+        wps->init_done = TRUE;
+
+        // if this block is not the final block of a multichannel sequence (and we're not truncating
+        // to stereo), then enter this conditional block...otherwise we just unpack the samples directly
+
+        if (!wpc->reduced_channels && !(wps->wphdr.flags & FINAL_BLOCK)) {
+            int32_t *temp_buffer = (int32_t *)malloc (samples_to_unpack * 8), *src, *dst;
+            int offset = 0;     // offset to next channel in sequence (0 to num_channels - 1)
+            uint32_t samcnt;
+
+            // since we are getting samples from multiple blocks in a multichannel sequence, we must
+            // allocate a temporary buffer to unpack to so that we can re-interleave the samples
+
+            if (!temp_buffer)
+                break;
+
+            // loop through all the streams...
+
+            while (1) {
+
+                // if the stream has not been allocated and corresponding block read, do that here...
+
+                if (wpc->current_stream == wpc->num_streams) {
+                    WavpackStream **new_streams, *new_stream;
+
+                    // grow the stream array through a temporary so that a failed realloc() leaves
+                    // the existing array (which is indexed again after this loop) intact
+
+                    new_streams = (WavpackStream **)realloc (wpc->streams, (wpc->num_streams + 1) * sizeof (wpc->streams [0]));
+
+                    if (!new_streams)
+                        break;
+
+                    wpc->streams = new_streams;
+
+                    // only register the new stream (and bump the count) once it really exists; on
+                    // failure "wps" keeps pointing at the previous stream, which the code following
+                    // this loop still reads
+
+                    new_stream = (WavpackStream *)malloc (sizeof (WavpackStream));
+
+                    if (!new_stream)
+                        break;
+
+                    wps = wpc->streams [wpc->num_streams++] = new_stream;
+                    CLEAR (*wps);
+                    bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr);
+
+                    if (bcount == (uint32_t) -1) {
+                        wpc->streams [0]->wphdr.block_samples = 0;
+                        wpc->streams [0]->wphdr.ckSize = 24;
+                        file_done = TRUE;
+                        break;
+                    }
+
+                    wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8);
+
+                    if (!wps->blockbuff)
+                        break;
+
+                    memcpy (wps->blockbuff, &wps->wphdr, 32);
+
+                    if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) !=
+                        wps->wphdr.ckSize - 24) {
+                            wpc->streams [0]->wphdr.block_samples = 0;
+                            wpc->streams [0]->wphdr.ckSize = 24;
+                            file_done = TRUE;
+                            break;
+                    }
+
+                    // render corrupt blocks harmless
+                    if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) {
+                        wps->wphdr.ckSize = sizeof (WavpackHeader) - 8;
+                        wps->wphdr.block_samples = 0;
+                        memcpy (wps->blockbuff, &wps->wphdr, 32);
+                    }
+
+                    // potentially adjusting block_index must be done AFTER verifying block
+
+                    if (wpc->open_flags & OPEN_STREAMING)
+                        SET_BLOCK_INDEX (wps->wphdr, wps->sample_index = 0);
+                    else
+                        SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index);
+
+                    memcpy (wps->blockbuff, &wps->wphdr, 32);
+
+                    // if this block has audio, and we're in hybrid lossless mode, read the matching wvc block
+
+                    if (wpc->wvc_flag)
+                        read_wvc_block (wpc);
+
+                    // initialize the unpacker for this block
+
+                    if (!unpack_init (wpc))
+                        wpc->crc_errors++;
+
+                    wps->init_done = TRUE;
+                }
+                else
+                    wps = wpc->streams [wpc->current_stream];
+
+                // unpack the correct number of samples (either mono or stereo) into the temp buffer
+
+#ifdef ENABLE_DSD
+                if (wps->wphdr.flags & DSD_FLAG)
+                    unpack_dsd_samples (wpc, src = temp_buffer, samples_to_unpack);
+                else
+#endif
+                    unpack_samples (wpc, src = temp_buffer, samples_to_unpack);
+
+                samcnt = samples_to_unpack;
+                dst = bptr + offset;
+
+                // if the block is mono, copy the samples from the single channel into the destination
+                // using num_channels as the stride
+
+                if (wps->wphdr.flags & MONO_FLAG) {
+                    while (samcnt--) {
+                        dst [0] = *src++;
+                        dst += num_channels;
+                    }
+
+                    offset++;
+                }
+
+                // if the block is stereo, and we don't have room for two more channels, just copy one
+                // and flag an error
+
+                else if (offset == num_channels - 1) {
+                    while (samcnt--) {
+                        dst [0] = src [0];
+                        dst += num_channels;
+                        src += 2;
+                    }
+
+                    wpc->crc_errors++;
+                    offset++;
+                }
+
+                // otherwise copy the stereo samples into the destination
+
+                else {
+                    while (samcnt--) {
+                        dst [0] = *src++;
+                        dst [1] = *src++;
+                        dst += num_channels;
+                    }
+
+                    offset += 2;
+                }
+
+                // check several clues that we're done with this set of blocks and exit if we are; else do next stream
+
+                if ((wps->wphdr.flags & FINAL_BLOCK) || wpc->current_stream == wpc->max_streams - 1 || offset == num_channels)
+                    break;
+                else
+                    wpc->current_stream++;
+            }
+
+            // if we didn't get all the channels we expected, mute the buffer and flag an error
+
+            if (offset != num_channels) {
+                if (wps->wphdr.flags & DSD_FLAG) {
+                    int samples_to_zero = samples_to_unpack * num_channels;
+                    int32_t *zptr = bptr;
+
+                    while (samples_to_zero--)
+                        *zptr++ = 0x55;
+                }
+                else
+                    memset (bptr, 0, samples_to_unpack * num_channels * 4);
+
+                wpc->crc_errors++;
+            }
+
+            // go back to the first stream (we're going to leave them all loaded for now because they might have more samples)
+            // and free the temp buffer
+
+            wps = wpc->streams [wpc->current_stream = 0];
+            free (temp_buffer);
+        }
+        // catch the error situation where we have only one channel but run into a stereo block
+        // (this avoids overwriting the caller's buffer)
+        else if (!(wps->wphdr.flags & MONO_FLAG) && (num_channels == 1 || wpc->reduced_channels == 1)) {
+            memset (bptr, 0, samples_to_unpack * sizeof (*bptr));
+            wps->sample_index += samples_to_unpack;
+            wpc->crc_errors++;
+        }
+#ifdef ENABLE_DSD
+        else if (wps->wphdr.flags & DSD_FLAG)
+            unpack_dsd_samples (wpc, bptr, samples_to_unpack);
+#endif
+        else
+            unpack_samples (wpc, bptr, samples_to_unpack);
+
+        if (file_done) {
+            strcpy (wpc->error_message, "can't read all of last block!");
+            break;
+        }
+
+        if (wpc->reduced_channels)
+            bptr += samples_to_unpack * wpc->reduced_channels;
+        else
+            bptr += samples_to_unpack * num_channels;
+
+        samples_unpacked += samples_to_unpack;
+        samples -= samples_to_unpack;
+
+        // if we just finished a block, check for a calculated crc error
+        // (and back up the streams a little if possible in case we passed a header)
+
+        if (wps->sample_index == GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) {
+            if (check_crc_error (wpc)) {
+                int32_t *zptr = bptr, zvalue = (wps->wphdr.flags & DSD_FLAG) ? 0x55 : 0;
+                uint32_t samples_to_zero = wps->wphdr.block_samples;
+
+                // bptr has already been advanced past the samples just written, so the
+                // silencing below walks backwards from it
+
+                if (samples_to_zero > samples_to_unpack)
+                    samples_to_zero = samples_to_unpack;
+
+                samples_to_zero *= (wpc->reduced_channels ? wpc->reduced_channels : num_channels);
+
+                while (samples_to_zero--)
+                    *--zptr = zvalue;
+
+                if (wps->blockbuff && wpc->reader->can_seek (wpc->wv_in)) {
+                    int32_t rseek = ((WavpackHeader *) wps->blockbuff)->ckSize / 3;
+                    wpc->reader->set_pos_rel (wpc->wv_in, (rseek > 16384) ? -16384 : -rseek, SEEK_CUR);
+                }
+
+                if (wpc->wvc_flag && wps->block2buff && wpc->reader->can_seek (wpc->wvc_in)) {
+                    int32_t rseek = ((WavpackHeader *) wps->block2buff)->ckSize / 3;
+                    wpc->reader->set_pos_rel (wpc->wvc_in, (rseek > 16384) ? -16384 : -rseek, SEEK_CUR);
+                }
+
+                wpc->crc_errors++;
+            }
+        }
+
+        if (wpc->total_samples != -1 && wps->sample_index == wpc->total_samples)
+            break;
+    }
+
+#ifdef ENABLE_DSD
+    if (wpc->decimation_context)
+        decimate_dsd_run (wpc->decimation_context, buffer, samples_unpacked);
+#endif
+
+    return samples_unpacked;
+}
--- a/third_party/wavpack/src/unpack_x64.S
+++ b/third_party/wavpack/src/unpack_x64.S
@ -0,0 +1,957 @@
+############################################################################
+##                           **** WAVPACK ****                            ##
+##                  Hybrid Lossless Wavefile Compressor                   ##
+##              Copyright (c) 1998 - 2015 Conifer Software.               ##
+##                          All Rights Reserved.                          ##
+##      Distributed under the BSD Software License (see license.txt)      ##
+############################################################################
+
+        .intel_syntax noprefix
+        .text
+
+        .globl  _unpack_decorr_stereo_pass_cont_x64win
+        .globl  _unpack_decorr_mono_pass_cont_x64win
+
+        .globl  unpack_decorr_stereo_pass_cont_x64win
+        .globl  unpack_decorr_mono_pass_cont_x64win
+
+        .globl  _unpack_decorr_stereo_pass_cont_x64
+        .globl  _unpack_decorr_mono_pass_cont_x64
+
+        .globl  unpack_decorr_stereo_pass_cont_x64
+        .globl  unpack_decorr_mono_pass_cont_x64
+
+# This is an assembly optimized version of the following WavPack function:
+#
+# void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp,
+#                                      int32_t *buffer,
+#                                      int32_t sample_count,
+#                                      int32_t long_math);
+#
+# It performs a single pass of stereo decorrelation on the provided buffer.
+# Note that this version of the function requires that up to 8 previous
+# stereo samples are visible and correct. In other words, it ignores the
+# "samples_*" fields in the decorr_pass structure and gets the history data
+# directly from the buffer. It does, however, return the appropriate history
+# samples to the decorr_pass structure before returning.
+#
+# The "long_math" argument is used to specify that a 32-bit multiply is
+# not enough for the "apply_weight" operation (although in this case it
+# would only apply to the -1 and -2 terms because the MMX code does not have
+# this limitation) but we ignore the parameter and use the overflow detection
+# of the "imul" instruction to switch automatically to the "long_math" loop.
+#
+# This is written to work on an X86-64 processor (also called the AMD64)
+# running in 64-bit mode and generally uses the MMX extensions to improve
+# the performance by processing both stereo channels together. Unfortunately
+# this is not easily used for terms -1 and -2, so these terms are handled
+# sequentially with regular assembler code.
+#
+# This version has entry points for both the System V ABI and the Windows
+# X64 ABI. It does not use the "red zone" or the "shadow area"; it saves the
+# non-volatile registers for both ABIs on the stack and allocates another
+# 8 bytes on the stack to store the dpp pointer. Note that it does NOT
+# provide unwind data for the Windows ABI (the unpack_x64.asm module for
+# MSVC does). The arguments are passed in registers:
+#
+# System V  Windows  
+#   rdi       rcx      struct decorr_pass *dpp
+#   rsi       rdx      int32_t *buffer
+#   edx       r8       int32_t sample_count
+#   ecx       r9       int32_t long_math
+#
+# registers after entry:
+#
+#   rdi         bptr
+#   rsi         eptr
+#
+# stack usage:
+#
+# [rsp+0] = *dpp
+#
+
+_unpack_decorr_stereo_pass_cont_x64win:
+unpack_decorr_stereo_pass_cont_x64win:
+        push    rbp
+        push    rbx
+        push    rdi
+        push    rsi
+        sub     rsp, 8
+        mov     rdi, rcx                    # copy params from win regs to Linux regs
+        mov     rsi, rdx                    # so we can leave following code similar
+        mov     rdx, r8
+        mov     rcx, r9
+        jmp     entry                       # jump into common portion
+
+_unpack_decorr_stereo_pass_cont_x64:
+unpack_decorr_stereo_pass_cont_x64:
+        push    rbp
+        push    rbx
+        push    rdi
+        push    rsi
+        sub     rsp, 8
+
+entry:  mov     [rsp], rdi                  # store dpp* at [rsp]
+        and     edx, edx                    # if sample_count is zero, do nothing
+        jz      done
+
+        mov     rdi, rsi                    # rdi = bptr
+        lea     rsi, [rdi+rdx*8]            # rsi = eptr
+
+        mov     rax, [rsp]                  # get term from dpp struct & vector to handler
+        mov     eax, [rax]
+        cmp     al, 17
+        je      term_17_entry
+        cmp     al, 18
+        je      term_18_entry
+        cmp     al, -1
+        je      term_minus_1_entry
+        cmp     al, -2
+        je      term_minus_2_entry
+        cmp     al, -3
+        je      term_minus_3_entry
+
+#
+# registers in default term loop:
+#
+#   rbx         term * -8 (for indexing correlation sample)
+#   rdi         bptr
+#   rsi         eptr
+#
+#   mm0, mm1    scratch
+#   mm2         original sample values
+#   mm3         correlation sample
+#   mm4         zero (for pcmpeqd)
+#   mm5         weights
+#   mm6         delta
+#   mm7         512 (for rounding)
+#
+
+default_term_entry:
+        imul    rbx, rax, -8                # set RBX to term * -8
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     rdx, [rsp]                  # set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                # mm5 = weight_AB masked to 16 bits
+        pxor    mm4, mm4                    # mm4 = zero (for pcmpeqd)
+        jmp     default_term_loop
+
+        .balign  64
+default_term_loop:
+        movq    mm3, [rdi+rbx]              # mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm0, mm3
+        paddd   mm1, mm1
+        psrld   mm0, 15
+        psrlw   mm1, 1
+        pmaddwd mm0, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  # mm2 = left_right
+        pslld   mm0, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm0, mm2
+        paddd   mm0, mm1                    # add shifted sums
+        movq    [rdi], mm0                  # store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pcmpeqd mm2, mm4                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm4                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    # and add to weight_AB
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      default_term_loop
+
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     rdx, [rsp]                  # point to dpp
+        movq    [rdx+8], mm5                # put weight_AB back
+        emms
+
+        mov     ecx, [rdx]                  # ecx = dpp->term
+
+default_store_samples:
+        dec     ecx
+        sub     rdi, 8                      # back up one full sample
+        mov     eax, [rdi+4]
+        mov     [rdx+rcx*4+48], eax         # store samples_B [ecx]
+        mov     eax, [rdi]
+        mov     [rdx+rcx*4+16], eax         # store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_store_samples
+        jmp     done
+
+#
+# registers in term 17 & 18 loops:
+#
+#   rdi         bptr
+#   rsi         eptr
+#
+#   mm0, mm1    scratch
+#   mm2         original sample values
+#   mm3         correlation samples
+#   mm4         last calculated values (so we don't need to reload)
+#   mm5         weights
+#   mm6         delta
+#   mm7         512 (for rounding)
+#
+
+term_17_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     rdx, [rsp]                  # set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                # mm5 = weight_AB masked to 16 bits
+        movq    mm4, [rdi-8]                # preload last calculated values in mm4
+        jmp     term_17_loop
+
+        .balign  64
+term_17_loop:
+        paddd   mm4, mm4
+        psubd   mm4, [rdi-16]               # mm3 = sam_AB
+        movq    mm3, mm4
+        movq    mm1, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  # mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    # add shifted sums
+        movq    mm0, mm3
+        movq    [rdi], mm4                  # store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pxor    mm1, mm1                    # mm1 = zero
+        pcmpeqd mm2, mm1                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    # and add to weight_AB
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      term_17_loop
+        jmp     term_1718_exit              # terms 17 & 18 treat samples_AB[] the same
+
+term_18_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     rdx, [rsp]                  # set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                # mm5 = weight_AB masked to 16 bits
+        movq    mm4, [rdi-8]                # preload last calculated values in mm4
+        jmp     term_18_loop
+
+        .balign  64
+term_18_loop:
+        movq    mm3, mm4
+        psubd   mm3, [rdi-16]
+        psrad   mm3, 1
+        paddd   mm3, mm4                    # mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  # mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    # add shifted sums
+        movq    mm0, mm3
+        movq    [rdi], mm4                  # store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pxor    mm1, mm1                    # mm1 = zero
+        pcmpeqd mm2, mm1                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    # and add to weight_AB
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      term_18_loop
+
+term_1718_exit:
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     rdx, [rsp]                  # point to dpp
+        movq    [rdx+8], mm5                # put weight_AB back
+        emms
+
+        mov     eax, [rdi-4]                # dpp->samples_B [0] = bptr [-1];
+        mov     [rdx+48], eax
+        mov     eax, [rdi-8]                # dpp->samples_A [0] = bptr [-2];
+        mov     [rdx+16], eax
+        mov     eax, [rdi-12]               # dpp->samples_B [1] = bptr [-3];
+        mov     [rdx+52], eax
+        mov     eax, [rdi-16]               # dpp->samples_A [1] = bptr [-4];
+        mov     [rdx+20], eax
+        jmp     done
+
+#
+# registers in term -1 & -2 loops:
+#
+#   eax,ebx,edx scratch
+#   ecx         weight_A
+#   ebp         weight_B
+#   rdi         bptr
+#   rsi         eptr
+#   r8d         delta
+#
+
+term_minus_1_entry:
+        cld
+        mov     rdx, [rsp]                  # point to dpp
+        mov     ecx, [rdx+8]                # ecx = weight_A
+        mov     ebp, [rdx+12]               # ebp = weight_B
+        mov     r8d, [rdx+4]                # r8d = delta
+        mov     eax, [rdi-4]
+        jmp     term_minus_1_loop
+
+        .balign  64
+term_minus_1_loop:
+        mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [rdi]
+        jo      OV11
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L182
+        test    edx, edx
+        je      L182
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L183
+        mov     ecx, edx
+L183:   xor     ecx, ebx
+L182:   mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [rdi]
+        jo      OV12
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L187
+        test    edx, edx
+        je      L187
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L188
+        mov     ebp, edx
+L188:   xor     ebp, ebx
+L187:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      term_minus_1_loop
+        jmp     term_minus_1_done
+
+OV11:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     long_term_minus_1_loop
+
+OV12:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     L282
+
+        .balign  64
+long_term_minus_1_loop:
+        mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L282
+        test    edx, edx
+        je      L282
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L283
+        mov     ecx, edx
+L283:   xor     ecx, ebx
+L282:   mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L287
+        test    edx, edx
+        je      L287
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L288
+        mov     ebp, edx
+L288:   xor     ebp, ebx
+L287:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      long_term_minus_1_loop
+
+term_minus_1_done:
+        mov     rdx, [rsp]                  # point to dpp
+        mov     [rdx+8], ecx                # store weights back
+        mov     [rdx+12], ebp
+        mov     eax, [rdi-4]                # dpp->samples_A [0] = bptr [-1];
+        mov     [rdx+16], eax
+        jmp     done
+
+term_minus_2_entry:
+        mov     rdx, [rsp]                  # point to dpp
+        mov     ecx, [rdx+8]                # ecx = weight_A
+        mov     ebp, [rdx+12]               # ebp = weight_B
+        mov     r8d, [rdx+4]                # r8d = delta
+        mov     eax, [rdi-8]
+        jmp     term_minus_2_loop
+
+        .balign  64
+term_minus_2_loop:
+        mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [rdi+4]
+        jo      OV21
+        sar     eax, 10
+        adc     eax, edx
+        mov     [rdi+4], eax
+        test    ebx, ebx
+        je      L194
+        test    edx, edx
+        je      L194
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L195
+        mov     ebp, edx
+L195:   xor     ebp, ebx
+L194:   mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [rdi]
+        jo      OV22
+        sar     eax, 10
+        adc     eax, edx
+        mov     [rdi], eax
+        test    ebx, ebx
+        je      L199
+        test    edx, edx
+        je      L199
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L200
+        mov     ecx, edx
+L200:   xor     ecx, ebx
+L199:   add     rdi, 8
+        cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      term_minus_2_loop
+        jmp     term_minus_2_done
+
+OV21:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     long_term_minus_2_loop
+
+OV22:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     L294
+
+        .balign  64
+long_term_minus_2_loop:
+        mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi+4]
+        add     eax, edx
+        mov     [rdi+4], eax
+        test    ebx, ebx
+        je      L294
+        test    edx, edx
+        je      L294
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L295
+        mov     ebp, edx
+L295:   xor     ebp, ebx
+L294:   mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        mov     [rdi], eax
+        test    ebx, ebx
+        je      L299
+        test    edx, edx
+        je      L299
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L300
+        mov     ecx, edx
+L300:   xor     ecx, ebx
+L299:   add     rdi, 8
+        cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      long_term_minus_2_loop
+
+term_minus_2_done:
+        mov     rdx, [rsp]                  # point to dpp
+        mov     [rdx+8], ecx                # store weights back
+        mov     [rdx+12], ebp
+        mov     eax, [rdi-8]                # dpp->samples_B [0] = bptr [-2];
+        mov     [rdx+48], eax
+        jmp     done
+
+#
+# registers in term -3 loop:
+#
+#   rdi         bptr
+#   rsi         eptr
+#
+#   mm0, mm1    scratch
+#   mm2         original sample values
+#   mm3         correlation samples
+#   mm4         last calculated values (so we don't need to reload)
+#   mm5         weights
+#   mm6         delta
+#   mm7         512 (for rounding)
+#
+
+term_minus_3_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     rdx, [rsp]                  # set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                # mm5 = weight_AB masked to 16 bits
+        movq    mm4, [rdi-8]
+        jmp     term_minus_3_loop
+
+        .balign  64
+term_minus_3_loop:
+        movq    mm3, mm4
+        psrlq   mm3, 32
+        punpckldq mm3, mm4                  # mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        pslld   mm1, 1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  # mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    # add shifted sums
+        movq    [rdi], mm4                  # store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pxor    mm1, mm1                    # mm1 = zero
+        pcmpeqd mm2, mm1                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pcmpeqd mm1, mm1
+        psubd   mm1, mm7
+        psubd   mm1, mm7
+        psubd   mm1, mm0
+        pxor    mm5, mm0
+        paddw   mm5, mm1
+        paddusw mm5, mm2                    # and add to weight_AB
+        psubw   mm5, mm1
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      term_minus_3_loop
+
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     rdx, [rsp]                  # point to dpp
+        movq    [rdx+8], mm5                # put weight_AB back
+        emms
+
+        mov     edx, [rdi-4]                # dpp->samples_A [0] = bptr [-1];
+        mov     rax, [rsp] 
+        mov     [rax+16], edx
+        mov     edx, [rdi-8]                # dpp->samples_B [0] = bptr [-2];
+        mov     [rax+48], edx
+
+done:   add     rsp, 8
+        pop     rsi
+        pop     rdi
+        pop     rbx
+        pop     rbp
+        ret
+
+#######################################################################################################################
+#
+# This is the mono version of the above function. It does not use MMX and does not handle negative terms.
+#
+# void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp,
+#                                    int32_t *buffer,
+#                                    int32_t sample_count,
+#                                    int32_t long_math);
+# arguments on entry:
+#
+# System V  Windows  
+#   rdi       rcx      struct decorr_pass *dpp
+#   rsi       rdx      int32_t *buffer
+#   edx       r8       int32_t sample_count
+#   ecx       r9       int32_t long_math
+#
+# registers after entry:
+#
+#   rdi         bptr
+#   rsi         eptr
+#
+# stack usage:
+#
+# [rsp+0] = *dpp
+#
+
+#
+# Windows x64 entry point. Saves rbp, rbx, rdi and rsi, reserves an 8-byte
+# stack slot for dpp, then moves the four arguments into the registers the
+# System V entry receives them in and joins the common code at mentry.
+# (The underscore-prefixed alias is presumably for toolchains that prepend
+# an underscore to C symbol names - TODO confirm.)
+#
+
+_unpack_decorr_mono_pass_cont_x64win:
+unpack_decorr_mono_pass_cont_x64win:
+        push    rbp
+        push    rbx
+        push    rdi
+        push    rsi
+        sub     rsp, 8
+
+        mov     rdi, rcx                    # copy params from win regs to Linux regs
+        mov     rsi, rdx                    # so we can leave following code similar
+        mov     rdx, r8
+        mov     rcx, r9
+        jmp     mentry                      # jump into common portion
+
+#
+# System V entry point. Same prologue as the Windows entry; the arguments
+# are already in rdi, rsi, edx and ecx.
+#
+
+_unpack_decorr_mono_pass_cont_x64:
+unpack_decorr_mono_pass_cont_x64:
+        push    rbp
+        push    rbx
+        push    rdi
+        push    rsi
+        sub     rsp, 8
+
+#
+# Common code. The and instruction tests sample_count for zero and, being a
+# 32-bit operation, also clears the upper half of rdx so that rdx can serve
+# as a 64-bit index in the lea that computes eptr. The long_math argument
+# (ecx) is never read: every handler reloads ecx with the weight. Only the
+# low byte of the term is compared; any term other than 17 or 18 falls
+# through to default_mono_entry.
+#
+
+mentry: mov     [rsp], rdi                  # store dpp* into [rsp]
+        and     edx, edx                    # if sample_count is zero, do nothing
+        jz      mono_done
+
+        cld                                 # we use stosd
+        mov     rdi, rsi                    # rdi = bptr
+        lea     rsi, [rdi+rdx*4]            # rsi = eptr
+
+        mov     rax, [rsp]                  # get term from dpp struct & vector to handler
+        mov     eax, [rax]
+        cmp     al, 17
+        je      mono_17_entry
+        cmp     al, 18
+        je      mono_18_entry
+
+#
+# registers during default term processing loop:
+#   rdi         active buffer pointer
+#   rsi         end of buffer pointer
+#   r8d         delta
+#   ecx         weight_A
+#   rbx         term * -4 (byte offset from bptr to the correlation sample)
+#   eax,edx     scratch
+#
+
+default_mono_entry:
+        imul    rbx, rax, -4                # set rbx to term * -4 for decorrelation index
+        mov     rdx, [rsp]
+        mov     ecx, [rdx+8]                # ecx = weight, r8d = delta
+        mov     r8d, [rdx+4]
+        jmp     default_mono_loop
+
+#
+# registers during processing loop for terms 17 & 18:
+#   rdi         active buffer pointer
+#   rsi         end of buffer pointer
+#   r8d         delta
+#   ecx         weight_A
+#   ebp         previously calculated value
+#   ebx         calculated correlation sample
+#   eax,edx     scratch
+#
+
+mono_17_entry:
+        mov     rdx, [rsp]                  # rdx = dpp*
+        mov     ecx, [rdx+8]                # ecx = weight, r8d = delta
+        mov     r8d, [rdx+4]
+        mov     ebp, [rdi-4]
+        jmp     mono_17_loop
+
+mono_18_entry:
+        mov     rdx, [rsp]                  # rdx = dpp*
+        mov     ecx, [rdx+8]                # ecx = weight, r8d = delta
+        mov     r8d, [rdx+4]
+        mov     ebp, [rdi-4]
+        jmp     mono_18_loop
+
+#
+# Short (32-bit multiply) loop for the default terms. For each sample:
+#   - eax = bptr [-term] * weight. The mov that follows the imul leaves the
+#     flags alone, so jo still sees the overflow flag of the multiply; on
+#     overflow control moves to long_default_mono_loop, which redoes this
+#     sample and then handles all the remaining ones.
+#   - sar leaves the last bit shifted out in the carry flag and adc adds it
+#     back in, so the product is rounded as (product + 512) >> 10 while it
+#     is added to the sample read from [rdi].
+#   - When neither the original sample nor the correlation sample is zero,
+#     cdq turns the sign of their xor into a 0 / -1 mask in edx and the
+#     xor / add / xor sequence adds delta to the weight (signs agree) or
+#     subtracts delta from it (signs differ).
+#
+
+        .balign  64
+default_mono_loop:
+        mov     eax, [rdi+rbx]
+        imul    eax, ecx
+        mov     edx, [rdi]
+        jo      long_default_mono_loop
+        sar     eax, 10
+        adc     eax, edx
+        mov     [rdi], eax
+        mov     eax, [rdi+rbx]
+        add     rdi, 4
+        test    edx, edx
+        je      L100
+        test    eax, eax
+        je      L100
+        xor     eax, edx
+        cdq
+        xor     ecx, edx
+        add     ecx, r8d
+        xor     ecx, edx
+L100:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      default_mono_loop
+        jmp     default_mono_done
+
+#
+# Long (64-bit product) variant of the default loop. The one-operand imul
+# leaves the full product in edx:eax; shl / shr / adc rebuild the low 32
+# bits of the rounded (product >> 10), which is then added to the sample.
+# The weight update is the same as in the short loop.
+#
+
+        .balign  64
+long_default_mono_loop:
+        mov     eax, [rdi+rbx]
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        mov     [rdi], eax
+        mov     eax, [rdi+rbx]
+        add     rdi, 4
+        test    edx, edx
+        je      L101
+        test    eax, eax
+        je      L101
+        xor     eax, edx
+        cdq
+        xor     ecx, edx
+        add     ecx, r8d
+        xor     ecx, edx
+L101:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      long_default_mono_loop
+
+default_mono_done:
+        mov     rdx, [rsp]                  # rdx = dpp*
+        mov     [rdx+8], ecx                # store weight_A back
+        mov     ecx, [rdx]                  # ecx = dpp->term
+
+#
+# Walk backwards from the end of the buffer and copy the last term output
+# samples into samples_A: samples_A [term-1] = bptr [-1] down to
+# samples_A [0] = bptr [-term].
+#
+
+default_mono_store_samples:
+        dec     ecx
+        sub     rdi, 4                      # back up one full sample
+        mov     eax, [rdi]
+        mov     [rdx+rcx*4+16], eax         # store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_mono_store_samples
+        jmp     mono_done
+
+#
+# Term 17 loop. The correlation sample is 2 * bptr [-1] - bptr [-2], where
+# bptr [-1] is carried over in ebp from the previous iteration. stosd stores
+# eax at [rdi] and advances rdi by 4 (direction flag was cleared by cld).
+# The mov ebp, eax between test and je does not change the flags, so je
+# still reflects test ebx, ebx. Overflow handling, rounding and the weight
+# update work as in the default loops, with sar ebx, 31 building the
+# 0 / -1 sign mask.
+#
+
+        .balign  64
+mono_17_loop:
+        lea     ebx, [ebp+ebp]
+        sub     ebx, [rdi-8]
+        mov     eax, ecx
+        imul    eax, ebx
+        mov     edx, [rdi]
+        jo      long_mono_17_loop
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        mov     ebp, eax
+        je      L117
+        test    edx, edx
+        je      L117
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        xor     ecx, ebx
+L117:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      mono_17_loop
+        jmp     mono_1718_exit
+
+#
+# Long (64-bit product) variant of the term 17 loop; entered when the 32-bit
+# multiply overflows and used for all remaining samples.
+#
+
+        .balign  64
+long_mono_17_loop:
+        lea     ebx, [ebp+ebp]
+        sub     ebx, [rdi-8]
+        mov     eax, ecx
+        imul    ebx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        mov     ebp, eax
+        je      L217
+        test    edx, edx
+        je      L217
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        xor     ecx, ebx
+L217:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      long_mono_17_loop
+        jmp     mono_1718_exit
+
+#
+# Term 18 loop. Same structure as the term 17 loop, but the correlation
+# sample is (3 * bptr [-1] - bptr [-2]) >> 1 (arithmetic shift).
+#
+
+        .balign  64
+mono_18_loop:
+        lea     ebx, [ebp+ebp*2]
+        sub     ebx, [rdi-8]
+        sar     ebx, 1
+        mov     eax, ecx
+        imul    eax, ebx
+        mov     edx, [rdi]
+        jo      long_mono_18_loop
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        mov     ebp, eax
+        je      L118
+        test    edx, edx
+        je      L118
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        xor     ecx, ebx
+L118:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      mono_18_loop
+        jmp     mono_1718_exit
+
+#
+# Long (64-bit product) variant of the term 18 loop; falls through into
+# mono_1718_exit when the buffer is exhausted.
+#
+
+        .balign  64
+long_mono_18_loop:
+        lea     ebx, [ebp+ebp*2]
+        sub     ebx, [rdi-8]
+        sar     ebx, 1
+        mov     eax, ecx
+        imul    ebx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        mov     ebp, eax
+        je      L218
+        test    edx, edx
+        je      L218
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        xor     ecx, ebx
+L218:   cmp     rdi, rsi                    # compare bptr and eptr to see if we're done
+        jb      long_mono_18_loop
+
+mono_1718_exit:
+        mov     rdx, [rsp]                  # rdx = dpp*
+        mov     [rdx+8], ecx                # store weight_A back
+        mov     eax, [rdi-4]                # dpp->samples_A [0] = bptr [-1];
+        mov     [rdx+16], eax
+        mov     eax, [rdi-8]                # dpp->samples_A [1] = bptr [-2];
+        mov     [rdx+20], eax
+
+#
+# Shared epilogue: release the dpp stack slot and restore the saved
+# registers in the reverse order of the prologue pushes.
+#
+
+mono_done:
+        add     rsp, 8
+        pop     rsi
+        pop     rdi
+        pop     rbx
+        pop     rbp
+        ret
+
+# On ELF targets, emit an empty .note.GNU-stack section; by GNU toolchain
+# convention this marks the object as not needing an executable stack.
+#ifdef __ELF__
+        .section .note.GNU-stack,"",@progbits
+#endif
+
--- a/third_party/wavpack/src/unpack_x64.asm
+++ b/third_party/wavpack/src/unpack_x64.asm
@ -0,0 +1,930 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                           **** WAVPACK ****                            ;;
+;;                  Hybrid Lossless Wavefile Compressor                   ;;
+;;              Copyright (c) 1998 - 2015 Conifer Software.               ;;
+;;                          All Rights Reserved.                          ;;
+;;      Distributed under the BSD Software License (see license.txt)      ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+        include <ksamd64.inc>
+
+asmcode segment page 'CODE'
+
+; This is an assembly optimized version of the following WavPack function:
+;
+; void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp,
+;                                      int32_t *buffer,
+;                                      int32_t sample_count,
+;                                      int32_t long_math);
+;
+; It performs a single pass of stereo decorrelation on the provided buffer.
+; Note that this version of the function requires that up to 8 previous
+; stereo samples are visible and correct. In other words, it ignores the
+; "samples_*" fields in the decorr_pass structure and gets the history data
+; directly from the buffer. It does, however, return the appropriate history
+; samples to the decorr_pass structure before returning.
+;
+; The "long_math" argument is used to specify that a 32-bit multiply is
+; not enough for the "apply_weight" operation (although in this case it
+; would only apply to the -1 and -2 terms because the MMX code does not have
+; this limitation) but we ignore the parameter and use the overflow detection
+; of the "imul" instruction to switch automatically to the "long_math" loop.
+;
+; This is written to work on an X86-64 processor (also called the AMD64)
+; running in 64-bit mode and generally uses the MMX extensions to improve
+; the performance by processing both stereo channels together. Unfortunately
+; this is not easily used for terms -1 and -2, so these terms are handled
+; sequentially with regular assembler code.
+;
+; This version is for 64-bit Windows. The arguments are passed in registers:
+;
+;   rcx     struct decorr_pass *dpp
+;   rdx     int32_t *buffer
+;   r8d     int32_t sample_count
+;   r9d     int32_t long_math
+;
+; registers after entry:
+;
+;   rdi         bptr
+;   rsi         eptr
+;   ecx         long_math (ignored; the loops switch on imul overflow instead)
+;
+; stack usage:
+;
+; [rsp+0] = *dpp
+;
+
+unpack_decorr_stereo_pass_cont_x64win proc public frame
+        push_reg    rbp                     ; save non-volatile registers on stack
+        push_reg    rbx                     ; (alphabetically)
+        push_reg    rdi
+        push_reg    rsi
+        alloc_stack 8                       ; allocate 8 bytes on stack & align to 16 bytes
+        end_prologue
+
+        mov     [rsp], rcx                  ; [rsp] = *dpp
+        mov     rdi, rcx                    ; copy params from win regs to Linux regs
+        mov     rsi, rdx                    ; so we can leave following code similar
+        mov     rdx, r8
+        mov     rcx, r9
+
+        and     edx, edx                    ; if sample_count is zero, do nothing
+        jz      done
+
+        mov     rdi, rsi                    ; rdi = bptr
+        lea     rsi, [rdi+rdx*8]            ; rsi = eptr
+
+        mov     rax, [rsp]                  ; get term from dpp struct & vector to handler
+        mov     eax, [rax]
+        cmp     al, 17
+        je      term_17_entry
+        cmp     al, 18
+        je      term_18_entry
+        cmp     al, -1
+        je      term_minus_1_entry
+        cmp     al, -2
+        je      term_minus_2_entry
+        cmp     al, -3
+        je      term_minus_3_entry
+
+;
+; registers in default term loop:
+;
+;   rbx         term * -8 (for indexing correlation sample)
+;   rdi         bptr
+;   rsi         eptr
+;
+;   mm0, mm1    scratch
+;   mm2         original sample values
+;   mm3         correlation sample
+;   mm4         zero (for pcmpeqd)
+;   mm5         weights
+;   mm6         delta
+;   mm7         512 (for rounding)
+;
+
+default_term_entry:
+        imul    rbx, rax, -8                ; set RBX to term * -8
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     rdx, [rsp]                  ; set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                ; mm5 = weight_AB masked to 16 bits
+        pxor    mm4, mm4                    ; mm4 = zero (for pcmpeqd)
+        jmp     default_term_loop
+
+        align  64
+default_term_loop:
+        movq    mm3, [rdi+rbx]              ; mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm0, mm3
+        paddd   mm1, mm1
+        psrld   mm0, 15
+        psrlw   mm1, 1
+        pmaddwd mm0, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  ; mm2 = left_right
+        pslld   mm0, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm0, mm2
+        paddd   mm0, mm1                    ; add shifted sums
+        movq    [rdi], mm0                  ; store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pcmpeqd mm2, mm4                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm4                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    ; and add to weight_AB
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      default_term_loop
+
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     rdx, [rsp]                  ; point to dpp
+        movq    [rdx+8], mm5                ; put weight_AB back
+        emms
+
+        mov     ecx, [rdx]                  ; ecx = dpp->term
+
+default_store_samples:
+        dec     ecx
+        sub     rdi, 8                      ; back up one full sample
+        mov     eax, [rdi+4]
+        mov     [rdx+rcx*4+48], eax         ; store samples_B [ecx]
+        mov     eax, [rdi]
+        mov     [rdx+rcx*4+16], eax         ; store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_store_samples
+        jmp     done
+
+;
+; registers in term 17 & 18 loops:
+;
+;   rdi         bptr
+;   rsi         eptr
+;
+;   mm0, mm1    scratch
+;   mm2         original sample values
+;   mm3         correlation samples
+;   mm4         last calculated values (so we don't need to reload)
+;   mm5         weights
+;   mm6         delta
+;   mm7         512 (for rounding)
+;
+
+term_17_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     rdx, [rsp]                  ; set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                ; mm5 = weight_AB masked to 16 bits
+        movq    mm4, [rdi-8]                ; preload last calculated values in mm4
+        jmp     term_17_loop
+
+        align  64
+term_17_loop:
+        paddd   mm4, mm4
+        psubd   mm4, [rdi-16]               ; mm3 = sam_AB
+        movq    mm3, mm4
+        movq    mm1, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  ; mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    ; add shifted sums
+        movq    mm0, mm3
+        movq    [rdi], mm4                  ; store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pxor    mm1, mm1                    ; mm1 = zero
+        pcmpeqd mm2, mm1                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    ; and add to weight_AB
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      term_17_loop
+        jmp     term_1718_exit              ; terms 17 & 18 treat samples_AB[] the same
+
+term_18_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     rdx, [rsp]                  ; set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                ; mm5 = weight_AB masked to 16 bits
+        movq    mm4, [rdi-8]                ; preload last calculated values in mm4
+        jmp     term_18_loop
+
+        align  64
+term_18_loop:
+        movq    mm3, mm4
+        psubd   mm3, [rdi-16]
+        psrad   mm3, 1
+        paddd   mm3, mm4                    ; mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  ; mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    ; add shifted sums
+        movq    mm0, mm3
+        movq    [rdi], mm4                  ; store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pxor    mm1, mm1                    ; mm1 = zero
+        pcmpeqd mm2, mm1                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    ; and add to weight_AB
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      term_18_loop
+
+term_1718_exit:
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     rdx, [rsp]                  ; point to dpp
+        movq    [rdx+8], mm5                ; put weight_AB back
+        emms
+
+        mov     eax, [rdi-4]                ; dpp->samples_B [0] = bptr [-1];
+        mov     [rdx+48], eax
+        mov     eax, [rdi-8]                ; dpp->samples_A [0] = bptr [-2];
+        mov     [rdx+16], eax
+        mov     eax, [rdi-12]               ; dpp->samples_B [1] = bptr [-3];
+        mov     [rdx+52], eax
+        mov     eax, [rdi-16]               ; dpp->samples_A [1] = bptr [-4];
+        mov     [rdx+20], eax
+        jmp     done
+
+;
+; registers in term -1 & -2 loops:
+;
+;   eax,ebx,edx scratch
+;   ecx         weight_A
+;   ebp         weight_B
+;   rdi         bptr
+;   rsi         eptr
+;   r8d         delta
+;
+
+term_minus_1_entry:
+        cld
+        mov     rdx, [rsp]                  ; point to dpp
+        mov     ecx, [rdx+8]                ; ecx = weight_A
+        mov     ebp, [rdx+12]               ; ebp = weight_B
+        mov     r8d, [rdx+4]                ; r8d = delta
+        mov     eax, [rdi-4]
+        jmp     term_minus_1_loop
+
+        align  64
+term_minus_1_loop:
+        mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [rdi]
+        jo      OV11
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L182
+        test    edx, edx
+        je      L182
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L183
+        mov     ecx, edx
+L183:   xor     ecx, ebx
+L182:   mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [rdi]
+        jo      OV12
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L187
+        test    edx, edx
+        je      L187
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L188
+        mov     ebp, edx
+L188:   xor     ebp, ebx
+L187:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      term_minus_1_loop
+        jmp     term_minus_1_done
+
+OV11:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     long_term_minus_1_loop
+
+OV12:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     L282
+
+        align  64
+long_term_minus_1_loop:
+        mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L282
+        test    edx, edx
+        je      L282
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L283
+        mov     ecx, edx
+L283:   xor     ecx, ebx
+L282:   mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L287
+        test    edx, edx
+        je      L287
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L288
+        mov     ebp, edx
+L288:   xor     ebp, ebx
+L287:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      long_term_minus_1_loop
+
+term_minus_1_done:
+        mov     rdx, [rsp]                  ; point to dpp
+        mov     [rdx+8], ecx                ; store weights back
+        mov     [rdx+12], ebp
+        mov     eax, [rdi-4]                ; dpp->samples_A [0] = bptr [-1];
+        mov     [rdx+16], eax
+        jmp     done
+
+term_minus_2_entry:
+        mov     rdx, [rsp]                  ; point to dpp
+        mov     ecx, [rdx+8]                ; ecx = weight_A
+        mov     ebp, [rdx+12]               ; ebp = weight_B
+        mov     r8d, [rdx+4]                ; r8d = delta
+        mov     eax, [rdi-8]
+        jmp     term_minus_2_loop
+
+        align  64
+term_minus_2_loop:
+        mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [rdi+4]
+        jo      OV21
+        sar     eax, 10
+        adc     eax, edx
+        mov     [rdi+4], eax
+        test    ebx, ebx
+        je      L194
+        test    edx, edx
+        je      L194
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L195
+        mov     ebp, edx
+L195:   xor     ebp, ebx
+L194:   mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [rdi]
+        jo      OV22
+        sar     eax, 10
+        adc     eax, edx
+        mov     [rdi], eax
+        test    ebx, ebx
+        je      L199
+        test    edx, edx
+        je      L199
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L200
+        mov     ecx, edx
+L200:   xor     ecx, ebx
+L199:   add     rdi, 8
+        cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      term_minus_2_loop
+        jmp     term_minus_2_done
+
+OV21:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     long_term_minus_2_loop
+
+OV22:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     L294
+
+        align  64
+long_term_minus_2_loop:
+        mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi+4]
+        add     eax, edx
+        mov     [rdi+4], eax
+        test    ebx, ebx
+        je      L294
+        test    edx, edx
+        je      L294
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L295
+        mov     ebp, edx
+L295:   xor     ebp, ebx
+L294:   mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [rdi]
+        add     eax, edx
+        mov     [rdi], eax
+        test    ebx, ebx
+        je      L299
+        test    edx, edx
+        je      L299
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, r8d
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L300
+        mov     ecx, edx
+L300:   xor     ecx, ebx
+L299:   add     rdi, 8
+        cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      long_term_minus_2_loop
+
+term_minus_2_done:
+        mov     rdx, [rsp]                  ; point to dpp
+        mov     [rdx+8], ecx                ; store weights back
+        mov     [rdx+12], ebp
+        mov     eax, [rdi-8]                ; dpp->samples_B [0] = bptr [-2];
+        mov     [rdx+48], eax
+        jmp     done
+
+;
+; registers in term -3 loop:
+;
+;   rdi         bptr
+;   rsi         eptr
+;
+;   mm0, mm1    scratch
+;   mm2         original sample values
+;   mm3         correlation samples
+;   mm4         last calculated values (so we don't need to reload)
+;   mm5         weights
+;   mm6         delta
+;   mm7         512 (for rounding)
+;
+
+term_minus_3_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     rdx, [rsp]                  ; set RDX to *dpp
+        mov     eax, [rdx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [rdx+8]                ; mm5 = weight_AB masked to 16 bits
+        movq    mm4, [rdi-8]
+        jmp     term_minus_3_loop
+
+        align  64
+term_minus_3_loop:
+        movq    mm3, mm4
+        psrlq   mm3, 32
+        punpckldq mm3, mm4                  ; mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        pslld   mm1, 1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [rdi]                  ; mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    ; add shifted sums
+        movq    [rdi], mm4                  ; store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     rdi, 8
+        pxor    mm1, mm1                    ; mm1 = zero
+        pcmpeqd mm2, mm1                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pcmpeqd mm1, mm1
+        psubd   mm1, mm7
+        psubd   mm1, mm7
+        psubd   mm1, mm0
+        pxor    mm5, mm0
+        paddw   mm5, mm1
+        paddusw mm5, mm2                    ; and add to weight_AB
+        psubw   mm5, mm1
+        pxor    mm5, mm0
+        cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      term_minus_3_loop
+
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     rdx, [rsp]                  ; point to dpp
+        movq    [rdx+8], mm5                ; put weight_AB back
+        emms
+
+        mov     edx, [rdi-4]                ; dpp->samples_A [0] = bptr [-1];
+        mov     rax, [rsp] 
+        mov     [rax+16], edx
+        mov     edx, [rdi-8]                ; dpp->samples_B [0] = bptr [-2];
+        mov     [rax+48], edx
+
+done:   add     rsp, 8                      ; begin epilog by deallocating stack
+        pop     rsi                         ; restore non-volatile registers & return
+        pop     rdi
+        pop     rbx
+        pop     rbp
+        ret
+
+unpack_decorr_stereo_pass_cont_x64win endp
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This is the mono version of the above function. It does not use MMX and does not
+; handle negative terms (since they don't apply to mono), but is otherwise similar.
+;
+; void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp,
+;                                    int32_t *buffer,
+;                                    int32_t sample_count,
+;                                    int32_t long_math);
+; arguments on entry:
+;
+;   rcx     struct decorr_pass *dpp
+;   rdx     int32_t *buffer
+;   r8d     int32_t sample_count
+;   r9d     int32_t long_math
+;
+; registers after entry:
+;
+;   rdi         bptr
+;   rsi         eptr
+;   ecx         long_math
+;
+; stack usage:
+;
+; [rsp+0] = *dpp
+;
+
+unpack_decorr_mono_pass_cont_x64win proc public frame
+;
+; Runs one mono decorrelation pass in place over the samples in "buffer",
+; then writes the updated weight and the most recent history samples back
+; into the decorr_pass structure. Structure offsets used below:
+;   [dpp+0] term, [dpp+4] delta, [dpp+8] weight_A, [dpp+16] samples_A []
+;
+        push_reg    rbp                     ; save non-volatile registers on stack
+        push_reg    rbx                     ; (alphabetically)
+        push_reg    rdi
+        push_reg    rsi
+        alloc_stack 8                       ; allocate 8 bytes on stack & align to 16 bytes
+        end_prologue
+
+        mov     [rsp], rcx                  ; [rsp] = dpp pointer (reloaded from here whenever needed)
+        mov     rdi, rcx                    ; copy params from win regs to Linux regs
+        mov     rsi, rdx                    ; so we can leave following code similar
+        mov     rdx, r8                     ; rdx = sample_count
+        mov     rcx, r9                     ; rcx = long_math (never read: every entry path reloads ecx
+                                            ; with the weight, and overflow is detected with "jo" instead)
+
+        and     edx, edx                    ; if sample_count is zero, do nothing
+        jz      mono_done
+
+        cld                                 ; clear direction flag so stosd advances rdi
+        mov     rdi, rsi                    ; rdi = bptr
+        lea     rsi, [rdi+rdx*4]            ; rsi = eptr
+
+        mov     rax, [rsp]                  ; get term from dpp struct & vector to handler
+        mov     eax, [rax]                  ; eax = dpp->term
+        cmp     al, 17                      ; only the low byte of term is compared
+        je      mono_17_entry
+        cmp     al, 18
+        je      mono_18_entry
+
+;
+; registers during default term processing loop:
+;   rdi         active buffer pointer
+;   rsi         end of buffer pointer
+;   r8d         delta
+;   ecx         weight_A
+;   ebx         term * -4
+;   eax,edx     scratch
+;
+
+default_mono_entry:
+        imul    rbx, rax, -4                ; set rbx to term * -4 for decorrelation index
+        mov     rdx, [rsp]
+        mov     ecx, [rdx+8]                ; ecx = weight, r8d = delta
+        mov     r8d, [rdx+4]
+        jmp     default_mono_loop
+
+;
+; registers during processing loop for terms 17 & 18:
+;   rdi         active buffer pointer
+;   rsi         end of buffer pointer
+;   r8d         delta
+;   ecx         weight_A
+;   ebp         previously calculated value
+;   ebx         calculated correlation sample
+;   eax,edx     scratch
+;
+
+mono_17_entry:
+        mov     rdx, [rsp]                  ; rdx = dpp*
+        mov     ecx, [rdx+8]                ; ecx = weight, r8d = delta
+        mov     r8d, [rdx+4]
+        mov     ebp, [rdi-4]                ; ebp = bptr [-1] (previous output sample)
+        jmp     mono_17_loop
+
+mono_18_entry:
+        mov     rdx, [rsp]                  ; rdx = dpp*
+        mov     ecx, [rdx+8]                ; ecx = weight, r8d = delta
+        mov     r8d, [rdx+4]
+        mov     ebp, [rdi-4]                ; ebp = bptr [-1] (previous output sample)
+        jmp     mono_18_loop
+
+;
+; Fast path: 32-bit multiply. If the product overflows 32 bits, control moves
+; to the "long" loop below, which redoes the same sample with a 64-bit product
+; and stays in that loop for the rest of the buffer.
+;
+
+        align  64
+default_mono_loop:
+        mov     eax, [rdi+rbx]              ; eax = correlation sample, bptr [-term]
+        imul    eax, ecx                    ; eax = sample * weight (sets OF on overflow)
+        mov     edx, [rdi]                  ; edx = original value (mov leaves OF intact for jo)
+        jo      long_default_mono_loop      ; product did not fit in 32 bits
+        sar     eax, 10                     ; scale product; CF = last bit shifted out
+        adc     eax, edx                    ; add original value plus CF (rounds the scaled product)
+        mov     [rdi], eax                  ; store result
+        mov     eax, [rdi+rbx]              ; reload correlation sample for the weight update
+        add     rdi, 4
+        test    edx, edx                    ; no weight update if original value was zero
+        je      L100
+        test    eax, eax                    ; ...or if correlation sample was zero
+        je      L100
+        xor     eax, edx                    ; sign bit set if the two values differ in sign
+        cdq                                 ; edx = 0 (same sign) or -1 (different sign)
+        xor     ecx, edx                    ; with edx = -1 these three instructions compute
+        add     ecx, r8d                    ; weight - delta, otherwise weight + delta
+        xor     ecx, edx
+L100:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      default_mono_loop
+        jmp     default_mono_done
+
+        align  64
+long_default_mono_loop:
+        mov     eax, [rdi+rbx]              ; eax = correlation sample, bptr [-term]
+        imul    ecx                         ; edx:eax = sample * weight (64-bit signed product)
+        shl     edx, 22                     ; high half supplies result bits 22-31
+        shr     eax, 10                     ; low half supplies result bits 0-21; CF = product bit 9
+        adc     eax, edx                    ; eax = low 32 bits of (product >> 10), rounded via CF
+        mov     edx, [rdi]                  ; edx = original value
+        add     eax, edx
+        mov     [rdi], eax                  ; store result
+        mov     eax, [rdi+rbx]              ; reload correlation sample for the weight update
+        add     rdi, 4
+        test    edx, edx                    ; no weight update if either value was zero
+        je      L101
+        test    eax, eax
+        je      L101
+        xor     eax, edx                    ; same sign-based weight +/- delta update as above
+        cdq
+        xor     ecx, edx
+        add     ecx, r8d
+        xor     ecx, edx
+L101:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      long_default_mono_loop
+
+default_mono_done:
+        mov     rdx, [rsp]                  ; rdx = dpp*
+        mov     [rdx+8], ecx                ; store weight_A back
+        mov     ecx, [rdx]                  ; ecx = dpp->term
+
+;
+; Copy the last "term" output samples into the history array, walking the
+; buffer backwards: samples_A [term-1] = bptr [-1] ... samples_A [0] = bptr [-term]
+;
+
+default_mono_store_samples:
+        dec     ecx
+        sub     rdi, 4                      ; back up one full sample
+        mov     eax, [rdi]
+        mov     [rdx+rcx*4+16], eax         ; store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_mono_store_samples
+        jmp     mono_done
+
+        align  64
+mono_17_loop:
+        lea     ebx, [ebp+ebp]              ; ebx = 2 * bptr [-1]
+        sub     ebx, [rdi-8]                ; ebx = 2 * bptr [-1] - bptr [-2] (correlation sample)
+        mov     eax, ecx
+        imul    eax, ebx                    ; eax = weight * sample (sets OF on overflow)
+        mov     edx, [rdi]                  ; edx = original value (mov leaves OF intact for jo)
+        jo      long_mono_17_loop           ; product did not fit in 32 bits
+        sar     eax, 10                     ; scale product; CF = last bit shifted out
+        adc     eax, edx                    ; add original value plus CF (rounds the scaled product)
+        stosd                               ; store result at [rdi] and advance rdi by 4
+        test    ebx, ebx                    ; no weight update if correlation sample was zero
+        mov     ebp, eax                    ; remember result for next iteration (flags unchanged)
+        je      L117
+        test    edx, edx                    ; ...or if original value was zero
+        je      L117
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same sign) or -1 (different sign)
+        xor     ecx, ebx                    ; weight += delta if same sign, else weight -= delta
+        add     ecx, r8d
+        xor     ecx, ebx
+L117:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      mono_17_loop
+        jmp     mono_1718_exit
+
+        align  64
+long_mono_17_loop:
+        lea     ebx, [ebp+ebp]              ; ebx = 2 * bptr [-1]
+        sub     ebx, [rdi-8]                ; ebx = 2 * bptr [-1] - bptr [-2] (correlation sample)
+        mov     eax, ecx
+        imul    ebx                         ; edx:eax = weight * sample (64-bit signed product)
+        shl     edx, 22                     ; high half supplies result bits 22-31
+        shr     eax, 10                     ; low half supplies result bits 0-21; CF = product bit 9
+        adc     eax, edx                    ; eax = low 32 bits of (product >> 10), rounded via CF
+        mov     edx, [rdi]                  ; edx = original value
+        add     eax, edx
+        stosd                               ; store result at [rdi] and advance rdi by 4
+        test    ebx, ebx                    ; no weight update if correlation sample was zero
+        mov     ebp, eax                    ; remember result for next iteration (flags unchanged)
+        je      L217
+        test    edx, edx                    ; ...or if original value was zero
+        je      L217
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same sign) or -1 (different sign)
+        xor     ecx, ebx                    ; weight += delta if same sign, else weight -= delta
+        add     ecx, r8d
+        xor     ecx, ebx
+L217:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      long_mono_17_loop
+        jmp     mono_1718_exit
+
+        align  64
+mono_18_loop:
+        lea     ebx, [ebp+ebp*2]            ; ebx = 3 * bptr [-1]
+        sub     ebx, [rdi-8]                ; ebx = 3 * bptr [-1] - bptr [-2]
+        sar     ebx, 1                      ; ebx = (3 * bptr [-1] - bptr [-2]) >> 1 (correlation sample)
+        mov     eax, ecx
+        imul    eax, ebx                    ; eax = weight * sample (sets OF on overflow)
+        mov     edx, [rdi]                  ; edx = original value (mov leaves OF intact for jo)
+        jo      long_mono_18_loop           ; product did not fit in 32 bits
+        sar     eax, 10                     ; scale product; CF = last bit shifted out
+        adc     eax, edx                    ; add original value plus CF (rounds the scaled product)
+        stosd                               ; store result at [rdi] and advance rdi by 4
+        test    ebx, ebx                    ; no weight update if correlation sample was zero
+        mov     ebp, eax                    ; remember result for next iteration (flags unchanged)
+        je      L118
+        test    edx, edx                    ; ...or if original value was zero
+        je      L118
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same sign) or -1 (different sign)
+        xor     ecx, ebx                    ; weight += delta if same sign, else weight -= delta
+        add     ecx, r8d
+        xor     ecx, ebx
+L118:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      mono_18_loop
+        jmp     mono_1718_exit
+
+        align  64
+long_mono_18_loop:
+        lea     ebx, [ebp+ebp*2]            ; ebx = 3 * bptr [-1]
+        sub     ebx, [rdi-8]                ; ebx = 3 * bptr [-1] - bptr [-2]
+        sar     ebx, 1                      ; ebx = (3 * bptr [-1] - bptr [-2]) >> 1 (correlation sample)
+        mov     eax, ecx
+        imul    ebx                         ; edx:eax = weight * sample (64-bit signed product)
+        shl     edx, 22                     ; high half supplies result bits 22-31
+        shr     eax, 10                     ; low half supplies result bits 0-21; CF = product bit 9
+        adc     eax, edx                    ; eax = low 32 bits of (product >> 10), rounded via CF
+        mov     edx, [rdi]                  ; edx = original value
+        add     eax, edx
+        stosd                               ; store result at [rdi] and advance rdi by 4
+        test    ebx, ebx                    ; no weight update if correlation sample was zero
+        mov     ebp, eax                    ; remember result for next iteration (flags unchanged)
+        je      L218
+        test    edx, edx                    ; ...or if original value was zero
+        je      L218
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same sign) or -1 (different sign)
+        xor     ecx, ebx                    ; weight += delta if same sign, else weight -= delta
+        add     ecx, r8d
+        xor     ecx, ebx
+L218:   cmp     rdi, rsi                    ; compare bptr and eptr to see if we're done
+        jb      long_mono_18_loop
+
+mono_1718_exit:
+        mov     rdx, [rsp]                  ; rdx = dpp*
+        mov     [rdx+8], ecx                ; store weight_A back
+        mov     eax, [rdi-4]                ; dpp->samples_A [0] = bptr [-1];
+        mov     [rdx+16], eax
+        mov     eax, [rdi-8]                ; dpp->samples_A [1] = bptr [-2];
+        mov     [rdx+20], eax
+
+mono_done:
+        add     rsp, 8                      ; begin epilog by deallocating stack
+        pop     rsi                         ; restore non-volatile registers & return
+        pop     rdi
+        pop     rbx
+        pop     rbp
+        ret
+
+unpack_decorr_mono_pass_cont_x64win endp
+
+asmcode ends
+
+        end
+
+
--- a/third_party/wavpack/src/unpack_x86.S
+++ b/third_party/wavpack/src/unpack_x86.S
@ -0,0 +1,970 @@
+############################################################################
+##                           **** WAVPACK ****                            ##
+##                  Hybrid Lossless Wavefile Compressor                   ##
+##              Copyright (c) 1998 - 2015 Conifer Software.               ##
+##                          All Rights Reserved.                          ##
+##      Distributed under the BSD Software License (see license.txt)      ##
+############################################################################
+
+        .intel_syntax noprefix
+        .text
+
+        .globl  _unpack_decorr_stereo_pass_cont_x86
+        .globl  _unpack_decorr_mono_pass_cont_x86
+        .globl  _unpack_cpu_has_feature_x86
+
+        .globl  unpack_decorr_stereo_pass_cont_x86
+        .globl  unpack_decorr_mono_pass_cont_x86
+        .globl  unpack_cpu_has_feature_x86
+
+
+# This module contains X86 assembly optimized versions of functions required
+# to decode WavPack files. Note that the stereo versions of these functions
+# use the MMX registers and instructions of the X86 processor, and so a
+# helper function is provided to make a runtime check for that feature.
+
+# This is an assembly optimized version of the following WavPack function:
+#
+# void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp,
+#                                      int32_t *buffer,
+#                                      int32_t sample_count,
+#                                      int32_t long_math);
+#
+# It performs a single pass of stereo decorrelation on the provided buffer.
+# Note that this version of the function requires that up to 8 previous
+# stereo samples are visible and correct. In other words, it ignores the
+# "samples_*" fields in the decorr_pass structure and gets the history data
+# directly from the buffer. It does, however, return the appropriate history
+# samples to the decorr_pass structure before returning.
+#
+# The "long_math" argument is used to specify that a 32-bit multiply is
+# not enough for the "apply_weight" operation (although in this case it
+# would only apply to the -1 and -2 terms because the MMX code does not have
+# this limitation) but we ignore the parameter and use the overflow detection
+# of the "imul" instruction to switch automatically to the "long_math" loop.
+#
+# This is written to work on an IA-32 processor and uses the MMX extensions
+# to improve the performance by processing both stereo channels together.
+# For terms -1 and -2 the MMX extensions are not usable, and so these are
+# performed independently without them.
+#
+# arguments on entry:
+#
+#   struct decorr_pass *dpp     [ebp+8]
+#   int32_t *buffer             [ebp+12]
+#   int32_t sample_count        [ebp+16]
+#   int32_t long_math           [ebp+20]
+#
+# registers after entry:
+#
+#   edi         bptr
+#   esi         eptr
+#
+# on stack (used for terms -1 and -2 only):
+# 
+#   int32_t delta             DWORD [esp]
+#
+
+_unpack_decorr_stereo_pass_cont_x86:
+unpack_decorr_stereo_pass_cont_x86:
+        push    ebp
+        mov     ebp, esp
+        push    ebx
+        push    esi
+        push    edi
+
+        mov     edx, [ebp+8]                # copy delta from dpp to top of stack
+        mov     eax, [edx+4]
+        push    eax
+
+        mov     edi, [ebp+12]               # edi = buffer
+        mov     eax, [ebp+16]               # get sample_count and divide by 8
+        shl     eax, 3
+        jz      done                        # exit now if there's nothing to do
+
+        add     eax, edi                    # else add to buffer point to make eptr
+        mov     esi, eax
+    
+        mov     eax, [ebp+8]                # get term from dpp and vector appropriately
+        mov     eax, [eax]
+        cmp     eax, 17
+        je      term_17_entry
+        cmp     eax, 18
+        je      term_18_entry
+        cmp     eax, -1
+        je      term_minus_1_entry
+        cmp     eax, -2
+        je      term_minus_2_entry
+        cmp     eax, -3
+        je      term_minus_3_entry
+
+#
+# registers during default term processing loop:
+#   edi         active buffer pointer
+#   esi         end of buffer pointer
+#
+# MMX:
+#   mm0, mm1    scratch
+#   mm2         original sample values
+#   mm3         correlation samples
+#   mm4         zero (for pcmpeqd)
+#   mm5         weights
+#   mm6         delta
+#   mm7         512 (for rounding)
+#
+
+default_term_entry:
+        imul    ebx, eax, -8                # set ebx to term * -8 for decorrelation index
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     edx, [ebp+8]                # edx = *dpp
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                # mm5 = weight_AB masked to 16 bits
+        pxor    mm4, mm4                    # mm4 = zero (for pcmpeqd)
+        jmp     default_term_loop
+
+        .balign  64
+default_term_loop:
+        movq    mm3, [edi+ebx]              # mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm0, mm3
+        paddd   mm1, mm1
+        psrld   mm0, 15
+        psrlw   mm1, 1
+        pmaddwd mm0, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  # mm2 = left_right
+        pslld   mm0, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm0, mm2
+        paddd   mm0, mm1                    # add shifted sums
+        movq    [edi], mm0                  # store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pcmpeqd mm2, mm4                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm4                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    # and add to weight_AB
+        pxor    mm5, mm0
+        cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      default_term_loop
+
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                # point to dpp
+        movq    [eax+8], mm5                # put weight_AB back
+        emms
+        mov     edx, [ebp+8]                # access dpp with edx
+        mov     ecx, [edx]                  # ecx = dpp->term
+
+default_store_samples:
+        dec     ecx
+        sub     edi, 8                      # back up one full sample
+        mov     eax, [edi+4]
+        mov     [edx+ecx*4+48], eax         # store samples_B [ecx]
+        mov     eax, [edi]
+        mov     [edx+ecx*4+16], eax         # store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_store_samples
+
+        jmp     done
+
+#
+# registers during processing loop for terms 17 & 18:
+#   edi         active buffer pointer
+#   esi         end of buffer pointer
+#
+# MMX:
+#   mm0, mm1    scratch
+#   mm2         original sample values
+#   mm3         calculated correlation samples
+#   mm4         last calculated values (so we don't need to reload)
+#   mm5         weights
+#   mm6         delta
+#   mm7         512 (for rounding)
+#
+
+term_17_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     edx, [ebp+8]                # point to dpp & get delta
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                # mm5 = weight_AB masked to 16 bits
+        movq    mm4, [edi-8]                # preload previous calculated values
+        jmp     term_17_loop
+
+        .balign  64
+term_17_loop:
+        paddd   mm4, mm4
+        psubd   mm4, [edi-16]               # mm3 = sam_AB
+        movq    mm3, mm4
+        movq    mm1, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  # mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    # add shifted sums
+        movq    mm0, mm3
+        movq    [edi], mm4                  # store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pxor    mm1, mm1                    # mm1 = zero
+        pcmpeqd mm2, mm1                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    # and add to weight_AB
+        pxor    mm5, mm0
+        cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      term_17_loop
+
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                # point to dpp
+        movq    [eax+8], mm5                # put weight_AB back
+        emms
+        jmp     term_1718_exit
+
+term_18_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     edx, [ebp+8]                # point to dpp & get delta
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                # mm5 = weight_AB masked to 16 bits
+        movq    mm4, [edi-8]                # preload previous calculated value
+        jmp     term_18_loop
+
+        .balign  64
+term_18_loop:
+        movq    mm3, mm4
+        psubd   mm3, [edi-16]
+        psrad   mm3, 1
+        paddd   mm3, mm4                    # mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  # mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    # add shifted sums
+        movq    mm0, mm3
+        movq    [edi], mm4                  # store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pxor    mm1, mm1                    # mm1 = zero
+        pcmpeqd mm2, mm1                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    # and add to weight_AB
+        pxor    mm5, mm0
+        cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      term_18_loop
+
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                # point to dpp
+        movq    [eax+8], mm5                # put weight_AB back
+        emms
+
+term_1718_exit:
+        mov     edx, [edi-4]                # dpp->samples_B [0] = bptr [-1];
+        mov     eax, [ebp+8]
+        mov     [eax+48], edx
+        mov     edx, [edi-8]                # dpp->samples_A [0] = bptr [-2];
+        mov     [eax+16], edx
+        mov     edx, [edi-12]               # dpp->samples_B [1] = bptr [-3];
+        mov     [eax+52], edx
+        mov     edx, [edi-16]               # dpp->samples_A [1] = bptr [-4];
+        mov     [eax+20], edx
+        jmp     done
+
+#
+# registers in term -1 & -2 loops:
+#
+#   eax,ebx,edx scratch
+#   ecx         weight_A
+#   ebp         weight_B
+#   edi         bptr
+#   esi         eptr
+#
+
+term_minus_1_entry:
+        cld                                 # we use stosd here...
+        mov     eax, [ebp+8]                # point to dpp
+        mov     ecx, [eax+8]                # ecx = weight_A and ebp = weight_B
+        mov     ebp, [eax+12]
+        mov     eax, [edi-4]
+        jmp     term_minus_1_loop
+
+        .balign  64
+term_minus_1_loop:
+        mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [edi]
+        jo      OV11
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L182
+        test    edx, edx
+        je      L182
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L183
+        mov     ecx, edx
+L183:   xor     ecx, ebx
+L182:   mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [edi]
+        jo      OV12
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L189
+        test    edx, edx
+        je      L189
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L188
+        mov     ebp, edx
+L188:   xor     ebp, ebx
+L189:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      term_minus_1_loop
+        jmp     term_minus_1_done
+
+OV11:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     long_term_minus_1_loop
+
+OV12:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     L282
+
+        .balign  64
+long_term_minus_1_loop:
+        mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L282
+        test    edx, edx
+        je      L282
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L283
+        mov     ecx, edx
+L283:   xor     ecx, ebx
+L282:   mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L289
+        test    edx, edx
+        je      L289
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L288
+        mov     ebp, edx
+L288:   xor     ebp, ebx
+L289:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      long_term_minus_1_loop
+
+term_minus_1_done:
+        mov     edx, ebp
+        mov     ebp, esp                    # restore ebp (we've pushed 4 DWORDS)
+        add     ebp, 16
+        mov     eax, [ebp+8]                # point to dpp
+        mov     [eax+8], ecx
+        mov     [eax+12], edx
+        mov     edx, [edi-4]                # dpp->samples_A [0] = bptr [-1]
+        mov     [eax+16], edx
+        jmp     done
+
+
+term_minus_2_entry:
+        mov     eax, [ebp+8]                # point to dpp
+        mov     ecx, [eax+8]                # ecx = weight_A and ebp = weight_B
+        mov     ebp, [eax+12]
+        mov     eax, [edi-8]
+        jmp     term_minus_2_loop
+
+        .balign  64
+term_minus_2_loop:
+        mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [edi+4]
+        jo      OV21
+        sar     eax, 10
+        adc     eax, edx
+        mov     [edi+4], eax
+        test    ebx, ebx
+        je      L194
+        test    edx, edx
+        je      L194
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L195
+        mov     ebp, edx
+L195:   xor     ebp, ebx
+L194:   mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [edi]
+        jo      OV22
+        sar     eax, 10
+        adc     eax, edx
+        mov     [edi], eax
+        add     edi, 8
+        test    ebx, ebx
+        je      L201
+        test    edx, edx
+        je      L201
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L200
+        mov     ecx, edx
+L200:   xor     ecx, ebx
+L201:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      term_minus_2_loop
+        jmp     term_minus_2_done
+
+OV21:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     long_term_minus_2_loop
+
+OV22:   mov     eax, ebx                    # restore previous sample into eax
+        jmp     L294
+
+        .balign  64
+long_term_minus_2_loop:
+        mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi+4]
+        add     eax, edx
+        mov     [edi+4], eax
+        test    ebx, ebx
+        je      L294
+        test    edx, edx
+        je      L294
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L295
+        mov     ebp, edx
+L295:   xor     ebp, ebx
+L294:   mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi]
+        add     eax, edx
+        mov     [edi], eax
+        add     edi, 8
+        test    ebx, ebx
+        je      L301
+        test    edx, edx
+        je      L301
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L300
+        mov     ecx, edx
+L300:   xor     ecx, ebx
+L301:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      long_term_minus_2_loop
+
+term_minus_2_done:
+        mov     edx, ebp
+        lea     ebp, [esp+16]               # restore ebp (we've pushed 4 DWORDS)
+        mov     eax, [ebp+8]                # point to dpp
+        mov     [eax+8], ecx
+        mov     [eax+12], edx
+        mov     edx, [edi-8]                # dpp->samples_B [0] = bptr [-2];
+        mov     [eax+48], edx
+        jmp     done
+
+#
+# registers during processing loop for term -3:
+#   edi         active buffer pointer
+#   esi         end of buffer pointer
+#
+# MMX:
+#   mm0, mm1    scratch
+#   mm2         original sample values
+#   mm3         calculated correlation samples
+#   mm4         last calculated values (so we don't need to reload)
+#   mm5         weights
+#   mm6         delta
+#   mm7         512 (for rounding)
+#
+
+term_minus_3_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  # mm7 = round (512)
+        mov     edx, [ebp+8]                # point to dpp & get delta
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  # mm6 = delta (0-7)
+        mov     eax, 0xFFFF                 # mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  # mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                # mm5 = weight_AB masked to 16 bits
+        movq    mm4, [edi-8]                # preload previous calculated values
+        jmp     term_minus_3_loop
+
+        .balign  64
+term_minus_3_loop:
+        movq    mm3, mm4                    # mm3 = swap dwords (mm4)
+        psrlq   mm3, 32
+        punpckldq mm3, mm4                  # mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        pslld   mm1, 1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  # mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    # add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    # add shifted sums
+        movq    [edi], mm4                  # store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     # mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pxor    mm1, mm1                    # mm1 = zero
+        pcmpeqd mm2, mm1                    # mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    # mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    # mm2 = 1s if either was zero
+        pandn   mm2, mm6                    # mask delta with zeros check
+        pcmpeqd mm1, mm1
+        psubd   mm1, mm7
+        psubd   mm1, mm7
+        psubd   mm1, mm0
+        pxor    mm5, mm0
+        paddw   mm5, mm1
+        paddusw mm5, mm2                    # and add to weight_AB
+        psubw   mm5, mm1
+        pxor    mm5, mm0
+        cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      term_minus_3_loop
+
+        pslld   mm5, 16                     # sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                # point to dpp
+        movq    [eax+8], mm5                # put weight_AB back
+        emms
+        mov     edx, [edi-4]                # dpp->samples_A [0] = bptr [-1];
+        mov     eax, [ebp+8]
+        mov     [eax+16], edx
+        mov     edx, [edi-8]                # dpp->samples_B [0] = bptr [-2];
+        mov     [eax+48], edx
+
+done:   pop     eax                         # pop delta & saved regs
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+
+#######################################################################################################################
+#
+# This is the mono version of the above function. It does not use MMX and does not handle negative terms.
+#
+# void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp,
+#                                    int32_t *buffer,
+#                                    int32_t sample_count,
+#                                    int32_t long_math);
+# arguments on entry:
+#
+#   struct decorr_pass *dpp     [ebp+8]
+#   int32_t *buffer             [ebp+12]
+#   int32_t sample_count        [ebp+16]
+#   int32_t long_math           [ebp+20]
+#
+# registers after entry:
+#
+#   edi         bptr
+#   esi         eptr
+#
+# on stack:
+#
+#   int32_t delta             DWORD [esp]
+#
+
+_unpack_decorr_mono_pass_cont_x86:          # same entry point under two names (with and without leading underscore)
+unpack_decorr_mono_pass_cont_x86:
+        push    ebp                         # set up frame so the arguments are at [ebp+8]..[ebp+20]
+        mov     ebp, esp
+        push    ebx                         # save the registers that are modified below
+        push    esi
+        push    edi
+        cld                                 # clear direction flag so stosd advances edi
+
+        mov     edx, [ebp+8]                # copy delta from dpp to local stack
+        mov     eax, [edx+4]                # eax = delta (dword at dpp+4)
+        push    eax                         # delta stays at [esp] for the rest of the function
+
+        mov     edi, [ebp+12]               # edi = buffer
+        mov     eax, [ebp+16]               # get sample_count and multiply by 4
+        shl     eax, 2                      # eax = byte count; ZF is set if it is zero
+        jz      mono_done                   # exit now if there's nothing to do
+        lea     esi, [edi+eax]              # else add to buffer pointer to make eptr
+
+        mov     eax, [ebp+8]                # get term from dpp and vector appropriately
+        mov     eax, [eax]                  # eax = term (dword at dpp+0)
+        cmp     eax, 17
+        je      mono_17_entry
+        cmp     eax, 18
+        je      mono_18_entry               # any other term falls through to default_mono_entry
+
+#
+# registers during default term processing loop:
+#   edi         active buffer pointer
+#   esi         end of buffer pointer
+#   ecx         weight_A
+#   ebp         frame pointer (not modified by the default loops)
+#   ebx         term * -4
+#   eax,edx     scratch
+#
+
+default_mono_entry:
+        imul    ebx, eax, -4                # set ebx to term * -4 for decorrelation index
+        mov     edx, [ebp+8]                # edx = dpp*
+        mov     ecx, [edx+8]                # ecx = weight
+        jmp     default_mono_loop
+
+#
+# registers during processing loop for terms 17 & 18:
+#   edi         active buffer pointer
+#   esi         end of buffer pointer
+#   ecx         weight_A
+#   ebp         previously calculated value
+#   ebx         calculated correlation sample
+#   eax,edx     scratch
+#
+
+mono_17_entry:
+        mov     edx, [ebp+8]                # edx = dpp*
+        mov     ecx, [edx+8]                # ecx = weight_A
+        mov     ebp, [edi-4]                # ebp = bptr [-1]; frame pointer is rebuilt at mono_1718_exit
+        jmp     mono_17_loop
+
+mono_18_entry:
+        mov     edx, [ebp+8]                # edx = dpp*
+        mov     ecx, [edx+8]                # ecx = weight_A
+        mov     ebp, [edi-4]                # ebp = bptr [-1]; frame pointer is rebuilt at mono_1718_exit
+        jmp     mono_18_loop
+
+        .balign  64
+default_mono_loop:
+        mov     eax, [edi+ebx]              # eax = bptr [-term] (correlation sample)
+        imul    eax, ecx                    # 32-bit product with weight; OF set if it does not fit
+        mov     edx, [edi]                  # edx = current sample (mov leaves OF from imul intact)
+        jo      long_default_mono_loop      # overflow: redo this sample in the 64-bit loop (edi not yet advanced)
+        sar     eax, 10                     # scale product down by 1024; CF = last bit shifted out
+        adc     eax, edx                    # add current sample plus CF (rounds the scaled product)
+        mov     [edi], eax                  # store result in place
+        mov     eax, [edi+ebx]              # reload correlation sample for the weight update
+        add     edi, 4
+        test    edx, edx                    # no weight update if the original sample was zero...
+        je      L100
+        test    eax, eax                    # ...or if the correlation sample was zero
+        je      L100
+        xor     eax, edx                    # sign bit of eax = 1 if the two signs differ
+        cdq                                 # edx = 0 (signs equal) or -1 (signs differ)
+        xor     ecx, edx                    # xor / add / xor with that mask gives:
+        add     ecx, [esp]                  #   weight += delta when signs are equal
+        xor     ecx, edx                    #   weight -= delta when signs differ
+L100:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      default_mono_loop
+        jmp     default_mono_done
+
+        .balign  64
+long_default_mono_loop:
+        mov     eax, [edi+ebx]              # eax = bptr [-term] (correlation sample)
+        imul    ecx                         # edx:eax = 64-bit signed product with weight
+        shl     edx, 22                     # high dword supplies bits 22-31 of (product >> 10)
+        shr     eax, 10                     # low dword supplies bits 0-21; CF = last bit shifted out
+        adc     eax, edx                    # combine both halves plus CF for rounding
+        mov     edx, [edi]                  # edx = current sample
+        add     eax, edx
+        mov     [edi], eax                  # store result in place
+        mov     eax, [edi+ebx]              # reload correlation sample for the weight update
+        add     edi, 4
+        test    edx, edx                    # same weight update as in default_mono_loop
+        je      L101
+        test    eax, eax
+        je      L101
+        xor     eax, edx
+        cdq                                 # edx = 0 (signs equal) or -1 (signs differ)
+        xor     ecx, edx
+        add     ecx, [esp]                  # [esp] = delta
+        xor     ecx, edx
+L101:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      long_default_mono_loop      # once entered, the long loop is used until the end
+
+default_mono_done:
+        mov     edx, [ebp+8]                # edx = dpp*
+        mov     [edx+8], ecx                # store weight_A back
+        mov     ecx, [edx]                  # ecx = dpp->term
+
+default_mono_store_samples:
+        dec     ecx                         # walk ecx from term-1 down to 0
+        sub     edi, 4                      # back up one full sample
+        mov     eax, [edi]
+        mov     [edx+ecx*4+16], eax         # store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_mono_store_samples  # ends with samples_A [0] = bptr [-term]
+        jmp     mono_done
+
+        .balign  64
+mono_17_loop:
+        lea     ebx, [ebp+ebp]              # ebx = 2 * bptr [-1]
+        sub     ebx, [edi-8]                # ebx = 2 * bptr [-1] - bptr [-2] (correlation sample)
+        mov     eax, ecx
+        imul    eax, ebx                    # 32-bit product with weight; OF set if it does not fit
+        mov     edx, [edi]                  # edx = current sample (mov leaves OF from imul intact)
+        jo      long_mono_17_loop           # overflow: redo this sample in the 64-bit loop
+        sar     eax, 10                     # scale product down by 1024; CF = last bit shifted out
+        adc     eax, edx                    # add current sample plus CF (rounds the scaled product)
+        stosd                               # store result at [edi] and advance edi by 4
+        test    ebx, ebx                    # no weight update if the correlation sample was zero
+        mov     ebp, eax                    # keep result as next bptr [-1] (mov leaves ZF from test intact)
+        je      L117
+        test    edx, edx                    # ...or if the original sample was zero
+        je      L117
+        mov     eax, [esp]                  # eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     # ebx = 0 (signs equal) or -1 (signs differ)
+        xor     eax, ebx                    # xor / sub with that mask conditionally negates:
+        sub     eax, ebx                    #   eax = +delta or -delta
+        add     ecx, eax                    # adjust weight
+L117:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      mono_17_loop
+        jmp     mono_1718_exit
+
+        .balign  64
+long_mono_17_loop:
+        lea     ebx, [ebp+ebp]              # ebx = 2 * bptr [-1]
+        sub     ebx, [edi-8]                # ebx = 2 * bptr [-1] - bptr [-2] (correlation sample)
+        mov     eax, ecx
+        imul    ebx                         # edx:eax = 64-bit signed product with weight
+        shl     edx, 22                     # high dword supplies bits 22-31 of (product >> 10)
+        shr     eax, 10                     # low dword supplies bits 0-21; CF = last bit shifted out
+        adc     eax, edx                    # combine both halves plus CF for rounding
+        mov     edx, [edi]                  # edx = current sample
+        add     eax, edx
+        stosd                               # store result at [edi] and advance edi by 4
+        test    ebx, ebx                    # same weight update as in mono_17_loop
+        mov     ebp, eax                    # keep result as next bptr [-1] (mov leaves ZF from test intact)
+        je      L217
+        test    edx, edx
+        je      L217
+        mov     eax, [esp]                  # eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     # ebx = 0 (signs equal) or -1 (signs differ)
+        xor     eax, ebx
+        sub     eax, ebx                    # eax = +delta or -delta
+        add     ecx, eax
+L217:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      long_mono_17_loop
+        jmp     mono_1718_exit
+
+        .balign  64
+mono_18_loop:
+        lea     ebx, [ebp+ebp*2]            # ebx = 3 * bptr [-1]
+        sub     ebx, [edi-8]                # ebx = 3 * bptr [-1] - bptr [-2]
+        sar     ebx, 1                      # ebx = (3 * bptr [-1] - bptr [-2]) >> 1 (correlation sample)
+        mov     eax, ecx
+        imul    eax, ebx                    # 32-bit product with weight; OF set if it does not fit
+        mov     edx, [edi]                  # edx = current sample (mov leaves OF from imul intact)
+        jo      long_mono_18_loop           # overflow: redo this sample in the 64-bit loop
+        sar     eax, 10                     # scale product down by 1024; CF = last bit shifted out
+        adc     eax, edx                    # add current sample plus CF (rounds the scaled product)
+        stosd                               # store result at [edi] and advance edi by 4
+        test    ebx, ebx                    # no weight update if the correlation sample was zero
+        mov     ebp, eax                    # keep result as next bptr [-1] (mov leaves ZF from test intact)
+        je      L118
+        test    edx, edx                    # ...or if the original sample was zero
+        je      L118
+        mov     eax, [esp]                  # eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     # ebx = 0 (signs equal) or -1 (signs differ)
+        xor     eax, ebx
+        sub     eax, ebx                    # eax = +delta or -delta
+        add     ecx, eax                    # adjust weight
+L118:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      mono_18_loop
+        jmp     mono_1718_exit
+
+        .balign  64
+long_mono_18_loop:
+        lea     ebx, [ebp+ebp*2]            # ebx = 3 * bptr [-1]
+        sub     ebx, [edi-8]                # ebx = 3 * bptr [-1] - bptr [-2]
+        sar     ebx, 1                      # ebx = (3 * bptr [-1] - bptr [-2]) >> 1 (correlation sample)
+        mov     eax, ecx
+        imul    ebx                         # edx:eax = 64-bit signed product with weight
+        shl     edx, 22                     # high dword supplies bits 22-31 of (product >> 10)
+        shr     eax, 10                     # low dword supplies bits 0-21; CF = last bit shifted out
+        adc     eax, edx                    # combine both halves plus CF for rounding
+        mov     edx, [edi]                  # edx = current sample
+        add     eax, edx
+        stosd                               # store result at [edi] and advance edi by 4
+        test    ebx, ebx                    # same weight update as in mono_18_loop
+        mov     ebp, eax                    # keep result as next bptr [-1] (mov leaves ZF from test intact)
+        je      L218
+        test    edx, edx
+        je      L218
+        mov     eax, [esp]                  # eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     # ebx = 0 (signs equal) or -1 (signs differ)
+        xor     eax, ebx
+        sub     eax, ebx                    # eax = +delta or -delta
+        add     ecx, eax
+L218:   cmp     edi, esi                    # compare bptr and eptr to see if we're done
+        jb      long_mono_18_loop
+
+mono_1718_exit:
+        lea     ebp, [esp+16]               # restore ebp (we've pushed 4 DWORDS)
+        mov     edx, [ebp+8]                # edx = dpp*
+        mov     [edx+8], ecx                # store weight_A back
+        mov     eax, [edi-4]                # dpp->samples_A [0] = bptr [-1];
+        mov     [edx+16], eax
+        mov     eax, [edi-8]                # dpp->samples_A [1] = bptr [-2];
+        mov     [edx+20], eax
+
+mono_done:
+        pop     eax                         # pop delta & saved regs
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+
+# Helper function to determine if specified CPU feature is available (used here for MMX).
+# Input parameter is index of feature to be checked (EDX from CPUID(1) only, MMX = 23).
+# Return value is the specified bit (0 or 1) or 0 if CPUID is not supported.
+
+_unpack_cpu_has_feature_x86:                # same entry point under two names (with and without leading underscore)
+unpack_cpu_has_feature_x86:
+        pushfd                              # save eflags
+        pushfd                              # push another copy
+        xor     dword ptr [esp], 0x200000   # toggle ID bit on stack & pop it back into eflags
+        popfd
+        pushfd                              # store possibly modified eflags
+        pop     eax                         # and pop back into eax
+        xor     eax, [esp]                  # compare to original pushed eflags
+        popfd                               # restore original eflags
+        and     eax, 0x200000               # eax = 0x200000 if the eflags ID bit was changeable, else 0
+        jz      oldcpu                      # return zero if CPUID is not available (wow!)
+
+        push    ebx                         # we must save ebx (cpuid overwrites it)
+        mov     eax, 1                      # do cpuid (1) to get features into edx
+        cpuid
+        mov     eax, edx                    # copy into eax for shift
+        mov     cl, [esp+8]                 # get parameter ([esp] = saved ebx, [esp+4] = return address)
+        sar     eax, cl                     # shift the requested bit index into the LSB
+        and     eax, 1                      # keep only that bit
+        pop     ebx                         # restore ebx and return 0 or 1
+
+oldcpu: ret                                 # return value in eax
+
+#ifdef __ELF__
+        .section .note.GNU-stack,"",@progbits
+#endif
+
--- a/third_party/wavpack/src/unpack_x86.asm
+++ b/third_party/wavpack/src/unpack_x86.asm
@ -0,0 +1,958 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                           **** WAVPACK ****                            ;;
+;;                  Hybrid Lossless Wavefile Compressor                   ;;
+;;              Copyright (c) 1998 - 2015 Conifer Software.               ;;
+;;                          All Rights Reserved.                          ;;
+;;      Distributed under the BSD Software License (see license.txt)      ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+        .686
+        .mmx
+        .model  flat
+asmcode segment page 'CODE'
+        public  _unpack_decorr_stereo_pass_cont_x86
+        public  _unpack_decorr_mono_pass_cont_x86
+        public  _unpack_cpu_has_feature_x86
+
+; This is an assembly optimized version of the following WavPack function:
+;
+; void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp,
+;                                      int32_t *buffer,
+;                                      int32_t sample_count,
+;                                      int32_t long_math);
+;
+; It performs a single pass of stereo decorrelation on the provided buffer.
+; Note that this version of the function requires that up to 8 previous
+; stereo samples are visible and correct. In other words, it ignores the
+; "samples_*" fields in the decorr_pass structure and gets the history data
+; directly from the buffer. It does, however, return the appropriate history
+; samples to the decorr_pass structure before returning.
+;
+; The "long_math" argument is used to specify that a 32-bit multiply is
+; not enough for the "apply_weight" operation (although in this case it
+; would only apply to the -1 and -2 terms because the MMX code does not have
+; this limitation) but we ignore the parameter and use the overflow detection
+; of the "imul" instruction to switch automatically to the "long_math" loop.
+;
+; This is written to work on an IA-32 processor and uses the MMX extensions
+; to improve the performance by processing both stereo channels together.
+; For terms -1 and -2 the MMX extensions are not usable, and so these are
+; performed independently without them.
+;
+; arguments on entry:
+;
+;   struct decorr_pass *dpp     [ebp+8]
+;   int32_t *buffer             [ebp+12]
+;   int32_t sample_count        [ebp+16]
+;   int32_t long_math           [ebp+20]
+;
+; registers after entry:
+;
+;   edi         bptr
+;   esi         eptr
+;
+; on stack (used for terms -1 and -2 only):
+; 
+;   int32_t delta             DWORD [esp]
+;
+
+_unpack_decorr_stereo_pass_cont_x86:
+        push    ebp
+        mov     ebp, esp
+        push    ebx
+        push    esi
+        push    edi
+
+        mov     edx, [ebp+8]                ; copy delta from dpp to top of stack
+        mov     eax, [edx+4]
+        push    eax
+
+        mov     edi, [ebp+12]               ; edi = buffer
+        mov     eax, [ebp+16]               ; get sample_count and multiply by 8
+        sal     eax, 3
+        jz      done                        ; exit now if there's nothing to do
+
+        add     eax, edi                    ; else add to buffer point to make eptr
+        mov     esi, eax
+    
+        mov     eax, [ebp+8]                ; get term from dpp and vector appropriately
+        mov     eax, [eax]
+        cmp     eax, 17
+        je      term_17_entry
+        cmp     eax, 18
+        je      term_18_entry
+        cmp     eax, -1
+        je      term_minus_1_entry
+        cmp     eax, -2
+        je      term_minus_2_entry
+        cmp     eax, -3
+        je      term_minus_3_entry
+
+;
+; registers during default term processing loop:
+;   edi         active buffer pointer
+;   esi         end of buffer pointer
+;
+; MMX:
+;   mm0, mm1    scratch
+;   mm2         original sample values
+;   mm3         correlation samples
+;   mm4         zero (for pcmpeqd)
+;   mm5         weights
+;   mm6         delta
+;   mm7         512 (for rounding)
+;
+
+default_term_entry:
+        imul    ebx, eax, -8                ; set ebx to term * -8 for decorrelation index
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     edx, [ebp+8]                ; edx = *dpp
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                ; mm5 = weight_AB masked to 16 bits
+        pxor    mm4, mm4                    ; mm4 = zero (for pcmpeqd)
+        jmp     default_term_loop
+
+        align  64
+default_term_loop:
+        movq    mm3, [edi+ebx]              ; mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm0, mm3
+        paddd   mm1, mm1
+        psrld   mm0, 15
+        psrlw   mm1, 1
+        pmaddwd mm0, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  ; mm2 = left_right
+        pslld   mm0, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm0, mm2
+        paddd   mm0, mm1                    ; add shifted sums
+        movq    [edi], mm0                  ; store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pcmpeqd mm2, mm4                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm4                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    ; and add to weight_AB
+        pxor    mm5, mm0
+        cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      default_term_loop
+
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                ; point to dpp
+        movq    [eax+8], mm5                ; put weight_AB back
+        emms
+        mov     edx, [ebp+8]                ; access dpp with edx
+        mov     ecx, [edx]                  ; ecx = dpp->term
+
+default_store_samples:
+        dec     ecx
+        sub     edi, 8                      ; back up one full sample
+        mov     eax, [edi+4]
+        mov     [edx+ecx*4+48], eax         ; store samples_B [ecx]
+        mov     eax, [edi]
+        mov     [edx+ecx*4+16], eax         ; store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_store_samples
+
+        jmp     done
+
+;
+; registers during processing loop for terms 17 & 18:
+;   edi         active buffer pointer
+;   esi         end of buffer pointer
+;
+; MMX:
+;   mm0, mm1    scratch
+;   mm2         original sample values
+;   mm3         calculated correlation samples
+;   mm4         last calculated values (so we don't need to reload)
+;   mm5         weights
+;   mm6         delta
+;   mm7         512 (for rounding)
+;
+
+term_17_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     edx, [ebp+8]                ; point to dpp & get delta
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                ; mm5 = weight_AB masked to 16 bits
+        movq    mm4, [edi-8]                ; preload previous calculated values
+        jmp     term_17_loop
+
+        align  64
+term_17_loop:
+        paddd   mm4, mm4
+        psubd   mm4, [edi-16]               ; mm3 = sam_AB
+        movq    mm3, mm4
+        movq    mm1, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  ; mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    ; add shifted sums
+        movq    mm0, mm3
+        movq    [edi], mm4                  ; store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pxor    mm1, mm1                    ; mm1 = zero
+        pcmpeqd mm2, mm1                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    ; and add to weight_AB
+        pxor    mm5, mm0
+        cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      term_17_loop
+
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                ; point to dpp
+        movq    [eax+8], mm5                ; put weight_AB back
+        emms
+        jmp     term_1718_exit
+
+term_18_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     edx, [ebp+8]                ; point to dpp & get delta
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                ; mm5 = weight_AB masked to 16 bits
+        movq    mm4, [edi-8]                ; preload previous calculated value
+        jmp     term_18_loop
+
+        align  64
+term_18_loop:
+        movq    mm3, mm4
+        psubd   mm3, [edi-16]
+        psrad   mm3, 1
+        paddd   mm3, mm4                    ; mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        paddd   mm1, mm1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  ; mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    ; add shifted sums
+        movq    mm0, mm3
+        movq    [edi], mm4                  ; store result
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pxor    mm1, mm1                    ; mm1 = zero
+        pcmpeqd mm2, mm1                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pxor    mm5, mm0
+        paddw   mm5, mm2                    ; and add to weight_AB
+        pxor    mm5, mm0
+        cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      term_18_loop
+
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                ; point to dpp
+        movq    [eax+8], mm5                ; put weight_AB back
+        emms
+
+term_1718_exit:
+        mov     edx, [edi-4]                ; dpp->samples_B [0] = bptr [-1];
+        mov     eax, [ebp+8]
+        mov     [eax+48], edx
+        mov     edx, [edi-8]                ; dpp->samples_A [0] = bptr [-2];
+        mov     [eax+16], edx
+        mov     edx, [edi-12]               ; dpp->samples_B [1] = bptr [-3];
+        mov     [eax+52], edx
+        mov     edx, [edi-16]               ; dpp->samples_A [1] = bptr [-4];
+        mov     [eax+20], edx
+        jmp     done
+
+;
+; registers in term -1 & -2 loops:
+;
+;   eax,ebx,edx scratch
+;   ecx         weight_A
+;   ebp         weight_B
+;   edi         bptr
+;   esi         eptr
+;
+
+term_minus_1_entry:
+        cld                                 ; we use stosd here...
+        mov     eax, [ebp+8]                ; point to dpp
+        mov     ecx, [eax+8]                ; ecx = weight_A and ebp = weight_B
+        mov     ebp, [eax+12]
+        mov     eax, [edi-4]
+        jmp     term_minus_1_loop
+
+        align  64
+term_minus_1_loop:
+        mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [edi]
+        jo      OV11
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L182
+        test    edx, edx
+        je      L182
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L183
+        mov     ecx, edx
+L183:   xor     ecx, ebx
+L182:   mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [edi]
+        jo      OV12
+        sar     eax, 10
+        adc     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L189
+        test    edx, edx
+        je      L189
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L188
+        mov     ebp, edx
+L188:   xor     ebp, ebx
+L189:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      term_minus_1_loop
+        jmp     term_minus_1_done
+
+OV11:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     long_term_minus_1_loop
+
+OV12:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     L282
+
+        align  64
+long_term_minus_1_loop:
+        mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L282
+        test    edx, edx
+        je      L282
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L283
+        mov     ecx, edx
+L283:   xor     ecx, ebx
+L282:   mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi]
+        add     eax, edx
+        stosd
+        test    ebx, ebx
+        je      L289
+        test    edx, edx
+        je      L289
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L288
+        mov     ebp, edx
+L288:   xor     ebp, ebx
+L289:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      long_term_minus_1_loop
+
+term_minus_1_done:
+        mov     edx, ebp
+        mov     ebp, esp                    ; restore ebp (we've pushed 4 DWORDS)
+        add     ebp, 16
+        mov     eax, [ebp+8]                ; point to dpp
+        mov     [eax+8], ecx
+        mov     [eax+12], edx
+        mov     edx, [edi-4]                ; dpp->samples_A [0] = bptr [-1]
+        mov     [eax+16], edx
+        jmp     done
+
+
+term_minus_2_entry:
+        mov     eax, [ebp+8]                ; point to dpp
+        mov     ecx, [eax+8]                ; ecx = weight_A and ebp = weight_B
+        mov     ebp, [eax+12]
+        mov     eax, [edi-8]
+        jmp     term_minus_2_loop
+
+        align  64
+term_minus_2_loop:
+        mov     ebx, eax
+        imul    eax, ebp
+        mov     edx, [edi+4]
+        jo      OV21
+        sar     eax, 10
+        adc     eax, edx
+        mov     [edi+4], eax
+        test    ebx, ebx
+        je      L194
+        test    edx, edx
+        je      L194
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L195
+        mov     ebp, edx
+L195:   xor     ebp, ebx
+L194:   mov     ebx, eax
+        imul    eax, ecx
+        mov     edx, [edi]
+        jo      OV22
+        sar     eax, 10
+        adc     eax, edx
+        mov     [edi], eax
+        add     edi, 8
+        test    ebx, ebx
+        je      L201
+        test    edx, edx
+        je      L201
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L200
+        mov     ecx, edx
+L200:   xor     ecx, ebx
+L201:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      term_minus_2_loop
+        jmp     term_minus_2_done
+
+OV21:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     long_term_minus_2_loop
+
+OV22:   mov     eax, ebx                    ; restore previous sample into eax
+        jmp     L294
+
+        align  64
+long_term_minus_2_loop:
+        mov     ebx, eax
+        imul    ebp
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi+4]
+        add     eax, edx
+        mov     [edi+4], eax
+        test    ebx, ebx
+        je      L294
+        test    edx, edx
+        je      L294
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ebp, ebx
+        add     ebp, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ebp, edx
+        jle     L295
+        mov     ebp, edx
+L295:   xor     ebp, ebx
+L294:   mov     ebx, eax
+        imul    ecx
+        shl     edx, 22
+        shr     eax, 10
+        adc     eax, edx
+        mov     edx, [edi]
+        add     eax, edx
+        mov     [edi], eax
+        add     edi, 8
+        test    ebx, ebx
+        je      L301
+        test    edx, edx
+        je      L301
+        xor     ebx, edx
+        sar     ebx, 31
+        xor     ecx, ebx
+        add     ecx, [esp]
+        mov     edx, 1024
+        add     edx, ebx
+        cmp     ecx, edx
+        jle     L300
+        mov     ecx, edx
+L300:   xor     ecx, ebx
+L301:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      long_term_minus_2_loop
+
+term_minus_2_done:
+        mov     edx, ebp
+        lea     ebp, [esp+16]               ; restore ebp (we've pushed 4 DWORDS)
+        mov     eax, [ebp+8]                ; point to dpp
+        mov     [eax+8], ecx
+        mov     [eax+12], edx
+        mov     edx, [edi-8]                ; dpp->samples_B [0] = bptr [-2];
+        mov     [eax+48], edx
+        jmp     done
+
+;
+; registers during processing loop for term -3:
+;   edi         active buffer pointer
+;   esi         end of buffer pointer
+;
+; MMX:
+;   mm0, mm1    scratch
+;   mm2         original sample values
+;   mm3         calculated correlation samples
+;   mm4         last calculated values (so we don't need to reload)
+;   mm5         weights
+;   mm6         delta
+;   mm7         512 (for rounding)
+;
+
+term_minus_3_entry:
+        mov     eax, 512
+        movd    mm7, eax
+        punpckldq mm7, mm7                  ; mm7 = round (512)
+        mov     edx, [ebp+8]                ; point to dpp & get delta
+        mov     eax, [edx+4]
+        movd    mm6, eax
+        punpckldq mm6, mm6                  ; mm6 = delta (0-7)
+        mov     eax, 0FFFFh                 ; mask high weights to zero for PMADDWD
+        movd    mm5, eax
+        punpckldq mm5, mm5                  ; mm5 = weight mask 0x0000FFFF0000FFFF
+        pand    mm5, [edx+8]                ; mm5 = weight_AB masked to 16 bits
+        movq    mm4, [edi-8]                ; preload previous calculated values
+        jmp     term_minus_3_loop
+
+        align  64
+term_minus_3_loop:
+        movq    mm3, mm4                    ; mm3 = swap dwords (mm4)
+        psrlq   mm3, 32
+        punpckldq mm3, mm4                  ; mm3 = sam_AB
+        movq    mm1, mm3
+        movq    mm4, mm3
+        pslld   mm1, 1
+        psrld   mm4, 15
+        psrlw   mm1, 1
+        pmaddwd mm4, mm5
+        pmaddwd mm1, mm5
+        movq    mm2, [edi]                  ; mm2 = left_right
+        pslld   mm4, 5
+        paddd   mm1, mm7                    ; add 512 for rounding
+        psrad   mm1, 10
+        paddd   mm4, mm2
+        paddd   mm4, mm1                    ; add shifted sums
+        movq    [edi], mm4                  ; store result
+        movq    mm0, mm3
+        pxor    mm0, mm2
+        psrad   mm0, 31                     ; mm0 = sign (sam_AB ^ left_right)
+        add     edi, 8
+        pxor    mm1, mm1                    ; mm1 = zero
+        pcmpeqd mm2, mm1                    ; mm2 = 1s if left_right was zero
+        pcmpeqd mm3, mm1                    ; mm3 = 1s if sam_AB was zero
+        por     mm2, mm3                    ; mm2 = 1s if either was zero
+        pandn   mm2, mm6                    ; mask delta with zeros check
+        pcmpeqd mm1, mm1
+        psubd   mm1, mm7
+        psubd   mm1, mm7
+        psubd   mm1, mm0
+        pxor    mm5, mm0
+        paddw   mm5, mm1
+        paddusw mm5, mm2                    ; and add to weight_AB
+        psubw   mm5, mm1
+        pxor    mm5, mm0
+        cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      term_minus_3_loop
+
+        pslld   mm5, 16                     ; sign-extend 16-bit weights back to dwords
+        psrad   mm5, 16
+        mov     eax, [ebp+8]                ; point to dpp
+        movq    [eax+8], mm5                ; put weight_AB back
+        emms
+        mov     edx, [edi-4]                ; dpp->samples_A [0] = bptr [-1];
+        mov     eax, [ebp+8]
+        mov     [eax+16], edx
+        mov     edx, [edi-8]                ; dpp->samples_B [0] = bptr [-2];
+        mov     [eax+48], edx
+
+done:   pop     eax                         ; pop delta & saved regs
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This is the mono version of the above function. It does not use MMX and does not handle negative terms.
+;
+; void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp,
+;                                    int32_t *buffer,
+;                                    int32_t sample_count,
+;                                    int32_t long_math);
+; arguments on entry:
+;
+;   struct decorr_pass *dpp     [ebp+8]
+;   int32_t *buffer             [ebp+12]
+;   int32_t sample_count        [ebp+16]
+;   int32_t long_math           [ebp+20]
+;
+; registers after entry:
+;
+;   edi         bptr
+;   esi         eptr
+;
+; on stack:
+;
+;   int32_t delta             DWORD [esp]
+;
+
+_unpack_decorr_mono_pass_cont_x86:
+        push    ebp                         ; standard frame: save caller's ebp
+        mov     ebp, esp                    ; arguments are now at [ebp+8] and up
+        push    ebx                         ; save registers (restored at mono_done)
+        push    esi
+        push    edi
+        cld                                 ; stosd (terms 17 & 18) must increment edi
+
+        mov     edx, [ebp+8]                ; copy delta from dpp to local stack
+        mov     eax, [edx+4]                ; eax = dpp->delta
+        push    eax                         ; delta is now at [esp] (4th pushed DWORD)
+
+        mov     edi, [ebp+12]               ; edi = buffer
+        mov     eax, [ebp+16]               ; get sample_count and multiply by 4
+        sal     eax, 2
+        jz      mono_done                   ; exit now if there's nothing to do
+        lea     esi, [edi+eax]              ; else add to buffer point to make eptr
+
+        mov     eax, [ebp+8]                ; get term from dpp and vector appropriately
+        mov     eax, [eax]                  ; eax = dpp->term
+        cmp     eax, 17                     ; terms 17 & 18 have dedicated loops
+        je      mono_17_entry
+        cmp     eax, 18
+        je      mono_18_entry
+
+;
+; registers during default term processing loop:
+;   edi         active buffer pointer
+;   esi         end of buffer pointer
+;   ecx         weight_A
+;   ebp         free
+;   ebx         term * -4
+;   eax,edx     scratch
+;
+
+default_mono_entry:
+        imul    ebx, eax, -4                ; set ebx to term * -4 for decorrelation index
+        mov     edx, [ebp+8]                ; edx = dpp*
+        mov     ecx, [edx+8]                ; ecx = weight
+        jmp     default_mono_loop
+
+;
+; registers during processing loop for terms 17 & 18:
+;   edi         active buffer pointer
+;   esi         end of buffer pointer
+;   ecx         weight_A
+;   ebp         previously calculated value
+;   ebx         calculated correlation sample
+;   eax,edx     scratch
+;
+
+mono_17_entry:
+        mov     edx, [ebp+8]                ; edx = dpp*
+        mov     ecx, [edx+8]                ; ecx = weight_A
+        mov     ebp, [edi-4]                ; ebp = bptr [-1] (frame pointer is gone until mono_1718_exit)
+        jmp     mono_17_loop
+
+mono_18_entry:
+        mov     edx, [ebp+8]                ; edx = dpp*
+        mov     ecx, [edx+8]                ; ecx = weight_A
+        mov     ebp, [edi-4]                ; ebp = bptr [-1] (frame pointer is gone until mono_1718_exit)
+        jmp     mono_18_loop
+
+        align  64
+default_mono_loop:
+        mov     eax, [edi+ebx]              ; eax = bptr [-term] (correlation sample)
+        imul    eax, ecx                    ; 32-bit multiply by weight, OF set if result doesn't fit
+        mov     edx, [edi]                  ; edx = value currently at bptr (mov leaves flags alone)
+        jo      long_default_mono_loop      ; on overflow redo this sample with the 64-bit multiply
+        sar     eax, 10                     ; CF = last bit shifted out
+        adc     eax, edx                    ; add value at bptr plus carry (rounds the shift)
+        mov     [edi], eax                  ; store result in place
+        mov     eax, [edi+ebx]              ; reload correlation sample for the weight update
+        add     edi, 4                      ; advance bptr
+        test    edx, edx                    ; no weight update if the value read from bptr
+        je      L100
+        test    eax, eax                    ;  or the correlation sample was zero
+        je      L100
+        xor     eax, edx                    ; sign bit set if the two signs differ
+        cdq                                 ; edx = 0 (same signs) or -1 (different signs)
+        xor     ecx, edx                    ; weight += delta if edx = 0, weight -= delta if edx = -1
+        add     ecx, [esp]
+        xor     ecx, edx
+L100:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      default_mono_loop
+        jmp     default_mono_done
+
+        align  64
+long_default_mono_loop:
+        mov     eax, [edi+ebx]              ; eax = bptr [-term] (correlation sample)
+        imul    ecx                         ; edx:eax = 64-bit product with weight
+        shl     edx, 22                     ; high dword supplies the upper bits of (product >> 10)
+        shr     eax, 10                     ; low dword >> 10, CF = last bit shifted out
+        adc     eax, edx                    ; combine the halves and add the rounding carry
+        mov     edx, [edi]                  ; edx = value currently at bptr
+        add     eax, edx
+        mov     [edi], eax                  ; store result in place
+        mov     eax, [edi+ebx]              ; reload correlation sample for the weight update
+        add     edi, 4                      ; advance bptr
+        test    edx, edx                    ; no weight update if the value read from bptr
+        je      L101
+        test    eax, eax                    ;  or the correlation sample was zero
+        je      L101
+        xor     eax, edx                    ; sign bit set if the two signs differ
+        cdq                                 ; edx = 0 (same signs) or -1 (different signs)
+        xor     ecx, edx                    ; weight += delta if edx = 0, weight -= delta if edx = -1
+        add     ecx, [esp]
+        xor     ecx, edx
+L101:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      long_default_mono_loop
+
+default_mono_done:
+        mov     edx, [ebp+8]                ; edx = dpp*
+        mov     [edx+8], ecx                ; store weight_A back
+        mov     ecx, [edx]                  ; ecx = dpp->term
+
+default_mono_store_samples:
+        dec     ecx                         ; count down from term-1 to 0
+        sub     edi, 4                      ; back up one full sample
+        mov     eax, [edi]
+        mov     [edx+ecx*4+16], eax         ; store samples_A [ecx]
+        test    ecx, ecx
+        jnz     default_mono_store_samples  ; loop until samples_A [0] has been written
+        jmp     mono_done
+
+        align  64
+mono_17_loop:
+        lea     ebx, [ebp+ebp]              ; ebx = 2 * previous value
+        sub     ebx, [edi-8]                ; ebx = 2 * bptr [-1] - bptr [-2]
+        mov     eax, ecx
+        imul    eax, ebx                    ; 32-bit multiply by weight, OF set if result doesn't fit
+        mov     edx, [edi]                  ; edx = value currently at bptr (mov leaves flags alone)
+        jo      long_mono_17_loop           ; on overflow redo this sample with the 64-bit multiply
+        sar     eax, 10                     ; CF = last bit shifted out
+        adc     eax, edx                    ; add value at bptr plus carry (rounds the shift)
+        stosd                               ; store result and advance bptr
+        test    ebx, ebx                    ; flags from this test survive the mov below
+        mov     ebp, eax                    ; result becomes the previous value for next pass
+        je      L117                        ; no weight update if correlation sample was zero
+        test    edx, edx
+        je      L117                        ;  or if the value read from bptr was zero
+        mov     eax, [esp]                  ; eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same signs) or -1 (different signs)
+        xor     eax, ebx                    ; negate delta when ebx = -1
+        sub     eax, ebx
+        add     ecx, eax                    ; weight_A += (possibly negated) delta
+L117:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      mono_17_loop
+        jmp     mono_1718_exit
+
+        align  64
+long_mono_17_loop:
+        lea     ebx, [ebp+ebp]              ; ebx = 2 * previous value
+        sub     ebx, [edi-8]                ; ebx = 2 * bptr [-1] - bptr [-2]
+        mov     eax, ecx
+        imul    ebx                         ; edx:eax = 64-bit product with weight
+        shl     edx, 22                     ; high dword supplies the upper bits of (product >> 10)
+        shr     eax, 10                     ; low dword >> 10, CF = last bit shifted out
+        adc     eax, edx                    ; combine the halves and add the rounding carry
+        mov     edx, [edi]                  ; edx = value currently at bptr
+        add     eax, edx
+        stosd                               ; store result and advance bptr
+        test    ebx, ebx                    ; flags from this test survive the mov below
+        mov     ebp, eax                    ; result becomes the previous value for next pass
+        je      L217                        ; no weight update if correlation sample was zero
+        test    edx, edx
+        je      L217                        ;  or if the value read from bptr was zero
+        mov     eax, [esp]                  ; eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same signs) or -1 (different signs)
+        xor     eax, ebx                    ; negate delta when ebx = -1
+        sub     eax, ebx
+        add     ecx, eax                    ; weight_A += (possibly negated) delta
+L217:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      long_mono_17_loop
+        jmp     mono_1718_exit
+
+        align  64
+mono_18_loop:
+        lea     ebx, [ebp+ebp*2]            ; ebx = 3 * previous value
+        sub     ebx, [edi-8]                ; ebx = 3 * bptr [-1] - bptr [-2]
+        sar     ebx, 1                      ; ebx = (3 * bptr [-1] - bptr [-2]) >> 1
+        mov     eax, ecx
+        imul    eax, ebx                    ; 32-bit multiply by weight, OF set if result doesn't fit
+        mov     edx, [edi]                  ; edx = value currently at bptr (mov leaves flags alone)
+        jo      long_mono_18_loop           ; on overflow redo this sample with the 64-bit multiply
+        sar     eax, 10                     ; CF = last bit shifted out
+        adc     eax, edx                    ; add value at bptr plus carry (rounds the shift)
+        stosd                               ; store result and advance bptr
+        test    ebx, ebx                    ; flags from this test survive the mov below
+        mov     ebp, eax                    ; result becomes the previous value for next pass
+        je      L118                        ; no weight update if correlation sample was zero
+        test    edx, edx
+        je      L118                        ;  or if the value read from bptr was zero
+        mov     eax, [esp]                  ; eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same signs) or -1 (different signs)
+        xor     eax, ebx                    ; negate delta when ebx = -1
+        sub     eax, ebx
+        add     ecx, eax                    ; weight_A += (possibly negated) delta
+L118:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      mono_18_loop
+        jmp     mono_1718_exit
+
+        align  64
+long_mono_18_loop:
+        lea     ebx, [ebp+ebp*2]            ; ebx = 3 * previous value
+        sub     ebx, [edi-8]                ; ebx = 3 * bptr [-1] - bptr [-2]
+        sar     ebx, 1                      ; ebx = (3 * bptr [-1] - bptr [-2]) >> 1
+        mov     eax, ecx
+        imul    ebx                         ; edx:eax = 64-bit product with weight
+        shl     edx, 22                     ; high dword supplies the upper bits of (product >> 10)
+        shr     eax, 10                     ; low dword >> 10, CF = last bit shifted out
+        adc     eax, edx                    ; combine the halves and add the rounding carry
+        mov     edx, [edi]                  ; edx = value currently at bptr
+        add     eax, edx
+        stosd                               ; store result and advance bptr
+        test    ebx, ebx                    ; flags from this test survive the mov below
+        mov     ebp, eax                    ; result becomes the previous value for next pass
+        je      L218                        ; no weight update if correlation sample was zero
+        test    edx, edx
+        je      L218                        ;  or if the value read from bptr was zero
+        mov     eax, [esp]                  ; eax = delta
+        xor     ebx, edx
+        sar     ebx, 31                     ; ebx = 0 (same signs) or -1 (different signs)
+        xor     eax, ebx                    ; negate delta when ebx = -1
+        sub     eax, ebx
+        add     ecx, eax                    ; weight_A += (possibly negated) delta
+L218:   cmp     edi, esi                    ; compare bptr and eptr to see if we're done
+        jb      long_mono_18_loop
+
+mono_1718_exit:
+        lea     ebp, [esp+16]               ; restore ebp (we've pushed 4 DWORDS)
+        mov     edx, [ebp+8]                ; edx = dpp*
+        mov     [edx+8], ecx                ; store weight_A back
+        mov     eax, [edi-4]                ; dpp->samples_A [0] = bptr [-1];
+        mov     [edx+16], eax
+        mov     eax, [edi-8]                ; dpp->samples_A [1] = bptr [-2];
+        mov     [edx+20], eax
+
+mono_done:
+        pop     eax                         ; pop delta & saved regs
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+
+; Helper function to determine if specified CPU feature is available (used here for MMX).
+; Input parameter is index of feature to be checked (EDX from CPUID(1) only, MMX = 23).
+; Return value is the specified bit (0 or 1) or 0 if CPUID is not supported.
+
+_unpack_cpu_has_feature_x86:
+        pushfd                              ; save eflags
+        pushfd                              ; push another copy
+        xor     dword ptr [esp], 200000h    ; toggle ID bit on stack & pop it back into eflags
+        popfd                               ; try to load the toggled copy into eflags
+        pushfd                              ; store possibly modified eflags
+        pop     eax                         ; and pop back into eax
+        xor     eax, [esp]                  ; compare to original pushed eflags
+        popfd                               ; restore original eflags
+        and     eax, 200000h                ; eax = 200000h (nonzero) if eflags ID bit was changeable
+        jz      oldcpu                      ; return zero if CPUID is not available (wow!)
+
+        push    ebx                         ; we must save ebx
+        mov     eax, 1                      ; do cpuid (1) to get features into edx
+        cpuid                               ; overwrites eax, ebx, ecx and edx
+        mov     eax, edx                    ; copy into eax for shift
+        mov     cl, [esp+8]                 ; get parameter and shift that bit index into LSB
+        sar     eax, cl                     ; requested feature bit is now bit 0
+        and     eax, 1                      ; discard all other bits
+        pop     ebx                         ; restore ebx and return 0 or 1
+
+oldcpu: ret                                 ; return value in eax
+
+asmcode ends
+
+        end
+
--- a/third_party/wavpack/src/wavpack_local.h
+++ b/third_party/wavpack/src/wavpack_local.h
@ -1,7 +1,7 @@
 ////////////////////////////////////////////////////////////////////////////
 //                           **** WAVPACK ****                            //
 //                  Hybrid Lossless Wavefile Compressor                   //
-//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
 //                          All Rights Reserved.                          //
 //      Distributed under the BSD Software License (see license.txt)      //
 ////////////////////////////////////////////////////////////////////////////
@ -11,19 +11,17 @@
 #ifndef WAVPACK_LOCAL_H
 #define WAVPACK_LOCAL_H

-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#if defined(WIN32)
+#if defined(_WIN32)
+#define strdup(x) _strdup(x)
 #define FASTCALL __fastcall
 #else
 #define FASTCALL
 #endif

-#if defined(WIN32) || \
-    (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN))
-#define BITSTREAM_SHORTS    // use "shorts" for reading/writing bitstreams
+#if defined(_WIN32) || \
+    (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
+    (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#define BITSTREAM_SHORTS    // use 16-bit "shorts" for reading/writing bitstreams (instead of chars)
                            //  (only works on little-endian machines)
 #endif

@ -31,7 +29,7 @@

 // This header file contains all the definitions required by WavPack.

-#if defined(_WIN32) && !defined(__MINGW32__)
+#if defined(_MSC_VER) && _MSC_VER < 1600
 #include <stdlib.h>
 typedef unsigned __int64 uint64_t;
 typedef unsigned __int32 uint32_t;
@ -41,14 +39,13 @@ typedef __int64 int64_t;
 typedef __int32 int32_t;
 typedef __int16 int16_t;
 typedef __int8  int8_t;
-typedef float float32_t;
 #else
-#include <inttypes.h>
+#include <stdint.h>
 #endif

 // Because the C99 specification states that "The order of allocation of
-// bit-ﬁelds within a unit (high-order to low-order or low-order to
-// high-order) is implementation-deﬁned" (6.7.2.1), I decided to change
+// bit-fields within a unit (high-order to low-order or low-order to
+// high-order) is implementation-defined" (6.7.2.1), I decided to change
 // the representation of floating-point values from a structure of
 // bit-fields to a 32-bit integer with access macros. Note that the WavPack
 // library doesn't use any floating-point math to implement compression of
@ -58,6 +55,7 @@ typedef float float32_t;
 typedef int32_t f32;

 #define get_mantissa(f)     ((f) & 0x7fffff)
+#define get_magnitude(f)    ((f) & 0x7fffffff)
 #define get_exponent(f)     (((f) >> 23) & 0xff)
 #define get_sign(f)         (((f) >> 31) & 0x1)

@ -92,7 +90,8 @@ typedef struct {
 #define APE_TAG_MAX_LENGTH      (1024 * 1024 * 16)

 typedef struct {
-    int32_t tag_file_pos, tag_begins_file;
+    int64_t tag_file_pos;
+    int tag_begins_file;
    ID3_Tag id3_tag;
    APE_Tag_Hdr ape_tag_hdr;
    unsigned char *ape_tag_data;
@ -115,12 +114,12 @@ typedef struct {
 #define ChunkHeaderFormat "4L"

 typedef struct {
-    unsigned short FormatTag, NumChannels;
+    uint16_t FormatTag, NumChannels;
    uint32_t SampleRate, BytesPerSecond;
-    unsigned short BlockAlign, BitsPerSample;
-    unsigned short cbSize, ValidBitsPerSample;
+    uint16_t BlockAlign, BitsPerSample;
+    uint16_t cbSize, ValidBitsPerSample;
    int32_t ChannelMask;
-    unsigned short SubFormat;
+    uint16_t SubFormat;
    char GUID [14];
 } WaveHeader;

@ -135,13 +134,43 @@ typedef struct {
 typedef struct {
    char ckID [4];
    uint32_t ckSize;
-    short version;
-    unsigned char track_no, index_no;
+    int16_t version;
+    unsigned char block_index_u8;
+    unsigned char total_samples_u8;
    uint32_t total_samples, block_index, block_samples, flags, crc;
 } WavpackHeader;

 #define WavpackHeaderFormat "4LS2LLLLL"

+// Macros to access the 40-bit block_index field (low 32 bits in block_index, high 8 bits in block_index_u8)
+
+#define GET_BLOCK_INDEX(hdr) ( (int64_t) (hdr).block_index + ((int64_t) (hdr).block_index_u8 << 32) )
+
+#define SET_BLOCK_INDEX(hdr,value) do { \
+    int64_t tmp = (value);              \
+    (hdr).block_index = (uint32_t) tmp; \
+    (hdr).block_index_u8 =              \
+        (unsigned char) (tmp >> 32);    \
+} while (0)
+
+// Macros to access the 40-bit total_samples field, which is complicated by the fact that all 1's in the
+// lower 32 bits indicate "unknown" (regardless of upper 8 bits): GET yields -1 for it, SET skips that pattern
+
+#define GET_TOTAL_SAMPLES(hdr) ( ((hdr).total_samples == (uint32_t) -1) ? -1 : \
+    (int64_t) (hdr).total_samples + ((int64_t) (hdr).total_samples_u8 << 32) - (hdr).total_samples_u8 )
+
+#define SET_TOTAL_SAMPLES(hdr,value) do {       \
+    int64_t tmp = (value);                      \
+    if (tmp < 0)                                \
+        (hdr).total_samples = (uint32_t) -1;    \
+    else {                                      \
+        tmp += (tmp / (int64_t) 0xffffffff);    \
+        (hdr).total_samples = (uint32_t) tmp;   \
+        (hdr).total_samples_u8 =                \
+            (unsigned char) (tmp >> 32);        \
+    }                                           \
+} while (0)
+
 // or-values for "flags"

 #define BYTES_STORED    3       // 1-4 bytes/sample
@ -169,17 +198,21 @@ typedef struct {
 #define SRATE_MASK      (0xfL << SRATE_LSB)

 #define FALSE_STEREO    0x40000000      // block is stereo, but data is mono
-
-#define IGNORED_FLAGS   0x18000000      // reserved, but ignore if encountered
 #define NEW_SHAPING     0x20000000      // use IIR filter for negative shaping
-#define UNKNOWN_FLAGS   0x80000000      // also reserved, but refuse decode if
-                                        //  encountered

 #define MONO_DATA (MONO_FLAG | FALSE_STEREO)

+// Introduced in WavPack 5.0:
+#define HAS_CHECKSUM    0x10000000      // block contains a trailing checksum
+#define DSD_FLAG        0x80000000      // block is encoded DSD (1-bit PCM)
+
+#define IGNORED_FLAGS   0x08000000      // reserved, but ignore if encountered
+#define UNKNOWN_FLAGS   0x00000000      // we no longer have any of these spares
+
 #define MIN_STREAM_VERS     0x402       // lowest stream version we'll decode
 #define MAX_STREAM_VERS     0x410       // highest stream version we'll decode or encode
-#define CUR_STREAM_VERS     0x407       // stream version we are [normally] writing now
+                                        // (only stream version to support mono optimization)
+#define CUR_STREAM_VERS     0x407       // universally compatible stream version


 //////////////////////////// WavPack Metadata /////////////////////////////////
@ -211,14 +244,20 @@ typedef struct {
 #define ID_WVC_BITSTREAM        0xb
 #define ID_WVX_BITSTREAM        0xc
 #define ID_CHANNEL_INFO         0xd
+#define ID_DSD_BLOCK            0xe

 #define ID_RIFF_HEADER          (ID_OPTIONAL_DATA | 0x1)
 #define ID_RIFF_TRAILER         (ID_OPTIONAL_DATA | 0x2)
-#define ID_REPLAY_GAIN          (ID_OPTIONAL_DATA | 0x3)
-#define ID_CUESHEET             (ID_OPTIONAL_DATA | 0x4)
+#define ID_ALT_HEADER           (ID_OPTIONAL_DATA | 0x3)
+#define ID_ALT_TRAILER          (ID_OPTIONAL_DATA | 0x4)
 #define ID_CONFIG_BLOCK         (ID_OPTIONAL_DATA | 0x5)
 #define ID_MD5_CHECKSUM         (ID_OPTIONAL_DATA | 0x6)
 #define ID_SAMPLE_RATE          (ID_OPTIONAL_DATA | 0x7)
+#define ID_ALT_EXTENSION        (ID_OPTIONAL_DATA | 0x8)
+#define ID_ALT_MD5_CHECKSUM     (ID_OPTIONAL_DATA | 0x9)
+#define ID_NEW_CONFIG_BLOCK     (ID_OPTIONAL_DATA | 0xa)
+#define ID_CHANNEL_IDENTITIES   (ID_OPTIONAL_DATA | 0xb)
+#define ID_BLOCK_CHECKSUM       (ID_OPTIONAL_DATA | 0xf)

 ///////////////////////// WavPack Configuration ///////////////////////////////

@ -255,6 +294,7 @@ typedef struct {
 #define CONFIG_CREATE_EXE       0x40000 // create executable
 #define CONFIG_CREATE_WVC       0x80000 // create correction file
 #define CONFIG_OPTIMIZE_WVC     0x100000 // maximize bybrid compression
+#define CONFIG_COMPATIBLE_WRITE 0x400000 // write files for decoders < 4.3
 #define CONFIG_CALC_NOISE       0x800000 // calc noise in hybrid mode
 #define CONFIG_LOSSY_MODE       0x1000000 // obsolete (for information)
 #define CONFIG_EXTRA_MODE       0x2000000 // extra processing mode
@ -264,6 +304,8 @@ typedef struct {
 #define CONFIG_PAIR_UNDEF_CHANS 0x20000000 // encode undefined channels in stereo pairs
 #define CONFIG_OPTIMIZE_MONO    0x80000000 // optimize for mono streams posing as stereo

+#define QMODE_DSD_AUDIO         0x30    // if either of these is set in qmode (version 5.0)
+
 /*
 * These config flags were never actually used, or are no longer used, or are
 * used for something else now. They may be used in the future for what they
@ -305,7 +347,7 @@ typedef struct {

 typedef struct bs {
 #ifdef BITSTREAM_SHORTS
-    unsigned short *buf, *end, *ptr;
+    uint16_t *buf, *end, *ptr;
 #else
    unsigned char *buf, *end, *ptr;
 #endif
@ -320,8 +362,10 @@ typedef struct bs {
 #define MAX_NTERMS 16
 #define MAX_TERM 8

+// Note that this structure is directly accessed in assembly files, so modify with care
+
 struct decorr_pass {
-    int term, delta, weight_A, weight_B;
+    int32_t term, delta, weight_A, weight_B;
    int32_t samples_A [MAX_TERM], samples_B [MAX_TERM];
    int32_t aweight_A, aweight_B;
    int32_t sum_A, sum_B;
@ -342,6 +386,10 @@ struct words_data {
    struct entropy_data c [2];
 };

+typedef struct {
+    int32_t value, filter0, filter1, filter2, filter3, filter4, filter5, filter6, factor, byte;
+} DSDfilters;
+
 typedef struct {
    WavpackHeader wphdr;
    struct words_data w;
@ -350,9 +398,10 @@ typedef struct {
    unsigned char *block2buff, *block2end;
    int32_t *sample_buffer;

+    int64_t sample_index;
    int bits, num_terms, mute_error, joint_stereo, false_stereo, shift;
    int num_decorrs, num_passes, best_decorr, mask_decorr;
-    uint32_t sample_index, crc, crc_x, crc_wvx;
+    uint32_t crc, crc_x, crc_wvx;
    Bitstream wvbits, wvcbits, wvxbits;
    int init_done, wvc_skip;
    float delta_decay;
@ -363,12 +412,22 @@ typedef struct {
    struct {
        int32_t shaping_acc [2], shaping_delta [2], error [2];
        double noise_sum, noise_ave, noise_max;
-        short *shaping_data, *shaping_array;
+        int16_t *shaping_data, *shaping_array;
        int32_t shaping_samples;
    } dc;

    struct decorr_pass decorr_passes [MAX_NTERMS], analysis_pass;
    const WavpackDecorrSpec *decorr_specs;
+
+    struct {
+        unsigned char *byteptr, *endptr, (*probabilities) [256], **value_lookup, mode, ready;
+        int history_bins, p0, p1;
+        int16_t (*summed_probabilities) [256];
+        uint32_t low, high, value;
+        DSDfilters filters [2];
+        int32_t *ptable;
+    } dsd;
+
 } WavpackStream;

 // flags for float_flags:
@ -399,6 +458,22 @@ typedef struct {
    int32_t (*write_bytes)(void *id, void *data, int32_t bcount);
 } WavpackStreamReader;

+// Extended version of structure for handling large files and added
+// functionality for truncating and closing files
+
+typedef struct {
+    int32_t (*read_bytes)(void *id, void *data, int32_t bcount);
+    int32_t (*write_bytes)(void *id, void *data, int32_t bcount);
+    int64_t (*get_pos)(void *id);                               // new signature for large files
+    int (*set_pos_abs)(void *id, int64_t pos);                  // new signature for large files
+    int (*set_pos_rel)(void *id, int64_t delta, int mode);      // new signature for large files
+    int (*push_back_byte)(void *id, int c);
+    int64_t (*get_length)(void *id);                            // new signature for large files
+    int (*can_seek)(void *id);
+    int (*truncate_here)(void *id);                             // new function to truncate file at current position
+    int (*close)(void *id);                                     // new function to close file
+} WavpackStreamReader64;
+
 typedef int (*WavpackBlockOutput)(void *id, void *data, int32_t bcount);

 typedef struct {
@ -414,12 +489,13 @@ typedef struct {
    WavpackBlockOutput blockout;
    void *wv_out, *wvc_out;

-    WavpackStreamReader *reader;
+    WavpackStreamReader64 *reader;
    void *wv_in, *wvc_in;

-    uint32_t filelen, file2len, filepos, file2pos, total_samples, crc_errors, first_flags;
-    int wvc_flag, open_flags, norm_offset, reduced_channels, lossy_blocks, close_files;
-    uint32_t block_samples, ave_block_samples, block_boundary, max_samples, acc_samples, initial_index, riff_trailer_bytes;
+    int64_t filelen, file2len, filepos, file2pos, total_samples, initial_index;
+    uint32_t crc_errors, first_flags;
+    int wvc_flag, open_flags, norm_offset, reduced_channels, lossy_blocks, version_five;
+    uint32_t block_samples, ave_block_samples, block_boundary, max_samples, acc_samples, riff_trailer_bytes;
    int riff_header_added, riff_header_created;
    M_Tag m_tag;

@ -427,6 +503,13 @@ typedef struct {
    WavpackStream **streams;
    void *stream3;

+    // these items were added in 5.0 to support alternate file types (especially CAF & DSD)
+    unsigned char file_format, *channel_reordering, *channel_identities;
+    uint32_t channel_layout, dsd_multiplier;
+    void *decimation_context;
+    char file_extension [8];
+
+    void (*close_callback)(void *wpc);
    char error_message [80];
 } WavpackContext;

@ -434,6 +517,11 @@ typedef struct {

 #define CLEAR(destin) memset (&destin, 0, sizeof (destin));

+//////////////////////////////// decorrelation //////////////////////////////
+// modules: pack.c, unpack.c, unpack_floats.c, extra1.c, extra2.c
+
+// #define SKIP_DECORRELATION   // experimental switch to disable all decorrelation on encode
+
 // These macros implement the weight application and update operations
 // that are at the heart of the decorrelation loops. Note that there are
 // sometimes two and even three versions of each macro. These should be
@ -449,15 +537,17 @@ typedef struct {
 #if 1   // PERFCOND - apply decorrelation weight when 32-bit overflow is possible
 #define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \
    (((sample & ~0xffff) >> 9) * weight) + 1) >> 1)
+#elif 1
+#define apply_weight_f(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))
 #else
 #define apply_weight_f(weight, sample) ((int32_t)floor(((double) weight * sample + 512.0) / 1024.0))
 #endif

-#if 1   // PERFCOND - universal version that checks input magnitude (or simply uses 64-bit ints)
-#define apply_weight(weight, sample) (sample != (short) sample ? \
+#if 1   // PERFCOND - universal version that checks input magnitude or always uses long version
+#define apply_weight(weight, sample) (sample != (int16_t) sample ? \
    apply_weight_f (weight, sample) : apply_weight_i (weight, sample))
 #else
-#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))
+#define apply_weight(weight, sample) (apply_weight_f (weight, sample))
 #endif

 #if 1   // PERFCOND
@ -471,9 +561,6 @@ typedef struct {
    if (source && result) (source ^ result) < 0 ? (weight -= delta) : (weight += delta);
 #endif

-#define update_weight_d2(weight, delta, source, result) \
-    if (source && result) weight -= (((source ^ result) >> 29) & 4) - 2;
-
 #define update_weight_clip(weight, delta, source, result) \
    if (source && result) { \
        const int32_t s = (source ^ result) >> 31; \
@ -481,29 +568,59 @@ typedef struct {
        weight = (weight ^ s) - s; \
    }

-#define update_weight_clip_d2(weight, delta, source, result) \
-    if (source && result) { \
-        const int32_t s = (source ^ result) >> 31; \
-        if ((weight = (weight ^ s) + (2 - s)) > 1024) weight = 1024; \
-        weight = (weight ^ s) - s; \
-    }
+void pack_init (WavpackContext *wpc);
+int pack_block (WavpackContext *wpc, int32_t *buffer);
+void send_general_metadata (WavpackContext *wpc);
+void free_metadata (WavpackMetadata *wpmd);
+int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end);
+double WavpackGetEncodedNoise (WavpackContext *wpc, double *peak);
+int unpack_init (WavpackContext *wpc);
+int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd);
+int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);
+int check_crc_error (WavpackContext *wpc);
+int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values);
+void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values);
+void float_values (WavpackStream *wps, int32_t *values, int32_t num_values);
+void dynamic_noise_shaping (WavpackContext *wpc, int32_t *buffer, int shortening_allowed);
+void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples);
+void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples);

-// bits.c
+////////////////////////// DSD related (including decimation) //////////////////////////
+// modules: pack_dsd.c unpack_dsd.c

-void bs_open_read (Bitstream *bs, void *buffer_start, void *buffer_end);
-void bs_open_write (Bitstream *bs, void *buffer_start, void *buffer_end);
-uint32_t bs_close_read (Bitstream *bs);
-uint32_t bs_close_write (Bitstream *bs);
+void pack_dsd_init (WavpackContext *wpc);
+int pack_dsd_block (WavpackContext *wpc, int32_t *buffer);
+int init_dsd_block (WavpackContext *wpc, WavpackMetadata *wpmd);
+int32_t unpack_dsd_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);

-int DoReadFile (FILE *hFile, void *lpBuffer, uint32_t nNumberOfBytesToRead, uint32_t *lpNumberOfBytesRead);
-int DoWriteFile (FILE *hFile, void *lpBuffer, uint32_t nNumberOfBytesToWrite, uint32_t *lpNumberOfBytesWritten);
-uint32_t DoGetFileSize (FILE *hFile), DoGetFilePosition (FILE *hFile);
-int DoSetFilePositionRelative (FILE *hFile, int32_t pos, int mode);
-int DoSetFilePositionAbsolute (FILE *hFile, uint32_t pos);
-int DoUngetc (int c, FILE *hFile), DoDeleteFile (char *filename);
-int DoCloseHandle (FILE *hFile), DoTruncateFile (FILE *hFile);
+void *decimate_dsd_init (int num_channels);
+void decimate_dsd_reset (void *decimate_context);
+void decimate_dsd_run (void *decimate_context, int32_t *samples, int num_samples);
+void decimate_dsd_destroy (void *decimate_context);
+
+///////////////////////////////// CPU feature detection ////////////////////////////////
+
+int unpack_cpu_has_feature_x86 (int findex), pack_cpu_has_feature_x86 (int findex);
+
+#define CPU_FEATURE_MMX     23
+
+///////////////////////////// pre-4.0 version decoding ////////////////////////////
+// modules: unpack3.c, unpack3_open.c, unpack3_seek.c
+
+WavpackContext *open_file3 (WavpackContext *wpc, char *error);
+int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);
+int seek_sample3 (WavpackContext *wpc, uint32_t desired_index);
+uint32_t get_sample_index3 (WavpackContext *wpc);
+void free_stream3 (WavpackContext *wpc);
+int get_version3 (WavpackContext *wpc);
+
+////////////////////////////// bitstream macros & functions /////////////////////////////

 #define bs_is_open(bs) ((bs)->ptr != NULL)
+uint32_t bs_close_read (Bitstream *bs);

 #define getbit(bs) ( \
    (((bs)->bc) ? \
@ -564,56 +681,51 @@ int DoCloseHandle (FILE *hFile), DoTruncateFile (FILE *hFile);
        } while ((bs)->bc >= sizeof (*((bs)->ptr)) * 8); \
 } while (0)

-void little_endian_to_native (void *data, char *format);
-void native_to_little_endian (void *data, char *format);
+///////////////////////////// entropy encoder / decoder ////////////////////////////
+// modules: entropy_utils.c, read_words.c, write_words.c

-// pack.c
+// these control the time constant "slow_level" which is used for hybrid mode
+// that controls bitrate as a function of residual level (HYBRID_BITRATE).
+#define SLS 8
+#define SLO ((1 << (SLS - 1)))

-void pack_init (WavpackContext *wpc);
-int pack_block (WavpackContext *wpc, int32_t *buffer);
-double WavpackGetEncodedNoise (WavpackContext *wpc, double *peak);
+#define LIMIT_ONES 16   // maximum consecutive 1s sent for "div" data

-// unpack.c
+// these control the time constant of the 3 median level breakpoints
+#define DIV0 128        // 5/7 of samples
+#define DIV1 64         // 10/49 of samples
+#define DIV2 32         // 20/343 of samples

-int unpack_init (WavpackContext *wpc);
-int init_wv_bitstream (WavpackStream *wps, WavpackMetadata *wpmd);
-int init_wvc_bitstream (WavpackStream *wps, WavpackMetadata *wpmd);
-int init_wvx_bitstream (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd);
-int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd);
-int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd);
-int read_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd);
-int read_wrapper_data (WavpackContext *wpc, WavpackMetadata *wpmd);
-int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);
-int check_crc_error (WavpackContext *wpc);
+// this macro retrieves the specified median breakpoint (without the low 4 frac bits; min = 1)
+#define GET_MED(med) (((c->median [med]) >> 4) + 1)     // relies on a pointer named "c" being in scope at the point of use

-// unpack3.c
+// These macros update the specified median breakpoints. Note that the median
+// is incremented when the sample is higher than the median, else decremented.
+// They are designed so that the median will never drop below 1 and the value
+// is essentially stationary if there are 2 increments for every 5 decrements.

-WavpackContext *open_file3 (WavpackContext *wpc, char *error);
-int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);
-int seek_sample3 (WavpackContext *wpc, uint32_t desired_index);
-uint32_t get_sample_index3 (WavpackContext *wpc);
-void free_stream3 (WavpackContext *wpc);
-int get_version3 (WavpackContext *wpc);
+#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5)
+#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2)
+#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5)
+#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2)
+#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5)
+#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2)

-// metadata.c stuff
-
-int read_metadata_buff (WavpackMetadata *wpmd, unsigned char *blockbuff, unsigned char **buffptr);
-int write_metadata_block (WavpackContext *wpc);
-int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end);
-int add_to_metadata (WavpackContext *wpc, void *data, uint32_t bcount, unsigned char id);
-int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd);
-void free_metadata (WavpackMetadata *wpmd);
-
-// words.c stuff
+#ifdef HAVE___BUILTIN_CLZ   // count_bits (av): 32 minus the leading-zero count of av, or 0 when av is 0
+#define count_bits(av) ((av) ? 32 - __builtin_clz (av) : 0)
+#elif defined (_WIN64)      // inline function form: index reported by _BitScanReverse plus 1, or 0 when it returns zero
+static __inline int count_bits (uint32_t av) { unsigned long res; return _BitScanReverse (&res, av) ? (int)(res + 1) : 0; }
+#else                       // table form: looks up the highest non-zero byte of av and adds 0/8/16/24; presumably nbits_table [b] is the bit count of b - confirm
+#define count_bits(av) ( \
+ (av) < (1 << 8) ? nbits_table [av] : \
+  ( \
+   (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \
+   ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \
+  ) \
+)
+#endif                      // all three variants are spelled count_bits (av) at the call site

 void init_words (WavpackStream *wps);
-void word_set_bitrate (WavpackStream *wps);
 void write_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd);
 void write_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd);
 int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd);
@ -625,34 +737,39 @@ int32_t get_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsample
 void flush_word (WavpackStream *wps);
 int32_t nosend_word (WavpackStream *wps, int32_t value, int chan);
 void scan_word (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir);
+void update_error_limit (WavpackStream *wps);

-int log2s (int32_t value);
-int32_t exp2s (int log);
-uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit);
+extern const uint32_t bitset [32];
+extern const uint32_t bitmask [32];
+extern const char nbits_table [256];
+
+int wp_log2s (int32_t value);
+int32_t wp_exp2s (int log);
+int FASTCALL wp_log2 (uint32_t avalue);
+
+#ifdef OPT_ASM_X86
+#define LOG2BUFFER log2buffer_x86
+#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__))
+#define LOG2BUFFER log2buffer_x64win
+#elif defined(OPT_ASM_X64)
+#define LOG2BUFFER log2buffer_x64
+#else
+#define LOG2BUFFER log2buffer
+#endif
+
+uint32_t LOG2BUFFER (int32_t *samples, uint32_t num_samples, int limit);

 signed char store_weight (int weight);
 int restore_weight (signed char weight);

 #define WORD_EOF ((int32_t)(1L << 31))

-// float.c
-
-void write_float_info (WavpackStream *wps, WavpackMetadata *wpmd);
-int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values);
-void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values);
-int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd);
-void float_values (WavpackStream *wps, int32_t *values, int32_t num_values);
 void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp);

-// extra?.c
-
-// void analyze_stereo (WavpackContext *wpc, int32_t *samples);
-// void analyze_mono (WavpackContext *wpc, int32_t *samples);
-void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples);
-void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples);
-
-// wputils.c
+/////////////////////////// high-level unpacking API and support ////////////////////////////
+// modules: open_utils.c, unpack_utils.c, unpack_seek.c, unpack_floats.c

+WavpackContext *WavpackOpenFileInputEx64 (WavpackStreamReader64 *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset);
 WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset);
 WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset);

@ -664,6 +781,16 @@ WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int f
 #define OPEN_STREAMING  0x20    // "streaming" mode blindly unpacks blocks
                                // w/o regard to header file position info
 #define OPEN_EDIT_TAGS  0x40    // allow editing of tags
+#define OPEN_FILE_UTF8  0x80    // assume filenames are UTF-8 encoded, not ANSI (Windows only)
+
+// new for version 5
+
+#define OPEN_DSD_NATIVE 0x100   // open DSD files as bitstreams
+                                // (returned as 8-bit "samples" stored in 32-bit words)
+#define OPEN_DSD_AS_PCM 0x200   // open DSD files as 24-bit PCM (decimated 8x)
+#define OPEN_ALT_TYPES  0x400   // application is aware of alternate file types & qmode
+                                // (just affects retrieving wrappers & MD5 checksums)
+#define OPEN_NO_CHECKSUM 0x800  // don't verify block checksums before decoding

 int WavpackGetMode (WavpackContext *wpc);

@ -682,15 +809,38 @@ int WavpackGetMode (WavpackContext *wpc);
 #define MODE_XMODE      0x7000  // mask for extra level (1-6, 0=unknown)
 #define MODE_DNS        0x8000

-char *WavpackGetErrorMessage (WavpackContext *wpc);
+int WavpackGetQualifyMode (WavpackContext *wpc);
 int WavpackGetVersion (WavpackContext *wpc);
 uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples);
-uint32_t WavpackGetNumSamples (WavpackContext *wpc);
-uint32_t WavpackGetSampleIndex (WavpackContext *wpc);
-int WavpackGetNumErrors (WavpackContext *wpc);
-int WavpackLossyBlocks (WavpackContext *wpc);
 int WavpackSeekSample (WavpackContext *wpc, uint32_t sample);
-WavpackContext *WavpackCloseFile (WavpackContext *wpc);
+int WavpackSeekSample64 (WavpackContext *wpc, int64_t sample);
+int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]);
+
+int WavpackVerifySingleBlock (unsigned char *buffer, int verify_checksum);
+uint32_t read_next_header (WavpackStreamReader64 *reader, void *id, WavpackHeader *wphdr);
+int read_wvc_block (WavpackContext *wpc);
+
+/////////////////////////// high-level packing API and support ////////////////////////////
+// modules: pack_utils.c, pack_floats.c
+
+WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id);
+int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples);
+int WavpackSetConfiguration64 (WavpackContext *wpc, WavpackConfig *config, int64_t total_samples, const unsigned char *chan_ids);
+int WavpackPackInit (WavpackContext *wpc);
+int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount);
+int WavpackPackSamples (WavpackContext *wpc, int32_t *sample_buffer, uint32_t sample_count);
+int WavpackFlushSamples (WavpackContext *wpc);
+int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]);
+void WavpackSeekTrailingWrapper (WavpackContext *wpc);
+void WavpackUpdateNumSamples (WavpackContext *wpc, void *first_block);
+void *WavpackGetWrapperLocation (void *first_block, uint32_t *size);
+
+/////////////////////////////////// common utilities ////////////////////////////////////
+// module: common_utils.c
+
+extern const uint32_t sample_rates [16];
+uint32_t WavpackGetLibraryVersion (void);
+const char *WavpackGetLibraryVersionString (void);
 uint32_t WavpackGetSampleRate (WavpackContext *wpc);
 int WavpackGetBitsPerSample (WavpackContext *wpc);
 int WavpackGetBytesPerSample (WavpackContext *wpc);
@ -698,34 +848,33 @@ int WavpackGetNumChannels (WavpackContext *wpc);
 int WavpackGetChannelMask (WavpackContext *wpc);
 int WavpackGetReducedChannels (WavpackContext *wpc);
 int WavpackGetFloatNormExp (WavpackContext *wpc);
-int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]);
+uint32_t WavpackGetNumSamples (WavpackContext *wpc);
+int64_t WavpackGetNumSamples64 (WavpackContext *wpc);
+uint32_t WavpackGetSampleIndex (WavpackContext *wpc);
+int64_t WavpackGetSampleIndex64 (WavpackContext *wpc);
+char *WavpackGetErrorMessage (WavpackContext *wpc);
+int WavpackGetNumErrors (WavpackContext *wpc);
+int WavpackLossyBlocks (WavpackContext *wpc);
 uint32_t WavpackGetWrapperBytes (WavpackContext *wpc);
 unsigned char *WavpackGetWrapperData (WavpackContext *wpc);
 void WavpackFreeWrapper (WavpackContext *wpc);
-void WavpackSeekTrailingWrapper (WavpackContext *wpc);
 double WavpackGetProgress (WavpackContext *wpc);
 uint32_t WavpackGetFileSize (WavpackContext *wpc);
+int64_t WavpackGetFileSize64 (WavpackContext *wpc);
 double WavpackGetRatio (WavpackContext *wpc);
 double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc);
 double WavpackGetInstantBitrate (WavpackContext *wpc);
-
-WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id);
-int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples);
-int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount);
-int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]);
-int WavpackPackInit (WavpackContext *wpc);
-int WavpackPackSamples (WavpackContext *wpc, int32_t *sample_buffer, uint32_t sample_count);
-int WavpackFlushSamples (WavpackContext *wpc);
-void WavpackUpdateNumSamples (WavpackContext *wpc, void *first_block);
-void *WavpackGetWrapperLocation (void *first_block, uint32_t *size);
-
+WavpackContext *WavpackCloseFile (WavpackContext *wpc);
 void WavpackLittleEndianToNative (void *data, char *format);
 void WavpackNativeToLittleEndian (void *data, char *format);
+void WavpackBigEndianToNative (void *data, char *format);
+void WavpackNativeToBigEndian (void *data, char *format);

-uint32_t WavpackGetLibraryVersion (void);
-const char *WavpackGetLibraryVersionString (void);
+void install_close_callback (WavpackContext *wpc, void cb_func (void *wpc));
+void free_streams (WavpackContext *wpc);

-// tags.c
+/////////////////////////////////// tag utilities ////////////////////////////////////
+// modules: tags.c, tag_utils.c

 int WavpackGetNumTagItems (WavpackContext *wpc);
 int WavpackGetTagItem (WavpackContext *wpc, const char *item, char *value, int size);
@ -742,58 +891,5 @@ void free_tag (M_Tag *m_tag);
 int valid_tag (M_Tag *m_tag);
 int editable_tag (M_Tag *m_tag);

-///////////////////////////// SIMD helper macros /////////////////////////////
-
-#ifdef OPT_MMX
-
-#if defined (__GNUC__) && !defined (__INTEL_COMPILER)
-//directly map to gcc's native builtins for faster code
-
-#if __GNUC__ < 4
-typedef int __di __attribute__ ((__mode__ (__DI__)));
-typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
-typedef int __v4hi __attribute__ ((__mode__ (__V4HI__)));
-#define _m_paddsw(m1, m2) (__m64) __builtin_ia32_paddsw ((__v4hi) m1, (__v4hi) m2)
-#define _m_pand(m1, m2) (__m64) __builtin_ia32_pand ((__di) m1, (__di) m2)
-#define _m_pandn(m1, m2) (__m64) __builtin_ia32_pandn ((__di) m1, (__di) m2)
-#define _m_pmaddwd(m1, m2) __builtin_ia32_pmaddwd ((__v4hi) m1, (__v4hi) m2)
-#define _m_por(m1, m2) (__m64) __builtin_ia32_por ((__di) m1, (__di) m2)
-#define _m_pxor(m1, m2) (__m64) __builtin_ia32_pxor ((__di) m1, (__di) m2)
-#else
-typedef int __m64 __attribute__ ((__vector_size__ (8)));
-typedef short __m64_16 __attribute__ ((__vector_size__ (8)));
-#define _m_paddsw(m1, m2) (__m64) __builtin_ia32_paddsw ((__m64_16) m1, (__m64_16) m2)
-#define _m_pand(m1, m2) __builtin_ia32_pand (m1, m2)
-#define _m_pandn(m1, m2) __builtin_ia32_pandn (m1, m2)
-#define _m_pmaddwd(m1, m2) __builtin_ia32_pmaddwd ((__m64_16) m1, (__m64_16) m2)
-#define _m_por(m1, m2) __builtin_ia32_por (m1, m2)
-#define _m_pxor(m1, m2) __builtin_ia32_pxor (m1, m2)
 #endif

-#define _m_paddd(m1, m2) __builtin_ia32_paddd (m1, m2)
-#define _m_pcmpeqd(m1, m2) __builtin_ia32_pcmpeqd (m1, m2)
-
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4 || __has_builtin(__builtin_ia32_pslldi)
-#	define _m_pslldi(m1, m2) __builtin_ia32_pslldi ((__m64)m1, m2)
-#	define _m_psradi(m1, m2) __builtin_ia32_psradi ((__m64)m1, m2)
-#	define _m_psrldi(m1, m2) __builtin_ia32_psrldi ((__m64)m1, m2)
-#else
-#	define _m_pslldi(m1, m2) __builtin_ia32_pslld (m1, m2)
-#	define _m_psradi(m1, m2) __builtin_ia32_psrad (m1, m2)
-#	define _m_psrldi(m1, m2) __builtin_ia32_psrld (m1, m2)
-#endif
-
-#define _m_psubd(m1, m2) __builtin_ia32_psubd (m1, m2)
-#define _m_punpckhdq(m1, m2) __builtin_ia32_punpckhdq (m1, m2)
-#define _m_punpckldq(m1, m2) __builtin_ia32_punpckldq (m1, m2)
-#define _mm_empty() __builtin_ia32_emms ()
-#define _mm_set_pi32(m1, m2) { m2, m1 }
-#define _mm_set1_pi32(m) { m, m }
-
-#else
-#include <mmintrin.h>
-#endif
-
-#endif //OPT_MMX
-
-#endif
--- a/third_party/wavpack/src/wavpack_version.h
+++ b/third_party/wavpack/src/wavpack_version.h
@ -11,9 +11,9 @@
 #ifndef WAVPACK_VERSION_H
 #define WAVPACK_VERSION_H

-#define LIBWAVPACK_MAJOR 4
-#define LIBWAVPACK_MINOR 70
+#define LIBWAVPACK_MAJOR 5
+#define LIBWAVPACK_MINOR 1
 #define LIBWAVPACK_MICRO 0
-#define LIBWAVPACK_VERSION_STRING "4.70.0"
+#define LIBWAVPACK_VERSION_STRING "5.1.0"

 #endif
--- a/third_party/wavpack/src/words.c
+++ b/third_party/wavpack/src/words.c
--- a/third_party/wavpack/src/wputils.c
+++ b/third_party/wavpack/src/wputils.c
--- a/third_party/wavpack/src/write_words.c
+++ b/third_party/wavpack/src/write_words.c
@ -0,0 +1,688 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2013 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// write_words.c
+
+// This module provides entropy word encoding functions using
+// a variation on the Rice method.  This was introduced in version 3.93
+// because it allows splitting the data into a "lossy" stream and a
+// "correction" stream in a very efficient manner and is therefore ideal
+// for the "hybrid" mode.  For 4.0, the efficiency of this method was
+// significantly improved by moving away from the normal Rice restriction of
+// using powers of two for the modulus divisions and now the method can be
+// used for both hybrid and pure lossless encoding.
+
+// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%),
+// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the
+// previous. Using standard Rice coding on this data would result in 1.4
+// bits per sample average (not counting sign bit). However, there is a
+// very simple encoding that is over 99% efficient with this data and
+// results in about 1.22 bits per sample.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "wavpack_local.h"
+
+///////////////////////////// executable code ////////////////////////////////
+
+// Reset the entropy coder state held in wps->w to all zeros. Lossless mode
+// needs nothing more; when the block header has HYBRID_FLAG set, the bitrate
+// accumulators are additionally primed by word_set_bitrate ().
+
+static void word_set_bitrate (WavpackStream *wps);
+
+void init_words (WavpackStream *wps)
+{
+    memset (&wps->w, 0, sizeof (wps->w));
+
+    // nothing further to set up unless this is a hybrid block
+
+    if (!(wps->wphdr.flags & HYBRID_FLAG))
+        return;
+
+    word_set_bitrate (wps);
+}
+
+// Derive the two initial bitrate accumulator values for hybrid mode from the
+// block header flags and the stream's "bits" field, and store them in the
+// upper 16 bits of wps->w.bitrate_acc []. Only the HYBRID_BITRATE mode (in
+// which the allowed error varies with the residual level, from "slow_level")
+// yields non-zero values; without that flag both accumulators are zeroed.
+// The simpler mode (which is not used yet) has the error level directly
+// controlled from the metadata.
+
+static void word_set_bitrate (WavpackStream *wps)
+{
+    int bitrate_0 = 0, bitrate_1 = 0;
+
+    if (wps->wphdr.flags & HYBRID_BITRATE) {
+
+        // accumulator 0: "bits" less a fixed 568, floored at zero
+        // (for FALSE_STEREO, bits * 2 - 512 is used in place of bits)
+
+        if (wps->wphdr.flags & FALSE_STEREO) {
+            if ((wps->bits * 2 - 512) >= 568)
+                bitrate_0 = (wps->bits * 2 - 512) - 568;
+        }
+        else if (wps->bits >= 568)
+            bitrate_0 = wps->bits - 568;
+
+        // accumulator 1 stays zero for mono data
+
+        if (!(wps->wphdr.flags & MONO_DATA)) {
+            if (wps->wphdr.flags & HYBRID_BALANCE) {
+                if (wps->wphdr.flags & JOINT_STEREO)
+                    bitrate_1 = 256;
+            }
+            else {
+                bitrate_1 = bitrate_0;
+
+                // joint stereo shifts up to 128 from accumulator 0 to accumulator 1
+
+                if (wps->wphdr.flags & JOINT_STEREO) {
+                    const int moved = (bitrate_0 < 128) ? bitrate_0 : 128;
+
+                    bitrate_0 -= moved;
+                    bitrate_1 += moved;
+                }
+            }
+        }
+    }
+
+    wps->w.bitrate_acc [0] = (int32_t) bitrate_0 << 16;
+    wps->w.bitrate_acc [1] = (int32_t) bitrate_1 << 16;
+}
+
+// Build the ID_ENTROPY_VARS metadata item. The three median values of each
+// channel are passed through wp_log2 () and stored as 16-bit values, low byte
+// first, in a freshly malloc'ed 12-byte buffer attached to wpmd->data; the
+// second channel is left out for MONO_DATA. read_entropy_vars () is then
+// called to read the values back because we must compensate for the loss
+// through the log function.
+// NOTE(review): the malloc () result is used without a NULL check.
+
+void write_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    const int num_chans = (wps->wphdr.flags & MONO_DATA) ? 1 : 2;
+    unsigned char *byteptr;
+    int chan, m, temp;
+
+    wpmd->data = malloc (12);
+    byteptr = wpmd->data;
+    wpmd->id = ID_ENTROPY_VARS;
+
+    for (chan = 0; chan < num_chans; chan++)
+        for (m = 0; m < 3; m++) {
+            temp = wp_log2 (wps->w.c [chan].median [m]);
+            *byteptr++ = temp;
+            *byteptr++ = temp >> 8;
+        }
+
+    wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data);
+    read_entropy_vars (wps, wpmd);
+}
+
+// Build the ID_HYBRID_PROFILE metadata item. After refreshing the bitrate
+// accumulators with word_set_bitrate (), the following 16-bit values are
+// written low byte first into a malloc'ed buffer (512 bytes are requested; at
+// most 12 are used), with the second channel's value omitted for MONO_DATA:
+//   - wp_log2s () of each slow_level (only when HYBRID_BITRATE is set)
+//   - the high word of each bitrate accumulator
+//   - wp_log2s () of each bitrate_delta (only when either delta is non-zero)
+// Afterward, read_hybrid_profile () is called to read the values back because
+// we must compensate for the loss through the log function and the truncation
+// of the bitrate.
+// NOTE(review): the malloc () result is used without a NULL check.
+
+void write_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    const int num_chans = (wps->wphdr.flags & MONO_DATA) ? 1 : 2;
+    unsigned char *byteptr;
+    int chan, temp;
+
+    word_set_bitrate (wps);
+    wpmd->data = malloc (512);
+    byteptr = wpmd->data;
+    wpmd->id = ID_HYBRID_PROFILE;
+
+    if (wps->wphdr.flags & HYBRID_BITRATE)
+        for (chan = 0; chan < num_chans; chan++) {
+            temp = wp_log2s (wps->w.c [chan].slow_level);
+            *byteptr++ = temp;
+            *byteptr++ = temp >> 8;
+        }
+
+    for (chan = 0; chan < num_chans; chan++) {
+        temp = wps->w.bitrate_acc [chan] >> 16;
+        *byteptr++ = temp;
+        *byteptr++ = temp >> 8;
+    }
+
+    if (wps->w.bitrate_delta [0] | wps->w.bitrate_delta [1])
+        for (chan = 0; chan < num_chans; chan++) {
+            temp = wp_log2s (wps->w.bitrate_delta [chan]);
+            *byteptr++ = temp;
+            *byteptr++ = temp >> 8;
+        }
+
+    wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data);
+    read_hybrid_profile (wps, wpmd);
+}
+
+// This function writes the specified word to the open bitstream "wvbits" and,
+// if the bitstream "wvcbits" is open, writes any correction data there. This
+// function will work for either lossless or hybrid but because a version
+// optimized for lossless exists below, it would normally be used for the hybrid
+// mode only. The return value is the actual value stored to the stream (even
+// if a correction file is being created) and is used as feedback to the
+// predictor.
+
+int32_t FASTCALL send_word (WavpackStream *wps, int32_t value, int chan)
+{
+    struct entropy_data *c = wps->w.c + chan;   // entropy state of the channel being coded
+    uint32_t ones_count, low, mid, high;
+    int sign = (value < 0) ? 1 : 0;
+    // zero-run handling: only entered while both median [0] values are < 2 and no zero is held
+    if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) {
+        if (wps->w.zeros_acc) {                 // a run of zeros is already being counted
+            if (value)
+                flush_word (wps);               // non-zero sample ends the run; flush emits the count
+            else {
+                c->slow_level -= (c->slow_level + SLO) >> SLS;
+                wps->w.zeros_acc++;             // extend the run; nothing is written yet
+                return 0;
+            }
+        }
+        else if (value)
+            putbit_0 (&wps->wvbits);            // no run in progress: one 0 bit precedes the sample
+        else {
+            c->slow_level -= (c->slow_level + SLO) >> SLS;
+            CLEAR (wps->w.c [0].median);        // starting a run: CLEAR the medians of both channels
+            CLEAR (wps->w.c [1].median);
+            wps->w.zeros_acc = 1;
+            return 0;
+        }
+    }
+    // from here on "value" is non-negative (-1 -> 0, -2 -> 1, ...); "sign" is sent separately
+    if (sign)
+        value = ~value;
+
+    if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
+        update_error_limit (wps);               // hybrid mode only, and only when coding channel 0
+    // choose the interval [low, high] that contains value and adapt each median that was consulted
+    if (value < (int32_t) GET_MED (0)) {
+        ones_count = low = 0;
+        high = GET_MED (0) - 1;
+        DEC_MED0 ();
+    }
+    else {
+        low = GET_MED (0);
+        INC_MED0 ();
+
+        if (value - low < GET_MED (1)) {
+            ones_count = 1;
+            high = low + GET_MED (1) - 1;
+            DEC_MED1 ();
+        }
+        else {
+            low += GET_MED (1);
+            INC_MED1 ();
+
+            if (value - low < GET_MED (2)) {
+                ones_count = 2;
+                high = low + GET_MED (2) - 1;
+                DEC_MED2 ();
+            }
+            else {
+                ones_count = 2 + (value - low) / GET_MED (2);   // beyond: whole GET_MED (2)-wide steps
+                low += (ones_count - 2) * GET_MED (2);
+                high = low + GET_MED (2) - 1;
+                INC_MED2 ();
+            }
+        }
+    }
+    // midpoint of the interval, rounded up; refined by the lossy path, replaced by the lossless path
+    mid = (high + low + 1) >> 1;
+    // ones_count is not written here; it is folded into holding_one / holding_zero for flush_word()
+    if (wps->w.holding_zero) {
+        if (ones_count)
+            wps->w.holding_one++;
+
+        flush_word (wps);
+
+        if (ones_count) {
+            wps->w.holding_zero = 1;
+            ones_count--;
+        }
+        else
+            wps->w.holding_zero = 0;
+    }
+    else
+        wps->w.holding_zero = 1;
+
+    wps->w.holding_one = ones_count * 2;
+    // error_limit == 0: queue the exact offset of value inside [low, high] in pend_data
+    if (!c->error_limit) {
+        if (high != low) {
+            uint32_t maxcode = high - low, code = value - low;
+            int bitcount = count_bits (maxcode);
+            uint32_t extras = bitset [bitcount] - maxcode - 1;  // NOTE(review): assumes bitset [n] == 1 << n
+
+            if (code < extras) {                // codes below "extras" take bitcount - 1 bits
+                wps->w.pend_data |= code << wps->w.pend_count;
+                wps->w.pend_count += bitcount - 1;
+            }
+            else {                              // the rest take bitcount bits: upper bits, then the lsb
+                wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count;
+                wps->w.pend_count += bitcount - 1;
+                wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++;
+            }
+        }
+
+        mid = value;                            // lossless: the value represented is the value itself
+    }
+    else
+        while (high - low > c->error_limit)     // lossy: halve [low, high] until it is narrow enough
+            if (value < (int32_t) mid) {
+                mid = ((high = mid - 1) + low + 1) >> 1;
+                wps->w.pend_count++;            // lower half: only the count advances, no bit is set
+            }
+            else {
+                mid = (high + (low = mid) + 1) >> 1;
+                wps->w.pend_data |= bitset [wps->w.pend_count++];   // upper half: sets a bit via bitset []
+            }
+
+    wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++);    // sign bit is queued last
+
+    if (!wps->w.holding_zero)
+        flush_word (wps);
+    // correction stream: offset of value within the final [low, high], written only in lossy mode
+    if (bs_is_open (&wps->wvcbits) && c->error_limit) {
+        uint32_t code = value - low, maxcode = high - low;
+        int bitcount = count_bits (maxcode);
+        uint32_t extras = bitset [bitcount] - maxcode - 1;
+
+        if (bitcount) {
+            if (code < extras)
+                putbits (code, bitcount - 1, &wps->wvcbits);
+            else {
+                putbits ((code + extras) >> 1, bitcount - 1, &wps->wvcbits);
+                putbit ((code + extras) & 1, &wps->wvcbits);
+            }
+        }
+    }
+
+    if (wps->wphdr.flags & HYBRID_BITRATE) {
+        c->slow_level -= (c->slow_level + SLO) >> SLS;
+        c->slow_level += wp_log2 (mid);
+    }
+
+    return sign ? ~mid : mid;                   // value represented in the main stream, sign restored
+}
+
+// This function is an optimized version of send_word() that only handles
+// lossless (error_limit == 0) and sends an entire buffer of either mono or
+// stereo data rather than a single sample. Unlike the generalized
+// send_word(), it does not return values because it always encodes
+// the exact value passed.
+
+void send_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsamples)
+{
+    struct entropy_data *c = wps->w.c;
+    int32_t value, csamples;
+    // without MONO_DATA the count is doubled, so the loop below walks individual values
+    if (!(wps->wphdr.flags & MONO_DATA))
+        nsamples *= 2;
+
+    for (csamples = 0; csamples < nsamples; ++csamples) {
+        int sign = ((value = *buffer++) < 0) ? 1 : 0;
+        uint32_t ones_count, low, high;
+        // stereo: even-indexed values use the channel 0 state, odd-indexed values channel 1
+        if (!(wps->wphdr.flags & MONO_DATA))
+            c = wps->w.c + (csamples & 1);
+        // zero-run handling: only entered while both median [0] values are < 2 and no zero is held
+        if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) {
+            if (wps->w.zeros_acc) {             // a run of zeros is already being counted
+                if (value)
+                    flush_word (wps);           // non-zero value ends the run; flush emits the count
+                else {
+                    wps->w.zeros_acc++;         // extend the run; nothing is written yet
+                    continue;
+                }
+            }
+            else if (value)
+                putbit_0 (&wps->wvbits);        // no run in progress: one 0 bit precedes the value
+            else {
+                CLEAR (wps->w.c [0].median);    // starting a run: CLEAR the medians of both channels
+                CLEAR (wps->w.c [1].median);
+                wps->w.zeros_acc = 1;
+                continue;
+            }
+        }
+        // from here on "value" is non-negative (-1 -> 0, -2 -> 1, ...); "sign" is sent separately
+        if (sign)
+            value = ~value;
+        // choose the interval [low, high] that contains value and adapt each median consulted
+        if (value < (int32_t) GET_MED (0)) {
+            ones_count = low = 0;
+            high = GET_MED (0) - 1;
+            DEC_MED0 ();
+        }
+        else {
+            low = GET_MED (0);
+            INC_MED0 ();
+
+            if (value - low < GET_MED (1)) {
+                ones_count = 1;
+                high = low + GET_MED (1) - 1;
+                DEC_MED1 ();
+            }
+            else {
+                low += GET_MED (1);
+                INC_MED1 ();
+
+                if (value - low < GET_MED (2)) {
+                    ones_count = 2;
+                    high = low + GET_MED (2) - 1;
+                    DEC_MED2 ();
+                }
+                else {
+                    ones_count = 2 + (value - low) / GET_MED (2);   // beyond: whole GET_MED (2)-wide steps
+                    low += (ones_count - 2) * GET_MED (2);
+                    high = low + GET_MED (2) - 1;
+                    INC_MED2 ();
+                }
+            }
+        }
+        // ones_count is not written here; it is folded into holding_one / holding_zero for flush_word()
+        if (wps->w.holding_zero) {
+            if (ones_count)
+                wps->w.holding_one++;
+
+            flush_word (wps);
+
+            if (ones_count) {
+                wps->w.holding_zero = 1;
+                ones_count--;
+            }
+            else
+                wps->w.holding_zero = 0;
+        }
+        else
+            wps->w.holding_zero = 1;
+
+        wps->w.holding_one = ones_count * 2;
+        // queue the exact offset of value inside [low, high]; nothing is needed when the interval is one wide
+        if (high != low) {
+            uint32_t maxcode = high - low, code = value - low;
+            int bitcount = count_bits (maxcode);
+            uint32_t extras = bitset [bitcount] - maxcode - 1;  // NOTE(review): assumes bitset [n] == 1 << n
+
+            if (code < extras) {                // codes below "extras" take bitcount - 1 bits
+                wps->w.pend_data |= code << wps->w.pend_count;
+                wps->w.pend_count += bitcount - 1;
+            }
+            else {                              // the rest take bitcount bits: upper bits, then the lsb
+                wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count;
+                wps->w.pend_count += bitcount - 1;
+                wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++;
+            }
+        }
+
+        wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++);    // sign bit is queued last
+
+        if (!wps->w.holding_zero)
+            flush_word (wps);
+    }
+}
+
+// Used by send_word() and send_words_lossless() to actually send most of the
+// accumulated data onto the bitstream. This is also called directly from
+// clients when all words have been sent.
+
+void flush_word (WavpackStream *wps)
+{
+    // 1) Pending run of zeros: count_bits() ones, a terminating zero, then
+    //    the bits of the run length from the lsb up, stopping before the top bit.
+    if (wps->w.zeros_acc) {
+        int prefix_ones;
+
+        for (prefix_ones = count_bits (wps->w.zeros_acc); prefix_ones; prefix_ones--) {
+            putbit_1 (&wps->wvbits);
+        }
+
+        putbit_0 (&wps->wvbits);
+
+        for (; wps->w.zeros_acc > 1; wps->w.zeros_acc >>= 1) {
+            putbit (wps->w.zeros_acc & 1, &wps->wvbits);
+        }
+
+        wps->w.zeros_acc = 0;
+    }
+
+    // 2) Held "one" bits.
+    if (wps->w.holding_one) {
+#ifdef LIMIT_ONES
+        if (wps->w.holding_one >= LIMIT_ONES) {
+            int prefix_ones;
+
+            // LIMIT_ONES ones plus a zero act as an escape; the excess is then
+            // written in the same prefixed form used for the zero run above.
+            putbits ((1L << LIMIT_ONES) - 1, LIMIT_ONES + 1, &wps->wvbits);
+            wps->w.holding_one -= LIMIT_ONES;
+
+            for (prefix_ones = count_bits (wps->w.holding_one); prefix_ones; prefix_ones--) {
+                putbit_1 (&wps->wvbits);
+            }
+
+            putbit_0 (&wps->wvbits);
+
+            for (; wps->w.holding_one > 1; wps->w.holding_one >>= 1) {
+                putbit (wps->w.holding_one & 1, &wps->wvbits);
+            }
+
+            wps->w.holding_zero = 0;
+        }
+        else {
+            putbits (bitmask [wps->w.holding_one], wps->w.holding_one, &wps->wvbits);
+        }
+
+        wps->w.holding_one = 0;
+#else
+        // holding_one is known to be non-zero here, so at least one bit goes out
+        while (wps->w.holding_one) {
+            putbit_1 (&wps->wvbits);
+            wps->w.holding_one--;
+        }
+#endif
+    }
+
+    // 3) Held terminating zero.
+    if (wps->w.holding_zero) {
+        putbit_0 (&wps->wvbits);
+        wps->w.holding_zero = 0;
+    }
+
+    // 4) Queued data bits (pend_count of them, taken from pend_data).
+    if (wps->w.pend_count) {
+        putbits (wps->w.pend_data, wps->w.pend_count, &wps->wvbits);
+        wps->w.pend_count = 0;
+        wps->w.pend_data = 0;
+    }
+}
+
+// This function is similar to send_word() except that no data is actually
+// written to any stream, but it does return the value that would have been
+// sent to a hybrid stream. It is used to determine beforehand how much noise
+// will be added to samples.
+
+int32_t nosend_word (WavpackStream *wps, int32_t value, int chan)
+{
+    struct entropy_data *c = wps->w.c + chan;   // entropy state of the channel being evaluated
+    uint32_t ones_count, low, mid, high;        // ones_count is only a temporary in the last branch below
+    int sign = (value < 0) ? 1 : 0;
+    // work on a non-negative value (-1 -> 0, -2 -> 1, ...); the sign is restored on return
+    if (sign)
+        value = ~value;
+
+    if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
+        update_error_limit (wps);               // hybrid mode only, and only for channel 0
+    // choose the interval [low, high] that contains value and adapt each median that was consulted
+    if (value < (int32_t) GET_MED (0)) {
+        low = 0;
+        high = GET_MED (0) - 1;
+        DEC_MED0 ();
+    }
+    else {
+        low = GET_MED (0);
+        INC_MED0 ();
+
+        if (value - low < GET_MED (1)) {
+            high = low + GET_MED (1) - 1;
+            DEC_MED1 ();
+        }
+        else {
+            low += GET_MED (1);
+            INC_MED1 ();
+
+            if (value - low < GET_MED (2)) {
+                high = low + GET_MED (2) - 1;
+                DEC_MED2 ();
+            }
+            else {
+                ones_count = 2 + (value - low) / GET_MED (2);   // beyond: whole GET_MED (2)-wide steps
+                low += (ones_count - 2) * GET_MED (2);
+                high = low + GET_MED (2) - 1;
+                INC_MED2 ();
+            }
+        }
+    }
+    // midpoint of the interval, rounded up
+    mid = (high + low + 1) >> 1;
+
+    if (!c->error_limit)
+        mid = value;                            // no error allowed: the result is the value itself
+    else
+        while (high - low > c->error_limit)     // halve [low, high] until it is narrow enough
+            if (value < (int32_t) mid)
+                mid = ((high = mid - 1) + low + 1) >> 1;
+            else
+                mid = (high + (low = mid) + 1) >> 1;
+    // unlike send_word(), slow_level is updated here without testing HYBRID_BITRATE
+    c->slow_level -= (c->slow_level + SLO) >> SLS;
+    c->slow_level += wp_log2 (mid);
+
+    return sign ? ~mid : mid;
+}
+
+// This function is used to scan some number of samples to set the variables
+// "slow_level" and the "median" array. In pure symmetrical encoding mode this
+// would not be needed because these values would simply be continued from the
+// previous block. However, in the -X modes and the 32-bit modes we cannot do
+// this because parameters may change between blocks and the variables might
+// not apply. This function can work in mono or stereo and can scan a block
+// in either direction.
+
+static void scan_word_pass (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir)
+{
+    uint32_t flags = wps->wphdr.flags, value, low;
+    struct entropy_data *c = wps->w.c;          // starts on channel 0; stepped to channel 1 and back below
+    int chan;
+    // turn "dir" into the per-iteration pointer step; for a backward scan start at the last sample
+    if (flags & MONO_DATA) {
+        if (dir < 0) {
+            samples += (num_samples - 1);
+            dir = -1;
+        }
+        else
+            dir = 1;
+    }
+    else {                                      // stereo: two values per step (samples [0] and samples [1])
+        if (dir < 0) {
+            samples += (num_samples - 1) * 2;
+            dir = -2;
+        }
+        else
+            dir = 2;
+    }
+
+    while (num_samples--) {
+        // first (or only) channel: magnitude of the sample drives slow_level and the medians
+        value = labs (samples [chan = 0]);
+
+        if (flags & HYBRID_BITRATE) {
+            wps->w.c [0].slow_level -= (wps->w.c [0].slow_level + SLO) >> SLS;
+            wps->w.c [0].slow_level += wp_log2 (value);
+        }
+
+        if (value < GET_MED (0)) {
+            DEC_MED0 ();
+        }
+        else {
+            low = GET_MED (0);
+            INC_MED0 ();
+
+            if (value - low < GET_MED (1)) {
+                DEC_MED1 ();
+            }
+            else {
+                low += GET_MED (1);
+                INC_MED1 ();
+
+                if (value - low < GET_MED (2)) {
+                    DEC_MED2 ();
+                }
+                else {
+                    INC_MED2 ();
+                }
+            }
+        }
+
+        if (!(flags & MONO_DATA)) {             // second channel: same update using the next entropy_data
+            value = labs (samples [chan = 1]);
+            c++;
+
+            if (wps->wphdr.flags & HYBRID_BITRATE) {
+                wps->w.c [1].slow_level -= (wps->w.c [1].slow_level + SLO) >> SLS;
+                wps->w.c [1].slow_level += wp_log2 (value);
+            }
+
+            if (value < GET_MED (0)) {
+                DEC_MED0 ();
+            }
+            else {
+                low = GET_MED (0);
+                INC_MED0 ();
+
+                if (value - low < GET_MED (1)) {
+                    DEC_MED1 ();
+                }
+                else {
+                    low += GET_MED (1);
+                    INC_MED1 ();
+
+                    if (value - low < GET_MED (2)) {
+                        DEC_MED2 ();
+                    }
+                    else {
+                        INC_MED2 ();
+                    }
+                }
+            }
+
+            c--;                                // back to channel 0 for the next iteration
+        }
+
+        samples += dir;
+    }
+}
+
+// Wrapper for scan_word_pass() that ensures that at least 2048 samples are processed by
+// potentially making multiple passes through the data. See description of scan_word_pass()
+// for more details.
+
+void scan_word (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir)
+{
+    int remaining;
+
+    // The entropy state is always reset, even when there is nothing to scan.
+    init_words (wps);
+
+    if (!num_samples)
+        return;
+
+    // ceil (2048.0 / num_samples): repeat the scan until at least 2048
+    // samples have been fed to scan_word_pass().
+    remaining = (2048 + num_samples - 1) / num_samples;
+
+    for (; remaining; remaining--)
+        scan_word_pass (wps, samples, num_samples, dir);
+}
+