diff --git a/examples/src/Main.cpp b/examples/src/Main.cpp index a7202c9..103f2c2 100644 --- a/examples/src/Main.cpp +++ b/examples/src/Main.cpp @@ -70,11 +70,13 @@ int main(int argc, const char **argv) try //loader.Load(fileData.get(), "test_data/ad_hoc/KittyPurr16_Stereo.flac"); //loader.Load(fileData.get(), "test_data/ad_hoc/KittyPurr16_Mono.flac"); //loader.Load(fileData.get(), "test_data/ad_hoc/KittyPurr24_Stereo.flac"); + //auto memory = ReadFile("test_data/ad_hoc/KittyPurr24_Stereo.flac"); // broken + //loader.Load(fileData.get(), "flac", memory.buffer); // broken // Single-channel opus //loader.Load(fileData.get(), "test_data/ad_hoc/detodos.opus"); // "Firefox: From All, To All" - // 1 + 2 channel wavepack + // 1 + 2 channel wavpack //loader.Load(fileData.get(), "test_data/ad_hoc/TestBeat_Float32.wv"); //loader.Load(fileData.get(), "test_data/ad_hoc/TestBeat_Float32_Mono.wv"); //loader.Load(fileData.get(), "test_data/ad_hoc/TestBeat_Int16.wv"); @@ -110,7 +112,7 @@ int main(int argc, const char **argv) try // Resample std::vector outputBuffer; outputBuffer.reserve(fileData->samples.size()); - linear_resample(44100.0 / 48000.0, fileData->samples, outputBuffer, fileData->samples.size()); + linear_resample(44100.0 / 48000.0, fileData->samples, outputBuffer, (uint32_t) fileData->samples.size()); std::cout << "Input Samples: " << fileData->samples.size() << std::endl; std::cout << "Output Samples: " << outputBuffer.size() << std::endl; diff --git a/include/libnyquist/AudioDecoder.h b/include/libnyquist/AudioDecoder.h index 69336f7..9200ea3 100644 --- a/include/libnyquist/AudioDecoder.h +++ b/include/libnyquist/AudioDecoder.h @@ -68,7 +68,9 @@ class NyquistIO void AddDecoderToTable(std::shared_ptr decoder); std::map> decoderTable; NO_MOVE(NyquistIO); + public: + NyquistIO(); ~NyquistIO(); void Load(AudioData * data, const std::string & path); diff --git a/include/libnyquist/Common.h b/include/libnyquist/Common.h index 827c8b5..3641811 100644 --- 
a/include/libnyquist/Common.h +++ b/include/libnyquist/Common.h @@ -201,6 +201,57 @@ inline std::array Unpack(uint32_t a) return output; } +////////////////////////// +// Resampling Utilities // +////////////////////////// + +// This is a naive implementation of a resampling filter where a lerp is used as a bad low-pass. +// It is very far from the ideal case and should be used with caution (or not at all) on signals that matter. +// It is included here to upsample 44.1k to 48k for the purposes of microphone input => Opus, where the +// nominal frequencies of speech are particularly far from Nyquist. +inline void linear_resample(const double rate, const std::vector & input, std::vector & output, const uint32_t samplesToProcess) +{ + double virtualReadIndex = 0; + double a, b, i, sample; + uint32_t n = samplesToProcess - 1; + while (n--) + { + uint32_t readIndex = static_cast(virtualReadIndex); + i = virtualReadIndex - readIndex; + a = input[readIndex + 0]; + b = input[readIndex + 1]; + sample = (1.0 - i) * a + i * b; // linear interpolate + output.push_back(static_cast(sample)); + virtualReadIndex += rate; + } +} + +inline double sample_hermite_4p_3o(double x, double * y) +{ + static double c0, c1, c2, c3; + c0 = y[1]; + c1 = (1.0 / 2.0)*(y[2] - y[0]); + c2 = (y[0] - (5.0 / 2.0)*y[1]) + (2.0*y[2] - (1.0 / 2.0)*y[3]); + c3 = (1.0 / 2.0)*(y[3] - y[0]) + (3.0 / 2.0)*(y[1] - y[2]); + return ((c3*x + c2)*x + c1)*x + c0; +} + +inline void hermite_resample(const double rate, const std::vector & input, std::vector & output, const uint32_t samplesToProcess) +{ + double virtualReadIndex = 1; + double i, sample; + uint32_t n = samplesToProcess - 1; + while (n--) + { + uint32_t readIndex = static_cast(virtualReadIndex); + i = virtualReadIndex - readIndex; + double samps[4] = { input[readIndex - 1], input[readIndex], input[readIndex + 1], input[readIndex + 2] }; + sample = sample_hermite_4p_3o(i, samps); // cubic hermite interpolate over 4 samples + 
output.push_back(static_cast(sample)); + virtualReadIndex += rate; + } +} + ////////////////////////// // Conversion Utilities // ////////////////////////// diff --git a/include/libnyquist/IMA4Util.h b/include/libnyquist/IMA4Util.h index 8fe684d..1549793 100644 --- a/include/libnyquist/IMA4Util.h +++ b/include/libnyquist/IMA4Util.h @@ -100,7 +100,7 @@ namespace nqr const uint8_t * data = state.inBuffer; // Loop over the interleaved channels - for (int32_t ch = 0; ch < num_channels; ch++) + for (uint32_t ch = 0; ch < num_channels; ch++) { const int byteOffset = ch * 4; diff --git a/include/libnyquist/ModplugDecoder.h b/include/libnyquist/ModplugDecoder.h index f0b152f..b8a6f4a 100644 --- a/include/libnyquist/ModplugDecoder.h +++ b/include/libnyquist/ModplugDecoder.h @@ -42,4 +42,4 @@ struct ModplugDecoder : public nqr::BaseDecoder } // end namespace nqr -#endif \ No newline at end of file +#endif diff --git a/include/libnyquist/WavEncoder.h b/include/libnyquist/WavEncoder.h index f66a03b..dd1da47 100644 --- a/include/libnyquist/WavEncoder.h +++ b/include/libnyquist/WavEncoder.h @@ -32,53 +32,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace nqr { - // This is a naieve implementation of a resampling filter where a lerp is used as a bad low-pass. - // It very far from the ideal case and should be used with caution (or not at all) on signals that matter. - // It is included here to upsample 44.1k to 48k for the purposes of microphone input => Opus, where the the - // nominal frequencies of speech are particularly far from Nyquist. 
- static inline void linear_resample(const double rate, const std::vector & input, std::vector & output, size_t samplesToProcess) - { - double virtualReadIndex = 0; - double a, b, i, sample; - uint32_t n = samplesToProcess - 1; - while (n--) - { - uint32_t readIndex = static_cast(virtualReadIndex); - i = virtualReadIndex - readIndex; - a = input[readIndex + 0]; - b = input[readIndex + 1]; - sample = (1.0 - i) * a + i * b; // linear interpolate - output.push_back(sample); - virtualReadIndex += rate; - } - } - - static inline double sample_hermite_4p_3o(double x, double * y) - { - static double c0, c1, c2, c3; - c0 = y[1]; - c1 = (1.0/2.0)*(y[2]-y[0]); - c2 = (y[0] - (5.0/2.0)*y[1]) + (2.0*y[2] - (1.0/2.0)*y[3]); - c3 = (1.0/2.0)*(y[3]-y[0]) + (3.0/2.0)*(y[1]-y[2]); - return ((c3*x+c2)*x+c1)*x+c0; - } - - static inline void hermite_resample(const double rate, const std::vector & input, std::vector & output, size_t samplesToProcess) - { - double virtualReadIndex = 1; - double i, sample; - uint32_t n = samplesToProcess - 1; - while (n--) - { - uint32_t readIndex = static_cast(virtualReadIndex); - i = virtualReadIndex - readIndex; - double samps[4] = {input[readIndex - 1], input[readIndex], input[readIndex + 1], input[readIndex + 2]}; - sample = sample_hermite_4p_3o(i, samps); // cubic hermite interpolate over 4 samples - output.push_back(sample); - virtualReadIndex += rate; - } - } - enum EncoderError { NoError, diff --git a/libnyquist.vcxproj/v140/libnyquist.vcxproj b/libnyquist.vcxproj/v140/libnyquist.vcxproj index bdf8c43..94e6360 100644 --- a/libnyquist.vcxproj/v140/libnyquist.vcxproj +++ b/libnyquist.vcxproj/v140/libnyquist.vcxproj @@ -36,7 +36,6 @@ - @@ -161,4 +160,4 @@ - \ No newline at end of file + diff --git a/libnyquist.vcxproj/v140/libnyquist.vcxproj.filters b/libnyquist.vcxproj/v140/libnyquist.vcxproj.filters index 50dac55..41ca4db 100644 --- a/libnyquist.vcxproj/v140/libnyquist.vcxproj.filters +++ b/libnyquist.vcxproj/v140/libnyquist.vcxproj.filters @@ 
-46,9 +46,6 @@ src - - src\deps - src\deps @@ -114,4 +111,4 @@ {d839471f-71d1-471e-95e0-c33af9bb64bc} - \ No newline at end of file + diff --git a/libnyquist.vcxproj/v141/libnyquist.vcxproj b/libnyquist.vcxproj/v141/libnyquist.vcxproj index 9de6b6a..e090e96 100644 --- a/libnyquist.vcxproj/v141/libnyquist.vcxproj +++ b/libnyquist.vcxproj/v141/libnyquist.vcxproj @@ -35,7 +35,32 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -106,8 +131,9 @@ Level3 Disabled true - _CRT_SECURE_NO_WARNINGS;_MBCS;D_SCL_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNING;WIN32;_WIN32;USE_ALLOCA;OPUS_BUILD;%(PreprocessorDefinitions) + DEBUG;_ITERATOR_DEBUG_LEVEL=0;_CRT_SECURE_NO_WARNINGS;_MBCS;D_SCL_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNING;WIN32;_WIN32;USE_ALLOCA;OPUS_BUILD;%(PreprocessorDefinitions) $(ProjectDir)..\..\third_party\;$(ProjectDir)..\..\include\libnyquist\;$(ProjectDir)..\..\third_party\libvorbis\include;$(ProjectDir)..\..\third_party\libogg\include;$(ProjectDir)..\..\third_party\wavpack\include;$(ProjectDir)..\..\third_party\flac\src\include;$(ProjectDir)..\..\third_party\opus\celt;$(ProjectDir)..\..\third_party\opus\libopus\include;$(ProjectDir)..\..\third_party\opus\libopus\src;$(ProjectDir)..\..\third_party\opus\opusfile\include;$(ProjectDir)..\..\third_party\opus\opusfile\src;$(ProjectDir)..\..\third_party\opus\opusfile\src\include;$(ProjectDir)..\..\third_party\opus\silk;$(ProjectDir)..\..\third_party\opus\silk\float;$(ProjectDir)..\..\third_party\musepack\include;$(ProjectDir)..\..\third_party\musepack\libmpcenc;$(ProjectDir)..\..\third_party\musepack\libmpcdec;%(AdditionalIncludeDirectories) + MultiThreadedDebug true @@ -118,8 +144,9 @@ Level3 Disabled true - _CRT_SECURE_NO_WARNINGS;_MBCS;D_SCL_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNING;WIN32;_WIN32;USE_ALLOCA;OPUS_BUILD;%(PreprocessorDefinitions) + 
DEBUG;_ITERATOR_DEBUG_LEVEL=0;_CRT_SECURE_NO_WARNINGS;_MBCS;D_SCL_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNING;WIN32;_WIN32;USE_ALLOCA;OPUS_BUILD;%(PreprocessorDefinitions) $(ProjectDir)..\..\third_party\;$(ProjectDir)..\..\include\libnyquist\;$(ProjectDir)..\..\third_party\libvorbis\include;$(ProjectDir)..\..\third_party\libogg\include;$(ProjectDir)..\..\third_party\wavpack\include;$(ProjectDir)..\..\third_party\flac\src\include;$(ProjectDir)..\..\third_party\opus\celt;$(ProjectDir)..\..\third_party\opus\libopus\include;$(ProjectDir)..\..\third_party\opus\libopus\src;$(ProjectDir)..\..\third_party\opus\opusfile\include;$(ProjectDir)..\..\third_party\opus\opusfile\src;$(ProjectDir)..\..\third_party\opus\opusfile\src\include;$(ProjectDir)..\..\third_party\opus\silk;$(ProjectDir)..\..\third_party\opus\silk\float;$(ProjectDir)..\..\third_party\musepack\include;$(ProjectDir)..\..\third_party\musepack\libmpcenc;$(ProjectDir)..\..\third_party\musepack\libmpcdec;%(AdditionalIncludeDirectories) + MultiThreadedDebugDLL true @@ -134,6 +161,7 @@ true _CRT_SECURE_NO_WARNINGS;_MBCS;D_SCL_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNING;WIN32;_WIN32;USE_ALLOCA;OPUS_BUILD;%(PreprocessorDefinitions) 
$(ProjectDir)..\..\third_party\;$(ProjectDir)..\..\include\libnyquist\;$(ProjectDir)..\..\third_party\libvorbis\include;$(ProjectDir)..\..\third_party\libogg\include;$(ProjectDir)..\..\third_party\wavpack\include;$(ProjectDir)..\..\third_party\flac\src\include;$(ProjectDir)..\..\third_party\opus\celt;$(ProjectDir)..\..\third_party\opus\libopus\include;$(ProjectDir)..\..\third_party\opus\libopus\src;$(ProjectDir)..\..\third_party\opus\opusfile\include;$(ProjectDir)..\..\third_party\opus\opusfile\src;$(ProjectDir)..\..\third_party\opus\opusfile\src\include;$(ProjectDir)..\..\third_party\opus\silk;$(ProjectDir)..\..\third_party\opus\silk\float;$(ProjectDir)..\..\third_party\musepack\include;$(ProjectDir)..\..\third_party\musepack\libmpcenc;$(ProjectDir)..\..\third_party\musepack\libmpcdec;%(AdditionalIncludeDirectories) + MultiThreaded true @@ -150,6 +178,7 @@ true _CRT_SECURE_NO_WARNINGS;_MBCS;D_SCL_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNING;WIN32;_WIN32;USE_ALLOCA;OPUS_BUILD;%(PreprocessorDefinitions) $(ProjectDir)..\..\third_party\;$(ProjectDir)..\..\include\libnyquist\;$(ProjectDir)..\..\third_party\libvorbis\include;$(ProjectDir)..\..\third_party\libogg\include;$(ProjectDir)..\..\third_party\wavpack\include;$(ProjectDir)..\..\third_party\flac\src\include;$(ProjectDir)..\..\third_party\opus\celt;$(ProjectDir)..\..\third_party\opus\libopus\include;$(ProjectDir)..\..\third_party\opus\libopus\src;$(ProjectDir)..\..\third_party\opus\opusfile\include;$(ProjectDir)..\..\third_party\opus\opusfile\src;$(ProjectDir)..\..\third_party\opus\opusfile\src\include;$(ProjectDir)..\..\third_party\opus\silk;$(ProjectDir)..\..\third_party\opus\silk\float;$(ProjectDir)..\..\third_party\musepack\include;$(ProjectDir)..\..\third_party\musepack\libmpcenc;$(ProjectDir)..\..\third_party\musepack\libmpcdec;%(AdditionalIncludeDirectories) + MultiThreadedDLL true diff --git a/libnyquist.vcxproj/v141/libnyquist.vcxproj.filters b/libnyquist.vcxproj/v141/libnyquist.vcxproj.filters index 
c58377b..e7425ec 100644 --- a/libnyquist.vcxproj/v141/libnyquist.vcxproj.filters +++ b/libnyquist.vcxproj/v141/libnyquist.vcxproj.filters @@ -43,20 +43,92 @@ src - - src\deps - src\deps src + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + + + src\deps\WavpackDependencies + - - src - include @@ -93,6 +165,9 @@ include\util + + include + @@ -107,5 +182,8 @@ {d839471f-71d1-471e-95e0-c33af9bb64bc} + + {4ed41d64-0c09-4382-8ee9-70aad6043650} + \ No newline at end of file diff --git a/libnyquist.xcodeproj/project.pbxproj b/libnyquist.xcodeproj/project.pbxproj index 157e941..323926d 100644 --- a/libnyquist.xcodeproj/project.pbxproj +++ b/libnyquist.xcodeproj/project.pbxproj @@ -22,7 +22,6 @@ 08B91DA11AC73B8A00335131 /* OpusDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 08B91D971AC73B8A00335131 /* OpusDecoder.cpp */; }; 08B91DA21AC73B8A00335131 /* WavDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 08B91D981AC73B8A00335131 /* WavDecoder.cpp */; }; 08B91DA31AC73B8A00335131 /* WavPackDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 08B91D991AC73B8A00335131 /* WavPackDecoder.cpp */; }; - 08D0EC751C6DA41300FCDA23 /* 
WavPackDependencies.c in Sources */ = {isa = PBXBuildFile; fileRef = 08D0EC741C6DA41300FCDA23 /* WavPackDependencies.c */; }; 08FFC72D1CA702EC005812D6 /* ModplugDependencies.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 08FFC72C1CA702EC005812D6 /* ModplugDependencies.cpp */; }; 08FFC72F1CA7038D005812D6 /* ModplugDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 08FFC72E1CA7038D005812D6 /* ModplugDecoder.cpp */; }; /* End PBXBuildFile section */ @@ -72,7 +71,6 @@ 08B91D981AC73B8A00335131 /* WavDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = WavDecoder.cpp; path = src/WavDecoder.cpp; sourceTree = SOURCE_ROOT; }; 08B91D991AC73B8A00335131 /* WavPackDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = WavPackDecoder.cpp; path = src/WavPackDecoder.cpp; sourceTree = SOURCE_ROOT; }; 08C83B7C1C25D7780071EED6 /* IMA4Util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = IMA4Util.h; path = include/libnyquist/IMA4Util.h; sourceTree = SOURCE_ROOT; }; - 08D0EC741C6DA41300FCDA23 /* WavPackDependencies.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = WavPackDependencies.c; path = src/WavPackDependencies.c; sourceTree = SOURCE_ROOT; }; 08FFC72C1CA702EC005812D6 /* ModplugDependencies.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ModplugDependencies.cpp; path = src/ModplugDependencies.cpp; sourceTree = SOURCE_ROOT; }; 08FFC72E1CA7038D005812D6 /* ModplugDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ModplugDecoder.cpp; path = src/ModplugDecoder.cpp; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ @@ -92,7 +90,6 @@ isa = PBXGroup; children = ( 08FFC72C1CA702EC005812D6 /* ModplugDependencies.cpp */, - 08D0EC741C6DA41300FCDA23 /* WavPackDependencies.c */, 
081FFB181ADF803800673073 /* FlacDependencies.c */, 086DADAB1ADF9DF30031F793 /* VorbisDependencies.c */, 0804D13E1AE69F0100F4B1FD /* OpusDependencies.c */, @@ -264,7 +261,6 @@ 08FFC72D1CA702EC005812D6 /* ModplugDependencies.cpp in Sources */, 08B91D9E1AC73B8A00335131 /* FlacDecoder.cpp in Sources */, 08FFC72F1CA7038D005812D6 /* ModplugDecoder.cpp in Sources */, - 08D0EC751C6DA41300FCDA23 /* WavPackDependencies.c in Sources */, 086DADAD1AE029860031F793 /* VorbisDependencies.c in Sources */, 08B91DA31AC73B8A00335131 /* WavPackDecoder.cpp in Sources */, 08B91D9D1AC73B8A00335131 /* Common.cpp in Sources */, diff --git a/src/AudioDecoder.cpp b/src/AudioDecoder.cpp index 4082efe..df99b80 100644 --- a/src/AudioDecoder.cpp +++ b/src/AudioDecoder.cpp @@ -134,4 +134,4 @@ void NyquistIO::BuildDecoderTable() AddDecoderToTable(std::make_shared()); AddDecoderToTable(std::make_shared()); AddDecoderToTable(std::make_shared()); -} \ No newline at end of file +} diff --git a/src/Common.cpp b/src/Common.cpp index 1309659..5841c28 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -24,6 +24,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "Common.h" +#include using namespace nqr; @@ -104,9 +105,10 @@ void nqr::ConvertToFloat32(float * dst, const uint8_t * src, const size_t N, PCM else if (f == PCM_FLT) { - const float * dataPtr = reinterpret_cast(src); + memcpy(dst, src, N * sizeof(float)); + /* const float * dataPtr = reinterpret_cast(src); for (size_t i = 0; i < N; ++i) - dst[i] = (float) Read32(dataPtr[i]); + dst[i] = (float) Read32(dataPtr[i]); */ } else if (f == PCM_DBL) { diff --git a/src/FlacDecoder.cpp b/src/FlacDecoder.cpp index 78823f9..b89cd5f 100644 --- a/src/FlacDecoder.cpp +++ b/src/FlacDecoder.cpp @@ -24,26 +24,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "FlacDecoder.h" -#include "flac/all.h" -#include "flac/stream_decoder.h" +#include "FLAC/all.h" +#include "FLAC/stream_decoder.h" #include "AudioDecoder.h" +#include using namespace nqr; +// FLAC is a big-endian format. All values are unsigned. + class FlacDecoderInternal { public: - // FLAC is a big-endian format. All values are unsigned. - FlacDecoderInternal(AudioData * d, std::string filepath) : d(d) + FlacDecoderInternal(AudioData * d, const std::string & filepath) : d(d) { - - ///////////////////////////// - // Initialize FLAC library // - ///////////////////////////// - decoderInternal = FLAC__stream_decoder_new(); FLAC__stream_decoder_set_metadata_respond(decoderInternal, FLAC__METADATA_TYPE_STREAMINFO); @@ -57,10 +54,49 @@ public: this) == FLAC__STREAM_DECODER_INIT_STATUS_OK; FLAC__stream_decoder_set_md5_checking(decoderInternal, true); + + if (initialized) + { + // Find the size and allocate memory + FLAC__stream_decoder_process_until_end_of_metadata(decoderInternal); + + // Read memory out into our temporary internalBuffer + FLAC__stream_decoder_process_until_end_of_stream(decoderInternal); + + // Presently unneeded, but useful for reference + // FLAC__ChannelAssignment channelAssignment = FLAC__stream_decoder_get_channel_assignment(decoderInternal); + + // Fill out remaining user data + d->lengthSeconds = (float) numSamples / (float) d->sampleRate; + + auto totalSamples = numSamples * d->channelCount; + + // Next, process internal buffer into the user-visible samples array + ConvertToFloat32(d->samples.data(), internalBuffer.data(), totalSamples, d->sourceFormat); + } + else throw std::runtime_error("Unable to initialize FLAC decoder"); + } + + FlacDecoderInternal(AudioData * d, const std::vector & memory) : d(d), data(std::move(memory)), dataPos(0) + { + decoderInternal = FLAC__stream_decoder_new(); - ////////////////////// - // Read Stream Data // - ///////////////////// + FLAC__stream_decoder_set_metadata_respond(decoderInternal, 
FLAC__METADATA_TYPE_STREAMINFO); + + bool initialized = FLAC__stream_decoder_init_stream( + decoderInternal, + read_callback, + seek_callback, + tell_callback, + length_callback, + eof_callback, + s_writeCallback, + s_metadataCallback, + s_errorCallback, + this + ) == FLAC__STREAM_DECODER_INIT_STATUS_OK; + + FLAC__stream_decoder_set_md5_checking(decoderInternal, true); if (initialized) { @@ -81,12 +117,7 @@ public: // Next, process internal buffer into the user-visible samples array ConvertToFloat32(d->samples.data(), internalBuffer.data(), totalSamples, d->sourceFormat); } - - else - { - throw std::runtime_error("Unable to initialize FLAC decoder"); - } - + else throw std::runtime_error("Unable to initialize FLAC decoder"); } ~FlacDecoderInternal() @@ -121,16 +152,14 @@ public: static FLAC__StreamDecoderWriteStatus s_writeCallback(const FLAC__StreamDecoder *, const FLAC__Frame* frame, const FLAC__int32 * const buffer[], void * userPtr) { FlacDecoderInternal * decoder = reinterpret_cast(userPtr); - const size_t bytesPerSample = GetFormatBitsPerSample(decoder->d->sourceFormat) / 8; - auto dataPtr = decoder->internalBuffer.data(); for (uint32_t i = 0; i < frame->header.blocksize; i++) { - for(int j = 0; j < decoder->d->channelCount; j++) + for (int j = 0; j < decoder->d->channelCount; j++) { - memcpy(dataPtr + decoder->bufferPosition, &buffer[j][i], bytesPerSample); + std::memcpy(dataPtr + decoder->bufferPosition, &buffer[j][i], bytesPerSample); decoder->bufferPosition += bytesPerSample; } } @@ -145,7 +174,51 @@ public: static void s_errorCallback (const FLAC__StreamDecoder *, FLAC__StreamDecoderErrorStatus status, void *) { - std::cerr << "FLAC Decoder Error: " << FLAC__StreamDecoderErrorStatusString[status] << std::endl; + throw std::runtime_error("FLAC decode exception " + std::string(FLAC__StreamDecoderErrorStatusString[status])); + } + + static FLAC__StreamDecoderReadStatus read_callback(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, 
void *client_data) + { + FlacDecoderInternal *decoderInternal = (FlacDecoderInternal *)client_data; + size_t readLength = std::min(*bytes, decoderInternal->data.size() - decoderInternal->dataPos); + + if (readLength > 0) + { + std::memcpy(buffer, decoderInternal->data.data(), readLength); + decoderInternal->dataPos += readLength; + *bytes = readLength; + if (decoderInternal->dataPos < decoderInternal->data.size()) return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE; + else return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM; + } + else return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM; + } + + static FLAC__StreamDecoderSeekStatus seek_callback(const FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data) + { + FlacDecoderInternal *decoderInternal = (FlacDecoderInternal *)client_data; + size_t newPos = std::min(absolute_byte_offset, decoderInternal->data.size() - decoderInternal->dataPos); + decoderInternal->dataPos = newPos; + return FLAC__STREAM_DECODER_SEEK_STATUS_OK; + } + + static FLAC__StreamDecoderTellStatus tell_callback(const FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data) + { + FlacDecoderInternal *decoderInternal = (FlacDecoderInternal *)client_data; + *absolute_byte_offset = decoderInternal->dataPos; + return FLAC__STREAM_DECODER_TELL_STATUS_OK; + } + + static FLAC__StreamDecoderLengthStatus length_callback(const FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data) + { + FlacDecoderInternal *decoderInternal = (FlacDecoderInternal *)client_data; + *stream_length = decoderInternal->data.size(); + return FLAC__STREAM_DECODER_LENGTH_STATUS_OK; + } + + static FLAC__bool eof_callback(const FLAC__StreamDecoder *decoder, void *client_data) + { + FlacDecoderInternal *decoderInternal = (FlacDecoderInternal *)client_data; + return decoderInternal->dataPos == decoderInternal->data.size(); } private: @@ -153,7 +226,8 @@ private: NO_COPY(FlacDecoderInternal); 
FLAC__StreamDecoder * decoderInternal; - + std::vector data; + size_t dataPos; size_t bufferPosition = 0; size_t numSamples = 0; @@ -173,10 +247,10 @@ void FlacDecoder::LoadFromPath(AudioData * data, const std::string & path) void FlacDecoder::LoadFromBuffer(AudioData * data, const std::vector & memory) { - throw LoadBufferNotImplEx(); + FlacDecoderInternal decoder(data, memory); } std::vector FlacDecoder::GetSupportedFileExtensions() { return {"flac"}; -} \ No newline at end of file +} diff --git a/src/FlacDependencies.c b/src/FlacDependencies.c index 772075a..0cb8523 100644 --- a/src/FlacDependencies.c +++ b/src/FlacDependencies.c @@ -77,25 +77,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define HAVE_LROUND 1 -#include "flac/all.h" +#include "FLAC/all.h" #if defined(_MSC_VER) -#include "flac/src/win_utf8_io.c" +#include "FLAC/src/win_utf8_io.c" #endif -#include "flac/src/bitmath.c" -#include "flac/src/bitreader.c" -#include "flac/src/bitwriter.c" -#include "flac/src/cpu.c" -#include "flac/src/crc.c" -#include "flac/src/fixed.c" -#include "flac/src/float.c" -#include "flac/src/format.c" -#include "flac/src/lpc.c" -#include "flac/src/md5.c" -#include "flac/src/memory.c" -#include "flac/src/stream_decoder.c" -#include "flac/src/window.c" +#include "FLAC/src/bitmath.c" +#include "FLAC/src/bitreader.c" +#include "FLAC/src/bitwriter.c" +#include "FLAC/src/cpu.c" +#include "FLAC/src/crc.c" +#include "FLAC/src/fixed.c" +#include "FLAC/src/float.c" +#include "FLAC/src/format.c" +#include "FLAC/src/lpc.c" +#include "FLAC/src/md5.c" +#include "FLAC/src/memory.c" +#include "FLAC/src/stream_decoder.c" +#include "FLAC/src/window.c" #undef VERSION @@ -105,4 +105,4 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if (_MSC_VER) #pragma warning (pop) -#endif \ No newline at end of file +#endif diff --git a/src/ModplugDecoder.cpp b/src/ModplugDecoder.cpp index badd2c4..7d277cd 100644 --- a/src/ModplugDecoder.cpp +++ b/src/ModplugDecoder.cpp @@ -53,10 +53,7 @@ public: ModPlug_SetSettings(&mps); mpf = ModPlug_Load((const void*)fileData.data(), fileData.size()); - if (!mpf) - { - throw std::runtime_error("could not load module"); - } + if (!mpf) throw std::runtime_error("could not load module"); d->sampleRate = 44100; d->channelCount = 2; @@ -68,43 +65,46 @@ public: auto totalSamples = (44100LL * len_ms) / 1000; d->samples.resize(totalSamples * d->channelCount); - auto readInternal = [&]() + auto read_func = [&]() { const float invf = 1 / (float)0x7fffffff; - float *ptr = d->samples.data(); - float *end = d->samples.data() + d->samples.size(); + float *ptr = d->samples.data(); + float *end = d->samples.data() + d->samples.size(); - while( ptr < end ) { - int res = ModPlug_Read( mpf, (void*)ptr, (end - ptr) * sizeof(float) ); - int samples_read = res / (sizeof(float) * 2); + while (ptr < end) + { + int res = ModPlug_Read(mpf, (void*)ptr, (end - ptr) * sizeof(float)); + int samples_read = res / (sizeof(float) * 2); - if( totalSamples < samples_read ) { - samples_read = totalSamples; - } + if (totalSamples < samples_read) + { + samples_read = totalSamples; + } - for( int i = 0; i < samples_read; ++i ) { - *ptr++ = *((int*)ptr) * invf; - *ptr++ = *((int*)ptr) * invf; - } + for (int i = 0; i < samples_read; ++i) + { + *ptr++ = *((int*)ptr) * invf; + *ptr++ = *((int*)ptr) * invf; + } - totalSamples -= samples_read; - } + totalSamples -= samples_read; + } return ptr >= end; }; - if (!readInternal()) + if (!read_func()) + { throw std::runtime_error("could not read any data"); + } ModPlug_Unload(mpf); } private: - ModPlugFile* mpf; - + ModPlugFile * mpf; NO_MOVE(ModplugInternal); - AudioData * d; }; @@ -128,4 +128,3 @@ std::vector ModplugDecoder::GetSupportedFileExtensions() { return 
{"pat","mid", "mod","s3m","xm","it","669","amf","ams","dbm","dmf","dsm","far","mdl","med","mtm","okt","ptm","stm","ult","umx","mt2","psm"}; } - diff --git a/src/MusepackDecoder.cpp b/src/MusepackDecoder.cpp index f95bbea..494e1de 100644 --- a/src/MusepackDecoder.cpp +++ b/src/MusepackDecoder.cpp @@ -110,9 +110,7 @@ public: reader.tell = tell_mem; mpcDemux = mpc_demux_init(&reader); - - if (!mpcDemux) - throw std::runtime_error("could not initialize mpc demuxer"); + if (!mpcDemux) throw std::runtime_error("could not initialize mpc demuxer"); mpc_demux_get_info(mpcDemux, &streamInfo); @@ -190,4 +188,4 @@ void MusepackDecoder::LoadFromBuffer(AudioData * data, const std::vector MusepackDecoder::GetSupportedFileExtensions() { return {"mpc", "mpp"}; -} \ No newline at end of file +} diff --git a/src/OpusDecoder.cpp b/src/OpusDecoder.cpp index 16d51f8..f720694 100644 --- a/src/OpusDecoder.cpp +++ b/src/OpusDecoder.cpp @@ -174,4 +174,4 @@ void nqr::OpusDecoder::LoadFromBuffer(AudioData * data, const std::vector nqr::OpusDecoder::GetSupportedFileExtensions() { return {"opus"}; -} \ No newline at end of file +} diff --git a/src/WavDecoder.cpp b/src/WavDecoder.cpp index 6dace0f..57dab64 100644 --- a/src/WavDecoder.cpp +++ b/src/WavDecoder.cpp @@ -26,6 +26,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "WavDecoder.h" #include "RiffUtils.h" #include "IMA4Util.h" +#include using namespace nqr; @@ -200,7 +201,7 @@ void WavDecoder::LoadFromBuffer(AudioData * data, const std::vector & m uint32_t frameOffset = 0; uint32_t frameCount = DataChunkInfo.size / s.frame_size; - for (int i = 0; i < frameCount; ++i) + for (uint32_t i = 0; i < frameCount; ++i) { decode_ima_adpcm(s, adpcm_pcm16.data() + frameOffset, wavHeader.channel_count); s.inBuffer += s.frame_size; diff --git a/src/WavEncoder.cpp b/src/WavEncoder.cpp index a56cbd9..f45b0e4 100644 --- a/src/WavEncoder.cpp +++ b/src/WavEncoder.cpp @@ -79,28 +79,25 @@ int WavEncoder::WriteFile(const EncoderParams p, const AudioData * d, const std: sampleDataOptionalMix.resize(sampleDataSize * 2); MonoToStereo(sampleData, sampleDataOptionalMix.data(), sampleDataSize); // Mix - // Re-point data + // Re-point data sampleData = sampleDataOptionalMix.data(); sampleDataSize = sampleDataOptionalMix.size(); } - // Stereo => Mono else if (d->channelCount == 2 && p.channelCount == 1) { sampleDataOptionalMix.resize(sampleDataSize / 2); StereoToMono(sampleData, sampleDataOptionalMix.data(), sampleDataSize); // Mix - // Re-point data + // Re-point data sampleData = sampleDataOptionalMix.data(); sampleDataSize = sampleDataOptionalMix.size(); } - else if (d->channelCount == p.channelCount) { // No op } - else { return EncoderError::UnsupportedChannelMix; diff --git a/src/WavPackDecoder.cpp b/src/WavPackDecoder.cpp index 63d3a9f..abdbfc9 100644 --- a/src/WavPackDecoder.cpp +++ b/src/WavPackDecoder.cpp @@ -25,6 +25,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "WavPackDecoder.h" #include "wavpack.h" +#include using namespace nqr; @@ -33,11 +34,67 @@ class WavPackInternal public: - WavPackInternal(AudioData * d, const std::string path) : d(d) + WavPackInternal(AudioData * d, const std::string & path) : d(d) { char errorStr[128]; context = WavpackOpenFileInput(path.c_str(), errorStr, OPEN_WVC | OPEN_NORMALIZE, 0); + if (!context) throw std::runtime_error("Not a WavPack file"); + + auto bitdepth = WavpackGetBitsPerSample(context); + + d->sampleRate = WavpackGetSampleRate(context); + d->channelCount = WavpackGetNumChannels(context); + d->lengthSeconds = double(getLengthInSeconds()); + d->frameSize = d->channelCount * bitdepth; + + //@todo support channel masks + // WavpackGetChannelMask + + auto totalSamples = size_t(getTotalSamples()); + + int mode = WavpackGetMode(context); + bool isFloatingPoint = (MODE_FLOAT & mode); + + d->sourceFormat = MakeFormatForBits(bitdepth, isFloatingPoint, false); + + /// From the WavPack docs: + /// "... required memory at "buffer" is 4 * samples * num_channels bytes. The + /// audio data is returned right-justified in 32-bit longs in the endian + /// mode native to the executing processor." 
+ d->samples.resize(totalSamples * d->channelCount); + + if (!isFloatingPoint) + internalBuffer.resize(totalSamples * d->channelCount); + + if (!readInternal(totalSamples)) + throw std::runtime_error("could not read any data"); + + // Next, process internal buffer into the user-visible samples array + if (!isFloatingPoint) + ConvertToFloat32(d->samples.data(), internalBuffer.data(), totalSamples * d->channelCount, d->sourceFormat); + + } + + WavPackInternal(AudioData * d, const std::vector & memory) : d(d), data(std::move(memory)), dataPos(0) + { + WavpackStreamReader64 reader = + { + read_bytes, + write_bytes, + get_pos, + set_pos_abs, + set_pos_rel, + push_back_byte, + get_length, + can_seek, + truncate_here, + close, + }; + + char errorStr[128]; + context = WavpackOpenFileInputEx64(&reader, this, nullptr, errorStr, OPEN_WVC | OPEN_NORMALIZE, 0); + if (!context) { throw std::runtime_error("Not a WavPack file"); @@ -60,11 +117,6 @@ public: d->sourceFormat = MakeFormatForBits(bitdepth, isFloatingPoint, false); - /* From the docs: - "... required memory at "buffer" is 4 * samples * num_channels bytes. The - audio data is returned right-justified in 32-bit longs in the endian - mode native to the executing processor." 
- */ d->samples.resize(totalSamples * d->channelCount); if (!isFloatingPoint) @@ -89,8 +141,6 @@ public: size_t framesRemaining = requestedFrameCount; size_t totalFramesRead = 0; - // int frameSize = d->channelCount * WavpackGetBitsPerSample(context); - // The samples returned are handled differently based on the file's mode int mode = WavpackGetMode(context); @@ -111,8 +161,7 @@ public: } // EOF - //if (framesRead == 0) - // break; + //if (framesRead == 0) break; totalFramesRead += framesRead; framesRemaining -= framesRead; @@ -120,6 +169,115 @@ public: return totalFramesRead; } + + static int32_t read_bytes(void * id, void * data, int32_t byte_count) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + int32_t readLength = std::min(byte_count, decoder->data.size() - decoder->dataPos); + if (readLength > 0) + { + std::memcpy(data, decoder->data.data(), readLength); + decoder->dataPos += readLength; + return readLength; + } + else return 0; + } + return 0; + } + static int32_t write_bytes(void * id, void * data, int32_t byte_count) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + int32_t writeLength = std::min(byte_count, decoder->data.size() - decoder->dataPos); + if (writeLength > 0) + { + std::memcpy(decoder->data.data(), data, writeLength); + decoder->dataPos += writeLength; + return writeLength; + } + else return 0; + } + return 0; + } + static int64_t get_pos(void *id) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + return decoder->dataPos; + } + return 0; + } + static int set_pos_abs(void *id, int64_t pos) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + size_t newPos = std::min(pos, decoder->data.size()); + decoder->dataPos = newPos; + return newPos; + } + return 0; + } + static int set_pos_rel(void *id, int64_t delta, int mode) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + size_t newPos = 
0; + if (mode == SEEK_SET) newPos = delta; + else if (mode == SEEK_CUR) newPos = decoder->dataPos + delta; + else if (mode == SEEK_END) newPos = decoder->data.size() + delta; + newPos = std::min(newPos, decoder->data.size()); + decoder->dataPos = newPos; + return newPos; + } + return 0; + } + static int push_back_byte(void *id, int c) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + decoder->dataPos--; + decoder->data[decoder->dataPos] = c; + return 1; + } + return 0; + } + static int64_t get_length(void *id) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + return decoder->data.size(); + } + return 0; + } + static int can_seek(void *id) + { + if (id != nullptr) return 1; + return 0; + } + + static int truncate_here(void *id) + { + if (id != nullptr) + { + WavPackInternal *decoder = (WavPackInternal *)id; + decoder->data.resize(decoder->dataPos); + return 1; + } + return 0; + } + static int close(void *id) + { + if (id != nullptr) return 1; + return 0; + } private: @@ -130,6 +288,8 @@ private: WavpackContext * context; //@todo unique_ptr AudioData * d; + std::vector data; + size_t dataPos; std::vector internalBuffer; @@ -149,7 +309,7 @@ void WavPackDecoder::LoadFromPath(AudioData * data, const std::string & path) void WavPackDecoder::LoadFromBuffer(AudioData * data, const std::vector & memory) { - throw LoadBufferNotImplEx(); + WavPackInternal decoder(data, memory); } std::vector WavPackDecoder::GetSupportedFileExtensions() diff --git a/src/WavPackDependencies.c b/src/WavPackDependencies.c deleted file mode 100644 index 1fec6d0..0000000 --- a/src/WavPackDependencies.c +++ /dev/null @@ -1,68 +0,0 @@ -/* -Copyright (c) 2015, Dimitri Diakopoulos All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#if (_MSC_VER) - #pragma warning (push) - #pragma warning (disable: 181 111 4267 4996 4244 4701 4702 4133 4100 4127 4206 4312 4505 4365 4005 4013 4334 4703) -#endif - -#ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wconversion" - #pragma clang diagnostic ignored "-Wshadow" - #pragma clang diagnostic ignored "-Wdeprecated-register" -#endif - -#ifdef _WIN32 - #ifndef WIN32 - #define WIN32 - #endif -#endif - -#include "wavpack/src/bits.c" -#include "wavpack/src/extra1.c" -#define WavpackExtraInfo WavpackExtraInfo_alt -#define log2overhead log2overhead_alt -#define xtable xtable_alt -#include "wavpack/src/extra2.c" -#include "wavpack/src/float.c" -#include "wavpack/src/metadata.c" -#define decorr_stereo_pass decorr_stereo_pass_alt -#include "wavpack/src/pack.c" -#include "wavpack/src/tags.c" -#undef decorr_stereo_pass -#define decorr_stereo_pass decorr_stereo_pass_alt_2 -#include "wavpack/src/unpack.c" -#include "wavpack/src/unpack3.c" -#include "wavpack/src/words.c" -#include "wavpack/src/wputils.c" - -#ifdef __clang__ - #pragma clang diagnostic pop -#endif - -#if (_MSC_VER) - #pragma warning (pop) -#endif diff --git a/third_party/flac/all.h b/third_party/FLAC/all.h similarity index 100% rename from third_party/flac/all.h rename to third_party/FLAC/all.h diff --git a/third_party/flac/assert.h b/third_party/FLAC/assert.h similarity index 100% rename from third_party/flac/assert.h rename to third_party/FLAC/assert.h diff --git a/third_party/flac/callback.h b/third_party/FLAC/callback.h similarity index 100% rename from third_party/flac/callback.h rename to third_party/FLAC/callback.h diff --git a/third_party/flac/export.h b/third_party/FLAC/export.h similarity index 100% rename from third_party/flac/export.h rename to third_party/FLAC/export.h diff --git a/third_party/flac/format.h b/third_party/FLAC/format.h similarity index 100% rename from third_party/flac/format.h rename to third_party/FLAC/format.h diff --git 
a/third_party/flac/metadata.h b/third_party/FLAC/metadata.h similarity index 100% rename from third_party/flac/metadata.h rename to third_party/FLAC/metadata.h diff --git a/third_party/flac/ordinals.h b/third_party/FLAC/ordinals.h similarity index 100% rename from third_party/flac/ordinals.h rename to third_party/FLAC/ordinals.h diff --git a/third_party/flac/src/bitmath.c b/third_party/FLAC/src/bitmath.c similarity index 100% rename from third_party/flac/src/bitmath.c rename to third_party/FLAC/src/bitmath.c diff --git a/third_party/flac/src/bitreader.c b/third_party/FLAC/src/bitreader.c similarity index 100% rename from third_party/flac/src/bitreader.c rename to third_party/FLAC/src/bitreader.c diff --git a/third_party/flac/src/bitwriter.c b/third_party/FLAC/src/bitwriter.c similarity index 100% rename from third_party/flac/src/bitwriter.c rename to third_party/FLAC/src/bitwriter.c diff --git a/third_party/flac/src/cpu.c b/third_party/FLAC/src/cpu.c similarity index 100% rename from third_party/flac/src/cpu.c rename to third_party/FLAC/src/cpu.c diff --git a/third_party/flac/src/crc.c b/third_party/FLAC/src/crc.c similarity index 100% rename from third_party/flac/src/crc.c rename to third_party/FLAC/src/crc.c diff --git a/third_party/flac/src/fixed.c b/third_party/FLAC/src/fixed.c similarity index 100% rename from third_party/flac/src/fixed.c rename to third_party/FLAC/src/fixed.c diff --git a/third_party/flac/src/fixed_intrin_sse2.c b/third_party/FLAC/src/fixed_intrin_sse2.c similarity index 100% rename from third_party/flac/src/fixed_intrin_sse2.c rename to third_party/FLAC/src/fixed_intrin_sse2.c diff --git a/third_party/flac/src/fixed_intrin_ssse3.c b/third_party/FLAC/src/fixed_intrin_ssse3.c similarity index 100% rename from third_party/flac/src/fixed_intrin_ssse3.c rename to third_party/FLAC/src/fixed_intrin_ssse3.c diff --git a/third_party/flac/src/float.c b/third_party/FLAC/src/float.c similarity index 100% rename from third_party/flac/src/float.c rename 
to third_party/FLAC/src/float.c diff --git a/third_party/flac/src/format.c b/third_party/FLAC/src/format.c similarity index 100% rename from third_party/flac/src/format.c rename to third_party/FLAC/src/format.c diff --git a/third_party/flac/src/ia32/cpu_asm.nasm b/third_party/FLAC/src/ia32/cpu_asm.nasm similarity index 100% rename from third_party/flac/src/ia32/cpu_asm.nasm rename to third_party/FLAC/src/ia32/cpu_asm.nasm diff --git a/third_party/flac/src/ia32/fixed_asm.nasm b/third_party/FLAC/src/ia32/fixed_asm.nasm similarity index 100% rename from third_party/flac/src/ia32/fixed_asm.nasm rename to third_party/FLAC/src/ia32/fixed_asm.nasm diff --git a/third_party/flac/src/ia32/lpc_asm.nasm b/third_party/FLAC/src/ia32/lpc_asm.nasm similarity index 100% rename from third_party/flac/src/ia32/lpc_asm.nasm rename to third_party/FLAC/src/ia32/lpc_asm.nasm diff --git a/third_party/flac/src/ia32/nasm.h b/third_party/FLAC/src/ia32/nasm.h similarity index 100% rename from third_party/flac/src/ia32/nasm.h rename to third_party/FLAC/src/ia32/nasm.h diff --git a/third_party/flac/src/include/private/all.h b/third_party/FLAC/src/include/private/all.h similarity index 100% rename from third_party/flac/src/include/private/all.h rename to third_party/FLAC/src/include/private/all.h diff --git a/third_party/flac/src/include/private/bitmath.h b/third_party/FLAC/src/include/private/bitmath.h similarity index 100% rename from third_party/flac/src/include/private/bitmath.h rename to third_party/FLAC/src/include/private/bitmath.h diff --git a/third_party/flac/src/include/private/bitreader.h b/third_party/FLAC/src/include/private/bitreader.h similarity index 100% rename from third_party/flac/src/include/private/bitreader.h rename to third_party/FLAC/src/include/private/bitreader.h diff --git a/third_party/flac/src/include/private/bitwriter.h b/third_party/FLAC/src/include/private/bitwriter.h similarity index 100% rename from third_party/flac/src/include/private/bitwriter.h rename to 
third_party/FLAC/src/include/private/bitwriter.h diff --git a/third_party/flac/src/include/private/cpu.h b/third_party/FLAC/src/include/private/cpu.h similarity index 100% rename from third_party/flac/src/include/private/cpu.h rename to third_party/FLAC/src/include/private/cpu.h diff --git a/third_party/flac/src/include/private/crc.h b/third_party/FLAC/src/include/private/crc.h similarity index 100% rename from third_party/flac/src/include/private/crc.h rename to third_party/FLAC/src/include/private/crc.h diff --git a/third_party/flac/src/include/private/fixed.h b/third_party/FLAC/src/include/private/fixed.h similarity index 100% rename from third_party/flac/src/include/private/fixed.h rename to third_party/FLAC/src/include/private/fixed.h diff --git a/third_party/flac/src/include/private/float.h b/third_party/FLAC/src/include/private/float.h similarity index 100% rename from third_party/flac/src/include/private/float.h rename to third_party/FLAC/src/include/private/float.h diff --git a/third_party/flac/src/include/private/format.h b/third_party/FLAC/src/include/private/format.h similarity index 100% rename from third_party/flac/src/include/private/format.h rename to third_party/FLAC/src/include/private/format.h diff --git a/third_party/flac/src/include/private/lpc.h b/third_party/FLAC/src/include/private/lpc.h similarity index 100% rename from third_party/flac/src/include/private/lpc.h rename to third_party/FLAC/src/include/private/lpc.h diff --git a/third_party/flac/src/include/private/macros.h b/third_party/FLAC/src/include/private/macros.h similarity index 100% rename from third_party/flac/src/include/private/macros.h rename to third_party/FLAC/src/include/private/macros.h diff --git a/third_party/flac/src/include/private/md5.h b/third_party/FLAC/src/include/private/md5.h similarity index 100% rename from third_party/flac/src/include/private/md5.h rename to third_party/FLAC/src/include/private/md5.h diff --git a/third_party/flac/src/include/private/memory.h 
b/third_party/FLAC/src/include/private/memory.h similarity index 100% rename from third_party/flac/src/include/private/memory.h rename to third_party/FLAC/src/include/private/memory.h diff --git a/third_party/flac/src/include/private/metadata.h b/third_party/FLAC/src/include/private/metadata.h similarity index 100% rename from third_party/flac/src/include/private/metadata.h rename to third_party/FLAC/src/include/private/metadata.h diff --git a/third_party/flac/src/include/private/ogg_decoder_aspect.h b/third_party/FLAC/src/include/private/ogg_decoder_aspect.h similarity index 100% rename from third_party/flac/src/include/private/ogg_decoder_aspect.h rename to third_party/FLAC/src/include/private/ogg_decoder_aspect.h diff --git a/third_party/flac/src/include/private/ogg_encoder_aspect.h b/third_party/FLAC/src/include/private/ogg_encoder_aspect.h similarity index 100% rename from third_party/flac/src/include/private/ogg_encoder_aspect.h rename to third_party/FLAC/src/include/private/ogg_encoder_aspect.h diff --git a/third_party/flac/src/include/private/ogg_helper.h b/third_party/FLAC/src/include/private/ogg_helper.h similarity index 100% rename from third_party/flac/src/include/private/ogg_helper.h rename to third_party/FLAC/src/include/private/ogg_helper.h diff --git a/third_party/flac/src/include/private/ogg_mapping.h b/third_party/FLAC/src/include/private/ogg_mapping.h similarity index 100% rename from third_party/flac/src/include/private/ogg_mapping.h rename to third_party/FLAC/src/include/private/ogg_mapping.h diff --git a/third_party/flac/src/include/private/stream_encoder.h b/third_party/FLAC/src/include/private/stream_encoder.h similarity index 100% rename from third_party/flac/src/include/private/stream_encoder.h rename to third_party/FLAC/src/include/private/stream_encoder.h diff --git a/third_party/flac/src/include/private/stream_encoder_framing.h b/third_party/FLAC/src/include/private/stream_encoder_framing.h similarity index 100% rename from 
third_party/flac/src/include/private/stream_encoder_framing.h rename to third_party/FLAC/src/include/private/stream_encoder_framing.h diff --git a/third_party/flac/src/include/private/window.h b/third_party/FLAC/src/include/private/window.h similarity index 100% rename from third_party/flac/src/include/private/window.h rename to third_party/FLAC/src/include/private/window.h diff --git a/third_party/flac/src/include/protected/all.h b/third_party/FLAC/src/include/protected/all.h similarity index 100% rename from third_party/flac/src/include/protected/all.h rename to third_party/FLAC/src/include/protected/all.h diff --git a/third_party/flac/src/include/protected/stream_decoder.h b/third_party/FLAC/src/include/protected/stream_decoder.h similarity index 100% rename from third_party/flac/src/include/protected/stream_decoder.h rename to third_party/FLAC/src/include/protected/stream_decoder.h diff --git a/third_party/flac/src/include/protected/stream_encoder.h b/third_party/FLAC/src/include/protected/stream_encoder.h similarity index 100% rename from third_party/flac/src/include/protected/stream_encoder.h rename to third_party/FLAC/src/include/protected/stream_encoder.h diff --git a/third_party/flac/src/include/share/alloc.h b/third_party/FLAC/src/include/share/alloc.h similarity index 100% rename from third_party/flac/src/include/share/alloc.h rename to third_party/FLAC/src/include/share/alloc.h diff --git a/third_party/flac/src/include/share/compat.h b/third_party/FLAC/src/include/share/compat.h similarity index 100% rename from third_party/flac/src/include/share/compat.h rename to third_party/FLAC/src/include/share/compat.h diff --git a/third_party/flac/src/include/share/endswap.h b/third_party/FLAC/src/include/share/endswap.h similarity index 100% rename from third_party/flac/src/include/share/endswap.h rename to third_party/FLAC/src/include/share/endswap.h diff --git a/third_party/flac/src/include/share/getopt.h b/third_party/FLAC/src/include/share/getopt.h 
similarity index 100% rename from third_party/flac/src/include/share/getopt.h rename to third_party/FLAC/src/include/share/getopt.h diff --git a/third_party/flac/src/include/share/macros.h b/third_party/FLAC/src/include/share/macros.h similarity index 100% rename from third_party/flac/src/include/share/macros.h rename to third_party/FLAC/src/include/share/macros.h diff --git a/third_party/flac/src/include/share/private.h b/third_party/FLAC/src/include/share/private.h similarity index 100% rename from third_party/flac/src/include/share/private.h rename to third_party/FLAC/src/include/share/private.h diff --git a/third_party/flac/src/include/share/safe_str.h b/third_party/FLAC/src/include/share/safe_str.h similarity index 100% rename from third_party/flac/src/include/share/safe_str.h rename to third_party/FLAC/src/include/share/safe_str.h diff --git a/third_party/flac/src/include/share/utf8.h b/third_party/FLAC/src/include/share/utf8.h similarity index 100% rename from third_party/flac/src/include/share/utf8.h rename to third_party/FLAC/src/include/share/utf8.h diff --git a/third_party/flac/src/include/share/win_utf8_io.h b/third_party/FLAC/src/include/share/win_utf8_io.h similarity index 100% rename from third_party/flac/src/include/share/win_utf8_io.h rename to third_party/FLAC/src/include/share/win_utf8_io.h diff --git a/third_party/flac/src/lpc.c b/third_party/FLAC/src/lpc.c similarity index 100% rename from third_party/flac/src/lpc.c rename to third_party/FLAC/src/lpc.c diff --git a/third_party/flac/src/lpc_intrin_avx2.c b/third_party/FLAC/src/lpc_intrin_avx2.c similarity index 100% rename from third_party/flac/src/lpc_intrin_avx2.c rename to third_party/FLAC/src/lpc_intrin_avx2.c diff --git a/third_party/flac/src/lpc_intrin_sse.c b/third_party/FLAC/src/lpc_intrin_sse.c similarity index 100% rename from third_party/flac/src/lpc_intrin_sse.c rename to third_party/FLAC/src/lpc_intrin_sse.c diff --git a/third_party/flac/src/lpc_intrin_sse2.c 
b/third_party/FLAC/src/lpc_intrin_sse2.c similarity index 100% rename from third_party/flac/src/lpc_intrin_sse2.c rename to third_party/FLAC/src/lpc_intrin_sse2.c diff --git a/third_party/flac/src/lpc_intrin_sse41.c b/third_party/FLAC/src/lpc_intrin_sse41.c similarity index 100% rename from third_party/flac/src/lpc_intrin_sse41.c rename to third_party/FLAC/src/lpc_intrin_sse41.c diff --git a/third_party/flac/src/md5.c b/third_party/FLAC/src/md5.c similarity index 100% rename from third_party/flac/src/md5.c rename to third_party/FLAC/src/md5.c diff --git a/third_party/flac/src/memory.c b/third_party/FLAC/src/memory.c similarity index 100% rename from third_party/flac/src/memory.c rename to third_party/FLAC/src/memory.c diff --git a/third_party/flac/src/metadata_iterators.c b/third_party/FLAC/src/metadata_iterators.c similarity index 100% rename from third_party/flac/src/metadata_iterators.c rename to third_party/FLAC/src/metadata_iterators.c diff --git a/third_party/flac/src/metadata_object.c b/third_party/FLAC/src/metadata_object.c similarity index 100% rename from third_party/flac/src/metadata_object.c rename to third_party/FLAC/src/metadata_object.c diff --git a/third_party/flac/src/ogg_decoder_aspect.c b/third_party/FLAC/src/ogg_decoder_aspect.c similarity index 100% rename from third_party/flac/src/ogg_decoder_aspect.c rename to third_party/FLAC/src/ogg_decoder_aspect.c diff --git a/third_party/flac/src/ogg_encoder_aspect.c b/third_party/FLAC/src/ogg_encoder_aspect.c similarity index 100% rename from third_party/flac/src/ogg_encoder_aspect.c rename to third_party/FLAC/src/ogg_encoder_aspect.c diff --git a/third_party/flac/src/ogg_helper.c b/third_party/FLAC/src/ogg_helper.c similarity index 100% rename from third_party/flac/src/ogg_helper.c rename to third_party/FLAC/src/ogg_helper.c diff --git a/third_party/flac/src/ogg_mapping.c b/third_party/FLAC/src/ogg_mapping.c similarity index 100% rename from third_party/flac/src/ogg_mapping.c rename to 
third_party/FLAC/src/ogg_mapping.c diff --git a/third_party/flac/src/stream_decoder.c b/third_party/FLAC/src/stream_decoder.c similarity index 100% rename from third_party/flac/src/stream_decoder.c rename to third_party/FLAC/src/stream_decoder.c diff --git a/third_party/flac/src/stream_encoder.c b/third_party/FLAC/src/stream_encoder.c similarity index 100% rename from third_party/flac/src/stream_encoder.c rename to third_party/FLAC/src/stream_encoder.c diff --git a/third_party/flac/src/stream_encoder_framing.c b/third_party/FLAC/src/stream_encoder_framing.c similarity index 100% rename from third_party/flac/src/stream_encoder_framing.c rename to third_party/FLAC/src/stream_encoder_framing.c diff --git a/third_party/flac/src/stream_encoder_intrin_avx2.c b/third_party/FLAC/src/stream_encoder_intrin_avx2.c similarity index 100% rename from third_party/flac/src/stream_encoder_intrin_avx2.c rename to third_party/FLAC/src/stream_encoder_intrin_avx2.c diff --git a/third_party/flac/src/stream_encoder_intrin_sse2.c b/third_party/FLAC/src/stream_encoder_intrin_sse2.c similarity index 100% rename from third_party/flac/src/stream_encoder_intrin_sse2.c rename to third_party/FLAC/src/stream_encoder_intrin_sse2.c diff --git a/third_party/flac/src/stream_encoder_intrin_ssse3.c b/third_party/FLAC/src/stream_encoder_intrin_ssse3.c similarity index 100% rename from third_party/flac/src/stream_encoder_intrin_ssse3.c rename to third_party/FLAC/src/stream_encoder_intrin_ssse3.c diff --git a/third_party/flac/src/win_utf8_io.c b/third_party/FLAC/src/win_utf8_io.c similarity index 100% rename from third_party/flac/src/win_utf8_io.c rename to third_party/FLAC/src/win_utf8_io.c diff --git a/third_party/flac/src/window.c b/third_party/FLAC/src/window.c similarity index 100% rename from third_party/flac/src/window.c rename to third_party/FLAC/src/window.c diff --git a/third_party/flac/stream_decoder.h b/third_party/FLAC/stream_decoder.h similarity index 100% rename from 
third_party/flac/stream_decoder.h rename to third_party/FLAC/stream_decoder.h diff --git a/third_party/flac/stream_encoder.h b/third_party/FLAC/stream_encoder.h similarity index 100% rename from third_party/flac/stream_encoder.h rename to third_party/FLAC/stream_encoder.h diff --git a/third_party/libmodplug/src/snd_dsp.cpp b/third_party/libmodplug/src/snd_dsp.cpp index 29d142e..3a2969d 100644 --- a/third_party/libmodplug/src/snd_dsp.cpp +++ b/third_party/libmodplug/src/snd_dsp.cpp @@ -98,12 +98,10 @@ static LONG DolbyLoFilterDelay[XBASSBUFFERSIZE]; static LONG DolbyHiFilterBuffer[FILTERBUFFERSIZE]; static LONG SurroundBuffer[SURROUNDBUFFERSIZE]; -/* // Access the main temporary mix buffer directly: avoids an extra pointer -extern int MixSoundBuffer[MIXBUFFERSIZE*2]; +extern int MixSoundBuffer[MIXBUFFERSIZE*4]; //cextern int MixReverbBuffer[MIXBUFFERSIZE*2]; extern int MixReverbBuffer[MIXBUFFERSIZE*2]; -*/ static UINT GetMaskFromSize(UINT len) //----------------------------------- diff --git a/third_party/libogg/include/ogg/os_types.h b/third_party/libogg/include/ogg/os_types.h index 8bf8210..ac72e33 100644 --- a/third_party/libogg/include/ogg/os_types.h +++ b/third_party/libogg/include/ogg/os_types.h @@ -140,7 +140,12 @@ #else -# include +#include +typedef int16_t ogg_int16_t; +typedef uint16_t ogg_uint16_t; +typedef int32_t ogg_int32_t; +typedef uint32_t ogg_uint32_t; +typedef int64_t ogg_int64_t; #endif diff --git a/third_party/wavpack/include/Makefile.am b/third_party/wavpack/include/Makefile.am new file mode 100644 index 0000000..4284506 --- /dev/null +++ b/third_party/wavpack/include/Makefile.am @@ -0,0 +1,5 @@ +wpinclude_HEADERS = wavpack.h +wpincludedir = $(prefix)/include/wavpack + +MAINTAINERCLEANFILES = \ + Makefile.in diff --git a/third_party/wavpack/include/wavpack.h b/third_party/wavpack/include/wavpack.h index 885c52f..374924b 100644 --- a/third_party/wavpack/include/wavpack.h +++ b/third_party/wavpack/include/wavpack.h @@ -1,7 +1,7 @@ 
//////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2016 David Bryant. // // All Rights Reserved. // // Distributed under the BSD Software License (see license.txt) // //////////////////////////////////////////////////////////////////////////// @@ -16,10 +16,17 @@ #include -#if defined(_WIN32) && !defined(__MINGW32__) -#include +#if defined(_MSC_VER) && _MSC_VER < 1600 +typedef unsigned __int64 uint64_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int8 uint8_t; +typedef __int64 int64_t; +typedef __int32 int32_t; +typedef __int16 int16_t; +typedef __int8 int8_t; #else -#include +#include #endif // RIFF / wav header formats (these occur at the beginning of both wav files @@ -41,12 +48,12 @@ typedef struct { #define ChunkHeaderFormat "4L" typedef struct { - unsigned short FormatTag, NumChannels; + uint16_t FormatTag, NumChannels; uint32_t SampleRate, BytesPerSecond; - unsigned short BlockAlign, BitsPerSample; - unsigned short cbSize, ValidBitsPerSample; + uint16_t BlockAlign, BitsPerSample; + uint16_t cbSize, ValidBitsPerSample; int32_t ChannelMask; - unsigned short SubFormat; + uint16_t SubFormat; char GUID [14]; } WaveHeader; @@ -62,13 +69,43 @@ typedef struct { typedef struct { char ckID [4]; uint32_t ckSize; - short version; - unsigned char track_no, index_no; + int16_t version; + unsigned char block_index_u8; + unsigned char total_samples_u8; uint32_t total_samples, block_index, block_samples, flags, crc; } WavpackHeader; #define WavpackHeaderFormat "4LS2LLLLL" +// Macros to access the 40-bit block_index field + +#define GET_BLOCK_INDEX(hdr) ( (int64_t) (hdr).block_index + ((int64_t) (hdr).block_index_u8 << 32) ) + +#define SET_BLOCK_INDEX(hdr,value) do { \ + int64_t tmp = (value); \ + (hdr).block_index = (uint32_t) tmp; \ + (hdr).block_index_u8 = \ + 
(unsigned char) (tmp >> 32); \ +} while (0) + +// Macros to access the 40-bit total_samples field, which is complicated by the fact that +// all 1's in the lower 32 bits indicates "unknown" (regardless of upper 8 bits) + +#define GET_TOTAL_SAMPLES(hdr) ( ((hdr).total_samples == (uint32_t) -1) ? -1 : \ + (int64_t) (hdr).total_samples + ((int64_t) (hdr).total_samples_u8 << 32) - (hdr).total_samples_u8 ) + +#define SET_TOTAL_SAMPLES(hdr,value) do { \ + int64_t tmp = (value); \ + if (tmp < 0) \ + (hdr).total_samples = (uint32_t) -1; \ + else { \ + tmp += (tmp / 0xffffffffLL); \ + (hdr).total_samples = (uint32_t) tmp; \ + (hdr).total_samples_u8 = \ + (unsigned char) (tmp >> 32); \ + } \ +} while (0) + // or-values for WavpackHeader.flags #define BYTES_STORED 3 // 1-4 bytes/sample #define MONO_FLAG 4 // not stereo @@ -95,17 +132,19 @@ typedef struct { #define SRATE_MASK (0xfL << SRATE_LSB) #define FALSE_STEREO 0x40000000 // block is stereo, but data is mono - -#define IGNORED_FLAGS 0x18000000 // reserved, but ignore if encountered #define NEW_SHAPING 0x20000000 // use IIR filter for negative shaping -#define UNKNOWN_FLAGS 0x80000000 // also reserved, but refuse decode if - // encountered #define MONO_DATA (MONO_FLAG | FALSE_STEREO) +// Introduced in WavPack 5.0: +#define HAS_CHECKSUM 0x10000000 // block contains a trailing checksum +#define DSD_FLAG 0x80000000 // block is encoded DSD (1-bit PCM) + +#define IGNORED_FLAGS 0x08000000 // reserved, but ignore if encountered +#define UNKNOWN_FLAGS 0x00000000 // we no longer have any of these spares + #define MIN_STREAM_VERS 0x402 // lowest stream version we'll decode #define MAX_STREAM_VERS 0x410 // highest stream version we'll decode or encode -#define CUR_STREAM_VERS 0x407 // stream version we are writing now // These are the mask bit definitions for the metadata chunk id byte (see format.txt) @@ -131,11 +170,15 @@ typedef struct { #define ID_RIFF_HEADER (ID_OPTIONAL_DATA | 0x1) #define ID_RIFF_TRAILER (ID_OPTIONAL_DATA | 
0x2) -#define ID_REPLAY_GAIN (ID_OPTIONAL_DATA | 0x3) // never used (APEv2) -#define ID_CUESHEET (ID_OPTIONAL_DATA | 0x4) // never used (APEv2) +#define ID_ALT_HEADER (ID_OPTIONAL_DATA | 0x3) +#define ID_ALT_TRAILER (ID_OPTIONAL_DATA | 0x4) #define ID_CONFIG_BLOCK (ID_OPTIONAL_DATA | 0x5) #define ID_MD5_CHECKSUM (ID_OPTIONAL_DATA | 0x6) #define ID_SAMPLE_RATE (ID_OPTIONAL_DATA | 0x7) +#define ID_ALT_EXTENSION (ID_OPTIONAL_DATA | 0x8) +#define ID_ALT_MD5_CHECKSUM (ID_OPTIONAL_DATA | 0x9) +#define ID_NEW_CONFIG_BLOCK (ID_OPTIONAL_DATA | 0xa) +#define ID_BLOCK_CHECKSUM (ID_OPTIONAL_DATA | 0xf) ///////////////////////// WavPack Configuration /////////////////////////////// @@ -149,12 +192,13 @@ typedef struct { int qmode, flags, xmode, num_channels, float_norm_exp; int32_t block_samples, extra_flags, sample_rate, channel_mask; unsigned char md5_checksum [16], md5_read; - int num_tag_strings; - char **tag_strings; + int num_tag_strings; // this field is not used + char **tag_strings; // this field is not used } WavpackConfig; #define CONFIG_HYBRID_FLAG 8 // hybrid mode #define CONFIG_JOINT_STEREO 0x10 // joint stereo +#define CONFIG_CROSS_DECORR 0x20 // no-delay cross decorrelation #define CONFIG_HYBRID_SHAPE 0x40 // noise shape (hybrid mode only) #define CONFIG_FAST_FLAG 0x200 // fast mode #define CONFIG_HIGH_FLAG 0x800 // high quality mode @@ -166,6 +210,7 @@ typedef struct { #define CONFIG_CREATE_EXE 0x40000 // create executable #define CONFIG_CREATE_WVC 0x80000 // create correction file #define CONFIG_OPTIMIZE_WVC 0x100000 // maximize bybrid compression +#define CONFIG_COMPATIBLE_WRITE 0x400000 // write files for decoders < 4.3 #define CONFIG_CALC_NOISE 0x800000 // calc noise in hybrid mode #define CONFIG_EXTRA_MODE 0x2000000 // extra processing mode #define CONFIG_SKIP_WVX 0x4000000 // no wvx stream w/ floats & big ints @@ -174,6 +219,32 @@ typedef struct { #define CONFIG_PAIR_UNDEF_CHANS 0x20000000 // encode undefined channels in stereo pairs #define 
CONFIG_OPTIMIZE_MONO 0x80000000 // optimize for mono streams posing as stereo +// The lower 8 bits of qmode indicate the use of new features in version 5 that (presently) +// only apply to Core Audio Files (CAF) and DSD files, but could apply to other things too. +// These flags are stored in the file and can be retrieved by a decoder that is aware of +// them, but the individual bits are meaningless to the library. If ANY of these bits are +// set then the MD5 sum is written with a new ID so that old decoders will not see it +// (because these features will cause the MD5 sum to be different and fail). + +#define QMODE_BIG_ENDIAN 0x1 // big-endian data format (opposite of WAV format) +#define QMODE_SIGNED_BYTES 0x2 // 8-bit audio data is signed (opposite of WAV format) +#define QMODE_UNSIGNED_WORDS 0x4 // audio data (other than 8-bit) is unsigned (opposite of WAV format) +#define QMODE_REORDERED_CHANS 0x8 // source channels were not Microsoft order, so they were reordered +#define QMODE_DSD_LSB_FIRST 0x10 // DSD bytes, LSB first (most Sony .dsf files) +#define QMODE_DSD_MSB_FIRST 0x20 // DSD bytes, MSB first (Philips .dff files) +#define QMODE_DSD_IN_BLOCKS 0x40 // DSD data is blocked by channels (Sony .dsf only) +#define QMODE_DSD_AUDIO (QMODE_DSD_LSB_FIRST | QMODE_DSD_MSB_FIRST) + +// The rest of the qmode word is reserved for the private use of the command-line programs +// and are ignored by the library (and not stored either). They really should not be defined +// here, but I thought it would be a good idea to have all the definitions together. + +#define QMODE_ADOBE_MODE 0x100 // user specified Adobe mode +#define QMODE_NO_STORE_WRAPPER 0x200 // user specified to not store audio file wrapper (RIFF, CAFF, etc.) +#define QMODE_CHANS_UNASSIGNED 0x400 // user specified "..." 
in --channel-order option +#define QMODE_IGNORE_LENGTH 0x800 // user specified to ignore length in file header +#define QMODE_RAW_PCM 0x1000 // user specified raw PCM format (no header present) + ////////////// Callbacks used for reading & writing WavPack streams ////////// typedef struct { @@ -189,18 +260,40 @@ typedef struct { int32_t (*write_bytes)(void *id, void *data, int32_t bcount); } WavpackStreamReader; +// Extended version of structure for handling large files and added +// functionality for truncating and closing files + +typedef struct { + int32_t (*read_bytes)(void *id, void *data, int32_t bcount); + int32_t (*write_bytes)(void *id, void *data, int32_t bcount); + int64_t (*get_pos)(void *id); // new signature for large files + int (*set_pos_abs)(void *id, int64_t pos); // new signature for large files + int (*set_pos_rel)(void *id, int64_t delta, int mode); // new signature for large files + int (*push_back_byte)(void *id, int c); + int64_t (*get_length)(void *id); // new signature for large files + int (*can_seek)(void *id); + int (*truncate_here)(void *id); // new function to truncate file at current position + int (*close)(void *id); // new function to close file +} WavpackStreamReader64; + typedef int (*WavpackBlockOutput)(void *id, void *data, int32_t bcount); //////////////////////////// function prototypes ///////////////////////////// -// Note: See wputils.c sourcecode for descriptions for using these functions. 
- typedef void WavpackContext; #ifdef __cplusplus extern "C" { #endif +#define MAX_WAVPACK_SAMPLES ((1LL << 40) - 257) + +WavpackContext *WavpackOpenRawDecoder ( + void *main_data, int32_t main_size, + void *corr_data, int32_t corr_size, + int16_t version, char *error, int flags, int norm_offset); + +WavpackContext *WavpackOpenFileInputEx64 (WavpackStreamReader64 *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset); WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset); WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset); @@ -212,6 +305,16 @@ WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int f #define OPEN_STREAMING 0x20 // "streaming" mode blindly unpacks blocks // w/o regard to header file position info #define OPEN_EDIT_TAGS 0x40 // allow editing of tags +#define OPEN_FILE_UTF8 0x80 // assume filenames are UTF-8 encoded, not ANSI (Windows only) + +// new for version 5 + +#define OPEN_DSD_NATIVE 0x100 // open DSD files as bitstreams + // (returned as 8-bit "samples" stored in 32-bit words) +#define OPEN_DSD_AS_PCM 0x200 // open DSD files as 24-bit PCM (decimated 8x) +#define OPEN_ALT_TYPES 0x400 // application is aware of alternate file types & qmode + // (just affects retrieving wrappers & MD5 checksums) +#define OPEN_NO_CHECKSUM 0x800 // don't verify block checksums before decoding int WavpackGetMode (WavpackContext *wpc); @@ -230,16 +333,25 @@ int WavpackGetMode (WavpackContext *wpc); #define MODE_XMODE 0x7000 // mask for extra level (1-6, 0=unknown) #define MODE_DNS 0x8000 +int WavpackVerifySingleBlock (unsigned char *buffer, int verify_checksum); +int WavpackGetQualifyMode (WavpackContext *wpc); char *WavpackGetErrorMessage (WavpackContext *wpc); int WavpackGetVersion (WavpackContext *wpc); +char *WavpackGetFileExtension (WavpackContext *wpc); +unsigned char 
WavpackGetFileFormat (WavpackContext *wpc); uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples); uint32_t WavpackGetNumSamples (WavpackContext *wpc); +int64_t WavpackGetNumSamples64 (WavpackContext *wpc); +uint32_t WavpackGetNumSamplesInFrame (WavpackContext *wpc); uint32_t WavpackGetSampleIndex (WavpackContext *wpc); +int64_t WavpackGetSampleIndex64 (WavpackContext *wpc); int WavpackGetNumErrors (WavpackContext *wpc); int WavpackLossyBlocks (WavpackContext *wpc); int WavpackSeekSample (WavpackContext *wpc, uint32_t sample); +int WavpackSeekSample64 (WavpackContext *wpc, int64_t sample); WavpackContext *WavpackCloseFile (WavpackContext *wpc); uint32_t WavpackGetSampleRate (WavpackContext *wpc); +uint32_t WavpackGetNativeSampleRate (WavpackContext *wpc); int WavpackGetBitsPerSample (WavpackContext *wpc); int WavpackGetBytesPerSample (WavpackContext *wpc); int WavpackGetNumChannels (WavpackContext *wpc); @@ -247,12 +359,15 @@ int WavpackGetChannelMask (WavpackContext *wpc); int WavpackGetReducedChannels (WavpackContext *wpc); int WavpackGetFloatNormExp (WavpackContext *wpc); int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]); +void WavpackGetChannelIdentities (WavpackContext *wpc, unsigned char *identities); +uint32_t WavpackGetChannelLayout (WavpackContext *wpc, unsigned char *reorder); uint32_t WavpackGetWrapperBytes (WavpackContext *wpc); unsigned char *WavpackGetWrapperData (WavpackContext *wpc); void WavpackFreeWrapper (WavpackContext *wpc); void WavpackSeekTrailingWrapper (WavpackContext *wpc); double WavpackGetProgress (WavpackContext *wpc); uint32_t WavpackGetFileSize (WavpackContext *wpc); +int64_t WavpackGetFileSize64 (WavpackContext *wpc); double WavpackGetRatio (WavpackContext *wpc); double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc); double WavpackGetInstantBitrate (WavpackContext *wpc); @@ -268,7 +383,17 @@ int WavpackDeleteTagItem (WavpackContext *wpc, const char *item); int 
WavpackWriteTag (WavpackContext *wpc); WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id); +void WavpackSetFileInformation (WavpackContext *wpc, char *file_extension, unsigned char file_format); + +#define WP_FORMAT_WAV 0 // Microsoft RIFF, including BWF and RF64 varients +#define WP_FORMAT_W64 1 // Sony Wave64 +#define WP_FORMAT_CAF 2 // Apple CoreAudio +#define WP_FORMAT_DFF 3 // Philips DSDIFF +#define WP_FORMAT_DSF 4 // Sony DSD Format + int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples); +int WavpackSetConfiguration64 (WavpackContext *wpc, WavpackConfig *config, int64_t total_samples, const unsigned char *chan_ids); +int WavpackSetChannelLayout (WavpackContext *wpc, uint32_t layout_tag, const unsigned char *reorder); int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount); int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]); int WavpackPackInit (WavpackContext *wpc); @@ -282,6 +407,8 @@ void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp); void WavpackLittleEndianToNative (void *data, char *format); void WavpackNativeToLittleEndian (void *data, char *format); +void WavpackBigEndianToNative (void *data, char *format); +void WavpackNativeToBigEndian (void *data, char *format); uint32_t WavpackGetLibraryVersion (void); const char *WavpackGetLibraryVersionString (void); diff --git a/third_party/wavpack/src/bits.c b/third_party/wavpack/src/bits.c deleted file mode 100644 index 2eab38b..0000000 --- a/third_party/wavpack/src/bits.c +++ /dev/null @@ -1,274 +0,0 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. 
// -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// bits.c - -// This module provides utilities to support the BitStream structure which is -// used to read and write all WavPack audio data streams. It also contains a -// wrapper for the stream I/O functions and a set of functions dealing with -// endian-ness, both for enhancing portability. Finally, a debug wrapper for -// the malloc() system is provided. - -#include "wavpack_local.h" - -#include -#include -#include -#include - -#if defined(WIN32) -#include -#else -#if defined(__OS2__) -#include -#endif -#include -#endif - -////////////////////////// Bitstream functions //////////////////////////////// - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - -// Open the specified BitStream and associate with the specified buffer. - -static void bs_read (Bitstream *bs); - -void bs_open_read (Bitstream *bs, void *buffer_start, void *buffer_end) -{ - bs->error = bs->sr = bs->bc = 0; - bs->ptr = (bs->buf = buffer_start) - 1; - bs->end = buffer_end; - bs->wrap = bs_read; -} - -// This function is only called from the getbit() and getbits() macros when -// the BitStream has been exhausted and more data is required. Sinve these -// bistreams no longer access files, this function simple sets an error and -// resets the buffer. - -static void bs_read (Bitstream *bs) -{ - bs->ptr = bs->buf - 1; - bs->error = 1; -} - -// This function is called to close the bitstream. It returns the number of -// full bytes actually read as bits. - -uint32_t bs_close_read (Bitstream *bs) -{ - uint32_t bytes_read; - - if (bs->bc < sizeof (*(bs->ptr)) * 8) - bs->ptr++; - - bytes_read = (uint32_t)(bs->ptr - bs->buf) * sizeof (*(bs->ptr)); - - if (!(bytes_read & 1)) - ++bytes_read; - - CLEAR (*bs); - return bytes_read; -} - -#endif - -#ifndef NO_PACK - -// Open the specified BitStream using the specified buffer pointers. 
It is -// assumed that enough buffer space has been allocated for all data that will -// be written, otherwise an error will be generated. - -static void bs_write (Bitstream *bs); - -void bs_open_write (Bitstream *bs, void *buffer_start, void *buffer_end) -{ - bs->error = bs->sr = bs->bc = 0; - bs->ptr = bs->buf = buffer_start; - bs->end = buffer_end; - bs->wrap = bs_write; -} - -// This function is only called from the putbit() and putbits() macros when -// the buffer is full, which is now flagged as an error. - -static void bs_write (Bitstream *bs) -{ - bs->ptr = bs->buf; - bs->error = 1; -} - -// This function forces a flushing write of the specified BitStream, and -// returns the total number of bytes written into the buffer. - -uint32_t bs_close_write (Bitstream *bs) -{ - uint32_t bytes_written; - - if (bs->error) - return (uint32_t) -1; - - while (1) { - while (bs->bc) - putbit_1 (bs); - - bytes_written = (uint32_t)(bs->ptr - bs->buf) * sizeof (*(bs->ptr)); - - if (bytes_written & 1) { - putbit_1 (bs); - } - else - break; - }; - - CLEAR (*bs); - return bytes_written; -} - -#endif - -/////////////////////// Endian Correction Routines //////////////////////////// - -void little_endian_to_native (void *data, char *format) -{ - unsigned char *cp = (unsigned char *) data; - int32_t temp; - - while (*format) { - switch (*format) { - case 'L': - temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24); - * (int32_t *) cp = temp; - cp += 4; - break; - - case 'S': - temp = cp [0] + (cp [1] << 8); - * (short *) cp = (short) temp; - cp += 2; - break; - - default: - if (isdigit (*format)) - cp += *format - '0'; - - break; - } - - format++; - } -} - -void native_to_little_endian (void *data, char *format) -{ - unsigned char *cp = (unsigned char *) data; - int32_t temp; - - while (*format) { - switch (*format) { - case 'L': - temp = * (int32_t *) cp; - *cp++ = (unsigned char) temp; - *cp++ = (unsigned char) (temp >> 8); - *cp++ = 
(unsigned char) (temp >> 16); - *cp++ = (unsigned char) (temp >> 24); - break; - - case 'S': - temp = * (short *) cp; - *cp++ = (unsigned char) temp; - *cp++ = (unsigned char) (temp >> 8); - break; - - default: - if (isdigit (*format)) - cp += *format - '0'; - - break; - } - - format++; - } -} - -////////////////////////// Debug Wrapper for Malloc /////////////////////////// - -#ifdef DEBUG_ALLOC - -void *vptrs [512]; - -static void *add_ptr (void *ptr) -{ - int i; - - for (i = 0; i < 512; ++i) - if (!vptrs [i]) { - vptrs [i] = ptr; - break; - } - - if (i == 512) - error_line ("too many mallocs!"); - - return ptr; -} - -static void *del_ptr (void *ptr) -{ - int i; - - for (i = 0; i < 512; ++i) - if (vptrs [i] == ptr) { - vptrs [i] = NULL; - break; - } - - if (i == 512) - error_line ("free invalid ptr!"); - - return ptr; -} - -void *malloc_db (uint32_t size) -{ - if (size) - return add_ptr (malloc (size)); - else - return NULL; -} - -void free_db (void *ptr) -{ - if (ptr) - free (del_ptr (ptr)); -} - -void *realloc_db (void *ptr, uint32_t size) -{ - if (ptr && size) - return add_ptr (realloc (del_ptr (ptr), size)); - else if (size) - return malloc_db (size); - else - free_db (ptr); - - return NULL; -} - -int32_t dump_alloc (void) -{ - int i, j; - - for (j = i = 0; i < 512; ++i) - if (vptrs [i]) - j++; - - return j; -} - -#endif diff --git a/third_party/wavpack/src/common_utils.c b/third_party/wavpack/src/common_utils.c new file mode 100644 index 0000000..c53db91 --- /dev/null +++ b/third_party/wavpack/src/common_utils.c @@ -0,0 +1,771 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// common_utils.c + +// This module provides a lot of the trivial WavPack API functions and several +// functions that are common to both reading and writing WavPack files (like +// WavpackCloseFile()). Functions here are restricted to those that have few +// external dependancies and this is done so that applications that statically +// link to the WavPack library (like the command-line utilities on Windows) +// do not need to include the entire library image if they only use a subset +// of it. This module will be loaded for ANY WavPack application. + +#include +#include +#include + +#include "wavpack_local.h" + +#ifndef LIBWAVPACK_VERSION_STRING +#include "wavpack_version.h" +#endif + +///////////////////////////// local table storage //////////////////////////// + +const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050, + 24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 }; + +///////////////////////////// executable code //////////////////////////////// + +// This function obtains general information about an open input file and +// returns a mask with the following bit values: + +// MODE_WVC: a .wvc file has been found and will be used for lossless +// MODE_LOSSLESS: file is lossless (either pure or hybrid) +// MODE_HYBRID: file is hybrid mode (either lossy or lossless) +// MODE_FLOAT: audio data is 32-bit ieee floating point +// MODE_VALID_TAG: file conatins a valid ID3v1 or APEv2 tag +// MODE_HIGH: file was created in "high" mode (information only) +// MODE_FAST: file was created in "fast" mode (information only) +// MODE_EXTRA: file was created using "extra" mode (information only) +// MODE_APETAG: file contains a valid APEv2 tag +// MODE_SFX: file was created as a "self-extracting" executable +// MODE_VERY_HIGH: file was created in the "very high" mode (or in +// the "high" 
mode prior to 4.4) +// MODE_MD5: file contains an MD5 checksum +// MODE_XMODE: level used for extra mode (1-6, 0=unknown) +// MODE_DNS: dynamic noise shaping + +int WavpackGetMode (WavpackContext *wpc) +{ + int mode = 0; + + if (wpc) { + if (wpc->config.flags & CONFIG_HYBRID_FLAG) + mode |= MODE_HYBRID; + else if (!(wpc->config.flags & CONFIG_LOSSY_MODE)) + mode |= MODE_LOSSLESS; + + if (wpc->wvc_flag) + mode |= (MODE_LOSSLESS | MODE_WVC); + + if (wpc->lossy_blocks) + mode &= ~MODE_LOSSLESS; + + if (wpc->config.flags & CONFIG_FLOAT_DATA) + mode |= MODE_FLOAT; + + if (wpc->config.flags & (CONFIG_HIGH_FLAG | CONFIG_VERY_HIGH_FLAG)) { + mode |= MODE_HIGH; + + if ((wpc->config.flags & CONFIG_VERY_HIGH_FLAG) || + (wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.version < 0x405)) + mode |= MODE_VERY_HIGH; + } + + if (wpc->config.flags & CONFIG_FAST_FLAG) + mode |= MODE_FAST; + + if (wpc->config.flags & CONFIG_EXTRA_MODE) + mode |= (MODE_EXTRA | (wpc->config.xmode << 12)); + + if (wpc->config.flags & CONFIG_CREATE_EXE) + mode |= MODE_SFX; + + if (wpc->config.flags & CONFIG_MD5_CHECKSUM) + mode |= MODE_MD5; + + if ((wpc->config.flags & CONFIG_HYBRID_FLAG) && (wpc->config.flags & CONFIG_DYNAMIC_SHAPING) && + wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.version >= 0x407) + mode |= MODE_DNS; + +#ifndef NO_TAGS + if (valid_tag (&wpc->m_tag)) { + mode |= MODE_VALID_TAG; + + if (valid_tag (&wpc->m_tag) == 'A') + mode |= MODE_APETAG; + } +#endif + + mode |= (wpc->config.qmode << 16) & 0xFF0000; + } + + return mode; +} + +// This function obtains information about specific file features that were +// added for version 5.0, specifically qualifications added to support CAF +// and DSD files. Except for indicating the presence of DSD data, these +// bits are meant to simply indicate the format of the data in the original +// source file and do NOT indicate how the library will return the data to +// the appication (which is always the same). 
This means that in general an +// application that simply wants to play or process the audio data need not +// be concerned about these. If the file is DSD audio, then either of the +// QMDOE_DSD_LSB_FIRST or QMODE_DSD_MSB_FIRST bits will be set (but the +// DSD audio is always returned to the caller MSB first). + +// QMODE_BIG_ENDIAN 0x1 // big-endian data format (opposite of WAV format) +// QMODE_SIGNED_BYTES 0x2 // 8-bit audio data is signed (opposite of WAV format) +// QMODE_UNSIGNED_WORDS 0x4 // audio data (other than 8-bit) is unsigned (opposite of WAV format) +// QMODE_REORDERED_CHANS 0x8 // source channels were not Microsoft order, so they were reordered +// QMODE_DSD_LSB_FIRST 0x10 // DSD bytes, LSB first (most Sony .dsf files) +// QMODE_DSD_MSB_FIRST 0x20 // DSD bytes, MSB first (Philips .dff files) +// QMODE_DSD_IN_BLOCKS 0x40 // DSD data is blocked by channels (Sony .dsf only) + +int WavpackGetQualifyMode (WavpackContext *wpc) +{ + return wpc->config.qmode & 0xFF; +} + +// This function returns a pointer to a string describing the last error +// generated by WavPack. + +char *WavpackGetErrorMessage (WavpackContext *wpc) +{ + return wpc->error_message; +} + +// Get total number of samples contained in the WavPack file, or -1 if unknown + +uint32_t WavpackGetNumSamples (WavpackContext *wpc) +{ + return (uint32_t) WavpackGetNumSamples64 (wpc); +} + +int64_t WavpackGetNumSamples64 (WavpackContext *wpc) +{ + return wpc ? 
wpc->total_samples : -1; +} + +// Get the current sample index position, or -1 if unknown + +uint32_t WavpackGetSampleIndex (WavpackContext *wpc) +{ + return (uint32_t) WavpackGetSampleIndex64 (wpc); +} + +int64_t WavpackGetSampleIndex64 (WavpackContext *wpc) +{ + if (wpc) { +#ifdef ENABLE_LEGACY + if (wpc->stream3) + return get_sample_index3 (wpc); + else if (wpc->streams && wpc->streams [0]) + return wpc->streams [0]->sample_index; +#else + if (wpc->streams && wpc->streams [0]) + return wpc->streams [0]->sample_index; +#endif + } + + return -1; +} + +// Get the number of errors encountered so far + +int WavpackGetNumErrors (WavpackContext *wpc) +{ + return wpc ? wpc->crc_errors : 0; +} + +// return TRUE if any uncorrected lossy blocks were actually written or read + +int WavpackLossyBlocks (WavpackContext *wpc) +{ + return wpc ? wpc->lossy_blocks : 0; +} + +// Calculate the progress through the file as a double from 0.0 (for begin) +// to 1.0 (for done). A return value of -1.0 indicates that the progress is +// unknown. + +double WavpackGetProgress (WavpackContext *wpc) +{ + if (wpc && wpc->total_samples != -1 && wpc->total_samples != 0) + return (double) WavpackGetSampleIndex64 (wpc) / wpc->total_samples; + else + return -1.0; +} + +// Return the total size of the WavPack file(s) in bytes. + +uint32_t WavpackGetFileSize (WavpackContext *wpc) +{ + return (uint32_t) (wpc ? wpc->filelen + wpc->file2len : 0); +} + +int64_t WavpackGetFileSize64 (WavpackContext *wpc) +{ + return wpc ? wpc->filelen + wpc->file2len : 0; +} + +// Calculate the ratio of the specified WavPack file size to the size of the +// original audio data as a double greater than 0.0 and (usually) smaller than +// 1.0. A value greater than 1.0 represents "negative" compression and a +// return value of 0.0 indicates that the ratio cannot be determined. 
+ +double WavpackGetRatio (WavpackContext *wpc) +{ + if (wpc && wpc->total_samples != -1 && wpc->filelen) { + double output_size = (double) wpc->total_samples * wpc->config.num_channels * + wpc->config.bytes_per_sample; + double input_size = (double) wpc->filelen + wpc->file2len; + + if (output_size >= 1.0 && input_size >= 1.0) + return input_size / output_size; + } + + return 0.0; +} + +// Calculate the average bitrate of the WavPack file in bits per second. A +// return of 0.0 indicates that the bitrate cannot be determined. An option is +// provided to use (or not use) any attendant .wvc file. + +double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc) +{ + if (wpc && wpc->total_samples != -1 && wpc->filelen) { + double output_time = (double) wpc->total_samples / WavpackGetSampleRate (wpc); + double input_size = (double) wpc->filelen + (count_wvc ? wpc->file2len : 0); + + if (output_time >= 0.1 && input_size >= 1.0) + return input_size * 8.0 / output_time; + } + + return 0.0; +} + +// Calculate the bitrate of the current WavPack file block in bits per second. +// This can be used for an "instant" bit display and gets updated from about +// 1 to 4 times per second. A return of 0.0 indicates that the bitrate cannot +// be determined. 
+ +double WavpackGetInstantBitrate (WavpackContext *wpc) +{ + if (wpc && wpc->stream3) + return WavpackGetAverageBitrate (wpc, TRUE); + + if (wpc && wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.block_samples) { + double output_time = (double) wpc->streams [0]->wphdr.block_samples / WavpackGetSampleRate (wpc); + double input_size = 0; + int si; + + for (si = 0; si < wpc->num_streams; ++si) { + if (wpc->streams [si]->blockbuff) + input_size += ((WavpackHeader *) wpc->streams [si]->blockbuff)->ckSize; + + if (wpc->streams [si]->block2buff) + input_size += ((WavpackHeader *) wpc->streams [si]->block2buff)->ckSize; + } + + if (output_time > 0.0 && input_size >= 1.0) + return input_size * 8.0 / output_time; + } + + return 0.0; +} + +// This function allows retrieving the Core Audio File channel layout, many of which do not +// conform to the Microsoft ordering standard that WavPack requires internally (at least for +// those channels present in the "channel mask"). In addition to the layout tag, this function +// returns the reordering string (if stored in the file) to allow the unpacker to reorder the +// channels back to the specified layout (if it wants to restore the CAF order). The number of +// channels in the layout is determined from the lower nybble of the layout word (and should +// probably match the number of channels in the file), and if a reorder string is requested +// then that much space must be allocated. Note that all the reordering is actually done +// outside of this library, and that if reordering is done then the appropriate qmode bit +// will be set. +// +// Note: Normally this function would not be used by an application unless it specifically +// wanted to restore a non-standard channel order (to check an MD5, for example) or obtain +// the Core Audio channel layout ID. 
For simple file decoding for playback, the channel_mask +// should provide all the information required unless there are non-Microsoft channels +// involved, in which case WavpackGetChannelIdentities() will provide the identities of +// the other channels (if they are known). + +uint32_t WavpackGetChannelLayout (WavpackContext *wpc, unsigned char *reorder) +{ + if ((wpc->channel_layout & 0xff) && wpc->channel_reordering && reorder) + memcpy (reorder, wpc->channel_reordering, wpc->channel_layout & 0xff); + + return wpc->channel_layout; +} + +// This function provides the identities of ALL the channels in the file, including the +// standard Microsoft channels (which come first, in order, and are numbered 1-18) and also +// any non-Microsoft channels (which can be in any order and have values from 33-254). The +// value 0x00 is invalid and 0xFF indicates an "unknown" or "unnassigned" channel. The +// string is NULL terminated so the caller must supply enough space for the number +// of channels indicated by WavpackGetNumChannels(), plus one. +// +// Note that this function returns the actual order of the channels in the Wavpack file +// (i.e., the order returned by WavpackUnpackSamples()). If the file includes a "reordering" +// string because the source file was not in Microsoft order that is NOT taken into account +// here and really only needs to be considered if doing an MD5 verification or if it's +// required to restore the original order/file (like wvunpack does). 
+
+void WavpackGetChannelIdentities (WavpackContext *wpc, unsigned char *identities)
+{
+    int num_channels = wpc->config.num_channels, index = 1;
+    uint32_t channel_mask = wpc->config.channel_mask;
+    unsigned char *src = wpc->channel_identities;
+
+    while (num_channels--) {
+        if (channel_mask) {
+            // channels named by the mask come first: the identity is the 1-based position
+            // of the next set bit (the loop terminates because the mask is non-zero)
+            while (!(channel_mask & 1)) {
+                channel_mask >>= 1;
+                index++;
+            }
+
+            *identities++ = index++;
+            channel_mask >>= 1;
+        }
+        else if (src && *src)
+            *identities++ = *src++;     // mask exhausted: take the next stored identity
+        else
+            *identities++ = 0xff;       // nothing stored for this channel: mark it unknown
+    }
+
+    // terminate the string (hence the "plus one" byte the caller must provide)
+    *identities = 0;
+}
+
+// For local use only. Install a callback to be executed when WavpackCloseFile() is called,
+// usually used to dump some statistics accumulated during encode or decode.
+
+void install_close_callback (WavpackContext *wpc, void cb_func (void *wpc))
+{
+    wpc->close_callback = cb_func;
+}
+
+// Close the specified WavPack file and release all resources used by it.
+// Returns NULL. Passing a NULL context is allowed and does nothing.
+
+WavpackContext *WavpackCloseFile (WavpackContext *wpc)
+{
+    // nothing to release for a context that was never created (same spirit as free (NULL))
+    if (!wpc)
+        return NULL;
+
+    // run the installed callback first, while the context is still fully intact
+    if (wpc->close_callback)
+        wpc->close_callback (wpc);
+
+    if (wpc->streams) {
+        // free_streams() releases the per-stream buffers and every stream except [0],
+        // so the default stream and the pointer array itself are released here
+        free_streams (wpc);
+
+        if (wpc->streams [0])
+            free (wpc->streams [0]);
+
+        free (wpc->streams);
+    }
+
+#ifdef ENABLE_LEGACY
+    if (wpc->stream3)
+        free_stream3 (wpc);
+#endif
+
+    // close the input handles through the reader, if it provides a close function
+    if (wpc->reader && wpc->reader->close) {
+        if (wpc->wv_in)
+            wpc->reader->close (wpc->wv_in);
+
+        if (wpc->wvc_in)
+            wpc->reader->close (wpc->wvc_in);
+    }
+
+    WavpackFreeWrapper (wpc);
+
+    // NOTE(review): wpc->channel_identities (read by WavpackGetChannelIdentities) is not
+    // released here - confirm whether it is owned and freed elsewhere or leaks on close.
+    if (wpc->channel_reordering)
+        free (wpc->channel_reordering);
+
+#ifndef NO_TAGS
+    free_tag (&wpc->m_tag);
+#endif
+
+#ifdef ENABLE_DSD
+    if (wpc->decimation_context)
+        decimate_dsd_destroy (wpc->decimation_context);
+#endif
+
+    free (wpc);
+
+    return NULL;
+}
+
+// These routines are used to access (and free) header and trailer data that
+// was retrieved from the Wavpack file. The header will be available before
+// the samples are decoded and the trailer will be available after all samples
+// have been read.
+
+uint32_t WavpackGetWrapperBytes (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 0;
+
+    return wpc->wrapper_bytes;
+}
+
+unsigned char *WavpackGetWrapperData (WavpackContext *wpc)
+{
+    if (!wpc)
+        return NULL;
+
+    return wpc->wrapper_data;
+}
+
+void WavpackFreeWrapper (WavpackContext *wpc)
+{
+    // nothing to do without a context or without stored wrapper data
+    if (!wpc || !wpc->wrapper_data)
+        return;
+
+    free (wpc->wrapper_data);
+    wpc->wrapper_data = NULL;
+    wpc->wrapper_bytes = 0;
+}
+
+// Returns the sample rate of the specified WavPack file
+// (44100 when no context is supplied; scaled by the DSD multiplier when one is set)
+
+uint32_t WavpackGetSampleRate (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 44100;
+
+    if (wpc->dsd_multiplier)
+        return wpc->config.sample_rate * wpc->dsd_multiplier;
+
+    return wpc->config.sample_rate;
+}
+
+// Returns the native sample rate of the specified WavPack file
+// (provides the native rate for DSD files rather than the "byte" rate that's used for
+// seeking, duration, etc. and would generally be used just for user facing reports)
+
+uint32_t WavpackGetNativeSampleRate (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 44100;
+
+    // for DSD the native rate is eight times the "byte" rate reported by WavpackGetSampleRate()
+    if (wpc->dsd_multiplier)
+        return wpc->config.sample_rate * wpc->dsd_multiplier * 8;
+
+    return wpc->config.sample_rate;
+}
+
+// Returns the number of channels of the specified WavPack file. Note that
+// this is the actual number of channels contained in the file even if the
+// OPEN_2CH_MAX flag was specified when the file was opened.
+
+int WavpackGetNumChannels (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 2;
+
+    return wpc->config.num_channels;
+}
+
+// Returns the standard Microsoft channel mask for the specified WavPack
+// file. A value of zero indicates that there is no speaker assignment
+// information.
+
+int WavpackGetChannelMask (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 0;
+
+    return wpc->config.channel_mask;
+}
+
+// Return the normalization value for floating point data (valid only
+// if floating point data is present). A value of 127 indicates that
+// the floating point range is +/- 1.0. Higher values indicate a
+// larger floating point range.
+
+int WavpackGetFloatNormExp (WavpackContext *wpc)
+{
+    // every sibling getter tolerates a NULL context; fall back to 127 here, the value
+    // that denotes the standard +/- 1.0 floating point range
+    return wpc ? wpc->config.float_norm_exp : 127;
+}
+
+// Returns the actual number of valid bits per sample contained in the
+// original file, which may or may not be a multiple of 8. Floating data
+// always has 32 bits, integers may be from 1 to 32 bits each. When this
+// value is not a multiple of 8, then the "extra" bits are located in the
+// LSBs of the results. That is, values are right justified when unpacked
+// into ints, but are left justified in the number of bytes used by the
+// original data. Returns 16 when no context is supplied.
+
+int WavpackGetBitsPerSample (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.bits_per_sample : 16;
+}
+
+// Returns the number of bytes used for each sample (1 to 4) in the original
+// file. This is required information for the user of this module because the
+// audio data is returned in the LOWER bytes of the long buffer and must be
+// left-shifted 8, 16, or 24 bits if normalized longs are required.
+// Returns 2 when no context is supplied.
+
+int WavpackGetBytesPerSample (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.bytes_per_sample : 2;
+}
+
+// If the OPEN_2CH_MAX flag is specified when opening the file, this function
+// will return the actual number of channels decoded from the file (which may
+// or may not be less than the actual number of channels, but will always be
+// 1 or 2). Normally, this will be the front left and right channels of a
+// multichannel file. Returns 2 when no context is supplied.
+
+int WavpackGetReducedChannels (WavpackContext *wpc)
+{
+    if (!wpc)
+        return 2;
+
+    // a zero reduced_channels means no reduction is in effect, so report the full count
+    return wpc->reduced_channels ? wpc->reduced_channels : wpc->config.num_channels;
+}
+
+// Free all memory allocated for raw WavPack blocks (for all allocated streams)
+// and free all additional streams. This does not free the default stream ([0])
+// which is always kept around.
+
+void free_streams (WavpackContext *wpc)
+{
+    int si = wpc->num_streams;
+
+    // walk from the last stream down to stream 0; every pointer is reset to NULL
+    // right after its buffer has been freed
+    while (si--) {
+        if (wpc->streams [si]->blockbuff) {
+            free (wpc->streams [si]->blockbuff);
+            wpc->streams [si]->blockbuff = NULL;
+        }
+
+        if (wpc->streams [si]->block2buff) {
+            free (wpc->streams [si]->block2buff);
+            wpc->streams [si]->block2buff = NULL;
+        }
+
+        if (wpc->streams [si]->sample_buffer) {
+            free (wpc->streams [si]->sample_buffer);
+            wpc->streams [si]->sample_buffer = NULL;
+        }
+
+        if (wpc->streams [si]->dc.shaping_data) {
+            free (wpc->streams [si]->dc.shaping_data);
+            wpc->streams [si]->dc.shaping_data = NULL;
+        }
+
+#ifdef ENABLE_DSD
+        if (wpc->streams [si]->dsd.probabilities) {
+            free (wpc->streams [si]->dsd.probabilities);
+            wpc->streams [si]->dsd.probabilities = NULL;
+        }
+
+        if (wpc->streams [si]->dsd.summed_probabilities) {
+            free (wpc->streams [si]->dsd.summed_probabilities);
+            wpc->streams [si]->dsd.summed_probabilities = NULL;
+        }
+
+        if (wpc->streams [si]->dsd.value_lookup) {
+            int i;
+
+            // value_lookup is an array of history_bins separately allocated tables
+            for (i = 0; i < wpc->streams [si]->dsd.history_bins; ++i)
+                if (wpc->streams [si]->dsd.value_lookup [i])
+                    free (wpc->streams [si]->dsd.value_lookup [i]);
+
+            free (wpc->streams [si]->dsd.value_lookup);
+            wpc->streams [si]->dsd.value_lookup = NULL;
+        }
+
+        if (wpc->streams [si]->dsd.ptable) {
+            free (wpc->streams [si]->dsd.ptable);
+            wpc->streams [si]->dsd.ptable = NULL;
+        }
+#endif
+
+        // every stream except the default one ([0]) is released outright
+        if (si) {
+            wpc->num_streams--;
+            free (wpc->streams [si]);
+            wpc->streams [si] = NULL;
+        }
+    }
+
+    wpc->current_stream = 0;
+}
+
+// Adjust the exponent of each of the num_values entries in the buffer by delta_exp (a
+// delta of zero leaves the buffer untouched). Entries whose exponent is zero, or whose
+// adjusted exponent would be zero or less, are set to zero; entries whose exponent is
+// 255, or would reach 255 or more, get exponent 255 with a zero mantissa. The values are
+// accessed through the f32 exponent / mantissa macros, i.e. as raw bit patterns
+// (presumably 32-bit IEEE floats - confirm against the f32 definitions).
+
+void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp)
+{
+    f32 *fvalues = (f32 *) values;
+    int exp;
+
+    if (!delta_exp)
+        return;
+
+    while (num_values--) {
+        if ((exp = get_exponent (*fvalues)) == 0 || exp + delta_exp <= 0)
+            *fvalues = 0;
+        else if (exp == 255 || (exp += delta_exp) >= 255) {
+            set_exponent (*fvalues, 255);
+            set_mantissa (*fvalues, 0);
+        }
+        else
+            set_exponent (*fvalues, exp);
+
+        fvalues++;
+    }
+}
+
+// The four public routines below convert a packed structure in place between a fixed
+// serialized byte order and the byte order of the host. The layout of the structure is
+// described by a format string that is processed one character at a time:
+//
+//   'D'          an 8-byte integer
+//   'L'          a 4-byte integer
+//   'S'          a 2-byte integer
+//   '0' - '9'    that many bytes are skipped without being touched
+//
+// Any other character is ignored. All arithmetic is done on unsigned values and all native
+// loads and stores go through memcpy(), so the conversions never shift into a sign bit
+// and do not depend on the alignment of the buffer.
+
+// Assemble an unsigned value from "nbytes" serialized bytes. When "big_endian" is non-zero
+// cp [0] is the most significant byte, otherwise it is the least significant one.
+
+static uint64_t wire_read_uint (const unsigned char *cp, int nbytes, int big_endian)
+{
+    uint64_t value = 0;
+    int i;
+
+    for (i = 0; i < nbytes; ++i)
+        value |= (uint64_t) cp [i] << (8 * (big_endian ? nbytes - 1 - i : i));
+
+    return value;
+}
+
+// Serialize the low "nbytes" bytes of "value" in the requested byte order.
+
+static void wire_write_uint (unsigned char *cp, uint64_t value, int nbytes, int big_endian)
+{
+    int i;
+
+    for (i = 0; i < nbytes; ++i)
+        cp [i] = (unsigned char) (value >> (8 * (big_endian ? nbytes - 1 - i : i)));
+}
+
+// Convert every field named by "format" from serialized order to native order, in place.
+
+static void wire_to_native (void *data, const char *format, int big_endian)
+{
+    unsigned char *cp = (unsigned char *) data;
+
+    for (; *format; ++format) {
+        uint64_t v64;
+        uint32_t v32;
+        uint16_t v16;
+
+        switch (*format) {
+            case 'D':
+                v64 = wire_read_uint (cp, 8, big_endian);
+                memcpy (cp, &v64, sizeof (v64));
+                cp += 8;
+                break;
+
+            case 'L':
+                v32 = (uint32_t) wire_read_uint (cp, 4, big_endian);
+                memcpy (cp, &v32, sizeof (v32));
+                cp += 4;
+                break;
+
+            case 'S':
+                v16 = (uint16_t) wire_read_uint (cp, 2, big_endian);
+                memcpy (cp, &v16, sizeof (v16));
+                cp += 2;
+                break;
+
+            default:
+                // isdigit() requires a value representable as unsigned char
+                if (isdigit ((unsigned char) *format))
+                    cp += *format - '0';
+
+                break;
+        }
+    }
+}
+
+// Convert every field named by "format" from native order to serialized order, in place.
+
+static void native_to_wire (void *data, const char *format, int big_endian)
+{
+    unsigned char *cp = (unsigned char *) data;
+
+    for (; *format; ++format) {
+        uint64_t v64;
+        uint32_t v32;
+        uint16_t v16;
+
+        switch (*format) {
+            case 'D':
+                memcpy (&v64, cp, sizeof (v64));
+                wire_write_uint (cp, v64, 8, big_endian);
+                cp += 8;
+                break;
+
+            case 'L':
+                memcpy (&v32, cp, sizeof (v32));
+                wire_write_uint (cp, v32, 4, big_endian);
+                cp += 4;
+                break;
+
+            case 'S':
+                memcpy (&v16, cp, sizeof (v16));
+                wire_write_uint (cp, v16, 2, big_endian);
+                cp += 2;
+                break;
+
+            default:
+                // isdigit() requires a value representable as unsigned char
+                if (isdigit ((unsigned char) *format))
+                    cp += *format - '0';
+
+                break;
+        }
+    }
+}
+
+void WavpackLittleEndianToNative (void *data, char *format)
+{
+    wire_to_native (data, format, 0);
+}
+
+void WavpackNativeToLittleEndian (void *data, char *format)
+{
+    native_to_wire (data, format, 0);
+}
+
+void WavpackBigEndianToNative (void *data, char *format)
+{
+    wire_to_native (data, format, 1);
+}
+
+void WavpackNativeToBigEndian (void *data, char *format)
+{
+    native_to_wire (data, format, 1);
+}
+
+// Returns the library version packed into one word: major in bits 16 and up, minor in
+// bits 8-15 and micro in bits 0-7.
+
+uint32_t WavpackGetLibraryVersion (void)
+{
+    return (LIBWAVPACK_MAJOR<<16)
+          |(LIBWAVPACK_MINOR<<8)
+          |(LIBWAVPACK_MICRO<<0);
+}
+
+// Returns the library version as a string (the LIBWAVPACK_VERSION_STRING constant).
+
+const char *WavpackGetLibraryVersionString (void)
+{
+    return LIBWAVPACK_VERSION_STRING;
+}
+
diff --git a/third_party/wavpack/src/decorr_tables.h b/third_party/wavpack/src/decorr_tables.h
new file mode 100644
index 0000000..be17de9
--- /dev/null
+++ b/third_party/wavpack/src/decorr_tables.h
@@ -0,0 +1,1077 @@
+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//                Copyright (c) 1998 - 2013 Conifer Software.             //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// decorr_tables.h
+
+// These four tables specify the characteristics of the decorrelation filters
+// for the four basic compression modes (fast, normal, high, and very high).
+//
+// The first entry in the table represents the "default" filter for the
+// corresponding mode; subsequent entries represent filters that are tried
+// in the "extra" modes 1-3 ("extra" modes 4-6 create filters from scratch).
+//
+// The first value indicates whether the filter is applied to joint stereo
+// data (0=L/R, 1=M/S) and the second value represents the "delta" value of
+// the adaptive filter. The rest of the values (2-16, depending on mode) are
+// the "terms" of the filter.
+//
+// Each term represents one layer of the sequential filter, where positive
+// values indicate the relative sample involved from the same channel (1=prev),
+// 17 & 18 are special functions using the previous 2 samples, and negative
+// values indicate cross channel decorrelation (in stereo only).
+//
+// It would be ideal if this was the only source for the decorrelation tables,
+// but unfortunately the defaults (first entry) are duplicated in the assembly
+// code for the function pack_decorr_mono_buffer() and there is no check in
+// that code to make sure the correct filter is being passed in. SO, IF A
+// CHANGE IS MADE HERE TO ONE OF THE DEFAULT FILTERS, THEN THE CORRESPONDING
+// ASSEMBLY CODE MUST BE CHANGED ALSO, OR VERY CORRUPT FILES WILL RESULT!!
+//
+// Since this include file contains the actual tables as static const data,
+// it should only be included from ONE source file (currently pack.c)!
+ +static const WavpackDecorrSpec fast_specs [] = { + { 1, 2,18,17 }, // 0 + { 1, 1,17,17 }, // 1 + { 0, 2,18,17 }, // 2 + { 0, 1,17,17 }, // 3 + { 1, 3, 1,18 }, // 4 + { 1, 1,17, 1 }, // 5 + { 0, 1, 1,17 }, // 6 + { 0, 1,-2,17 }, // 7 + { 0, 2,-1,17 }, // 8 + { 1, 1,17, 2 }, // 9 + { 0, 3,18,18 }, // 10 + { 0, 1,17, 1 }, // 11 + { 1, 6, 1, 2 }, // 12 + { 1, 1,17, 3 }, // 13 + { 0, 1,-2, 3 }, // 14 + { 0, 1, 2,17 }, // 15 + { 0, 1,18,-2 }, // 16 + { 0, 1,-1,17 }, // 17 + { 0, 1,18,17 }, // 18 + { 0, 1,17, 2 }, // 19 + { 1, 2,18,-2 }, // 20 + { 1, 1, 1,17 }, // 21 + { 0, 3,18, 2 }, // 22 + { 0, 1,17,-2 }, // 23 + { 0, 1,18,-2 }, // 24 + { 1, 2,17,-3 }, // 25 + { 0, 1,18, 3 }, // 26 + { 0, 1,18,18 }, // 27 + { 1, 1, 1, 3 }, // 28 + { 1, 1,18, 3 }, // 29 + { 1, 1, 1, 3 }, // 30 + { 0, 2,18,17 }, // 31 + { 1, 1, 1,17 }, // 32 + { 1, 1,17, 3 }, // 33 + { 0, 3,18,17 }, // 34 + { 0, 1,18,18 }, // 35 + { 1, 1, 1, 3 }, // 36 + { 1, 1, 1,18 }, // 37 + { 0, 1,18,-2 }, // 38 + { 0, 2,18,17 }, // 39 + { 0, 1,-1,18 }, // 40 + { 1, 1,17, 3 }, // 41 + { 0, 1,17, 2 }, // 42 + { 0, 1,17, 3 }, // 43 + { 1, 1,18, 2 }, // 44 + { 1, 1,17,-2 }, // 45 + { 0, 1, 1,-2 }, // 46 + { 0, 2,18,17 }, // 47 + { 0, 1,17,-2 }, // 48 + { 1, 1,17,-2 }, // 49 + { 0, 1,18, 3 }, // 50 + { 0, 1, 2,17 }, // 51 + { 1, 2,18,-3 }, // 52 + { 1, 2, 1,18 }, // 53 + { 1, 2,18, 2 }, // 54 + { 0, 1,17,-1 }, // 55 + { 0, 1,17,-2 }, // 56 + { 1, 1,17,-2 }, // 57 + { 1, 1, 1, 3 }, // 58 + { 0, 1, 1,17 }, // 59 + { 1, 2,18,-2 }, // 60 + { 1, 2,17,-3 }, // 61 + { 0, 2,18,17 }, // 62 + { 0, 2,18,17 }, // 63 + { 1, 1,17, 2 }, // 64 + { 1, 2,18,18 }, // 65 + { 0, 1,17, 2 }, // 66 + { 0, 1,18,17 }, // 67 + { 1, 1, 1,17 }, // 68 + { 1, 1,17, 2 }, // 69 + { 0, 2,18,18 }, // 70 + { 0, 2,18,17 }, // 71 + { 1, 2,17,-3 }, // 72 + { 1, 6, 1, 2 }, // 73 + { 0, 3,17,17 }, // 74 + { 0, 1, 1,18 }, // 75 + { 0, 1, 1,-2 }, // 76 + { 1, 1,17, 2 }, // 77 + { 0, 2,18,17 }, // 78 + { 0, 2,18,17 }, // 79 + { 1, 1,18, 3 }, // 80 + { 1, 
2,17,-3 }, // 81 + { 0, 1,17, 2 }, // 82 + { 0, 1,17, 3 }, // 83 + { 0, 1,18,-2 }, // 84 + { 1, 1,18,18 }, // 85 + { 1, 6, 1, 2 }, // 86 + { 0, 2,18,17 }, // 87 + { 0, 2,18,17 }, // 88 + { 0, 1,-1,17 }, // 89 + { 1, 1,18, 3 }, // 90 + { 0, 1,17,18 }, // 91 + { 1, 1,17, 3 }, // 92 + { 0, 1,18, 3 }, // 93 + { 0, 2,18,17 }, // 94 + { 0, 2,18,17 }, // 95 + { 1, 2,18, 2 }, // 96 + { 0, 1,-2, 3 }, // 97 + { 0, 4,18,-1 }, // 98 + { 0, 2,18,18 }, // 99 + { 0, 1,-2, 3 }, // 100 + { 1, 1,17,-2 }, // 101 + { 0, 1,17, 3 }, // 102 + { 0, 2,18,17 }, // 103 + { 0, 2,-1,18 }, // 104 + { 1, 1, 2,17 }, // 105 + { 0, 2,17,-2 }, // 106 + { 0, 1,17, 2 }, // 107 + { 1, 2,18,-3 }, // 108 + { 0, 1,17,-2 }, // 109 + { 0, 2,18,17 }, // 110 + { 0, 2,18,17 }, // 111 + { 1, 1,17,-2 }, // 112 + { 1, 2,17,-3 }, // 113 + { 1, 1, 1, 3 }, // 114 + { 1, 1, 2,17 }, // 115 + { 1, 2,18, 2 }, // 116 + { 1, 1, 2,17 }, // 117 + { 1, 1,18, 2 }, // 118 + { 0, 2,18,17 }, // 119 + { 0, 2,18,17 }, // 120 + { 0, 1,17,-2 }, // 121 + { 0, 2,18,17 }, // 122 + { 0, 2,17,-1 }, // 123 + { 0, 2,18,-2 }, // 124 + { 0, 2,18,17 }, // 125 + { 0, 2,18,17 }, // 126 + { 0, 2,18,17 }, // 127 + { 1, 1, 1, 3 }, // 128 + { 0, 2,-2,17 }, // 129 + { 0, 2,18,-2 }, // 130 + { 0, 2,17,-2 }, // 131 + { 1, 1, 2,17 }, // 132 + { 1, 1, 1, 3 }, // 133 + { 0, 1, 2,17 }, // 134 + { 0, 2,18,17 }, // 135 + { 0, 3,-1,17 }, // 136 + { 1, 1, 2,17 }, // 137 + { 0, 2,18,18 }, // 138 + { 0, 1,17, 2 }, // 139 + { 1, 4,18,-3 }, // 140 + { 1, 1,18, 1 }, // 141 + { 0, 2,18,17 }, // 142 + { 0, 2,18,17 }, // 143 + { 1, 2,18,-1 }, // 144 + { 0, 1,-1,18 }, // 145 + { 1, 6, 1, 2 }, // 146 + { 1, 1,17, 2 }, // 147 + { 1, 4,18, 3 }, // 148 + { 0, 1, 1,17 }, // 149 + { 0, 1,18, 2 }, // 150 + { 0, 2,18,17 }, // 151 + { 0, 2,18,17 }, // 152 + { 1, 2,17, 2 }, // 153 + { 0, 2,18,-2 }, // 154 + { 0, 1, 1,18 }, // 155 + { 1, 2,18,-3 }, // 156 + { 0, 2,18,17 }, // 157 + { 0, 2,18,17 }, // 158 + { 0, 2,18,17 }, // 159 + { 1, 2,18,18 }, // 160 + { 1, 3,17,17 }, // 161 
+ { 0, 1,-2,17 }, // 162 + { 0, 1,17,18 }, // 163 + { 0, 1,-1, 3 }, // 164 + { 1, 1, 2,17 }, // 165 + { 0, 2,18,-1 }, // 166 + { 0, 2,18,17 }, // 167 + { 0, 2,18,17 }, // 168 + { 1, 1,17,-2 }, // 169 + { 1, 2,17, 2 }, // 170 + { 1, 1,18, 3 }, // 171 + { 0, 1,18, 2 }, // 172 + { 1, 2,17,-3 }, // 173 + { 0, 2,18,17 }, // 174 + { 0, 2,18,17 }, // 175 + { 0, 1,-2,17 }, // 176 + { 0, 1,17,-1 }, // 177 + { 0, 1,18,-1 }, // 178 + { 0, 2,18,17 }, // 179 + { 1, 2,17,-3 }, // 180 + { 1, 1, 1,18 }, // 181 + { 1, 3,18, 2 }, // 182 + { 0, 2,18,17 }, // 183 + { 0, 2,18,17 }, // 184 + { 0, 2,18,17 }, // 185 + { 0, 2,18,17 }, // 186 + { 0, 3,18,18 }, // 187 + { 0, 1, 1,-2 }, // 188 + { 0, 2,18,17 }, // 189 + { 0, 2,18,17 }, // 190 + { 0, 2,18,17 }, // 191 + { 1, 2,17,-3 }, // 192 + { 1, 1,18,18 }, // 193 + { 0, 2,18, 2 }, // 194 + { 0, 1,17,18 }, // 195 + { 1, 2,18, 2 }, // 196 + { 1, 1,17,-2 }, // 197 + { 0, 2,17,-1 }, // 198 + { 0, 2,18,17 }, // 199 + { 0, 2,18,17 }, // 200 + { 0, 2,18,17 }, // 201 + { 0, 1, 1,-2 }, // 202 + { 0, 1,18, 1 }, // 203 + { 1, 2,18,-2 }, // 204 + { 0, 1,17, 2 }, // 205 + { 0, 2,18,17 }, // 206 + { 0, 2,18,17 }, // 207 + { 1, 1,17, 3 }, // 208 + { 0, 1,17,-1 }, // 209 + { 0, 1,18, 2 }, // 210 + { 1, 1,17, 3 }, // 211 + { 1, 1,17,-2 }, // 212 + { 0, 1,18,18 }, // 213 + { 0, 2,18,17 }, // 214 + { 0, 2,18,17 }, // 215 + { 0, 2,18,17 }, // 216 + { 0, 2,18,17 }, // 217 + { 0, 2,18,17 }, // 218 + { 1, 1,17,18 }, // 219 + { 0, 1,-2, 3 }, // 220 + { 0, 2,18,17 }, // 221 + { 0, 2,18,17 }, // 222 + { 0, 2,18,17 }, // 223 + { 1, 2,18,-3 }, // 224 + { 0, 2,18,17 }, // 225 + { 0, 3,18, 2 }, // 226 + { 0, 1, 1,18 }, // 227 + { 0, 2,18,17 }, // 228 + { 0, 1,17,-1 }, // 229 + { 0, 2,18,17 }, // 230 + { 0, 2,18,17 }, // 231 + { 0, 2,18,17 }, // 232 + { 0, 1,-2, 3 }, // 233 + { 0, 3,17,17 }, // 234 + { 0, 2,18,17 }, // 235 + { 0, 2,18,17 }, // 236 + { 1, 1,17, 2 }, // 237 + { 0, 2,18,17 }, // 238 + { 0, 2,18,17 }, // 239 + { 1, 1,17, 2 }, // 240 + { 0, 2,18,17 }, // 241 
+ { 0, 2,18,17 }, // 242 + { 0, 2,18,17 }, // 243 + { 0, 2,18, 2 }, // 244 + { 0, 2,18,17 }, // 245 + { 0, 2,18,17 }, // 246 + { 0, 2,18,17 }, // 247 + { 0, 2,18,17 }, // 248 + { 0, 2,18,17 }, // 249 + { 0, 2,18,17 }, // 250 + { 0, 2,18,17 }, // 251 + { 0, 2,18,17 }, // 252 + { 0, 2,18,17 }, // 253 + { 0, 2,18,17 }, // 254 + { 0, 2,18,17 }, // 255 +}; + +static const WavpackDecorrSpec default_specs [] = { + { 1, 2,18,18, 2,17, 3 }, // 0 + { 0, 2,18,17,-1, 3, 2 }, // 1 + { 1, 1,17,18,18,-2, 2 }, // 2 + { 0, 2,18,17, 3,-2,17 }, // 3 + { 1, 2,18,17, 2,17, 3 }, // 4 + { 0, 1,18,18,-1, 2,17 }, // 5 + { 0, 1,17,17,-2, 2, 3 }, // 6 + { 0, 1,18,-2,18, 2,17 }, // 7 + { 1, 2,18,18,-1, 2, 3 }, // 8 + { 0, 2,18,17, 3, 2, 5 }, // 9 + { 1, 1,18,17,18, 2, 5 }, // 10 + { 0, 1,17,17,-2, 2, 3 }, // 11 + { 0, 1,18,-2,18, 2, 5 }, // 12 + { 0, 1,17,-2,17, 2,-3 }, // 13 + { 1, 1,17,-2,17, 1, 2 }, // 14 + { 0, 1,17,17,-2, 2, 3 }, // 15 + { 1, 1,18, 3, 1, 5, 4 }, // 16 + { 1, 4,18,18, 2, 3,-2 }, // 17 + { 0, 1, 1,-1,-1, 2,17 }, // 18 + { 0, 2,18,17, 3, 2, 5 }, // 19 + { 0, 1,18,18,18, 2,17 }, // 20 + { 0, 1,18,17,-1, 2,18 }, // 21 + { 1, 1,17, 3, 2, 1, 7 }, // 22 + { 0, 2,18,-2,18, 2, 3 }, // 23 + { 1, 3,18,-3,18, 2, 3 }, // 24 + { 0, 3,18,17, 2, 3,17 }, // 25 + { 1, 1,17,17, 2, 1, 4 }, // 26 + { 0, 1,17,18,-2, 2,17 }, // 27 + { 1, 1,18,18, 3, 5, 2 }, // 28 + { 0, 1,17,17, 2,18, 4 }, // 29 + { 0, 1,18,17, 1, 4, 6 }, // 30 + { 1, 1, 3,17,18, 2,17 }, // 31 + { 1, 1,17, 3, 2, 1, 7 }, // 32 + { 0, 1,18,17,-1, 2, 3 }, // 33 + { 1, 1,17,17, 2, 1, 4 }, // 34 + { 1, 2,18,17,-1,17, 3 }, // 35 + { 1, 2,18,17, 2, 3,-1 }, // 36 + { 0, 2,18,18,-2, 2,17 }, // 37 + { 0, 1,17,17, 2,18, 4 }, // 38 + { 0, 5,-2,18,18,18, 2 }, // 39 + { 1, 1,18,18,-1, 6, 3 }, // 40 + { 0, 1,17,17,-2, 2, 3 }, // 41 + { 1, 1,18,17,18, 2,17 }, // 42 + { 0, 1,18,17, 4, 3, 1 }, // 43 + { 0, 1,-2,18, 2, 2,18 }, // 44 + { 1, 2,18,18,-2, 2,-1 }, // 45 + { 1, 1,17,17, 2, 1, 4 }, // 46 + { 0, 1,17,18,-2, 2,17 }, // 47 + { 1, 1,17, 3, 
2, 1, 7 }, // 48 + { 1, 3,18,-3,18, 2, 3 }, // 49 + { 1, 2,18,18,-2, 2,-1 }, // 50 + { 1, 1,18,18, 3, 5, 2 }, // 51 + { 0, 2,18,18,-1, 2,17 }, // 52 + { 0, 1,18,-1,17,18, 2 }, // 53 + { 0, 1,17,-1, 2, 3, 6 }, // 54 + { 0, 1,18,-2,18, 2, 5 }, // 55 + { 1, 2,18,18,-2, 2,-1 }, // 56 + { 0, 3,18,18, 2, 3,17 }, // 57 + { 0, 1,17,17, 2,18, 4 }, // 58 + { 1, 1,17,-2,17, 1, 2 }, // 59 + { 0, 1,-1, 3, 5, 4, 7 }, // 60 + { 0, 3,18,18, 3, 2, 5 }, // 61 + { 0, 1,17,17, 2,18, 4 }, // 62 + { 0, 1,18,17,-2,18, 3 }, // 63 + { 0, 2,18,18,-2, 2,17 }, // 64 + { 0, 3,18,17,-2, 2, 3 }, // 65 + { 1, 1,18,18,-2, 2,17 }, // 66 + { 0, 1,18,17, 4, 3, 1 }, // 67 + { 1, 2, 3,18,17, 2,17 }, // 68 + { 1, 2,18,18, 2,-2,18 }, // 69 + { 1, 2,18,18,-1,18, 2 }, // 70 + { 0, 2,18,18,-2, 2,17 }, // 71 + { 1, 3,18,18, 2, 3,-2 }, // 72 + { 0, 3,18,18, 3, 2, 5 }, // 73 + { 0, 1,18,-2,18, 2, 5 }, // 74 + { 1, 1,17, 3, 2, 1, 7 }, // 75 + { 1, 3,18,18,-2, 2,18 }, // 76 + { 1, 1,17,18,18,-2, 2 }, // 77 + { 0, 1,18,-2,18, 2, 5 }, // 78 + { 0, 2,18,-2,18, 2, 3 }, // 79 + { 0, 1,-1, 3, 4, 5, 7 }, // 80 + { 1, 1,17,17, 2,-1, 7 }, // 81 + { 0, 1,18,-1,-1, 2,-2 }, // 82 + { 0, 2,18,17, 2, 3,17 }, // 83 + { 0, 1,18,17, 2,18, 2 }, // 84 + { 0, 2,18,17,-1, 2,17 }, // 85 + { 0, 1, 1,18, 3, 2, 5 }, // 86 + { 0, 2,18,-2, 4,18, 2 }, // 87 + { 1, 1,18, 3, 1, 5, 4 }, // 88 + { 0, 1,18,17,18, 2, 5 }, // 89 + { 1, 1,18, 3, 1, 5, 4 }, // 90 + { 0, 4,18,18,-2, 2,18 }, // 91 + { 1, 1,18,18, 3, 2, 5 }, // 92 + { 1, 1,17,17, 2, 1, 4 }, // 93 + { 0, 2,18,18,-2,18, 2 }, // 94 + { 0, 2,18,18,-2,18, 2 }, // 95 + { 1, 1,18,18, 2, 1, 3 }, // 96 + { 1, 1,17,17, 2, 1, 4 }, // 97 + { 1, 2,17,17, 2,18, 3 }, // 98 + { 0, 1,18,17, 1, 4, 6 }, // 99 + { 1, 2,18,18,-2, 2,-1 }, // 100 + { 0, 1,18,-2,18, 2, 5 }, // 101 + { 1, 1,17, 2,18, 2,17 }, // 102 + { 0, 2,18,18,-2,18, 2 }, // 103 + { 0, 1,18,18, 3, 6,-1 }, // 104 + { 0, 1,18,17, 2,18, 3 }, // 105 + { 0, 1,18,17,-2, 2,17 }, // 106 + { 1, 1, 3,17,18, 2,17 }, // 107 + { 1, 3,18,-3,18, 2, 3 }, 
// 108 + { 1, 3,18,18,-3,18, 2 }, // 109 + { 1, 1,18, 3, 1, 5, 4 }, // 110 + { 0, 1,17,-2,17, 2,-3 }, // 111 + { 1, 1,18,18, 3, 5, 2 }, // 112 + { 1, 2,18,18,-2, 2,-1 }, // 113 + { 0, 1,18,-1,-1, 2,-2 }, // 114 + { 1, 1,18, 3, 1, 5, 4 }, // 115 + { 0, 3,18,17,-1, 2,17 }, // 116 + { 1, 3,18,17, 2,18,-2 }, // 117 + { 0, 2,18,18,-2,18, 2 }, // 118 + { 1, 2,18,18,-2, 2,-1 }, // 119 + { 1, 1,18, 3, 1, 5, 4 }, // 120 + { 0, 4, 3,18,18, 2,17 }, // 121 + { 0, 2,18,18,-2,18, 2 }, // 122 + { 1, 1,18,17,-1,18, 2 }, // 123 + { 0, 2,18,18,-2,18, 2 }, // 124 + { 0, 2,18,18,-2,18, 2 }, // 125 + { 0, 2,18,18,-2,18, 2 }, // 126 + { 0, 2,18,18,-2,18, 2 }, // 127 + { 1, 1,18,18,18, 3, 2 }, // 128 + { 0, 1,17,-1, 2, 3, 6 }, // 129 + { 0, 1,17,-1, 2, 3, 6 }, // 130 + { 0, 2,18,17,-2, 3, 2 }, // 131 + { 1, 3,18,17, 2,-2,18 }, // 132 + { 0, 2,18,18, 2,17, 3 }, // 133 + { 0, 1,18,18, 2,18,-2 }, // 134 + { 0, 2,18,-2, 4,18, 2 }, // 135 + { 0, 1,-2,18, 2, 2,18 }, // 136 + { 0, 2,18,17, 3, 6, 2 }, // 137 + { 0, 1,18,17,18, 2, 5 }, // 138 + { 0, 3,18,18,-2, 3, 2 }, // 139 + { 1, 1,18,18, 2,18, 5 }, // 140 + { 0, 1,17,-1, 2, 3, 6 }, // 141 + { 1, 4,18,18, 2, 3,-2 }, // 142 + { 0, 2,18,17,18, 2,-2 }, // 143 + { 0, 1, 1,18, 3, 2, 5 }, // 144 + { 1, 4,18,-2,18, 2, 3 }, // 145 + { 1, 2,18, 2,18, 3,-2 }, // 146 + { 0, 2,18,18,18, 2, 4 }, // 147 + { 0, 2, 3,17,18, 2,17 }, // 148 + { 1, 1,18,-1,18, 2,17 }, // 149 + { 1, 2,17,17, 2,18, 3 }, // 150 + { 0, 2,18,17,-2, 3, 2 }, // 151 + { 0, 1, 1,-1,-1, 2,17 }, // 152 + { 0, 3, 3,18,18, 2,17 }, // 153 + { 0, 1,18,-1,17,18, 2 }, // 154 + { 0, 1,18,17, 2,18, 3 }, // 155 + { 0, 2,18,18,-2,18, 2 }, // 156 + { 0, 1,18,17, 2,18, 2 }, // 157 + { 0, 2,18,18,-2,18, 2 }, // 158 + { 0, 2,18,18,-2,18, 2 }, // 159 + { 1, 2,17,17, 2,18, 3 }, // 160 + { 0, 1,18,17,-2, 2, 3 }, // 161 + { 0, 1,18,-2,18, 2, 5 }, // 162 + { 1, 4,18,-2,18, 2, 3 }, // 163 + { 1, 3,18,17, 2, 3, 6 }, // 164 + { 0, 2,18,18, 2,17, 3 }, // 165 + { 0, 2,18,17, 2,18, 2 }, // 166 + { 0, 
2,18,18,-2,18, 2 }, // 167 + { 1, 1,18,18, 3, 5, 2 }, // 168 + { 0, 2,18,18,-2, 2, 3 }, // 169 + { 1, 2,18,17, 2,17, 3 }, // 170 + { 0, 1,18,17, 2, 3,18 }, // 171 + { 0, 2,18,18,-2,18, 2 }, // 172 + { 1, 4,18,18, 2, 3,-2 }, // 173 + { 0, 1,17,-2,17, 2,-3 }, // 174 + { 0, 1,17,17, 2,18, 4 }, // 175 + { 1, 1,18,18,18, 2, 4 }, // 176 + { 1, 2,18, 2,18, 3,-2 }, // 177 + { 1, 1,18,18,-2, 2,17 }, // 178 + { 0, 2,18,18,-2,18, 2 }, // 179 + { 0, 2,18,18, 2,17, 3 }, // 180 + { 0, 2,18,18,18, 2, 4 }, // 181 + { 0, 2,18,18,-2,18, 2 }, // 182 + { 0, 2,18,17,-2, 3, 2 }, // 183 + { 0, 1, 1,-1,-1, 2,17 }, // 184 + { 1, 4,18,18, 2, 3,-2 }, // 185 + { 0, 2,18,18,-2,18, 2 }, // 186 + { 0, 1,18,-2,18, 3, 2 }, // 187 + { 0, 2,18,18,-2,18, 2 }, // 188 + { 0, 2,18,18,-2,18, 2 }, // 189 + { 0, 2,18,18,-2,18, 2 }, // 190 + { 0, 2,18,18,-2,18, 2 }, // 191 + { 0, 1,18,18,-2, 2,17 }, // 192 + { 0, 3,18,17, 2, 3,17 }, // 193 + { 1, 2,18,18, 2,-2,18 }, // 194 + { 0, 1,-1, 3, 5, 4, 7 }, // 195 + { 1, 1,18, 3, 1, 5, 4 }, // 196 + { 1, 1,18,18,-2,18, 3 }, // 197 + { 0, 2,18,17,18, 2,-2 }, // 198 + { 0, 2,18,18, 2,17, 3 }, // 199 + { 1, 2,18, 2,18, 3,-2 }, // 200 + { 1, 4,18,18, 2, 3,-2 }, // 201 + { 1, 3,18,17, 2, 3, 6 }, // 202 + { 0, 2,18,18,-2,18, 2 }, // 203 + { 1, 2,18,17,-2,-1,17 }, // 204 + { 0, 1,17,-1, 2, 3, 6 }, // 205 + { 0, 2,18,18,-2,18, 2 }, // 206 + { 0, 2,18,18,-2, 2, 3 }, // 207 + { 1, 1,18,18,18, 2, 5 }, // 208 + { 0, 1,17,17,-2, 2, 3 }, // 209 + { 0, 2,18,18,-2,18, 2 }, // 210 + { 0, 2,18,17, 3, 6, 2 }, // 211 + { 0, 2,18,17,18, 2, 3 }, // 212 + { 0, 3,18,17,-3,18, 2 }, // 213 + { 0, 1,18,18,18, 2, 3 }, // 214 + { 0, 1,18,-2,-3, 2, 6 }, // 215 + { 0, 2,18,18,-2,18, 2 }, // 216 + { 1, 1,18,17,18, 2, 5 }, // 217 + { 0, 2,18,18,-2,18, 2 }, // 218 + { 0, 2,18,18,-2,18, 2 }, // 219 + { 1, 1,18,17,18, 2, 5 }, // 220 + { 0, 2,18,18,-2,18, 2 }, // 221 + { 0, 2,18,18,-2,18, 2 }, // 222 + { 0, 2,18,18,-2,18, 2 }, // 223 + { 0, 1,18,18,18, 2, 3 }, // 224 + { 1, 1,17,-2,17, 1, 2 }, // 225 
+ { 1, 1,17,17, 2,-1, 7 }, // 226 + { 0, 1,18,17, 4, 3, 1 }, // 227 + { 1, 3,18,-3,18, 2, 3 }, // 228 + { 0, 1, 1,18, 3, 2, 5 }, // 229 + { 0, 2,18,18,-2,18, 2 }, // 230 + { 0, 2,18,18,-2,18, 2 }, // 231 + { 0, 1,18,18, 3, 6, 2 }, // 232 + { 0, 1,17,17, 2,18, 4 }, // 233 + { 0, 1,17,17, 2,18, 4 }, // 234 + { 0, 2,18,18,-2,18, 2 }, // 235 + { 0, 2,18,18,-2,18, 2 }, // 236 + { 0, 2,18,18,-2,18, 2 }, // 237 + { 1, 2,18,-2,18, 3, 2 }, // 238 + { 1, 1,17,-2,17, 1, 2 }, // 239 + { 1, 1,18,18, 3, 2, 5 }, // 240 + { 0, 1,18,18,-1, 2, 3 }, // 241 + { 0, 2,18,18,-2,18, 2 }, // 242 + { 0, 2,18,18,-2,18, 2 }, // 243 + { 0, 1,18,17,18, 2, 5 }, // 244 + { 0, 2,18,18,-2,18, 2 }, // 245 + { 0, 2,18,18,-2,18, 2 }, // 246 + { 0, 2,18,18,-2,18, 2 }, // 247 + { 0, 2,18,18,-2,18, 2 }, // 248 + { 0, 1, 3,18,18, 2,17 }, // 249 + { 0, 2,18,18,-2,18, 2 }, // 250 + { 0, 2,18,18,-2,18, 2 }, // 251 + { 0, 2,18,18,-2,18, 2 }, // 252 + { 0, 2,18,18,-2,18, 2 }, // 253 + { 0, 2,18,18,-2,18, 2 }, // 254 + { 0, 2,18,18,-2,18, 2 }, // 255 +}; + +static const WavpackDecorrSpec high_specs [] = { + { 1, 2,18,18,18,-2, 2, 3, 5,-1,17, 4 }, // 0 + { 0, 1,18,17,-2, 2,18, 3, 7, 2, 5, 4 }, // 1 + { 1, 2, 1,18, 3, 6,-2,18, 2, 3, 4, 5 }, // 2 + { 0, 2,18,18,-2, 2,18, 3, 6, 2,17, 4 }, // 3 + { 1, 2,18,18, 2,18, 3, 2,-1, 4,18, 5 }, // 4 + { 1, 1, 7, 6, 5, 3, 4, 2, 5, 4, 3, 7 }, // 5 + { 1, 1,17, 3,18, 7, 2, 6, 1, 4, 3, 5 }, // 6 + { 1, 1,-2,18,18,18, 3,-2, 6, 5, 2, 1 }, // 7 + { 1, 2,18,18,-1,18, 2, 3, 6,-2,17, 5 }, // 8 + { 0, 1,17,17,18, 3, 6, 4, 5, 2,18,-2 }, // 9 + { 1, 2, 1,18,-2, 3, 5, 2, 4,-1, 6, 1 }, // 10 + { 0, 2,18,18, 3, 6,18, 2, 4, 8, 5, 3 }, // 11 + { 0, 1,-2, 1,18, 2,-2, 7,18, 2,-1, 5 }, // 12 + { 1, 1, 4, 3, 8, 1, 5, 2, 5, 6, 2, 8 }, // 13 + { 1, 1,17,18, 2, 6, 3, 4,-1, 1, 8, 6 }, // 14 + { 0, 1,18,18, 3, 6, 3,-2, 2, 5,-1, 1 }, // 15 + { 0, 1,18,18,17,-1, 2,-2,18, 3, 4, 5 }, // 16 + { 1, 2,18,17, 2,-2,18, 3, 5, 7, 2, 4 }, // 17 + { 1, 2,18,18, 3, 6,-2,18, 2, 5, 8, 3 }, // 18 + { 0, 1,18,17, 
2,18,18, 2, 6, 5,17, 7 }, // 19 + { 1, 2,18,17, 2,18, 3, 2, 6,18,-1, 4 }, // 20 + { 1, 1, 5, 3, 6, 5, 3, 4, 1, 2, 4, 7 }, // 21 + { 1, 1, 5, 3, 6, 5, 3, 4, 1, 2, 4, 7 }, // 22 + { 0, 1,-2,18,18,18,-2, 3, 2, 4, 6, 5 }, // 23 + { 1, 2,18,17,-3, 3,-1,18, 2, 3, 6, 5 }, // 24 + { 0, 1,17,18, 7, 3,-2, 7, 1, 2, 4, 5 }, // 25 + { 1, 1, 2,18,18,-2, 2, 4,-1,18, 3, 6 }, // 26 + { 0, 3, 1,18, 4, 3, 5, 2, 4,18, 2, 3 }, // 27 + { 0, 1,-2,18, 2,18, 3, 7,18, 2, 6,-2 }, // 28 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 29 + { 1, 1,18,18, 5, 4, 6, 4, 5, 1, 4, 3 }, // 30 + { 1, 1,18, 3, 6, 5, 7, 8, 2, 3, 1,-1 }, // 31 + { 1, 1,18,18,18, 2,-2, 3, 5,18, 2, 8 }, // 32 + { 0, 2,18,17,-2, 2, 3,18,-3, 5, 2, 7 }, // 33 + { 1, 1, 1, 1,-1, 8,17, 3,-2, 2, 6,17 }, // 34 + { 0, 2,18,18,17, 2,-2, 3, 2, 4,18, 5 }, // 35 + { 1, 1,17,18, 2,-1, 5, 7,18, 3, 4, 6 }, // 36 + { 1, 1, 5, 4, 5,17, 3, 6, 3, 4, 7, 2 }, // 37 + { 0, 1,17, 3, 1, 7, 4, 2, 5,-2,18, 6 }, // 38 + { 0, 1,17,18, 2,18, 4, 3, 5, 7,-3, 6 }, // 39 + { 1, 2,17,17,-3,-2, 2, 8,18,-1, 3, 5 }, // 40 + { 0, 1,17,17,18, 2, 3, 6,-2, 8, 1, 7 }, // 41 + { 1, 1, 1, 2, 6,-2,18, 2, 5,-3, 7,-2 }, // 42 + { 0, 1,18,18, 3,18, 6, 8,-2, 2, 3, 5 }, // 43 + { 0, 1,18,17, 2,18,-2, 3, 7, 6, 2, 4 }, // 44 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 45 + { 1, 1,18,18, 2,-1, 3, 6, 1, 3, 4, 8 }, // 46 + { 0, 1,18,18, 3, 6, 5, 3,-2, 2,18,-1 }, // 47 + { 0, 1,18,17,-3,18, 2, 4,-2, 3, 6,17 }, // 48 + { 1, 3, 1, 2,17, 3,18, 7,-1, 5, 2, 4 }, // 49 + { 1, 1,18, 3,18, 6, 8,18,-2, 5, 7, 2 }, // 50 + { 0, 1,17, 2,18, 6, 3, 2, 5, 4, 8, 1 }, // 51 + { 0, 1,18,17,-1, 2, 3,18,18, 2, 3,17 }, // 52 + { 1, 1,18, 7, 6, 5, 5, 3, 1, 4, 2, 4 }, // 53 + { 1, 1, 6,17, 3, 8, 1, 5, 7,-1, 2, 1 }, // 54 + { 1, 1,18,-2,18, 3,-2, 2, 7, 4, 6,18 }, // 55 + { 1, 3,18,-3,18, 2, 3,18,-1, 7, 2, 5 }, // 56 + { 0, 2,18,-2, 7, 1, 3, 2, 4, 6,-3, 7 }, // 57 + { 1, 1,18,-2, 2,-3,18,-2,17,-1, 4, 2 }, // 58 + { 0, 3,17,17, 2, 5, 3, 7,18, 6, 4, 2 }, // 59 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 60 
+ { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 61 + { 1, 1,18,17, 4, 6, 6, 4, 5, 3, 4, 1 }, // 62 + { 0, 1,18, 5, 3, 6, 2, 3, 8, 1, 3, 7 }, // 63 + { 1, 2,18,17,-2, 2,18, 3, 5, 7,-1, 2 }, // 64 + { 0, 1, 1,18,18, 3, 6,-1, 4, 8, 5, 2 }, // 65 + { 1, 1, 1, 5, 3, 4, 1, 1, 3, 5, 7, 3 }, // 66 + { 0, 1, 3,18,18, 2,18,18,-1, 2, 3,18 }, // 67 + { 1, 2,18,18,-1,18, 2, 3, 4, 6,18, 5 }, // 68 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 69 + { 1, 1,18, 3, 1, 4, 5, 2, 7, 1, 3, 6 }, // 70 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 71 + { 1, 2,18,18,-1,18, 2, 3, 5,-2, 6, 8 }, // 72 + { 1, 1,17,18, 4, 8, 3, 2, 5, 2, 7, 6 }, // 73 + { 1, 4, 1, 2, 5,18,-2, 2, 3, 7,-1, 4 }, // 74 + { 0, 2,18,17,-1, 3, 6,18, 2, 3, 7, 5 }, // 75 + { 0, 1,-2,18, 2,-3, 6,18, 4, 3,-2, 5 }, // 76 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 77 + { 0, 1,17,17, 6, 2, 4, 8, 3, 5,-1,17 }, // 78 + { 1, 1,18, 3,18, 6, 8,18,-2, 5, 7, 2 }, // 79 + { 1, 2,17,17,-3, 2,18,-2, 8, 3, 6,-1 }, // 80 + { 1, 1,18,-2,17,18, 2, 3,-2, 6, 5, 4 }, // 81 + { 1, 2,18,17,-1, 3,18, 2, 5, 3, 6,-3 }, // 82 + { 0, 1,18,17, 2,18, 7,18, 2, 4, 3,17 }, // 83 + { 1, 3,18,18, 5, 6, 4, 3, 4,18, 6, 5 }, // 84 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 85 + { 1, 1, 7, 6, 5, 3, 4, 2, 5, 4, 3, 7 }, // 86 + { 0, 1,-2,18,18,18, 3, 6, 4, 2, 5, 2 }, // 87 + { 0, 3,18,17,-3,18, 3, 2, 5,-1,17, 3 }, // 88 + { 1, 1,17,18, 7, 3, 1, 7, 4, 2, 6, 5 }, // 89 + { 1, 1,18, 2,-2,-1,18, 5, 3,-2, 1, 2 }, // 90 + { 0, 3,18,18,-1, 3, 2, 7, 5,18, 4, 3 }, // 91 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 92 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 93 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 94 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 95 + { 1, 1,17,18, 2,-2, 4, 8,18, 3, 6, 5 }, // 96 + { 0, 2,18,17, 3, 5,-2, 7, 2,18, 3,-1 }, // 97 + { 1, 1,18, 2,-2,-1,18, 5, 3,-2, 1, 2 }, // 98 + { 0, 2, 3,17,18,18, 2, 5, 7, 6,18, 3 }, // 99 + { 1, 1,17,18,18, 4, 3, 2,18, 7, 8,-1 }, // 100 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 101 + { 0, 1,17, 1, 2, 3, 5, 6, 
1, 4, 8,17 }, // 102 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 103 + { 0, 2,18,17,-1,18,-3, 2, 8, 3, 6,17 }, // 104 + { 1, 1,17,17, 1, 2, 4, 5,-1, 2, 1, 6 }, // 105 + { 1, 1, 1, 2, 6,-2,18, 2,-3, 3,-2, 5 }, // 106 + { 0, 1,18, 3,18, 6,18, 5, 2, 4,-1, 8 }, // 107 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 108 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 109 + { 1, 1,18,18,-1, 2,18, 3, 6, 4,-2, 7 }, // 110 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 111 + { 0, 2,-1,18,18,18, 2,-2, 4, 7, 2, 3 }, // 112 + { 0, 3, 3,17,-2, 5, 2, 7,18, 6, 4, 5 }, // 113 + { 0, 1,17, 6,18, 3, 8, 4, 5, 3, 8,18 }, // 114 + { 0, 2,18, 2, 6, 2,18, 3, 2, 4, 5, 8 }, // 115 + { 0, 1, 3,18,18, 2,18,-1, 2,18, 2,17 }, // 116 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 117 + { 0, 1, 3, 6,17,-2, 5, 1, 2, 7, 4, 8 }, // 118 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 119 + { 1, 3, 3,18,17, 5, 6, 2, 7,-2, 8,18 }, // 120 + { 1, 1,18,-1, 3, 1, 7, 2,-1, 4, 6,17 }, // 121 + { 1, 1,18, 2,-2,-1,18, 5, 3,-2, 1, 2 }, // 122 + { 0, 2,18, 1, 2,18, 3, 6, 5, 2, 4, 8 }, // 123 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 124 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 125 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 126 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 127 + { 1, 1,17,-2, 2,18,18, 8, 5, 3, 2, 6 }, // 128 + { 0, 1,18,17, 2,18, 3, 2, 7,-2,18, 4 }, // 129 + { 1, 2, 1,18, 2, 3,-1, 5, 6, 4, 7,17 }, // 130 + { 0, 2,18,17, 3, 6,-2, 2, 3, 8, 5,17 }, // 131 + { 0, 2,18,18, 3, 2,18,-1, 2, 4, 3,17 }, // 132 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 133 + { 1, 2,17,-1,18, 2, 3,-2, 5,18, 2, 7 }, // 134 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 135 + { 1, 2,18,-3,18, 2, 3,-2,18, 5, 6,-3 }, // 136 + { 0, 2,18,17, 3, 5,-2, 7, 2,18, 3,-1 }, // 137 + { 1, 1, 1,18,-1, 2, 3, 1,-2, 8, 2, 5 }, // 138 + { 0, 1,18,18, 3, 6,18, 2, 3, 4, 8, 5 }, // 139 + { 0, 1,-2, 1,18, 2,-2, 5, 7,18, 2,-1 }, // 140 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 141 + { 1, 1,17,18,-1, 2, 8, 3, 4, 5, 1, 7 }, // 142 + { 0, 1,-1,18,18, 
2,18, 3, 5,18, 2, 8 }, // 143 + { 0, 2,18,18,-1, 2,18, 3,-2, 5, 4, 2 }, // 144 + { 1, 1,18,17, 2,18, 3, 8, 5, 2, 7,17 }, // 145 + { 0, 1,18,18, 3,18, 6, 8,-2, 2, 3, 5 }, // 146 + { 0, 1,18,18, 2,18, 2, 6,18, 2,17, 7 }, // 147 + { 1, 3,18,17,18, 2, 8,18, 5,-1, 3, 6 }, // 148 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 149 + { 1, 1,18, 7, 6, 5, 5, 3, 1, 4, 2, 4 }, // 150 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 151 + { 1, 2,18,17,-1, 3, 6,18, 2, 5, 8, 3 }, // 152 + { 0, 1,17,18,18, 4, 7, 2, 3,-2,18, 5 }, // 153 + { 1, 2,18, 1, 2, 6, 2, 5,18, 2, 4, 8 }, // 154 + { 0, 4,18, 4, 1, 2, 3, 5, 4, 1, 2, 6 }, // 155 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 156 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 157 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 158 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 159 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 160 + { 0, 2,18,17, 2,-1,18, 3,-3, 5, 2, 4 }, // 161 + { 0, 1,17,17, 3, 6, 3, 5,-2, 2,18,-1 }, // 162 + { 0, 2,18,18, 3,-2,18, 2,-3, 5, 3, 6 }, // 163 + { 1, 1,17,17, 2, 4, 1, 3, 5, 2, 6,-3 }, // 164 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 165 + { 0, 1,17, 1, 3, 2, 7, 1, 6, 3, 4, 8 }, // 166 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 167 + { 0, 1,17,-1,18, 2, 1, 5, 3, 8,-1,-2 }, // 168 + { 1, 1,17,18,-1, 8, 2, 5, 3, 4, 1, 6 }, // 169 + { 1, 2, 1,18, 3,-1, 5, 1, 2, 4, 7, 6 }, // 170 + { 0, 1,18,18, 3, 6, 5, 3,-2, 2,18,-1 }, // 171 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 172 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 173 + { 0, 1, 1,18,-1, 3, 8, 5, 6, 1, 2, 3 }, // 174 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 175 + { 0, 2,18,18, 2, 3, 6,18,-1, 4, 2, 3 }, // 176 + { 1, 1, 1, 3, 5,18, 2, 6, 7, 2, 3, 1 }, // 177 + { 1, 1, 1, 3, 8,18, 5, 2, 7, 1, 3,-2 }, // 178 + { 0, 2,17, 2,18, 3, 6, 2, 4, 5, 8, 3 }, // 179 + { 0, 1,18,17, 2,18, 3, 2, 7,-2,18, 4 }, // 180 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 181 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 182 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 183 + { 1, 
2,18,-3,18,-1, 3,-2, 5, 7, 1, 2 }, // 184 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 185 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 186 + { 0, 3,18,18, 2, 6,18, 5,18, 2, 3,17 }, // 187 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 188 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 189 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 190 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 191 + { 1, 3, 1,-1, 1, 3,-2, 2, 5, 7,-3,18 }, // 192 + { 1, 2,18, 7, 3,-3, 2, 8, 2, 5, 4,17 }, // 193 + { 1, 1, 1, 4, 5, 1, 3, 4, 6, 7, 8, 3 }, // 194 + { 0, 1,18,17, 2,18,-1, 2, 3,18, 2, 4 }, // 195 + { 0, 2,18,18,-2,18, 2, 3, 4, 7, 5,17 }, // 196 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 197 + { 1, 1,17,18, 2, 1, 3, 2, 5, 1, 2, 3 }, // 198 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 199 + { 0, 2,18,18,-1, 2, 3, 5, 8, 6, 1,-2 }, // 200 + { 0, 1,17,18, 8, 3, 4, 6, 5, 2, 8, 7 }, // 201 + { 1, 2, 1, 3,-2,18, 2, 5, 1, 7,-1,-2 }, // 202 + { 0, 3,18,17,-1, 3,18, 2, 3, 6, 4,17 }, // 203 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 204 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 205 + { 1, 2,18,18, 4,18, 6, 7, 8, 3,18, 2 }, // 206 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 207 + { 0, 2,17,-3,17, 2,-2, 8, 3,18, 4,-3 }, // 208 + { 1, 1,18,17, 3, 5, 6, 2, 8, 1, 3, 7 }, // 209 + { 0, 1,18,18, 3, 6, 5, 3,-2, 2,18,-1 }, // 210 + { 0, 3,18,18, 2, 6,18, 5,18, 2, 3,17 }, // 211 + { 1, 1,18,18, 5, 4, 6, 4, 5, 1, 4, 3 }, // 212 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 213 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 214 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 215 + { 0, 2, 3,17,18,-3, 2, 5,18, 6,-1, 7 }, // 216 + { 1, 1,17,18, 3, 2, 5,-1, 6, 8, 4, 7 }, // 217 + { 1, 1,18, 1,-2, 3, 2, 1, 7, 6, 3, 4 }, // 218 + { 0, 3, 1, 2,17, 3,18, 2, 7, 5, 4,-1 }, // 219 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 220 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 221 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 222 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 223 + { 1, 1,17,-2, 2,18,18, 8, 5, 3, 2, 6 }, // 
224 + { 0, 2,18, 5,18, 2, 3, 7,-2, 1, 6, 8 }, // 225 + { 0, 1, 2,-1,18,-1, 2, 4,-3, 5,18, 3 }, // 226 + { 0, 1, 3,17,18, 5, 2,18, 7, 3, 6, 5 }, // 227 + { 1, 4, 1, 2, 5,18,-2, 2, 3, 7,-1, 4 }, // 228 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 229 + { 0, 1, 1,18, 2, 1, 3, 4, 1, 5, 2, 7 }, // 230 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 231 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 232 + { 0, 1,17,17,18, 2, 4, 5,18,-2, 6, 3 }, // 233 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 234 + { 0, 2,18,18,-1, 3, 5, 6, 8,18, 2, 3 }, // 235 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 236 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 237 + { 0, 1,18,18, 4, 6, 8,18, 7, 3, 2, 5 }, // 238 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 239 + { 0, 2,-1,18,18,18, 2, 4,-2, 2, 3, 6 }, // 240 + { 0, 2,18,-2, 7, 1, 3, 2, 4, 6,-3, 7 }, // 241 + { 1, 1,17,18, 8, 3, 4, 6,-2, 5, 3, 8 }, // 242 + { 0, 2,18, 1, 2, 6, 2, 8, 3,18, 5, 4 }, // 243 + { 1, 1, 3,18,18, 2,18, 2,18, 3, 2,18 }, // 244 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 245 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 246 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 247 + { 1, 1, 3,17,18, 5, 2, 6, 7, 1, 4, 8 }, // 248 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 249 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 250 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 251 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 252 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 253 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 254 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 255 +}; + +static const WavpackDecorrSpec very_high_specs [] = { + { 1, 2,18,18, 2, 3,-2,18, 2, 4, 7, 5, 3, 6, 8,-1,18, 2 }, // 0 + { 0, 1,18,18,-1,18, 2, 3, 4, 6, 5, 7,18,-3, 8, 2,-1, 3 }, // 1 + { 1, 2, 1,18,-2, 4,18, 2, 3, 6,-1, 7, 5,-2,18, 8, 2, 4 }, // 2 + { 0, 1,17,17, 2, 3, 4,18,-1, 5, 6, 7,18, 2, 8,17, 3,-2 }, // 3 + { 1, 1,18,18, 2,18, 3, 2,18, 4,-1, 3,18, 2, 6, 8,17, 5 }, // 4 + { 0, 2,18,17, 2, 3,-2, 5,18,-3, 2, 4, 7, 3, 6, 8, 5,17 }, // 5 + { 1, 1,18,-2, 2,-3,18, 
5,-2,18, 2, 3, 6, 2,17, 4, 7,-1 }, // 6 + { 1, 1,17, 8,18, 3,-2, 2, 5, 4,18, 6, 3, 8, 7, 2, 5, 4 }, // 7 + { 0, 2,18,17,-2, 2,18, 3, 2, 5,-3, 4, 7,18, 3, 8, 6, 2 }, // 8 + { 1, 1, 3, 6, 5, 5, 1, 3, 7, 4, 2, 6, 4,18, 3, 7, 5, 6 }, // 9 + { 1, 2, 1,18, 3, 2,-2, 1, 5, 4, 6, 2, 7, 1, 8, 3,-1, 1 }, // 10 + { 0, 1,18,18, 2, 3, 6, 3, 5,-2, 2, 4,18, 3,-2,-1, 6, 7 }, // 11 + { 0, 1,-2,18, 2,18, 7, 2, 6,-2, 3, 4,18,18, 2,-3, 8, 5 }, // 12 + { 0, 2,18,18,18, 2, 4, 3,18, 5, 3, 6,-2, 2, 4,18, 8, 7 }, // 13 + { 0, 1,-2, 1,18, 2,-2,18,-1, 5, 7, 2, 3, 4,18, 2, 6, 2 }, // 14 + { 1, 1,17,18, 3, 2, 1, 7,-1, 2, 4, 3, 5, 6,-2,18, 7, 8 }, // 15 + { 1, 1,18,18, 2,18, 3, 4, 6,-2,18, 5, 8, 2, 3, 7, 4,-1 }, // 16 + { 0, 1,18,18,18,-1, 2, 3, 4, 6, 8,18, 3, 5, 2, 6, 7, 4 }, // 17 + { 1, 1,17,-2,18,18, 2, 5, 3, 8, 2,-1, 6, 1, 3, 4, 7, 5 }, // 18 + { 0, 1,17,17,18, 2, 3, 6,-2, 8, 1, 7, 5, 2, 3, 1, 4, 8 }, // 19 + { 1, 1,17,17, 3, 2, 7, 1, 4, 3, 6, 2, 5,-2, 8, 7,18, 6 }, // 20 + { 0, 1,18,17,-2, 2,18, 3,-3, 7, 6, 5, 2, 4,-1, 8, 3,17 }, // 21 + { 1, 1, 2,18,18,-2, 2, 4,-1, 5,18, 3, 8, 6, 2, 7,17, 4 }, // 22 + { 0, 1,17, 3, 6, 8, 5, 4, 3, 8, 1,18, 7, 2, 4, 5, 6, 3 }, // 23 + { 1, 2,17,18, 4, 8, 3, 2, 5, 7, 6, 8, 2, 7,-2,18, 3, 4 }, // 24 + { 1, 1, 6, 5, 5, 3, 4, 7, 3, 2, 4, 6, 3, 7, 1, 5, 2, 4 }, // 25 + { 1, 1, 1,18,-1, 2, 1, 3, 8,-2, 2, 5, 6, 3, 8, 7,18, 4 }, // 26 + { 0, 1, 1,17,-1,18, 3, 2, 5, 4, 6, 7, 8, 3, 4, 2, 1,-2 }, // 27 + { 0, 1,18, 2,18,18, 2,18, 6,-2,18, 7, 5, 4, 3, 2,18,-2 }, // 28 + { 0, 3, 1, 4,18, 3, 2, 4, 1, 5, 2, 3, 6,18, 8, 7, 2, 4 }, // 29 + { 0, 1,17,-2, 1,-3, 2,18, 3,-2, 4,18, 3, 6, 7,-3, 2, 8 }, // 30 + { 1, 1,17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 }, // 31 + { 1, 2,18,-1,17,18, 2, 3,-2,18, 5, 8, 2, 4, 3, 7, 6,-1 }, // 32 + { 1, 1,18,18,18,-2, 4, 2, 3,18, 5, 8, 2, 4, 6, 7,-2, 3 }, // 33 + { 1, 2,18,18,-2,18,-1, 3, 2, 5,18,-2, 7, 2, 3, 4, 6, 8 }, // 34 + { 0, 1,17,18,-1, 2, 4,18, 8, 3, 6, 5, 7,-3, 2, 4, 3,17 }, // 35 + { 1, 1,18,18,17, 2,-1,18, 3, 2,18, 6, 5, 4,18, 
7, 2,-1 }, // 36 + { 0, 2, 1,18,-1,18, 3, 2, 4, 6,-3, 7,-1, 5, 1, 2, 3, 8 }, // 37 + { 1, 1, 1,17,-2, 2,-3, 6, 3, 5, 1, 2, 7, 6, 8,-2, 4, 1 }, // 38 + { 0, 1,17,-1, 5, 1, 4, 3, 6, 2,-2,18, 3, 2, 4, 5, 8,-1 }, // 39 + { 0, 2,18,18,17, 2, 3,-2, 5,18, 2, 4, 7, 8, 6,17, 3, 5 }, // 40 + { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 41 + { 1, 2, 1,-1, 3, 2,18, 7,-2, 5, 2, 6, 4, 3,-1,18, 8, 7 }, // 42 + { 0, 2,18,17, 3,18, 2, 5, 4, 3, 6, 2, 7, 8,18, 3, 4, 5 }, // 43 + { 1, 1, 3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 }, // 44 + { 0, 2,18,18, 3,-3,18, 2, 6, 5, 3, 7,18, 4,-2, 8, 2, 3 }, // 45 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 46 + { 1, 1,17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 }, // 47 + { 1, 1, 3, 6, 5, 5, 1, 3, 7, 4, 2, 6, 4,18, 3, 7, 5, 6 }, // 48 + { 0, 1,18,18,18, 2, 4,-1,18, 8,-1, 2, 3, 4, 6,-2, 1, 7 }, // 49 + { 1, 1,18,-2,17,18, 2, 6, 3,-2, 5, 4, 7, 1,-3, 8, 2, 6 }, // 50 + { 0, 1,17,18,18, 4, 2, 7, 3, 6,-2,18, 8, 4, 5, 2, 7,17 }, // 51 + { 1, 1,18,18, 5, 4, 6, 4, 1, 5, 4, 3, 2, 5, 6, 1, 4, 5 }, // 52 + { 0, 1,18,18,-2,18, 2,-3, 3, 8, 5,18, 6, 4, 3,-1, 7, 2 }, // 53 + { 1, 1,18, 2,-2,-3,18, 5, 2, 3,-2, 4, 6, 1,-3, 2, 7, 8 }, // 54 + { 0, 1,18, 3, 5, 8, 2, 6, 7, 3, 1, 5, 2,-1, 8, 6, 7, 4 }, // 55 + { 1, 1, 4, 3, 8, 1, 5, 6, 2, 5, 8,-2, 2, 7, 3,18, 5, 4 }, // 56 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 57 + { 1, 1,17, 3,18,18, 7, 2, 4,18, 6, 2, 3,-1, 8, 5,18,-3 }, // 58 + { 0, 1, 3,17,18, 2,18, 6, 7,-3,18, 2, 5, 6, 3, 8, 7,-1 }, // 59 + { 1, 1,18,18, 2,18,18, 2,-1, 7, 3,18, 5, 2, 6, 4,-1,18 }, // 60 + { 0, 3,18, 3, 4, 1, 5, 2,18, 4, 2, 3,18, 7, 6, 1, 2, 4 }, // 61 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 62 + { 1, 1,17, 1,18, 2, 3, 6, 4, 5, 7,18, 3, 8, 2, 4,-2,17 }, // 63 + { 1, 2,18,17, 2, 3, 5,18, 6,-2, 7, 3, 2, 4,18, 8,-1, 5 }, // 64 + { 0, 2, 1,18,-1,18, 3, 2, 4, 6,-3, 7,-1, 5, 1, 2, 3, 8 }, // 65 + { 1, 1, 1,18,-1, 8, 2, 6, 3,-2, 1, 2, 5, 4,-3, 8, 6, 3 }, // 66 + 
{ 0, 1,18,18, 2,18, 2,18, 7, 6,18, 2,-2, 3, 5, 4,18, 8 }, // 67 + { 1, 2,18,17, 2, 3,18,-1, 2, 3, 6,18, 5, 4, 3, 7, 2, 8 }, // 68 + { 1, 2,18,18, 3,-2, 4,18, 5, 7, 6, 2, 4,-3, 8, 5,18, 3 }, // 69 + { 1, 1,17,-2,18,18, 2, 5, 3, 8, 2,-1, 6, 1, 3, 4, 7, 5 }, // 70 + { 1, 1, 3,17,18, 5, 7, 2, 4, 6, 1, 8,-1, 3, 7, 4, 1, 2 }, // 71 + { 0, 2, 1,-2, 2,18, 3, 5, 2, 4, 7,-1, 2, 3, 5,18,-2, 4 }, // 72 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 73 + { 1, 1, 1, 2,-2, 6,18,-3, 2, 7, 3,-2, 5, 6, 1, 8, 2, 4 }, // 74 + { 0, 1,18,18,18, 3,-2, 6,18, 2, 4, 3, 5, 8, 7, 6, 2,-2 }, // 75 + { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 76 + { 0, 1, 3,17,18, 2, 5,18, 6, 7, 5,-2, 2, 4,18, 3, 6, 8 }, // 77 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 78 + { 0, 2,17,-1,18, 2, 4,-1, 8, 3,18, 7,-3, 4, 5, 1, 2,-2 }, // 79 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 8, 6, 4, 5, 7,-1 }, // 80 + { 1, 1,18,18, 3, 6, 4, 8,-2, 2, 5, 3, 7,18, 6, 8, 4, 2 }, // 81 + { 1, 1,17,18,18,-2, 5, 2, 3, 1, 4,-1, 8, 6, 5, 3, 2,18 }, // 82 + { 1, 1,17,17, 1, 2, 4, 5, 2, 6,-1, 3, 1, 1,-2, 4, 2, 7 }, // 83 + { 1, 1,17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 }, // 84 + { 0, 1,18,17,-2,-3, 1, 2, 3, 2, 5, 4, 7,-3, 6,-2, 2, 1 }, // 85 + { 1, 1, 1, 3, 5,18, 1, 2, 7, 3, 6, 2, 5, 8,-1, 1, 4, 7 }, // 86 + { 1, 1,17, 3, 6, 8, 1, 4, 5, 3,-2, 7, 2, 8, 5, 6,18, 3 }, // 87 + { 1, 1,17,18, 2, 4, 8,-2, 3, 1, 5, 6, 7, 1, 2, 3, 4, 7 }, // 88 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 89 + { 1, 1, 3, 1, 8,18, 5, 2, 3,18, 6, 7,-2, 4, 3, 2, 8,18 }, // 90 + { 0, 1,18,17, 2,18, 3, 4,-1,18, 7, 6, 2, 8, 4,18,18, 5 }, // 91 + { 0, 1,18,18, 2,18,18, 2, 7,-2, 6, 5, 4, 3,18, 3, 2,17 }, // 92 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 93 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 94 + { 1, 1,17, 8,18, 3, 2, 1, 5, 4, 6,-1, 3,-3, 8,18, 7, 2 }, // 95 + { 1, 2,18,17,18, 2, 3, 5,-2,18, 6,-1, 2, 3, 7, 4, 8,17 }, // 96 + { 0, 1,-1,18,18, 
2,18, 3, 5,18, 2,18, 8, 6, 4, 5, 7,-1 }, // 97 + { 1, 2,18,18,-2,17, 2,18, 3, 4,18, 8, 7,-1, 2, 4, 5,17 }, // 98 + { 0, 2,17,-3,17, 3, 2,-2,18, 8, 4,-3, 2,18, 5, 3,-2, 6 }, // 99 + { 0, 1,18,18, 2,18,18, 2, 7,-2, 6, 5, 4, 3,18, 3, 2,17 }, // 100 + { 0, 2, 1,18,-1, 3, 5, 2,-3,18, 7, 3,-1, 6, 4, 2,17, 5 }, // 101 + { 1, 1,17,-2,17, 2,-3, 1, 5,-1, 4, 6, 3, 2, 8, 7,-2, 5 }, // 102 + { 1, 1, 1,18, 1, 3, 5, 8, 6, 2, 3,-1, 7, 1, 4, 8, 5,-3 }, // 103 + { 0, 2, 3,18,18, 2,18,-2, 6, 5, 7, 2, 4,18, 3, 6,-3, 5 }, // 104 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 105 + { 1, 1, 3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 }, // 106 + { 0, 4,18, 2,17, 3,18,-2, 2, 6,18, 2, 7, 3, 5, 4, 8,18 }, // 107 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 108 + { 0, 1,18,18, 2, 3, 6, 3, 5,-2, 2, 4,18, 3,-2,-1, 6, 7 }, // 109 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 110 + { 1, 1,17, 1, 2, 5, 3,-2, 1, 4, 3, 7, 6,-3, 2, 1, 1, 2 }, // 111 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 112 + { 1, 1,18,18,-2,18,-2, 2, 3, 6,18, 4,-1, 2, 3, 8, 1, 4 }, // 113 + { 1, 1,17,-2,17, 2,-3, 1, 5,-1, 4, 6, 3, 2, 8, 7,-2, 5 }, // 114 + { 0, 1,17,17,18, 3, 2,18,18, 6, 8, 2,-2, 3, 5, 4,17,18 }, // 115 + { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 116 + { 1, 1, 1, 3,-3,18,18, 6, 5,18, 2,-1, 3, 8, 7,-3, 4,17 }, // 117 + { 1, 1,18, 1, 2, 1, 3, 8, 7, 4, 1, 5, 2,-1,-3,18, 6, 2 }, // 118 + { 0, 1,18, 3, 5, 2, 6, 8,18, 5, 7, 2, 3,-1, 6, 7, 8, 5 }, // 119 + { 0, 2,18, 3,-2, 7, 8, 2, 5, 4,-3, 8, 3, 2,18, 5, 4, 6 }, // 120 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 121 + { 1, 3, 1, 1, 2, 5, 2, 7, 4, 3,-1,18,-2, 8, 2, 1, 6, 7 }, // 122 + { 0, 1, 3,17,18, 5, 2, 6, 7,18, 4, 5, 3, 6,18, 2, 7, 8 }, // 123 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 124 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 125 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 126 + { 0, 1, 
1,18, 1, 2, 3, 5, 1, 2, 6, 7, 4, 3, 8, 1,17, 5 }, // 127 + { 1, 2,17,-1,18,-2, 2, 3, 5,18, 2, 4, 6, 7, 3,-1, 5, 8 }, // 128 + { 1, 1,18,18,-3,18,-2, 2, 3,-2,18, 6, 4, 5, 8, 3,17,-3 }, // 129 + { 1, 1,18, 7, 6, 5, 5, 3, 1, 4, 2, 7, 3, 4,-3, 6,18, 8 }, // 130 + { 0, 2,18,18, 2, 3, 5,18, 2, 4, 3, 6,18, 7, 8,-1, 5, 2 }, // 131 + { 0, 1,18,17,-1, 2,18, 3, 2,18, 4, 3,18, 2, 6, 5, 8,17 }, // 132 + { 0, 2,18,17, 2, 3,18, 5,-1, 6, 7, 8, 2, 3, 4, 5,18, 6 }, // 133 + { 1, 2,18,-3,18, 2, 3,-2,-3, 5,18, 7, 6, 2, 4, 3, 8,-2 }, // 134 + { 1, 1,17,18,18,-2, 2, 3, 5, 4, 8,18,-1, 5, 3, 6,-2, 7 }, // 135 + { 1, 2,18,17, 2,-2,18, 3,-1, 4,18, 2, 7, 5, 3, 8, 6, 4 }, // 136 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 137 + { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 138 + { 0, 2,18,18, 3, 3,-2, 2, 5,18, 6, 3,-1, 4, 7,-1, 1, 2 }, // 139 + { 0, 1,-2, 1,18, 2,-2, 5, 7,18, 3, 2, 6, 2,-1, 4,-2,17 }, // 140 + { 0, 2,18,18,18, 2, 3,-2,18, 5, 4, 2, 6, 8, 3,-2, 4,18 }, // 141 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 142 + { 1, 1,17,18,-1, 3, 2, 5, 1, 3, 2, 8, 4, 7, 6, 2,-1, 5 }, // 143 + { 1, 1,17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 }, // 144 + { 0, 1,18,18,-2,18, 2, 3, 4, 5, 6,18, 8, 2, 3, 7,-2, 4 }, // 145 + { 0, 1,18,-2,18,18,-3,-2, 2, 3, 5, 8, 1, 2, 6, 4, 7,-1 }, // 146 + { 0, 1,18,17, 2,18, 3,-2, 2, 7, 6, 4,18, 3, 8, 7, 4, 2 }, // 147 + { 1, 1,17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 }, // 148 + { 1, 1,18,17,18, 2, 5, 3,-2,18, 6, 2, 3, 4, 8, 7, 5,-1 }, // 149 + { 0, 1, 2,-1,18,-1, 2, 4,-3,18, 5, 3, 6,18, 2, 4, 7, 8 }, // 150 + { 1, 1,17,18, 8, 3, 6, 4,-1, 5, 2, 7, 3, 8, 6, 5,18, 4 }, // 151 + { 0, 2,18, 3,-2, 7, 8, 2, 5, 4,-3, 8, 3, 2,18, 5, 4, 6 }, // 152 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 153 + { 1, 1, 1,18,-1, 8, 2, 6, 3,-2, 1, 2, 5, 4,-3, 8, 6, 3 }, // 154 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 155 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 156 
+ { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 157 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 158 + { 0, 1,17,18,18, 4, 2, 7, 3, 6,-2,18, 8, 4, 5, 2, 7,17 }, // 159 + { 1, 2,18,-1,18, 3,-2,18, 2, 5, 3, 6, 7, 2,-1,18, 8, 4 }, // 160 + { 1, 2, 1,18,-2, 4,18, 2, 3, 6,-1, 7, 5,-2,18, 8, 2, 4 }, // 161 + { 1, 2, 1,18,-3, 2, 3,18,-1, 5, 6, 2, 8, 3, 4, 1,-2, 7 }, // 162 + { 0, 1, 1,17,-1,18, 3, 2, 5, 4, 6, 7, 8, 3, 4, 2, 1,-2 }, // 163 + { 1, 1,18,17,18, 4, 3, 5, 1, 2, 6, 3, 4, 7, 1, 8, 5, 2 }, // 164 + { 0, 1,18,-2, 7, 1, 3, 2,-3, 4, 6,-2, 7, 8, 1, 5, 4, 3 }, // 165 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 166 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 167 + { 0, 2,18,18,18,-2, 2, 5, 3, 7,18, 2, 4,-3, 5, 6, 3, 8 }, // 168 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 169 + { 0, 3, 3,18,-1, 5, 2, 7,18, 6, 5, 2, 4, 3,-1, 7,18, 6 }, // 170 + { 0, 2,18,18,18, 4, 3, 2, 6, 4, 8,18, 5, 3, 2, 7,-2, 6 }, // 171 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 172 + { 0, 2,18,18,18, 2, 3,-2,18, 5, 4, 2, 6, 8, 3,-2, 4,18 }, // 173 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 174 + { 1, 1,17, 8,18, 3, 2, 1, 5, 4, 6,-1, 3,-3, 8,18, 7, 2 }, // 175 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 176 + { 0, 1,-1,18,18,18, 2, 4, 6,-2, 2, 8, 3, 4,18, 7,-1, 6 }, // 177 + { 0, 1,18, 1,-2, 2, 4, 1, 3,-1, 2, 5, 7, 1, 6, 8,-2,17 }, // 178 + { 0, 1,17,17,18, 2, 5, 4,18, 3, 8, 7, 4, 6, 8, 1, 5, 2 }, // 179 + { 1, 2,18,18, 5, 4, 6, 3, 4,18, 8, 4,-1, 7, 5, 3, 6, 2 }, // 180 + { 0, 1,18,18,-3,18, 3, 6, 2, 5, 7,18, 3, 8,-1, 4, 5, 2 }, // 181 + { 1, 1,18, 2,-2,-3,18, 5, 2,-2, 4, 3, 6,18, 8,-1, 2, 7 }, // 182 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 183 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 184 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 185 + { 1, 1,17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 
}, // 186 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 187 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 188 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 189 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 190 + { 0, 1,17,18, 3,18, 2, 5, 4, 7,-3, 6, 3, 2,18, 4, 7, 3 }, // 191 + { 1, 1, 1, 7, 4, 5, 3, 4, 5, 1, 3, 6, 3, 2, 4, 8,-2, 7 }, // 192 + { 0, 1, 1,18,-1,-2,18, 3, 2,-1, 6, 7, 4, 5, 3,18, 2,-3 }, // 193 + { 1, 1,18,18,-1, 3, 6,18, 5, 4, 8, 2, 3, 6,18, 7, 4,-2 }, // 194 + { 0, 2,18,18, 2, 6,18, 2,18, 5, 3,18, 2, 4, 7, 8, 3,18 }, // 195 + { 1, 1, 3,18,18, 5,18, 6, 2, 4, 7,-2,18, 5, 8, 6, 3, 2 }, // 196 + { 0, 1,18,-2, 7, 1, 3, 2,-3, 4, 6,-2, 7, 8, 1, 5, 4, 3 }, // 197 + { 1, 1,18,-2,18, 2, 5,18, 3,-2, 4, 7, 2,-1, 8, 6, 5, 1 }, // 198 + { 1, 1,17,17, 5,18, 4, 1, 2, 8, 6, 4,-2, 3, 5,-1, 1, 8 }, // 199 + { 0, 2, 1, 2,17, 3, 7,18, 2,-1, 4, 5,18, 2, 7, 3, 6, 8 }, // 200 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 201 + { 1, 1, 3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 }, // 202 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 203 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 204 + { 0, 2,18,18,18, 2,-2, 3, 6, 4, 8,18, 2, 5, 7, 4, 3, 6 }, // 205 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 206 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 207 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 208 + { 1, 1,18, 1, 8, 3, 5, 6, 4,-1, 8, 3, 7,18, 2, 5, 8, 4 }, // 209 + { 1, 1,17,18, 5, 2, 4, 3, 1, 6,-2, 1, 3, 2, 4, 5,-1,17 }, // 210 + { 1, 1,18,17, 2,18, 3,-3, 7, 2, 6, 4, 3, 5,18, 8, 2,-2 }, // 211 + { 1, 1,18,17,18, 4, 3, 5,-1,18, 2, 7, 8, 4, 6, 3,18, 5 }, // 212 + { 0, 1,18,17,18,-2, 2,-3, 3, 4, 8, 5, 2,18, 6, 3, 7,-2 }, // 213 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 214 + { 1, 1,17,18, 8, 3, 4, 6,18, 5,-2, 3, 8, 5, 2, 4, 7, 6 }, // 215 + { 0, 1,18,-2, 3, 5, 1, 7, 3, 2, 6,-3, 4, 1, 
5, 8, 3,-2 }, // 216 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 217 + { 1, 1, 3,17,18, 5,-1,18, 2, 6, 7,18, 5, 3,-3,-1, 6, 2 }, // 218 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 219 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 220 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 221 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 222 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 223 + { 1, 3,18,17,-2, 3,-1,18, 2, 5, 3, 7, 6, 2, 4, 8,18, 5 }, // 224 + { 0, 1,18,-1,18, 2,18, 3, 5,18, 2, 8,18, 5, 4,-1, 6, 2 }, // 225 + { 1, 2,18,-2,18,18, 2, 3, 4,-3, 2, 5,18, 7, 4, 3, 8, 6 }, // 226 + { 0, 2,17,-1,18, 2,-1, 1, 7, 3, 8, 5,-2, 4, 1, 2,-3, 6 }, // 227 + { 0, 1,18,17, 2,18, 2,18, 6, 7, 4, 3,18, 5, 2,-2,17, 8 }, // 228 + { 0, 3,18,17, 2, 3,-3,-1,18, 2, 4, 5,18, 7, 3, 2,-3, 6 }, // 229 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 230 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 231 + { 0, 2, 3,18,18,18, 2, 6, 5,18, 7, 2, 4, 6,18, 5, 3, 8 }, // 232 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 233 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 234 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 235 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 236 + { 0, 1,18,18, 3, 6, 3,-2, 2,18, 5,-1, 7, 3, 4,-2, 2, 6 }, // 237 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 238 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 239 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 240 + { 1, 1,18,17,18,18,-2, 2, 3,-3,18, 6, 4, 2,-2, 8, 3, 7 }, // 241 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 242 + { 0, 1,18,18,18, 4, 2, 7, 8,18, 3, 2,-2, 4, 7, 6,17, 5 }, // 243 + { 1, 1,18,18,-1,-2, 8, 3,18, 6, 3, 5, 8, 2, 4, 7, 1, 6 }, // 244 + { 1, 1, 1,-3, 3,18,18, 2,-1, 3, 6, 5,18, 4, 7,-2, 8, 3 }, // 245 + { 1, 1, 1,18, 4, 2, 5,18, 1, 
3,-1, 6, 1, 4, 8, 2, 5, 1 }, // 246 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 247 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 248 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 249 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 250 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 251 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 252 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 253 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 254 + { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 255 +}; + +#define NUM_FAST_SPECS (sizeof (fast_specs) / sizeof (fast_specs [0])) +#define NUM_DEFAULT_SPECS (sizeof (default_specs) / sizeof (default_specs [0])) +#define NUM_HIGH_SPECS (sizeof (high_specs) / sizeof (high_specs [0])) +#define NUM_VERY_HIGH_SPECS (sizeof (very_high_specs) / sizeof (very_high_specs [0])) diff --git a/third_party/wavpack/src/decorr_utils.c b/third_party/wavpack/src/decorr_utils.c new file mode 100644 index 0000000..a76b14c --- /dev/null +++ b/third_party/wavpack/src/decorr_utils.c @@ -0,0 +1,204 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// decorr_utils.c + +// This module contains the functions that process metadata blocks that are +// specific to the decorrelator. These would be called any time a WavPack +// block was parsed. These are in a module separate from the actual unpack +// decorrelation code (unpack.c) so that if an application just wants to get +// information from WavPack files (rather than actually decoding audio) then +// less code needs to be linked. 
+ +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +// Read decorrelation terms from specified metadata block into the +// decorr_passes array. The terms range from -3 to 8, plus 17 & 18; +// other values are reserved and generate errors for now. The delta +// ranges from 0 to 7 with all values valid. Note that the terms are +// stored in the opposite order in the decorr_passes array compared +// to packing. + +int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int termcnt = wpmd->byte_length; + unsigned char *byteptr = (unsigned char *)wpmd->data; + struct decorr_pass *dpp; + + if (termcnt > MAX_NTERMS) + return FALSE; + + wps->num_terms = termcnt; + + for (dpp = wps->decorr_passes + termcnt - 1; termcnt--; dpp--) { + dpp->term = (int)(*byteptr & 0x1f) - 5; + dpp->delta = (*byteptr++ >> 5) & 0x7; + + if (!dpp->term || dpp->term < -3 || (dpp->term > MAX_TERM && dpp->term < 17) || dpp->term > 18 || + ((wps->wphdr.flags & MONO_DATA) && dpp->term < 0)) + return FALSE; + } + + return TRUE; +} + +// Read decorrelation weights from specified metadata block into the +// decorr_passes array. The weights range +/-1024, but are rounded and +// truncated to fit in signed chars for metadata storage. Weights are +// separate for the two channels and are specified from the "last" term +// (first during encode). Unspecified weights are set to zero. 
+ +int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int termcnt = wpmd->byte_length, tcount; + char *byteptr = (char *)wpmd->data; + struct decorr_pass *dpp; + + if (!(wps->wphdr.flags & MONO_DATA)) + termcnt /= 2; + + if (termcnt > wps->num_terms) + return FALSE; + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + dpp->weight_A = dpp->weight_B = 0; + + while (--dpp >= wps->decorr_passes && termcnt--) { + dpp->weight_A = restore_weight (*byteptr++); + + if (!(wps->wphdr.flags & MONO_DATA)) + dpp->weight_B = restore_weight (*byteptr++); + } + + return TRUE; +} + +// Read decorrelation samples from specified metadata block into the +// decorr_passes array. The samples are signed 32-bit values, but are +// converted to signed log2 values for storage in metadata. Values are +// stored for both channels and are specified from the "last" term +// (first during encode) with unspecified samples set to zero. The +// number of samples stored varies with the actual term value, so +// those must obviously come first in the metadata. + +int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char *byteptr = (unsigned char *)wpmd->data; + unsigned char *endptr = byteptr + wpmd->byte_length; + struct decorr_pass *dpp; + int tcount; + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { + CLEAR (dpp->samples_A); + CLEAR (dpp->samples_B); + } + + if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) { + if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) + return FALSE; + + wps->dc.error [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->dc.error [1] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + } + } + + while (dpp-- > wps->decorr_passes && byteptr < endptr) + if (dpp->term > MAX_TERM) { + if (byteptr + (wps->wphdr.flags & MONO_DATA ? 
4 : 8) > endptr) + return FALSE; + + dpp->samples_A [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + dpp->samples_A [1] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + + if (!(wps->wphdr.flags & MONO_DATA)) { + dpp->samples_B [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + dpp->samples_B [1] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + } + } + else if (dpp->term < 0) { + if (byteptr + 4 > endptr) + return FALSE; + + dpp->samples_A [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + dpp->samples_B [0] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + } + else { + int m = 0, cnt = dpp->term; + + while (cnt--) { + if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) + return FALSE; + + dpp->samples_A [m] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + dpp->samples_B [m] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + } + + m++; + } + } + + return byteptr == endptr; +} + +// Read the shaping weights from specified metadata block into the +// WavpackStream structure. Note that there must be two values (even +// for mono streams) and that the values are stored in the same +// manner as decorrelation weights. These would normally be read from +// the "correction" file and are used for lossless reconstruction of +// hybrid data. + +int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd) +{ + if (wpmd->byte_length == 2) { + char *byteptr = (char *)wpmd->data; + + wps->dc.shaping_acc [0] = (int32_t) restore_weight (*byteptr++) << 16; + wps->dc.shaping_acc [1] = (int32_t) restore_weight (*byteptr++) << 16; + return TRUE; + } + else if (wpmd->byte_length >= (wps->wphdr.flags & MONO_DATA ? 
4 : 8)) { + unsigned char *byteptr = (unsigned char *)wpmd->data; + + wps->dc.error [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + wps->dc.shaping_acc [0] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->dc.error [1] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + wps->dc.shaping_acc [1] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + } + + if (wpmd->byte_length == (wps->wphdr.flags & MONO_DATA ? 6 : 12)) { + wps->dc.shaping_delta [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + + if (!(wps->wphdr.flags & MONO_DATA)) + wps->dc.shaping_delta [1] = wp_exp2s ((int16_t)(byteptr [2] + (byteptr [3] << 8))); + } + + return TRUE; + } + + return FALSE; +} diff --git a/third_party/wavpack/src/entropy_utils.c b/third_party/wavpack/src/entropy_utils.c new file mode 100644 index 0000000..fe8e405 --- /dev/null +++ b/third_party/wavpack/src/entropy_utils.c @@ -0,0 +1,378 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// entropy_utils.c + +// This module contains the functions that process metadata blocks that are +// specific to the entropy decoder; these would be called any time a WavPack +// block was parsed. Additionally, it contains tables and functions that are +// common to both entropy coding and decoding. These are in a module separate +// from the actual entropy encoder (write_words.c) and decoder (read_words.c) +// so that if applications that just do a subset of the full WavPack reading +// and writing can link with a subset of the library. 
+ +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// local table storage //////////////////////////// + +const uint32_t bitset [] = { + 1L << 0, 1L << 1, 1L << 2, 1L << 3, + 1L << 4, 1L << 5, 1L << 6, 1L << 7, + 1L << 8, 1L << 9, 1L << 10, 1L << 11, + 1L << 12, 1L << 13, 1L << 14, 1L << 15, + 1L << 16, 1L << 17, 1L << 18, 1L << 19, + 1L << 20, 1L << 21, 1L << 22, 1L << 23, + 1L << 24, 1L << 25, 1L << 26, 1L << 27, + 1L << 28, 1L << 29, 1L << 30, 1L << 31 +}; + +const uint32_t bitmask [] = { + (1L << 0) - 1, (1L << 1) - 1, (1L << 2) - 1, (1L << 3) - 1, + (1L << 4) - 1, (1L << 5) - 1, (1L << 6) - 1, (1L << 7) - 1, + (1L << 8) - 1, (1L << 9) - 1, (1L << 10) - 1, (1L << 11) - 1, + (1L << 12) - 1, (1L << 13) - 1, (1L << 14) - 1, (1L << 15) - 1, + (1L << 16) - 1, (1L << 17) - 1, (1L << 18) - 1, (1L << 19) - 1, + (1L << 20) - 1, (1L << 21) - 1, (1L << 22) - 1, (1L << 23) - 1, + (1L << 24) - 1, (1L << 25) - 1, (1L << 26) - 1, (1L << 27) - 1, + (1L << 28) - 1, (1L << 29) - 1, (1L << 30) - 1, 0x7fffffff +}; + +const char nbits_table [] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 0 - 15 + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 16 - 31 + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 32 - 47 + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 48 - 63 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 64 - 79 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 80 - 95 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 96 - 111 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 112 - 127 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128 - 143 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 144 - 159 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 160 - 175 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 176 - 191 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 192 - 207 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 208 - 223 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 
224 - 239 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 // 240 - 255 +}; + +static const unsigned char log2_table [] = { + 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15, + 0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, + 0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e, + 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51, + 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, + 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, + 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, + 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2, + 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0, + 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce, + 0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7, + 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4, + 0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff +}; + +static const unsigned char exp2_table [] = { + 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16, + 0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 
0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23, + 0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, + 0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a, + 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, + 0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, + 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, + 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4, + 0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9, + 0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff +}; + +///////////////////////////// executable code //////////////////////////////// + +// Read the median log2 values from the specifed metadata structure, convert +// them back to 32-bit unsigned values and store them. If length is not +// exactly correct then we flag and return an error. + +int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char *byteptr = (unsigned char *)wpmd->data; + + if (wpmd->byte_length != ((wps->wphdr.flags & MONO_DATA) ? 
6 : 12)) + return FALSE; + + wps->w.c [0].median [0] = wp_exp2s (byteptr [0] + (byteptr [1] << 8)); + wps->w.c [0].median [1] = wp_exp2s (byteptr [2] + (byteptr [3] << 8)); + wps->w.c [0].median [2] = wp_exp2s (byteptr [4] + (byteptr [5] << 8)); + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.c [1].median [0] = wp_exp2s (byteptr [6] + (byteptr [7] << 8)); + wps->w.c [1].median [1] = wp_exp2s (byteptr [8] + (byteptr [9] << 8)); + wps->w.c [1].median [2] = wp_exp2s (byteptr [10] + (byteptr [11] << 8)); + } + + return TRUE; +} + +// Read the hybrid related values from the specifed metadata structure, convert +// them back to their internal formats and store them. The extended profile +// stuff is not implemented yet, so return an error if we get more data than +// we know what to do with. + +int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char *byteptr = (unsigned char *)wpmd->data; + unsigned char *endptr = byteptr + wpmd->byte_length; + + if (wps->wphdr.flags & HYBRID_BITRATE) { + if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) + return FALSE; + + wps->w.c [0].slow_level = wp_exp2s (byteptr [0] + (byteptr [1] << 8)); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.c [1].slow_level = wp_exp2s (byteptr [0] + (byteptr [1] << 8)); + byteptr += 2; + } + } + + if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) + return FALSE; + + wps->w.bitrate_acc [0] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.bitrate_acc [1] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; + byteptr += 2; + } + + if (byteptr < endptr) { + if (byteptr + (wps->wphdr.flags & MONO_DATA ? 
2 : 4) > endptr) + return FALSE; + + wps->w.bitrate_delta [0] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.bitrate_delta [1] = wp_exp2s ((int16_t)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + } + + if (byteptr < endptr) + return FALSE; + } + else + wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0; + + return TRUE; +} + +// This function is called during both encoding and decoding of hybrid data to +// update the "error_limit" variable which determines the maximum sample error +// allowed in the main bitstream. In the HYBRID_BITRATE mode (which is the only +// currently implemented) this is calculated from the slow_level values and the +// bitrate accumulators. Note that the bitrate accumulators can be changing. + +void update_error_limit (WavpackStream *wps) +{ + int bitrate_0 = (wps->w.bitrate_acc [0] += wps->w.bitrate_delta [0]) >> 16; + + if (wps->wphdr.flags & MONO_DATA) { + if (wps->wphdr.flags & HYBRID_BITRATE) { + int slow_log_0 = (wps->w.c [0].slow_level + SLO) >> SLS; + + if (slow_log_0 - bitrate_0 > -0x100) + wps->w.c [0].error_limit = wp_exp2s (slow_log_0 - bitrate_0 + 0x100); + else + wps->w.c [0].error_limit = 0; + } + else + wps->w.c [0].error_limit = wp_exp2s (bitrate_0); + } + else { + int bitrate_1 = (wps->w.bitrate_acc [1] += wps->w.bitrate_delta [1]) >> 16; + + if (wps->wphdr.flags & HYBRID_BITRATE) { + int slow_log_0 = (wps->w.c [0].slow_level + SLO) >> SLS; + int slow_log_1 = (wps->w.c [1].slow_level + SLO) >> SLS; + + if (wps->wphdr.flags & HYBRID_BALANCE) { + int balance = (slow_log_1 - slow_log_0 + bitrate_1 + 1) >> 1; + + if (balance > bitrate_0) { + bitrate_1 = bitrate_0 * 2; + bitrate_0 = 0; + } + else if (-balance > bitrate_0) { + bitrate_0 = bitrate_0 * 2; + bitrate_1 = 0; + } + else { + bitrate_1 = bitrate_0 + balance; + bitrate_0 = bitrate_0 - balance; + } + } + + if (slow_log_0 - bitrate_0 > -0x100) + wps->w.c [0].error_limit = wp_exp2s 
(slow_log_0 - bitrate_0 + 0x100); + else + wps->w.c [0].error_limit = 0; + + if (slow_log_1 - bitrate_1 > -0x100) + wps->w.c [1].error_limit = wp_exp2s (slow_log_1 - bitrate_1 + 0x100); + else + wps->w.c [1].error_limit = 0; + } + else { + wps->w.c [0].error_limit = wp_exp2s (bitrate_0); + wps->w.c [1].error_limit = wp_exp2s (bitrate_1); + } + } +} + +// The concept of a base 2 logarithm is used in many parts of WavPack. It is +// a way of sufficiently accurately representing 32-bit signed and unsigned +// values storing only 16 bits (actually fewer). It is also used in the hybrid +// mode for quickly comparing the relative magnitude of large values (i.e. +// division) and providing smooth exponentials using only addition. + +// These are not strict logarithms in that they become linear around zero and +// can therefore represent both zero and negative values. They have 8 bits +// of precision and in "roundtrip" conversions the total error never exceeds 1 +// part in 225 except for the cases of +/-115 and +/-195 (which error by 1). + + +// This function returns the log2 for the specified 32-bit unsigned value. +// The maximum value allowed is about 0xff800000 and returns 8447. + +int FASTCALL wp_log2 (uint32_t avalue) +{ + int dbits; + + if ((avalue += avalue >> 9) < (1 << 8)) { + dbits = nbits_table [avalue]; + return (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff]; + } + else { + if (avalue < (1L << 16)) + dbits = nbits_table [avalue >> 8] + 8; + else if (avalue < (1L << 24)) + dbits = nbits_table [avalue >> 16] + 16; + else + dbits = nbits_table [avalue >> 24] + 24; + + return (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff]; + } +} + +// This function scans a buffer of longs and accumulates the total log2 value +// of all the samples. This is useful for determining maximum compression +// because the bitstream storage required for entropy coding is proportional +// to the base 2 log of the samples. 
On some platforms there is an assembly +// version of this. + +#if !defined(OPT_ASM_X86) && !defined(OPT_ASM_X64) + +uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit) +{ + uint32_t result = 0, avalue; + int dbits; + + while (num_samples--) { + avalue = abs (*samples++); + + if ((avalue += avalue >> 9) < (1 << 8)) { + dbits = nbits_table [avalue]; + result += (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff]; + } + else { + if (avalue < (1L << 16)) + dbits = nbits_table [avalue >> 8] + 8; + else if (avalue < (1L << 24)) + dbits = nbits_table [avalue >> 16] + 16; + else + dbits = nbits_table [avalue >> 24] + 24; + + result += dbits = (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff]; + + if (limit && dbits >= limit) + return (uint32_t) -1; + } + } + + return result; +} + +#endif + +// This function returns the log2 for the specified 32-bit signed value. +// All input values are valid and the return values are in the range of +// +/- 8192. + +int wp_log2s (int32_t value) +{ + return (value < 0) ? -wp_log2 (-value) : wp_log2 (value); +} + +// This function returns the original integer represented by the supplied +// logarithm (at least within the provided accuracy). The log is signed, +// but since a full 32-bit value is returned this can be used for unsigned +// conversions as well (i.e. the input range is -8192 to +8447). + +int32_t wp_exp2s (int log) +{ + uint32_t value; + + if (log < 0) + return -wp_exp2s (-log); + + value = exp2_table [log & 0xff] | 0x100; + + if ((log >>= 8) <= 9) + return value >> (9 - log); + else + return value << (log - 9); +} + +// These two functions convert internal weights (which are normally +/-1024) +// to and from an 8-bit signed character version for storage in metadata. The +// weights are clipped here in the case that they are outside that range. 
+ +signed char store_weight (int weight) +{ + if (weight > 1024) + weight = 1024; + else if (weight < -1024) + weight = -1024; + + if (weight > 0) + weight -= (weight + 64) >> 7; + + return (weight + 4) >> 3; +} + +int restore_weight (signed char weight) +{ + int result; + + if ((result = (int) weight << 3) > 0) + result += (result + 64) >> 7; + + return result; +} diff --git a/third_party/wavpack/src/extra1.c b/third_party/wavpack/src/extra1.c index 4936fb6..80a6362 100644 --- a/third_party/wavpack/src/extra1.c +++ b/third_party/wavpack/src/extra1.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // All Rights Reserved. // // Distributed under the BSD Software License (see license.txt) // //////////////////////////////////////////////////////////////////////////// @@ -10,28 +10,41 @@ // This module handles the "extra" mode for mono files. -#include "wavpack_local.h" - #include #include #include #include -//#define USE_OVERHEAD -#define LOG_LIMIT 6912 -//#define EXTRA_DUMP +#include "wavpack_local.h" -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); +// This flag causes this module to take into account the size of the header +// (which grows with more decorrelation passes) when making decisions about +// adding additional passes (as opposed to just considering the resulting +// magnitude of the residuals). With really long blocks it seems to actually +// hurt compression (for reasons I cannot explain), but with short blocks it +// works okay, so we're enabling it for now. 
+ +#define USE_OVERHEAD + +// If the log2 value of any sample in a buffer being scanned exceeds this value, +// we abandon that configuration. This prevents us from going down paths that +// are wildly unstable. + +#define LOG_LIMIT 6912 + +//#define EXTRA_DUMP // dump generated filter data error_line() + +#ifdef OPT_ASM_X86 + #define PACK_DECORR_MONO_PASS_CONT pack_decorr_mono_pass_cont_x86 +#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__)) + #define PACK_DECORR_MONO_PASS_CONT pack_decorr_mono_pass_cont_x64win +#elif defined(OPT_ASM_X64) + #define PACK_DECORR_MONO_PASS_CONT pack_decorr_mono_pass_cont_x64 #endif -//////////////////////////////// local tables /////////////////////////////// +#ifdef PACK_DECORR_MONO_PASS_CONT + void PACK_DECORR_MONO_PASS_CONT (int32_t *out_buffer, int32_t *in_buffer, struct decorr_pass *dpp, int32_t sample_count); +#endif typedef struct { int32_t *sampleptrs [MAX_NTERMS+2]; @@ -42,13 +55,22 @@ typedef struct { static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_t num_samples, struct decorr_pass *dpp, int dir) { + int32_t cont_samples = 0; int m = 0, i; +#ifdef PACK_DECORR_MONO_PASS_CONT + if (num_samples > 16 && dir > 0) { + int32_t pre_samples = (dpp->term > MAX_TERM) ? 
2 : dpp->term; + cont_samples = num_samples - pre_samples; + num_samples = pre_samples; + } +#endif + dpp->sum_A = 0; if (dir < 0) { - out_samples += (num_samples - 1); - in_samples += (num_samples - 1); + out_samples += (num_samples + cont_samples - 1); + in_samples += (num_samples + cont_samples - 1); dir = -1; } else @@ -57,7 +79,7 @@ static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_ dpp->weight_A = restore_weight (store_weight (dpp->weight_A)); for (i = 0; i < 8; ++i) - dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i])); + dpp->samples_A [i] = wp_exp2s (wp_log2s (dpp->samples_A [i])); if (dpp->term > MAX_TERM) { while (num_samples--) { @@ -108,6 +130,11 @@ static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_ m = (m + 1) & (MAX_TERM - 1); } } + +#ifdef PACK_DECORR_MONO_PASS_CONT + if (cont_samples) + PACK_DECORR_MONO_PASS_CONT (out_samples, in_samples, dpp, cont_samples); +#endif } static void reverse_mono_decorr (struct decorr_pass *dpp) @@ -224,7 +251,7 @@ static void recurse_mono (WavpackContext *wpc, WavpackExtraInfo *info, int depth info->dps [depth].term = term; info->dps [depth].delta = delta; decorr_mono_buffer (samples, outsamples, wps->wphdr.block_samples, info->dps, depth); - bits = log2buffer (outsamples, wps->wphdr.block_samples, info->log_limit); + bits = LOG2BUFFER (outsamples, wps->wphdr.block_samples, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (info->dps [0].term, depth + 1); @@ -289,7 +316,7 @@ static void delta_mono (WavpackContext *wpc, WavpackExtraInfo *info) decorr_mono_buffer (info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, info->dps, i); } - bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit); + bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (wps->decorr_passes [0].term, i); @@ -314,7 +341,7 @@ static void 
delta_mono (WavpackContext *wpc, WavpackExtraInfo *info) decorr_mono_buffer (info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, info->dps, i); } - bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit); + bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (wps->decorr_passes [0].term, i); @@ -358,7 +385,7 @@ static void sort_mono (WavpackContext *wpc, WavpackExtraInfo *info) for (i = ri; i < info->nterms && wps->decorr_passes [i].term; ++i) decorr_mono_buffer (info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, info->dps, i); - bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit); + bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (wps->decorr_passes [0].term, i); @@ -412,13 +439,13 @@ static void analyze_mono (WavpackContext *wpc, int32_t *samples, int do_samples) for (i = 0; i < info.nterms && info.dps [i].term; ++i) decorr_mono_pass (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1); - info.best_bits = log2buffer (info.sampleptrs [info.nterms], wps->wphdr.block_samples, 0) * 1; + info.best_bits = LOG2BUFFER (info.sampleptrs [info.nterms], wps->wphdr.block_samples, 0) * 1; info.best_bits += log2overhead (info.dps [0].term, i); memcpy (info.sampleptrs [info.nterms + 1], info.sampleptrs [i], wps->wphdr.block_samples * 4); if (wpc->config.extra_flags & EXTRA_BRANCHES) recurse_mono (wpc, &info, 0, (int) floor (wps->delta_decay + 0.5), - log2buffer (info.sampleptrs [0], wps->wphdr.block_samples, 0)); + LOG2BUFFER (info.sampleptrs [0], wps->wphdr.block_samples, 0)); if (wpc->config.extra_flags & EXTRA_SORT_FIRST) sort_mono (wpc, &info); @@ -500,6 +527,12 @@ void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do uint32_t best_size = (uint32_t) 
-1, size; int log_limit, pi, i; +#ifdef SKIP_DECORRELATION + CLEAR (wps->decorr_passes); + wps->num_terms = 0; + return; +#endif + for (i = 0; i < num_samples; ++i) if (samples [i]) break; @@ -571,7 +604,7 @@ void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do } wpds = &wps->decorr_specs [c]; - nterms = (int) strlen (wpds->terms); + nterms = (int) strlen ((char *) wpds->terms); while (1) { memcpy (temp_buffer [0], noisy_buffer ? noisy_buffer : samples, buf_size); @@ -598,7 +631,7 @@ void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do decorr_mono_pass (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1); } - size = log2buffer (temp_buffer [j&1], num_samples, log_limit); + size = LOG2BUFFER (temp_buffer [j&1], num_samples, log_limit); if (size == (uint32_t) -1 && nterms) nterms >>= 1; diff --git a/third_party/wavpack/src/extra2.c b/third_party/wavpack/src/extra2.c index 17fcf01..8a1a305 100644 --- a/third_party/wavpack/src/extra2.c +++ b/third_party/wavpack/src/extra2.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // MMX optimizations (c) 2006 Joachim Henke // // All Rights Reserved. // // Distributed under the BSD Software License (see license.txt) // @@ -11,325 +11,74 @@ // This module handles the "extra" mode for stereo files. 
-#include "wavpack_local.h" - #include #include #include #include -//#define USE_OVERHEAD -#define LOG_LIMIT 6912 -//#define EXTRA_DUMP +#include "wavpack_local.h" -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); +// This flag causes this module to take into account the size of the header +// (which grows with more decorrelation passes) when making decisions about +// adding additional passes (as opposed to just considering the resulting +// magnitude of the residuals). With really long blocks it seems to actually +// hurt compression (for reasons I cannot explain), but with short blocks it +// works okay, so we're enabling it for now. + +#define USE_OVERHEAD + +// If the log2 value of any sample in a buffer being scanned exceeds this value, +// we abandon that configuration. This prevents us from going down paths that +// are wildly unstable. 
+ +#define LOG_LIMIT 6912 + +//#define EXTRA_DUMP // dump generated filter data to error_line() + +#ifdef OPT_ASM_X86 + #define PACK_DECORR_STEREO_PASS_CONT pack_decorr_stereo_pass_cont_x86 + #define PACK_DECORR_STEREO_PASS_CONT_REV pack_decorr_stereo_pass_cont_rev_x86 + #define PACK_DECORR_STEREO_PASS_CONT_AVAILABLE pack_cpu_has_feature_x86(CPU_FEATURE_MMX) +#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__)) + #define PACK_DECORR_STEREO_PASS_CONT pack_decorr_stereo_pass_cont_x64win + #define PACK_DECORR_STEREO_PASS_CONT_REV pack_decorr_stereo_pass_cont_rev_x64win + #define PACK_DECORR_STEREO_PASS_CONT_AVAILABLE 1 +#elif defined(OPT_ASM_X64) + #define PACK_DECORR_STEREO_PASS_CONT pack_decorr_stereo_pass_cont_x64 + #define PACK_DECORR_STEREO_PASS_CONT_REV pack_decorr_stereo_pass_cont_rev_x64 + #define PACK_DECORR_STEREO_PASS_CONT_AVAILABLE 1 #endif -//////////////////////////////// local tables /////////////////////////////// +#ifdef PACK_DECORR_STEREO_PASS_CONT + void PACK_DECORR_STEREO_PASS_CONT (struct decorr_pass *dpp, int32_t *in_buffer, int32_t *out_buffer, int32_t sample_count); + void PACK_DECORR_STEREO_PASS_CONT_REV (struct decorr_pass *dpp, int32_t *in_buffer, int32_t *out_buffer, int32_t sample_count); +#endif typedef struct { int32_t *sampleptrs [MAX_NTERMS+2]; struct decorr_pass dps [MAX_NTERMS]; - int nterms, log_limit, gt16bit; + int nterms, log_limit; uint32_t best_bits; } WavpackExtraInfo; -#ifdef OPT_MMX - -static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32_t num_samples, struct decorr_pass *dpp, int dir) -{ - const __m64 - delta = _mm_set1_pi32 (dpp->delta), - fill = _mm_set1_pi32 (0x7bff), - mask = _mm_set1_pi32 (0x7fff), - round = _mm_set1_pi32 (512), - zero = _mm_set1_pi32 (0); - __m64 - sum_AB = zero, - weight_AB = _mm_set_pi32 (restore_weight (store_weight (dpp->weight_B)), restore_weight (store_weight (dpp->weight_A))), - left_right, sam_AB, tmp0, tmp1, samples_AB 
[MAX_TERM]; - int k, m = 0; - - if (dir < 0) { - out_samples += (num_samples - 1) * 2; - in_samples += (num_samples - 1) * 2; - dir = -2; - } - else - dir = 2; - - for (k = 0; k < MAX_TERM; ++k) { - ((int32_t *) samples_AB) [k * 2] = exp2s (log2s (dpp->samples_A [k])); - ((int32_t *) samples_AB) [k * 2 + 1] = exp2s (log2s (dpp->samples_B [k])); - } - - if (dpp->term > 0) { - if (dpp->term == 17) { - while (num_samples--) { - left_right = *(__m64 *) in_samples; - tmp0 = samples_AB [0]; - sam_AB = _m_paddd (tmp0, tmp0); - sam_AB = _m_psubd (sam_AB, samples_AB [1]); - samples_AB [0] = left_right; - samples_AB [1] = tmp0; - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) out_samples = left_right; - - tmp0 = _m_pxor (sam_AB, left_right); - tmp0 = _m_psradi (tmp0, 31); - tmp1 = _m_pxor (delta, tmp0); - tmp1 = _m_psubd (tmp1, tmp0); - sam_AB = _m_pcmpeqd (sam_AB, zero); - tmp0 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, sam_AB); - tmp0 = _m_pandn (tmp0, tmp1); - weight_AB = _m_paddd (weight_AB, tmp0); - - sum_AB = _m_paddd (sum_AB, weight_AB); - - in_samples += dir; - out_samples += dir; - } - } - else if (dpp->term == 18) { - while (num_samples--) { - left_right = *(__m64 *) in_samples; - tmp0 = samples_AB [0]; - sam_AB = _m_psubd (tmp0, samples_AB [1]); - sam_AB = _m_psradi (sam_AB, 1); - sam_AB = _m_paddd (sam_AB, tmp0); - samples_AB [0] = left_right; - samples_AB [1] = tmp0; - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi 
(tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) out_samples = left_right; - - tmp0 = _m_pxor (sam_AB, left_right); - tmp0 = _m_psradi (tmp0, 31); - tmp1 = _m_pxor (delta, tmp0); - tmp1 = _m_psubd (tmp1, tmp0); - sam_AB = _m_pcmpeqd (sam_AB, zero); - tmp0 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, sam_AB); - tmp0 = _m_pandn (tmp0, tmp1); - weight_AB = _m_paddd (weight_AB, tmp0); - - sum_AB = _m_paddd (sum_AB, weight_AB); - - in_samples += dir; - out_samples += dir; - } - } - else { - k = dpp->term & (MAX_TERM - 1); - while (num_samples--) { - left_right = *(__m64 *) in_samples; - sam_AB = samples_AB [m]; - samples_AB [k] = left_right; - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) out_samples = left_right; - - tmp0 = _m_pxor (sam_AB, left_right); - tmp0 = _m_psradi (tmp0, 31); - tmp1 = _m_pxor (delta, tmp0); - tmp1 = _m_psubd (tmp1, tmp0); - sam_AB = _m_pcmpeqd (sam_AB, zero); - tmp0 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, sam_AB); - tmp0 = _m_pandn (tmp0, tmp1); - weight_AB = _m_paddd (weight_AB, tmp0); - - sum_AB = _m_paddd (sum_AB, weight_AB); - - in_samples += dir; - out_samples += dir; - k = (k + 1) & (MAX_TERM - 1); - m = (m + 1) & (MAX_TERM - 1); - } - } - } - else { - if (dpp->term == -1) { - while (num_samples--) { - left_right = *(__m64 *) in_samples; - sam_AB = samples_AB [0]; - samples_AB [0] = _m_punpckhdq (left_right, sam_AB); - sam_AB = _m_punpckldq (sam_AB, left_right); - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd 
(tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) out_samples = left_right; - - tmp0 = _m_pcmpeqd (sam_AB, zero); - tmp1 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, tmp1); - tmp0 = _m_pandn (tmp0, delta); - sam_AB = _m_pxor (sam_AB, left_right); - sam_AB = _m_psradi (sam_AB, 31); - tmp1 = _m_psubd (fill, sam_AB); - weight_AB = _m_pxor (weight_AB, sam_AB); - weight_AB = _m_paddd (weight_AB, tmp1); - weight_AB = _m_paddsw (weight_AB, tmp0); - weight_AB = _m_psubd (weight_AB, tmp1); - weight_AB = _m_pxor (weight_AB, sam_AB); - - sum_AB = _m_paddd (sum_AB, weight_AB); - - in_samples += dir; - out_samples += dir; - } - } - else if (dpp->term == -2) { - while (num_samples--) { - left_right = *(__m64 *) in_samples; - sam_AB = samples_AB [0]; - samples_AB [0] = _m_punpckldq (sam_AB, left_right); - sam_AB = _m_punpckhdq (left_right, sam_AB); - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) out_samples = left_right; - - tmp0 = _m_pcmpeqd (sam_AB, zero); - tmp1 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, tmp1); - tmp0 = _m_pandn (tmp0, delta); - sam_AB = _m_pxor (sam_AB, left_right); - sam_AB = _m_psradi (sam_AB, 31); - tmp1 = _m_psubd (fill, sam_AB); - weight_AB = _m_pxor (weight_AB, sam_AB); - weight_AB = _m_paddd (weight_AB, tmp1); - weight_AB = _m_paddsw (weight_AB, tmp0); - weight_AB = _m_psubd (weight_AB, tmp1); - weight_AB = _m_pxor (weight_AB, sam_AB); - - sum_AB = _m_paddd (sum_AB, weight_AB); - - in_samples += dir; - out_samples += dir; - } - } - else if 
(dpp->term == -3) { - while (num_samples--) { - left_right = *(__m64 *) in_samples; - sam_AB = samples_AB [0]; - tmp0 = _m_punpckhdq (left_right, left_right); - samples_AB [0] = _m_punpckldq (tmp0, left_right); - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) out_samples = left_right; - - tmp0 = _m_pcmpeqd (sam_AB, zero); - tmp1 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, tmp1); - tmp0 = _m_pandn (tmp0, delta); - sam_AB = _m_pxor (sam_AB, left_right); - sam_AB = _m_psradi (sam_AB, 31); - tmp1 = _m_psubd (fill, sam_AB); - weight_AB = _m_pxor (weight_AB, sam_AB); - weight_AB = _m_paddd (weight_AB, tmp1); - weight_AB = _m_paddsw (weight_AB, tmp0); - weight_AB = _m_psubd (weight_AB, tmp1); - weight_AB = _m_pxor (weight_AB, sam_AB); - - sum_AB = _m_paddd (sum_AB, weight_AB); - - in_samples += dir; - out_samples += dir; - } - } - } - dpp->sum_A = ((int32_t *) &sum_AB) [0]; - dpp->sum_B = ((int32_t *) &sum_AB) [1]; - dpp->weight_A = ((int32_t *) &weight_AB) [0]; - dpp->weight_B = ((int32_t *) &weight_AB) [1]; - - for (k = 0; k < MAX_TERM; ++k) { - dpp->samples_A [k] = ((int32_t *) samples_AB) [m * 2]; - dpp->samples_B [k] = ((int32_t *) samples_AB) [m * 2 + 1]; - m = (m + 1) & (MAX_TERM - 1); - } - _mm_empty (); -} - -#else - static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32_t num_samples, struct decorr_pass *dpp, int dir) { + int32_t cont_samples = 0; int m = 0, i; +#ifdef PACK_DECORR_STEREO_PASS_CONT + if (num_samples > 16 && PACK_DECORR_STEREO_PASS_CONT_AVAILABLE) { + int32_t pre_samples = (dpp->term < 0 || dpp->term > MAX_TERM) ? 
2 : dpp->term; + cont_samples = num_samples - pre_samples; + num_samples = pre_samples; + } +#endif + dpp->sum_A = dpp->sum_B = 0; if (dir < 0) { - out_samples += (num_samples - 1) * 2; - in_samples += (num_samples - 1) * 2; + out_samples += (num_samples + cont_samples - 1) * 2; + in_samples += (num_samples + cont_samples - 1) * 2; dir = -2; } else @@ -339,8 +88,8 @@ static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32 dpp->weight_B = restore_weight (store_weight (dpp->weight_B)); for (i = 0; i < 8; ++i) { - dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i])); - dpp->samples_B [i] = exp2s (log2s (dpp->samples_B [i])); + dpp->samples_A [i] = wp_exp2s (wp_log2s (dpp->samples_A [i])); + dpp->samples_B [i] = wp_exp2s (wp_log2s (dpp->samples_B [i])); } switch (dpp->term) { @@ -511,184 +260,15 @@ static void decorr_stereo_pass (int32_t *in_samples, int32_t *out_samples, int32 break; } -} +#ifdef PACK_DECORR_STEREO_PASS_CONT + if (cont_samples) { + if (dir < 0) + PACK_DECORR_STEREO_PASS_CONT_REV (dpp, in_samples, out_samples, cont_samples); + else + PACK_DECORR_STEREO_PASS_CONT (dpp, in_samples, out_samples, cont_samples); + } #endif - -static void decorr_stereo_pass_quick (int32_t *in_samples, int32_t *out_samples, int32_t num_samples, struct decorr_pass *dpp, int dir) -{ - int m = 0, i; - - if (dir < 0) { - out_samples += (num_samples - 1) * 2; - in_samples += (num_samples - 1) * 2; - dir = -2; - } - else - dir = 2; - - dpp->weight_A = restore_weight (store_weight (dpp->weight_A)); - dpp->weight_B = restore_weight (store_weight (dpp->weight_B)); - - for (i = 0; i < 8; ++i) { - dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i])); - dpp->samples_B [i] = exp2s (log2s (dpp->samples_B [i])); - } - - switch (dpp->term) { - - case 2: - while (num_samples--) { - int32_t sam, tmp; - - sam = dpp->samples_A [0]; - dpp->samples_A [0] = dpp->samples_A [1]; - out_samples [0] = tmp = (dpp->samples_A [1] = in_samples [0]) - apply_weight_i 
(dpp->weight_A, sam); - update_weight (dpp->weight_A, dpp->delta, sam, tmp); - - sam = dpp->samples_B [0]; - dpp->samples_B [0] = dpp->samples_B [1]; - out_samples [1] = tmp = (dpp->samples_B [1] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight (dpp->weight_B, dpp->delta, sam, tmp); - - in_samples += dir; - out_samples += dir; - } - - break; - - case 17: - while (num_samples--) { - int32_t sam, tmp; - - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - out_samples [0] = tmp = (dpp->samples_A [0] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam); - update_weight (dpp->weight_A, dpp->delta, sam, tmp); - - sam = 2 * dpp->samples_B [0] - dpp->samples_B [1]; - dpp->samples_B [1] = dpp->samples_B [0]; - out_samples [1] = tmp = (dpp->samples_B [0] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight (dpp->weight_B, dpp->delta, sam, tmp); - - in_samples += dir; - out_samples += dir; - } - - break; - - case 18: - while (num_samples--) { - int32_t sam, tmp; - - sam = dpp->samples_A [0] + ((dpp->samples_A [0] - dpp->samples_A [1]) >> 1); - dpp->samples_A [1] = dpp->samples_A [0]; - out_samples [0] = tmp = (dpp->samples_A [0] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam); - update_weight (dpp->weight_A, dpp->delta, sam, tmp); - - sam = dpp->samples_B [0] + ((dpp->samples_B [0] - dpp->samples_B [1]) >> 1); - dpp->samples_B [1] = dpp->samples_B [0]; - out_samples [1] = tmp = (dpp->samples_B [0] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight (dpp->weight_B, dpp->delta, sam, tmp); - - in_samples += dir; - out_samples += dir; - } - - break; - - default: { - int k = dpp->term & (MAX_TERM - 1); - - while (num_samples--) { - int32_t sam, tmp; - - sam = dpp->samples_A [m]; - out_samples [0] = tmp = (dpp->samples_A [k] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam); - update_weight (dpp->weight_A, dpp->delta, sam, tmp); - - sam = dpp->samples_B 
[m]; - out_samples [1] = tmp = (dpp->samples_B [k] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight (dpp->weight_B, dpp->delta, sam, tmp); - - in_samples += dir; - out_samples += dir; - m = (m + 1) & (MAX_TERM - 1); - k = (k + 1) & (MAX_TERM - 1); - } - - if (m) { - int32_t temp_A [MAX_TERM], temp_B [MAX_TERM]; - int k; - - memcpy (temp_A, dpp->samples_A, sizeof (dpp->samples_A)); - memcpy (temp_B, dpp->samples_B, sizeof (dpp->samples_B)); - - for (k = 0; k < MAX_TERM; k++) { - dpp->samples_A [k] = temp_A [m]; - dpp->samples_B [k] = temp_B [m]; - m = (m + 1) & (MAX_TERM - 1); - } - } - - break; - } - - case -1: - while (num_samples--) { - int32_t sam_A, sam_B, tmp; - - sam_A = dpp->samples_A [0]; - out_samples [0] = tmp = (sam_B = in_samples [0]) - apply_weight_i (dpp->weight_A, sam_A); - update_weight_clip (dpp->weight_A, dpp->delta, sam_A, tmp); - - out_samples [1] = tmp = (dpp->samples_A [0] = in_samples [1]) - apply_weight_i (dpp->weight_B, sam_B); - update_weight_clip (dpp->weight_B, dpp->delta, sam_B, tmp); - - in_samples += dir; - out_samples += dir; - } - - break; - - case -2: - while (num_samples--) { - int32_t sam_A, sam_B, tmp; - - sam_B = dpp->samples_B [0]; - out_samples [1] = tmp = (sam_A = in_samples [1]) - apply_weight_i (dpp->weight_B, sam_B); - update_weight_clip (dpp->weight_B, dpp->delta, sam_B, tmp); - - out_samples [0] = tmp = (dpp->samples_B [0] = in_samples [0]) - apply_weight_i (dpp->weight_A, sam_A); - update_weight_clip (dpp->weight_A, dpp->delta, sam_A, tmp); - - in_samples += dir; - out_samples += dir; - } - - break; - - case -3: - while (num_samples--) { - int32_t sam_A, sam_B, tmp; - - sam_A = dpp->samples_A [0]; - sam_B = dpp->samples_B [0]; - - dpp->samples_A [0] = tmp = in_samples [1]; - out_samples [1] = tmp -= apply_weight_i (dpp->weight_B, sam_B); - update_weight_clip (dpp->weight_B, dpp->delta, sam_B, tmp); - - dpp->samples_B [0] = tmp = in_samples [0]; - out_samples [0] = tmp -= apply_weight_i 
(dpp->weight_A, sam_A); - update_weight_clip (dpp->weight_A, dpp->delta, sam_A, tmp); - - in_samples += dir; - out_samples += dir; - } - - break; - } } static void reverse_decorr (struct decorr_pass *dpp) @@ -788,10 +368,7 @@ static void decorr_stereo_buffer (WavpackExtraInfo *info, int32_t *samples, int3 // if (memcmp (dppi, &dp, sizeof (dp))) // error_line ("decorr_passes don't match, delta = %d", delta); - if (info->gt16bit) - decorr_stereo_pass (samples, outsamples, num_samples, &dp, 1); - else - decorr_stereo_pass_quick (samples, outsamples, num_samples, &dp, 1); + decorr_stereo_pass (samples, outsamples, num_samples, &dp, 1); } static int log2overhead (int first_term, int num_terms) @@ -837,7 +414,7 @@ static void recurse_stereo (WavpackContext *wpc, WavpackExtraInfo *info, int dep info->dps [depth].term = term; info->dps [depth].delta = delta; decorr_stereo_buffer (info, samples, outsamples, wps->wphdr.block_samples, depth); - bits = log2buffer (outsamples, wps->wphdr.block_samples * 2, info->log_limit); + bits = LOG2BUFFER (outsamples, wps->wphdr.block_samples * 2, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (info->dps [0].term, depth + 1); @@ -903,7 +480,7 @@ static void delta_stereo (WavpackContext *wpc, WavpackExtraInfo *info) decorr_stereo_buffer (info, info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, i); } - bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit); + bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (wps->decorr_passes [0].term, i); @@ -928,7 +505,7 @@ static void delta_stereo (WavpackContext *wpc, WavpackExtraInfo *info) decorr_stereo_buffer (info, info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, i); } - bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit); + bits = LOG2BUFFER (info->sampleptrs [i], 
wps->wphdr.block_samples * 2, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (wps->decorr_passes [0].term, i); @@ -972,7 +549,7 @@ static void sort_stereo (WavpackContext *wpc, WavpackExtraInfo *info) for (i = ri; i < info->nterms && wps->decorr_passes [i].term; ++i) decorr_stereo_buffer (info, info->sampleptrs [i], info->sampleptrs [i+1], wps->wphdr.block_samples, i); - bits = log2buffer (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit); + bits = LOG2BUFFER (info->sampleptrs [i], wps->wphdr.block_samples * 2, info->log_limit); if (bits != (uint32_t) -1) bits += log2overhead (wps->decorr_passes [0].term, i); @@ -1001,8 +578,6 @@ static void analyze_stereo (WavpackContext *wpc, int32_t *samples, int do_sample WavpackExtraInfo info; int i; - info.gt16bit = ((wps->wphdr.flags & MAG_MASK) >> MAG_LSB) >= 16; - #ifdef LOG_LIMIT info.log_limit = (((wps->wphdr.flags & MAG_MASK) >> MAG_LSB) + 4) * 256; @@ -1026,18 +601,15 @@ static void analyze_stereo (WavpackContext *wpc, int32_t *samples, int do_sample memcpy (info.sampleptrs [0], samples, wps->wphdr.block_samples * 8); for (i = 0; i < info.nterms && info.dps [i].term; ++i) - if (info.gt16bit) - decorr_stereo_pass (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1); - else - decorr_stereo_pass_quick (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1); + decorr_stereo_pass (info.sampleptrs [i], info.sampleptrs [i + 1], wps->wphdr.block_samples, info.dps + i, 1); - info.best_bits = log2buffer (info.sampleptrs [info.nterms], wps->wphdr.block_samples * 2, 0) * 1; + info.best_bits = LOG2BUFFER (info.sampleptrs [info.nterms], wps->wphdr.block_samples * 2, 0) * 1; info.best_bits += log2overhead (info.dps [0].term, i); memcpy (info.sampleptrs [info.nterms + 1], info.sampleptrs [i], wps->wphdr.block_samples * 8); if (wpc->config.extra_flags & EXTRA_BRANCHES) recurse_stereo (wpc, &info, 0, (int) floor (wps->delta_decay 
+ 0.5), - log2buffer (info.sampleptrs [0], wps->wphdr.block_samples * 2, 0)); + LOG2BUFFER (info.sampleptrs [0], wps->wphdr.block_samples * 2, 0)); if (wpc->config.extra_flags & EXTRA_SORT_FIRST) sort_stereo (wpc, &info); @@ -1137,6 +709,12 @@ void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int uint32_t best_size = (uint32_t) -1, size; int log_limit, force_js = 0, force_ts = 0, pi, i; +#ifdef SKIP_DECORRELATION + CLEAR (wps->decorr_passes); + wps->num_terms = 0; + return; +#endif + for (i = 0; i < num_samples * 2; ++i) if (samples [i]) break; @@ -1216,7 +794,7 @@ void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int } wpds = &wps->decorr_specs [c]; - nterms = (int) strlen (wpds->terms); + nterms = (int) strlen ((char *) wpds->terms); while (1) { if (force_js || (wpds->joint_stereo && !force_ts)) { @@ -1258,14 +836,10 @@ void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int reverse_decorr (&temp_decorr_pass); memcpy (save_decorr_passes + j, &temp_decorr_pass, sizeof (struct decorr_pass)); - - if (((wps->wphdr.flags & MAG_MASK) >> MAG_LSB) >= 16) - decorr_stereo_pass (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1); - else - decorr_stereo_pass_quick (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1); + decorr_stereo_pass (temp_buffer [j&1], temp_buffer [~j&1], num_samples, &temp_decorr_pass, 1); } - size = log2buffer (temp_buffer [j&1], num_samples * 2, log_limit); + size = LOG2BUFFER (temp_buffer [j&1], num_samples * 2, log_limit); if (size == (uint32_t) -1 && nterms) nterms >>= 1; diff --git a/third_party/wavpack/src/float.c b/third_party/wavpack/src/float.c deleted file mode 100644 index a01cfb3..0000000 --- a/third_party/wavpack/src/float.c +++ /dev/null @@ -1,371 +0,0 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 
1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// float.c - -#include "wavpack_local.h" - -#include - -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -#ifndef NO_PACK - -void write_float_info (WavpackStream *wps, WavpackMetadata *wpmd) -{ - char *byteptr; - - byteptr = wpmd->data = malloc (4); - wpmd->id = ID_FLOAT_INFO; - *byteptr++ = wps->float_flags; - *byteptr++ = wps->float_shift; - *byteptr++ = wps->float_max_exp; - *byteptr++ = wps->float_norm_exp; - wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); -} - -int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values) -{ - int32_t shifted_ones = 0, shifted_zeros = 0, shifted_both = 0; - int32_t false_zeros = 0, neg_zeros = 0; - uint32_t ordata = 0, crc = 0xffffffff; - int32_t count, value, shift_count; - int max_exp = 0; - f32 *dp; - - wps->float_shift = wps->float_flags = 0; - - for (dp = values, count = num_values; count--; dp++) { - crc = crc * 27 + get_mantissa (*dp) * 9 + get_exponent (*dp) * 3 + get_sign (*dp); - - if (get_exponent (*dp) > max_exp && get_exponent (*dp) < 255) - max_exp = get_exponent (*dp); - } - - wps->crc_x = crc; - - for (dp = values, count = num_values; count--; dp++) { - if (get_exponent (*dp) == 255) { - wps->float_flags |= FLOAT_EXCEPTIONS; - value = 0x1000000; - shift_count = 0; - } - else if (get_exponent (*dp)) { - shift_count = max_exp - get_exponent (*dp); - value = 0x800000 + get_mantissa (*dp); - } - else { - shift_count = max_exp ? 
max_exp - 1 : 0; - value = get_mantissa (*dp); - -// if (get_mantissa (*dp)) -// denormals++; - } - - if (shift_count < 25) - value >>= shift_count; - else - value = 0; - - if (!value) { - if (get_exponent (*dp) || get_mantissa (*dp)) - ++false_zeros; - else if (get_sign (*dp)) - ++neg_zeros; - } - else if (shift_count) { - int32_t mask = (1 << shift_count) - 1; - - if (!(get_mantissa (*dp) & mask)) - shifted_zeros++; - else if ((get_mantissa (*dp) & mask) == mask) - shifted_ones++; - else - shifted_both++; - } - - ordata |= value; - * (int32_t *) dp = (get_sign (*dp)) ? -value : value; - } - - wps->float_max_exp = max_exp; - - if (shifted_both) - wps->float_flags |= FLOAT_SHIFT_SENT; - else if (shifted_ones && !shifted_zeros) - wps->float_flags |= FLOAT_SHIFT_ONES; - else if (shifted_ones && shifted_zeros) - wps->float_flags |= FLOAT_SHIFT_SAME; - else if (ordata && !(ordata & 1)) { - while (!(ordata & 1)) { - wps->float_shift++; - ordata >>= 1; - } - - for (dp = values, count = num_values; count--; dp++) - * (int32_t *) dp >>= wps->float_shift; - } - - wps->wphdr.flags &= ~MAG_MASK; - - while (ordata) { - wps->wphdr.flags += 1 << MAG_LSB; - ordata >>= 1; - } - - if (false_zeros || neg_zeros) - wps->float_flags |= FLOAT_ZEROS_SENT; - - if (neg_zeros) - wps->float_flags |= FLOAT_NEG_ZEROS; - -// error_line ("samples = %d, max exp = %d, pre-shift = %d, denormals = %d", -// num_values, max_exp, wps->float_shift, denormals); -// if (wps->float_flags & FLOAT_EXCEPTIONS) -// error_line ("exceptions!"); -// error_line ("shifted ones/zeros/both = %d/%d/%d, true/neg/false zeros = %d/%d/%d", -// shifted_ones, shifted_zeros, shifted_both, true_zeros, neg_zeros, false_zeros); - - return wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME); -} - -void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values) -{ - int max_exp = wps->float_max_exp; - int32_t count, value, shift_count; - f32 *dp; - - for (dp = values, count = 
num_values; count--; dp++) { - if (get_exponent (*dp) == 255) { - if (get_mantissa (*dp)) { - putbit_1 (&wps->wvxbits); - putbits (get_mantissa (*dp), 23, &wps->wvxbits); - } - else { - putbit_0 (&wps->wvxbits); - } - - value = 0x1000000; - shift_count = 0; - } - else if (get_exponent (*dp)) { - shift_count = max_exp - get_exponent (*dp); - value = 0x800000 + get_mantissa (*dp); - } - else { - shift_count = max_exp ? max_exp - 1 : 0; - value = get_mantissa (*dp); - } - - if (shift_count < 25) - value >>= shift_count; - else - value = 0; - - if (!value) { - if (wps->float_flags & FLOAT_ZEROS_SENT) { - if (get_exponent (*dp) || get_mantissa (*dp)) { - putbit_1 (&wps->wvxbits); - putbits (get_mantissa (*dp), 23, &wps->wvxbits); - - if (max_exp >= 25) { - putbits (get_exponent (*dp), 8, &wps->wvxbits); - } - - putbit (get_sign (*dp), &wps->wvxbits); - } - else { - putbit_0 (&wps->wvxbits); - - if (wps->float_flags & FLOAT_NEG_ZEROS) - putbit (get_sign (*dp), &wps->wvxbits); - } - } - } - else if (shift_count) { - if (wps->float_flags & FLOAT_SHIFT_SENT) { - int32_t data = get_mantissa (*dp) & ((1 << shift_count) - 1); - putbits (data, shift_count, &wps->wvxbits); - } - else if (wps->float_flags & FLOAT_SHIFT_SAME) { - putbit (get_mantissa (*dp) & 1, &wps->wvxbits); - } - } - } -} - -#endif - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - -int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - char *byteptr = wpmd->data; - - if (bytecnt != 4) - return FALSE; - - wps->float_flags = *byteptr++; - wps->float_shift = *byteptr++; - wps->float_max_exp = *byteptr++; - wps->float_norm_exp = *byteptr; - return TRUE; -} - -#endif - -#ifndef NO_UNPACK - -static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values); - -void float_values (WavpackStream *wps, int32_t *values, int32_t num_values) -{ - uint32_t crc = wps->crc_x; - - if (!bs_is_open (&wps->wvxbits)) { - float_values_nowvx (wps, values, 
num_values); - return; - } - - while (num_values--) { - int shift_count = 0, exp = wps->float_max_exp; - f32 outval = 0; - uint32_t temp; - - if (*values == 0) { - if (wps->float_flags & FLOAT_ZEROS_SENT) { - if (getbit (&wps->wvxbits)) { - getbits (&temp, 23, &wps->wvxbits); - set_mantissa (outval, temp); - - if (exp >= 25) { - getbits (&temp, 8, &wps->wvxbits); - set_exponent (outval, temp); - } - - set_sign (outval, getbit (&wps->wvxbits)); - } - else if (wps->float_flags & FLOAT_NEG_ZEROS) - set_sign (outval, getbit (&wps->wvxbits)); - } - } - else { - *values <<= wps->float_shift; - - if (*values < 0) { - *values = -*values; - set_sign (outval, 1); - } - - if (*values == 0x1000000) { - if (getbit (&wps->wvxbits)) { - getbits (&temp, 23, &wps->wvxbits); - set_mantissa (outval, temp); - } - - set_exponent (outval, 255); - } - else { - if (exp) - while (!(*values & 0x800000) && --exp) { - shift_count++; - *values <<= 1; - } - - if (shift_count) { - if ((wps->float_flags & FLOAT_SHIFT_ONES) || - ((wps->float_flags & FLOAT_SHIFT_SAME) && getbit (&wps->wvxbits))) - *values |= ((1 << shift_count) - 1); - else if (wps->float_flags & FLOAT_SHIFT_SENT) { - getbits (&temp, shift_count, &wps->wvxbits); - *values |= temp & ((1 << shift_count) - 1); - } - } - - set_mantissa (outval, *values); - set_exponent (outval, exp); - } - } - - crc = crc * 27 + get_mantissa (outval) * 9 + get_exponent (outval) * 3 + get_sign (outval); - * (f32 *) values++ = outval; - } - - wps->crc_x = crc; -} - -static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values) -{ - while (num_values--) { - int shift_count = 0, exp = wps->float_max_exp; - f32 outval = 0; - - if (*values) { - *values <<= wps->float_shift; - - if (*values < 0) { - *values = -*values; - set_sign (outval, 1); - } - - if (*values >= 0x1000000) { - while (*values & 0xf000000) { - *values >>= 1; - ++exp; - } - } - else if (exp) { - while (!(*values & 0x800000) && --exp) { - shift_count++; - *values <<= 
1; - } - - if (shift_count && (wps->float_flags & FLOAT_SHIFT_ONES)) - *values |= ((1 << shift_count) - 1); - } - - set_mantissa (outval, *values); - set_exponent (outval, exp); - } - - * (f32 *) values++ = outval; - } -} - -#endif - -void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp) -{ - f32 *fvalues = (f32 *) values; - int exp; - - if (!delta_exp) - return; - - while (num_values--) { - if ((exp = get_exponent (*fvalues)) == 0 || exp + delta_exp <= 0) - *fvalues = 0; - else if (exp == 255 || (exp += delta_exp) >= 255) { - set_exponent (*fvalues, 255); - set_mantissa (*fvalues, 0); - } - else - set_exponent (*fvalues, exp); - - fvalues++; - } -} diff --git a/third_party/wavpack/src/metadata.c b/third_party/wavpack/src/metadata.c deleted file mode 100644 index 0fd8a49..0000000 --- a/third_party/wavpack/src/metadata.c +++ /dev/null @@ -1,313 +0,0 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. 
// -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// metadata.c - -// This module handles the metadata structure introduced in WavPack 4.0 - -#include "wavpack_local.h" - -#include -#include - -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - -int read_metadata_buff (WavpackMetadata *wpmd, unsigned char *blockbuff, unsigned char **buffptr) -{ - WavpackHeader *wphdr = (WavpackHeader *) blockbuff; - unsigned char *buffend = blockbuff + wphdr->ckSize + 8; - - if (buffend - *buffptr < 2) - return FALSE; - - wpmd->id = *(*buffptr)++; - wpmd->byte_length = *(*buffptr)++ << 1; - - if (wpmd->id & ID_LARGE) { - wpmd->id &= ~ID_LARGE; - - if (buffend - *buffptr < 2) - return FALSE; - - wpmd->byte_length += *(*buffptr)++ << 9; - wpmd->byte_length += *(*buffptr)++ << 17; - } - - if (wpmd->id & ID_ODD_SIZE) { - wpmd->id &= ~ID_ODD_SIZE; - wpmd->byte_length--; - } - - if (wpmd->byte_length) { - if (buffend - *buffptr < wpmd->byte_length + (wpmd->byte_length & 1)) { - wpmd->data = NULL; - return FALSE; - } - - wpmd->data = *buffptr; - (*buffptr) += wpmd->byte_length + (wpmd->byte_length & 1); - } - else - wpmd->data = NULL; - - return TRUE; -} - -int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - WavpackStream *wps = wpc->streams [wpc->current_stream]; - - switch (wpmd->id) { - case ID_DUMMY: - return TRUE; - - case ID_DECORR_TERMS: - return read_decorr_terms (wps, wpmd); - - case ID_DECORR_WEIGHTS: - return read_decorr_weights (wps, wpmd); - - case ID_DECORR_SAMPLES: - return read_decorr_samples (wps, wpmd); - - case ID_ENTROPY_VARS: - return read_entropy_vars (wps, wpmd); - - case ID_HYBRID_PROFILE: - return 
read_hybrid_profile (wps, wpmd); - - case ID_SHAPING_WEIGHTS: - return read_shaping_info (wps, wpmd); - - case ID_FLOAT_INFO: - return read_float_info (wps, wpmd); - - case ID_INT32_INFO: - return read_int32_info (wps, wpmd); - - case ID_CHANNEL_INFO: - return read_channel_info (wpc, wpmd); - - case ID_CONFIG_BLOCK: - return read_config_info (wpc, wpmd); - - case ID_SAMPLE_RATE: - return read_sample_rate (wpc, wpmd); - - case ID_WV_BITSTREAM: - return init_wv_bitstream (wps, wpmd); - - case ID_WVC_BITSTREAM: - return init_wvc_bitstream (wps, wpmd); - - case ID_WVX_BITSTREAM: - return init_wvx_bitstream (wps, wpmd); - - case ID_RIFF_HEADER: case ID_RIFF_TRAILER: - return read_wrapper_data (wpc, wpmd); - - case ID_MD5_CHECKSUM: - if (wpmd->byte_length == 16) { - memcpy (wpc->config.md5_checksum, wpmd->data, 16); - wpc->config.flags |= CONFIG_MD5_CHECKSUM; - wpc->config.md5_read = 1; - } - - return TRUE; - - default: - return (wpmd->id & ID_OPTIONAL_DATA) ? TRUE : FALSE; - } -} - -#endif - -#ifndef NO_PACK - -int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end) -{ - uint32_t mdsize = wpmd->byte_length + (wpmd->byte_length & 1); - WavpackHeader *wphdr = (WavpackHeader *) buffer_start; - - if (wpmd->byte_length & 1) - ((char *) wpmd->data) [wpmd->byte_length] = 0; - - mdsize += (wpmd->byte_length > 510) ? 4 : 2; - buffer_start += wphdr->ckSize + 8; - - if (buffer_start + mdsize >= buffer_end) - return FALSE; - - buffer_start [0] = wpmd->id | (wpmd->byte_length & 1 ? 
ID_ODD_SIZE : 0); - buffer_start [1] = (wpmd->byte_length + 1) >> 1; - - if (wpmd->byte_length > 510) { - buffer_start [0] |= ID_LARGE; - buffer_start [2] = (wpmd->byte_length + 1) >> 9; - buffer_start [3] = (wpmd->byte_length + 1) >> 17; - } - - if (wpmd->data && wpmd->byte_length) { - if (wpmd->byte_length > 510) { - buffer_start [0] |= ID_LARGE; - buffer_start [2] = (wpmd->byte_length + 1) >> 9; - buffer_start [3] = (wpmd->byte_length + 1) >> 17; - memcpy (buffer_start + 4, wpmd->data, mdsize - 4); - } - else - memcpy (buffer_start + 2, wpmd->data, mdsize - 2); - } - - wphdr->ckSize += mdsize; - return TRUE; -} - -int add_to_metadata (WavpackContext *wpc, void *data, uint32_t bcount, unsigned char id) -{ - WavpackMetadata *mdp; - unsigned char *src = data; - - while (bcount) { - if (wpc->metacount) { - uint32_t bc = bcount; - - mdp = wpc->metadata + wpc->metacount - 1; - - if (mdp->id == id) { - if (wpc->metabytes + bcount > 1000000) - bc = 1000000 - wpc->metabytes; - - mdp->data = realloc (mdp->data, mdp->byte_length + bc); - memcpy ((char *) mdp->data + mdp->byte_length, src, bc); - mdp->byte_length += bc; - wpc->metabytes += bc; - bcount -= bc; - src += bc; - - if (wpc->metabytes >= 1000000 && !write_metadata_block (wpc)) - return FALSE; - } - } - - if (bcount) { - wpc->metadata = realloc (wpc->metadata, (wpc->metacount + 1) * sizeof (WavpackMetadata)); - mdp = wpc->metadata + wpc->metacount++; - mdp->byte_length = 0; - mdp->data = NULL; - mdp->id = id; - } - } - - return TRUE; -} - -static char *write_metadata (WavpackMetadata *wpmd, char *outdata) -{ - unsigned char id = wpmd->id, wordlen [3]; - - wordlen [0] = (wpmd->byte_length + 1) >> 1; - wordlen [1] = (wpmd->byte_length + 1) >> 9; - wordlen [2] = (wpmd->byte_length + 1) >> 17; - - if (wpmd->byte_length & 1) { -// ((char *) wpmd->data) [wpmd->byte_length] = 0; - id |= ID_ODD_SIZE; - } - - if (wordlen [1] || wordlen [2]) - id |= ID_LARGE; - - *outdata++ = id; - *outdata++ = wordlen [0]; - - if (id & 
ID_LARGE) { - *outdata++ = wordlen [1]; - *outdata++ = wordlen [2]; - } - - if (wpmd->data && wpmd->byte_length) { - memcpy (outdata, wpmd->data, wpmd->byte_length); - outdata += wpmd->byte_length; - - if (wpmd->byte_length & 1) - *outdata++ = 0; - } - - return outdata; -} - -int write_metadata_block (WavpackContext *wpc) -{ - char *block_buff, *block_ptr; - WavpackHeader *wphdr; - - if (wpc->metacount) { - int metacount = wpc->metacount, block_size = sizeof (WavpackHeader); - WavpackMetadata *wpmdp = wpc->metadata; - - while (metacount--) { - block_size += wpmdp->byte_length + (wpmdp->byte_length & 1); - block_size += (wpmdp->byte_length > 510) ? 4 : 2; - wpmdp++; - } - - wphdr = (WavpackHeader *) (block_buff = malloc (block_size)); - - CLEAR (*wphdr); - memcpy (wphdr->ckID, "wvpk", 4); - wphdr->total_samples = wpc->total_samples; - wphdr->version = wpc->stream_version; - wphdr->ckSize = block_size - 8; - wphdr->block_samples = 0; - - block_ptr = (char *)(wphdr + 1); - - wpmdp = wpc->metadata; - - while (wpc->metacount) { - block_ptr = write_metadata (wpmdp, block_ptr); - wpc->metabytes -= wpmdp->byte_length; - free_metadata (wpmdp++); - wpc->metacount--; - } - - free (wpc->metadata); - wpc->metadata = NULL; - native_to_little_endian ((WavpackHeader *) block_buff, WavpackHeaderFormat); - - if (!wpc->blockout (wpc->wv_out, block_buff, block_size)) { - free (block_buff); - strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); - return FALSE; - } - - free (block_buff); - } - - return TRUE; -} - -#endif - -void free_metadata (WavpackMetadata *wpmd) -{ - if (wpmd->data) { - free (wpmd->data); - wpmd->data = NULL; - } -} diff --git a/third_party/wavpack/src/open_filename.c b/third_party/wavpack/src/open_filename.c new file mode 100644 index 0000000..4c74e67 --- /dev/null +++ b/third_party/wavpack/src/open_filename.c @@ -0,0 +1,304 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid 
Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// open_filename.c + +// This module provides all the code required to open an existing WavPack +// file, by filename, for reading. It does not contain the actual code to +// unpack audio data and this was done so that programs that just want to +// query WavPack files for information (like, for example, taggers) don't +// need to link in a lot of unnecessary code. +// +// To allow opening files by filename, this code provides an interface +// between the reader callback mechanism that WavPack uses internally and +// the standard fstream C library. Note that in applications that do not +// require opening files by filename, this module can be omitted (which +// might make building easier). +// +// For Unicode support on Windows, a flag has been added (OPEN_FILE_UTF8) +// that forces the filename string to be assumed UTF-8 and converted to +// a widechar string suitable for _wfopen(). Without this flag we revert +// to the previous behavior of simply calling fopen() and hoping that the +// local character set works. This is ignored on non-Windows platforms +// (which is okay because they are probably UTF-8 anyway). 
+ +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#endif + +#include +#include + +#include "wavpack_local.h" + +#include +#include + +#if (defined(__GNUC__) || defined(__sun)) && !defined(_WIN32) +#include +#endif + +#ifdef __OS2__ +#include +#endif + +#ifdef _WIN32 +#define fileno _fileno +static FILE *fopen_utf8 (const char *filename_utf8, const char *mode_utf8); +#if !defined(S_ISREG) && defined(S_IFMT) && defined(S_IFREG) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif +#endif + +#ifdef HAVE_FSEEKO +#define fseek fseeko +#define ftell ftello +#endif + +static int32_t read_bytes (void *id, void *data, int32_t bcount) +{ + return (int32_t) fread (data, 1, bcount, (FILE*) id); +} + +static int64_t get_pos (void *id) +{ +#ifdef _WIN32 + return _ftelli64 ((FILE*) id); +#else + return ftell ((FILE*) id); +#endif +} + +static int set_pos_abs (void *id, int64_t pos) +{ +#ifdef _WIN32 + return _fseeki64 (id, pos, SEEK_SET); +#else + return fseek (id, pos, SEEK_SET); +#endif +} + +static int set_pos_rel (void *id, int64_t delta, int mode) +{ +#ifdef _WIN32 + return _fseeki64 (id, delta, mode); +#else + return fseek (id, delta, mode); +#endif +} + +static int push_back_byte (void *id, int c) +{ + return ungetc (c, id); +} + +#ifdef _WIN32 + +static int64_t get_length (void *id) +{ + LARGE_INTEGER Size; + HANDLE fHandle; + + if (id == NULL) + return 0; + + fHandle = (HANDLE)_get_osfhandle(_fileno((FILE*) id)); + if (fHandle == INVALID_HANDLE_VALUE) + return 0; + + Size.u.LowPart = GetFileSize(fHandle, &Size.u.HighPart); + + if (Size.u.LowPart == INVALID_FILE_SIZE && GetLastError() != NO_ERROR) + return 0; + + return (int64_t)Size.QuadPart; +} + +#else + +static int64_t get_length (void *id) +{ + FILE *file = id; + struct stat statbuf; + + if (!file || fstat (fileno (file), &statbuf) || !S_ISREG(statbuf.st_mode)) + return 0; + + return statbuf.st_size; +} + +#endif + +static int can_seek (void *id) +{ + FILE *file = id; + struct stat statbuf; + + 
return file && !fstat (fileno (file), &statbuf) && S_ISREG(statbuf.st_mode); +} + +static int32_t write_bytes (void *id, void *data, int32_t bcount) +{ + return (int32_t) fwrite (data, 1, bcount, (FILE*) id); +} + +#ifdef _WIN32 + +static int truncate_here (void *id) +{ + FILE *file = id; + int64_t curr_pos = _ftelli64 (file); + + return _chsize_s (fileno (file), curr_pos); +} + +#else + +static int truncate_here (void *id) +{ + FILE *file = id; + off_t curr_pos = ftell (file); + + return ftruncate (fileno (file), curr_pos); +} + +#endif + +static int close_stream (void *id) +{ + return fclose ((FILE*) id); +} + +// int32_t (*read_bytes)(void *id, void *data, int32_t bcount); +// int32_t (*write_bytes)(void *id, void *data, int32_t bcount); +// int64_t (*get_pos)(void *id); // new signature for large files +// int (*set_pos_abs)(void *id, int64_t pos); // new signature for large files +// int (*set_pos_rel)(void *id, int64_t delta, int mode); // new signature for large files +// int (*push_back_byte)(void *id, int c); +// int64_t (*get_length)(void *id); // new signature for large files +// int (*can_seek)(void *id); +// int (*truncate_here)(void *id); // new function to truncate file at current position +// int (*close)(void *id); // new function to close file + +static WavpackStreamReader64 freader = { + read_bytes, write_bytes, get_pos, set_pos_abs, set_pos_rel, + push_back_byte, get_length, can_seek, truncate_here, close_stream +}; + +// This function attempts to open the specified WavPack file for reading. If +// this fails for any reason then an appropriate message is copied to "error" +// (which must accept 80 characters) and NULL is returned, otherwise a +// pointer to a WavpackContext structure is returned (which is used to call +// all other functions in this module). A filename beginning with "-" is +// assumed to be stdin. 
The "flags" argument has the following bit mask +// values to specify details of the open operation: + +// OPEN_WVC: attempt to open/read "correction" file +// OPEN_TAGS: attempt to read ID3v1 / APEv2 tags (requires seekable file) +// OPEN_WRAPPER: make audio wrapper available (i.e. RIFF) to caller +// OPEN_2CH_MAX: open only first stream of multichannel file (usually L/R) +// OPEN_NORMALIZE: normalize floating point data to +/- 1.0 (w/ offset exp) +// OPEN_STREAMING: blindly unpacks blocks w/o regard to header file position +// OPEN_EDIT_TAGS: allow editing of tags (file must be writable) +// OPEN_FILE_UTF8: assume infilename is UTF-8 encoded (Windows only) + +// Version 4.2 of the WavPack library adds the OPEN_STREAMING flag. This is +// essentially a "raw" mode where the library will simply decode any blocks +// fed it through the reader callback, regardless of where those blocks came +// from in a stream. The only requirement is that complete WavPack blocks are +// fed to the decoder (and this may require multiple blocks in multichannel +// mode) and that complete blocks are decoded (even if all samples are not +// actually required). All the blocks must contain the same number of channels +// and bit resolution, and the correction data must be either present or not. +// All other parameters may change from block to block (like lossy/lossless). +// Obviously, in this mode any seeking must be performed by the application +// (and again, decoding must start at the beginning of the block containing +// the seek sample). + +WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset) +{ + char *file_mode = (flags & OPEN_EDIT_TAGS) ? 
"r+b" : "rb"; + FILE *(*fopen_func)(const char *, const char *) = fopen; + FILE *wv_id, *wvc_id; + +#ifdef _WIN32 + if (flags & OPEN_FILE_UTF8) + fopen_func = fopen_utf8; +#endif + + if (*infilename == '-') { + wv_id = stdin; +#if defined(_WIN32) + _setmode (fileno (stdin), O_BINARY); +#endif +#if defined(__OS2__) + setmode (fileno (stdin), O_BINARY); +#endif + } + else if ((wv_id = fopen_func (infilename, file_mode)) == NULL) { + if (error) strcpy (error, (flags & OPEN_EDIT_TAGS) ? "can't open file for editing" : "can't open file"); + return NULL; + } + + if (wv_id != stdin && (flags & OPEN_WVC)) { + char *in2filename = malloc (strlen (infilename) + 10); + + strcpy (in2filename, infilename); + strcat (in2filename, "c"); + wvc_id = fopen_func (in2filename, "rb"); + free (in2filename); + } + else + wvc_id = NULL; + + return WavpackOpenFileInputEx64 (&freader, wv_id, wvc_id, error, flags, norm_offset); +} + +#ifdef _WIN32 + +// The following code Copyright (c) 2004-2012 LoRd_MuldeR +// (see cli/win32_unicode_support.c for full license) + +static wchar_t *utf8_to_utf16(const char *input) +{ + wchar_t *Buffer; + int BuffSize = 0, Result = 0; + + BuffSize = MultiByteToWideChar(CP_UTF8, 0, input, -1, NULL, 0); + Buffer = (wchar_t*) malloc(sizeof(wchar_t) * BuffSize); + if(Buffer) + { + Result = MultiByteToWideChar(CP_UTF8, 0, input, -1, Buffer, BuffSize); + } + + return ((Result > 0) && (Result <= BuffSize)) ? 
Buffer : NULL; +} + + +static FILE *fopen_utf8(const char *filename_utf8, const char *mode_utf8) +{ + FILE *ret = NULL; + wchar_t *filename_utf16 = utf8_to_utf16(filename_utf8); + wchar_t *mode_utf16 = utf8_to_utf16(mode_utf8); + + if(filename_utf16 && mode_utf16) + { + ret = _wfopen(filename_utf16, mode_utf16); + } + + if(filename_utf16) free(filename_utf16); + if(mode_utf16) free(mode_utf16); + + return ret; +} + +#endif + + diff --git a/third_party/wavpack/src/open_legacy.c b/third_party/wavpack/src/open_legacy.c new file mode 100644 index 0000000..fb61509 --- /dev/null +++ b/third_party/wavpack/src/open_legacy.c @@ -0,0 +1,114 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2016 David Bryant. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// open_legacy.c + +// This code provides an interface between the new reader callback mechanism that +// WavPack uses internally and the old reader callback functions that did not +// provide large file support. 
+ +#include +#include + +#include "wavpack_local.h" + +typedef struct { + WavpackStreamReader *reader; + void *id; +} WavpackReaderTranslator; + +static int32_t trans_read_bytes (void *id, void *data, int32_t bcount) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->read_bytes (trans->id, data, bcount); +} + +static int32_t trans_write_bytes (void *id, void *data, int32_t bcount) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->write_bytes (trans->id, data, bcount); +} + +static int64_t trans_get_pos (void *id) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->get_pos (trans->id); +} + +static int trans_set_pos_abs (void *id, int64_t pos) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->set_pos_abs (trans->id, (uint32_t) pos); +} + +static int trans_set_pos_rel (void *id, int64_t delta, int mode) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->set_pos_rel (trans->id, (int32_t) delta, mode); +} + +static int trans_push_back_byte (void *id, int c) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->push_back_byte (trans->id, c); +} + +static int64_t trans_get_length (void *id) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->get_length (trans->id); +} + +static int trans_can_seek (void *id) +{ + WavpackReaderTranslator *trans = (WavpackReaderTranslator *)id; + return trans->reader->can_seek (trans->id); +} + +static int trans_close_stream (void *id) +{ + free (id); + return 0; +} + +static WavpackStreamReader64 trans_reader = { + trans_read_bytes, trans_write_bytes, trans_get_pos, trans_set_pos_abs, trans_set_pos_rel, + trans_push_back_byte, trans_get_length, trans_can_seek, NULL, trans_close_stream +}; + +// This function is identical to WavpackOpenFileInput64() 
except that instead +// of providing the new 64-bit reader callbacks, the old reader callbacks are +// utilized and a translation layer is employed. It is provided as a compatibility +// function for existing applications. To ensure that streaming applications using +// this function continue to work, the OPEN_NO_CHECKSUM flag is forced on when +// the OPEN_STREAMING flag is set. + +WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset) +{ + WavpackReaderTranslator *trans_wv = NULL, *trans_wvc = NULL; + + // this prevents existing streaming applications from failing if they try to pass + // in blocks that have been modified from the original (e.g., Matroska blocks) + + if (flags & OPEN_STREAMING) + flags |= OPEN_NO_CHECKSUM; + + if (wv_id) { + trans_wv = (WavpackReaderTranslator *)malloc (sizeof (WavpackReaderTranslator)); + trans_wv->reader = reader; + trans_wv->id = wv_id; + } + + if (wvc_id) { + trans_wvc = (WavpackReaderTranslator *)malloc (sizeof (WavpackReaderTranslator)); + trans_wvc->reader = reader; + trans_wvc->id = wvc_id; + } + + return WavpackOpenFileInputEx64 (&trans_reader, trans_wv, trans_wvc, error, flags, norm_offset); +} diff --git a/third_party/wavpack/src/open_raw.c b/third_party/wavpack/src/open_raw.c new file mode 100644 index 0000000..6fda6f3 --- /dev/null +++ b/third_party/wavpack/src/open_raw.c @@ -0,0 +1,315 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2016 David Bryant. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// open_raw.c + +// This code provides the ability to decode WavPack frames directly from +// memory for use in a streaming application. 
It can handle full blocks +// or the headerless block data provided by Matroska and the DirectShow +// WavPack splitter. For information about how Matroska stores WavPack, +// see: https://www.matroska.org/technical/specs/codecid/wavpack.html + +#include +#include + +#include "wavpack_local.h" + +typedef struct { + unsigned char *sptr, *dptr, *eptr, free_required; +} RawSegment; + +typedef struct { + RawSegment *segments; + int num_segments, curr_segment; + unsigned char ungetc_char, ungetc_flag; +} WavpackRawContext; + +static int32_t raw_read_bytes (void *id, void *data, int32_t bcount) +{ + WavpackRawContext *rcxt = id; + unsigned char *outptr = data; + + while (bcount) { + if (rcxt->ungetc_flag) { + *outptr++ = rcxt->ungetc_char; + rcxt->ungetc_flag = 0; + bcount--; + } + else if (rcxt->curr_segment < rcxt->num_segments) { + RawSegment *segptr = rcxt->segments + rcxt->curr_segment; + int bytes_to_copy = (int)(segptr->eptr - segptr->dptr); + + if (bytes_to_copy > bcount) + bytes_to_copy = bcount; + + memcpy (outptr, segptr->dptr, bytes_to_copy); + outptr += bytes_to_copy; + bcount -= bytes_to_copy; + + if ((segptr->dptr += bytes_to_copy) == segptr->eptr) + rcxt->curr_segment++; + } + else + break; + } + + return (int32_t)(outptr - (unsigned char *) data); +} + +static int32_t raw_write_bytes (void *id, void *data, int32_t bcount) +{ + return 0; +} + +static int64_t raw_get_pos (void *id) +{ + return 0; +} + +static int raw_set_pos_abs (void *id, int64_t pos) +{ + return 0; +} + +static int raw_set_pos_rel (void *id, int64_t delta, int mode) +{ + return 0; +} + +static int raw_push_back_byte (void *id, int c) +{ + WavpackRawContext *rcxt = id; + rcxt->ungetc_char = c; + rcxt->ungetc_flag = 1; + return c; +} + +static int64_t raw_get_length (void *id) +{ + return 0; +} + +static int raw_can_seek (void *id) +{ + return 0; +} + +static int raw_close_stream (void *id) +{ + WavpackRawContext *rcxt = id; + int i; + + if (rcxt) { + for (i = 0; i < rcxt->num_segments; 
++i) + if (rcxt->segments [i].sptr && rcxt->segments [i].free_required) + free (rcxt->segments [i].sptr); + + if (rcxt->segments) free (rcxt->segments); + free (rcxt); + } + + return 0; +} + +static WavpackStreamReader64 raw_reader = { + raw_read_bytes, raw_write_bytes, raw_get_pos, raw_set_pos_abs, raw_set_pos_rel, + raw_push_back_byte, raw_get_length, raw_can_seek, NULL, raw_close_stream +}; + +// This function is similar to WavpackOpenFileInput() except that instead of +// providing a filename to open, the caller provides pointers to buffered +// WavPack frames (both standard and, optionally, correction data). It +// decodes only a single frame. Note that in this context, a "frame" is a +// collection of WavPack blocks that represent all the channels present. In +// the case of mono or [most] stereo streams, this is the same thing, but +// for multichannel streams each frame consists of several WavPack blocks +// (which can contain only 1 or 2 channels). + +WavpackContext *WavpackOpenRawDecoder ( + void *main_data, int32_t main_size, + void *corr_data, int32_t corr_size, + int16_t version, char *error, int flags, int norm_offset) +{ + WavpackRawContext *raw_wv = NULL, *raw_wvc = NULL; + + // if the WavPack data does not contain headers we assume Matroska-style storage + // and recreate the missing headers + + if (strncmp (main_data, "wvpk", 4)) { + uint32_t multiple_blocks = 0, block_size, block_samples = 0, wphdr_flags, crc; + uint32_t main_bytes = main_size, corr_bytes = corr_size; + unsigned char *mcp = main_data; + unsigned char *ccp = corr_data; + int msi = 0, csi = 0; + + raw_wv = malloc (sizeof (WavpackRawContext)); + memset (raw_wv, 0, sizeof (WavpackRawContext)); + + if (corr_data && corr_size) { + raw_wvc = malloc (sizeof (WavpackRawContext)); + memset (raw_wvc, 0, sizeof (WavpackRawContext)); + } + + while (main_bytes >= 12) { + WavpackHeader *wphdr = malloc (sizeof (WavpackHeader)); + + if (!msi) { + block_samples = *mcp++; + block_samples += *mcp++ 
<< 8; + block_samples += *mcp++ << 16; + block_samples += *mcp++ << 24; + main_bytes -= 4; + } + + wphdr_flags = *mcp++; + wphdr_flags += *mcp++ << 8; + wphdr_flags += *mcp++ << 16; + wphdr_flags += *mcp++ << 24; + main_bytes -= 4; + + // if the first block does not have the FINAL_BLOCK flag set, + // then there are multiple blocks + + if (!msi && !(wphdr_flags & FINAL_BLOCK)) + multiple_blocks = 1; + + crc = *mcp++; + crc += *mcp++ << 8; + crc += *mcp++ << 16; + crc += *mcp++ << 24; + main_bytes -= 4; + + if (multiple_blocks) { + block_size = *mcp++; + block_size += *mcp++ << 8; + block_size += *mcp++ << 16; + block_size += *mcp++ << 24; + main_bytes -= 4; + } + else + block_size = main_bytes; + + if (block_size > main_bytes) { + if (error) strcpy (error, "main block overran available data!"); + raw_close_stream (raw_wv); + raw_close_stream (raw_wvc); + return NULL; + } + + memset (wphdr, 0, sizeof (WavpackHeader)); + memcpy (wphdr->ckID, "wvpk", 4); + wphdr->ckSize = sizeof (WavpackHeader) - 8 + block_size; + SET_TOTAL_SAMPLES (*wphdr, block_samples); + wphdr->block_samples = block_samples; + wphdr->version = version; + wphdr->flags = wphdr_flags; + wphdr->crc = crc; + WavpackLittleEndianToNative (wphdr, WavpackHeaderFormat); + + raw_wv->num_segments += 2; + raw_wv->segments = realloc (raw_wv->segments, sizeof (RawSegment) * raw_wv->num_segments); + raw_wv->segments [msi].dptr = raw_wv->segments [msi].sptr = (unsigned char *) wphdr; + raw_wv->segments [msi].eptr = raw_wv->segments [msi].dptr + sizeof (WavpackHeader); + raw_wv->segments [msi++].free_required = 1; + raw_wv->segments [msi].dptr = raw_wv->segments [msi].sptr = mcp; + raw_wv->segments [msi].eptr = raw_wv->segments [msi].dptr + block_size; + raw_wv->segments [msi++].free_required = 0; + main_bytes -= block_size; + mcp += block_size; + + if (corr_data && corr_bytes >= 4) { + wphdr = malloc (sizeof (WavpackHeader)); + + crc = *ccp++; + crc += *ccp++ << 8; + crc += *ccp++ << 16; + crc += *ccp++ << 24; + 
corr_bytes -= 4; + + if (multiple_blocks) { + block_size = *ccp++; + block_size += *ccp++ << 8; + block_size += *ccp++ << 16; + block_size += *ccp++ << 24; + corr_bytes -= 4; + } + else + block_size = corr_bytes; + + if (block_size > corr_bytes) { + if (error) strcpy (error, "correction block overran available data!"); + raw_close_stream (raw_wv); + raw_close_stream (raw_wvc); + return NULL; + } + + memset (wphdr, 0, sizeof (WavpackHeader)); + memcpy (wphdr->ckID, "wvpk", 4); + wphdr->ckSize = sizeof (WavpackHeader) - 8 + block_size; + SET_TOTAL_SAMPLES (*wphdr, block_samples); + wphdr->block_samples = block_samples; + wphdr->version = version; + wphdr->flags = wphdr_flags; + wphdr->crc = crc; + WavpackLittleEndianToNative (wphdr, WavpackHeaderFormat); + + raw_wvc->num_segments += 2; + raw_wvc->segments = realloc (raw_wvc->segments, sizeof (RawSegment) * raw_wvc->num_segments); + raw_wvc->segments [csi].dptr = raw_wvc->segments [csi].sptr = (unsigned char *) wphdr; + raw_wvc->segments [csi].eptr = raw_wvc->segments [csi].dptr + sizeof (WavpackHeader); + raw_wvc->segments [csi++].free_required = 1; + raw_wvc->segments [csi].dptr = raw_wvc->segments [csi].sptr = ccp; + raw_wvc->segments [csi].eptr = raw_wvc->segments [csi].dptr + block_size; + raw_wvc->segments [csi++].free_required = 0; + corr_bytes -= block_size; + ccp += block_size; + } + } + + if (main_bytes || (corr_data && corr_bytes)) { + if (error) strcpy (error, "leftover multiblock data!"); + raw_close_stream (raw_wv); + raw_close_stream (raw_wvc); + return NULL; + } + } + else { // the case of WavPack blocks with headers is much easier... 
+ if (main_data) { + raw_wv = malloc (sizeof (WavpackRawContext)); + memset (raw_wv, 0, sizeof (WavpackRawContext)); + raw_wv->num_segments = 1; + raw_wv->segments = malloc (sizeof (RawSegment) * raw_wv->num_segments); + raw_wv->segments [0].dptr = raw_wv->segments [0].sptr = main_data; + raw_wv->segments [0].eptr = raw_wv->segments [0].dptr + main_size; + raw_wv->segments [0].free_required = 0; + } + + if (corr_data && corr_size) { + raw_wvc = malloc (sizeof (WavpackRawContext)); + memset (raw_wvc, 0, sizeof (WavpackRawContext)); + raw_wvc->num_segments = 1; + raw_wvc->segments = malloc (sizeof (RawSegment) * raw_wvc->num_segments); + raw_wvc->segments [0].dptr = raw_wvc->segments [0].sptr = corr_data; + raw_wvc->segments [0].eptr = raw_wvc->segments [0].dptr + corr_size; + raw_wvc->segments [0].free_required = 0; + } + } + + return WavpackOpenFileInputEx64 (&raw_reader, raw_wv, raw_wvc, error, flags | OPEN_STREAMING | OPEN_NO_CHECKSUM, norm_offset); +} + +// Return the number of samples represented by the current (and in the raw case, only) frame. + +uint32_t WavpackGetNumSamplesInFrame (WavpackContext *wpc) +{ + if (wpc && wpc->streams && wpc->streams [0]) + return wpc->streams [0]->wphdr.block_samples; + else + return -1; +} + diff --git a/third_party/wavpack/src/open_utils.c b/third_party/wavpack/src/open_utils.c new file mode 100644 index 0000000..c880d34 --- /dev/null +++ b/third_party/wavpack/src/open_utils.c @@ -0,0 +1,1279 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2016 David Bryant. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// open_utils.c + +// This module provides all the code required to open an existing WavPack file +// for reading by using a reader callback mechanism (NOT a filename). 
This +// includes the code required to find and parse WavPack blocks, process any +// included metadata, and queue up the bitstreams containing the encoded audio +// data. It does not the actual code to unpack audio data and this was done so +// that programs that just want to query WavPack files for information (like, +// for example, taggers) don't need to link in a lot of unnecessary code. + +#include +#include + +#include "wavpack_local.h" + +// This function is identical to WavpackOpenFileInput() except that instead +// of providing a filename to open, the caller provides a pointer to a set of +// reader callbacks and instances of up to two streams. The first of these +// streams is required and contains the regular WavPack data stream; the second +// contains the "correction" file if desired. Unlike the standard open +// function which handles the correction file transparently, in this case it +// is the responsibility of the caller to be aware of correction files. + +static int seek_eof_information (WavpackContext *wpc, int64_t *final_index, int get_wrapper); + +WavpackContext *WavpackOpenFileInputEx64 (WavpackStreamReader64 *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset) +{ + WavpackContext *wpc = (WavpackContext *)malloc (sizeof (WavpackContext)); + WavpackStream *wps; + int num_blocks = 0; + unsigned char first_byte; + uint32_t bcount; + + if (!wpc) { + if (error) strcpy (error, "can't allocate memory"); + return NULL; + } + + CLEAR (*wpc); + wpc->wv_in = wv_id; + wpc->wvc_in = wvc_id; + wpc->reader = reader; + wpc->total_samples = -1; + wpc->norm_offset = norm_offset; + wpc->max_streams = OLD_MAX_STREAMS; // use this until overwritten with actual number + wpc->open_flags = flags; + + wpc->filelen = wpc->reader->get_length (wpc->wv_in); + +#ifndef NO_TAGS + if ((flags & (OPEN_TAGS | OPEN_EDIT_TAGS)) && wpc->reader->can_seek (wpc->wv_in)) { + load_tag (wpc); + wpc->reader->set_pos_abs (wpc->wv_in, 0); + + if ((flags & 
OPEN_EDIT_TAGS) && !editable_tag (&wpc->m_tag)) { + if (error) strcpy (error, "can't edit tags located at the beginning of files!"); + return WavpackCloseFile (wpc); + } + } +#endif + + if (wpc->reader->read_bytes (wpc->wv_in, &first_byte, 1) != 1) { + if (error) strcpy (error, "can't read all of WavPack file!"); + return WavpackCloseFile (wpc); + } + + wpc->reader->push_back_byte (wpc->wv_in, first_byte); + + if (first_byte == 'R') { +#ifdef ENABLE_LEGACY + return open_file3 (wpc, error); +#else + if (error) strcpy (error, "this legacy WavPack file is deprecated, use version 4.80.0 to transcode"); + return WavpackCloseFile (wpc); +#endif + } + + wpc->streams = (WavpackStream **)(malloc ((wpc->num_streams = 1) * sizeof (wpc->streams [0]))); + if (!wpc->streams) { + if (error) strcpy (error, "can't allocate memory"); + return WavpackCloseFile (wpc); + } + + wpc->streams [0] = wps = (WavpackStream *)malloc (sizeof (WavpackStream)); + if (!wps) { + if (error) strcpy (error, "can't allocate memory"); + return WavpackCloseFile (wpc); + } + CLEAR (*wps); + + while (!wps->wphdr.block_samples) { + + wpc->filepos = wpc->reader->get_pos (wpc->wv_in); + bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); + + if (bcount == (uint32_t) -1 || + (!wps->wphdr.block_samples && num_blocks++ > 16)) { + if (error) strcpy (error, "not compatible with this version of WavPack file!"); + return WavpackCloseFile (wpc); + } + + wpc->filepos += bcount; + wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8); + if (!wps->blockbuff) { + if (error) strcpy (error, "can't allocate memory"); + return WavpackCloseFile (wpc); + } + memcpy (wps->blockbuff, &wps->wphdr, 32); + + if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != wps->wphdr.ckSize - 24) { + if (error) strcpy (error, "can't read all of WavPack file!"); + return WavpackCloseFile (wpc); + } + + // if block does not verify, flag error, free buffer, and continue + if 
(!WavpackVerifySingleBlock (wps->blockbuff, !(flags & OPEN_NO_CHECKSUM))) { + wps->wphdr.block_samples = 0; + free (wps->blockbuff); + wps->blockbuff = NULL; + wpc->crc_errors++; + continue; + } + + wps->init_done = FALSE; + + if (wps->wphdr.block_samples) { + if (flags & OPEN_STREAMING) + SET_BLOCK_INDEX (wps->wphdr, 0); + else if (wpc->total_samples == -1) { + if (GET_BLOCK_INDEX (wps->wphdr) || GET_TOTAL_SAMPLES (wps->wphdr) == -1) { + wpc->initial_index = GET_BLOCK_INDEX (wps->wphdr); + SET_BLOCK_INDEX (wps->wphdr, 0); + + if (wpc->reader->can_seek (wpc->wv_in)) { + int64_t final_index = -1; + + seek_eof_information (wpc, &final_index, FALSE); + + if (final_index != -1) + wpc->total_samples = final_index - wpc->initial_index; + } + } + else + wpc->total_samples = GET_TOTAL_SAMPLES (wps->wphdr); + } + } + else if (wpc->total_samples == -1 && !GET_BLOCK_INDEX (wps->wphdr) && GET_TOTAL_SAMPLES (wps->wphdr)) + wpc->total_samples = GET_TOTAL_SAMPLES (wps->wphdr); + + if (wpc->wvc_in && wps->wphdr.block_samples && (wps->wphdr.flags & HYBRID_FLAG)) { + unsigned char ch; + + if (wpc->reader->read_bytes (wpc->wvc_in, &ch, 1) == 1) { + wpc->reader->push_back_byte (wpc->wvc_in, ch); + wpc->file2len = wpc->reader->get_length (wpc->wvc_in); + wpc->wvc_flag = TRUE; + } + } + + if (wpc->wvc_flag && !read_wvc_block (wpc)) { + if (error) strcpy (error, "not compatible with this version of correction file!"); + return WavpackCloseFile (wpc); + } + + if (!wps->init_done && !unpack_init (wpc)) { + if (error) strcpy (error, wpc->error_message [0] ? wpc->error_message : + "not compatible with this version of WavPack file!"); + + return WavpackCloseFile (wpc); + } + + wps->init_done = TRUE; + } + + wpc->config.flags &= ~0xff; + wpc->config.flags |= wps->wphdr.flags & 0xff; + + if (!wpc->config.num_channels) { + wpc->config.num_channels = (wps->wphdr.flags & MONO_FLAG) ? 
1 : 2; + wpc->config.channel_mask = 0x5 - wpc->config.num_channels; + } + + if ((flags & OPEN_2CH_MAX) && !(wps->wphdr.flags & FINAL_BLOCK)) + wpc->reduced_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2; + + if (wps->wphdr.flags & DSD_FLAG) { +#ifdef ENABLE_DSD + if (flags & OPEN_DSD_NATIVE) { + wpc->config.bytes_per_sample = 1; + wpc->config.bits_per_sample = 8; + } + else if (flags & OPEN_DSD_AS_PCM) { + wpc->decimation_context = decimate_dsd_init (wpc->reduced_channels ? + wpc->reduced_channels : wpc->config.num_channels); + + wpc->config.bytes_per_sample = 3; + wpc->config.bits_per_sample = 24; + } + else { + if (error) strcpy (error, "not configured to handle DSD WavPack files!"); + return WavpackCloseFile (wpc); + } +#else + if (error) strcpy (error, "not configured to handle DSD WavPack files!"); + return WavpackCloseFile (wpc); +#endif + } + else { + wpc->config.bytes_per_sample = (wps->wphdr.flags & BYTES_STORED) + 1; + wpc->config.float_norm_exp = wps->float_norm_exp; + + wpc->config.bits_per_sample = (wpc->config.bytes_per_sample * 8) - + ((wps->wphdr.flags & SHIFT_MASK) >> SHIFT_LSB); + } + + if (!wpc->config.sample_rate) { + if (!wps->wphdr.block_samples || (wps->wphdr.flags & SRATE_MASK) == SRATE_MASK) + wpc->config.sample_rate = 44100; + else + wpc->config.sample_rate = sample_rates [(wps->wphdr.flags & SRATE_MASK) >> SRATE_LSB]; + } + + return wpc; +} + +// This function returns the major version number of the WavPack program +// (or library) that created the open file. Currently, this can be 1 to 5. +// Minor versions are not recorded in WavPack files. + +int WavpackGetVersion (WavpackContext *wpc) +{ + if (wpc) { +#ifdef ENABLE_LEGACY + if (wpc->stream3) + return get_version3 (wpc); +#endif + return wpc->version_five ? 5 : 4; + } + + return 0; +} + +// Return the file format specified in the call to WavpackSetFileInformation() +// when the file was created. For all files created prior to WavPack 5.0 this +// will 0 (WP_FORMAT_WAV). 
+ +unsigned char WavpackGetFileFormat (WavpackContext *wpc) +{ + return wpc->file_format; +} + +// Return a string representing the recommended file extension for the open +// WavPack file. For all files created prior to WavPack 5.0 this will be "wav", +// even for raw files with no RIFF into. This string is specified in the +// call to WavpackSetFileInformation() when the file was created. + +char *WavpackGetFileExtension (WavpackContext *wpc) +{ + if (wpc && wpc->file_extension [0]) + return wpc->file_extension; + else + return "wav"; +} + +// This function initializes everything required to unpack a WavPack block +// and must be called before unpack_samples() is called to obtain audio data. +// It is assumed that the WavpackHeader has been read into the wps->wphdr +// (in the current WavpackStream) and that the entire block has been read at +// wps->blockbuff. If a correction file is available (wpc->wvc_flag = TRUE) +// then the corresponding correction block must be read into wps->block2buff +// and its WavpackHeader has overwritten the header at wps->wphdr. This is +// where all the metadata blocks are scanned including those that contain +// bitstream data. 
+ +static int read_metadata_buff (WavpackMetadata *wpmd, unsigned char *blockbuff, unsigned char **buffptr); +static int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd); +static void bs_open_read (Bitstream *bs, void *buffer_start, void *buffer_end); + +int unpack_init (WavpackContext *wpc) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + unsigned char *blockptr, *block2ptr; + WavpackMetadata wpmd; + + wps->num_terms = 0; + wps->mute_error = FALSE; + wps->crc = wps->crc_x = 0xffffffff; + wps->dsd.ready = 0; + CLEAR (wps->wvbits); + CLEAR (wps->wvcbits); + CLEAR (wps->wvxbits); + CLEAR (wps->decorr_passes); + CLEAR (wps->dc); + CLEAR (wps->w); + + if (!(wps->wphdr.flags & MONO_FLAG) && wpc->config.num_channels && wps->wphdr.block_samples && + (wpc->reduced_channels == 1 || wpc->config.num_channels == 1)) { + wps->mute_error = TRUE; + return FALSE; + } + + if ((wps->wphdr.flags & UNKNOWN_FLAGS) || (wps->wphdr.flags & MONO_DATA) == MONO_DATA) { + wps->mute_error = TRUE; + return FALSE; + } + + blockptr = wps->blockbuff + sizeof (WavpackHeader); + + while (read_metadata_buff (&wpmd, wps->blockbuff, &blockptr)) + if (!process_metadata (wpc, &wpmd)) { + wps->mute_error = TRUE; + return FALSE; + } + + if (wps->wphdr.block_samples && wpc->wvc_flag && wps->block2buff) { + block2ptr = wps->block2buff + sizeof (WavpackHeader); + + while (read_metadata_buff (&wpmd, wps->block2buff, &block2ptr)) + if (!process_metadata (wpc, &wpmd)) { + wps->mute_error = TRUE; + return FALSE; + } + } + + if (wps->wphdr.block_samples && ((wps->wphdr.flags & DSD_FLAG) ? 
!wps->dsd.ready : !bs_is_open (&wps->wvbits))) { + if (bs_is_open (&wps->wvcbits)) + strcpy (wpc->error_message, "can't unpack correction files alone!"); + + wps->mute_error = TRUE; + return FALSE; + } + + if (wps->wphdr.block_samples && !bs_is_open (&wps->wvxbits)) { + if ((wps->wphdr.flags & INT32_DATA) && wps->int32_sent_bits) + wpc->lossy_blocks = TRUE; + + if ((wps->wphdr.flags & FLOAT_DATA) && + wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME)) + wpc->lossy_blocks = TRUE; + } + + if (wps->wphdr.block_samples) + wps->sample_index = GET_BLOCK_INDEX (wps->wphdr); + + return TRUE; +} + +//////////////////////////////// matadata handlers /////////////////////////////// + +// These functions handle specific metadata types and are called directly +// during WavPack block parsing by process_metadata() at the bottom. + +// This function initialzes the main bitstream for audio samples, which must +// be in the "wv" file. + +static int init_wv_bitstream (WavpackStream *wps, WavpackMetadata *wpmd) +{ + if (!wpmd->byte_length || (wpmd->byte_length & 1)) + return FALSE; + + bs_open_read (&wps->wvbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length); + return TRUE; +} + +// This function initialzes the "correction" bitstream for audio samples, +// which currently must be in the "wvc" file. + +static int init_wvc_bitstream (WavpackStream *wps, WavpackMetadata *wpmd) +{ + if (!wpmd->byte_length || (wpmd->byte_length & 1)) + return FALSE; + + bs_open_read (&wps->wvcbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length); + return TRUE; +} + +// This function initialzes the "extra" bitstream for audio samples which +// contains the information required to losslessly decompress 32-bit float data +// or integer data that exceeds 24 bits. This bitstream is in the "wv" file +// for pure lossless data or the "wvc" file for hybrid lossless. This data +// would not be used for hybrid lossy mode. 
There is also a 32-bit CRC stored +// in the first 4 bytes of these blocks. + +static int init_wvx_bitstream (WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char *cp = (unsigned char *)wpmd->data; + + if (wpmd->byte_length <= 4 || (wpmd->byte_length & 1)) + return FALSE; + + wps->crc_wvx = *cp++; + wps->crc_wvx |= (int32_t) *cp++ << 8; + wps->crc_wvx |= (int32_t) *cp++ << 16; + wps->crc_wvx |= (int32_t) *cp++ << 24; + + bs_open_read (&wps->wvxbits, cp, (unsigned char *) wpmd->data + wpmd->byte_length); + return TRUE; +} + +// Read the int32 data from the specified metadata into the specified stream. +// This data is used for integer data that has more than 24 bits of magnitude +// or, in some cases, used to eliminate redundant bits from any audio stream. + +static int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + char *byteptr = (char *)wpmd->data; + + if (bytecnt != 4) + return FALSE; + + wps->int32_sent_bits = *byteptr++; + wps->int32_zeros = *byteptr++; + wps->int32_ones = *byteptr++; + wps->int32_dups = *byteptr; + + return TRUE; +} + +static int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + char *byteptr = (char *)wpmd->data; + + if (bytecnt != 4) + return FALSE; + + wps->float_flags = *byteptr++; + wps->float_shift = *byteptr++; + wps->float_max_exp = *byteptr++; + wps->float_norm_exp = *byteptr; + return TRUE; +} + +// Read multichannel information from metadata. The first byte is the total +// number of channels and the following bytes represent the channel_mask +// as described for Microsoft WAVEFORMATEX. 
+ +static int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length, shift = 0, mask_bits; + unsigned char *byteptr = (unsigned char *)wpmd->data; + uint32_t mask = 0; + + if (!bytecnt || bytecnt > 7) + return FALSE; + + if (!wpc->config.num_channels) { + + // if bytecnt is 6 or 7 we are using new configuration with "unlimited" streams + + if (bytecnt >= 6) { + wpc->config.num_channels = (byteptr [0] | ((byteptr [2] & 0xf) << 8)) + 1; + wpc->max_streams = (byteptr [1] | ((byteptr [2] & 0xf0) << 4)) + 1; + + if (wpc->config.num_channels < wpc->max_streams) + return FALSE; + + byteptr += 3; + mask = *byteptr++; + mask |= (uint32_t) *byteptr++ << 8; + mask |= (uint32_t) *byteptr++ << 16; + + if (bytecnt == 7) // this was introduced in 5.0 + mask |= (uint32_t) *byteptr << 24; + } + else { + wpc->config.num_channels = *byteptr++; + + while (--bytecnt) { + mask |= (uint32_t) *byteptr++ << shift; + shift += 8; + } + } + + if (wpc->config.num_channels > wpc->max_streams * 2) + return FALSE; + + wpc->config.channel_mask = mask; + + for (mask_bits = 0; mask; mask >>= 1) + if ((mask & 1) && ++mask_bits > wpc->config.num_channels) + return FALSE; + } + + return TRUE; +} + +// Read multichannel identity information from metadata. Data is an array of +// unsigned characters representing any channels in the file that DO NOT +// match one the 18 Microsoft standard channels (and are represented in the +// channel mask). A value of 0 is not allowed and 0xff means an unknown or +// undefined channel identity. + +static int read_channel_identities (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + if (!wpc->channel_identities) { + wpc->channel_identities = (unsigned char *)malloc (wpmd->byte_length + 1); + memcpy (wpc->channel_identities, wpmd->data, wpmd->byte_length); + wpc->channel_identities [wpmd->byte_length] = 0; + } + + return TRUE; +} + +// Read configuration information from metadata. 
+ +static int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + unsigned char *byteptr = (unsigned char *)wpmd->data; + + if (bytecnt >= 3) { + wpc->config.flags &= 0xff; + wpc->config.flags |= (int32_t) *byteptr++ << 8; + wpc->config.flags |= (int32_t) *byteptr++ << 16; + wpc->config.flags |= (int32_t) *byteptr++ << 24; + bytecnt -= 3; + + if (bytecnt && (wpc->config.flags & CONFIG_EXTRA_MODE)) { + wpc->config.xmode = *byteptr++; + bytecnt--; + } + + // we used an extra config byte here for the 5.0.0 alpha, so still + // honor it now (but this has been replaced with NEW_CONFIG) + + if (bytecnt) { + wpc->config.qmode = (wpc->config.qmode & ~0xff) | *byteptr; + wpc->version_five = 1; + } + } + + return TRUE; +} + +// Read "new" configuration information from metadata. + +static int read_new_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + unsigned char *byteptr = (unsigned char *)wpmd->data; + + wpc->version_five = 1; // just having this block signals version 5.0 + + wpc->file_format = wpc->config.qmode = wpc->channel_layout = 0; + + if (wpc->channel_reordering) { + free (wpc->channel_reordering); + wpc->channel_reordering = NULL; + } + + // if there's any data, the first two bytes are file_format and qmode flags + + if (bytecnt >= 2) { + wpc->file_format = *byteptr++; + wpc->config.qmode = (wpc->config.qmode & ~0xff) | *byteptr++; + bytecnt -= 2; + + // another byte indicates a channel layout + + if (bytecnt) { + int nchans, i; + + wpc->channel_layout = (int32_t) *byteptr++ << 16; + bytecnt--; + + // another byte means we have a channel count for the layout and maybe a reordering + + if (bytecnt) { + wpc->channel_layout += nchans = *byteptr++; + bytecnt--; + + // any more means there's a reordering string + + if (bytecnt) { + if (bytecnt > nchans) + return FALSE; + + wpc->channel_reordering = (unsigned char *)malloc (nchans); + + // note that redundant reordering 
info is not stored, so we fill in the rest + + if (wpc->channel_reordering) { + for (i = 0; i < nchans; ++i) + if (bytecnt) { + wpc->channel_reordering [i] = *byteptr++; + + if (wpc->channel_reordering [i] >= nchans) // make sure index is in range + wpc->channel_reordering [i] = 0; + + bytecnt--; + } + else + wpc->channel_reordering [i] = i; + } + } + } + else + wpc->channel_layout += wpc->config.num_channels; + } + } + + return TRUE; +} + +// Read non-standard sampling rate from metadata. + +static int read_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + unsigned char *byteptr = (unsigned char *)wpmd->data; + + if (bytecnt == 3 || bytecnt == 4) { + wpc->config.sample_rate = (int32_t) *byteptr++; + wpc->config.sample_rate |= (int32_t) *byteptr++ << 8; + wpc->config.sample_rate |= (int32_t) *byteptr++ << 16; + + // for sampling rates > 16777215 (non-audio probably, or ...) + + if (bytecnt == 4) + wpc->config.sample_rate |= (int32_t) (*byteptr & 0x7f) << 24; + } + + return TRUE; +} + +// Read wrapper data from metadata. Currently, this consists of the RIFF +// header and trailer that wav files contain around the audio data but could +// be used for other formats as well. Because WavPack files contain all the +// information required for decoding and playback, this data can probably +// be ignored except when an exact wavefile restoration is needed. 
+ +static int read_wrapper_data (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + if ((wpc->open_flags & OPEN_WRAPPER) && wpc->wrapper_bytes < MAX_WRAPPER_BYTES && wpmd->byte_length) { + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + wpmd->byte_length); + if (!wpc->wrapper_data) + return FALSE; + memcpy (wpc->wrapper_data + wpc->wrapper_bytes, wpmd->data, wpmd->byte_length); + wpc->wrapper_bytes += wpmd->byte_length; + } + + return TRUE; +} + +static int read_metadata_buff (WavpackMetadata *wpmd, unsigned char *blockbuff, unsigned char **buffptr) +{ + WavpackHeader *wphdr = (WavpackHeader *) blockbuff; + unsigned char *buffend = blockbuff + wphdr->ckSize + 8; + + if (buffend - *buffptr < 2) + return FALSE; + + wpmd->id = *(*buffptr)++; + wpmd->byte_length = *(*buffptr)++ << 1; + + if (wpmd->id & ID_LARGE) { + wpmd->id &= ~ID_LARGE; + + if (buffend - *buffptr < 2) + return FALSE; + + wpmd->byte_length += *(*buffptr)++ << 9; + wpmd->byte_length += *(*buffptr)++ << 17; + } + + if (wpmd->id & ID_ODD_SIZE) { + if (!wpmd->byte_length) // odd size and zero length makes no sense + return FALSE; + wpmd->id &= ~ID_ODD_SIZE; + wpmd->byte_length--; + } + + if (wpmd->byte_length) { + if (buffend - *buffptr < wpmd->byte_length + (wpmd->byte_length & 1)) { + wpmd->data = NULL; + return FALSE; + } + + wpmd->data = *buffptr; + (*buffptr) += wpmd->byte_length + (wpmd->byte_length & 1); + } + else + wpmd->data = NULL; + + return TRUE; +} + +static int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + + switch (wpmd->id) { + case ID_DUMMY: + return TRUE; + + case ID_DECORR_TERMS: + return read_decorr_terms (wps, wpmd); + + case ID_DECORR_WEIGHTS: + return read_decorr_weights (wps, wpmd); + + case ID_DECORR_SAMPLES: + return read_decorr_samples (wps, wpmd); + + case ID_ENTROPY_VARS: + return read_entropy_vars (wps, wpmd); + + case ID_HYBRID_PROFILE: + return 
read_hybrid_profile (wps, wpmd); + + case ID_SHAPING_WEIGHTS: + return read_shaping_info (wps, wpmd); + + case ID_FLOAT_INFO: + return read_float_info (wps, wpmd); + + case ID_INT32_INFO: + return read_int32_info (wps, wpmd); + + case ID_CHANNEL_INFO: + return read_channel_info (wpc, wpmd); + + case ID_CHANNEL_IDENTITIES: + return read_channel_identities (wpc, wpmd); + + case ID_CONFIG_BLOCK: + return read_config_info (wpc, wpmd); + + case ID_NEW_CONFIG_BLOCK: + return read_new_config_info (wpc, wpmd); + + case ID_SAMPLE_RATE: + return read_sample_rate (wpc, wpmd); + + case ID_WV_BITSTREAM: + return init_wv_bitstream (wps, wpmd); + + case ID_WVC_BITSTREAM: + return init_wvc_bitstream (wps, wpmd); + + case ID_WVX_BITSTREAM: + return init_wvx_bitstream (wps, wpmd); + + case ID_DSD_BLOCK: +#ifdef ENABLE_DSD + return init_dsd_block (wpc, wpmd); +#else + strcpy (wpc->error_message, "not configured to handle DSD WavPack files!"); + return FALSE; +#endif + + case ID_ALT_HEADER: case ID_ALT_TRAILER: + if (!(wpc->open_flags & OPEN_ALT_TYPES)) + return TRUE; + + case ID_RIFF_HEADER: case ID_RIFF_TRAILER: + return read_wrapper_data (wpc, wpmd); + + case ID_ALT_MD5_CHECKSUM: + if (!(wpc->open_flags & OPEN_ALT_TYPES)) + return TRUE; + + case ID_MD5_CHECKSUM: + if (wpmd->byte_length == 16) { + memcpy (wpc->config.md5_checksum, wpmd->data, 16); + wpc->config.flags |= CONFIG_MD5_CHECKSUM; + wpc->config.md5_read = 1; + } + + return TRUE; + + case ID_ALT_EXTENSION: + if (wpmd->byte_length && wpmd->byte_length < sizeof (wpc->file_extension)) { + memcpy (wpc->file_extension, wpmd->data, wpmd->byte_length); + wpc->file_extension [wpmd->byte_length] = 0; + } + + return TRUE; + + // we don't actually verify the checksum here (it's done right after the + // block is read), but it's a good indicator of version 5 files + + case ID_BLOCK_CHECKSUM: + wpc->version_five = 1; + return TRUE; + + default: + return (wpmd->id & ID_OPTIONAL_DATA) ? 
TRUE : FALSE; + } +} + +//////////////////////////////// bitstream management /////////////////////////////// + +// Open the specified BitStream and associate with the specified buffer. + +static void bs_read (Bitstream *bs); + +static void bs_open_read (Bitstream *bs, void *buffer_start, void *buffer_end) +{ + bs->error = bs->sr = bs->bc = 0; + bs->ptr = ((bs->buf = (uint16_t *)buffer_start) - 1); + bs->end = (uint16_t *)buffer_end; + bs->wrap = bs_read; +} + +// This function is only called from the getbit() and getbits() macros when +// the BitStream has been exhausted and more data is required. Sinve these +// bistreams no longer access files, this function simple sets an error and +// resets the buffer. + +static void bs_read (Bitstream *bs) +{ + bs->ptr = bs->buf; + bs->error = 1; +} + +// This function is called to close the bitstream. It returns the number of +// full bytes actually read as bits. + +uint32_t bs_close_read (Bitstream *bs) +{ + uint32_t bytes_read; + + if (bs->bc < sizeof (*(bs->ptr)) * 8) + bs->ptr++; + + bytes_read = (uint32_t)(bs->ptr - bs->buf) * sizeof (*(bs->ptr)); + + if (!(bytes_read & 1)) + ++bytes_read; + + CLEAR (*bs); + return bytes_read; +} + +// Normally the trailing wrapper will not be available when a WavPack file is first +// opened for reading because it is stored in the final block of the file. This +// function forces a seek to the end of the file to pick up any trailing wrapper +// stored there (then use WavPackGetWrapper**() to obtain). This can obviously only +// be used for seekable files (not pipes) and is not available for pre-4.0 WavPack +// files. + +void WavpackSeekTrailingWrapper (WavpackContext *wpc) +{ + if ((wpc->open_flags & OPEN_WRAPPER) && + wpc->reader->can_seek (wpc->wv_in) && !wpc->stream3) + seek_eof_information (wpc, NULL, TRUE); +} + +// Get any MD5 checksum stored in the metadata (should be called after reading +// last sample or an extra seek will occur). 
A return value of FALSE indicates +// that no MD5 checksum was stored. + +int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]) +{ + if (wpc->config.flags & CONFIG_MD5_CHECKSUM) { + if (!wpc->config.md5_read && wpc->reader->can_seek (wpc->wv_in)) + seek_eof_information (wpc, NULL, FALSE); + + if (wpc->config.md5_read) { + memcpy (data, wpc->config.md5_checksum, 16); + return TRUE; + } + } + + return FALSE; +} + +// Read from current file position until a valid 32-byte WavPack 4.0 header is +// found and read into the specified pointer. The number of bytes skipped is +// returned. If no WavPack header is found within 1 meg, then a -1 is returned +// to indicate the error. No additional bytes are read past the header and it +// is returned in the processor's native endian mode. Seeking is not required. + +uint32_t read_next_header (WavpackStreamReader64 *reader, void *id, WavpackHeader *wphdr) +{ + unsigned char buffer [sizeof (*wphdr)], *sp = buffer + sizeof (*wphdr), *ep = sp; + uint32_t bytes_skipped = 0; + int bleft; + + while (1) { + if (sp < ep) { + bleft = (int)(ep - sp); + memmove (buffer, sp, bleft); + } + else + bleft = 0; + + if (reader->read_bytes (id, buffer + bleft, sizeof (*wphdr) - bleft) != sizeof (*wphdr) - bleft) + return -1; + + sp = buffer; + + if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' && + !(*++sp & 1) && sp [2] < 16 && !sp [3] && (sp [2] || sp [1] || *sp >= 24) && sp [5] == 4 && + sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff) && sp [18] < 3 && !sp [19]) { + memcpy (wphdr, buffer, sizeof (*wphdr)); + WavpackLittleEndianToNative (wphdr, WavpackHeaderFormat); + return bytes_skipped; + } + + while (sp < ep && *sp != 'w') + sp++; + + if ((bytes_skipped += (uint32_t)(sp - buffer)) > 1024 * 1024) + return -1; + } +} + +// Compare the regular wv file block header to a potential matching wvc +// file block header and return action code based on analysis: +// +// 0 = use wvc block (assuming 
rest of block is readable) +// 1 = bad match; try to read next wvc block +// -1 = bad match; ignore wvc file for this block and backup fp (if +// possible) and try to use this block next time + +static int match_wvc_header (WavpackHeader *wv_hdr, WavpackHeader *wvc_hdr) +{ + if (GET_BLOCK_INDEX (*wv_hdr) == GET_BLOCK_INDEX (*wvc_hdr) && + wv_hdr->block_samples == wvc_hdr->block_samples) { + int wvi = 0, wvci = 0; + + if (wv_hdr->flags == wvc_hdr->flags) + return 0; + + if (wv_hdr->flags & INITIAL_BLOCK) + wvi -= 1; + + if (wv_hdr->flags & FINAL_BLOCK) + wvi += 1; + + if (wvc_hdr->flags & INITIAL_BLOCK) + wvci -= 1; + + if (wvc_hdr->flags & FINAL_BLOCK) + wvci += 1; + + return (wvci - wvi < 0) ? 1 : -1; + } + + if (((GET_BLOCK_INDEX (*wvc_hdr) - GET_BLOCK_INDEX (*wv_hdr)) << 24) < 0) + return 1; + else + return -1; +} + +// Read the wvc block that matches the regular wv block that has been +// read for the current stream. If an exact match is not found then +// we either keep reading or back up and (possibly) use the block +// later. The skip_wvc flag is set if not matching wvc block is found +// so that we can still decode using only the lossy version (although +// we flag this as an error). A return of FALSE indicates a serious +// error (not just that we missed one wvc block). 
+ +int read_wvc_block (WavpackContext *wpc) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + int64_t bcount, file2pos; + WavpackHeader orig_wphdr; + WavpackHeader wphdr; + int compare_result; + + while (1) { + file2pos = wpc->reader->get_pos (wpc->wvc_in); + bcount = read_next_header (wpc->reader, wpc->wvc_in, &wphdr); + + if (bcount == (uint32_t) -1) { + wps->wvc_skip = TRUE; + wpc->crc_errors++; + return FALSE; + } + + memcpy (&orig_wphdr, &wphdr, 32); // save original header for verify step + + if (wpc->open_flags & OPEN_STREAMING) + SET_BLOCK_INDEX (wphdr, wps->sample_index = 0); + else + SET_BLOCK_INDEX (wphdr, GET_BLOCK_INDEX (wphdr) - wpc->initial_index); + + if (wphdr.flags & INITIAL_BLOCK) + wpc->file2pos = file2pos + bcount; + + compare_result = match_wvc_header (&wps->wphdr, &wphdr); + + if (!compare_result) { + wps->block2buff = (unsigned char *)malloc (wphdr.ckSize + 8); + if (!wps->block2buff) + return FALSE; + + if (wpc->reader->read_bytes (wpc->wvc_in, wps->block2buff + 32, wphdr.ckSize - 24) != + wphdr.ckSize - 24) { + free (wps->block2buff); + wps->block2buff = NULL; + wps->wvc_skip = TRUE; + wpc->crc_errors++; + return FALSE; + } + + memcpy (wps->block2buff, &orig_wphdr, 32); + + // don't use corrupt blocks + if (!WavpackVerifySingleBlock (wps->block2buff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) { + free (wps->block2buff); + wps->block2buff = NULL; + wps->wvc_skip = TRUE; + wpc->crc_errors++; + return TRUE; + } + + wps->wvc_skip = FALSE; + memcpy (wps->block2buff, &wphdr, 32); + memcpy (&wps->wphdr, &wphdr, 32); + return TRUE; + } + else if (compare_result == -1) { + wps->wvc_skip = TRUE; + wpc->reader->set_pos_rel (wpc->wvc_in, -32, SEEK_CUR); + wpc->crc_errors++; + return TRUE; + } + } +} + +// This function is used to seek to end of a file to obtain certain information +// that is stored there at the file creation time because it is not known at +// the start. 
This includes the MD5 sum and and trailing part of the file +// wrapper, and in some rare cases may include the total number of samples in +// the file (although we usually try to back up and write that at the front of +// the file). Note this function restores the file position to its original +// location (and obviously requires a seekable file). The normal return value +// is TRUE indicating no errors, although this does not actually mean that any +// information was retrieved. An error return of FALSE usually means the file +// terminated unexpectedly. Note that this could be used to get all three +// types of information in one go, but it's not actually used that way now. + +static int seek_eof_information (WavpackContext *wpc, int64_t *final_index, int get_wrapper) +{ + int64_t restore_pos, last_pos = -1; + WavpackStreamReader64 *reader = wpc->reader; + int alt_types = wpc->open_flags & OPEN_ALT_TYPES; + uint32_t blocks = 0, audio_blocks = 0; + void *id = wpc->wv_in; + WavpackHeader wphdr; + + restore_pos = reader->get_pos (id); // we restore file position when done + + // start 1MB from the end-of-file, or from the start if the file is not that big + + if (reader->get_length (id) > (int64_t) 1048576) + reader->set_pos_rel (id, -1048576, SEEK_END); + else + reader->set_pos_abs (id, 0); + + // Note that we go backward (without parsing inside blocks) until we find a block + // with audio (careful to not get stuck in a loop). Only then do we go forward + // parsing all blocks in their entirety. + + while (1) { + uint32_t bcount = read_next_header (reader, id, &wphdr); + int64_t current_pos = reader->get_pos (id); + + // if we just got to the same place as last time, we're stuck and need to give up + + if (current_pos == last_pos) { + reader->set_pos_abs (id, restore_pos); + return FALSE; + } + + last_pos = current_pos; + + // We enter here if we just read 1 MB without seeing any WavPack block headers. 
+ // Since WavPack blocks are < 1 MB, that means we're in a big APE tag, or we got + // to the end-of-file. + + if (bcount == (uint32_t) -1) { + + // if we have not seen any blocks at all yet, back up almost 2 MB (or to the + // beginning of the file) and try again + + if (!blocks) { + if (current_pos > (int64_t) 2000000) + reader->set_pos_rel (id, -2000000, SEEK_CUR); + else + reader->set_pos_abs (id, 0); + + continue; + } + + // if we have seen WavPack blocks, then this means we've done all we can do here + + reader->set_pos_abs (id, restore_pos); + return TRUE; + } + + blocks++; + + // If the block has audio samples, calculate a final index, although this is not + // final since this may not be the last block with audio. On the other hand, if + // this block does not have audio, and we haven't seen one with audio, we have + // to go back some more. + + if (wphdr.block_samples) { + if (final_index) + *final_index = GET_BLOCK_INDEX (wphdr) + wphdr.block_samples; + + audio_blocks++; + } + else if (!audio_blocks) { + if (current_pos > (int64_t) 1048576) + reader->set_pos_rel (id, -1048576, SEEK_CUR); + else + reader->set_pos_abs (id, 0); + + continue; + } + + // at this point we have seen at least one block with audio, so we parse the + // entire block looking for MD5 metadata or (conditionally) trailing wrappers + + bcount = wphdr.ckSize - sizeof (WavpackHeader) + 8; + + while (bcount >= 2) { + unsigned char meta_id, c1, c2; + uint32_t meta_bc, meta_size; + + if (reader->read_bytes (id, &meta_id, 1) != 1 || + reader->read_bytes (id, &c1, 1) != 1) { + reader->set_pos_abs (id, restore_pos); + return FALSE; + } + + meta_bc = c1 << 1; + bcount -= 2; + + if (meta_id & ID_LARGE) { + if (bcount < 2 || reader->read_bytes (id, &c1, 1) != 1 || + reader->read_bytes (id, &c2, 1) != 1) { + reader->set_pos_abs (id, restore_pos); + return FALSE; + } + + meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); + bcount -= 2; + } + + meta_size = (meta_id & ID_ODD_SIZE) ? 
meta_bc - 1 : meta_bc; + meta_id &= ID_UNIQUE; + + if (get_wrapper && (meta_id == ID_RIFF_TRAILER || (alt_types && meta_id == ID_ALT_TRAILER)) && meta_bc) { + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + meta_bc); + + if (!wpc->wrapper_data) { + reader->set_pos_abs (id, restore_pos); + return FALSE; + } + + if (reader->read_bytes (id, wpc->wrapper_data + wpc->wrapper_bytes, meta_bc) == meta_bc) + wpc->wrapper_bytes += meta_size; + else { + reader->set_pos_abs (id, restore_pos); + return FALSE; + } + } + else if (meta_id == ID_MD5_CHECKSUM || (alt_types && meta_id == ID_ALT_MD5_CHECKSUM)) { + if (meta_bc == 16 && bcount >= 16) { + if (reader->read_bytes (id, wpc->config.md5_checksum, 16) == 16) + wpc->config.md5_read = TRUE; + else { + reader->set_pos_abs (id, restore_pos); + return FALSE; + } + } + else + reader->set_pos_rel (id, meta_bc, SEEK_CUR); + } + else + reader->set_pos_rel (id, meta_bc, SEEK_CUR); + + bcount -= meta_bc; + } + } +} + +// Quickly verify the referenced block. It is assumed that the WavPack header has been converted +// to native endian format. If a block checksum is performed, that is done in little-endian +// (file) format. It is also assumed that the caller has made sure that the block length +// indicated in the header is correct (we won't overflow the buffer). If a checksum is present, +// then it is checked, otherwise we just check that all the metadata blocks are formatted +// correctly (without looking at their contents). Returns FALSE for bad block. 
+ +int WavpackVerifySingleBlock (unsigned char *buffer, int verify_checksum) +{ + WavpackHeader *wphdr = (WavpackHeader *) buffer; + uint32_t checksum_passed = 0, bcount, meta_bc; + unsigned char *dp, meta_id, c1, c2; + + if (strncmp (wphdr->ckID, "wvpk", 4) || wphdr->ckSize + 8 < sizeof (WavpackHeader)) + return FALSE; + + bcount = wphdr->ckSize - sizeof (WavpackHeader) + 8; + dp = (unsigned char *)(wphdr + 1); + + while (bcount >= 2) { + meta_id = *dp++; + c1 = *dp++; + + meta_bc = c1 << 1; + bcount -= 2; + + if (meta_id & ID_LARGE) { + if (bcount < 2) + return FALSE; + + c1 = *dp++; + c2 = *dp++; + meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); + bcount -= 2; + } + + if (bcount < meta_bc) + return FALSE; + + if (verify_checksum && (meta_id & ID_UNIQUE) == ID_BLOCK_CHECKSUM) { +#ifdef BITSTREAM_SHORTS + uint16_t *csptr = (uint16_t*) buffer; +#else + unsigned char *csptr = buffer; +#endif + int wcount = (int)(dp - 2 - buffer) >> 1; + uint32_t csum = (uint32_t) -1; + + if ((meta_id & ID_ODD_SIZE) || meta_bc < 2 || meta_bc > 4) + return FALSE; + +#ifdef BITSTREAM_SHORTS + while (wcount--) + csum = (csum * 3) + *csptr++; +#else + WavpackNativeToLittleEndian ((WavpackHeader *) buffer, WavpackHeaderFormat); + + while (wcount--) { + csum = (csum * 3) + csptr [0] + (csptr [1] << 8); + csptr += 2; + } + + WavpackLittleEndianToNative ((WavpackHeader *) buffer, WavpackHeaderFormat); +#endif + + if (meta_bc == 4) { + if (*dp++ != (csum & 0xff) || *dp++ != ((csum >> 8) & 0xff) || *dp++ != ((csum >> 16) & 0xff) || *dp++ != ((csum >> 24) & 0xff)) + return FALSE; + } + else { + csum ^= csum >> 16; + + if (*dp++ != (csum & 0xff) || *dp++ != ((csum >> 8) & 0xff)) + return FALSE; + } + + checksum_passed++; + } + + bcount -= meta_bc; + dp += meta_bc; + } + + return (bcount == 0) && (!verify_checksum || !(wphdr->flags & HAS_CHECKSUM) || checksum_passed); +} diff --git a/third_party/wavpack/src/pack.c b/third_party/wavpack/src/pack.c index eb4c418..84e884b 100644 --- 
a/third_party/wavpack/src/pack.c +++ b/third_party/wavpack/src/pack.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // MMX optimizations (c) 2006 Joachim Henke // // All Rights Reserved. // // Distributed under the BSD Software License (see license.txt) // @@ -10,1074 +10,16 @@ // pack.c // This module actually handles the compression of the audio data, except for -// the entropy coding which is handled by the words? modules. For efficiency, -// the conversion is isolated to tight loops that handle an entire buffer. - -#include "wavpack_local.h" +// the entropy encoding which is handled by the write_words.c module. For better +// efficiency, the conversion is isolated to tight loops that handle an entire +// buffer. #include -#include #include #include -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -//////////////////////////////// local tables /////////////////////////////// - -// These two tables specify the characteristics of the decorrelation filters. -// Each term represents one layer of the sequential filter, where positive -// values indicate the relative sample involved from the same channel (1=prev), -// 17 & 18 are special functions using the previous 2 samples, and negative -// values indicate cross channel decorrelation (in stereo only). 
- -static const WavpackDecorrSpec fast_specs [] = { - { 1, 2,18,17 }, // 0 - { 1, 1,17,17 }, // 1 - { 0, 2,18,17 }, // 2 - { 0, 1,17,17 }, // 3 - { 1, 3, 1,18 }, // 4 - { 1, 1,17, 1 }, // 5 - { 0, 1, 1,17 }, // 6 - { 0, 1,-2,17 }, // 7 - { 0, 2,-1,17 }, // 8 - { 1, 1,17, 2 }, // 9 - { 0, 3,18,18 }, // 10 - { 0, 1,17, 1 }, // 11 - { 1, 6, 1, 2 }, // 12 - { 1, 1,17, 3 }, // 13 - { 0, 1,-2, 3 }, // 14 - { 0, 1, 2,17 }, // 15 - { 0, 1,18,-2 }, // 16 - { 0, 1,-1,17 }, // 17 - { 0, 1,18,17 }, // 18 - { 0, 1,17, 2 }, // 19 - { 1, 2,18,-2 }, // 20 - { 1, 1, 1,17 }, // 21 - { 0, 3,18, 2 }, // 22 - { 0, 1,17,-2 }, // 23 - { 0, 1,18,-2 }, // 24 - { 1, 2,17,-3 }, // 25 - { 0, 1,18, 3 }, // 26 - { 0, 1,18,18 }, // 27 - { 1, 1, 1, 3 }, // 28 - { 1, 1,18, 3 }, // 29 - { 1, 1, 1, 3 }, // 30 - { 0, 2,18,17 }, // 31 - { 1, 1, 1,17 }, // 32 - { 1, 1,17, 3 }, // 33 - { 0, 3,18,17 }, // 34 - { 0, 1,18,18 }, // 35 - { 1, 1, 1, 3 }, // 36 - { 1, 1, 1,18 }, // 37 - { 0, 1,18,-2 }, // 38 - { 0, 2,18,17 }, // 39 - { 0, 1,-1,18 }, // 40 - { 1, 1,17, 3 }, // 41 - { 0, 1,17, 2 }, // 42 - { 0, 1,17, 3 }, // 43 - { 1, 1,18, 2 }, // 44 - { 1, 1,17,-2 }, // 45 - { 0, 1, 1,-2 }, // 46 - { 0, 2,18,17 }, // 47 - { 0, 1,17,-2 }, // 48 - { 1, 1,17,-2 }, // 49 - { 0, 1,18, 3 }, // 50 - { 0, 1, 2,17 }, // 51 - { 1, 2,18,-3 }, // 52 - { 1, 2, 1,18 }, // 53 - { 1, 2,18, 2 }, // 54 - { 0, 1,17,-1 }, // 55 - { 0, 1,17,-2 }, // 56 - { 1, 1,17,-2 }, // 57 - { 1, 1, 1, 3 }, // 58 - { 0, 1, 1,17 }, // 59 - { 1, 2,18,-2 }, // 60 - { 1, 2,17,-3 }, // 61 - { 0, 2,18,17 }, // 62 - { 0, 2,18,17 }, // 63 - { 1, 1,17, 2 }, // 64 - { 1, 2,18,18 }, // 65 - { 0, 1,17, 2 }, // 66 - { 0, 1,18,17 }, // 67 - { 1, 1, 1,17 }, // 68 - { 1, 1,17, 2 }, // 69 - { 0, 2,18,18 }, // 70 - { 0, 2,18,17 }, // 71 - { 1, 2,17,-3 }, // 72 - { 1, 6, 1, 2 }, // 73 - { 0, 3,17,17 }, // 74 - { 0, 1, 1,18 }, // 75 - { 0, 1, 1,-2 }, // 76 - { 1, 1,17, 2 }, // 77 - { 0, 2,18,17 }, // 78 - { 0, 2,18,17 }, // 79 - { 1, 1,18, 3 }, // 80 - { 1, 
2,17,-3 }, // 81 - { 0, 1,17, 2 }, // 82 - { 0, 1,17, 3 }, // 83 - { 0, 1,18,-2 }, // 84 - { 1, 1,18,18 }, // 85 - { 1, 6, 1, 2 }, // 86 - { 0, 2,18,17 }, // 87 - { 0, 2,18,17 }, // 88 - { 0, 1,-1,17 }, // 89 - { 1, 1,18, 3 }, // 90 - { 0, 1,17,18 }, // 91 - { 1, 1,17, 3 }, // 92 - { 0, 1,18, 3 }, // 93 - { 0, 2,18,17 }, // 94 - { 0, 2,18,17 }, // 95 - { 1, 2,18, 2 }, // 96 - { 0, 1,-2, 3 }, // 97 - { 0, 4,18,-1 }, // 98 - { 0, 2,18,18 }, // 99 - { 0, 1,-2, 3 }, // 100 - { 1, 1,17,-2 }, // 101 - { 0, 1,17, 3 }, // 102 - { 0, 2,18,17 }, // 103 - { 0, 2,-1,18 }, // 104 - { 1, 1, 2,17 }, // 105 - { 0, 2,17,-2 }, // 106 - { 0, 1,17, 2 }, // 107 - { 1, 2,18,-3 }, // 108 - { 0, 1,17,-2 }, // 109 - { 0, 2,18,17 }, // 110 - { 0, 2,18,17 }, // 111 - { 1, 1,17,-2 }, // 112 - { 1, 2,17,-3 }, // 113 - { 1, 1, 1, 3 }, // 114 - { 1, 1, 2,17 }, // 115 - { 1, 2,18, 2 }, // 116 - { 1, 1, 2,17 }, // 117 - { 1, 1,18, 2 }, // 118 - { 0, 2,18,17 }, // 119 - { 0, 2,18,17 }, // 120 - { 0, 1,17,-2 }, // 121 - { 0, 2,18,17 }, // 122 - { 0, 2,17,-1 }, // 123 - { 0, 2,18,-2 }, // 124 - { 0, 2,18,17 }, // 125 - { 0, 2,18,17 }, // 126 - { 0, 2,18,17 }, // 127 - { 1, 1, 1, 3 }, // 128 - { 0, 2,-2,17 }, // 129 - { 0, 2,18,-2 }, // 130 - { 0, 2,17,-2 }, // 131 - { 1, 1, 2,17 }, // 132 - { 1, 1, 1, 3 }, // 133 - { 0, 1, 2,17 }, // 134 - { 0, 2,18,17 }, // 135 - { 0, 3,-1,17 }, // 136 - { 1, 1, 2,17 }, // 137 - { 0, 2,18,18 }, // 138 - { 0, 1,17, 2 }, // 139 - { 1, 4,18,-3 }, // 140 - { 1, 1,18, 1 }, // 141 - { 0, 2,18,17 }, // 142 - { 0, 2,18,17 }, // 143 - { 1, 2,18,-1 }, // 144 - { 0, 1,-1,18 }, // 145 - { 1, 6, 1, 2 }, // 146 - { 1, 1,17, 2 }, // 147 - { 1, 4,18, 3 }, // 148 - { 0, 1, 1,17 }, // 149 - { 0, 1,18, 2 }, // 150 - { 0, 2,18,17 }, // 151 - { 0, 2,18,17 }, // 152 - { 1, 2,17, 2 }, // 153 - { 0, 2,18,-2 }, // 154 - { 0, 1, 1,18 }, // 155 - { 1, 2,18,-3 }, // 156 - { 0, 2,18,17 }, // 157 - { 0, 2,18,17 }, // 158 - { 0, 2,18,17 }, // 159 - { 1, 2,18,18 }, // 160 - { 1, 3,17,17 }, // 161 
- { 0, 1,-2,17 }, // 162 - { 0, 1,17,18 }, // 163 - { 0, 1,-1, 3 }, // 164 - { 1, 1, 2,17 }, // 165 - { 0, 2,18,-1 }, // 166 - { 0, 2,18,17 }, // 167 - { 0, 2,18,17 }, // 168 - { 1, 1,17,-2 }, // 169 - { 1, 2,17, 2 }, // 170 - { 1, 1,18, 3 }, // 171 - { 0, 1,18, 2 }, // 172 - { 1, 2,17,-3 }, // 173 - { 0, 2,18,17 }, // 174 - { 0, 2,18,17 }, // 175 - { 0, 1,-2,17 }, // 176 - { 0, 1,17,-1 }, // 177 - { 0, 1,18,-1 }, // 178 - { 0, 2,18,17 }, // 179 - { 1, 2,17,-3 }, // 180 - { 1, 1, 1,18 }, // 181 - { 1, 3,18, 2 }, // 182 - { 0, 2,18,17 }, // 183 - { 0, 2,18,17 }, // 184 - { 0, 2,18,17 }, // 185 - { 0, 2,18,17 }, // 186 - { 0, 3,18,18 }, // 187 - { 0, 1, 1,-2 }, // 188 - { 0, 2,18,17 }, // 189 - { 0, 2,18,17 }, // 190 - { 0, 2,18,17 }, // 191 - { 1, 2,17,-3 }, // 192 - { 1, 1,18,18 }, // 193 - { 0, 2,18, 2 }, // 194 - { 0, 1,17,18 }, // 195 - { 1, 2,18, 2 }, // 196 - { 1, 1,17,-2 }, // 197 - { 0, 2,17,-1 }, // 198 - { 0, 2,18,17 }, // 199 - { 0, 2,18,17 }, // 200 - { 0, 2,18,17 }, // 201 - { 0, 1, 1,-2 }, // 202 - { 0, 1,18, 1 }, // 203 - { 1, 2,18,-2 }, // 204 - { 0, 1,17, 2 }, // 205 - { 0, 2,18,17 }, // 206 - { 0, 2,18,17 }, // 207 - { 1, 1,17, 3 }, // 208 - { 0, 1,17,-1 }, // 209 - { 0, 1,18, 2 }, // 210 - { 1, 1,17, 3 }, // 211 - { 1, 1,17,-2 }, // 212 - { 0, 1,18,18 }, // 213 - { 0, 2,18,17 }, // 214 - { 0, 2,18,17 }, // 215 - { 0, 2,18,17 }, // 216 - { 0, 2,18,17 }, // 217 - { 0, 2,18,17 }, // 218 - { 1, 1,17,18 }, // 219 - { 0, 1,-2, 3 }, // 220 - { 0, 2,18,17 }, // 221 - { 0, 2,18,17 }, // 222 - { 0, 2,18,17 }, // 223 - { 1, 2,18,-3 }, // 224 - { 0, 2,18,17 }, // 225 - { 0, 3,18, 2 }, // 226 - { 0, 1, 1,18 }, // 227 - { 0, 2,18,17 }, // 228 - { 0, 1,17,-1 }, // 229 - { 0, 2,18,17 }, // 230 - { 0, 2,18,17 }, // 231 - { 0, 2,18,17 }, // 232 - { 0, 1,-2, 3 }, // 233 - { 0, 3,17,17 }, // 234 - { 0, 2,18,17 }, // 235 - { 0, 2,18,17 }, // 236 - { 1, 1,17, 2 }, // 237 - { 0, 2,18,17 }, // 238 - { 0, 2,18,17 }, // 239 - { 1, 1,17, 2 }, // 240 - { 0, 2,18,17 }, // 241 
- { 0, 2,18,17 }, // 242 - { 0, 2,18,17 }, // 243 - { 0, 2,18, 2 }, // 244 - { 0, 2,18,17 }, // 245 - { 0, 2,18,17 }, // 246 - { 0, 2,18,17 }, // 247 - { 0, 2,18,17 }, // 248 - { 0, 2,18,17 }, // 249 - { 0, 2,18,17 }, // 250 - { 0, 2,18,17 }, // 251 - { 0, 2,18,17 }, // 252 - { 0, 2,18,17 }, // 253 - { 0, 2,18,17 }, // 254 - { 0, 2,18,17 }, // 255 -}; - -static const WavpackDecorrSpec default_specs [] = { - { 1, 2,18,18, 2,17, 3 }, // 0 - { 0, 2,18,17,-1, 3, 2 }, // 1 - { 1, 1,17,18,18,-2, 2 }, // 2 - { 0, 2,18,17, 3,-2,17 }, // 3 - { 1, 2,18,17, 2,17, 3 }, // 4 - { 0, 1,18,18,-1, 2,17 }, // 5 - { 0, 1,17,17,-2, 2, 3 }, // 6 - { 0, 1,18,-2,18, 2,17 }, // 7 - { 1, 2,18,18,-1, 2, 3 }, // 8 - { 0, 2,18,17, 3, 2, 5 }, // 9 - { 1, 1,18,17,18, 2, 5 }, // 10 - { 0, 1,17,17,-2, 2, 3 }, // 11 - { 0, 1,18,-2,18, 2, 5 }, // 12 - { 0, 1,17,-2,17, 2,-3 }, // 13 - { 1, 1,17,-2,17, 1, 2 }, // 14 - { 0, 1,17,17,-2, 2, 3 }, // 15 - { 1, 1,18, 3, 1, 5, 4 }, // 16 - { 1, 4,18,18, 2, 3,-2 }, // 17 - { 0, 1, 1,-1,-1, 2,17 }, // 18 - { 0, 2,18,17, 3, 2, 5 }, // 19 - { 0, 1,18,18,18, 2,17 }, // 20 - { 0, 1,18,17,-1, 2,18 }, // 21 - { 1, 1,17, 3, 2, 1, 7 }, // 22 - { 0, 2,18,-2,18, 2, 3 }, // 23 - { 1, 3,18,-3,18, 2, 3 }, // 24 - { 0, 3,18,17, 2, 3,17 }, // 25 - { 1, 1,17,17, 2, 1, 4 }, // 26 - { 0, 1,17,18,-2, 2,17 }, // 27 - { 1, 1,18,18, 3, 5, 2 }, // 28 - { 0, 1,17,17, 2,18, 4 }, // 29 - { 0, 1,18,17, 1, 4, 6 }, // 30 - { 1, 1, 3,17,18, 2,17 }, // 31 - { 1, 1,17, 3, 2, 1, 7 }, // 32 - { 0, 1,18,17,-1, 2, 3 }, // 33 - { 1, 1,17,17, 2, 1, 4 }, // 34 - { 1, 2,18,17,-1,17, 3 }, // 35 - { 1, 2,18,17, 2, 3,-1 }, // 36 - { 0, 2,18,18,-2, 2,17 }, // 37 - { 0, 1,17,17, 2,18, 4 }, // 38 - { 0, 5,-2,18,18,18, 2 }, // 39 - { 1, 1,18,18,-1, 6, 3 }, // 40 - { 0, 1,17,17,-2, 2, 3 }, // 41 - { 1, 1,18,17,18, 2,17 }, // 42 - { 0, 1,18,17, 4, 3, 1 }, // 43 - { 0, 1,-2,18, 2, 2,18 }, // 44 - { 1, 2,18,18,-2, 2,-1 }, // 45 - { 1, 1,17,17, 2, 1, 4 }, // 46 - { 0, 1,17,18,-2, 2,17 }, // 47 - { 1, 1,17, 3, 
2, 1, 7 }, // 48 - { 1, 3,18,-3,18, 2, 3 }, // 49 - { 1, 2,18,18,-2, 2,-1 }, // 50 - { 1, 1,18,18, 3, 5, 2 }, // 51 - { 0, 2,18,18,-1, 2,17 }, // 52 - { 0, 1,18,-1,17,18, 2 }, // 53 - { 0, 1,17,-1, 2, 3, 6 }, // 54 - { 0, 1,18,-2,18, 2, 5 }, // 55 - { 1, 2,18,18,-2, 2,-1 }, // 56 - { 0, 3,18,18, 2, 3,17 }, // 57 - { 0, 1,17,17, 2,18, 4 }, // 58 - { 1, 1,17,-2,17, 1, 2 }, // 59 - { 0, 1,-1, 3, 5, 4, 7 }, // 60 - { 0, 3,18,18, 3, 2, 5 }, // 61 - { 0, 1,17,17, 2,18, 4 }, // 62 - { 0, 1,18,17,-2,18, 3 }, // 63 - { 0, 2,18,18,-2, 2,17 }, // 64 - { 0, 3,18,17,-2, 2, 3 }, // 65 - { 1, 1,18,18,-2, 2,17 }, // 66 - { 0, 1,18,17, 4, 3, 1 }, // 67 - { 1, 2, 3,18,17, 2,17 }, // 68 - { 1, 2,18,18, 2,-2,18 }, // 69 - { 1, 2,18,18,-1,18, 2 }, // 70 - { 0, 2,18,18,-2, 2,17 }, // 71 - { 1, 3,18,18, 2, 3,-2 }, // 72 - { 0, 3,18,18, 3, 2, 5 }, // 73 - { 0, 1,18,-2,18, 2, 5 }, // 74 - { 1, 1,17, 3, 2, 1, 7 }, // 75 - { 1, 3,18,18,-2, 2,18 }, // 76 - { 1, 1,17,18,18,-2, 2 }, // 77 - { 0, 1,18,-2,18, 2, 5 }, // 78 - { 0, 2,18,-2,18, 2, 3 }, // 79 - { 0, 1,-1, 3, 4, 5, 7 }, // 80 - { 1, 1,17,17, 2,-1, 7 }, // 81 - { 0, 1,18,-1,-1, 2,-2 }, // 82 - { 0, 2,18,17, 2, 3,17 }, // 83 - { 0, 1,18,17, 2,18, 2 }, // 84 - { 0, 2,18,17,-1, 2,17 }, // 85 - { 0, 1, 1,18, 3, 2, 5 }, // 86 - { 0, 2,18,-2, 4,18, 2 }, // 87 - { 1, 1,18, 3, 1, 5, 4 }, // 88 - { 0, 1,18,17,18, 2, 5 }, // 89 - { 1, 1,18, 3, 1, 5, 4 }, // 90 - { 0, 4,18,18,-2, 2,18 }, // 91 - { 1, 1,18,18, 3, 2, 5 }, // 92 - { 1, 1,17,17, 2, 1, 4 }, // 93 - { 0, 2,18,18,-2,18, 2 }, // 94 - { 0, 2,18,18,-2,18, 2 }, // 95 - { 1, 1,18,18, 2, 1, 3 }, // 96 - { 1, 1,17,17, 2, 1, 4 }, // 97 - { 1, 2,17,17, 2,18, 3 }, // 98 - { 0, 1,18,17, 1, 4, 6 }, // 99 - { 1, 2,18,18,-2, 2,-1 }, // 100 - { 0, 1,18,-2,18, 2, 5 }, // 101 - { 1, 1,17, 2,18, 2,17 }, // 102 - { 0, 2,18,18,-2,18, 2 }, // 103 - { 0, 1,18,18, 3, 6,-1 }, // 104 - { 0, 1,18,17, 2,18, 3 }, // 105 - { 0, 1,18,17,-2, 2,17 }, // 106 - { 1, 1, 3,17,18, 2,17 }, // 107 - { 1, 3,18,-3,18, 2, 3 }, 
// 108 - { 1, 3,18,18,-3,18, 2 }, // 109 - { 1, 1,18, 3, 1, 5, 4 }, // 110 - { 0, 1,17,-2,17, 2,-3 }, // 111 - { 1, 1,18,18, 3, 5, 2 }, // 112 - { 1, 2,18,18,-2, 2,-1 }, // 113 - { 0, 1,18,-1,-1, 2,-2 }, // 114 - { 1, 1,18, 3, 1, 5, 4 }, // 115 - { 0, 3,18,17,-1, 2,17 }, // 116 - { 1, 3,18,17, 2,18,-2 }, // 117 - { 0, 2,18,18,-2,18, 2 }, // 118 - { 1, 2,18,18,-2, 2,-1 }, // 119 - { 1, 1,18, 3, 1, 5, 4 }, // 120 - { 0, 4, 3,18,18, 2,17 }, // 121 - { 0, 2,18,18,-2,18, 2 }, // 122 - { 1, 1,18,17,-1,18, 2 }, // 123 - { 0, 2,18,18,-2,18, 2 }, // 124 - { 0, 2,18,18,-2,18, 2 }, // 125 - { 0, 2,18,18,-2,18, 2 }, // 126 - { 0, 2,18,18,-2,18, 2 }, // 127 - { 1, 1,18,18,18, 3, 2 }, // 128 - { 0, 1,17,-1, 2, 3, 6 }, // 129 - { 0, 1,17,-1, 2, 3, 6 }, // 130 - { 0, 2,18,17,-2, 3, 2 }, // 131 - { 1, 3,18,17, 2,-2,18 }, // 132 - { 0, 2,18,18, 2,17, 3 }, // 133 - { 0, 1,18,18, 2,18,-2 }, // 134 - { 0, 2,18,-2, 4,18, 2 }, // 135 - { 0, 1,-2,18, 2, 2,18 }, // 136 - { 0, 2,18,17, 3, 6, 2 }, // 137 - { 0, 1,18,17,18, 2, 5 }, // 138 - { 0, 3,18,18,-2, 3, 2 }, // 139 - { 1, 1,18,18, 2,18, 5 }, // 140 - { 0, 1,17,-1, 2, 3, 6 }, // 141 - { 1, 4,18,18, 2, 3,-2 }, // 142 - { 0, 2,18,17,18, 2,-2 }, // 143 - { 0, 1, 1,18, 3, 2, 5 }, // 144 - { 1, 4,18,-2,18, 2, 3 }, // 145 - { 1, 2,18, 2,18, 3,-2 }, // 146 - { 0, 2,18,18,18, 2, 4 }, // 147 - { 0, 2, 3,17,18, 2,17 }, // 148 - { 1, 1,18,-1,18, 2,17 }, // 149 - { 1, 2,17,17, 2,18, 3 }, // 150 - { 0, 2,18,17,-2, 3, 2 }, // 151 - { 0, 1, 1,-1,-1, 2,17 }, // 152 - { 0, 3, 3,18,18, 2,17 }, // 153 - { 0, 1,18,-1,17,18, 2 }, // 154 - { 0, 1,18,17, 2,18, 3 }, // 155 - { 0, 2,18,18,-2,18, 2 }, // 156 - { 0, 1,18,17, 2,18, 2 }, // 157 - { 0, 2,18,18,-2,18, 2 }, // 158 - { 0, 2,18,18,-2,18, 2 }, // 159 - { 1, 2,17,17, 2,18, 3 }, // 160 - { 0, 1,18,17,-2, 2, 3 }, // 161 - { 0, 1,18,-2,18, 2, 5 }, // 162 - { 1, 4,18,-2,18, 2, 3 }, // 163 - { 1, 3,18,17, 2, 3, 6 }, // 164 - { 0, 2,18,18, 2,17, 3 }, // 165 - { 0, 2,18,17, 2,18, 2 }, // 166 - { 0, 
2,18,18,-2,18, 2 }, // 167 - { 1, 1,18,18, 3, 5, 2 }, // 168 - { 0, 2,18,18,-2, 2, 3 }, // 169 - { 1, 2,18,17, 2,17, 3 }, // 170 - { 0, 1,18,17, 2, 3,18 }, // 171 - { 0, 2,18,18,-2,18, 2 }, // 172 - { 1, 4,18,18, 2, 3,-2 }, // 173 - { 0, 1,17,-2,17, 2,-3 }, // 174 - { 0, 1,17,17, 2,18, 4 }, // 175 - { 1, 1,18,18,18, 2, 4 }, // 176 - { 1, 2,18, 2,18, 3,-2 }, // 177 - { 1, 1,18,18,-2, 2,17 }, // 178 - { 0, 2,18,18,-2,18, 2 }, // 179 - { 0, 2,18,18, 2,17, 3 }, // 180 - { 0, 2,18,18,18, 2, 4 }, // 181 - { 0, 2,18,18,-2,18, 2 }, // 182 - { 0, 2,18,17,-2, 3, 2 }, // 183 - { 0, 1, 1,-1,-1, 2,17 }, // 184 - { 1, 4,18,18, 2, 3,-2 }, // 185 - { 0, 2,18,18,-2,18, 2 }, // 186 - { 0, 1,18,-2,18, 3, 2 }, // 187 - { 0, 2,18,18,-2,18, 2 }, // 188 - { 0, 2,18,18,-2,18, 2 }, // 189 - { 0, 2,18,18,-2,18, 2 }, // 190 - { 0, 2,18,18,-2,18, 2 }, // 191 - { 0, 1,18,18,-2, 2,17 }, // 192 - { 0, 3,18,17, 2, 3,17 }, // 193 - { 1, 2,18,18, 2,-2,18 }, // 194 - { 0, 1,-1, 3, 5, 4, 7 }, // 195 - { 1, 1,18, 3, 1, 5, 4 }, // 196 - { 1, 1,18,18,-2,18, 3 }, // 197 - { 0, 2,18,17,18, 2,-2 }, // 198 - { 0, 2,18,18, 2,17, 3 }, // 199 - { 1, 2,18, 2,18, 3,-2 }, // 200 - { 1, 4,18,18, 2, 3,-2 }, // 201 - { 1, 3,18,17, 2, 3, 6 }, // 202 - { 0, 2,18,18,-2,18, 2 }, // 203 - { 1, 2,18,17,-2,-1,17 }, // 204 - { 0, 1,17,-1, 2, 3, 6 }, // 205 - { 0, 2,18,18,-2,18, 2 }, // 206 - { 0, 2,18,18,-2, 2, 3 }, // 207 - { 1, 1,18,18,18, 2, 5 }, // 208 - { 0, 1,17,17,-2, 2, 3 }, // 209 - { 0, 2,18,18,-2,18, 2 }, // 210 - { 0, 2,18,17, 3, 6, 2 }, // 211 - { 0, 2,18,17,18, 2, 3 }, // 212 - { 0, 3,18,17,-3,18, 2 }, // 213 - { 0, 1,18,18,18, 2, 3 }, // 214 - { 0, 1,18,-2,-3, 2, 6 }, // 215 - { 0, 2,18,18,-2,18, 2 }, // 216 - { 1, 1,18,17,18, 2, 5 }, // 217 - { 0, 2,18,18,-2,18, 2 }, // 218 - { 0, 2,18,18,-2,18, 2 }, // 219 - { 1, 1,18,17,18, 2, 5 }, // 220 - { 0, 2,18,18,-2,18, 2 }, // 221 - { 0, 2,18,18,-2,18, 2 }, // 222 - { 0, 2,18,18,-2,18, 2 }, // 223 - { 0, 1,18,18,18, 2, 3 }, // 224 - { 1, 1,17,-2,17, 1, 2 }, // 225 
- { 1, 1,17,17, 2,-1, 7 }, // 226 - { 0, 1,18,17, 4, 3, 1 }, // 227 - { 1, 3,18,-3,18, 2, 3 }, // 228 - { 0, 1, 1,18, 3, 2, 5 }, // 229 - { 0, 2,18,18,-2,18, 2 }, // 230 - { 0, 2,18,18,-2,18, 2 }, // 231 - { 0, 1,18,18, 3, 6, 2 }, // 232 - { 0, 1,17,17, 2,18, 4 }, // 233 - { 0, 1,17,17, 2,18, 4 }, // 234 - { 0, 2,18,18,-2,18, 2 }, // 235 - { 0, 2,18,18,-2,18, 2 }, // 236 - { 0, 2,18,18,-2,18, 2 }, // 237 - { 1, 2,18,-2,18, 3, 2 }, // 238 - { 1, 1,17,-2,17, 1, 2 }, // 239 - { 1, 1,18,18, 3, 2, 5 }, // 240 - { 0, 1,18,18,-1, 2, 3 }, // 241 - { 0, 2,18,18,-2,18, 2 }, // 242 - { 0, 2,18,18,-2,18, 2 }, // 243 - { 0, 1,18,17,18, 2, 5 }, // 244 - { 0, 2,18,18,-2,18, 2 }, // 245 - { 0, 2,18,18,-2,18, 2 }, // 246 - { 0, 2,18,18,-2,18, 2 }, // 247 - { 0, 2,18,18,-2,18, 2 }, // 248 - { 0, 1, 3,18,18, 2,17 }, // 249 - { 0, 2,18,18,-2,18, 2 }, // 250 - { 0, 2,18,18,-2,18, 2 }, // 251 - { 0, 2,18,18,-2,18, 2 }, // 252 - { 0, 2,18,18,-2,18, 2 }, // 253 - { 0, 2,18,18,-2,18, 2 }, // 254 - { 0, 2,18,18,-2,18, 2 }, // 255 -}; - -static const WavpackDecorrSpec high_specs [] = { - { 1, 2,18,18,18,-2, 2, 3, 5,-1,17, 4 }, // 0 - { 0, 1,18,17,-2, 2,18, 3, 7, 2, 5, 4 }, // 1 - { 1, 2, 1,18, 3, 6,-2,18, 2, 3, 4, 5 }, // 2 - { 0, 2,18,18,-2, 2,18, 3, 6, 2,17, 4 }, // 3 - { 1, 2,18,18, 2,18, 3, 2,-1, 4,18, 5 }, // 4 - { 1, 1, 7, 6, 5, 3, 4, 2, 5, 4, 3, 7 }, // 5 - { 1, 1,17, 3,18, 7, 2, 6, 1, 4, 3, 5 }, // 6 - { 1, 1,-2,18,18,18, 3,-2, 6, 5, 2, 1 }, // 7 - { 1, 2,18,18,-1,18, 2, 3, 6,-2,17, 5 }, // 8 - { 0, 1,17,17,18, 3, 6, 4, 5, 2,18,-2 }, // 9 - { 1, 2, 1,18,-2, 3, 5, 2, 4,-1, 6, 1 }, // 10 - { 0, 2,18,18, 3, 6,18, 2, 4, 8, 5, 3 }, // 11 - { 0, 1,-2, 1,18, 2,-2, 7,18, 2,-1, 5 }, // 12 - { 1, 1, 4, 3, 8, 1, 5, 2, 5, 6, 2, 8 }, // 13 - { 1, 1,17,18, 2, 6, 3, 4,-1, 1, 8, 6 }, // 14 - { 0, 1,18,18, 3, 6, 3,-2, 2, 5,-1, 1 }, // 15 - { 0, 1,18,18,17,-1, 2,-2,18, 3, 4, 5 }, // 16 - { 1, 2,18,17, 2,-2,18, 3, 5, 7, 2, 4 }, // 17 - { 1, 2,18,18, 3, 6,-2,18, 2, 5, 8, 3 }, // 18 - { 0, 1,18,17, 
2,18,18, 2, 6, 5,17, 7 }, // 19 - { 1, 2,18,17, 2,18, 3, 2, 6,18,-1, 4 }, // 20 - { 1, 1, 5, 3, 6, 5, 3, 4, 1, 2, 4, 7 }, // 21 - { 1, 1, 5, 3, 6, 5, 3, 4, 1, 2, 4, 7 }, // 22 - { 0, 1,-2,18,18,18,-2, 3, 2, 4, 6, 5 }, // 23 - { 1, 2,18,17,-3, 3,-1,18, 2, 3, 6, 5 }, // 24 - { 0, 1,17,18, 7, 3,-2, 7, 1, 2, 4, 5 }, // 25 - { 1, 1, 2,18,18,-2, 2, 4,-1,18, 3, 6 }, // 26 - { 0, 3, 1,18, 4, 3, 5, 2, 4,18, 2, 3 }, // 27 - { 0, 1,-2,18, 2,18, 3, 7,18, 2, 6,-2 }, // 28 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 29 - { 1, 1,18,18, 5, 4, 6, 4, 5, 1, 4, 3 }, // 30 - { 1, 1,18, 3, 6, 5, 7, 8, 2, 3, 1,-1 }, // 31 - { 1, 1,18,18,18, 2,-2, 3, 5,18, 2, 8 }, // 32 - { 0, 2,18,17,-2, 2, 3,18,-3, 5, 2, 7 }, // 33 - { 1, 1, 1, 1,-1, 8,17, 3,-2, 2, 6,17 }, // 34 - { 0, 2,18,18,17, 2,-2, 3, 2, 4,18, 5 }, // 35 - { 1, 1,17,18, 2,-1, 5, 7,18, 3, 4, 6 }, // 36 - { 1, 1, 5, 4, 5,17, 3, 6, 3, 4, 7, 2 }, // 37 - { 0, 1,17, 3, 1, 7, 4, 2, 5,-2,18, 6 }, // 38 - { 0, 1,17,18, 2,18, 4, 3, 5, 7,-3, 6 }, // 39 - { 1, 2,17,17,-3,-2, 2, 8,18,-1, 3, 5 }, // 40 - { 0, 1,17,17,18, 2, 3, 6,-2, 8, 1, 7 }, // 41 - { 1, 1, 1, 2, 6,-2,18, 2, 5,-3, 7,-2 }, // 42 - { 0, 1,18,18, 3,18, 6, 8,-2, 2, 3, 5 }, // 43 - { 0, 1,18,17, 2,18,-2, 3, 7, 6, 2, 4 }, // 44 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 45 - { 1, 1,18,18, 2,-1, 3, 6, 1, 3, 4, 8 }, // 46 - { 0, 1,18,18, 3, 6, 5, 3,-2, 2,18,-1 }, // 47 - { 0, 1,18,17,-3,18, 2, 4,-2, 3, 6,17 }, // 48 - { 1, 3, 1, 2,17, 3,18, 7,-1, 5, 2, 4 }, // 49 - { 1, 1,18, 3,18, 6, 8,18,-2, 5, 7, 2 }, // 50 - { 0, 1,17, 2,18, 6, 3, 2, 5, 4, 8, 1 }, // 51 - { 0, 1,18,17,-1, 2, 3,18,18, 2, 3,17 }, // 52 - { 1, 1,18, 7, 6, 5, 5, 3, 1, 4, 2, 4 }, // 53 - { 1, 1, 6,17, 3, 8, 1, 5, 7,-1, 2, 1 }, // 54 - { 1, 1,18,-2,18, 3,-2, 2, 7, 4, 6,18 }, // 55 - { 1, 3,18,-3,18, 2, 3,18,-1, 7, 2, 5 }, // 56 - { 0, 2,18,-2, 7, 1, 3, 2, 4, 6,-3, 7 }, // 57 - { 1, 1,18,-2, 2,-3,18,-2,17,-1, 4, 2 }, // 58 - { 0, 3,17,17, 2, 5, 3, 7,18, 6, 4, 2 }, // 59 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 60 
- { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 61 - { 1, 1,18,17, 4, 6, 6, 4, 5, 3, 4, 1 }, // 62 - { 0, 1,18, 5, 3, 6, 2, 3, 8, 1, 3, 7 }, // 63 - { 1, 2,18,17,-2, 2,18, 3, 5, 7,-1, 2 }, // 64 - { 0, 1, 1,18,18, 3, 6,-1, 4, 8, 5, 2 }, // 65 - { 1, 1, 1, 5, 3, 4, 1, 1, 3, 5, 7, 3 }, // 66 - { 0, 1, 3,18,18, 2,18,18,-1, 2, 3,18 }, // 67 - { 1, 2,18,18,-1,18, 2, 3, 4, 6,18, 5 }, // 68 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 69 - { 1, 1,18, 3, 1, 4, 5, 2, 7, 1, 3, 6 }, // 70 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 71 - { 1, 2,18,18,-1,18, 2, 3, 5,-2, 6, 8 }, // 72 - { 1, 1,17,18, 4, 8, 3, 2, 5, 2, 7, 6 }, // 73 - { 1, 4, 1, 2, 5,18,-2, 2, 3, 7,-1, 4 }, // 74 - { 0, 2,18,17,-1, 3, 6,18, 2, 3, 7, 5 }, // 75 - { 0, 1,-2,18, 2,-3, 6,18, 4, 3,-2, 5 }, // 76 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 77 - { 0, 1,17,17, 6, 2, 4, 8, 3, 5,-1,17 }, // 78 - { 1, 1,18, 3,18, 6, 8,18,-2, 5, 7, 2 }, // 79 - { 1, 2,17,17,-3, 2,18,-2, 8, 3, 6,-1 }, // 80 - { 1, 1,18,-2,17,18, 2, 3,-2, 6, 5, 4 }, // 81 - { 1, 2,18,17,-1, 3,18, 2, 5, 3, 6,-3 }, // 82 - { 0, 1,18,17, 2,18, 7,18, 2, 4, 3,17 }, // 83 - { 1, 3,18,18, 5, 6, 4, 3, 4,18, 6, 5 }, // 84 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 85 - { 1, 1, 7, 6, 5, 3, 4, 2, 5, 4, 3, 7 }, // 86 - { 0, 1,-2,18,18,18, 3, 6, 4, 2, 5, 2 }, // 87 - { 0, 3,18,17,-3,18, 3, 2, 5,-1,17, 3 }, // 88 - { 1, 1,17,18, 7, 3, 1, 7, 4, 2, 6, 5 }, // 89 - { 1, 1,18, 2,-2,-1,18, 5, 3,-2, 1, 2 }, // 90 - { 0, 3,18,18,-1, 3, 2, 7, 5,18, 4, 3 }, // 91 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 92 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 93 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 94 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 95 - { 1, 1,17,18, 2,-2, 4, 8,18, 3, 6, 5 }, // 96 - { 0, 2,18,17, 3, 5,-2, 7, 2,18, 3,-1 }, // 97 - { 1, 1,18, 2,-2,-1,18, 5, 3,-2, 1, 2 }, // 98 - { 0, 2, 3,17,18,18, 2, 5, 7, 6,18, 3 }, // 99 - { 1, 1,17,18,18, 4, 3, 2,18, 7, 8,-1 }, // 100 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 101 - { 0, 1,17, 1, 2, 3, 5, 6, 
1, 4, 8,17 }, // 102 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 103 - { 0, 2,18,17,-1,18,-3, 2, 8, 3, 6,17 }, // 104 - { 1, 1,17,17, 1, 2, 4, 5,-1, 2, 1, 6 }, // 105 - { 1, 1, 1, 2, 6,-2,18, 2,-3, 3,-2, 5 }, // 106 - { 0, 1,18, 3,18, 6,18, 5, 2, 4,-1, 8 }, // 107 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 108 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 109 - { 1, 1,18,18,-1, 2,18, 3, 6, 4,-2, 7 }, // 110 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 111 - { 0, 2,-1,18,18,18, 2,-2, 4, 7, 2, 3 }, // 112 - { 0, 3, 3,17,-2, 5, 2, 7,18, 6, 4, 5 }, // 113 - { 0, 1,17, 6,18, 3, 8, 4, 5, 3, 8,18 }, // 114 - { 0, 2,18, 2, 6, 2,18, 3, 2, 4, 5, 8 }, // 115 - { 0, 1, 3,18,18, 2,18,-1, 2,18, 2,17 }, // 116 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 117 - { 0, 1, 3, 6,17,-2, 5, 1, 2, 7, 4, 8 }, // 118 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 119 - { 1, 3, 3,18,17, 5, 6, 2, 7,-2, 8,18 }, // 120 - { 1, 1,18,-1, 3, 1, 7, 2,-1, 4, 6,17 }, // 121 - { 1, 1,18, 2,-2,-1,18, 5, 3,-2, 1, 2 }, // 122 - { 0, 2,18, 1, 2,18, 3, 6, 5, 2, 4, 8 }, // 123 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 124 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 125 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 126 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 127 - { 1, 1,17,-2, 2,18,18, 8, 5, 3, 2, 6 }, // 128 - { 0, 1,18,17, 2,18, 3, 2, 7,-2,18, 4 }, // 129 - { 1, 2, 1,18, 2, 3,-1, 5, 6, 4, 7,17 }, // 130 - { 0, 2,18,17, 3, 6,-2, 2, 3, 8, 5,17 }, // 131 - { 0, 2,18,18, 3, 2,18,-1, 2, 4, 3,17 }, // 132 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 133 - { 1, 2,17,-1,18, 2, 3,-2, 5,18, 2, 7 }, // 134 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 135 - { 1, 2,18,-3,18, 2, 3,-2,18, 5, 6,-3 }, // 136 - { 0, 2,18,17, 3, 5,-2, 7, 2,18, 3,-1 }, // 137 - { 1, 1, 1,18,-1, 2, 3, 1,-2, 8, 2, 5 }, // 138 - { 0, 1,18,18, 3, 6,18, 2, 3, 4, 8, 5 }, // 139 - { 0, 1,-2, 1,18, 2,-2, 5, 7,18, 2,-1 }, // 140 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 141 - { 1, 1,17,18,-1, 2, 8, 3, 4, 5, 1, 7 }, // 142 - { 0, 1,-1,18,18, 
2,18, 3, 5,18, 2, 8 }, // 143 - { 0, 2,18,18,-1, 2,18, 3,-2, 5, 4, 2 }, // 144 - { 1, 1,18,17, 2,18, 3, 8, 5, 2, 7,17 }, // 145 - { 0, 1,18,18, 3,18, 6, 8,-2, 2, 3, 5 }, // 146 - { 0, 1,18,18, 2,18, 2, 6,18, 2,17, 7 }, // 147 - { 1, 3,18,17,18, 2, 8,18, 5,-1, 3, 6 }, // 148 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 149 - { 1, 1,18, 7, 6, 5, 5, 3, 1, 4, 2, 4 }, // 150 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 151 - { 1, 2,18,17,-1, 3, 6,18, 2, 5, 8, 3 }, // 152 - { 0, 1,17,18,18, 4, 7, 2, 3,-2,18, 5 }, // 153 - { 1, 2,18, 1, 2, 6, 2, 5,18, 2, 4, 8 }, // 154 - { 0, 4,18, 4, 1, 2, 3, 5, 4, 1, 2, 6 }, // 155 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 156 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 157 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 158 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 159 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 160 - { 0, 2,18,17, 2,-1,18, 3,-3, 5, 2, 4 }, // 161 - { 0, 1,17,17, 3, 6, 3, 5,-2, 2,18,-1 }, // 162 - { 0, 2,18,18, 3,-2,18, 2,-3, 5, 3, 6 }, // 163 - { 1, 1,17,17, 2, 4, 1, 3, 5, 2, 6,-3 }, // 164 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 165 - { 0, 1,17, 1, 3, 2, 7, 1, 6, 3, 4, 8 }, // 166 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 167 - { 0, 1,17,-1,18, 2, 1, 5, 3, 8,-1,-2 }, // 168 - { 1, 1,17,18,-1, 8, 2, 5, 3, 4, 1, 6 }, // 169 - { 1, 2, 1,18, 3,-1, 5, 1, 2, 4, 7, 6 }, // 170 - { 0, 1,18,18, 3, 6, 5, 3,-2, 2,18,-1 }, // 171 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 172 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 173 - { 0, 1, 1,18,-1, 3, 8, 5, 6, 1, 2, 3 }, // 174 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 175 - { 0, 2,18,18, 2, 3, 6,18,-1, 4, 2, 3 }, // 176 - { 1, 1, 1, 3, 5,18, 2, 6, 7, 2, 3, 1 }, // 177 - { 1, 1, 1, 3, 8,18, 5, 2, 7, 1, 3,-2 }, // 178 - { 0, 2,17, 2,18, 3, 6, 2, 4, 5, 8, 3 }, // 179 - { 0, 1,18,17, 2,18, 3, 2, 7,-2,18, 4 }, // 180 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 181 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 182 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 183 - { 1, 
2,18,-3,18,-1, 3,-2, 5, 7, 1, 2 }, // 184 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 185 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 186 - { 0, 3,18,18, 2, 6,18, 5,18, 2, 3,17 }, // 187 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 188 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 189 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 190 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 191 - { 1, 3, 1,-1, 1, 3,-2, 2, 5, 7,-3,18 }, // 192 - { 1, 2,18, 7, 3,-3, 2, 8, 2, 5, 4,17 }, // 193 - { 1, 1, 1, 4, 5, 1, 3, 4, 6, 7, 8, 3 }, // 194 - { 0, 1,18,17, 2,18,-1, 2, 3,18, 2, 4 }, // 195 - { 0, 2,18,18,-2,18, 2, 3, 4, 7, 5,17 }, // 196 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 197 - { 1, 1,17,18, 2, 1, 3, 2, 5, 1, 2, 3 }, // 198 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 199 - { 0, 2,18,18,-1, 2, 3, 5, 8, 6, 1,-2 }, // 200 - { 0, 1,17,18, 8, 3, 4, 6, 5, 2, 8, 7 }, // 201 - { 1, 2, 1, 3,-2,18, 2, 5, 1, 7,-1,-2 }, // 202 - { 0, 3,18,17,-1, 3,18, 2, 3, 6, 4,17 }, // 203 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 204 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 205 - { 1, 2,18,18, 4,18, 6, 7, 8, 3,18, 2 }, // 206 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 207 - { 0, 2,17,-3,17, 2,-2, 8, 3,18, 4,-3 }, // 208 - { 1, 1,18,17, 3, 5, 6, 2, 8, 1, 3, 7 }, // 209 - { 0, 1,18,18, 3, 6, 5, 3,-2, 2,18,-1 }, // 210 - { 0, 3,18,18, 2, 6,18, 5,18, 2, 3,17 }, // 211 - { 1, 1,18,18, 5, 4, 6, 4, 5, 1, 4, 3 }, // 212 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 213 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 214 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 215 - { 0, 2, 3,17,18,-3, 2, 5,18, 6,-1, 7 }, // 216 - { 1, 1,17,18, 3, 2, 5,-1, 6, 8, 4, 7 }, // 217 - { 1, 1,18, 1,-2, 3, 2, 1, 7, 6, 3, 4 }, // 218 - { 0, 3, 1, 2,17, 3,18, 2, 7, 5, 4,-1 }, // 219 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 220 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 221 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 222 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 223 - { 1, 1,17,-2, 2,18,18, 8, 5, 3, 2, 6 }, // 
224 - { 0, 2,18, 5,18, 2, 3, 7,-2, 1, 6, 8 }, // 225 - { 0, 1, 2,-1,18,-1, 2, 4,-3, 5,18, 3 }, // 226 - { 0, 1, 3,17,18, 5, 2,18, 7, 3, 6, 5 }, // 227 - { 1, 4, 1, 2, 5,18,-2, 2, 3, 7,-1, 4 }, // 228 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 229 - { 0, 1, 1,18, 2, 1, 3, 4, 1, 5, 2, 7 }, // 230 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 231 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 232 - { 0, 1,17,17,18, 2, 4, 5,18,-2, 6, 3 }, // 233 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 234 - { 0, 2,18,18,-1, 3, 5, 6, 8,18, 2, 3 }, // 235 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 236 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 237 - { 0, 1,18,18, 4, 6, 8,18, 7, 3, 2, 5 }, // 238 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 239 - { 0, 2,-1,18,18,18, 2, 4,-2, 2, 3, 6 }, // 240 - { 0, 2,18,-2, 7, 1, 3, 2, 4, 6,-3, 7 }, // 241 - { 1, 1,17,18, 8, 3, 4, 6,-2, 5, 3, 8 }, // 242 - { 0, 2,18, 1, 2, 6, 2, 8, 3,18, 5, 4 }, // 243 - { 1, 1, 3,18,18, 2,18, 2,18, 3, 2,18 }, // 244 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 245 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 246 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 247 - { 1, 1, 3,17,18, 5, 2, 6, 7, 1, 4, 8 }, // 248 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 249 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 250 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 251 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 252 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 253 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 254 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2, 8 }, // 255 -}; - -static const WavpackDecorrSpec very_high_specs [] = { - { 1, 2,18,18, 2, 3,-2,18, 2, 4, 7, 5, 3, 6, 8,-1,18, 2 }, // 0 - { 0, 1,18,18,-1,18, 2, 3, 4, 6, 5, 7,18,-3, 8, 2,-1, 3 }, // 1 - { 1, 2, 1,18,-2, 4,18, 2, 3, 6,-1, 7, 5,-2,18, 8, 2, 4 }, // 2 - { 0, 1,17,17, 2, 3, 4,18,-1, 5, 6, 7,18, 2, 8,17, 3,-2 }, // 3 - { 1, 1,18,18, 2,18, 3, 2,18, 4,-1, 3,18, 2, 6, 8,17, 5 }, // 4 - { 0, 2,18,17, 2, 3,-2, 5,18,-3, 2, 4, 7, 3, 6, 8, 5,17 }, // 5 - { 1, 1,18,-2, 2,-3,18, 
5,-2,18, 2, 3, 6, 2,17, 4, 7,-1 }, // 6 - { 1, 1,17, 8,18, 3,-2, 2, 5, 4,18, 6, 3, 8, 7, 2, 5, 4 }, // 7 - { 0, 2,18,17,-2, 2,18, 3, 2, 5,-3, 4, 7,18, 3, 8, 6, 2 }, // 8 - { 1, 1, 3, 6, 5, 5, 1, 3, 7, 4, 2, 6, 4,18, 3, 7, 5, 6 }, // 9 - { 1, 2, 1,18, 3, 2,-2, 1, 5, 4, 6, 2, 7, 1, 8, 3,-1, 1 }, // 10 - { 0, 1,18,18, 2, 3, 6, 3, 5,-2, 2, 4,18, 3,-2,-1, 6, 7 }, // 11 - { 0, 1,-2,18, 2,18, 7, 2, 6,-2, 3, 4,18,18, 2,-3, 8, 5 }, // 12 - { 0, 2,18,18,18, 2, 4, 3,18, 5, 3, 6,-2, 2, 4,18, 8, 7 }, // 13 - { 0, 1,-2, 1,18, 2,-2,18,-1, 5, 7, 2, 3, 4,18, 2, 6, 2 }, // 14 - { 1, 1,17,18, 3, 2, 1, 7,-1, 2, 4, 3, 5, 6,-2,18, 7, 8 }, // 15 - { 1, 1,18,18, 2,18, 3, 4, 6,-2,18, 5, 8, 2, 3, 7, 4,-1 }, // 16 - { 0, 1,18,18,18,-1, 2, 3, 4, 6, 8,18, 3, 5, 2, 6, 7, 4 }, // 17 - { 1, 1,17,-2,18,18, 2, 5, 3, 8, 2,-1, 6, 1, 3, 4, 7, 5 }, // 18 - { 0, 1,17,17,18, 2, 3, 6,-2, 8, 1, 7, 5, 2, 3, 1, 4, 8 }, // 19 - { 1, 1,17,17, 3, 2, 7, 1, 4, 3, 6, 2, 5,-2, 8, 7,18, 6 }, // 20 - { 0, 1,18,17,-2, 2,18, 3,-3, 7, 6, 5, 2, 4,-1, 8, 3,17 }, // 21 - { 1, 1, 2,18,18,-2, 2, 4,-1, 5,18, 3, 8, 6, 2, 7,17, 4 }, // 22 - { 0, 1,17, 3, 6, 8, 5, 4, 3, 8, 1,18, 7, 2, 4, 5, 6, 3 }, // 23 - { 1, 2,17,18, 4, 8, 3, 2, 5, 7, 6, 8, 2, 7,-2,18, 3, 4 }, // 24 - { 1, 1, 6, 5, 5, 3, 4, 7, 3, 2, 4, 6, 3, 7, 1, 5, 2, 4 }, // 25 - { 1, 1, 1,18,-1, 2, 1, 3, 8,-2, 2, 5, 6, 3, 8, 7,18, 4 }, // 26 - { 0, 1, 1,17,-1,18, 3, 2, 5, 4, 6, 7, 8, 3, 4, 2, 1,-2 }, // 27 - { 0, 1,18, 2,18,18, 2,18, 6,-2,18, 7, 5, 4, 3, 2,18,-2 }, // 28 - { 0, 3, 1, 4,18, 3, 2, 4, 1, 5, 2, 3, 6,18, 8, 7, 2, 4 }, // 29 - { 0, 1,17,-2, 1,-3, 2,18, 3,-2, 4,18, 3, 6, 7,-3, 2, 8 }, // 30 - { 1, 1,17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 }, // 31 - { 1, 2,18,-1,17,18, 2, 3,-2,18, 5, 8, 2, 4, 3, 7, 6,-1 }, // 32 - { 1, 1,18,18,18,-2, 4, 2, 3,18, 5, 8, 2, 4, 6, 7,-2, 3 }, // 33 - { 1, 2,18,18,-2,18,-1, 3, 2, 5,18,-2, 7, 2, 3, 4, 6, 8 }, // 34 - { 0, 1,17,18,-1, 2, 4,18, 8, 3, 6, 5, 7,-3, 2, 4, 3,17 }, // 35 - { 1, 1,18,18,17, 2,-1,18, 3, 2,18, 6, 5, 4,18, 
7, 2,-1 }, // 36 - { 0, 2, 1,18,-1,18, 3, 2, 4, 6,-3, 7,-1, 5, 1, 2, 3, 8 }, // 37 - { 1, 1, 1,17,-2, 2,-3, 6, 3, 5, 1, 2, 7, 6, 8,-2, 4, 1 }, // 38 - { 0, 1,17,-1, 5, 1, 4, 3, 6, 2,-2,18, 3, 2, 4, 5, 8,-1 }, // 39 - { 0, 2,18,18,17, 2, 3,-2, 5,18, 2, 4, 7, 8, 6,17, 3, 5 }, // 40 - { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 41 - { 1, 2, 1,-1, 3, 2,18, 7,-2, 5, 2, 6, 4, 3,-1,18, 8, 7 }, // 42 - { 0, 2,18,17, 3,18, 2, 5, 4, 3, 6, 2, 7, 8,18, 3, 4, 5 }, // 43 - { 1, 1, 3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 }, // 44 - { 0, 2,18,18, 3,-3,18, 2, 6, 5, 3, 7,18, 4,-2, 8, 2, 3 }, // 45 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 46 - { 1, 1,17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 }, // 47 - { 1, 1, 3, 6, 5, 5, 1, 3, 7, 4, 2, 6, 4,18, 3, 7, 5, 6 }, // 48 - { 0, 1,18,18,18, 2, 4,-1,18, 8,-1, 2, 3, 4, 6,-2, 1, 7 }, // 49 - { 1, 1,18,-2,17,18, 2, 6, 3,-2, 5, 4, 7, 1,-3, 8, 2, 6 }, // 50 - { 0, 1,17,18,18, 4, 2, 7, 3, 6,-2,18, 8, 4, 5, 2, 7,17 }, // 51 - { 1, 1,18,18, 5, 4, 6, 4, 1, 5, 4, 3, 2, 5, 6, 1, 4, 5 }, // 52 - { 0, 1,18,18,-2,18, 2,-3, 3, 8, 5,18, 6, 4, 3,-1, 7, 2 }, // 53 - { 1, 1,18, 2,-2,-3,18, 5, 2, 3,-2, 4, 6, 1,-3, 2, 7, 8 }, // 54 - { 0, 1,18, 3, 5, 8, 2, 6, 7, 3, 1, 5, 2,-1, 8, 6, 7, 4 }, // 55 - { 1, 1, 4, 3, 8, 1, 5, 6, 2, 5, 8,-2, 2, 7, 3,18, 5, 4 }, // 56 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 57 - { 1, 1,17, 3,18,18, 7, 2, 4,18, 6, 2, 3,-1, 8, 5,18,-3 }, // 58 - { 0, 1, 3,17,18, 2,18, 6, 7,-3,18, 2, 5, 6, 3, 8, 7,-1 }, // 59 - { 1, 1,18,18, 2,18,18, 2,-1, 7, 3,18, 5, 2, 6, 4,-1,18 }, // 60 - { 0, 3,18, 3, 4, 1, 5, 2,18, 4, 2, 3,18, 7, 6, 1, 2, 4 }, // 61 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 62 - { 1, 1,17, 1,18, 2, 3, 6, 4, 5, 7,18, 3, 8, 2, 4,-2,17 }, // 63 - { 1, 2,18,17, 2, 3, 5,18, 6,-2, 7, 3, 2, 4,18, 8,-1, 5 }, // 64 - { 0, 2, 1,18,-1,18, 3, 2, 4, 6,-3, 7,-1, 5, 1, 2, 3, 8 }, // 65 - { 1, 1, 1,18,-1, 8, 2, 6, 3,-2, 1, 2, 5, 4,-3, 8, 6, 3 }, // 66 - 
{ 0, 1,18,18, 2,18, 2,18, 7, 6,18, 2,-2, 3, 5, 4,18, 8 }, // 67 - { 1, 2,18,17, 2, 3,18,-1, 2, 3, 6,18, 5, 4, 3, 7, 2, 8 }, // 68 - { 1, 2,18,18, 3,-2, 4,18, 5, 7, 6, 2, 4,-3, 8, 5,18, 3 }, // 69 - { 1, 1,17,-2,18,18, 2, 5, 3, 8, 2,-1, 6, 1, 3, 4, 7, 5 }, // 70 - { 1, 1, 3,17,18, 5, 7, 2, 4, 6, 1, 8,-1, 3, 7, 4, 1, 2 }, // 71 - { 0, 2, 1,-2, 2,18, 3, 5, 2, 4, 7,-1, 2, 3, 5,18,-2, 4 }, // 72 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 73 - { 1, 1, 1, 2,-2, 6,18,-3, 2, 7, 3,-2, 5, 6, 1, 8, 2, 4 }, // 74 - { 0, 1,18,18,18, 3,-2, 6,18, 2, 4, 3, 5, 8, 7, 6, 2,-2 }, // 75 - { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 76 - { 0, 1, 3,17,18, 2, 5,18, 6, 7, 5,-2, 2, 4,18, 3, 6, 8 }, // 77 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 78 - { 0, 2,17,-1,18, 2, 4,-1, 8, 3,18, 7,-3, 4, 5, 1, 2,-2 }, // 79 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 8, 6, 4, 5, 7,-1 }, // 80 - { 1, 1,18,18, 3, 6, 4, 8,-2, 2, 5, 3, 7,18, 6, 8, 4, 2 }, // 81 - { 1, 1,17,18,18,-2, 5, 2, 3, 1, 4,-1, 8, 6, 5, 3, 2,18 }, // 82 - { 1, 1,17,17, 1, 2, 4, 5, 2, 6,-1, 3, 1, 1,-2, 4, 2, 7 }, // 83 - { 1, 1,17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 }, // 84 - { 0, 1,18,17,-2,-3, 1, 2, 3, 2, 5, 4, 7,-3, 6,-2, 2, 1 }, // 85 - { 1, 1, 1, 3, 5,18, 1, 2, 7, 3, 6, 2, 5, 8,-1, 1, 4, 7 }, // 86 - { 1, 1,17, 3, 6, 8, 1, 4, 5, 3,-2, 7, 2, 8, 5, 6,18, 3 }, // 87 - { 1, 1,17,18, 2, 4, 8,-2, 3, 1, 5, 6, 7, 1, 2, 3, 4, 7 }, // 88 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 89 - { 1, 1, 3, 1, 8,18, 5, 2, 3,18, 6, 7,-2, 4, 3, 2, 8,18 }, // 90 - { 0, 1,18,17, 2,18, 3, 4,-1,18, 7, 6, 2, 8, 4,18,18, 5 }, // 91 - { 0, 1,18,18, 2,18,18, 2, 7,-2, 6, 5, 4, 3,18, 3, 2,17 }, // 92 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 93 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 94 - { 1, 1,17, 8,18, 3, 2, 1, 5, 4, 6,-1, 3,-3, 8,18, 7, 2 }, // 95 - { 1, 2,18,17,18, 2, 3, 5,-2,18, 6,-1, 2, 3, 7, 4, 8,17 }, // 96 - { 0, 1,-1,18,18, 
2,18, 3, 5,18, 2,18, 8, 6, 4, 5, 7,-1 }, // 97 - { 1, 2,18,18,-2,17, 2,18, 3, 4,18, 8, 7,-1, 2, 4, 5,17 }, // 98 - { 0, 2,17,-3,17, 3, 2,-2,18, 8, 4,-3, 2,18, 5, 3,-2, 6 }, // 99 - { 0, 1,18,18, 2,18,18, 2, 7,-2, 6, 5, 4, 3,18, 3, 2,17 }, // 100 - { 0, 2, 1,18,-1, 3, 5, 2,-3,18, 7, 3,-1, 6, 4, 2,17, 5 }, // 101 - { 1, 1,17,-2,17, 2,-3, 1, 5,-1, 4, 6, 3, 2, 8, 7,-2, 5 }, // 102 - { 1, 1, 1,18, 1, 3, 5, 8, 6, 2, 3,-1, 7, 1, 4, 8, 5,-3 }, // 103 - { 0, 2, 3,18,18, 2,18,-2, 6, 5, 7, 2, 4,18, 3, 6,-3, 5 }, // 104 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 105 - { 1, 1, 3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 }, // 106 - { 0, 4,18, 2,17, 3,18,-2, 2, 6,18, 2, 7, 3, 5, 4, 8,18 }, // 107 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 108 - { 0, 1,18,18, 2, 3, 6, 3, 5,-2, 2, 4,18, 3,-2,-1, 6, 7 }, // 109 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 110 - { 1, 1,17, 1, 2, 5, 3,-2, 1, 4, 3, 7, 6,-3, 2, 1, 1, 2 }, // 111 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 112 - { 1, 1,18,18,-2,18,-2, 2, 3, 6,18, 4,-1, 2, 3, 8, 1, 4 }, // 113 - { 1, 1,17,-2,17, 2,-3, 1, 5,-1, 4, 6, 3, 2, 8, 7,-2, 5 }, // 114 - { 0, 1,17,17,18, 3, 2,18,18, 6, 8, 2,-2, 3, 5, 4,17,18 }, // 115 - { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 116 - { 1, 1, 1, 3,-3,18,18, 6, 5,18, 2,-1, 3, 8, 7,-3, 4,17 }, // 117 - { 1, 1,18, 1, 2, 1, 3, 8, 7, 4, 1, 5, 2,-1,-3,18, 6, 2 }, // 118 - { 0, 1,18, 3, 5, 2, 6, 8,18, 5, 7, 2, 3,-1, 6, 7, 8, 5 }, // 119 - { 0, 2,18, 3,-2, 7, 8, 2, 5, 4,-3, 8, 3, 2,18, 5, 4, 6 }, // 120 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 121 - { 1, 3, 1, 1, 2, 5, 2, 7, 4, 3,-1,18,-2, 8, 2, 1, 6, 7 }, // 122 - { 0, 1, 3,17,18, 5, 2, 6, 7,18, 4, 5, 3, 6,18, 2, 7, 8 }, // 123 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 124 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 125 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 126 - { 0, 1, 
1,18, 1, 2, 3, 5, 1, 2, 6, 7, 4, 3, 8, 1,17, 5 }, // 127 - { 1, 2,17,-1,18,-2, 2, 3, 5,18, 2, 4, 6, 7, 3,-1, 5, 8 }, // 128 - { 1, 1,18,18,-3,18,-2, 2, 3,-2,18, 6, 4, 5, 8, 3,17,-3 }, // 129 - { 1, 1,18, 7, 6, 5, 5, 3, 1, 4, 2, 7, 3, 4,-3, 6,18, 8 }, // 130 - { 0, 2,18,18, 2, 3, 5,18, 2, 4, 3, 6,18, 7, 8,-1, 5, 2 }, // 131 - { 0, 1,18,17,-1, 2,18, 3, 2,18, 4, 3,18, 2, 6, 5, 8,17 }, // 132 - { 0, 2,18,17, 2, 3,18, 5,-1, 6, 7, 8, 2, 3, 4, 5,18, 6 }, // 133 - { 1, 2,18,-3,18, 2, 3,-2,-3, 5,18, 7, 6, 2, 4, 3, 8,-2 }, // 134 - { 1, 1,17,18,18,-2, 2, 3, 5, 4, 8,18,-1, 5, 3, 6,-2, 7 }, // 135 - { 1, 2,18,17, 2,-2,18, 3,-1, 4,18, 2, 7, 5, 3, 8, 6, 4 }, // 136 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 137 - { 1, 1, 1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 }, // 138 - { 0, 2,18,18, 3, 3,-2, 2, 5,18, 6, 3,-1, 4, 7,-1, 1, 2 }, // 139 - { 0, 1,-2, 1,18, 2,-2, 5, 7,18, 3, 2, 6, 2,-1, 4,-2,17 }, // 140 - { 0, 2,18,18,18, 2, 3,-2,18, 5, 4, 2, 6, 8, 3,-2, 4,18 }, // 141 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 142 - { 1, 1,17,18,-1, 3, 2, 5, 1, 3, 2, 8, 4, 7, 6, 2,-1, 5 }, // 143 - { 1, 1,17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 }, // 144 - { 0, 1,18,18,-2,18, 2, 3, 4, 5, 6,18, 8, 2, 3, 7,-2, 4 }, // 145 - { 0, 1,18,-2,18,18,-3,-2, 2, 3, 5, 8, 1, 2, 6, 4, 7,-1 }, // 146 - { 0, 1,18,17, 2,18, 3,-2, 2, 7, 6, 4,18, 3, 8, 7, 4, 2 }, // 147 - { 1, 1,17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 }, // 148 - { 1, 1,18,17,18, 2, 5, 3,-2,18, 6, 2, 3, 4, 8, 7, 5,-1 }, // 149 - { 0, 1, 2,-1,18,-1, 2, 4,-3,18, 5, 3, 6,18, 2, 4, 7, 8 }, // 150 - { 1, 1,17,18, 8, 3, 6, 4,-1, 5, 2, 7, 3, 8, 6, 5,18, 4 }, // 151 - { 0, 2,18, 3,-2, 7, 8, 2, 5, 4,-3, 8, 3, 2,18, 5, 4, 6 }, // 152 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 153 - { 1, 1, 1,18,-1, 8, 2, 6, 3,-2, 1, 2, 5, 4,-3, 8, 6, 3 }, // 154 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 155 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 156 
- { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 157 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 158 - { 0, 1,17,18,18, 4, 2, 7, 3, 6,-2,18, 8, 4, 5, 2, 7,17 }, // 159 - { 1, 2,18,-1,18, 3,-2,18, 2, 5, 3, 6, 7, 2,-1,18, 8, 4 }, // 160 - { 1, 2, 1,18,-2, 4,18, 2, 3, 6,-1, 7, 5,-2,18, 8, 2, 4 }, // 161 - { 1, 2, 1,18,-3, 2, 3,18,-1, 5, 6, 2, 8, 3, 4, 1,-2, 7 }, // 162 - { 0, 1, 1,17,-1,18, 3, 2, 5, 4, 6, 7, 8, 3, 4, 2, 1,-2 }, // 163 - { 1, 1,18,17,18, 4, 3, 5, 1, 2, 6, 3, 4, 7, 1, 8, 5, 2 }, // 164 - { 0, 1,18,-2, 7, 1, 3, 2,-3, 4, 6,-2, 7, 8, 1, 5, 4, 3 }, // 165 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 166 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 167 - { 0, 2,18,18,18,-2, 2, 5, 3, 7,18, 2, 4,-3, 5, 6, 3, 8 }, // 168 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 169 - { 0, 3, 3,18,-1, 5, 2, 7,18, 6, 5, 2, 4, 3,-1, 7,18, 6 }, // 170 - { 0, 2,18,18,18, 4, 3, 2, 6, 4, 8,18, 5, 3, 2, 7,-2, 6 }, // 171 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 172 - { 0, 2,18,18,18, 2, 3,-2,18, 5, 4, 2, 6, 8, 3,-2, 4,18 }, // 173 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 174 - { 1, 1,17, 8,18, 3, 2, 1, 5, 4, 6,-1, 3,-3, 8,18, 7, 2 }, // 175 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 176 - { 0, 1,-1,18,18,18, 2, 4, 6,-2, 2, 8, 3, 4,18, 7,-1, 6 }, // 177 - { 0, 1,18, 1,-2, 2, 4, 1, 3,-1, 2, 5, 7, 1, 6, 8,-2,17 }, // 178 - { 0, 1,17,17,18, 2, 5, 4,18, 3, 8, 7, 4, 6, 8, 1, 5, 2 }, // 179 - { 1, 2,18,18, 5, 4, 6, 3, 4,18, 8, 4,-1, 7, 5, 3, 6, 2 }, // 180 - { 0, 1,18,18,-3,18, 3, 6, 2, 5, 7,18, 3, 8,-1, 4, 5, 2 }, // 181 - { 1, 1,18, 2,-2,-3,18, 5, 2,-2, 4, 3, 6,18, 8,-1, 2, 7 }, // 182 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 183 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 184 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 185 - { 1, 1,17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 
}, // 186 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 187 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 188 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 189 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 190 - { 0, 1,17,18, 3,18, 2, 5, 4, 7,-3, 6, 3, 2,18, 4, 7, 3 }, // 191 - { 1, 1, 1, 7, 4, 5, 3, 4, 5, 1, 3, 6, 3, 2, 4, 8,-2, 7 }, // 192 - { 0, 1, 1,18,-1,-2,18, 3, 2,-1, 6, 7, 4, 5, 3,18, 2,-3 }, // 193 - { 1, 1,18,18,-1, 3, 6,18, 5, 4, 8, 2, 3, 6,18, 7, 4,-2 }, // 194 - { 0, 2,18,18, 2, 6,18, 2,18, 5, 3,18, 2, 4, 7, 8, 3,18 }, // 195 - { 1, 1, 3,18,18, 5,18, 6, 2, 4, 7,-2,18, 5, 8, 6, 3, 2 }, // 196 - { 0, 1,18,-2, 7, 1, 3, 2,-3, 4, 6,-2, 7, 8, 1, 5, 4, 3 }, // 197 - { 1, 1,18,-2,18, 2, 5,18, 3,-2, 4, 7, 2,-1, 8, 6, 5, 1 }, // 198 - { 1, 1,17,17, 5,18, 4, 1, 2, 8, 6, 4,-2, 3, 5,-1, 1, 8 }, // 199 - { 0, 2, 1, 2,17, 3, 7,18, 2,-1, 4, 5,18, 2, 7, 3, 6, 8 }, // 200 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 201 - { 1, 1, 3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 }, // 202 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 203 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 204 - { 0, 2,18,18,18, 2,-2, 3, 6, 4, 8,18, 2, 5, 7, 4, 3, 6 }, // 205 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 206 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 207 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 208 - { 1, 1,18, 1, 8, 3, 5, 6, 4,-1, 8, 3, 7,18, 2, 5, 8, 4 }, // 209 - { 1, 1,17,18, 5, 2, 4, 3, 1, 6,-2, 1, 3, 2, 4, 5,-1,17 }, // 210 - { 1, 1,18,17, 2,18, 3,-3, 7, 2, 6, 4, 3, 5,18, 8, 2,-2 }, // 211 - { 1, 1,18,17,18, 4, 3, 5,-1,18, 2, 7, 8, 4, 6, 3,18, 5 }, // 212 - { 0, 1,18,17,18,-2, 2,-3, 3, 4, 8, 5, 2,18, 6, 3, 7,-2 }, // 213 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 214 - { 1, 1,17,18, 8, 3, 4, 6,18, 5,-2, 3, 8, 5, 2, 4, 7, 6 }, // 215 - { 0, 1,18,-2, 3, 5, 1, 7, 3, 2, 6,-3, 4, 1, 
5, 8, 3,-2 }, // 216 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 217 - { 1, 1, 3,17,18, 5,-1,18, 2, 6, 7,18, 5, 3,-3,-1, 6, 2 }, // 218 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 219 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 220 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 221 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 222 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 223 - { 1, 3,18,17,-2, 3,-1,18, 2, 5, 3, 7, 6, 2, 4, 8,18, 5 }, // 224 - { 0, 1,18,-1,18, 2,18, 3, 5,18, 2, 8,18, 5, 4,-1, 6, 2 }, // 225 - { 1, 2,18,-2,18,18, 2, 3, 4,-3, 2, 5,18, 7, 4, 3, 8, 6 }, // 226 - { 0, 2,17,-1,18, 2,-1, 1, 7, 3, 8, 5,-2, 4, 1, 2,-3, 6 }, // 227 - { 0, 1,18,17, 2,18, 2,18, 6, 7, 4, 3,18, 5, 2,-2,17, 8 }, // 228 - { 0, 3,18,17, 2, 3,-3,-1,18, 2, 4, 5,18, 7, 3, 2,-3, 6 }, // 229 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 230 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 231 - { 0, 2, 3,18,18,18, 2, 6, 5,18, 7, 2, 4, 6,18, 5, 3, 8 }, // 232 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 233 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 234 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 235 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 236 - { 0, 1,18,18, 3, 6, 3,-2, 2,18, 5,-1, 7, 3, 4,-2, 2, 6 }, // 237 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 238 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 239 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 240 - { 1, 1,18,17,18,18,-2, 2, 3,-3,18, 6, 4, 2,-2, 8, 3, 7 }, // 241 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 242 - { 0, 1,18,18,18, 4, 2, 7, 8,18, 3, 2,-2, 4, 7, 6,17, 5 }, // 243 - { 1, 1,18,18,-1,-2, 8, 3,18, 6, 3, 5, 8, 2, 4, 7, 1, 6 }, // 244 - { 1, 1, 1,-3, 3,18,18, 2,-1, 3, 6, 5,18, 4, 7,-2, 8, 3 }, // 245 - { 1, 1, 1,18, 4, 2, 5,18, 1, 
3,-1, 6, 1, 4, 8, 2, 5, 1 }, // 246 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 247 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 248 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 249 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 250 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 251 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 252 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 253 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 254 - { 0, 1,-1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 }, // 255 -}; - -#define NUM_FAST_SPECS (sizeof (fast_specs) / sizeof (fast_specs [0])) -#define NUM_DEFAULT_SPECS (sizeof (default_specs) / sizeof (default_specs [0])) -#define NUM_HIGH_SPECS (sizeof (high_specs) / sizeof (high_specs [0])) -#define NUM_VERY_HIGH_SPECS (sizeof (very_high_specs) / sizeof (very_high_specs [0])) +#include "wavpack_local.h" +#include "decorr_tables.h" // contains data, only include from this module! ///////////////////////////// executable code //////////////////////////////// @@ -1093,6 +35,10 @@ void pack_init (WavpackContext *wpc) CLEAR (wps->decorr_passes); CLEAR (wps->dc); +#ifdef SKIP_DECORRELATION + wpc->config.xmode = 0; +#endif + /* although we set the term and delta values here for clarity, they're * actually hardcoded in the analysis function for speed */ @@ -1103,7 +49,7 @@ void pack_init (WavpackContext *wpc) if (wpc->config.flags & CONFIG_AUTO_SHAPING) { if (wpc->config.flags & CONFIG_OPTIMIZE_WVC) - wps->dc.shaping_acc [0] = wps->dc.shaping_acc [1] = -512L << 16; + wps->dc.shaping_acc [0] = wps->dc.shaping_acc [1] = -(512L << 16); else if (wpc->config.sample_rate >= 64000) wps->dc.shaping_acc [0] = wps->dc.shaping_acc [1] = 1024L << 16; else @@ -1154,7 +100,7 @@ void pack_init (WavpackContext *wpc) // array into the specified metadata structure. 
Both the actual term id and // the delta are packed into single characters. -void write_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) +static void write_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) { int tcount = wps->num_terms; struct decorr_pass *dpp; @@ -1174,7 +120,7 @@ void write_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) // range +/-1024, but are rounded and truncated to fit in signed chars for // metadata storage. Weights are separate for the two channels -void write_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) +static void write_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) { struct decorr_pass *dpp = wps->decorr_passes; int tcount = wps->num_terms, i; @@ -1215,7 +161,7 @@ void write_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) // sending more than the first term's samples is a waste. The "wcount" // variable can be set to the number of terms to have their samples stored. -void write_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) +static void write_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) { int tcount = wps->num_terms, wcount = 1, temp; struct decorr_pass *dpp; @@ -1227,27 +173,27 @@ void write_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) for (dpp = wps->decorr_passes; tcount--; ++dpp) if (wcount) { if (dpp->term > MAX_TERM) { - dpp->samples_A [0] = exp2s (temp = log2s (dpp->samples_A [0])); + dpp->samples_A [0] = wp_exp2s (temp = wp_log2s (dpp->samples_A [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; - dpp->samples_A [1] = exp2s (temp = log2s (dpp->samples_A [1])); + dpp->samples_A [1] = wp_exp2s (temp = wp_log2s (dpp->samples_A [1])); *byteptr++ = temp; *byteptr++ = temp >> 8; if (!(wps->wphdr.flags & MONO_DATA)) { - dpp->samples_B [0] = exp2s (temp = log2s (dpp->samples_B [0])); + dpp->samples_B [0] = wp_exp2s (temp = wp_log2s (dpp->samples_B [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; - dpp->samples_B [1] = exp2s (temp = 
log2s (dpp->samples_B [1])); + dpp->samples_B [1] = wp_exp2s (temp = wp_log2s (dpp->samples_B [1])); *byteptr++ = temp; *byteptr++ = temp >> 8; } } else if (dpp->term < 0) { - dpp->samples_A [0] = exp2s (temp = log2s (dpp->samples_A [0])); + dpp->samples_A [0] = wp_exp2s (temp = wp_log2s (dpp->samples_A [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; - dpp->samples_B [0] = exp2s (temp = log2s (dpp->samples_B [0])); + dpp->samples_B [0] = wp_exp2s (temp = wp_log2s (dpp->samples_B [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; } @@ -1255,12 +201,12 @@ void write_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) int m = 0, cnt = dpp->term; while (cnt--) { - dpp->samples_A [m] = exp2s (temp = log2s (dpp->samples_A [m])); + dpp->samples_A [m] = wp_exp2s (temp = wp_log2s (dpp->samples_A [m])); *byteptr++ = temp; *byteptr++ = temp >> 8; if (!(wps->wphdr.flags & MONO_DATA)) { - dpp->samples_B [m] = exp2s (temp = log2s (dpp->samples_B [m])); + dpp->samples_B [m] = wp_exp2s (temp = wp_log2s (dpp->samples_B [m])); *byteptr++ = temp; *byteptr++ = temp >> 8; } @@ -1285,7 +231,7 @@ void write_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) // hybrid data. The "delta" parameter is not yet used in encoding as it // will be part of the "quality" mode. 
-void write_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd) +static void write_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd) { char *byteptr; int temp; @@ -1293,29 +239,29 @@ void write_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd) byteptr = wpmd->data = malloc (12); wpmd->id = ID_SHAPING_WEIGHTS; - wps->dc.error [0] = exp2s (temp = log2s (wps->dc.error [0])); + wps->dc.error [0] = wp_exp2s (temp = wp_log2s (wps->dc.error [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; - wps->dc.shaping_acc [0] = exp2s (temp = log2s (wps->dc.shaping_acc [0])); + wps->dc.shaping_acc [0] = wp_exp2s (temp = wp_log2s (wps->dc.shaping_acc [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; if (!(wps->wphdr.flags & MONO_DATA)) { - wps->dc.error [1] = exp2s (temp = log2s (wps->dc.error [1])); + wps->dc.error [1] = wp_exp2s (temp = wp_log2s (wps->dc.error [1])); *byteptr++ = temp; *byteptr++ = temp >> 8; - wps->dc.shaping_acc [1] = exp2s (temp = log2s (wps->dc.shaping_acc [1])); + wps->dc.shaping_acc [1] = wp_exp2s (temp = wp_log2s (wps->dc.shaping_acc [1])); *byteptr++ = temp; *byteptr++ = temp >> 8; } if (wps->dc.shaping_delta [0] | wps->dc.shaping_delta [1]) { - wps->dc.shaping_delta [0] = exp2s (temp = log2s (wps->dc.shaping_delta [0])); + wps->dc.shaping_delta [0] = wp_exp2s (temp = wp_log2s (wps->dc.shaping_delta [0])); *byteptr++ = temp; *byteptr++ = temp >> 8; if (!(wps->wphdr.flags & MONO_DATA)) { - wps->dc.shaping_delta [1] = exp2s (temp = log2s (wps->dc.shaping_delta [1])); + wps->dc.shaping_delta [1] = wp_exp2s (temp = wp_log2s (wps->dc.shaping_delta [1])); *byteptr++ = temp; *byteptr++ = temp >> 8; } @@ -1329,7 +275,7 @@ void write_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd) // than 24 bits of magnitude or, in some cases, it's used to eliminate // redundant bits from any audio stream. 
-void write_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) +static void write_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) { char *byteptr; @@ -1342,29 +288,43 @@ void write_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); } +static void write_float_info (WavpackStream *wps, WavpackMetadata *wpmd) +{ + char *byteptr; + + byteptr = wpmd->data = malloc (4); + wpmd->id = ID_FLOAT_INFO; + *byteptr++ = wps->float_flags; + *byteptr++ = wps->float_shift; + *byteptr++ = wps->float_max_exp; + *byteptr++ = wps->float_norm_exp; + wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); +} + // Allocate room for and copy the multichannel information into the specified // metadata structure. The first byte is the total number of channels and the // following bytes represent the channel_mask as described for Microsoft // WAVEFORMATEX. -void write_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) +static void write_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) { uint32_t mask = wpc->config.channel_mask; - char *byteptr; + char *byteptr = wpmd->data = malloc (8); - if (wpc->num_streams > OLD_MAX_STREAMS) { - byteptr = wpmd->data = malloc (6); - wpmd->id = ID_CHANNEL_INFO; - *byteptr++ = wpc->config.num_channels - 1; - *byteptr++ = wpc->num_streams - 1; + wpmd->id = ID_CHANNEL_INFO; + + if (wpc->num_streams > OLD_MAX_STREAMS) { // if > 8 streams, use 6 or 7 bytes (breaks old decoders + *byteptr++ = wpc->config.num_channels - 1; // that could only handle 8 streams) and allow (in theory) + *byteptr++ = wpc->num_streams - 1; // up to 4096 channels *byteptr++ = (((wpc->num_streams - 1) >> 4) & 0xf0) | (((wpc->config.num_channels - 1) >> 8) & 0xf); *byteptr++ = mask; *byteptr++ = (mask >> 8); *byteptr++ = (mask >> 16); + + if (mask & 0xff000000) // this will break versions < 5.0, but is RF64-specific + *byteptr++ = (mask >> 24); } - else { - byteptr = wpmd->data = malloc (4); - 
wpmd->id = ID_CHANNEL_INFO; + else { // otherwise use only 1 to 5 bytes *byteptr++ = wpc->config.num_channels; while (mask) { @@ -1376,17 +336,30 @@ void write_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); } +// Allocate room for and copy the multichannel identities into the specified +// metadata structure. Data is an array of unsigned characters representing +// any channels in the file that DO NOT match one the 18 Microsoft standard +// channels (and are represented in the channel mask). A value of 0 is not +// allowed and 0xff means an unknown or undefined channel identity. + +static void write_channel_identities_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + wpmd->byte_length = (int) strlen ((char *) wpc->channel_identities); + wpmd->data = strdup ((char *) wpc->channel_identities); + wpmd->id = ID_CHANNEL_IDENTITIES; +} + // Allocate room for and copy the configuration information into the specified // metadata structure. Currently, we just store the upper 3 bytes of // config.flags and only in the first block of audio data. Note that this is // for informational purposes not required for playback or decoding (like // whether high or fast mode was specified). 
-void write_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) +static void write_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) { char *byteptr; - byteptr = wpmd->data = malloc (4); + byteptr = wpmd->data = malloc (8); wpmd->id = ID_CONFIG_BLOCK; *byteptr++ = (char) (wpc->config.flags >> 8); *byteptr++ = (char) (wpc->config.flags >> 16); @@ -1395,16 +368,64 @@ void write_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) if (wpc->config.flags & CONFIG_EXTRA_MODE) *byteptr++ = (char) wpc->config.xmode; + // for the 5.0.0 alpha, we wrote the qmode flags here, but this + // has been replaced with the new_config block + // *byteptr++ = (char) wpc->config.qmode; + wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); } -// Allocate room for and copy the non-standard sampling rateinto the specified -// metadata structure. We just store the lower 3 bytes of the sampling rate. -// Note that this would only be used when the sampling rate was not included -// in the table of 15 "standard" values. +// Allocate room for and copy the "new" configuration information into the +// specified metadata structure. This is all the stuff introduced with version +// 5.0 and includes the qmode flags (big-endian, etc.) and CAF extended +// channel layouts (including optional reordering). Even if there is no new +// configuration, we still send the empty metadata block to signal a 5.0 file. 
-void write_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd) +static void write_new_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + char *byteptr = wpmd->data = malloc (260); + wpmd->id = ID_NEW_CONFIG_BLOCK; + + if (wpc->file_format || (wpc->config.qmode & 0xff) || wpc->channel_layout) { + *byteptr++ = (char) wpc->file_format; + *byteptr++ = (char) wpc->config.qmode; + + if (wpc->channel_layout) { + int nchans = wpc->channel_layout & 0xff; + + *byteptr++ = (char) ((wpc->channel_layout & 0xff0000) >> 16); + + if (wpc->channel_reordering || nchans != wpc->config.num_channels) + *byteptr++ = (char) nchans; + + if (wpc->channel_reordering) { + int i, num_to_send = 0; + + // to save space, don't send redundant reorder string bytes + + for (i = 0; i < nchans; ++i) + if (wpc->channel_reordering [i] != i) + num_to_send = i + 1; + + if (num_to_send) { + memcpy (byteptr, wpc->channel_reordering, num_to_send); + byteptr += num_to_send; + } + } + } + } + + wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); +} + +// Allocate room for and copy the non-standard sampling rate into the specified +// metadata structure. We normally store the lower 3 bytes of the sampling rate, +// unless 4 bytes are required (introduced in version 5). Note that this would +// only be used when the sampling rate was not included in the table of 15 +// "standard" values. + +static void write_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd) { char *byteptr; @@ -1413,6 +434,12 @@ void write_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd) *byteptr++ = (char) (wpc->config.sample_rate); *byteptr++ = (char) (wpc->config.sample_rate >> 8); *byteptr++ = (char) (wpc->config.sample_rate >> 16); + + // handle 4-byte sampling rates for scientific applications, etc. 
+ + if (wpc->config.sample_rate & 0x7f000000) + *byteptr++ = (char) (wpc->config.sample_rate >> 24) & 0x7f; + wpmd->byte_length = (int32_t)(byteptr - (char *) wpmd->data); } @@ -1425,13 +452,13 @@ void write_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd) // "wps->blockend" points to the end of the available space. A return value of // FALSE indicates an error. -static void best_floating_line (short *values, int num_values, double *initial_y, double *final_y, short *max_error); -static void dynamic_noise_shaping (WavpackContext *wpc, int32_t *buffer, int shortening_allowed); static int scan_int32_data (WavpackStream *wps, int32_t *values, int32_t num_values); static void scan_int32_quick (WavpackStream *wps, int32_t *values, int32_t num_values); static void send_int32_data (WavpackStream *wps, int32_t *values, int32_t num_values); static int scan_redundancy (int32_t *values, int32_t num_values); static int pack_samples (WavpackContext *wpc, int32_t *buffer); +static void bs_open_write (Bitstream *bs, void *buffer_start, void *buffer_end); +static uint32_t bs_close_write (Bitstream *bs); int pack_block (WavpackContext *wpc, int32_t *buffer) { @@ -1440,6 +467,13 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) int32_t sample_count = wps->wphdr.block_samples, *orig_data = NULL; int dynamic_shaping_done = FALSE; + // This is done first because this code can potentially change the size of the block about to + // be encoded. This can happen because the dynamic noise shaping algorithm wants to send a + // shorter block because the desired noise-shaping profile is changing quickly. It can also + // be that the --merge-blocks feature wants to create a longer block because it combines areas + // with equal redundancy. These are not applicable for anything besides the first stream of + // the file and they are not applicable with float data or >24-bit data. 
+ if (!wpc->current_stream && !(flags & FLOAT_DATA) && (flags & MAG_MASK) >> MAG_LSB < 24) { if ((wpc->config.flags & CONFIG_DYNAMIC_SHAPING) && !wpc->config.block_samples) { dynamic_noise_shaping (wpc, buffer, TRUE); @@ -1459,7 +493,10 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) } } - if (!(flags & MONO_FLAG) && wpc->stream_version >= 0x410) { + // This code scans stereo data to check whether it can be stored as mono data + // (i.e., all L/R samples identical). Only available with MAX_STREAM_VERS. + + if (!(flags & MONO_FLAG) && wpc->stream_version == MAX_STREAM_VERS) { int32_t lor = 0, diff = 0; int32_t *sptr, *dptr, i; @@ -1493,6 +530,9 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) } } + // This is where we handle any fixed shift which occurs when the integer size does not evenly fit + // in bytes (like 12-bit or 20-bit) and is the same for the entire file (not based on scanning) + if (flags & SHIFT_MASK) { int shift = (flags & SHIFT_MASK) >> SHIFT_LSB; int mag = (flags & MAG_MASK) >> MAG_LSB; @@ -1516,12 +556,23 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) wps->wphdr.flags = flags; } - if ((flags & FLOAT_DATA) || (flags & MAG_MASK) >> MAG_LSB >= 24) { + // The regular WavPack decorrelation and entropy encoding can handle up to 24-bit integer data. If + // we have float data or integers larger than 24-bit, then we have to potentially do extra processing. + // For lossy encoding, we can simply convert this data in-place to 24-bit data and encode and sent + // that, along with some metadata about how to restore the original format (even if the restoration + // is not exact). However, for lossless operation we must make a copy of the original data that will + // be used to create a "extension stream" that will allow verbatim restoration of the original data. + // In the hybrid mode that extension goes in the correction file, otherwise it goes in the mail file. 
+ + if ((flags & FLOAT_DATA) || (flags & MAG_MASK) >> MAG_LSB >= 24) { // if float data or >24-bit integers... + + // if lossless we have to copy the data to use later... + if ((!(flags & HYBRID_FLAG) || wpc->wvc_flag) && !(wpc->config.flags & CONFIG_SKIP_WVX)) { orig_data = malloc (sizeof (f32) * ((flags & MONO_DATA) ? sample_count : sample_count * 2)); memcpy (orig_data, buffer, sizeof (f32) * ((flags & MONO_DATA) ? sample_count : sample_count * 2)); - if (flags & FLOAT_DATA) { + if (flags & FLOAT_DATA) { // if lossless float data come here wps->float_norm_exp = wpc->config.float_norm_exp; if (!scan_float_data (wps, (f32 *) buffer, (flags & MONO_DATA) ? sample_count : sample_count * 2)) { @@ -1529,14 +580,14 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) orig_data = NULL; } } - else { + else { // otherwise lossless > 24-bit integers if (!scan_int32_data (wps, buffer, (flags & MONO_DATA) ? sample_count : sample_count * 2)) { free (orig_data); orig_data = NULL; } } } - else { + else { // otherwise, we're lossy, so no copy if (flags & FLOAT_DATA) { wps->float_norm_exp = wpc->config.float_norm_exp; @@ -1547,20 +598,30 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) wpc->lossy_blocks = TRUE; } + // if there's any chance of magnitude change, clear the noise-shaping error term + // and also reset the entropy encoder (which this does) + + wps->dc.error [0] = wps->dc.error [1] = 0; wps->num_terms = 0; } + // if 24-bit integers or less we do a "quick" scan which just scans for redundancy and does NOT set the flag's "magnitude" value else { scan_int32_quick (wps, buffer, (flags & MONO_DATA) ? 
sample_count : sample_count * 2); - if (wps->shift != wps->int32_zeros + wps->int32_ones + wps->int32_dups) { + if (wps->shift != wps->int32_zeros + wps->int32_ones + wps->int32_dups) { // detect a change in any redundancy shifting here wps->shift = wps->int32_zeros + wps->int32_ones + wps->int32_dups; - wps->num_terms = 0; + wps->dc.error [0] = wps->dc.error [1] = 0; // on a change, clear the noise-shaping error term and + wps->num_terms = 0; // also reset the entropy encoder (which this does) } } - if ((wpc->config.flags & CONFIG_DYNAMIC_SHAPING) && !dynamic_shaping_done) + if ((wpc->config.flags & CONFIG_DYNAMIC_SHAPING) && !dynamic_shaping_done) // calculate dynamic noise profile dynamic_noise_shaping (wpc, buffer, FALSE); + // In some cases we need to start the decorrelation and entropy encoding from scratch. This + // could be because we switched from stereo to mono encoding or because the magnitude of + // the data changed, or just because this is the first block. + if (!wps->num_passes && !wps->num_terms) { wps->num_passes = 1; @@ -1572,6 +633,8 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) wps->num_passes = 0; } + // actually pack the block here and return on an error (which pretty much can only be a block buffer overrun) + if (!pack_samples (wpc, buffer)) { wps->wphdr.flags = sflags; @@ -1583,6 +646,8 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) else wps->wphdr.flags = sflags; + // potentially move any unused dynamic noise shaping profile data to use next time + if (wps->dc.shaping_data) { if (wps->dc.shaping_samples != sample_count) memmove (wps->dc.shaping_data, wps->dc.shaping_data + sample_count, @@ -1591,6 +656,10 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) wps->dc.shaping_samples -= sample_count; } + // finally, if we're doing lossless float data or lossless >24-bit integers, this is where we take the + // original data that we saved earlier and create the "extension" stream containing the information + // 
required to refine the "lossy" 24-bit data into the lossless original + if (orig_data) { uint32_t data_count; unsigned char *cptr; @@ -1634,133 +703,6 @@ int pack_block (WavpackContext *wpc, int32_t *buffer) return TRUE; } -static void dynamic_noise_shaping (WavpackContext *wpc, int32_t *buffer, int shortening_allowed) -{ - WavpackStream *wps = wpc->streams [wpc->current_stream]; - int32_t sample_count = wps->wphdr.block_samples; - struct decorr_pass *ap = &wps->analysis_pass; - uint32_t flags = wps->wphdr.flags; - int32_t *bptr, temp, sam; - short *swptr; - int sc; - - if (!wps->num_terms && sample_count > 8) { - if (flags & MONO_DATA) - for (bptr = buffer + sample_count - 3, sc = sample_count - 2; sc--;) { - sam = (3 * bptr [1] - bptr [2]) >> 1; - temp = *bptr-- - apply_weight (ap->weight_A, sam); - update_weight (ap->weight_A, 2, sam, temp); - } - else - for (bptr = buffer + (sample_count - 3) * 2 + 1, sc = sample_count - 2; sc--;) { - sam = (3 * bptr [2] - bptr [4]) >> 1; - temp = *bptr-- - apply_weight (ap->weight_B, sam); - update_weight (ap->weight_B, 2, sam, temp); - sam = (3 * bptr [2] - bptr [4]) >> 1; - temp = *bptr-- - apply_weight (ap->weight_A, sam); - update_weight (ap->weight_A, 2, sam, temp); - } - } - - if (sample_count > wps->dc.shaping_samples) { - sc = sample_count - wps->dc.shaping_samples; - swptr = wps->dc.shaping_data + wps->dc.shaping_samples; - bptr = buffer + wps->dc.shaping_samples * ((flags & MONO_DATA) ? 1 : 2); - - if (flags & MONO_DATA) - while (sc--) { - sam = (3 * ap->samples_A [0] - ap->samples_A [1]) >> 1; - temp = *bptr - apply_weight (ap->weight_A, sam); - update_weight (ap->weight_A, 2, sam, temp); - ap->samples_A [1] = ap->samples_A [0]; - ap->samples_A [0] = *bptr++; - *swptr++ = (ap->weight_A < 256) ? 
1024 : 1536 - ap->weight_A * 2; - } - else - while (sc--) { - sam = (3 * ap->samples_A [0] - ap->samples_A [1]) >> 1; - temp = *bptr - apply_weight (ap->weight_A, sam); - update_weight (ap->weight_A, 2, sam, temp); - ap->samples_A [1] = ap->samples_A [0]; - ap->samples_A [0] = *bptr++; - - sam = (3 * ap->samples_B [0] - ap->samples_B [1]) >> 1; - temp = *bptr - apply_weight (ap->weight_B, sam); - update_weight (ap->weight_B, 2, sam, temp); - ap->samples_B [1] = ap->samples_B [0]; - ap->samples_B [0] = *bptr++; - - *swptr++ = (ap->weight_A + ap->weight_B < 512) ? 1024 : 1536 - ap->weight_A - ap->weight_B; - } - - wps->dc.shaping_samples = sample_count; - } - - if (wpc->wvc_flag) { - int max_allowed_error = 1000000 / wpc->ave_block_samples; - short max_error, trial_max_error; - double initial_y, final_y; - - if (max_allowed_error < 128) - max_allowed_error = 128; - - best_floating_line (wps->dc.shaping_data, sample_count, &initial_y, &final_y, &max_error); - - if (shortening_allowed && max_error > max_allowed_error) { - int min_samples = 0, max_samples = sample_count, trial_count; - double trial_initial_y, trial_final_y; - - while (1) { - trial_count = (min_samples + max_samples) / 2; - - best_floating_line (wps->dc.shaping_data, trial_count, &trial_initial_y, - &trial_final_y, &trial_max_error); - - if (trial_max_error < max_allowed_error) { - max_error = trial_max_error; - min_samples = trial_count; - initial_y = trial_initial_y; - final_y = trial_final_y; - } - else - max_samples = trial_count; - - if (min_samples > 10000 || max_samples - min_samples < 2) - break; - } - - sample_count = min_samples; - } - - if (initial_y < -512) initial_y = -512; - else if (initial_y > 1024) initial_y = 1024; - - if (final_y < -512) final_y = -512; - else if (final_y > 1024) final_y = 1024; -#if 0 - error_line ("%.2f sec, sample count = %5d, max error = %3d, range = %5d, %5d, actual = %5d, %5d", - (double) wps->sample_index / wpc->config.sample_rate, sample_count, max_error, - 
(int) floor (initial_y), (int) floor (final_y), - wps->dc.shaping_data [0], wps->dc.shaping_data [sample_count-1]); -#endif - if (sample_count != wps->wphdr.block_samples) - wps->wphdr.block_samples = sample_count; - - if (wpc->wvc_flag) { - wps->dc.shaping_acc [0] = wps->dc.shaping_acc [1] = (int32_t) floor (initial_y * 65536.0 + 0.5); - - wps->dc.shaping_delta [0] = wps->dc.shaping_delta [1] = - (int32_t) floor ((final_y - initial_y) / (sample_count - 1) * 65536.0 + 0.5); - - wps->dc.shaping_array = NULL; - } - else - wps->dc.shaping_array = wps->dc.shaping_data; - } - else - wps->dc.shaping_array = wps->dc.shaping_data; -} - // Quickly scan a buffer of long integer data and determine whether any // redundancy in the LSBs can be used to reduce the data's magnitude. If yes, // then the INT32_DATA flag is set and the int32 parameters are set. This @@ -1957,6 +899,45 @@ static void send_int32_data (WavpackStream *wps, int32_t *values, int32_t num_va } } +void send_general_metadata (WavpackContext *wpc) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + uint32_t flags = wps->wphdr.flags; + WavpackMetadata wpmd; + + if ((flags & SRATE_MASK) == SRATE_MASK && wpc->config.sample_rate != 44100) { + write_sample_rate (wpc, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + } + + if ((flags & INITIAL_BLOCK) && + (wpc->config.num_channels > 2 || + wpc->config.channel_mask != 0x5 - wpc->config.num_channels)) { + write_channel_info (wpc, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + + if (wpc->channel_identities) { + write_channel_identities_info (wpc, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + } + } + + if ((flags & INITIAL_BLOCK) && !wps->sample_index) { + write_config_info (wpc, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + } + + if (flags & INITIAL_BLOCK) { + write_new_config_info 
(wpc, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + } +} + // Pack an entire block of samples (either mono or stereo) into a completed // WavPack block. It is assumed that there is sufficient space for the // completed block at "wps->blockbuff" and that "wps->blockend" points to the @@ -1968,20 +949,55 @@ static void send_int32_data (WavpackStream *wps, int32_t *values, int32_t num_va // the caller must look at the ckSize field of the written WavpackHeader, NOT // the one in the WavpackStream. -static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass_id2 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +#ifdef OPT_ASM_X86 + #define DECORR_STEREO_PASS(a,b,c) do { \ + if (pack_cpu_has_feature_x86 (CPU_FEATURE_MMX)) \ + pack_decorr_stereo_pass_x86 (a, b, c); \ + else decorr_stereo_pass (a, b, c); } while (0) + #define DECORR_MONO_BUFFER pack_decorr_mono_buffer_x86 + #define SCAN_MAX_MAGNITUDE(a,b) \ + (pack_cpu_has_feature_x86 (CPU_FEATURE_MMX) ? 
\ + scan_max_magnitude_x86 (a, b) : \ + scan_max_magnitude (a, b)) +#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__)) + #define DECORR_STEREO_PASS pack_decorr_stereo_pass_x64win + #define DECORR_MONO_BUFFER pack_decorr_mono_buffer_x64win + #define SCAN_MAX_MAGNITUDE scan_max_magnitude_x64win +#elif defined(OPT_ASM_X64) + #define DECORR_STEREO_PASS pack_decorr_stereo_pass_x64 + #define DECORR_MONO_BUFFER pack_decorr_mono_buffer_x64 + #define SCAN_MAX_MAGNITUDE scan_max_magnitude_x64 +#else + #define DECORR_STEREO_PASS decorr_stereo_pass + #define DECORR_MONO_BUFFER decorr_mono_buffer + #define SCAN_MAX_MAGNITUDE scan_max_magnitude +#endif + +uint32_t DECORR_MONO_BUFFER (int32_t *buffer, struct decorr_pass *decorr_passes, int32_t num_terms, int32_t sample_count); + +#ifdef OPT_ASM_X86 +void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +void pack_decorr_stereo_pass_x86 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +uint32_t scan_max_magnitude (int32_t *values, int32_t num_values); +uint32_t scan_max_magnitude_x86 (int32_t *values, int32_t num_values); +#else +void DECORR_STEREO_PASS (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +uint32_t SCAN_MAX_MAGNITUDE (int32_t *values, int32_t num_values); +#endif + +// This macro controls the "repack" function where a block of samples will be repacked with +// fewer terms if a single residual exceeds the specified magnitude threshold. 
+ +#define REPACK_SAFE_NUM_TERMS 5 // 5 terms is always okay (and we truncate to this) static int pack_samples (WavpackContext *wpc, int32_t *buffer) { - WavpackStream *wps = wpc->streams [wpc->current_stream]; - uint32_t flags = wps->wphdr.flags, data_count, crc, crc2, i; - uint32_t sample_count = wps->wphdr.block_samples; - short *shaping_array = wps->dc.shaping_array; - int tcount, lossy = FALSE, m = 0; - double noise_acc = 0.0, noise; + WavpackStream *wps = wpc->streams [wpc->current_stream], saved_stream; + uint32_t flags = wps->wphdr.flags, repack_possible, data_count, crc, crc2, i; + uint32_t sample_count = wps->wphdr.block_samples, repack_mask; + int32_t *bptr, *saved_buffer = NULL; struct decorr_pass *dpp; WavpackMetadata wpmd; - int32_t *bptr; crc = crc2 = 0xffffffff; @@ -2036,794 +1052,436 @@ static int pack_samples (WavpackContext *wpc, int32_t *buffer) if (!sample_count) return TRUE; - write_decorr_terms (wps, &wpmd); - copy_metadata (&wpmd, wps->blockbuff, wps->blockend); - free_metadata (&wpmd); + memcpy (&wps->wphdr, wps->blockbuff, sizeof (WavpackHeader)); + repack_possible = !wps->num_passes && wps->num_terms > REPACK_SAFE_NUM_TERMS; + repack_mask = (flags & MAG_MASK) >> MAG_LSB >= 16 ? 0xF0000000 : 0xFFF00000; + saved_stream = *wps; - write_decorr_weights (wps, &wpmd); - copy_metadata (&wpmd, wps->blockbuff, wps->blockend); - free_metadata (&wpmd); - - write_decorr_samples (wps, &wpmd); - copy_metadata (&wpmd, wps->blockbuff, wps->blockend); - free_metadata (&wpmd); - - write_entropy_vars (wps, &wpmd); - copy_metadata (&wpmd, wps->blockbuff, wps->blockend); - free_metadata (&wpmd); - - if ((flags & SRATE_MASK) == SRATE_MASK && wpc->config.sample_rate != 44100) { - write_sample_rate (wpc, &wpmd); - copy_metadata (&wpmd, wps->blockbuff, wps->blockend); - free_metadata (&wpmd); + if (repack_possible && !(flags & HYBRID_FLAG)) { + saved_buffer = malloc (sample_count * sizeof (int32_t) * (flags & MONO_DATA ? 
1 : 2)); + memcpy (saved_buffer, buffer, sample_count * sizeof (int32_t) * (flags & MONO_DATA ? 1 : 2)); } - if (flags & HYBRID_FLAG) { - write_hybrid_profile (wps, &wpmd); + // This code is written as a loop, but in the overwhelming majority of cases it executes only once. + // If one of the higher modes is being used and a residual exceeds a certain threshold, then the + // block will be repacked using fewer decorrelation terms. Note that this has only been triggered + // by pathological audio samples designed to trigger it...in practice this might never happen. Note + // that this only applies to the "high" and "very high" modes and only when packing directly + // (i.e. without the "extra" modes that will have already checked magnitude). + + do { + short *shaping_array = wps->dc.shaping_array; + int tcount, lossy = FALSE, m = 0; + double noise_acc = 0.0, noise; + uint32_t max_magnitude = 0; + + write_decorr_terms (wps, &wpmd); copy_metadata (&wpmd, wps->blockbuff, wps->blockend); free_metadata (&wpmd); - } - if (flags & FLOAT_DATA) { - write_float_info (wps, &wpmd); + write_decorr_weights (wps, &wpmd); copy_metadata (&wpmd, wps->blockbuff, wps->blockend); free_metadata (&wpmd); - } - if (flags & INT32_DATA) { - write_int32_info (wps, &wpmd); + write_decorr_samples (wps, &wpmd); copy_metadata (&wpmd, wps->blockbuff, wps->blockend); free_metadata (&wpmd); - } - if ((flags & INITIAL_BLOCK) && - (wpc->config.num_channels > 2 || - wpc->config.channel_mask != 0x5 - wpc->config.num_channels)) { - write_channel_info (wpc, &wpmd); + write_entropy_vars (wps, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + + if (flags & HYBRID_FLAG) { + write_hybrid_profile (wps, &wpmd); copy_metadata (&wpmd, wps->blockbuff, wps->blockend); free_metadata (&wpmd); - } + } - if ((flags & INITIAL_BLOCK) && !wps->sample_index) { - write_config_info (wpc, &wpmd); - copy_metadata (&wpmd, wps->blockbuff, wps->blockend); - free_metadata (&wpmd); - } - - 
bs_open_write (&wps->wvbits, wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 12, wps->blockend); - - if (wpc->wvc_flag) { - wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; - memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader)); - - if (flags & HYBRID_SHAPE) { - write_shaping_info (wps, &wpmd); - copy_metadata (&wpmd, wps->block2buff, wps->block2end); + if (flags & FLOAT_DATA) { + write_float_info (wps, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); free_metadata (&wpmd); } - bs_open_write (&wps->wvcbits, wps->block2buff + ((WavpackHeader *) wps->block2buff)->ckSize + 12, wps->block2end); - } + if (flags & INT32_DATA) { + write_int32_info (wps, &wpmd); + copy_metadata (&wpmd, wps->blockbuff, wps->blockend); + free_metadata (&wpmd); + } - /////////////////////// handle lossless mono mode ///////////////////////// + send_general_metadata (wpc); + bs_open_write (&wps->wvbits, wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 12, wps->blockend); - if (!(flags & HYBRID_FLAG) && (flags & MONO_DATA)) { - if (!wps->num_passes) + if (wpc->wvc_flag) { + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader)); + + if (flags & HYBRID_SHAPE) { + write_shaping_info (wps, &wpmd); + copy_metadata (&wpmd, wps->block2buff, wps->block2end); + free_metadata (&wpmd); + } + + bs_open_write (&wps->wvcbits, wps->block2buff + ((WavpackHeader *) wps->block2buff)->ckSize + 12, wps->block2end); + } + + /////////////////////// handle lossless mono mode ///////////////////////// + + if (!(flags & HYBRID_FLAG) && (flags & MONO_DATA)) { + if (!wps->num_passes) { + max_magnitude = DECORR_MONO_BUFFER (buffer, wps->decorr_passes, wps->num_terms, sample_count); + m = sample_count & (MAX_TERM - 1); + } + + send_words_lossless (wps, buffer, sample_count); + } + + //////////////////// handle the lossless stereo mode ////////////////////// + + else if (!(flags & HYBRID_FLAG) && !(flags & MONO_DATA)) 
{ + if (!wps->num_passes) { + if (flags & JOINT_STEREO) { + int32_t *eptr = buffer + (sample_count * 2); + + for (bptr = buffer; bptr < eptr; bptr += 2) + bptr [1] += ((bptr [0] -= bptr [1]) >> 1); + } + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount-- ; dpp++) + DECORR_STEREO_PASS (dpp, buffer, sample_count); + + m = sample_count & (MAX_TERM - 1); + + if (repack_possible) + max_magnitude = SCAN_MAX_MAGNITUDE (buffer, sample_count * 2); + } + + send_words_lossless (wps, buffer, sample_count); + } + + /////////////////// handle the lossy/hybrid mono mode ///////////////////// + + else if ((flags & HYBRID_FLAG) && (flags & MONO_DATA)) for (bptr = buffer, i = 0; i < sample_count; ++i) { - int32_t code = *bptr; + int32_t code, temp; + int shaping_weight; - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { - int32_t sam; + crc2 += (crc2 << 1) + (code = *bptr++); + if (flags & HYBRID_SHAPE) { + if (shaping_array) + shaping_weight = *shaping_array++; + else + shaping_weight = (wps->dc.shaping_acc [0] += wps->dc.shaping_delta [0]) >> 16; + + temp = -apply_weight (shaping_weight, wps->dc.error [0]); + + if ((flags & NEW_SHAPING) && shaping_weight < 0 && temp) { + if (temp == wps->dc.error [0]) + temp = (temp < 0) ? temp + 1 : temp - 1; + + wps->dc.error [0] = -code; + code += temp; + } + else + wps->dc.error [0] = -(code += temp); + } + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount-- ; dpp++) if (dpp->term > MAX_TERM) { if (dpp->term & 1) - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; + dpp->samples_A [2] = 2 * dpp->samples_A [0] - dpp->samples_A [1]; else - sam = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + dpp->samples_A [2] = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + + code -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [2])); + } + else + code -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [m])); + + max_magnitude |= (code < 0 ? 
~code : code); + code = send_word (wps, code, 0); + + while (--dpp >= wps->decorr_passes) { + if (dpp->term > MAX_TERM) { + update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [2], code); + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = (code += dpp->aweight_A); + } + else { + int32_t sam = dpp->samples_A [m]; + + update_weight (dpp->weight_A, dpp->delta, sam, code); + dpp->samples_A [(m + dpp->term) & (MAX_TERM - 1)] = (code += dpp->aweight_A); + } + } + + wps->dc.error [0] += code; + m = (m + 1) & (MAX_TERM - 1); + + if ((crc += (crc << 1) + code) != crc2) + lossy = TRUE; + + if (wpc->config.flags & CONFIG_CALC_NOISE) { + noise = code - bptr [-1]; + + noise_acc += noise *= noise; + wps->dc.noise_ave = (wps->dc.noise_ave * 0.99) + (noise * 0.01); + + if (wps->dc.noise_ave > wps->dc.noise_max) + wps->dc.noise_max = wps->dc.noise_ave; + } + } + + /////////////////// handle the lossy/hybrid stereo mode /////////////////// + + else if ((flags & HYBRID_FLAG) && !(flags & MONO_DATA)) + for (bptr = buffer, i = 0; i < sample_count; ++i) { + int32_t left, right, temp; + int shaping_weight; + + left = *bptr++; + crc2 += (crc2 << 3) + (left << 1) + left + (right = *bptr++); + + if (flags & HYBRID_SHAPE) { + if (shaping_array) + shaping_weight = *shaping_array++; + else + shaping_weight = (wps->dc.shaping_acc [0] += wps->dc.shaping_delta [0]) >> 16; + + temp = -apply_weight (shaping_weight, wps->dc.error [0]); + + if ((flags & NEW_SHAPING) && shaping_weight < 0 && temp) { + if (temp == wps->dc.error [0]) + temp = (temp < 0) ? temp + 1 : temp - 1; + + wps->dc.error [0] = -left; + left += temp; + } + else + wps->dc.error [0] = -(left += temp); + + if (!shaping_array) + shaping_weight = (wps->dc.shaping_acc [1] += wps->dc.shaping_delta [1]) >> 16; + + temp = -apply_weight (shaping_weight, wps->dc.error [1]); + + if ((flags & NEW_SHAPING) && shaping_weight < 0 && temp) { + if (temp == wps->dc.error [1]) + temp = (temp < 0) ? 
temp + 1 : temp - 1; + + wps->dc.error [1] = -right; + right += temp; + } + else + wps->dc.error [1] = -(right += temp); + } + + if (flags & JOINT_STEREO) + right += ((left -= right) >> 1); + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount-- ; dpp++) + if (dpp->term > MAX_TERM) { + if (dpp->term & 1) { + dpp->samples_A [2] = 2 * dpp->samples_A [0] - dpp->samples_A [1]; + dpp->samples_B [2] = 2 * dpp->samples_B [0] - dpp->samples_B [1]; + } + else { + dpp->samples_A [2] = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + dpp->samples_B [2] = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1; + } + + left -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [2])); + right -= (dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [2])); + } + else if (dpp->term > 0) { + left -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [m])); + right -= (dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [m])); + } + else { + if (dpp->term == -1) + dpp->samples_B [0] = left; + else if (dpp->term == -2) + dpp->samples_A [0] = right; + + left -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [0])); + right -= (dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [0])); + } + + max_magnitude |= (left < 0 ? ~left : left) | (right < 0 ? 
~right : right); + left = send_word (wps, left, 0); + right = send_word (wps, right, 1); + + while (--dpp >= wps->decorr_passes) + if (dpp->term > MAX_TERM) { + update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [2], left); + update_weight (dpp->weight_B, dpp->delta, dpp->samples_B [2], right); dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = code; + dpp->samples_B [1] = dpp->samples_B [0]; + + dpp->samples_A [0] = (left += dpp->aweight_A); + dpp->samples_B [0] = (right += dpp->aweight_B); + } + else if (dpp->term > 0) { + int k = (m + dpp->term) & (MAX_TERM - 1); + + update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [m], left); + dpp->samples_A [k] = (left += dpp->aweight_A); + + update_weight (dpp->weight_B, dpp->delta, dpp->samples_B [m], right); + dpp->samples_B [k] = (right += dpp->aweight_B); } else { - sam = dpp->samples_A [m]; - dpp->samples_A [(m + dpp->term) & (MAX_TERM - 1)] = code; + if (dpp->term == -1) { + dpp->samples_B [0] = left + dpp->aweight_A; + dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [0]); + } + else if (dpp->term == -2) { + dpp->samples_A [0] = right + dpp->aweight_B; + dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [0]); + } + + update_weight_clip (dpp->weight_A, dpp->delta, dpp->samples_A [0], left); + update_weight_clip (dpp->weight_B, dpp->delta, dpp->samples_B [0], right); + dpp->samples_B [0] = (left += dpp->aweight_A); + dpp->samples_A [0] = (right += dpp->aweight_B); } - code -= apply_weight (dpp->weight_A, sam); - update_weight (dpp->weight_A, dpp->delta, sam, code); - } + if (flags & JOINT_STEREO) + left += (right -= (left >> 1)); + wps->dc.error [0] += left; + wps->dc.error [1] += right; m = (m + 1) & (MAX_TERM - 1); - *bptr++ = code; - } - send_words_lossless (wps, buffer, sample_count); - } + if ((crc += (crc << 3) + (left << 1) + left + right) != crc2) + lossy = TRUE; - //////////////////// handle the lossless stereo mode ////////////////////// + if (wpc->config.flags 
& CONFIG_CALC_NOISE) { + noise = (double)(left - bptr [-2]) * (left - bptr [-2]); + noise += (double)(right - bptr [-1]) * (right - bptr [-1]); - else if (!(flags & HYBRID_FLAG) && !(flags & MONO_DATA)) { - int32_t *eptr = buffer + (sample_count * 2); + noise_acc += noise /= 2.0; + wps->dc.noise_ave = (wps->dc.noise_ave * 0.99) + (noise * 0.01); - if (!wps->num_passes) { - if (flags & JOINT_STEREO) - for (bptr = buffer; bptr < eptr; bptr += 2) - bptr [1] += ((bptr [0] -= bptr [1]) >> 1); - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount-- ; dpp++) - if (((flags & MAG_MASK) >> MAG_LSB) >= 16 || dpp->delta != 2) - decorr_stereo_pass (dpp, buffer, sample_count); - else - decorr_stereo_pass_id2 (dpp, buffer, sample_count); - } - - send_words_lossless (wps, buffer, sample_count); - } - - /////////////////// handle the lossy/hybrid mono mode ///////////////////// - - else if ((flags & HYBRID_FLAG) && (flags & MONO_DATA)) - for (bptr = buffer, i = 0; i < sample_count; ++i) { - int32_t code, temp; - int shaping_weight; - - crc2 += (crc2 << 1) + (code = *bptr++); - - if (flags & HYBRID_SHAPE) { - if (shaping_array) - shaping_weight = *shaping_array++; - else - shaping_weight = (wps->dc.shaping_acc [0] += wps->dc.shaping_delta [0]) >> 16; - - temp = -apply_weight (shaping_weight, wps->dc.error [0]); - - if ((flags & NEW_SHAPING) && shaping_weight < 0 && temp) { - if (temp == wps->dc.error [0]) - temp = (temp < 0) ? 
temp + 1 : temp - 1; - - wps->dc.error [0] = -code; - code += temp; - } - else - wps->dc.error [0] = -(code += temp); - } - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount-- ; dpp++) - if (dpp->term > MAX_TERM) { - if (dpp->term & 1) - dpp->samples_A [2] = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - else - dpp->samples_A [2] = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; - - code -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [2])); - } - else - code -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [m])); - - code = send_word (wps, code, 0); - - while (--dpp >= wps->decorr_passes) { - if (dpp->term > MAX_TERM) { - update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [2], code); - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = (code += dpp->aweight_A); - } - else { - int32_t sam = dpp->samples_A [m]; - - update_weight (dpp->weight_A, dpp->delta, sam, code); - dpp->samples_A [(m + dpp->term) & (MAX_TERM - 1)] = (code += dpp->aweight_A); + if (wps->dc.noise_ave > wps->dc.noise_max) + wps->dc.noise_max = wps->dc.noise_ave; } } - wps->dc.error [0] += code; - m = (m + 1) & (MAX_TERM - 1); + if (m) + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + if (dpp->term > 0 && dpp->term <= MAX_TERM) { + int32_t temp_A [MAX_TERM], temp_B [MAX_TERM]; + int k; - if ((crc += (crc << 1) + code) != crc2) - lossy = TRUE; + memcpy (temp_A, dpp->samples_A, sizeof (dpp->samples_A)); + memcpy (temp_B, dpp->samples_B, sizeof (dpp->samples_B)); - if (wpc->config.flags & CONFIG_CALC_NOISE) { - noise = code - bptr [-1]; - - noise_acc += noise *= noise; - wps->dc.noise_ave = (wps->dc.noise_ave * 0.99) + (noise * 0.01); - - if (wps->dc.noise_ave > wps->dc.noise_max) - wps->dc.noise_max = wps->dc.noise_ave; - } - } - - /////////////////// handle the lossy/hybrid stereo mode /////////////////// - - else if ((flags & HYBRID_FLAG) && !(flags & MONO_DATA)) - for (bptr = buffer, i = 0; i < 
sample_count; ++i) { - int32_t left, right, temp; - int shaping_weight; - - left = *bptr++; - crc2 += (crc2 << 3) + (left << 1) + left + (right = *bptr++); - - if (flags & HYBRID_SHAPE) { - if (shaping_array) - shaping_weight = *shaping_array++; - else - shaping_weight = (wps->dc.shaping_acc [0] += wps->dc.shaping_delta [0]) >> 16; - - temp = -apply_weight (shaping_weight, wps->dc.error [0]); - - if ((flags & NEW_SHAPING) && shaping_weight < 0 && temp) { - if (temp == wps->dc.error [0]) - temp = (temp < 0) ? temp + 1 : temp - 1; - - wps->dc.error [0] = -left; - left += temp; - } - else - wps->dc.error [0] = -(left += temp); - - if (!shaping_array) - shaping_weight = (wps->dc.shaping_acc [1] += wps->dc.shaping_delta [1]) >> 16; - - temp = -apply_weight (shaping_weight, wps->dc.error [1]); - - if ((flags & NEW_SHAPING) && shaping_weight < 0 && temp) { - if (temp == wps->dc.error [1]) - temp = (temp < 0) ? temp + 1 : temp - 1; - - wps->dc.error [1] = -right; - right += temp; - } - else - wps->dc.error [1] = -(right += temp); - } - - if (flags & JOINT_STEREO) - right += ((left -= right) >> 1); - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount-- ; dpp++) - if (dpp->term > MAX_TERM) { - if (dpp->term & 1) { - dpp->samples_A [2] = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_B [2] = 2 * dpp->samples_B [0] - dpp->samples_B [1]; + for (k = 0; k < MAX_TERM; k++) { + dpp->samples_A [k] = temp_A [m]; + dpp->samples_B [k] = temp_B [m]; + m = (m + 1) & (MAX_TERM - 1); } - else { - dpp->samples_A [2] = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; - dpp->samples_B [2] = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1; - } - - left -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [2])); - right -= (dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [2])); - } - else if (dpp->term > 0) { - left -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [m])); - right -= (dpp->aweight_B = apply_weight 
(dpp->weight_B, dpp->samples_B [m])); - } - else { - if (dpp->term == -1) - dpp->samples_B [0] = left; - else if (dpp->term == -2) - dpp->samples_A [0] = right; - - left -= (dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [0])); - right -= (dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [0])); } - left = send_word (wps, left, 0); - right = send_word (wps, right, 1); + if (wpc->config.flags & CONFIG_CALC_NOISE) + wps->dc.noise_sum += noise_acc; - while (--dpp >= wps->decorr_passes) - if (dpp->term > MAX_TERM) { - update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [2], left); - update_weight (dpp->weight_B, dpp->delta, dpp->samples_B [2], right); + flush_word (wps); + data_count = bs_close_write (&wps->wvbits); - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_B [1] = dpp->samples_B [0]; - - dpp->samples_A [0] = (left += dpp->aweight_A); - dpp->samples_B [0] = (right += dpp->aweight_B); - } - else if (dpp->term > 0) { - int k = (m + dpp->term) & (MAX_TERM - 1); - - update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [m], left); - dpp->samples_A [k] = (left += dpp->aweight_A); - - update_weight (dpp->weight_B, dpp->delta, dpp->samples_B [m], right); - dpp->samples_B [k] = (right += dpp->aweight_B); - } - else { - if (dpp->term == -1) { - dpp->samples_B [0] = left + dpp->aweight_A; - dpp->aweight_B = apply_weight (dpp->weight_B, dpp->samples_B [0]); - } - else if (dpp->term == -2) { - dpp->samples_A [0] = right + dpp->aweight_B; - dpp->aweight_A = apply_weight (dpp->weight_A, dpp->samples_A [0]); - } - - update_weight_clip (dpp->weight_A, dpp->delta, dpp->samples_A [0], left); - update_weight_clip (dpp->weight_B, dpp->delta, dpp->samples_B [0], right); - dpp->samples_B [0] = (left += dpp->aweight_A); - dpp->samples_A [0] = (right += dpp->aweight_B); - } - - if (flags & JOINT_STEREO) - left += (right -= (left >> 1)); - - wps->dc.error [0] += left; - wps->dc.error [1] += right; - m = (m + 1) & (MAX_TERM - 1); - - if ((crc += 
(crc << 3) + (left << 1) + left + right) != crc2) - lossy = TRUE; - - if (wpc->config.flags & CONFIG_CALC_NOISE) { - noise = (double)(left - bptr [-2]) * (left - bptr [-2]); - noise += (double)(right - bptr [-1]) * (right - bptr [-1]); - - noise_acc += noise /= 2.0; - wps->dc.noise_ave = (wps->dc.noise_ave * 0.99) + (noise * 0.01); - - if (wps->dc.noise_ave > wps->dc.noise_max) - wps->dc.noise_max = wps->dc.noise_ave; - } - } - - if (m) - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - if (dpp->term > 0 && dpp->term <= MAX_TERM) { - int32_t temp_A [MAX_TERM], temp_B [MAX_TERM]; - int k; - - memcpy (temp_A, dpp->samples_A, sizeof (dpp->samples_A)); - memcpy (temp_B, dpp->samples_B, sizeof (dpp->samples_B)); - - for (k = 0; k < MAX_TERM; k++) { - dpp->samples_A [k] = temp_A [m]; - dpp->samples_B [k] = temp_B [m]; - m = (m + 1) & (MAX_TERM - 1); - } - } - - if (wpc->config.flags & CONFIG_CALC_NOISE) - wps->dc.noise_sum += noise_acc; - - flush_word (wps); - data_count = bs_close_write (&wps->wvbits); - - if (data_count) { - if (data_count != (uint32_t) -1) { - unsigned char *cptr = wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 8; - - *cptr++ = ID_WV_BITSTREAM | ID_LARGE; - *cptr++ = data_count >> 1; - *cptr++ = data_count >> 9; - *cptr++ = data_count >> 17; - ((WavpackHeader *) wps->blockbuff)->ckSize += data_count + 4; - } - else - return FALSE; - } - - ((WavpackHeader *) wps->blockbuff)->crc = crc; - - if (wpc->wvc_flag) { - data_count = bs_close_write (&wps->wvcbits); - - if (data_count && lossy) { + if (data_count) { if (data_count != (uint32_t) -1) { - unsigned char *cptr = wps->block2buff + ((WavpackHeader *) wps->block2buff)->ckSize + 8; + unsigned char *cptr = wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 8; - *cptr++ = ID_WVC_BITSTREAM | ID_LARGE; + *cptr++ = ID_WV_BITSTREAM | ID_LARGE; *cptr++ = data_count >> 1; *cptr++ = data_count >> 9; *cptr++ = data_count >> 17; - ((WavpackHeader *) 
wps->block2buff)->ckSize += data_count + 4; + ((WavpackHeader *) wps->blockbuff)->ckSize += data_count + 4; } else return FALSE; } - ((WavpackHeader *) wps->block2buff)->crc = crc2; - } - else if (lossy) - wpc->lossy_blocks = TRUE; + ((WavpackHeader *) wps->blockbuff)->crc = crc; + + if (wpc->wvc_flag) { + data_count = bs_close_write (&wps->wvcbits); + + if (data_count && lossy) { + if (data_count != (uint32_t) -1) { + unsigned char *cptr = wps->block2buff + ((WavpackHeader *) wps->block2buff)->ckSize + 8; + + *cptr++ = ID_WVC_BITSTREAM | ID_LARGE; + *cptr++ = data_count >> 1; + *cptr++ = data_count >> 9; + *cptr++ = data_count >> 17; + ((WavpackHeader *) wps->block2buff)->ckSize += data_count + 4; + } + else + return FALSE; + } + + ((WavpackHeader *) wps->block2buff)->crc = crc2; + } + else if (lossy) + wpc->lossy_blocks = TRUE; + + // we're done with the entire block, so now we check if our threshold for a "repack" was hit + + if (repack_possible && wps->num_terms > REPACK_SAFE_NUM_TERMS && (max_magnitude & repack_mask)) { + *wps = saved_stream; + wps->num_terms = REPACK_SAFE_NUM_TERMS; + memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader)); + + if (saved_buffer) + memcpy (buffer, saved_buffer, sample_count * sizeof (int32_t) * (flags & MONO_DATA ? 
1 : 2)); + + if (flags & HYBRID_FLAG) + crc = crc2 = 0xffffffff; + } + else { + // if we actually did repack the block with fewer terms, we detect that here + // and clean up so that we return to the original term count...otherwise we just + // free the saved_buffer (if allocated) and break out of the loop + if (wps->num_terms != saved_stream.num_terms) { + int ti; + + for (ti = wps->num_terms; ti < saved_stream.num_terms; ++ti) { + wps->decorr_passes [ti].weight_A = wps->decorr_passes [ti].weight_B = 0; + CLEAR (wps->decorr_passes [ti].samples_A); + CLEAR (wps->decorr_passes [ti].samples_B); + } + + wps->num_terms = saved_stream.num_terms; + } + + if (saved_buffer) + free (saved_buffer); + + break; + } + + } while (1); wps->sample_index += sample_count; return TRUE; } -// Perform a pass of the stereo decorrelation as specified by the referenced -// dpp structure. This version is optimized for samples that can use the -// simple apply_weight macro (i.e. <= 16-bit audio) and for when the weight -// delta is 2 (which is the case with all the default, non -x modes). For -// cases that do not fit this model, the more general decorr_stereo_pass() -// is provided. Note that this function returns the dpp->samples_X[] values -// in the "normalized" positions for terms 1-8. +#if !defined(OPT_ASM_X64) -static void decorr_stereo_pass_id2 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t *bptr, *eptr = buffer + (sample_count * 2); - int m, k; +// This is the "C" version of the stereo decorrelation pass function. There +// are assembly optimized versions of this that can be used if available. +// It performs a single pass of stereo decorrelation, in place, as specified +// by the decorr_pass structure. Note that this function does NOT return the +// dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +// the number of samples is not a multiple of MAX_TERM, these must be moved if +// they are to be used somewhere else. 
- switch (dpp->term) { - case 17: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam, tmp; - - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - bptr [0] = tmp = (dpp->samples_A [0] = bptr [0]) - apply_weight_i (dpp->weight_A, sam); - update_weight_d2 (dpp->weight_A, dpp->delta, sam, tmp); - - sam = 2 * dpp->samples_B [0] - dpp->samples_B [1]; - dpp->samples_B [1] = dpp->samples_B [0]; - bptr [1] = tmp = (dpp->samples_B [0] = bptr [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight_d2 (dpp->weight_B, dpp->delta, sam, tmp); - } - - break; - - case 18: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam, tmp; - - sam = dpp->samples_A [0] + ((dpp->samples_A [0] - dpp->samples_A [1]) >> 1); - dpp->samples_A [1] = dpp->samples_A [0]; - bptr [0] = tmp = (dpp->samples_A [0] = bptr [0]) - apply_weight_i (dpp->weight_A, sam); - update_weight_d2 (dpp->weight_A, dpp->delta, sam, tmp); - - sam = dpp->samples_B [0] + ((dpp->samples_B [0] - dpp->samples_B [1]) >> 1); - dpp->samples_B [1] = dpp->samples_B [0]; - bptr [1] = tmp = (dpp->samples_B [0] = bptr [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight_d2 (dpp->weight_B, dpp->delta, sam, tmp); - } - - break; - - default: - for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam, tmp; - - sam = dpp->samples_A [m]; - bptr [0] = tmp = (dpp->samples_A [k] = bptr [0]) - apply_weight_i (dpp->weight_A, sam); - update_weight_d2 (dpp->weight_A, dpp->delta, sam, tmp); - - sam = dpp->samples_B [m]; - bptr [1] = tmp = (dpp->samples_B [k] = bptr [1]) - apply_weight_i (dpp->weight_B, sam); - update_weight_d2 (dpp->weight_B, dpp->delta, sam, tmp); - - m = (m + 1) & (MAX_TERM - 1); - k = (k + 1) & (MAX_TERM - 1); - } - - if (m) { - int32_t temp_A [MAX_TERM], temp_B [MAX_TERM]; - - memcpy (temp_A, dpp->samples_A, sizeof (dpp->samples_A)); - memcpy (temp_B, dpp->samples_B, sizeof (dpp->samples_B)); - - for (k = 0; k < 
MAX_TERM; k++) { - dpp->samples_A [k] = temp_A [m]; - dpp->samples_B [k] = temp_B [m]; - m = (m + 1) & (MAX_TERM - 1); - } - } - - break; - - case -1: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam_A, sam_B, tmp; - - sam_A = dpp->samples_A [0]; - bptr [0] = tmp = (sam_B = bptr [0]) - apply_weight_i (dpp->weight_A, sam_A); - update_weight_clip_d2 (dpp->weight_A, dpp->delta, sam_A, tmp); - - bptr [1] = tmp = (dpp->samples_A [0] = bptr [1]) - apply_weight_i (dpp->weight_B, sam_B); - update_weight_clip_d2 (dpp->weight_B, dpp->delta, sam_B, tmp); - } - - break; - - case -2: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam_A, sam_B, tmp; - - sam_B = dpp->samples_B [0]; - bptr [1] = tmp = (sam_A = bptr [1]) - apply_weight_i (dpp->weight_B, sam_B); - update_weight_clip_d2 (dpp->weight_B, dpp->delta, sam_B, tmp); - - bptr [0] = tmp = (dpp->samples_B [0] = bptr [0]) - apply_weight_i (dpp->weight_A, sam_A); - update_weight_clip_d2 (dpp->weight_A, dpp->delta, sam_A, tmp); - } - - break; - - case -3: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam_A, sam_B, tmp; - - sam_A = dpp->samples_A [0]; - sam_B = dpp->samples_B [0]; - - dpp->samples_A [0] = tmp = bptr [1]; - bptr [1] = tmp -= apply_weight_i (dpp->weight_B, sam_B); - update_weight_clip_d2 (dpp->weight_B, dpp->delta, sam_B, tmp); - - dpp->samples_B [0] = tmp = bptr [0]; - bptr [0] = tmp -= apply_weight_i (dpp->weight_A, sam_A); - update_weight_clip_d2 (dpp->weight_A, dpp->delta, sam_A, tmp); - } - - break; - } -} - -// Perform a pass of the stereo decorrelation as specified by the referenced -// dpp structure. This function is provided in both a regular C version and -// an MMX version (using intrinsics) written by Joachim Henke. The MMX version -// is significantly faster when the sample data requires the full-resolution -// apply_weight macro. 
However, when the data is lower resolution (<= 16-bit) -// then the difference is slight (or the MMX is even slower), so for these -// cases the simpler decorr_stereo_pass_id2() is used. Note that this function -// returns the dpp->samples_X[] values in the "normalized" positions for -// terms 1-8. - -#ifdef OPT_MMX - -static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - const __m64 - delta = _mm_set1_pi32 (dpp->delta), - fill = _mm_set1_pi32 (0x7bff), - mask = _mm_set1_pi32 (0x7fff), - round = _mm_set1_pi32 (512), - zero = _mm_set1_pi32 (0); - __m64 - weight_AB = _mm_set_pi32 (restore_weight (store_weight (dpp->weight_B)), restore_weight (store_weight (dpp->weight_A))), - left_right, sam_AB, tmp0, tmp1, samples_AB [MAX_TERM]; - int k, m = 0; - - for (k = 0; k < MAX_TERM; ++k) { - ((int32_t *) samples_AB) [k * 2] = exp2s (log2s (dpp->samples_A [k])); - ((int32_t *) samples_AB) [k * 2 + 1] = exp2s (log2s (dpp->samples_B [k])); - } - - if (dpp->term > 0) { - if (dpp->term == 17) { - while (sample_count--) { - left_right = *(__m64 *) buffer; - tmp0 = samples_AB [0]; - sam_AB = _m_paddd (tmp0, tmp0); - sam_AB = _m_psubd (sam_AB, samples_AB [1]); - samples_AB [0] = left_right; - samples_AB [1] = tmp0; - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) buffer = left_right; - - tmp0 = _m_pxor (sam_AB, left_right); - tmp0 = _m_psradi (tmp0, 31); - tmp1 = _m_pxor (delta, tmp0); - tmp1 = _m_psubd (tmp1, tmp0); - sam_AB = _m_pcmpeqd (sam_AB, zero); - tmp0 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, sam_AB); - tmp0 = _m_pandn (tmp0, tmp1); - weight_AB = _m_paddd (weight_AB, tmp0); - - buffer += 2; 
- } - } - else if (dpp->term == 18) { - while (sample_count--) { - left_right = *(__m64 *) buffer; - tmp0 = samples_AB [0]; - sam_AB = _m_psubd (tmp0, samples_AB [1]); - sam_AB = _m_psradi (sam_AB, 1); - sam_AB = _m_paddd (sam_AB, tmp0); - samples_AB [0] = left_right; - samples_AB [1] = tmp0; - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) buffer = left_right; - - tmp0 = _m_pxor (sam_AB, left_right); - tmp0 = _m_psradi (tmp0, 31); - tmp1 = _m_pxor (delta, tmp0); - tmp1 = _m_psubd (tmp1, tmp0); - sam_AB = _m_pcmpeqd (sam_AB, zero); - tmp0 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, sam_AB); - tmp0 = _m_pandn (tmp0, tmp1); - weight_AB = _m_paddd (weight_AB, tmp0); - - buffer += 2; - } - } - else { - k = dpp->term & (MAX_TERM - 1); - while (sample_count--) { - left_right = *(__m64 *) buffer; - sam_AB = samples_AB [m]; - samples_AB [k] = left_right; - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) buffer = left_right; - - tmp0 = _m_pxor (sam_AB, left_right); - tmp0 = _m_psradi (tmp0, 31); - tmp1 = _m_pxor (delta, tmp0); - tmp1 = _m_psubd (tmp1, tmp0); - sam_AB = _m_pcmpeqd (sam_AB, zero); - tmp0 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, sam_AB); - tmp0 = _m_pandn (tmp0, tmp1); - weight_AB = _m_paddd (weight_AB, tmp0); - - buffer += 2; - k = (k + 1) & (MAX_TERM - 1); - m = (m + 1) & (MAX_TERM - 1); - 
} - } - } - else { - if (dpp->term == -1) { - while (sample_count--) { - left_right = *(__m64 *) buffer; - sam_AB = samples_AB [0]; - samples_AB [0] = _m_punpckhdq (left_right, sam_AB); - sam_AB = _m_punpckldq (sam_AB, left_right); - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) buffer = left_right; - - tmp0 = _m_pcmpeqd (sam_AB, zero); - tmp1 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, tmp1); - tmp0 = _m_pandn (tmp0, delta); - sam_AB = _m_pxor (sam_AB, left_right); - sam_AB = _m_psradi (sam_AB, 31); - tmp1 = _m_psubd (fill, sam_AB); - weight_AB = _m_pxor (weight_AB, sam_AB); - weight_AB = _m_paddd (weight_AB, tmp1); - weight_AB = _m_paddsw (weight_AB, tmp0); - weight_AB = _m_psubd (weight_AB, tmp1); - weight_AB = _m_pxor (weight_AB, sam_AB); - - buffer += 2; - } - } - else if (dpp->term == -2) { - while (sample_count--) { - left_right = *(__m64 *) buffer; - sam_AB = samples_AB [0]; - samples_AB [0] = _m_punpckldq (sam_AB, left_right); - sam_AB = _m_punpckhdq (left_right, sam_AB); - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) buffer = left_right; - - tmp0 = _m_pcmpeqd (sam_AB, zero); - tmp1 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, tmp1); - tmp0 = _m_pandn (tmp0, delta); - sam_AB = _m_pxor (sam_AB, left_right); - sam_AB = _m_psradi (sam_AB, 31); - tmp1 = _m_psubd (fill, sam_AB); - weight_AB = 
_m_pxor (weight_AB, sam_AB); - weight_AB = _m_paddd (weight_AB, tmp1); - weight_AB = _m_paddsw (weight_AB, tmp0); - weight_AB = _m_psubd (weight_AB, tmp1); - weight_AB = _m_pxor (weight_AB, sam_AB); - - buffer += 2; - } - } - else if (dpp->term == -3) { - while (sample_count--) { - left_right = *(__m64 *) buffer; - sam_AB = samples_AB [0]; - tmp0 = _m_punpckhdq (left_right, left_right); - samples_AB [0] = _m_punpckldq (tmp0, left_right); - - tmp0 = _m_paddd (sam_AB, sam_AB); - tmp1 = _m_pand (sam_AB, mask); - tmp0 = _m_psrldi (tmp0, 16); - tmp1 = _m_pmaddwd (tmp1, weight_AB); - tmp0 = _m_pmaddwd (tmp0, weight_AB); - tmp1 = _m_paddd (tmp1, round); - tmp0 = _m_pslldi (tmp0, 5); - tmp1 = _m_psradi (tmp1, 10); - left_right = _m_psubd (left_right, tmp0); - left_right = _m_psubd (left_right, tmp1); - - *(__m64 *) buffer = left_right; - - tmp0 = _m_pcmpeqd (sam_AB, zero); - tmp1 = _m_pcmpeqd (left_right, zero); - tmp0 = _m_por (tmp0, tmp1); - tmp0 = _m_pandn (tmp0, delta); - sam_AB = _m_pxor (sam_AB, left_right); - sam_AB = _m_psradi (sam_AB, 31); - tmp1 = _m_psubd (fill, sam_AB); - weight_AB = _m_pxor (weight_AB, sam_AB); - weight_AB = _m_paddd (weight_AB, tmp1); - weight_AB = _m_paddsw (weight_AB, tmp0); - weight_AB = _m_psubd (weight_AB, tmp1); - weight_AB = _m_pxor (weight_AB, sam_AB); - - buffer += 2; - } - } - } - - dpp->weight_A = ((int32_t *) &weight_AB) [0]; - dpp->weight_B = ((int32_t *) &weight_AB) [1]; - - for (k = 0; k < MAX_TERM; ++k) { - dpp->samples_A [k] = ((int32_t *) samples_AB) [m * 2]; - dpp->samples_B [k] = ((int32_t *) samples_AB) [m * 2 + 1]; - m = (m + 1) & (MAX_TERM - 1); - } - - _mm_empty (); -} - -#else - -static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) +void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) { int32_t *bptr, *eptr = buffer + (sample_count * 2); int m, k; @@ -2879,19 +1537,6 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t 
*buffer, int32_ k = (k + 1) & (MAX_TERM - 1); } - if (m) { - int32_t temp_A [MAX_TERM], temp_B [MAX_TERM]; - - memcpy (temp_A, dpp->samples_A, sizeof (dpp->samples_A)); - memcpy (temp_B, dpp->samples_B, sizeof (dpp->samples_B)); - - for (k = 0; k < MAX_TERM; k++) { - dpp->samples_A [k] = temp_A [m]; - dpp->samples_B [k] = temp_B [m]; - m = (m + 1) & (MAX_TERM - 1); - } - } - break; case -1: @@ -2942,6 +1587,75 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_ } } +// This is the "C" version of the magnitude scanning function. There are +// assembly optimized versions of this that can be used if available. This +// function scans a buffer of signed 32-bit ints and returns the magnitude +// of the largest sample, with a power-of-two resolution. It might be more +// useful to return the actual maximum absolute value (and this function +// could do that without breaking anything), but that implementation would +// likely be slower. Instead, this simply returns the "or" of all the +// values "xor"d with their own sign. + +uint32_t scan_max_magnitude (int32_t *values, int32_t num_values) +{ + uint32_t magnitude = 0; + + while (num_values--) + magnitude |= (*values < 0) ? ~*values++ : *values++; + + return magnitude; +} + +#endif + +#if !defined(OPT_ASM_X86) && !defined(OPT_ASM_X64) + +// This is the "C" version of the mono decorrelation pass function. There +// are assembly optimized versions of this that can be used if available. +// It decorrelates a buffer of mono samples, in place, as specified by the array +// of decorr_pass structures. Note that this function does NOT return the +// dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +// the number of samples is not a multiple of MAX_TERM, these must be moved if +// they are to be used somewhere else. The magnitude of the output samples is +// accumulated and returned (see scan_max_magnitude() for more details). 
+ +uint32_t decorr_mono_buffer (int32_t *buffer, struct decorr_pass *decorr_passes, int32_t num_terms, int32_t sample_count) +{ + uint32_t max_magnitude = 0; + struct decorr_pass *dpp; + int tcount, i; + + for (i = 0; i < sample_count; ++i) { + int32_t code = *buffer; + + for (tcount = num_terms, dpp = decorr_passes; tcount--; dpp++) { + int32_t sam; + + if (dpp->term > MAX_TERM) { + if (dpp->term & 1) + sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; + else + sam = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = code; + } + else { + sam = dpp->samples_A [i & (MAX_TERM - 1)]; + dpp->samples_A [(i + dpp->term) & (MAX_TERM - 1)] = code; + } + + code -= apply_weight (dpp->weight_A, sam); + update_weight (dpp->weight_A, dpp->delta, sam, code); + } + + *buffer++ = code; + max_magnitude |= (code < 0) ? ~code : code; + } + + return max_magnitude; +} + #endif ////////////////////////////////////////////////////////////////////////////// @@ -2961,44 +1675,52 @@ double WavpackGetEncodedNoise (WavpackContext *wpc, double *peak) return wps->dc.noise_sum; } -// Given an array of integer data (in shorts), find the linear function that most closely -// represents it (based on minimum sum of absolute errors). This is returned as the double -// precision initial & final Y values of the best-fit line. The function can also optionally -// compute and return a maximum error value (as a short). Note that the ends of the resulting -// line may fall way outside the range of input values, so some sort of clipping may be -// needed. +// Open the specified BitStream using the specified buffer pointers. It is +// assumed that enough buffer space has been allocated for all data that will +// be written, otherwise an error will be generated. 
-void best_floating_line (short *values, int num_values, double *initial_y, double *final_y, short *max_error) +static void bs_write (Bitstream *bs); + +static void bs_open_write (Bitstream *bs, void *buffer_start, void *buffer_end) { - double left_sum = 0.0, right_sum = 0.0, center_x = (num_values - 1) / 2.0, center_y, m; - int i; - - for (i = 0; i < num_values >> 1; ++i) { - right_sum += values [num_values - i - 1]; - left_sum += values [i]; - } - - if (num_values & 1) { - right_sum += values [num_values >> 1] * 0.5; - left_sum += values [num_values >> 1] * 0.5; - } - - center_y = (right_sum + left_sum) / num_values; - m = (right_sum - left_sum) / ((double) num_values * num_values) * 4.0; - - if (initial_y) - *initial_y = center_y - m * center_x; - - if (final_y) - *final_y = center_y + m * center_x; - - if (max_error) { - double max = 0.0; - - for (i = 0; i < num_values; ++i) - if (fabs (values [i] - (center_y + (i - center_x) * m)) > max) - max = fabs (values [i] - (center_y + (i - center_x) * m)); - - *max_error = (short) floor (max + 0.5); - } + bs->error = bs->sr = bs->bc = 0; + bs->ptr = bs->buf = buffer_start; + bs->end = buffer_end; + bs->wrap = bs_write; +} + +// This function is only called from the putbit() and putbits() macros when +// the buffer is full, which is now flagged as an error. + +static void bs_write (Bitstream *bs) +{ + bs->ptr = bs->buf; + bs->error = 1; +} + +// This function forces a flushing write of the specified BitStream, and +// returns the total number of bytes written into the buffer. 
+ +static uint32_t bs_close_write (Bitstream *bs) +{ + uint32_t bytes_written; + + if (bs->error) + return (uint32_t) -1; + + while (1) { + while (bs->bc) + putbit_1 (bs); + + bytes_written = (uint32_t)(bs->ptr - bs->buf) * sizeof (*(bs->ptr)); + + if (bytes_written & 1) { + putbit_1 (bs); + } + else + break; + }; + + CLEAR (*bs); + return bytes_written; } diff --git a/third_party/wavpack/src/pack_dns.c b/third_party/wavpack/src/pack_dns.c new file mode 100644 index 0000000..2c6f3c8 --- /dev/null +++ b/third_party/wavpack/src/pack_dns.c @@ -0,0 +1,191 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// pack_dns.c + +// This module handles the implementation of "dynamic noise shaping" which is +// designed to move the spectrum of the quantization noise introduced by lossy +// compression up or down in frequency so that it is more likely to be masked +// by the source material. 
+ +#include +#include +#include + +#include "wavpack_local.h" + +static void best_floating_line (short *values, int num_values, double *initial_y, double *final_y, short *max_error); + +void dynamic_noise_shaping (WavpackContext *wpc, int32_t *buffer, int shortening_allowed) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + int32_t sample_count = wps->wphdr.block_samples; + struct decorr_pass *ap = &wps->analysis_pass; + uint32_t flags = wps->wphdr.flags; + int32_t *bptr, temp, sam; + short *swptr; + int sc; + + if (!wps->num_terms && sample_count > 8) { + if (flags & MONO_DATA) + for (bptr = buffer + sample_count - 3, sc = sample_count - 2; sc--;) { + sam = (3 * bptr [1] - bptr [2]) >> 1; + temp = *bptr-- - apply_weight (ap->weight_A, sam); + update_weight (ap->weight_A, 2, sam, temp); + } + else + for (bptr = buffer + (sample_count - 3) * 2 + 1, sc = sample_count - 2; sc--;) { + sam = (3 * bptr [2] - bptr [4]) >> 1; + temp = *bptr-- - apply_weight (ap->weight_B, sam); + update_weight (ap->weight_B, 2, sam, temp); + sam = (3 * bptr [2] - bptr [4]) >> 1; + temp = *bptr-- - apply_weight (ap->weight_A, sam); + update_weight (ap->weight_A, 2, sam, temp); + } + } + + if (sample_count > wps->dc.shaping_samples) { + sc = sample_count - wps->dc.shaping_samples; + swptr = wps->dc.shaping_data + wps->dc.shaping_samples; + bptr = buffer + wps->dc.shaping_samples * ((flags & MONO_DATA) ? 1 : 2); + + if (flags & MONO_DATA) + while (sc--) { + sam = (3 * ap->samples_A [0] - ap->samples_A [1]) >> 1; + temp = *bptr - apply_weight (ap->weight_A, sam); + update_weight (ap->weight_A, 2, sam, temp); + ap->samples_A [1] = ap->samples_A [0]; + ap->samples_A [0] = *bptr++; + *swptr++ = (ap->weight_A < 256) ? 
1024 : 1536 - ap->weight_A * 2; + } + else + while (sc--) { + sam = (3 * ap->samples_A [0] - ap->samples_A [1]) >> 1; + temp = *bptr - apply_weight (ap->weight_A, sam); + update_weight (ap->weight_A, 2, sam, temp); + ap->samples_A [1] = ap->samples_A [0]; + ap->samples_A [0] = *bptr++; + + sam = (3 * ap->samples_B [0] - ap->samples_B [1]) >> 1; + temp = *bptr - apply_weight (ap->weight_B, sam); + update_weight (ap->weight_B, 2, sam, temp); + ap->samples_B [1] = ap->samples_B [0]; + ap->samples_B [0] = *bptr++; + + *swptr++ = (ap->weight_A + ap->weight_B < 512) ? 1024 : 1536 - ap->weight_A - ap->weight_B; + } + + wps->dc.shaping_samples = sample_count; + } + + if (wpc->wvc_flag) { + int max_allowed_error = 1000000 / wpc->ave_block_samples; + short max_error, trial_max_error; + double initial_y, final_y; + + if (max_allowed_error < 128) + max_allowed_error = 128; + + best_floating_line (wps->dc.shaping_data, sample_count, &initial_y, &final_y, &max_error); + + if (shortening_allowed && max_error > max_allowed_error) { + int min_samples = 0, max_samples = sample_count, trial_count; + double trial_initial_y, trial_final_y; + + while (1) { + trial_count = (min_samples + max_samples) / 2; + + best_floating_line (wps->dc.shaping_data, trial_count, &trial_initial_y, + &trial_final_y, &trial_max_error); + + if (trial_max_error < max_allowed_error) { + max_error = trial_max_error; + min_samples = trial_count; + initial_y = trial_initial_y; + final_y = trial_final_y; + } + else + max_samples = trial_count; + + if (min_samples > 10000 || max_samples - min_samples < 2) + break; + } + + sample_count = min_samples; + } + + if (initial_y < -512) initial_y = -512; + else if (initial_y > 1024) initial_y = 1024; + + if (final_y < -512) final_y = -512; + else if (final_y > 1024) final_y = 1024; +#if 0 + error_line ("%.2f sec, sample count = %5d, max error = %3d, range = %5d, %5d, actual = %5d, %5d", + (double) wps->sample_index / wpc->config.sample_rate, sample_count, max_error, + 
(int) floor (initial_y), (int) floor (final_y), + wps->dc.shaping_data [0], wps->dc.shaping_data [sample_count-1]); +#endif + if (sample_count != wps->wphdr.block_samples) + wps->wphdr.block_samples = sample_count; + + if (wpc->wvc_flag) { + wps->dc.shaping_acc [0] = wps->dc.shaping_acc [1] = (int32_t) floor (initial_y * 65536.0 + 0.5); + + wps->dc.shaping_delta [0] = wps->dc.shaping_delta [1] = + (int32_t) floor ((final_y - initial_y) / (sample_count - 1) * 65536.0 + 0.5); + + wps->dc.shaping_array = NULL; + } + else + wps->dc.shaping_array = wps->dc.shaping_data; + } + else + wps->dc.shaping_array = wps->dc.shaping_data; +} + +// Given an array of integer data (in shorts), find the linear function that most closely +// represents it (based on minimum sum of absolute errors). This is returned as the double +// precision initial & final Y values of the best-fit line. The function can also optionally +// compute and return a maximum error value (as a short). Note that the ends of the resulting +// line may fall way outside the range of input values, so some sort of clipping may be +// needed. 
+ +static void best_floating_line (short *values, int num_values, double *initial_y, double *final_y, short *max_error) +{ + double left_sum = 0.0, right_sum = 0.0, center_x = (num_values - 1) / 2.0, center_y, m; + int i; + + for (i = 0; i < num_values >> 1; ++i) { + right_sum += values [num_values - i - 1]; + left_sum += values [i]; + } + + if (num_values & 1) { + right_sum += values [num_values >> 1] * 0.5; + left_sum += values [num_values >> 1] * 0.5; + } + + center_y = (right_sum + left_sum) / num_values; + m = (right_sum - left_sum) / ((double) num_values * num_values) * 4.0; + + if (initial_y) + *initial_y = center_y - m * center_x; + + if (final_y) + *final_y = center_y + m * center_x; + + if (max_error) { + double max = 0.0; + + for (i = 0; i < num_values; ++i) + if (fabs (values [i] - (center_y + (i - center_x) * m)) > max) + max = fabs (values [i] - (center_y + (i - center_x) * m)); + + *max_error = (short) floor (max + 0.5); + } +} diff --git a/third_party/wavpack/src/pack_dsd.c b/third_party/wavpack/src/pack_dsd.c new file mode 100644 index 0000000..531f454 --- /dev/null +++ b/third_party/wavpack/src/pack_dsd.c @@ -0,0 +1,669 @@ +//////////////////////////////////////////////////////////////////////////// +// **** DSDPACK **** // +// Lossless DSD (Direct Stream Digital) Audio Compressor // +// Copyright (c) 2013 - 2016 David Bryant. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// pack_dsd.c + +// This module actually handles the compression of the DSD audio data. + +#ifdef ENABLE_DSD + +#include +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +// This function initializes everything required to pack WavPack DSD bitstreams +// and must be called BEFORE any other function in this module. 
+ +void pack_dsd_init (WavpackContext *wpc) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + + wps->sample_index = 0; +} + +// Pack an entire block of samples (either mono or stereo) into a completed +// WavPack block. This function is actually a shell for pack_samples() and +// performs tasks like handling any shift required by the format, preprocessing +// of floating point data or integer data over 24 bits wide, and implementing +// the "extra" mode (via the extra?.c modules). It is assumed that there is +// sufficient space for the completed block at "wps->blockbuff" and that +// "wps->blockend" points to the end of the available space. A return value of +// FALSE indicates an error. + +// Pack an entire block of samples (either mono or stereo) into a completed +// WavPack block. It is assumed that there is sufficient space for the +// completed block at "wps->blockbuff" and that "wps->blockend" points to the +// end of the available space. A return value of FALSE indicates an error. +// Any unsent metadata is transmitted first, then required metadata for this +// block is sent, and finally the compressed integer data is sent. If a "wpx" +// stream is required for floating point data or large integer data, then this +// must be handled outside this function. To find out how much data was written +// the caller must look at the ckSize field of the written WavpackHeader, NOT +// the one in the WavpackStream. 
+ +static int encode_buffer_high (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination); +static int encode_buffer_fast (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination); + +int pack_dsd_block (WavpackContext *wpc, int32_t *buffer) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + uint32_t flags = wps->wphdr.flags, mult = wpc->dsd_multiplier, data_count; + uint32_t sample_count = wps->wphdr.block_samples; + unsigned char *dsd_encoding, dsd_power = 0; + int32_t res; + + // This code scans stereo data to check whether it can be stored as mono data + // (i.e., all L/R samples identical). + + if (!(flags & MONO_FLAG)) { + int32_t *sptr, *dptr, i; + + for (sptr = buffer, i = 0; i < (int32_t) sample_count; sptr += 2, i++) + if ((sptr [0] ^ sptr [1]) & 0xff) + break; + + if (i == sample_count) { + wps->wphdr.flags = flags |= FALSE_STEREO; + dptr = buffer; + sptr = buffer; + + for (i = sample_count; i--; sptr++) + *dptr++ = *sptr++; + } + else + wps->wphdr.flags = flags &= ~FALSE_STEREO; + } + + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader)); + + if (wpc->metacount) { + WavpackMetadata *wpmdp = wpc->metadata; + + while (wpc->metacount) { + copy_metadata (wpmdp, wps->blockbuff, wps->blockend); + wpc->metabytes -= wpmdp->byte_length; + free_metadata (wpmdp++); + wpc->metacount--; + } + + free (wpc->metadata); + wpc->metadata = NULL; + } + + if (!sample_count) + return TRUE; + + send_general_metadata (wpc); + memcpy (&wps->wphdr, wps->blockbuff, sizeof (WavpackHeader)); + + dsd_encoding = wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 12; + + while (mult >>= 1) + dsd_power++; + + *dsd_encoding++ = dsd_power; + + if (wpc->config.flags & CONFIG_HIGH_FLAG) { + int fast_res = encode_buffer_fast (wps, buffer, sample_count, dsd_encoding); + + res = encode_buffer_high (wps, buffer, sample_count, dsd_encoding); + + if ((fast_res != 
-1) && (res == -1 || res > fast_res)) + res = encode_buffer_fast (wps, buffer, sample_count, dsd_encoding); + } + else + res = encode_buffer_fast (wps, buffer, sample_count, dsd_encoding); + + if (res == -1) { + int num_samples = sample_count * ((flags & MONO_DATA) ? 1 : 2); + uint32_t crc = 0xffffffff; + + *dsd_encoding++ = 0; + + data_count = num_samples + 2; + + while (num_samples--) + crc += (crc << 1) + (*dsd_encoding++ = *buffer++); + + ((WavpackHeader *) wps->blockbuff)->crc = crc; + } + else + data_count = res + 1; + + if (data_count) { + unsigned char *cptr = wps->blockbuff + ((WavpackHeader *) wps->blockbuff)->ckSize + 8; + + if (data_count & 1) { + cptr [data_count + 4] = 0; + *cptr++ = ID_DSD_BLOCK | ID_LARGE | ID_ODD_SIZE; + data_count++; + } + else + *cptr++ = ID_DSD_BLOCK | ID_LARGE; + + *cptr++ = data_count >> 1; + *cptr++ = data_count >> 9; + *cptr++ = data_count >> 17; + ((WavpackHeader *) wps->blockbuff)->ckSize += data_count + 4; + } + + wps->sample_index += sample_count; + return TRUE; +} + +/*------------------------------------------------------------------------------------------------------------------------*/ + +// #define DSD_BYTE_READY(low,high) (((low) >> 24) == ((high) >> 24)) +// #define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) >> 24)) +#define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) & 0xff000000)) + +#define MAX_HISTORY_BITS 5 +#define MAX_PROBABILITY 0xa0 // set to 0xff to disable RLE encoding for probabilities table + +#if (MAX_PROBABILITY < 0xff) + +static int rle_encode (unsigned char *src, int bcount, unsigned char *destination) +{ + int max_rle_zeros = 0xff - MAX_PROBABILITY; + unsigned char *dp = destination; + int zcount = 0; + + while (bcount--) { + if (*src) { + while (zcount) { + *dp++ = MAX_PROBABILITY + (zcount > max_rle_zeros ? max_rle_zeros : zcount); + zcount -= (zcount > max_rle_zeros ? 
max_rle_zeros : zcount); + } + + *dp++ = *src++; + } + else { + zcount++; + src++; + } + } + + while (zcount) { + *dp++ = MAX_PROBABILITY + (zcount > max_rle_zeros ? max_rle_zeros : zcount); + zcount -= (zcount > max_rle_zeros ? max_rle_zeros : zcount); + } + + *dp++ = 0; + + return (int)(dp - destination); +} + +#endif + +static void calculate_probabilities (int hist [256], unsigned char probs [256], unsigned short prob_sums [256]) +{ + int divisor, min_value, max_value, sum_values; + int min_hits = 0x7fffffff, max_hits = 0, i; + + for (i = 0; i < 256; ++i) { + if (hist [i] < min_hits) min_hits = hist [i]; + if (hist [i] > max_hits) max_hits = hist [i]; + } + + if (max_hits == 0) { + memset (probs, 0, sizeof (*probs) * 256); + memset (prob_sums, 0, sizeof (*prob_sums) * 256); + return; + } + +// fprintf (stderr, "process_histogram(): hits = %d to %d\n", min_hits, max_hits); + + if (max_hits > MAX_PROBABILITY) + divisor = ((max_hits << 8) + (MAX_PROBABILITY >> 1)) / MAX_PROBABILITY; + else + divisor = 0; + + while (1) { + min_value = 0x7fffffff; max_value = 0; sum_values = 0; + + for (i = 0; i < 256; ++i) { + int value; + + if (hist [i]) { + if (divisor) { + if (!(value = ((hist [i] << 8) + (divisor >> 1)) / divisor)) + value = 1; + } + else + value = hist [i]; + + if (value < min_value) min_value = value; + if (value > max_value) max_value = value; + } + else + value = 0; + + prob_sums [i] = sum_values += value; + probs [i] = value; + } + + if (max_value > MAX_PROBABILITY) { + divisor++; + continue; + } + +#if 0 // this code reduces probability values when they are completely redundant (i.e., common divisor), but + // this doesn't really happen often enough to make it worthwhile + + if (min_value > 1) { + for (i = 0; i < 256; ++i) + if (probs [i] % min_value) + break; + + if (i == 256) { + for (i = 0; i < 256; ++i) { + prob_sums [i] /= min_value; + probs [i] /= min_value; + } + + // fprintf (stderr, "fixed min_value = %d, divisor = %d, probs_sum = %d\n", 
min_value, divisor, prob_sums [255]); + } + } +#endif + + break; + } +} + +static int encode_buffer_fast (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination) +{ + uint32_t flags = wps->wphdr.flags, crc = 0xffffffff; + unsigned int low = 0, high = 0xffffffff, mult; + unsigned short (*summed_probabilities) [256]; + unsigned char (*probabilities) [256]; + unsigned char *dp = destination, *ep; + int history_bins, bc, p0 = 0, p1 = 0; + int total_summed_probabilities = 0; + int (*histogram) [256]; + int32_t *bp = buffer; + char history_bits; + + if (!(flags & MONO_DATA)) + num_samples *= 2; + + if (num_samples < 280) + return -1; + else if (num_samples < 560) + history_bits = 0; + else if (num_samples < 1725) + history_bits = 1; + else if (num_samples < 5000) + history_bits = 2; + else if (num_samples < 14000) + history_bits = 3; + else if (num_samples < 28000) + history_bits = 4; + else if (num_samples < 76000) + history_bits = 5; + else if (num_samples < 130000) + history_bits = 6; + else if (num_samples < 300000) + history_bits = 7; + else + history_bits = 8; + + if (history_bits > MAX_HISTORY_BITS) + history_bits = MAX_HISTORY_BITS; + + history_bins = 1 << history_bits; + histogram = malloc (sizeof (*histogram) * history_bins); + memset (histogram, 0, sizeof (*histogram) * history_bins); + probabilities = malloc (sizeof (*probabilities) * history_bins); + summed_probabilities = malloc (sizeof (*summed_probabilities) * history_bins); + + bc = num_samples; + + if (flags & MONO_DATA) + while (bc--) { + crc += (crc << 1) + (*bp & 0xff); + histogram [p0] [*bp & 0xff]++; + p0 = *bp++ & (history_bins-1); + } + else + while (bc--) { + crc += (crc << 1) + (*bp & 0xff); + histogram [p0] [*bp & 0xff]++; + p0 = p1; + p1 = *bp++ & (history_bins-1); + } + + for (p0 = 0; p0 < history_bins; p0++) { + calculate_probabilities (histogram [p0], probabilities [p0], summed_probabilities [p0]); + total_summed_probabilities += summed_probabilities [p0] [255]; + 
} + + ((WavpackHeader *) wps->blockbuff)->crc = crc; + + // This code detects the case where the required value lookup tables grow silly big and cuts them back down. This would + // normally only happen with large blocks or poorly compressible data. The target is to guarantee that the total memory + // required for all three decode tables will be 2K bytes per history bin. + + while (total_summed_probabilities > history_bins * 1280) { + int max_sum = 0, sum_values = 0, largest_bin = 0; + + for (p0 = 0; p0 < history_bins; ++p0) + if (summed_probabilities [p0] [255] > max_sum) { + max_sum = summed_probabilities [p0] [255]; + largest_bin = p0; + } + + total_summed_probabilities -= max_sum; + p0 = largest_bin; + + for (p1 = 0; p1 < 256; ++p1) + summed_probabilities [p0] [p1] = sum_values += probabilities [p0] [p1] = (probabilities [p0] [p1] + 1) >> 1; + + total_summed_probabilities += summed_probabilities [p0] [255]; + // fprintf (stderr, "processed bin 0x%02x, bin: %d --> %d, new sum = %d\n", + // p0, max_sum, summed_probabilities [p0] [255], total_summed_probabilities); + } + + free (histogram); + bp = buffer; + bc = num_samples; + *dp++ = 1; + *dp++ = history_bits; + *dp++ = MAX_PROBABILITY; + ep = destination + num_samples - 10; + +#if (MAX_PROBABILITY < 0xff) + dp += rle_encode ((unsigned char *) probabilities, sizeof (*probabilities) * history_bins, dp); +#else + memcpy (dp, probabilities, sizeof (*probabilities) * history_bins); + dp += sizeof (*probabilities) * history_bins; +#endif + + p0 = p1 = 0; + + while (dp < ep && bc--) { + + mult = (high - low) / summed_probabilities [p0] [255]; + + if (!mult) { + high = low; + + while (DSD_BYTE_READY (high, low)) { + *dp++ = high >> 24; + high = (high << 8) | 0xff; + low <<= 8; + } + + mult = (high - low) / summed_probabilities [p0] [255]; + } + + if (*bp & 0xff) + low += summed_probabilities [p0] [(*bp & 0xff)-1] * mult; + + high = low + probabilities [p0] [*bp & 0xff] * mult - 1; + + while (DSD_BYTE_READY (high, low)) 
{ + *dp++ = high >> 24; + high = (high << 8) | 0xff; + low <<= 8; + } + + if (flags & MONO_DATA) + p0 = *bp++ & (history_bins-1); + else { + p0 = p1; + p1 = *bp++ & (history_bins-1); + } + } + + high = low; + + while (DSD_BYTE_READY (high, low)) { + *dp++ = high >> 24; + high = (high << 8) | 0xff; + low <<= 8; + } + + free (summed_probabilities); + free (probabilities); + + if (dp < ep) + return (int)(dp - destination); + else + return -1; +} + +/*------------------------------------------------------------------------------------------------------------------------*/ + +#define PTABLE_BITS 8 +#define PTABLE_BINS (1<> 8; c--;) + value += (DOWN - value) >> DECAY; + + for (i = 0; i < PTABLE_BINS/2; ++i) { + table [i] = value; + table [PTABLE_BINS-1-i] = 0x100ffff - value; + + if (value > 0x010000) { + rate += (rate * rate_s + 128) >> 8; + + for (c = (rate + 64) >> 7; c--;) + value += (DOWN - value) >> DECAY; + } + } +} + +static int normalize_ptable (int *ptable) +{ + int rate = 0, min_error, error_sum, i; + int ntable [PTABLE_BINS]; + + init_ptable (ntable, rate, RATE_S); + + for (min_error = i = 0; i < PTABLE_BINS; ++i) + min_error += abs (ptable [i] - ntable [i]) >> 8; + + while (1) { + init_ptable (ntable, ++rate, RATE_S); + + for (error_sum = i = 0; i < PTABLE_BINS; ++i) + error_sum += abs (ptable [i] - ntable [i]) >> 8; + + if (error_sum < min_error) + min_error = error_sum; + else + break; + } + + return rate - 1; +} + +static int encode_buffer_high (WavpackStream *wps, int32_t *buffer, int num_samples, unsigned char *destination) +{ + int channel, stereo = (wps->wphdr.flags & MONO_DATA) ? 
0 : 1; + uint32_t crc = 0xffffffff, high = 0xffffffff, low = 0; + unsigned char *dp = destination, *ep; + DSDfilters *sp; + + if (num_samples * (stereo + 1) < 280) + return -1; + + *dp++ = 3; + ep = destination + num_samples * (stereo + 1) - 10; + + if (!wps->sample_index) { + if (!wps->dsd.ptable) + wps->dsd.ptable = malloc (PTABLE_BINS * sizeof (*wps->dsd.ptable)); + + init_ptable (wps->dsd.ptable, INITIAL_TERM, RATE_S); + + for (channel = 0; channel < 2; ++channel) { + sp = wps->dsd.filters + channel; + + sp->filter1 = sp->filter2 = sp->filter3 = sp->filter4 = sp->filter5 = VALUE_ONE / 2; + sp->filter6 = sp->factor = 0; + } + + *dp++ = INITIAL_TERM; + *dp++ = RATE_S; + } + else { + int rate = normalize_ptable (wps->dsd.ptable); + init_ptable (wps->dsd.ptable, rate, RATE_S); + *dp++ = rate; + *dp++ = RATE_S; + } + + for (channel = 0; channel <= stereo; ++channel) { + sp = wps->dsd.filters + channel; + + *dp = sp->filter1 >> (PRECISION - 8); + sp->filter1 = *dp++ << (PRECISION - 8); + + *dp = sp->filter2 >> (PRECISION - 8); + sp->filter2 = *dp++ << (PRECISION - 8); + + *dp = sp->filter3 >> (PRECISION - 8); + sp->filter3 = *dp++ << (PRECISION - 8); + + *dp = sp->filter4 >> (PRECISION - 8); + sp->filter4 = *dp++ << (PRECISION - 8); + + *dp = sp->filter5 >> (PRECISION - 8); + sp->filter5 = *dp++ << (PRECISION - 8); + + *dp++ = sp->factor; + *dp++ = sp->factor >> 8; + sp->filter6 = 0; + sp->factor = (sp->factor << 16) >> 16; + } + + sp = wps->dsd.filters; + + while (dp < ep && num_samples--) { + int bitcount = 8; + + crc += (crc << 1) + (sp->byte = *buffer++ & 0xff); + sp [0].value = sp [0].filter1 - sp [0].filter5 + ((sp [0].filter6 * sp [0].factor) >> 2); + + if (stereo) { + crc += (crc << 1) + (sp [1].byte = *buffer++ & 0xff); + sp [1].value = sp [1].filter1 - sp [1].filter5 + ((sp [1].filter6 * sp [1].factor) >> 2); + } + + while (bitcount--) { + int32_t *pp = wps->dsd.ptable + ((sp [0].value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK); + + if (sp [0].byte & 
0x80) { + high = low + ((high - low) >> 8) * (*pp >> 16); + *pp += (UP - *pp) >> DECAY; + sp [0].filter0 = -1; + } + else { + low += 1 + ((high - low) >> 8) * (*pp >> 16); + *pp += (DOWN - *pp) >> DECAY; + sp [0].filter0 = 0; + } + + while (DSD_BYTE_READY (high, low)) { + *dp++ = high >> 24; + high = (high << 8) | 0xff; + low <<= 8; + } + + sp [0].value += sp [0].filter6 << 3; + sp [0].factor += (((sp [0].value ^ sp [0].filter0) >> 31) | 1) & ((sp [0].value ^ (sp [0].value - (sp [0].filter6 << 4))) >> 31); + sp [0].filter1 += ((sp [0].filter0 & VALUE_ONE) - sp [0].filter1) >> 6; + sp [0].filter2 += ((sp [0].filter0 & VALUE_ONE) - sp [0].filter2) >> 4; + sp [0].filter3 += (sp [0].filter2 - sp [0].filter3) >> 4; + sp [0].filter4 += (sp [0].filter3 - sp [0].filter4) >> 4; + sp [0].value = (sp [0].filter4 - sp [0].filter5) >> 4; + sp [0].filter5 += sp [0].value; + sp [0].filter6 += (sp [0].value - sp [0].filter6) >> 3; + sp [0].value = sp [0].filter1 - sp [0].filter5 + ((sp [0].filter6 * sp [0].factor) >> 2); + sp [0].byte <<= 1; + + if (!stereo) + continue; + + pp = wps->dsd.ptable + ((sp [1].value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK); + + if (sp [1].byte & 0x80) { + high = low + ((high - low) >> 8) * (*pp >> 16); + *pp += (UP - *pp) >> DECAY; + sp [1].filter0 = -1; + } + else { + low += 1 + ((high - low) >> 8) * (*pp >> 16); + *pp += (DOWN - *pp) >> DECAY; + sp [1].filter0 = 0; + } + + while (DSD_BYTE_READY (high, low)) { + *dp++ = high >> 24; + high = (high << 8) | 0xff; + low <<= 8; + } + + sp [1].value += sp [1].filter6 << 3; + sp [1].factor += (((sp [1].value ^ sp [1].filter0) >> 31) | 1) & ((sp [1].value ^ (sp [1].value - (sp [1].filter6 << 4))) >> 31); + sp [1].filter1 += ((sp [1].filter0 & VALUE_ONE) - sp [1].filter1) >> 6; + sp [1].filter2 += ((sp [1].filter0 & VALUE_ONE) - sp [1].filter2) >> 4; + sp [1].filter3 += (sp [1].filter2 - sp [1].filter3) >> 4; + sp [1].filter4 += (sp [1].filter3 - sp [1].filter4) >> 4; + sp [1].value = (sp [1].filter4 - 
sp [1].filter5) >> 4; + sp [1].filter5 += sp [1].value; + sp [1].filter6 += (sp [1].value - sp [1].filter6) >> 3; + sp [1].value = sp [1].filter1 - sp [1].filter5 + ((sp [1].filter6 * sp [1].factor) >> 2); + sp [1].byte <<= 1; + } + + sp [0].factor -= (sp->factor + 512) >> 10; + + if (stereo) + sp [1].factor -= (sp [1].factor + 512) >> 10; + } + + ((WavpackHeader *) wps->blockbuff)->crc = crc; + high = low; + + while (DSD_BYTE_READY (high, low)) { + *dp++ = high >> 24; + high = (high << 8) | 0xff; + low <<= 8; + } + + if (dp < ep) + return (int)(dp - destination); + else + return -1; +} + +#endif // ENABLE_DSD diff --git a/third_party/wavpack/src/pack_floats.c b/third_party/wavpack/src/pack_floats.c new file mode 100644 index 0000000..90ab656 --- /dev/null +++ b/third_party/wavpack/src/pack_floats.c @@ -0,0 +1,270 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// pack_floats.c + +// This module deals with the compression of floating-point data. Note that no +// floating point math is involved here...the values are only processed with +// the macros that directly access the mantissa, exponent, and sign fields. +// That's why we use the f32 type instead of the built-in float type. 
// NOTE(review): the header name of the next directive is missing in this copy of the
// file (it appears to have been stripped during extraction) -- restore it from upstream.
#include

#include "wavpack_local.h"

//#define DISPLAY_DIAGNOSTICS

// Scan the provided buffer of floating-point values and (1) convert the
// significant portion of the data to integers for compression using the
// regular WavPack algorithms (which only operate on integers) and (2)
// determine whether the data requires a second stream for lossless
// storage (which will usually be the case except when the floating-point
// data was originally integer data). The converted integers are returned
// "in-place" and a return value of TRUE indicates that a second stream
// is required.
//
//   wps         stream whose float_shift, float_flags, crc_x, float_max_exp and the
//               magnitude bits of wphdr.flags are all (re)computed here
//   values      num_values samples; each one is overwritten with its signed integer form
//   num_values  number of entries in values
//
// The return value is float_flags masked with FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT |
// FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME, so it is non-zero (not necessarily 1) when a
// second stream is needed.

int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values)
{
    int32_t shifted_ones = 0, shifted_zeros = 0, shifted_both = 0;
    int32_t false_zeros = 0, neg_zeros = 0;
#ifdef DISPLAY_DIAGNOSTICS
    int32_t true_zeros = 0, denormals = 0, exceptions = 0;
#endif
    uint32_t ordata = 0, crc = 0xffffffff;
    int32_t count, value, shift_count;
    int max_mag = 0, max_exp = 0;
    f32 *dp;

    wps->float_shift = wps->float_flags = 0;

    // First loop goes through all the data and (1) calculates the CRC and (2) finds the
    // max magnitude that does not have an exponent of 255 (reserved for +/-inf and NaN).
    for (dp = values, count = num_values; count--; dp++) {
        crc = crc * 27 + get_mantissa (*dp) * 9 + get_exponent (*dp) * 3 + get_sign (*dp);

        if (get_exponent (*dp) < 255 && get_magnitude (*dp) > max_mag)
            max_mag = get_magnitude (*dp);
    }

    wps->crc_x = crc;

    // round up the magnitude so that when we convert the floating-point values to integers,
    // they will be (at most) just over 24-bits signed precision
    if (get_exponent (max_mag))
        max_exp = get_exponent (max_mag + 0x7F0000);

    // Second loop converts every sample to its integer form and classifies what is lost.
    for (dp = values, count = num_values; count--; dp++) {
        // Exponent of 255 is reserved for +/-inf (mantissa = 0) or NaN (mantissa != 0).
        // we use a value one greater than 24-bits unsigned for this.

        if (get_exponent (*dp) == 255) {
#ifdef DISPLAY_DIAGNOSTICS
            exceptions++;
#endif
            wps->float_flags |= FLOAT_EXCEPTIONS;
            value = 0x1000000;
            shift_count = 0;
        }
        // This is the regular case. We generate a 24-bit unsigned value with the implied
        // '1' MSB set and calculate a shift that will make it line up with the biggest
        // samples in this block (although that shift would obviously shift out real data).
        else if (get_exponent (*dp)) {
            shift_count = max_exp - get_exponent (*dp);
            value = 0x800000 + get_mantissa (*dp);
        }
        // Zero exponent means either +/- zero (mantissa = 0) or denormals (mantissa != 0).
        // shift_count is set so that denormals (without an implied '1') will line up with
        // regular values (with their implied '1' added at bit 23). Trust me. We don't care
        // about the shift with zero.
        else {
            shift_count = max_exp ? max_exp - 1 : 0;
            value = get_mantissa (*dp);

#ifdef DISPLAY_DIAGNOSTICS
            if (get_mantissa (*dp))
                denormals++;
#endif
        }

        if (shift_count < 25)
            value >>= shift_count;      // perform the shift if there could be anything left
        else
            value = 0;                  // else just zero the value

        // If we are going to encode an integer zero, then this might be a "false zero" which
        // means that there are significant bits but they're completely shifted out, or a
        // "negative zero" which is simply a floating point value that we have to encode
        // (and converting it to a positive zero would be an error).
        if (!value) {
            if (get_exponent (*dp) || get_mantissa (*dp))
                ++false_zeros;
            else if (get_sign (*dp))
                ++neg_zeros;
#ifdef DISPLAY_DIAGNOSTICS
            else
                ++true_zeros;
#endif
        }
        // If we are going to shift something (but not everything) out of our integer before
        // encoding, then we generate a mask corresponding to the bits that will be shifted
        // out and increment the counter for the 3 possible cases of (1) all zeros, (2) all
        // ones, and (3) a mix of ones and zeros.
        else if (shift_count) {
            int32_t mask = (1 << shift_count) - 1;

            if (!(get_mantissa (*dp) & mask))
                shifted_zeros++;
            else if ((get_mantissa (*dp) & mask) == mask)
                shifted_ones++;
            else
                shifted_both++;
        }

        // "or" all the integer values together, and store the final integer with applied sign

        ordata |= value;
        * (int32_t *) dp = (get_sign (*dp)) ? -value : value;
    }

    wps->float_max_exp = max_exp;       // on decode, we use this to calculate actual exponent

    // Now, based on our various counts, we determine the scheme required to encode the bits
    // shifted out. Usually these will simply have to be sent literally, but in some rare cases
    // we can get away with always assuming ones shifted out, or assuming all the bits shifted
    // out in each value are the same (which means we only have to send a single bit).
    if (shifted_both)
        wps->float_flags |= FLOAT_SHIFT_SENT;
    else if (shifted_ones && !shifted_zeros)
        wps->float_flags |= FLOAT_SHIFT_ONES;
    else if (shifted_ones && shifted_zeros)
        wps->float_flags |= FLOAT_SHIFT_SAME;
    // Another case is that we only shift out zeros (or maybe nothing), and in that case we
    // check to see if our data actually has less than 24 or 25 bits of resolution, which means
    // that we can reduce the magnitude of the integers we are encoding (which saves all those
    // bits). The number of bits of reduced resolution is stored in float_shift.
    else if (ordata && !(ordata & 1)) {
        while (!(ordata & 1)) {
            wps->float_shift++;
            ordata >>= 1;
        }

        // here we shift out all those zeros in the integer data we will encode
        for (dp = values, count = num_values; count--; dp++)
            * (int32_t *) dp >>= wps->float_shift;
    }

    // Here we calculate the actual magnitude used by our integer data, although this is just
    // used for informational purposes during encode/decode to possibly use faster math.

    wps->wphdr.flags &= ~MAG_MASK;

    // one increment of the magnitude field per significant bit remaining in ordata
    while (ordata) {
        wps->wphdr.flags += 1 << MAG_LSB;
        ordata >>= 1;
    }

    // Finally, we have to set some flags that guide how we encode various types of "zeros".
    // If none of these are set (which is the most common situation), then every integer
    // zero in the decoded data will simply become a floating-point zero.

    if (false_zeros || neg_zeros)
        wps->float_flags |= FLOAT_ZEROS_SENT;

    if (neg_zeros)
        wps->float_flags |= FLOAT_NEG_ZEROS;

#ifdef DISPLAY_DIAGNOSTICS
    {
        int32_t *ip, min = 0x7fffffff, max = 0x80000000;
        for (ip = (int32_t *) values, count = num_values; count--; ip++) {
            if (*ip < min) min = *ip;
            if (*ip > max) max = *ip;
        }

        fprintf (stderr, "integer range = %d to %d\n", min, max);
    }

    fprintf (stderr, "samples = %d, max exp = %d, pre-shift = %d, denormals = %d, exceptions = %d, max_mag = %x\n",
        num_values, max_exp, wps->float_shift, denormals, exceptions, max_mag);
    fprintf (stderr, "shifted ones/zeros/both = %d/%d/%d, true/neg/false zeros = %d/%d/%d\n",
        shifted_ones, shifted_zeros, shifted_both, true_zeros, neg_zeros, false_zeros);
#endif

    return wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME);
}

// Given a buffer of float data, convert the data to integers (which is what the WavPack compression
// algorithms require) and write the other data required for lossless compression (which includes
// significant bits shifted out of the integers, plus information about +/- zeros and exceptions
// like NaN and +/- infinities) into the wvxbits stream (which is assumed to be opened). Note that
// for this to work correctly, scan_float_data() must have been called on the original data to set
// the appropriate flags in float_flags and max_exp.
+ +void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values) +{ + int max_exp = wps->float_max_exp; + int32_t count, value, shift_count; + f32 *dp; + + for (dp = values, count = num_values; count--; dp++) { + if (get_exponent (*dp) == 255) { + if (get_mantissa (*dp)) { + putbit_1 (&wps->wvxbits); + putbits (get_mantissa (*dp), 23, &wps->wvxbits); + } + else { + putbit_0 (&wps->wvxbits); + } + + value = 0x1000000; + shift_count = 0; + } + else if (get_exponent (*dp)) { + shift_count = max_exp - get_exponent (*dp); + value = 0x800000 + get_mantissa (*dp); + } + else { + shift_count = max_exp ? max_exp - 1 : 0; + value = get_mantissa (*dp); + } + + if (shift_count < 25) + value >>= shift_count; + else + value = 0; + + if (!value) { + if (wps->float_flags & FLOAT_ZEROS_SENT) { + if (get_exponent (*dp) || get_mantissa (*dp)) { + putbit_1 (&wps->wvxbits); + putbits (get_mantissa (*dp), 23, &wps->wvxbits); + + if (max_exp >= 25) { + putbits (get_exponent (*dp), 8, &wps->wvxbits); + } + + putbit (get_sign (*dp), &wps->wvxbits); + } + else { + putbit_0 (&wps->wvxbits); + + if (wps->float_flags & FLOAT_NEG_ZEROS) + putbit (get_sign (*dp), &wps->wvxbits); + } + } + } + else if (shift_count) { + if (wps->float_flags & FLOAT_SHIFT_SENT) { + int32_t data = get_mantissa (*dp) & ((1 << shift_count) - 1); + putbits (data, shift_count, &wps->wvxbits); + } + else if (wps->float_flags & FLOAT_SHIFT_SAME) { + putbit (get_mantissa (*dp) & 1, &wps->wvxbits); + } + } + } +} diff --git a/third_party/wavpack/src/pack_utils.c b/third_party/wavpack/src/pack_utils.c new file mode 100644 index 0000000..1918c18 --- /dev/null +++ b/third_party/wavpack/src/pack_utils.c @@ -0,0 +1,1418 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// pack_utils.c + +// This module provides the high-level API for creating WavPack files from +// audio data. It manages the buffers used to deinterleave the data passed +// in from the application into the individual streams and it handles the +// generation of riff headers and the "fixup" on the first WavPack block +// header for the case where the number of samples was unknown (or wrong). +// The actual audio stream compression is handled in the pack.c module. + +#include +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +// Open context for writing WavPack files. The returned context pointer is used +// in all following calls to the library. The "blockout" function will be used +// to store the actual completed WavPack blocks and will be called with the id +// pointers containing user defined data (one for the wv file and one for the +// wvc file). A return value of NULL indicates that memory could not be +// allocated for the context. + +WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id) +{ + WavpackContext *wpc = malloc (sizeof (WavpackContext)); + + if (!wpc) + return NULL; + + CLEAR (*wpc); + wpc->total_samples = -1; + wpc->stream_version = CUR_STREAM_VERS; + wpc->blockout = blockout; + wpc->wv_out = wv_id; + wpc->wvc_out = wvc_id; + return wpc; +} + +static int add_to_metadata (WavpackContext *wpc, void *data, uint32_t bcount, unsigned char id); + +// New for version 5.0, this function allows the application to store a file extension and a +// file_format identification. 
The extension would be used by the unpacker if the user had not +// specified the target filename, and specifically handles the case where the original file +// had the "wrong" extension for the file format (e.g., a Wave64 file having a "wav" extension) +// or an alternative (e.g., "bwf") or where the file format is not known. Specifying a file +// format besides the default WP_FORMAT_WAV will ensure that old decoders will not be able to +// see the non-wav wrapper provided with WavpackAddWrapper() (which they would end up putting +// on a file with a .wav extension). + +void WavpackSetFileInformation (WavpackContext *wpc, char *file_extension, unsigned char file_format) +{ + if (file_extension && strlen (file_extension) < sizeof (wpc->file_extension)) { + add_to_metadata (wpc, file_extension, (uint32_t) strlen (file_extension), ID_ALT_EXTENSION); + strcpy (wpc->file_extension, file_extension); + } + + wpc->file_format = file_format; +} + +// Set configuration for writing WavPack files. This must be done before +// sending any actual samples, however it is okay to send wrapper or other +// metadata before calling this. 
The "config" structure contains the following +// required information: + +// config->bytes_per_sample see WavpackGetBytesPerSample() for info +// config->bits_per_sample see WavpackGetBitsPerSample() for info +// config->channel_mask Microsoft standard (mono = 4, stereo = 3) +// config->num_channels self evident +// config->sample_rate self evident + +// In addition, the following fields and flags may be set: + +// config->flags: +// -------------- +// o CONFIG_HYBRID_FLAG select hybrid mode (must set bitrate) +// o CONFIG_JOINT_STEREO select joint stereo (must set override also) +// o CONFIG_JOINT_OVERRIDE override default joint stereo selection +// o CONFIG_HYBRID_SHAPE select hybrid noise shaping (set override & +// shaping_weight != 0.0) +// o CONFIG_SHAPE_OVERRIDE override default hybrid noise shaping +// (set CONFIG_HYBRID_SHAPE and shaping_weight) +// o CONFIG_FAST_FLAG "fast" compression mode +// o CONFIG_HIGH_FLAG "high" compression mode +// o CONFIG_BITRATE_KBPS hybrid bitrate is kbps, not bits / sample +// o CONFIG_CREATE_WVC create correction file +// o CONFIG_OPTIMIZE_WVC maximize hybrid compression (-cc option) +// o CONFIG_CALC_NOISE calc noise in hybrid mode +// o CONFIG_EXTRA_MODE extra processing mode (slow!) 
+// o CONFIG_SKIP_WVX no wvx stream for floats & large ints +// o CONFIG_MD5_CHECKSUM specify if you plan to store MD5 signature +// o CONFIG_CREATE_EXE specify if you plan to prepend sfx module +// o CONFIG_OPTIMIZE_MONO detect and optimize for mono files posing as +// stereo (uses a more recent stream format that +// is not compatible with decoders < 4.3) + +// config->bitrate hybrid bitrate in either bits/sample or kbps +// config->shaping_weight hybrid noise shaping coefficient override +// config->block_samples force samples per WavPack block (0 = use deflt) +// config->float_norm_exp select floating-point data (127 for +/-1.0) +// config->xmode extra mode processing value override + +// If the number of samples to be written is known then it should be passed +// here. If the duration is not known then pass -1. In the case that the size +// is not known (or the writing is terminated early) then it is suggested that +// the application retrieve the first block written and let the library update +// the total samples indication. A function is provided to do this update and +// it should be done to the "correction" file also. If this cannot be done +// (because a pipe is being used, for instance) then a valid WavPack will still +// be created, but when applications want to access that file they will have +// to seek all the way to the end to determine the actual duration. Also, if +// a RIFF header has been included then it should be updated as well or the +// WavPack file will not be directly unpackable to a valid wav file (although +// it will still be usable by itself). A return of FALSE indicates an error. +// +// The enhanced version of this function now allows setting the identities of +// any channels that are NOT standard Microsoft channels and are therefore not +// represented in the channel mask. 
// WavPack files require that all the Microsoft
// channels come first (and in Microsoft order) and these are followed by any
// other channels (which can be in any order).
//
// The identities are provided in a NULL-terminated string (0x00 is not an allowed
// channel ID). The Microsoft channels may be provided as well (and will be checked)
// but it is really only necessary to provide the "unknown" channels. Any truly
// unknown channels are indicated with a 0xFF.
//
// The channel IDs so far reserved are listed here:
//
// 0:           not allowed / terminator
// 1 - 18:      Microsoft standard channels
// 30, 31:      Stereo mix from RF64 (not really recommended, but RF64 specifies this)
// 33 - 44:     Core Audio channels (see Core Audio specification)
// 127 - 128:   Amio LeftHeight, Amio RightHeight
// 138 - 142:   Amio BottomFrontLeft/Center/Right, Amio ProximityLeft/Right
// 200 - 207:   Core Audio channels (see Core Audio specification)
// 221 - 224:   Core Audio channels 301 - 305 (offset by 80)
// 255:         Present but unknown or unused channel
//
// All other channel IDs are reserved. Ask if something you need is missing.

// Table of channels that will automatically "pair" into a single stereo stream
// Each entry holds two channel IDs (numbered as in the list above).

static const struct { unsigned char a, b; } stereo_pairs [] = {
    { 1, 2 },       // FL, FR
    { 5, 6 },       // BL, BR
    { 7, 8 },       // FLC, FRC
    { 10, 11 },     // SL, SR
    { 13, 15 },     // TFL, TFR
    { 16, 18 },     // TBL, TBR
    { 30, 31 },     // stereo mix L,R (RF64)
    { 33, 34 },     // Rls, Rrs
    { 35, 36 },     // Lw, Rw
    { 38, 39 },     // Lt, Rt
    { 127, 128 },   // Lh, Rh
    { 138, 140 },   // Bfl, Bfr
    { 141, 142 },   // Pl, Pr
    { 200, 201 },   // Amb_W, Amb_X
    { 202, 203 },   // Amb_Y, Amb_Z
    { 204, 205 },   // MS_Mid, MS_Side
    { 206, 207 },   // XY_X, XY_Y
    { 221, 222 },   // Hph_L, Hph_R
};

// number of entries in stereo_pairs, computed at compile time
#define NUM_STEREO_PAIRS (sizeof (stereo_pairs) / sizeof (stereo_pairs [0]))

// Legacy version of this function for compatibility with existing applications.
Note that this version +// also generates older streams to be compatible with all decoders back to 4.0, but of course cannot be +// used with > 2^32 samples or non-Microsoft channels. The older stream version only differs in that it +// does not support the "mono optimization" feature where stereo blocks containing identical audio data +// in both channels are encoded in mono for better efficiency. + +int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples) +{ + config->flags |= CONFIG_COMPATIBLE_WRITE; // write earlier version streams + + if (total_samples == (uint32_t) -1) + return WavpackSetConfiguration64 (wpc, config, -1, NULL); + else + return WavpackSetConfiguration64 (wpc, config, total_samples, NULL); +} + +int WavpackSetConfiguration64 (WavpackContext *wpc, WavpackConfig *config, int64_t total_samples, const unsigned char *chan_ids) +{ + uint32_t flags, bps = 0; + uint32_t chan_mask = config->channel_mask; + int num_chans = config->num_channels; + int i; + + wpc->stream_version = (config->flags & CONFIG_COMPATIBLE_WRITE) ? CUR_STREAM_VERS : MAX_STREAM_VERS; + + if ((config->qmode & QMODE_DSD_AUDIO) && config->bytes_per_sample == 1 && config->bits_per_sample == 8) { +#ifdef ENABLE_DSD + wpc->dsd_multiplier = 1; + flags = DSD_FLAG; + + for (i = 14; i >= 0; --i) + if (config->sample_rate % sample_rates [i] == 0) { + int divisor = config->sample_rate / sample_rates [i]; + + if (divisor && (divisor & (divisor - 1)) == 0) { + config->sample_rate /= divisor; + wpc->dsd_multiplier = divisor; + break; + } + } + + // most options that don't apply to DSD we can simply ignore for now, but NOT hybrid mode! 
+ if (config->flags & CONFIG_HYBRID_FLAG) { + strcpy (wpc->error_message, "hybrid mode not available for DSD!"); + return FALSE; + } + + // with DSD, very few PCM options work (or make sense), so only allow those that do + config->flags &= (CONFIG_HIGH_FLAG | CONFIG_MD5_CHECKSUM | CONFIG_PAIR_UNDEF_CHANS); + config->float_norm_exp = config->xmode = 0; +#else + strcpy (wpc->error_message, "libwavpack not configured for DSD!"); + return FALSE; +#endif + } + else + flags = config->bytes_per_sample - 1; + + wpc->total_samples = total_samples; + wpc->config.sample_rate = config->sample_rate; + wpc->config.num_channels = config->num_channels; + wpc->config.channel_mask = config->channel_mask; + wpc->config.bits_per_sample = config->bits_per_sample; + wpc->config.bytes_per_sample = config->bytes_per_sample; + wpc->config.block_samples = config->block_samples; + wpc->config.flags = config->flags; + wpc->config.qmode = config->qmode; + + if (config->flags & CONFIG_VERY_HIGH_FLAG) + wpc->config.flags |= CONFIG_HIGH_FLAG; + + for (i = 0; i < 15; ++i) + if (wpc->config.sample_rate == sample_rates [i]) + break; + + flags |= i << SRATE_LSB; + + // all of this stuff only applies to PCM + + if (!(flags & DSD_FLAG)) { + if (config->float_norm_exp) { + wpc->config.float_norm_exp = config->float_norm_exp; + wpc->config.flags |= CONFIG_FLOAT_DATA; + flags |= FLOAT_DATA; + } + else + flags |= ((config->bytes_per_sample * 8) - config->bits_per_sample) << SHIFT_LSB; + + if (config->flags & CONFIG_HYBRID_FLAG) { + flags |= HYBRID_FLAG | HYBRID_BITRATE | HYBRID_BALANCE; + + if (!(wpc->config.flags & CONFIG_SHAPE_OVERRIDE)) { + wpc->config.flags |= CONFIG_HYBRID_SHAPE | CONFIG_AUTO_SHAPING; + flags |= HYBRID_SHAPE | NEW_SHAPING; + } + else if (wpc->config.flags & CONFIG_HYBRID_SHAPE) { + wpc->config.shaping_weight = config->shaping_weight; + flags |= HYBRID_SHAPE | NEW_SHAPING; + } + + if (wpc->config.flags & (CONFIG_CROSS_DECORR | CONFIG_OPTIMIZE_WVC)) + flags |= CROSS_DECORR; + + if 
(config->flags & CONFIG_BITRATE_KBPS) { + bps = (uint32_t) floor (config->bitrate * 256000.0 / config->sample_rate / config->num_channels + 0.5); + + if (bps > (64 << 8)) + bps = 64 << 8; + } + else + bps = (uint32_t) floor (config->bitrate * 256.0 + 0.5); + } + else + flags |= CROSS_DECORR; + + if (!(config->flags & CONFIG_JOINT_OVERRIDE) || (config->flags & CONFIG_JOINT_STEREO)) + flags |= JOINT_STEREO; + + if (config->flags & CONFIG_CREATE_WVC) + wpc->wvc_flag = TRUE; + } + + // if a channel-identities string was specified, process that here, otherwise all channels + // not present in the channel mask are considered "unassigned" + + if (chan_ids) { + int lastchan = 0, mask_copy = chan_mask; + + if ((int) strlen ((char *) chan_ids) > num_chans) { // can't be more than num channels! + strcpy (wpc->error_message, "chan_ids longer than num channels!"); + return FALSE; + } + + // skip past channels that are specified in the channel mask (no reason to store those) + + while (*chan_ids) + if (*chan_ids <= 32 && *chan_ids > lastchan && (mask_copy & (1 << (*chan_ids-1)))) { + mask_copy &= ~(1 << (*chan_ids-1)); + lastchan = *chan_ids++; + } + else + break; + + // now scan the string for an actually defined channel (and don't store if there aren't any) + + for (i = 0; chan_ids [i]; i++) + if (chan_ids [i] != 0xff) { + wpc->channel_identities = (unsigned char *) strdup ((char *) chan_ids); + break; + } + } + + // This loop goes through all the channels and creates the Wavpack "streams" for them to go in. + // A stream can hold either one or two channels, so we have several rules to determine how many + // channels will go in each stream. 
+ + for (wpc->current_stream = 0; num_chans; wpc->current_stream++) { + WavpackStream *wps = malloc (sizeof (WavpackStream)); + unsigned char left_chan_id = 0, right_chan_id = 0; + int pos, chans = 1; + + // allocate the stream and initialize the pointer to it + wpc->streams = realloc (wpc->streams, (wpc->current_stream + 1) * sizeof (wpc->streams [0])); + wpc->streams [wpc->current_stream] = wps; + CLEAR (*wps); + + // if there are any bits [still] set in the channel_mask, get the next one or two IDs from there + if (chan_mask) + for (pos = 0; pos < 32; ++pos) + if (chan_mask & (1 << pos)) { + if (left_chan_id) { + right_chan_id = pos + 1; + break; + } + else { + chan_mask &= ~(1 << pos); + left_chan_id = pos + 1; + } + } + + // next check for any channels identified in the channel-identities string + while (!right_chan_id && chan_ids && *chan_ids) + if (left_chan_id) + right_chan_id = *chan_ids; + else + left_chan_id = *chan_ids++; + + // assume anything we did not get is "unassigned" + if (!left_chan_id) + left_chan_id = right_chan_id = 0xff; + else if (!right_chan_id) + right_chan_id = 0xff; + + // if we have 2 channels, this is where we decide if we can combine them into one stream: + // 1. they are "unassigned" and we've been told to combine unassigned pairs, or + // 2. 
they appear together in the valid "pairings" list + if (num_chans >= 2) { + if ((config->flags & CONFIG_PAIR_UNDEF_CHANS) && left_chan_id == 0xff && right_chan_id == 0xff) + chans = 2; + else + for (i = 0; i < NUM_STEREO_PAIRS; ++i) + if ((left_chan_id == stereo_pairs [i].a && right_chan_id == stereo_pairs [i].b) || + (left_chan_id == stereo_pairs [i].b && right_chan_id == stereo_pairs [i].a)) { + if (right_chan_id <= 32 && (chan_mask & (1 << (right_chan_id-1)))) + chan_mask &= ~(1 << (right_chan_id-1)); + else if (chan_ids && *chan_ids == right_chan_id) + chan_ids++; + + chans = 2; + break; + } + } + + num_chans -= chans; + + if (num_chans && wpc->current_stream == NEW_MAX_STREAMS - 1) + break; + + memcpy (wps->wphdr.ckID, "wvpk", 4); + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + SET_TOTAL_SAMPLES (wps->wphdr, wpc->total_samples); + wps->wphdr.version = wpc->stream_version; + wps->wphdr.flags = flags; + wps->bits = bps; + + if (!wpc->current_stream) + wps->wphdr.flags |= INITIAL_BLOCK; + + if (!num_chans) + wps->wphdr.flags |= FINAL_BLOCK; + + if (chans == 1) { + wps->wphdr.flags &= ~(JOINT_STEREO | CROSS_DECORR | HYBRID_BALANCE); + wps->wphdr.flags |= MONO_FLAG; + } + } + + wpc->num_streams = wpc->current_stream; + wpc->current_stream = 0; + + if (num_chans) { + strcpy (wpc->error_message, "too many channels!"); + return FALSE; + } + + if (config->flags & CONFIG_EXTRA_MODE) + wpc->config.xmode = config->xmode ? config->xmode : 1; + + return TRUE; +} + +// This function allows setting the Core Audio File channel layout, many of which do not +// conform to the Microsoft ordering standard that Wavpack requires internally (at least for +// those channels present in the "channel mask"). In addition to the layout tag, this function +// allows a reordering string to be stored in the file to allow the unpacker to reorder the +// channels back to the specified layout (if it is aware of this feature and wants to restore +// the CAF order). 
The number of channels in the layout is specified in the lower nybble of +// the layout word, and if a reorder string is specified it must be that long. Note that all +// the reordering is actually done outside of this library, and that if reordering is done +// then the appropriate qmode bit must be set to ensure that any MD5 sum is stored with a new +// ID so that old decoders don't try to verify it (and to let the decoder know that a reorder +// might be required). +// +// Note: This function should only be used to encode Core Audio files in such a way that a +// verbatim archive can be created. Applications can just include the chan_ids parameter in +// the call to WavpackSetConfiguration64() if there are non-Microsoft channels to specify, +// or do nothing special if only Microsoft channels are present (the vast majority of cases). + +int WavpackSetChannelLayout (WavpackContext *wpc, uint32_t layout_tag, const unsigned char *reorder) +{ + int nchans = layout_tag & 0xff; + + if ((layout_tag & 0xff00ff00) || nchans > wpc->config.num_channels) + return FALSE; + + wpc->channel_layout = layout_tag; + + if (wpc->channel_reordering) { + free (wpc->channel_reordering); + wpc->channel_reordering = NULL; + } + + if (nchans && reorder) { + int min_index = 256, i; + + for (i = 0; i < nchans; ++i) + if (reorder [i] < min_index) + min_index = reorder [i]; + + wpc->channel_reordering = malloc (nchans); + + if (wpc->channel_reordering) + for (i = 0; i < nchans; ++i) + wpc->channel_reordering [i] = reorder [i] - min_index; + } + + return TRUE; +} + +// Prepare to actually pack samples by determining the size of the WavPack +// blocks and allocating sample buffers and initializing each stream. Call +// after WavpackSetConfiguration() and before WavpackPackSamples(). A return +// of FALSE indicates an error. 
+ +static int write_metadata_block (WavpackContext *wpc); + +int WavpackPackInit (WavpackContext *wpc) +{ + if (wpc->metabytes > 16384) // 16384 bytes still leaves plenty of room for audio + write_metadata_block (wpc); // in this block (otherwise write a special one) + + // The default block size is a compromise. Longer blocks provide better encoding efficiency, + // but longer blocks adversely affect memory requirements and seeking performance. For WavPack + // version 5.0, the default block sizes have been reduced by half from the previous version, + // but the difference in encoding efficiency will generally be less than 0.1 percent. + + if (wpc->dsd_multiplier) { + wpc->block_samples = (wpc->config.sample_rate % 7) ? 48000 : 44100; + + if (wpc->config.flags & CONFIG_HIGH_FLAG) + wpc->block_samples /= 2; + + if (wpc->config.num_channels == 1) + wpc->block_samples *= 2; + + while (wpc->block_samples > 12000 && wpc->block_samples * wpc->config.num_channels > 300000) + wpc->block_samples /= 2; + } + else { + int divisor = (wpc->config.flags & CONFIG_HIGH_FLAG) ? 
2 : 4; + + while (wpc->config.sample_rate % divisor) + divisor--; + + wpc->block_samples = wpc->config.sample_rate / divisor; + + while (wpc->block_samples > 12000 && wpc->block_samples * wpc->config.num_channels > 75000) + wpc->block_samples /= 2; + + while (wpc->block_samples * wpc->config.num_channels < 20000) + wpc->block_samples *= 2; + } + + if (wpc->config.block_samples) { + if ((wpc->config.flags & CONFIG_MERGE_BLOCKS) && + wpc->block_samples > (uint32_t) wpc->config.block_samples) { + wpc->block_boundary = wpc->config.block_samples; + wpc->block_samples /= wpc->config.block_samples; + wpc->block_samples *= wpc->config.block_samples; + } + else + wpc->block_samples = wpc->config.block_samples; + } + + wpc->ave_block_samples = wpc->block_samples; + wpc->max_samples = wpc->block_samples + (wpc->block_samples >> 1); + + for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) { + WavpackStream *wps = wpc->streams [wpc->current_stream]; + + wps->sample_buffer = malloc (wpc->max_samples * (wps->wphdr.flags & MONO_FLAG ? 4 : 8)); + +#ifdef ENABLE_DSD + if (wps->wphdr.flags & DSD_FLAG) + pack_dsd_init (wpc); + else +#endif + pack_init (wpc); + } + + return TRUE; +} + +// Pack the specified samples. Samples must be stored in longs in the native +// endian format of the executing processor. The number of samples specified +// indicates composite samples (sometimes called "frames"). So, the actual +// number of data points would be this "sample_count" times the number of +// channels. Note that samples are accumulated here until enough exist to +// create a complete WavPack block (or several blocks for multichannel audio). +// If an application wants to break a block at a specific sample, then it must +// simply call WavpackFlushSamples() to force an early termination. Completed +// WavPack blocks are send to the function provided in the initial call to +// WavpackOpenFileOutput(). A return of FALSE indicates an error. 
static int pack_streams (WavpackContext *wpc, uint32_t block_samples);
static int create_riff_header (WavpackContext *wpc, int64_t total_samples, void *outbuffer);

// Deinterleave the caller's samples into the per-stream buffers, handing the
// accumulated data to pack_streams() every time max_samples have been gathered.
//
//   wpc            context prepared by WavpackPackInit()
//   sample_buffer  interleaved samples, one int32_t per channel per frame
//   sample_count   number of frames (not individual values) in sample_buffer
//
// Returns TRUE on success, FALSE if adding the automatic RIFF header or packing
// a block fails.

int WavpackPackSamples (WavpackContext *wpc, int32_t *sample_buffer, uint32_t sample_count)
{
    int nch = wpc->config.num_channels;

    while (sample_count) {
        int32_t *source_pointer = sample_buffer;
        unsigned int samples_to_copy;

        // no wrapper supplied or generated yet and default file format: generate a RIFF header
        if (!wpc->riff_header_added && !wpc->riff_header_created && !wpc->file_format) {
            char riff_header [128];

            if (!add_to_metadata (wpc, riff_header, create_riff_header (wpc, wpc->total_samples, riff_header), ID_RIFF_HEADER))
                return FALSE;
        }

        // never accumulate more than max_samples frames before packing
        if (wpc->acc_samples + sample_count > wpc->max_samples)
            samples_to_copy = wpc->max_samples - wpc->acc_samples;
        else
            samples_to_copy = sample_count;

        for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) {
            WavpackStream *wps = wpc->streams [wpc->current_stream];
            int32_t *dptr, *sptr, cnt;

            // destination starts after what is already accumulated (1 or 2 values per frame)
            dptr = wps->sample_buffer + wpc->acc_samples * (wps->wphdr.flags & MONO_FLAG ? 1 : 2);
            sptr = source_pointer;
            cnt = samples_to_copy;

            // This code used to just copy the 32-bit samples regardless of the actual size with the
            // assumption that the caller had properly sign-extended the values (if they were smaller
            // than 32 bits). However, several people have discovered that if the data isn't properly
            // sign extended then ugly things happen (e.g. CRC errors that show up only on decode).
            // To prevent this, we now explicitly sign-extend samples smaller than 32-bit when we
            // copy, and the performance hit from doing this is very small (generally < 1%).
            //
            // NOTE(review): the 3-byte cases shift a signed int32_t left, which ISO C leaves
            // undefined for negative or overflowing values -- confirm that all supported
            // compilers give the expected two's-complement result.

            if (wps->wphdr.flags & MONO_FLAG) {
                switch (wpc->config.bytes_per_sample) {
                    case 1:
                        while (cnt--) {
                            *dptr++ = (signed char) *sptr;
                            sptr += nch;
                        }

                        break;

                    case 2:
                        while (cnt--) {
                            *dptr++ = (int16_t) *sptr;
                            sptr += nch;
                        }

                        break;

                    case 3:
                        while (cnt--) {
                            *dptr++ = (*sptr << 8) >> 8;
                            sptr += nch;
                        }

                        break;

                    default:
                        while (cnt--) {
                            *dptr++ = *sptr;
                            sptr += nch;
                        }
                }

                source_pointer++;       // the next stream starts one channel further on
            }
            else {
                switch (wpc->config.bytes_per_sample) {
                    case 1:
                        while (cnt--) {
                            *dptr++ = (signed char) sptr [0];
                            *dptr++ = (signed char) sptr [1];
                            sptr += nch;
                        }

                        break;

                    case 2:
                        while (cnt--) {
                            *dptr++ = (int16_t) sptr [0];
                            *dptr++ = (int16_t) sptr [1];
                            sptr += nch;
                        }

                        break;

                    case 3:
                        while (cnt--) {
                            *dptr++ = (sptr [0] << 8) >> 8;
                            *dptr++ = (sptr [1] << 8) >> 8;
                            sptr += nch;
                        }

                        break;

                    default:
                        while (cnt--) {
                            *dptr++ = sptr [0];
                            *dptr++ = sptr [1];
                            sptr += nch;
                        }
                }

                source_pointer += 2;    // the next stream starts two channels further on
            }
        }

        sample_buffer += samples_to_copy * nch;
        sample_count -= samples_to_copy;

        // pack only once the accumulation buffers are completely full
        if ((wpc->acc_samples += samples_to_copy) == wpc->max_samples &&
            !pack_streams (wpc, wpc->block_samples))
                return FALSE;
    }

    return TRUE;
}

// Flush all accumulated samples into WavPack blocks. This is normally called
// after all samples have been sent to WavpackPackSamples(), but can also be
// called to terminate a WavPack block at a specific sample (in other words it
// is possible to continue after this operation). This is also called to
// dump non-audio blocks like those holding metadata for various purposes.
// A return of FALSE indicates an error.
+ +int WavpackFlushSamples (WavpackContext *wpc) +{ + while (wpc->acc_samples) { + uint32_t block_samples; + + if (wpc->acc_samples > wpc->block_samples) + block_samples = wpc->acc_samples / 2; + else + block_samples = wpc->acc_samples; + + if (!pack_streams (wpc, block_samples)) + return FALSE; + } + + if (wpc->metacount) + write_metadata_block (wpc); + + return TRUE; +} + +// Note: The following function is no longer required because a proper wav +// header is now automatically generated for the application. However, if the +// application wants to generate its own header or wants to include additional +// chunks, then this function can still be used in which case the automatic +// wav header generation is suppressed. + +// Add wrapper (currently RIFF only) to WavPack blocks. This should be called +// before sending any audio samples for the RIFF header or after all samples +// have been sent for any RIFF trailer. WavpackFlushSamples() should be called +// between sending the last samples and calling this for trailer data to make +// sure that headers and trailers don't get mixed up in very short files. If +// the exact contents of the RIFF header are not known because, for example, +// the file duration is uncertain or trailing chunks are possible, simply write +// a "dummy" header of the correct length. When all data has been written it +// will be possible to read the first block written and update the header +// directly. An example of this can be found in the Audition filter. A +// return of FALSE indicates an error. + +int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount) +{ + int64_t index = WavpackGetSampleIndex64 (wpc); + unsigned char meta_id; + + if (!index || index == -1) { + wpc->riff_header_added = TRUE; + meta_id = wpc->file_format ? ID_ALT_HEADER : ID_RIFF_HEADER; + } + else { + wpc->riff_trailer_bytes += bcount; + meta_id = wpc->file_format ? 
ID_ALT_TRAILER : ID_RIFF_TRAILER; + } + + return add_to_metadata (wpc, data, bcount, meta_id); +} + +// Store computed MD5 sum in WavPack metadata. Note that the user must compute +// the 16 byte sum; it is not done here. A return of FALSE indicates an error. +// If any of the lower 8 bits of qmode are set, then this MD5 is stored with +// a metadata ID that old decoders do not recognize (because they would not +// interpret the qmode and would therefore fail the verification). + +int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]) +{ + return add_to_metadata (wpc, data, 16, (wpc->config.qmode & 0xff) ? ID_ALT_MD5_CHECKSUM : ID_MD5_CHECKSUM); +} + +#pragma pack(push,4) + +typedef struct { + char ckID [4]; + uint64_t chunkSize64; +} CS64Chunk; + +typedef struct { + uint64_t riffSize64, dataSize64, sampleCount64; + uint32_t tableLength; +} DS64Chunk; + +typedef struct { + char ckID [4]; + uint32_t ckSize; + char junk [28]; +} JunkChunk; + +#pragma pack(pop) + +#define DS64ChunkFormat "DDDL" + +static int create_riff_header (WavpackContext *wpc, int64_t total_samples, void *outbuffer) +{ + int do_rf64 = 0, write_junk = 1; + ChunkHeader ds64hdr, datahdr, fmthdr; + char *outptr = outbuffer; + RiffChunkHeader riffhdr; + DS64Chunk ds64_chunk; + JunkChunk junkchunk; + WaveHeader wavhdr; + + int64_t total_data_bytes, total_riff_bytes; + int32_t channel_mask = wpc->config.channel_mask; + int32_t sample_rate = wpc->config.sample_rate; + int bytes_per_sample = wpc->config.bytes_per_sample; + int bits_per_sample = wpc->config.bits_per_sample; + int format = (wpc->config.float_norm_exp) ? 
3 : 1; + int num_channels = wpc->config.num_channels; + int wavhdrsize = 16; + + wpc->riff_header_created = TRUE; + + if (format == 3 && wpc->config.float_norm_exp != 127) { + strcpy (wpc->error_message, "can't create valid RIFF wav header for non-normalized floating data!"); + return FALSE; + } + + if (total_samples == -1) + total_samples = 0x7ffff000 / (bytes_per_sample * num_channels); + + total_data_bytes = total_samples * bytes_per_sample * num_channels; + + if (total_data_bytes > 0xff000000) { + write_junk = 0; + do_rf64 = 1; + } + + CLEAR (wavhdr); + + wavhdr.FormatTag = format; + wavhdr.NumChannels = num_channels; + wavhdr.SampleRate = sample_rate; + wavhdr.BytesPerSecond = sample_rate * num_channels * bytes_per_sample; + wavhdr.BlockAlign = bytes_per_sample * num_channels; + wavhdr.BitsPerSample = bits_per_sample; + + if (num_channels > 2 || channel_mask != 0x5 - num_channels) { + wavhdrsize = sizeof (wavhdr); + wavhdr.cbSize = 22; + wavhdr.ValidBitsPerSample = bits_per_sample; + wavhdr.SubFormat = format; + wavhdr.ChannelMask = channel_mask; + wavhdr.FormatTag = 0xfffe; + wavhdr.BitsPerSample = bytes_per_sample * 8; + wavhdr.GUID [4] = 0x10; + wavhdr.GUID [6] = 0x80; + wavhdr.GUID [9] = 0xaa; + wavhdr.GUID [11] = 0x38; + wavhdr.GUID [12] = 0x9b; + wavhdr.GUID [13] = 0x71; + } + + strncpy (riffhdr.ckID, do_rf64 ? 
"RF64" : "RIFF", sizeof (riffhdr.ckID)); + strncpy (riffhdr.formType, "WAVE", sizeof (riffhdr.formType)); + total_riff_bytes = sizeof (riffhdr) + wavhdrsize + sizeof (datahdr) + total_data_bytes + wpc->riff_trailer_bytes; + if (do_rf64) total_riff_bytes += sizeof (ds64hdr) + sizeof (ds64_chunk); + if (write_junk) total_riff_bytes += sizeof (junkchunk); + strncpy (fmthdr.ckID, "fmt ", sizeof (fmthdr.ckID)); + strncpy (datahdr.ckID, "data", sizeof (datahdr.ckID)); + fmthdr.ckSize = wavhdrsize; + + if (write_junk) { + CLEAR (junkchunk); + strncpy (junkchunk.ckID, "junk", sizeof (junkchunk.ckID)); + junkchunk.ckSize = sizeof (junkchunk) - 8; + WavpackNativeToLittleEndian (&junkchunk, ChunkHeaderFormat); + } + + if (do_rf64) { + strncpy (ds64hdr.ckID, "ds64", sizeof (ds64hdr.ckID)); + ds64hdr.ckSize = sizeof (ds64_chunk); + CLEAR (ds64_chunk); + ds64_chunk.riffSize64 = total_riff_bytes; + ds64_chunk.dataSize64 = total_data_bytes; + ds64_chunk.sampleCount64 = total_samples; + riffhdr.ckSize = (uint32_t) -1; + datahdr.ckSize = (uint32_t) -1; + WavpackNativeToLittleEndian (&ds64hdr, ChunkHeaderFormat); + WavpackNativeToLittleEndian (&ds64_chunk, DS64ChunkFormat); + } + else { + riffhdr.ckSize = (uint32_t) total_riff_bytes; + datahdr.ckSize = (uint32_t) total_data_bytes; + } + + WavpackNativeToLittleEndian (&riffhdr, ChunkHeaderFormat); + WavpackNativeToLittleEndian (&fmthdr, ChunkHeaderFormat); + WavpackNativeToLittleEndian (&wavhdr, WaveHeaderFormat); + WavpackNativeToLittleEndian (&datahdr, ChunkHeaderFormat); + + // write the RIFF chunks up to just before the data starts + + outptr = (char *) memcpy (outptr, &riffhdr, sizeof (riffhdr)) + sizeof (riffhdr); + + if (do_rf64) { + outptr = (char *) memcpy (outptr, &ds64hdr, sizeof (ds64hdr)) + sizeof (ds64hdr); + outptr = (char *) memcpy (outptr, &ds64_chunk, sizeof (ds64_chunk)) + sizeof (ds64_chunk); + } + + if (write_junk) + outptr = (char *) memcpy (outptr, &junkchunk, sizeof (junkchunk)) + sizeof (junkchunk); + + outptr 
= (char *) memcpy (outptr, &fmthdr, sizeof (fmthdr)) + sizeof (fmthdr); + outptr = (char *) memcpy (outptr, &wavhdr, wavhdrsize) + wavhdrsize; + outptr = (char *) memcpy (outptr, &datahdr, sizeof (datahdr)) + sizeof (datahdr); + + return (int)(outptr - (char *) outbuffer); +} + +static int block_add_checksum (unsigned char *buffer_start, unsigned char *buffer_end, int bytes); + +static int pack_streams (WavpackContext *wpc, uint32_t block_samples) +{ + uint32_t max_blocksize, max_chans = 1, bcount; + unsigned char *outbuff, *outend, *out2buff, *out2end; + int result = TRUE, i; + + // for calculating output (block) buffer size, first see if any streams are stereo + + for (i = 0; i < wpc->num_streams; i++) + if (!(wpc->streams [i]->wphdr.flags & MONO_FLAG)) { + max_chans = 2; + break; + } + + // then calculate maximum size based on bytes / sample + + max_blocksize = block_samples * max_chans * ((wpc->streams [0]->wphdr.flags & BYTES_STORED) + 1); + + // add margin based on how much "negative" compression is possible with pathological audio + + if ((wpc->config.flags & CONFIG_FLOAT_DATA) && !(wpc->config.flags & CONFIG_SKIP_WVX)) + max_blocksize += max_blocksize; // 100% margin for lossless float data + else + max_blocksize += max_blocksize >> 2; // otherwise 25% margin for everything else + + max_blocksize += wpc->metabytes + 1024; // finally, add metadata & another 1K margin + + out2buff = (wpc->wvc_flag) ? 
malloc (max_blocksize) : NULL; + out2end = out2buff + max_blocksize; + outbuff = malloc (max_blocksize); + outend = outbuff + max_blocksize; + + for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) { + WavpackStream *wps = wpc->streams [wpc->current_stream]; + uint32_t flags = wps->wphdr.flags; + + flags &= ~MAG_MASK; + flags += (1 << MAG_LSB) * ((flags & BYTES_STORED) * 8 + 7); + + SET_BLOCK_INDEX (wps->wphdr, wps->sample_index); + wps->wphdr.block_samples = block_samples; + wps->wphdr.flags = flags; + wps->block2buff = out2buff; + wps->block2end = out2end; + wps->blockbuff = outbuff; + wps->blockend = outend; + +#ifdef ENABLE_DSD + if (flags & DSD_FLAG) + result = pack_dsd_block (wpc, wps->sample_buffer); + else +#endif + result = pack_block (wpc, wps->sample_buffer); + + if (result) { + result = block_add_checksum (outbuff, outend, (flags & HYBRID_FLAG) ? 2 : 4); + + if (result && out2buff) + result = block_add_checksum (out2buff, out2end, 2); + } + + wps->blockbuff = wps->block2buff = NULL; + + if (wps->wphdr.block_samples != block_samples) + block_samples = wps->wphdr.block_samples; + + if (!result) { + strcpy (wpc->error_message, "output buffer overflowed!"); + break; + } + + bcount = ((WavpackHeader *) outbuff)->ckSize + 8; + WavpackNativeToLittleEndian ((WavpackHeader *) outbuff, WavpackHeaderFormat); + result = wpc->blockout (wpc->wv_out, outbuff, bcount); + + if (!result) { + strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); + break; + } + + wpc->filelen += bcount; + + if (out2buff) { + bcount = ((WavpackHeader *) out2buff)->ckSize + 8; + WavpackNativeToLittleEndian ((WavpackHeader *) out2buff, WavpackHeaderFormat); + result = wpc->blockout (wpc->wvc_out, out2buff, bcount); + + if (!result) { + strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); + break; + } + + wpc->file2len += bcount; + } + + if (wpc->acc_samples != block_samples) + memmove (wps->sample_buffer, 
wps->sample_buffer + block_samples * (flags & MONO_FLAG ? 1 : 2), + (wpc->acc_samples - block_samples) * sizeof (int32_t) * (flags & MONO_FLAG ? 1 : 2)); + } + + wpc->current_stream = 0; + wpc->ave_block_samples = (wpc->ave_block_samples * 0x7 + block_samples + 0x4) >> 3; + wpc->acc_samples -= block_samples; + free (outbuff); + + if (out2buff) + free (out2buff); + + return result; +} + +// Given the pointer to the first block written (to either a .wv or .wvc file), +// update the block with the actual number of samples written. If the wav +// header was generated by the library, then it is updated also. This should +// be done if WavpackSetConfiguration() was called with an incorrect number +// of samples (or -1). It is the responsibility of the application to read and +// rewrite the block. An example of this can be found in the Audition filter. + +static void block_update_checksum (unsigned char *buffer_start); + +void WavpackUpdateNumSamples (WavpackContext *wpc, void *first_block) +{ + uint32_t wrapper_size; + + WavpackLittleEndianToNative (first_block, WavpackHeaderFormat); + SET_TOTAL_SAMPLES (* (WavpackHeader *) first_block, WavpackGetSampleIndex64 (wpc)); + + if (wpc->riff_header_created && WavpackGetWrapperLocation (first_block, &wrapper_size)) { + unsigned char riff_header [128]; + + if (wrapper_size == create_riff_header (wpc, WavpackGetSampleIndex64 (wpc), riff_header)) + memcpy (WavpackGetWrapperLocation (first_block, NULL), riff_header, wrapper_size); + } + + block_update_checksum (first_block); + WavpackNativeToLittleEndian (first_block, WavpackHeaderFormat); +} + +// Note: The following function is no longer required because the wav header +// automatically generated for the application will also be updated by +// WavpackUpdateNumSamples (). However, if the application wants to generate +// its own header or wants to include additional chunks, then this function +// still must be used to update the application generated header. 
+ +// Given the pointer to the first block written to a WavPack file, this +// function returns the location of the stored RIFF header that was originally +// written with WavpackAddWrapper(). This would normally be used to update +// the wav header to indicate that a different number of samples was actually +// written or if additional RIFF chunks are written at the end of the file. +// The "size" parameter can be set to non-NULL to obtain the exact size of the +// RIFF header, and the function will return FALSE if the header is not found +// in the block's metadata (or it is not a valid WavPack block). It is the +// responsibility of the application to read and rewrite the block. An example +// of this can be found in the Audition filter. + +static void *find_metadata (void *wavpack_block, int desired_id, uint32_t *size); + +void *WavpackGetWrapperLocation (void *first_block, uint32_t *size) +{ + void *loc; + + WavpackLittleEndianToNative (first_block, WavpackHeaderFormat); + loc = find_metadata (first_block, ID_RIFF_HEADER, size); + + if (!loc) + loc = find_metadata (first_block, ID_ALT_HEADER, size); + + WavpackNativeToLittleEndian (first_block, WavpackHeaderFormat); + + return loc; +} + +static void *find_metadata (void *wavpack_block, int desired_id, uint32_t *size) +{ + WavpackHeader *wphdr = wavpack_block; + unsigned char *dp, meta_id, c1, c2; + int32_t bcount, meta_bc; + + if (strncmp (wphdr->ckID, "wvpk", 4)) + return NULL; + + bcount = wphdr->ckSize - sizeof (WavpackHeader) + 8; + dp = (unsigned char *)(wphdr + 1); + + while (bcount >= 2) { + meta_id = *dp++; + c1 = *dp++; + + meta_bc = c1 << 1; + bcount -= 2; + + if (meta_id & ID_LARGE) { + if (bcount < 2) + break; + + c1 = *dp++; + c2 = *dp++; + meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); + bcount -= 2; + } + + if ((meta_id & ID_UNIQUE) == desired_id) { + if ((bcount - meta_bc) >= 0) { + if (size) + *size = meta_bc - ((meta_id & ID_ODD_SIZE) ? 
1 : 0); + + return dp; + } + else + return NULL; + } + + bcount -= meta_bc; + dp += meta_bc; + } + + return NULL; +} + +int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end) +{ + uint32_t mdsize = wpmd->byte_length + (wpmd->byte_length & 1); + WavpackHeader *wphdr = (WavpackHeader *) buffer_start; + + mdsize += (wpmd->byte_length > 510) ? 4 : 2; + buffer_start += wphdr->ckSize + 8; + + if (buffer_start + mdsize >= buffer_end) + return FALSE; + + buffer_start [0] = wpmd->id | (wpmd->byte_length & 1 ? ID_ODD_SIZE : 0); + buffer_start [1] = (wpmd->byte_length + 1) >> 1; + + if (wpmd->byte_length > 510) { + buffer_start [0] |= ID_LARGE; + buffer_start [2] = (wpmd->byte_length + 1) >> 9; + buffer_start [3] = (wpmd->byte_length + 1) >> 17; + } + + if (wpmd->data && wpmd->byte_length) { + memcpy (buffer_start + (wpmd->byte_length > 510 ? 4 : 2), wpmd->data, wpmd->byte_length); + + if (wpmd->byte_length & 1) // if size is odd, make sure pad byte is a zero + buffer_start [mdsize - 1] = 0; + } + + wphdr->ckSize += mdsize; + return TRUE; +} + +static int add_to_metadata (WavpackContext *wpc, void *data, uint32_t bcount, unsigned char id) +{ + WavpackMetadata *mdp; + unsigned char *src = data; + + while (bcount) { + if (wpc->metacount) { + uint32_t bc = bcount; + + mdp = wpc->metadata + wpc->metacount - 1; + + if (mdp->id == id) { + if (wpc->metabytes + bcount > 1000000) + bc = 1000000 - wpc->metabytes; + + mdp->data = realloc (mdp->data, mdp->byte_length + bc); + memcpy ((char *) mdp->data + mdp->byte_length, src, bc); + mdp->byte_length += bc; + wpc->metabytes += bc; + bcount -= bc; + src += bc; + + if (wpc->metabytes >= 1000000 && !write_metadata_block (wpc)) + return FALSE; + } + } + + if (bcount) { + wpc->metadata = realloc (wpc->metadata, (wpc->metacount + 1) * sizeof (WavpackMetadata)); + mdp = wpc->metadata + wpc->metacount++; + mdp->byte_length = 0; + mdp->data = NULL; + mdp->id = id; + } + } + + return TRUE; +} + +static 
char *write_metadata (WavpackMetadata *wpmd, char *outdata) +{ + unsigned char id = wpmd->id, wordlen [3]; + + wordlen [0] = (wpmd->byte_length + 1) >> 1; + wordlen [1] = (wpmd->byte_length + 1) >> 9; + wordlen [2] = (wpmd->byte_length + 1) >> 17; + + if (wpmd->byte_length & 1) + id |= ID_ODD_SIZE; + + if (wordlen [1] || wordlen [2]) + id |= ID_LARGE; + + *outdata++ = id; + *outdata++ = wordlen [0]; + + if (id & ID_LARGE) { + *outdata++ = wordlen [1]; + *outdata++ = wordlen [2]; + } + + if (wpmd->data && wpmd->byte_length) { + memcpy (outdata, wpmd->data, wpmd->byte_length); + outdata += wpmd->byte_length; + + if (wpmd->byte_length & 1) + *outdata++ = 0; + } + + return outdata; +} + +static int write_metadata_block (WavpackContext *wpc) +{ + char *block_buff, *block_ptr; + WavpackHeader *wphdr; + + if (wpc->metacount) { + int metacount = wpc->metacount, block_size = sizeof (WavpackHeader); + WavpackMetadata *wpmdp = wpc->metadata; + + while (metacount--) { + block_size += wpmdp->byte_length + (wpmdp->byte_length & 1); + block_size += (wpmdp->byte_length > 510) ? 
4 : 2; + wpmdp++; + } + + // allocate 6 extra bytes for 4-byte checksum (which we add last) + wphdr = (WavpackHeader *) (block_buff = malloc (block_size + 6)); + + CLEAR (*wphdr); + memcpy (wphdr->ckID, "wvpk", 4); + SET_TOTAL_SAMPLES (*wphdr, wpc->total_samples); + wphdr->version = wpc->stream_version; + wphdr->ckSize = block_size - 8; + wphdr->block_samples = 0; + + block_ptr = (char *)(wphdr + 1); + + wpmdp = wpc->metadata; + + while (wpc->metacount) { + block_ptr = write_metadata (wpmdp, block_ptr); + wpc->metabytes -= wpmdp->byte_length; + free_metadata (wpmdp++); + wpc->metacount--; + } + + free (wpc->metadata); + wpc->metadata = NULL; + // add a 4-byte checksum here (increases block size by 6) + block_add_checksum ((unsigned char *) block_buff, (unsigned char *) block_buff + (block_size += 6), 4); + WavpackNativeToLittleEndian ((WavpackHeader *) block_buff, WavpackHeaderFormat); + + if (!wpc->blockout (wpc->wv_out, block_buff, block_size)) { + free (block_buff); + strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); + return FALSE; + } + + free (block_buff); + } + + return TRUE; +} + +void free_metadata (WavpackMetadata *wpmd) +{ + if (wpmd->data) { + free (wpmd->data); + wpmd->data = NULL; + } +} + +// These two functions add or update the block checksums that were introduced in WavPack 5.0. +// The presence of the checksum is indicated by a flag in the wavpack header (HAS_CHECKSUM) +// and the actual metadata item should be the last one in the block, and can be either 2 or 4 +// bytes. Of course, older versions of the decoder will simply ignore both of these. 
+ +static int block_add_checksum (unsigned char *buffer_start, unsigned char *buffer_end, int bytes) +{ + WavpackHeader *wphdr = (WavpackHeader *) buffer_start; +#ifdef BITSTREAM_SHORTS + uint16_t *csptr = (uint16_t*) buffer_start; +#else + unsigned char *csptr = buffer_start; +#endif + int bcount = wphdr->ckSize + 8, wcount; + uint32_t csum = (uint32_t) -1; + + if (bytes != 2 && bytes != 4) + return FALSE; + + if (bcount < sizeof (WavpackHeader) || (bcount & 1) || buffer_start + bcount + 2 + bytes > buffer_end) + return FALSE; + + wphdr->flags |= HAS_CHECKSUM; + wphdr->ckSize += 2 + bytes; + wcount = bcount >> 1; + +#ifdef BITSTREAM_SHORTS + while (wcount--) + csum = (csum * 3) + *csptr++; +#else + WavpackNativeToLittleEndian ((WavpackHeader *) buffer_start, WavpackHeaderFormat); + + while (wcount--) { + csum = (csum * 3) + csptr [0] + (csptr [1] << 8); + csptr += 2; + } + + WavpackLittleEndianToNative ((WavpackHeader *) buffer_start, WavpackHeaderFormat); +#endif + + buffer_start += bcount; + *buffer_start++ = ID_BLOCK_CHECKSUM; + *buffer_start++ = bytes >> 1; + + if (bytes == 4) { + *buffer_start++ = csum; + *buffer_start++ = csum >> 8; + *buffer_start++ = csum >> 16; + *buffer_start++ = csum >> 24; + } + else { + csum ^= csum >> 16; + *buffer_start++ = csum; + *buffer_start++ = csum >> 8; + } + + return TRUE; +} + +static void block_update_checksum (unsigned char *buffer_start) +{ + WavpackHeader *wphdr = (WavpackHeader *) buffer_start; + unsigned char *dp, meta_id, c1, c2; + uint32_t bcount, meta_bc; + + if (!(wphdr->flags & HAS_CHECKSUM)) + return; + + bcount = wphdr->ckSize - sizeof (WavpackHeader) + 8; + dp = (unsigned char *)(wphdr + 1); + + while (bcount >= 2) { + meta_id = *dp++; + c1 = *dp++; + + meta_bc = c1 << 1; + bcount -= 2; + + if (meta_id & ID_LARGE) { + if (bcount < 2) + return; + + c1 = *dp++; + c2 = *dp++; + meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); + bcount -= 2; + } + + if (bcount < meta_bc) + return; + + if ((meta_id & 
ID_UNIQUE) == ID_BLOCK_CHECKSUM) { +#ifdef BITSTREAM_SHORTS + uint16_t *csptr = (uint16_t*) buffer_start; +#else + unsigned char *csptr = buffer_start; +#endif + int wcount = (int)(dp - 2 - buffer_start) >> 1; + uint32_t csum = (uint32_t) -1; + + if ((meta_id & ID_ODD_SIZE) || meta_bc < 2 || meta_bc > 4) + return; + +#ifdef BITSTREAM_SHORTS + while (wcount--) + csum = (csum * 3) + *csptr++; +#else + WavpackNativeToLittleEndian ((WavpackHeader *) buffer_start, WavpackHeaderFormat); + + while (wcount--) { + csum = (csum * 3) + csptr [0] + (csptr [1] << 8); + csptr += 2; + } + + WavpackLittleEndianToNative ((WavpackHeader *) buffer_start, WavpackHeaderFormat); +#endif + + if (meta_bc == 4) { + *dp++ = csum; + *dp++ = csum >> 8; + *dp++ = csum >> 16; + *dp++ = csum >> 24; + return; + } + else { + csum ^= csum >> 16; + *dp++ = csum; + *dp++ = csum >> 8; + return; + } + } + + bcount -= meta_bc; + dp += meta_bc; + } +} diff --git a/third_party/wavpack/src/pack_x64.S b/third_party/wavpack/src/pack_x64.S new file mode 100644 index 0000000..a8798fd --- /dev/null +++ b/third_party/wavpack/src/pack_x64.S @@ -0,0 +1,1941 @@ +############################################################################ +## **** WAVPACK **** ## +## Hybrid Lossless Wavefile Compressor ## +## Copyright (c) 1998 - 2015 Conifer Software. ## +## All Rights Reserved. 
## +## Distributed under the BSD Software License (see license.txt) ## +############################################################################ + + .intel_syntax noprefix + .text + + .globl _pack_decorr_stereo_pass_x64win + .globl _pack_decorr_stereo_pass_cont_rev_x64win + .globl _pack_decorr_stereo_pass_cont_x64win + .globl _pack_decorr_mono_buffer_x64win + .globl _pack_decorr_mono_pass_cont_x64win + .globl _scan_max_magnitude_x64win + .globl _log2buffer_x64win + + .globl pack_decorr_stereo_pass_x64win + .globl pack_decorr_stereo_pass_cont_rev_x64win + .globl pack_decorr_stereo_pass_cont_x64win + .globl pack_decorr_mono_buffer_x64win + .globl pack_decorr_mono_pass_cont_x64win + .globl scan_max_magnitude_x64win + .globl log2buffer_x64win + + .globl _pack_decorr_stereo_pass_x64 + .globl _pack_decorr_stereo_pass_cont_rev_x64 + .globl _pack_decorr_stereo_pass_cont_x64 + .globl _pack_decorr_mono_buffer_x64 + .globl _pack_decorr_mono_pass_cont_x64 + .globl _scan_max_magnitude_x64 + .globl _log2buffer_x64 + + .globl pack_decorr_stereo_pass_x64 + .globl pack_decorr_stereo_pass_cont_rev_x64 + .globl pack_decorr_stereo_pass_cont_x64 + .globl pack_decorr_mono_buffer_x64 + .globl pack_decorr_mono_pass_cont_x64 + .globl scan_max_magnitude_x64 + .globl log2buffer_x64 + +# This module contains X64 assembly optimized versions of functions required +# to encode WavPack files. + +# This is an assembly optimized version of the following WavPack function: +# +# void pack_decorr_stereo_pass ( +# struct decorr_pass *dpp, +# int32_t *buffer, +# int32_t sample_count); +# +# It performs a single pass of stereo decorrelation, in place, as specified +# by the decorr_pass structure. Note that this function does NOT return the +# dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +# the number of samples is not a multiple of MAX_TERM, these must be moved if +# they are to be used somewhere else. 
+# +# This is written to work on an X86-64 processor (also called the AMD64) +# running in 64-bit mode and uses the MMX extensions to improve the +# performance by processing both stereo channels together. It is based on +# the original MMX code written by Joachim Henke that used MMX intrinsics +# called from C. Many thanks to Joachim for that! +# +# An issue with using MMX for this is that the sample history array in the +# decorr_pass structure contains separate arrays for each channel while the +# MMX code wants there to be a single array of dual samples. The fix for +# this is to convert the data in the arrays on entry and exit, and this is +# made easy by the fact that the 8 MMX registers hold exactly the required +# amount of data (64 bytes)! +# +# This version has entry points for both the System V ABI and the Windows +# X64 ABI. It does not use the "red zone" or the "shadow area"; it saves the +# non-volatile registers for both ABIs on the stack and allocates another +# 8 bytes on the stack so that it's properly aligned. Note that it does NOT +# provide unwind data for the Windows ABI (the unpack_x64.asm module for +# MSVC does). 
The arguments are passed in registers: +# +# System V Windows +# struct decorr_pass *dpp rdi rcx +# int32_t *buffer rsi rdx +# int32_t sample_count edx r8d +# +# During the processing loops, the following registers are used: +# +# rdi buffer pointer +# rsi termination buffer pointer +# rax,rbx,rdx used in default term to reduce calculation +# rbp decorr_pass pointer +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 0 (for pcmpeqd) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +_pack_decorr_stereo_pass_x64win: +pack_decorr_stereo_pass_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp benter + +_pack_decorr_stereo_pass_x64: +pack_decorr_stereo_pass_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +benter: mov rbp, rdi # rbp = *dpp + mov rdi, rsi # rdi = inbuffer + mov esi, edx + shl esi, 3 + jz bdone + add rsi, rdi # rsi = termination buffer pointer + + // convert samples_A and samples_B array into samples_AB array for MMX + // (the MMX registers provide exactly enough storage to do this easily) + + movq mm0, [rbp+16] + punpckldq mm0, [rbp+48] + movq mm1, [rbp+16] + punpckhdq mm1, [rbp+48] + movq mm2, [rbp+24] + punpckldq mm2, [rbp+56] + movq mm3, [rbp+24] + punpckhdq mm3, [rbp+56] + movq mm4, [rbp+32] + punpckldq mm4, [rbp+64] + movq mm5, [rbp+32] + punpckhdq mm5, [rbp+64] + movq mm6, [rbp+40] + punpckldq mm6, [rbp+72] + movq mm7, [rbp+40] + punpckhdq mm7, [rbp+72] + + movq [rbp+16], mm0 + movq [rbp+24], mm1 + movq [rbp+32], mm2 + movq [rbp+40], mm3 + movq [rbp+48], mm4 + movq [rbp+56], mm5 + movq [rbp+64], mm6 + movq [rbp+72], mm7 + + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + + mov eax, [rbp+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd 
mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rbp+8] # mm5 = weight_AB masked to 16-bit + + movq mm4, [rbp+16] # preload samples_AB[0] + + mov al, [rbp] # get term and vector to correct loop + cmp al, 17 + je buff_term_17_loop + cmp al, 18 + je buff_term_18_loop + cmp al, -1 + je buff_term_minus_1_loop + cmp al, -2 + je buff_term_minus_2_loop + cmp al, -3 + je buff_term_minus_3_loop + + pxor mm4, mm4 # mm4 = 0 (for pcmpeqd) + xor eax, eax + xor ebx, ebx + add bl, [rbp] + mov ecx, 7 + and ebx, ecx + jmp buff_default_term_loop + + .balign 64 + +buff_default_term_loop: + movq mm2, [rdi] # mm2 = left_right + movq mm3, [rbp+16+rax*8] + inc eax + and eax, ecx + movq [rbp+16+rbx*8], mm2 + inc ebx + and ebx, ecx + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm4 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_default_term_loop + + jmp bdone + + .balign 64 + +buff_term_17_loop: + movq mm3, mm4 # get previous calculated value + paddd mm3, mm4 + psubd mm3, [rbp+24] + movq [rbp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [rdi] # mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 
= sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_17_loop + + movq [rbp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_18_loop: + movq mm3, mm4 # get previous calculated value + psubd mm3, [rbp+24] + psrad mm3, 1 + paddd mm3, mm4 # mm3 = sam_AB + movq [rbp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [rdi] # mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_18_loop + + movq [rbp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_minus_1_loop: + movq mm3, mm4 # mm3 = previous calculated value + movq mm2, [rdi] # mm2 = left_right + movq mm4, mm2 + psrlq mm4, 32 + punpckldq mm3, mm2 # mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd 
mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_minus_1_loop + + movq [rbp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_minus_2_loop: + movq mm2, [rdi] # mm2 = left_right + movq mm3, mm2 + psrlq mm3, 32 + por mm3, mm4 + punpckldq mm4, mm2 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_minus_2_loop + + movq [rbp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_minus_3_loop: + movq mm2, [rdi] # mm2 = left_right + movq mm3, mm4 # mm3 = previous calculated value + movq mm4, mm2 # mm0 = swap dwords of new data + psrlq mm4, 32 + punpckldq mm4, mm2 # mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm2 # 
store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_minus_3_loop + + movq [rbp+16], mm4 # post-store samples_AB[0] + +bdone: pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + movq [rbp+8], mm5 # put weight_AB back + + // convert samples_AB array back into samples_A and samples_B + + movq mm0, [rbp+16] + movq mm1, [rbp+24] + movq mm2, [rbp+32] + movq mm3, [rbp+40] + movq mm4, [rbp+48] + movq mm5, [rbp+56] + movq mm6, [rbp+64] + movq mm7, [rbp+72] + + movd [rbp+16], mm0 + movd [rbp+20], mm1 + movd [rbp+24], mm2 + movd [rbp+28], mm3 + movd [rbp+32], mm4 + movd [rbp+36], mm5 + movd [rbp+40], mm6 + movd [rbp+44], mm7 + + punpckhdq mm0, mm0 + punpckhdq mm1, mm1 + punpckhdq mm2, mm2 + punpckhdq mm3, mm3 + punpckhdq mm4, mm4 + punpckhdq mm5, mm5 + punpckhdq mm6, mm6 + punpckhdq mm7, mm7 + + movd [rbp+48], mm0 + movd [rbp+52], mm1 + movd [rbp+56], mm2 + movd [rbp+60], mm3 + movd [rbp+64], mm4 + movd [rbp+68], mm5 + movd [rbp+72], mm6 + movd [rbp+76], mm7 + + emms + + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + + +# These are assembly optimized version of the following WavPack functions: +# +# void pack_decorr_stereo_pass_cont ( +# struct decorr_pass *dpp, +# int32_t *in_buffer, +# int32_t *out_buffer, +# int32_t sample_count); +# +# void pack_decorr_stereo_pass_cont_rev ( +# struct decorr_pass *dpp, +# int32_t *in_buffer, +# int32_t *out_buffer, +# int32_t sample_count); +# +# It performs a single pass of stereo decorrelation, transfering from the +# input buffer to the output 
buffer. Note that this version of the function +# requires that the previous stereo samples (up to 8, depending on dpp->term) +# are visible and correct. In other words, it ignores the "samples_*" +# fields in the decorr_pass structure and gets the history data directly +# from the source buffer. It does, however, return the appropriate history +# samples to the decorr_pass structure before returning. +# +# This is written to work on an X86-64 processor (also called the AMD64) +# running in 64-bit mode and uses the MMX extensions to improve the +# performance by processing both stereo channels together. It is based on +# the original MMX code written by Joachim Henke that used MMX intrinsics +# called from C. Many thanks to Joachim for that! +# +# This version has entry points for both the System V ABI and the Windows +# X64 ABI. It does not use the "red zone" or the "shadow area"; it saves the +# non-volatile registers for both ABIs on the stack and allocates another +# 8 bytes on the stack to store the dpp pointer. Note that it does NOT +# provide unwind data for the Windows ABI (the pack_x64.asm module for +# MSVC does). 
The arguments are passed in registers: +# +# System V Windows +# struct decorr_pass *dpp rdi rcx +# int32_t *in_buffer rsi rdx +# int32_t *out_buffer rdx r8 +# int32_t sample_count ecx r9 +# +# During the processing loops, the following registers are used: +# +# rdi input buffer pointer +# rsi direction (-8 forward, +8 reverse) +# rbx delta from input to output buffer +# ecx sample count +# rdx sign (dir) * term * 8 (terms 1-8 only) +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 weight sums +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# +# stack usage: +# +# [rsp+0] = *dpp +# + +_pack_decorr_stereo_pass_cont_rev_x64win: +pack_decorr_stereo_pass_cont_rev_x64win: + mov rax, 8 + jmp wstart + +_pack_decorr_stereo_pass_cont_x64win: +pack_decorr_stereo_pass_cont_x64win: + mov rax, -8 + jmp wstart + +wstart: push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp enter + +_pack_decorr_stereo_pass_cont_rev_x64: +pack_decorr_stereo_pass_cont_rev_x64: + mov rax, 8 + jmp start + +_pack_decorr_stereo_pass_cont_x64: +pack_decorr_stereo_pass_cont_x64: + mov rax, -8 + jmp start + +start: push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +enter: mov [rsp], rdi # [rsp] = *dpp + mov rdi, rsi # rdi = inbuffer + mov rsi, rax # get direction from rax + + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + + mov rax, [rsp] # access dpp + mov eax, [rax+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + + mov rax, [rsp] # access dpp + movq mm5, [rax+8] # mm5 = weight_AB + movq mm4, [rax+88] # mm4 = sum_AB + + mov rbx, rdx # rbx = out_buffer (rdx) - in_buffer (rdi) + sub rbx, rdi + + mov rax, [rsp] # rax = dpp + movsxd rax, DWORD PTR [rax] # get term and vector to correct loop + cmp al, 17 + je term_17_loop + cmp al, 18 + je term_18_loop + cmp al, -1 + je 
term_minus_1_loop + cmp al, -2 + je term_minus_2_loop + cmp al, -3 + je term_minus_3_loop + + shl rax, 3 + mov rdx, rax # rdx = term * 8 to index correlation sample + test rsi, rsi # test direction + jns default_term_loop + neg rdx + jmp default_term_loop + + .balign 64 + +default_term_loop: + movq mm3, [rdi+rdx] # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz default_term_loop + + mov rax, [rsp] # access dpp + movq [rax+8], mm5 # put weight_AB back + movq [rax+88], mm4 # put sum_AB back + emms + + mov rdx, [rsp] # access dpp with rdx + movsxd rcx, DWORD PTR [rdx] # rcx = dpp->term + +default_store_samples: + dec rcx + add rdi, rsi # back up one full sample + mov eax, [rdi+4] + mov [rdx+rcx*4+48], eax # store samples_B [ecx] + mov eax, [rdi] + mov [rdx+rcx*4+16], eax # store samples_A [ecx] + test rcx, rcx + jnz default_store_samples + jmp done + + .balign 64 + +term_17_loop: + movq mm3, [rdi+rsi] # get previous calculated value + paddd mm3, mm3 + psubd mm3, [rdi+rsi*2] + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 
+ psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_17_loop + + mov rax, [rsp] # access dpp + movq [rax+8], mm5 # put weight_AB back + movq [rax+88], mm4 # put sum_AB back + emms + jmp term_1718_common_store + + .balign 64 + +term_18_loop: + movq mm3, [rdi+rsi] # get previous calculated value + movq mm0, mm3 + psubd mm3, [rdi+rsi*2] + psrad mm3, 1 + paddd mm3, mm0 # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + dec ecx + paddd mm4, mm5 # add weights to sum + jnz term_18_loop + + mov rax, [rsp] # access dpp + movq [rax+8], mm5 # put weight_AB back + movq [rax+88], mm4 # put sum_AB back + emms + +term_1718_common_store: + + mov rax, [rsp] # access dpp + add rdi, rsi # back up a full sample + mov edx, [rdi+4] # dpp->samples_B [0] = iptr [-1]; + mov [rax+48], edx + mov edx, [rdi] # dpp->samples_A [0] = iptr [-2]; + mov 
[rax+16], edx + add rdi, rsi # back up another sample + mov edx, [rdi+4] # dpp->samples_B [1] = iptr [-3]; + mov [rax+52], edx + mov edx, [rdi] # dpp->samples_A [1] = iptr [-4]; + mov [rax+20], edx + jmp done + + .balign 64 + +term_minus_1_loop: + movq mm3, [rdi+rsi] # mm3 = previous calculated value + movq mm2, [rdi] # mm2 = left_right + psrlq mm3, 32 + punpckldq mm3, mm2 # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_minus_1_loop + + mov rax, [rsp] # access dpp + movq [rax+8], mm5 # put weight_AB back + movq [rax+88], mm4 # put sum_AB back + emms + + add rdi, rsi # back up a full sample + mov edx, [rdi+4] # dpp->samples_A [0] = iptr [-1]; + mov rax, [rsp] + mov [rax+16], edx + jmp done + + .balign 64 + +term_minus_2_loop: + movq mm2, [rdi] # mm2 = left_right + movq mm3, mm2 # mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, [rdi+rsi] # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 # store 
result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_minus_2_loop + + mov rax, [rsp] # access dpp + movq [rax+8], mm5 # put weight_AB back + movq [rax+88], mm4 # put sum_AB back + emms + + add rdi, rsi # back up a full sample + mov edx, [rdi] # dpp->samples_B [0] = iptr [-2]; + mov rax, [rsp] + mov [rax+48], edx + jmp done + + .balign 64 + +term_minus_3_loop: + movq mm0, [rdi+rsi] # mm0 = previous calculated value + movq mm3, mm0 # mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, mm0 # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_minus_3_loop + + mov rax, [rsp] # access dpp + movq [rax+8], mm5 # put weight_AB back + movq [rax+88], 
mm4 # put sum_AB back + emms + + add rdi, rsi # back up a full sample + mov edx, [rdi+4] # dpp->samples_A [0] = iptr [-1]; + mov rax, [rsp] + mov [rax+16], edx + mov edx, [rdi] # dpp->samples_B [0] = iptr [-2]; + mov [rax+48], edx + +done: add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# uint32_t decorr_mono_buffer (int32_t *buffer, +# struct decorr_pass *decorr_passes, +# int32_t num_terms, +# int32_t sample_count) +# +# Decorrelate a buffer of mono samples, in place, as specified by the array +# of decorr_pass structures. Note that this function does NOT return the +# dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +# the number of samples is not a multiple of MAX_TERM, these must be moved if +# they are to be used somewhere else. The magnitude of the output samples is +# accumulated and returned (see scan_max_magnitude() for more details). By +# using the overflow detection of the multiply instruction, this detects +# when the "long_math" variant is required. +# +# For the fastest possible operation with the four "common" decorrelation +# filters (i.e., fast, normal, high and very high) this function can be +# configured to include hardcoded versions of these filters that are created +# using macros. In that case, the passed filter is checked to make sure that +# it matches one of the four. If it doesn't, or if the hardcoded filters are +# not enabled, a "general" version of the decorrelation loop is used. This +# variable enables the hardcoded filters and can be disabled if there are +# problems with the code or macros: + + HARDCODED_FILTERS = 1 + +# Entry points for both the System V ABI and the Windows X64 ABI are provided. +# It does not use the "red zone" or the "shadow area"; it saves the +# non-volatile registers for both ABIs on the stack and allocates another +# 24 bytes on the stack to store the dpp pointer and the sample count. 
Note +# that it does NOT provide unwind data for the Windows ABI (the unpack_x64.asm +# module for MSVC does). The arguments are passed in registers: +# +# System V Windows +# int32_t *buffer rdi rcx +# struct decorr_pass *dpp rsi rdx +# int32_t num_terms rdx r8 +# int32_t sample_count ecx r9 +# +# stack usage: +# +# [rsp+8] = sample_count +# [rsp+0] = decorr_passes (unused in hardcoded filter case) +# +# register usage: +# +# ecx = sample being decorrelated +# esi = sample up counter +# rdi = *buffer +# rbp = *dpp +# r8 = magnitude accumulator +# r9 = dpp end ptr (unused in hardcoded filter case) +# + .if HARDCODED_FILTERS +# +# This macro is used for checking the decorr_passes array to make sure that the terms match +# the hardcoded terms. The terms of these filters are the first element in the tables defined +# in decorr_tables.h (with the negative terms replaced with 1). +# + + .macro chkterm term rbp_offset + cmp BYTE PTR [rbp], \term + jnz use_general_version + add rbp, \rbp_offset + .endm +# +# This macro processes the single specified term (with a fixed delta of 2) and updates the +# term pointer (rbp) with the specified offset when done. 
It assumes the following registers: +# +# ecx = sample being decorrelated +# esi = sample up counter (used for terms 1-8) +# rbp = decorr_pass pointer for this term (updated with "rbp_offset" when done) +# rax, rbx, rdx = scratch +# + .macro exeterm term rbp_offset + + .if \term <= 8 + mov eax, esi + and eax, 7 + mov ebx, [rbp+16+rax*4] + .if \term != 8 + add eax, \term + and eax, 7 + .endif + mov [rbp+16+rax*4], ecx + + .elseif \term == 17 + + mov edx, [rbp+16] # handle term 17 + mov [rbp+16], ecx + lea ebx, [rdx+rdx] + sub ebx, [rbp+20] + mov [rbp+20], edx + + .else + + mov edx, [rbp+16] # handle term 18 + mov [rbp+16], ecx + lea ebx, [rdx+rdx*2] + sub ebx, [rbp+20] + sar ebx, 1 + mov [rbp+20], edx + + .endif + + mov eax, [rbp+8] + imul eax, ebx # 32-bit multiply is almost always enough + jo 1f # but handle overflow if it happens + sar eax, 10 + sbb ecx, eax # borrow flag provides rounding + jmp 2f +1: mov eax, [rbp+8] # perform 64-bit multiply on overflow + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx +2: je 3f + test ebx, ebx + je 3f + xor ebx, ecx + sar ebx, 30 + or ebx, 1 # this generates delta of 1 + shl ebx, 1 # this generates delta of 2 + add [rbp+8], ebx +3: add rbp, \rbp_offset + + .endm + + .endif # end of macro definitions + +# entry points of function + +_pack_decorr_mono_buffer_x64win: +pack_decorr_mono_buffer_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 24 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp mentry + +_pack_decorr_mono_buffer_x64: +pack_decorr_mono_buffer_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 24 + +mentry: mov [rsp+8], rcx # [rsp+8] = sample count + mov [rsp], rsi # [rsp+0] = decorr_passes + xor r8, r8 # r8 = max magnitude mask + xor esi, esi # up counter = 0 + + and ecx, ecx # test & handle zero sample count & zero term count + jz mexit + and edx, edx + jz mexit + + .if 
HARDCODED_FILTERS + +# first check to make sure all the "deltas" are 2 + + mov rbp, [rsp] # rbp is decorr_pass pointer + mov ebx, edx # get term count +deltas: cmp BYTE PTR [rbp+4], 2 # make sure all the deltas are 2 + jnz use_general_version # if any aren't, use general case + add rbp, 96 + dec ebx + jnz deltas + + mov rbp, [rsp] # rbp is decorr_pass pointer + cmp dl, 2 # 2 terms is "fast" + jnz nfast + chkterm 18, 96 # check "fast" terms + chkterm 17, -96 + jmp mono_fast_loop + +nfast: cmp dl, 5 # 5 terms is "normal" + jnz nnorm + chkterm 18, 96 # check "normal" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 17, 96 + chkterm 3, 96*-4 + jmp mono_normal_loop + +nnorm: cmp dl, 10 # 10 terms is "high" + jnz nhigh + chkterm 18, 96 # check "high" terms + chkterm 18, 96 + chkterm 18, 96 + chkterm 1, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 5, 96 + chkterm 1, 96 + chkterm 17, 96 + chkterm 4, 96*-9 + jmp mono_high_loop + +nhigh: cmp dl, 16 # 16 terms is "very high" + jnz use_general_version # if none of these, use general version + chkterm 18, 96 # else check "very high" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96 + chkterm 4, 96 + chkterm 7, 96 + chkterm 5, 96 + chkterm 3, 96 + chkterm 6, 96 + chkterm 8, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96*-15 + jmp mono_vhigh_loop + + .balign 64 + +# hardcoded "fast" decorrelation loop + +mono_fast_loop: + mov ecx, [rdi+rsi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 17, -96 + + mov [rdi+rsi*4], ecx # store completed sample + mov eax, ecx # update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi # increment sample index + cmp esi, [rsp+8] + jnz mono_fast_loop # loop back for all samples + jmp mexit # then exit + + .balign 64 + +# hardcoded "normal" decorrelation loop + +mono_normal_loop: + mov ecx, [rdi+rsi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 17, 96 + 
exeterm 3, 96*-4 + + mov [rdi+rsi*4], ecx # store completed sample + mov eax, ecx # update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi # increment sample index + cmp esi, [rsp+8] + jnz mono_normal_loop # loop back for all samples + jmp mexit # then exit + + .balign 64 + +# hardcoded "high" decorrelation loop + +mono_high_loop: + mov ecx, [rdi+rsi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 18, 96 + exeterm 1, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 5, 96 + exeterm 1, 96 + exeterm 17, 96 + exeterm 4, 96*-9 + + mov [rdi+rsi*4], ecx # store completed sample + mov eax, ecx # update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi # increment sample index + cmp esi, [rsp+8] + jnz mono_high_loop # loop back for all samples + jmp mexit # then exit + + .balign 64 + +# hardcoded "very high" decorrelation loop + +mono_vhigh_loop: + mov ecx, [rdi+rsi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 4, 96 + exeterm 7, 96 + exeterm 5, 96 + exeterm 3, 96 + exeterm 6, 96 + exeterm 8, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96*-15 + + mov [rdi+rsi*4], ecx # store completed sample + mov eax, ecx # update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi # increment sample index + cmp esi, [rsp+8] + jnz mono_vhigh_loop # loop back for all samples + jmp mexit # then exit + + .endif # end of hardcoded filters configuration + +# when none of the hardcoded filters are applicable, or we aren't using them, fall through to here + +use_general_version: + mov rbp, [rsp] # reload decorr_passes pointer to first term + imul rax, rdx, 96 + add rax, rbp # r9 = terminating decorr_pass pointer + mov r9, rax + jmp decorrelate_loop + + .balign 64 + +decorrelate_loop: + mov ecx, [rdi+rsi*4] # ecx is the sample we're decorrelating +nxterm: mov edx, [rbp] + cmp dl, 17 + jge 3f + + mov eax, esi 
+ and eax, 7 + mov ebx, [rbp+16+rax*4] + add eax, edx + and eax, 7 + mov [rbp+16+rax*4], ecx + jmp domult + + .balign 4 +3: mov edx, [rbp+16] + mov [rbp+16], ecx + je 4f + lea ebx, [rdx+rdx*2] + sub ebx, [rbp+20] + sar ebx, 1 + mov [rbp+20], edx + jmp domult + + .balign 4 +4: lea ebx, [rdx+rdx] + sub ebx, [rbp+20] + mov [rbp+20], edx + +domult: mov eax, [rbp+8] + mov edx, eax + imul eax, ebx + jo multov # on overflow, jump to use 64-bit imul variant + sar eax, 10 + sbb ecx, eax + je 2f + test ebx, ebx + je 2f + xor ebx, ecx + sar ebx, 31 + xor edx, ebx + add edx, [rbp+4] + xor edx, ebx + mov [rbp+8], edx +2: add rbp, 96 + cmp rbp, r9 + jnz nxterm + + mov [rdi+rsi*4], ecx # store completed sample + mov eax, ecx # update magnitude mask + cdq + xor eax, edx + or r8, rax + mov rbp, [rsp] # reload decorr_passes pointer to first term + inc esi # increment sample index + cmp esi, [rsp+8] + jnz decorrelate_loop + jmp mexit + + .balign 4 +multov: mov eax, [rbp+8] + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx + je 2f + test ebx, ebx + je 2f + xor ebx, ecx + sar ebx, 31 + mov eax, [rbp+8] + xor eax, ebx + add eax, [rbp+4] + xor eax, ebx + mov [rbp+8], eax +2: add rbp, 96 + cmp rbp, r9 + jnz nxterm + + mov [rdi+rsi*4], ecx # store completed sample + mov eax, ecx # update magnitude mask + cdq + xor eax, edx + or r8, rax + mov rbp, [rsp] # reload decorr_passes pointer to first term + inc esi # increment sample index + cmp esi, [rsp+8] + jnz decorrelate_loop # loop all the way back + +# common exit for entire function + +mexit: mov rax, r8 # return max magnitude + add rsp, 24 + pop rsi + pop rdi + pop rbx + pop rbp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# void decorr_mono_pass_cont (int32_t *out_buffer, +# int32_t *in_buffer, +# struct decorr_pass *dpp, +# int32_t sample_count); +# +# It performs a single pass of mono decorrelation, transferring from the +# input buffer to the output buffer. 
Note that this version of the function +# requires that the previous mono samples (up to 8, depending on dpp->term) +# are visible and correct. In other words, it ignores the "samples_*" +# fields in the decorr_pass structure and gets the history data directly +# from the source buffer. It does, however, return the appropriate history +# samples to the decorr_pass structure before returning. +# +# By using the overflow detection of the multiply instruction, it detects +# when the "long_math" variant is required and automatically does it. +# +# This version has entry points for both the System V ABI and the Windows +# X64 ABI. It does not use the "red zone" or the "shadow area"; it saves the +# non-volatile registers for both ABIs on the stack and allocates another +# 8 bytes on the stack to store the dpp pointer. Note that it does NOT +# provide unwind data for the Windows ABI (the pack_x64.asm module for +# MSVC does). The arguments are passed in registers: +# +# System V Windows +# int32_t *out_buffer rdi rcx +# int32_t *in_buffer rsi rdx +# struct decorr_pass *dpp rdx r8 +# int32_t sample_count ecx r9 +# +# Stack usage: +# +# [rsp+0] = *dpp +# +# Register usage: +# +# rsi = source ptr +# rdi = destination ptr +# rcx = term * -4 (default terms) +# rcx = previous sample (terms 17 & 18) +# ebp = weight +# r8d = delta +# r9d = weight sum +# r10 = eptr +# + +_pack_decorr_mono_pass_cont_x64win: +pack_decorr_mono_pass_cont_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp menter + +_pack_decorr_mono_pass_cont_x64: +pack_decorr_mono_pass_cont_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +menter: mov [rsp], rdx + and ecx, ecx # test & handle zero sample count + jz mono_done + + cld + mov r8d, [rdx+4] # r8d = delta + mov ebp, [rdx+8] # ebp = weight + mov r9d, [rdx+88] # r9d = weight sum + lea 
r10, [rsi+rcx*4] # r10 = eptr + mov ecx, [rsi-4] # preload last sample + mov eax, [rdx] # get term + cmp al, 17 + je mono_term_17_loop + cmp al, 18 + je mono_term_18_loop + + imul rcx, rax, -4 # rcx is index to correlation sample + jmp mono_default_term_loop + + .balign 64 + +mono_default_term_loop: + mov edx, [rsi+rcx] + mov ebx, edx + imul edx, ebp + jo 1f + lodsd + sar edx, 10 + sbb eax, edx + jmp 2f +1: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax # edx = apply_weight (sam_A) + lodsd + sub eax, edx +2: stosd + je 3f + test ebx, ebx + je 3f + xor eax, ebx + cdq + xor ebp, edx + add ebp, r8d + xor ebp, edx +3: add r9d, ebp + cmp rsi, r10 + jnz mono_default_term_loop + + mov rdx, [rsp] # rdx = *dpp + mov [rdx+8], ebp # put weight back + mov [rdx+88], r9d # put weight sum back + movsxd rcx, DWORD PTR [rdx] # rcx = dpp->term + +mono_default_store_samples: + dec rcx + sub rsi, 4 # back up one sample + mov eax, [rsi] + mov [rdx+rcx*4+16], eax # store samples_A [ecx] + test rcx, rcx + jnz mono_default_store_samples + jmp mono_done + + .balign 64 + +mono_term_17_loop: + lea edx, [rcx+rcx] + sub edx, [rsi-8] # ebx = sam_A + mov ebx, edx + imul edx, ebp + jo 1f + sar edx, 10 + lodsd + mov ecx, eax + sbb eax, edx + jmp 2f +1: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax # edx = apply_weight (sam_A) + lodsd + mov ecx, eax + sub eax, edx +2: stosd + je 3f + test ebx, ebx + je 3f + xor eax, ebx + cdq + xor ebp, edx + add ebp, r8d + xor ebp, edx +3: add r9d, ebp + cmp rsi, r10 + jnz mono_term_17_loop + jmp mono_term_1718_exit + + .balign 64 + +mono_term_18_loop: + lea edx, [rcx+rcx*2] + sub edx, [rsi-8] + sar edx, 1 + mov ebx, edx # ebx = sam_A + imul edx, ebp + jo 1f + sar edx, 10 + lodsd + mov ecx, eax + sbb eax, edx + jmp 2f +1: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax # edx = apply_weight (sam_A) + lodsd + mov ecx, eax + sub eax, edx +2: stosd + je 3f + test ebx, ebx + je 3f + xor eax, ebx + cdq + xor 
ebp, edx + add ebp, r8d + xor ebp, edx +3: add r9d, ebp + cmp rsi, r10 + jnz mono_term_18_loop + +mono_term_1718_exit: + mov rdx, [rsp] # rdx = *dpp + mov [rdx+8], ebp # put weight back + mov [rdx+88], r9d # put weight sum back + mov eax, [rsi-4] # dpp->samples_A [0] = bptr [-1] + mov [rdx+16], eax + mov eax, [rsi-8] # dpp->samples_A [1] = bptr [-2] + mov [rdx+20], eax + +mono_done: + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# uint32_t scan_max_magnitude (int32_t *buffer, int32_t sample_count); +# +# This function scans a buffer of signed 32-bit ints and returns the magnitude +# of the largest sample, with a power-of-two resolution. It might be more +# useful to return the actual maximum absolute value, but that implementation +# would be slower. Instead, this simply returns the "or" of all the values +# "xor"d with their own sign, like so: +# +# while (sample_count--) +# magnitude |= (*buffer < 0) ? ~*buffer++ : *buffer++; +# +# This is written to work on an X86-64 processor (also called the AMD64) +# running in 64-bit mode and uses the MMX extensions to improve the +# performance by processing two samples together. +# +# This version has entry points for both the System V ABI and the Windows +# X64 ABI. It does not use the "red zone" or the "shadow area"; it saves the +# non-volatile registers for both ABIs on the stack and allocates another +# 8 bytes on the stack so that it's properly aligned. Note that it does NOT +# provide unwind data for the Windows ABI (the unpack_x64.asm module for +# MSVC does). 
The arguments are passed in registers: +# +# System V Windows +# int32_t *buffer rdi rcx +# int32_t sample_count rsi rdx +# +# During the processing loops, the following registers are used: +# +# rdi buffer pointer +# rsi termination buffer pointer +# ebx single magnitude accumulator +# mm0 dual magnitude accumulator +# mm1, mm2 scratch +# + +_scan_max_magnitude_x64win: +scan_max_magnitude_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp senter + +_scan_max_magnitude_x64: +scan_max_magnitude_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +senter: xor ebx, ebx # clear magnitude accumulator + + mov eax, esi # eax = count + and eax, 7 + mov ecx, eax # ecx = leftover samples to "manually" scan at end + + shr esi, 3 # esi = num of loops to process mmx (8 samples/loop) + shl esi, 5 # esi = num of bytes to process mmx (32 bytes/loop) + jz nommx # jump around if no mmx loops to do (< 8 samples) + + pxor mm0, mm0 # clear dual magnitude accumulator + add rsi, rdi # rsi = termination buffer pointer for mmx loop + jmp mmxlp + + .balign 64 + +mmxlp: movq mm1, [rdi] # get stereo samples in mm1 & mm2 + movq mm2, mm1 + psrad mm1, 31 # mm1 = sign (mm2) + pxor mm1, mm2 # mm1 = absolute magnitude, or into result + por mm0, mm1 + + movq mm1, [rdi+8] # do it again with 6 more samples + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + movq mm1, [rdi+16] + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + movq mm1, [rdi+24] + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + add rdi, 32 + cmp rdi, rsi + jnz mmxlp + + movd eax, mm0 # ebx = "or" of high and low mm0 + punpckhdq mm0, mm0 + movd ebx, mm0 + or ebx, eax + emms + +nommx: and ecx, ecx # any leftover samples to do? 
+ jz noleft + +leftlp: mov eax, [rdi] + cdq + xor eax, edx + or ebx, eax + add rdi, 4 + loop leftlp + +noleft: mov eax, ebx # move magnitude to eax for return + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit); +# +# This function scans a buffer of 32-bit ints and accumulates the total +# log2 value of all the samples. This is useful for determining maximum +# compression because the bitstream storage required for entropy coding +# is proportional to the base 2 log of the samples. +# +# This is written to work on an X86-64 processor (also called the AMD64) +# running in 64-bit mode. This version has entry points for both the System +# V ABI and the Windows X64 ABI. It does not use the "red zone" or the +# "shadow area"; it saves the non-volatile registers for both ABIs on the +# stack and allocates another 8 bytes on the stack so it's aligned properly. +# Note that it does NOT provide unwind data for the Windows ABI (but the +# unpack_x64.asm module for MSVC does). 
The arguments are passed in registers: +# +# System V Windows +# int32_t *samples rdi rcx +# uint32_t num_samples esi rdx +# int limit edx r8 +# +# During the processing loops, the following registers are used: +# +# r8 pointer to the 256-byte log fraction table +# rsi input buffer pointer +# edi sum accumulator +# ebx sample count +# ebp limit (if specified non-zero) +# eax,ecx,edx scratch +# + + .balign 256 + +log2_table: + .byte 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15 + .byte 0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a + .byte 0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e + .byte 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51 + .byte 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63 + .byte 0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75 + .byte 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85 + .byte 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95 + .byte 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4 + .byte 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2 + .byte 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0 + .byte 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce + .byte 0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb + .byte 0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7 + .byte 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4 + .byte 0xf4, 0xf5, 0xf6, 
0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff + +_log2buffer_x64win: +log2buffer_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp log2bf + +_log2buffer_x64: +log2buffer_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +log2bf: mov ebx, esi # ebx = num_samples + mov rsi, rdi # rsi = *samples + xor edi, edi # initialize sum + lea r8, [log2_table+rip] + test ebx, ebx # test count for zero + jz normal_exit + mov ebp, edx # ebp = limit + test ebp, ebp # we have separate loops for limit and no limit + jz no_limit_loop + jmp limit_loop + + .balign 64 + +limit_loop: + mov eax, [rsi] # get next sample into eax + cdq # edx = sign of sample (for abs) + add rsi, 4 + xor eax, edx + sub eax, edx + je L40 # skip if sample was zero + mov edx, eax # move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx # ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] # eax = number used bits in sample (1 - 32) + sub ecx, 8 # ecx = shift right amount (-8 to 23) + ror edx, cl # use rotate to do "signed" shift + shl eax, 8 # move nbits to integer portion of log + movzx edx, dl # dl = mantissa, look up log fraction in table + mov al, [r8+rdx] # eax = combined integer and fraction for full log + add edi, eax # add to running sum and compare to limit + cmp eax, ebp + jge limit_exceeded +L40: sub ebx, 1 # loop back if more samples + jne limit_loop + jmp normal_exit + + .balign 64 + +no_limit_loop: + mov eax, [rsi] # get next sample into eax + cdq # edx = sign of sample (for abs) + add rsi, 4 + xor eax, edx + sub eax, edx + je L45 # skip if sample was zero + mov edx, eax # move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx # ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] # eax = number used bits in sample (1 - 32) + sub ecx, 8 # ecx = shift right 
amount (-8 to 23) + ror edx, cl # use rotate to do "signed" shift + shl eax, 8 # move nbits to integer portion of log + movzx edx, dl # dl = mantissa, look up log fraction in table + mov al, [r8+rdx] # eax = combined integer and fraction for full log + add edi, eax # add to running sum +L45: sub ebx, 1 + jne no_limit_loop + jmp normal_exit + +limit_exceeded: + mov edi, -1 # return -1 to indicate limit hit +normal_exit: + mov eax, edi # move sum accumulator into eax for return + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +#ifdef __ELF__ + .section .note.GNU-stack,"",@progbits +#endif + diff --git a/third_party/wavpack/src/pack_x64.asm b/third_party/wavpack/src/pack_x64.asm new file mode 100644 index 0000000..2a4b551 --- /dev/null +++ b/third_party/wavpack/src/pack_x64.asm @@ -0,0 +1,1852 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; **** WAVPACK **** ;; +;; Hybrid Lossless Wavefile Compressor ;; +;; Copyright (c) 1998 - 2015 Conifer Software. ;; +;; All Rights Reserved. ;; +;; Distributed under the BSD Software License (see license.txt) ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + include + + public pack_decorr_stereo_pass_cont_rev_x64win + public pack_decorr_stereo_pass_cont_x64win + +asmcode segment page 'CODE' + +; This module contains X64 assembly optimized versions of functions required +; to encode WavPack files. + +; This is an assembly optimized version of the following WavPack function: +; +; void pack_decorr_stereo_pass ( +; struct decorr_pass *dpp, +; int32_t *buffer, +; int32_t sample_count); +; +; It performs a single pass of stereo decorrelation, in place, as specified +; by the decorr_pass structure. Note that this function does NOT return the +; dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +; the number of samples is not a multiple of MAX_TERM, these must be moved if +; they are to be used somewhere else. 
+; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode and uses the MMX extensions to improve the +; performance by processing both stereo channels together. It is based on +; the original MMX code written by Joachim Henke that used MMX intrinsics +; called from C. Many thanks to Joachim for that! +; +; An issue with using MMX for this is that the sample history array in the +; decorr_pass structure contains separate arrays for each channel while the +; MMX code wants there to be a single array of dual samples. The fix for +; this is to convert the data in the arrays on entry and exit, and this is +; made easy by the fact that the 8 MMX registers hold exactly the required +; amount of data (64 bytes)! +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode. This version is for the 64-bit Windows ABI and +; provides appropriate prologs and epilogs for stack unwinding. The +; arguments are passed in registers: +; +; struct decorr_pass *dpp rcx +; int32_t *buffer rdx +; int32_t sample_count r8d +; +; During the processing loops, the following registers are used: +; +; rdi buffer pointer +; rsi termination buffer pointer +; rax,rbx,rdx used in default term to reduce calculation +; rbp decorr_pass pointer +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 0 (for pcmpeqd) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +pack_decorr_stereo_pass_x64win proc frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + + mov rbp, rdi ; rbp = *dpp + mov rdi, rsi ; rdi = inbuffer + mov esi, edx + sal esi, 3 + jz bdone + add rsi, rdi ; rsi = termination buffer 
pointer + + ; convert samples_A and samples_B array into samples_AB array for MMX + ; (the MMX registers provide exactly enough storage to do this easily) + + movq mm0, [rbp+16] + punpckldq mm0, [rbp+48] + movq mm1, [rbp+16] + punpckhdq mm1, [rbp+48] + movq mm2, [rbp+24] + punpckldq mm2, [rbp+56] + movq mm3, [rbp+24] + punpckhdq mm3, [rbp+56] + movq mm4, [rbp+32] + punpckldq mm4, [rbp+64] + movq mm5, [rbp+32] + punpckhdq mm5, [rbp+64] + movq mm6, [rbp+40] + punpckldq mm6, [rbp+72] + movq mm7, [rbp+40] + punpckhdq mm7, [rbp+72] + + movq [rbp+16], mm0 + movq [rbp+24], mm1 + movq [rbp+32], mm2 + movq [rbp+40], mm3 + movq [rbp+48], mm4 + movq [rbp+56], mm5 + movq [rbp+64], mm6 + movq [rbp+72], mm7 + + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + + mov eax, [rbp+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rbp+8] ; mm5 = weight_AB masked to 16-bit + + movq mm4, [rbp+16] ; preload samples_AB[0] + + mov al, [rbp] ; get term and vector to correct loop + cmp al, 17 + je buff_term_17_loop + cmp al, 18 + je buff_term_18_loop + cmp al, -1 + je buff_term_minus_1_loop + cmp al, -2 + je buff_term_minus_2_loop + cmp al, -3 + je buff_term_minus_3_loop + + pxor mm4, mm4 ; mm4 = 0 (for pcmpeqd) + xor eax, eax + xor ebx, ebx + add bl, [rbp] + mov ecx, 7 + and ebx, ecx + jmp buff_default_term_loop + + align 64 + +buff_default_term_loop: + movq mm2, [rdi] ; mm2 = left_right + movq mm3, [rbp+16+rax*8] + inc eax + and eax, ecx + movq [rbp+16+rbx*8], mm2 + inc ebx + and ebx, ecx + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 
; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm4 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_default_term_loop + + jmp bdone + + align 64 + +buff_term_17_loop: + movq mm3, mm4 ; get previous calculated value + paddd mm3, mm4 + psubd mm3, [rbp+24] + movq [rbp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [rdi] ; mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_17_loop + + movq [rbp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_18_loop: + movq mm3, mm4 ; get previous calculated value + psubd mm3, [rbp+24] + psrad mm3, 1 + paddd mm3, mm4 ; mm3 = sam_AB + movq [rbp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [rdi] ; mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero 
+ pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_18_loop + + movq [rbp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_minus_1_loop: + movq mm3, mm4 ; mm3 = previous calculated value + movq mm2, [rdi] ; mm2 = left_right + movq mm4, mm2 + psrlq mm4, 32 + punpckldq mm3, mm2 ; mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_minus_1_loop + + movq [rbp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_minus_2_loop: + movq mm2, [rdi] ; mm2 = left_right + movq mm3, mm2 + psrlq mm3, 32 + por mm3, mm4 + punpckldq mm4, mm2 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 
1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_minus_2_loop + + movq [rbp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_minus_3_loop: + movq mm2, [rdi] ; mm2 = left_right + movq mm3, mm4 ; mm3 = previous calculated value + movq mm4, mm2 ; mm0 = swap dwords of new data + psrlq mm4, 32 + punpckldq mm4, mm2 ; mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi + jnz buff_term_minus_3_loop + + movq [rbp+16], mm4 ; post-store samples_AB[0] + +bdone: pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + movq [rbp+8], mm5 ; put weight_AB back + + ; convert samples_AB array back into samples_A and samples_B + + movq mm0, [rbp+16] + movq mm1, [rbp+24] + movq mm2, [rbp+32] + movq mm3, [rbp+40] + movq mm4, [rbp+48] + movq mm5, [rbp+56] + movq mm6, [rbp+64] + movq mm7, [rbp+72] + + movd DWORD PTR [rbp+16], mm0 + movd DWORD PTR [rbp+20], mm1 + movd DWORD PTR [rbp+24], mm2 + movd DWORD PTR [rbp+28], mm3 + movd DWORD PTR [rbp+32], 
mm4 + movd DWORD PTR [rbp+36], mm5 + movd DWORD PTR [rbp+40], mm6 + movd DWORD PTR [rbp+44], mm7 + + punpckhdq mm0, mm0 + punpckhdq mm1, mm1 + punpckhdq mm2, mm2 + punpckhdq mm3, mm3 + punpckhdq mm4, mm4 + punpckhdq mm5, mm5 + punpckhdq mm6, mm6 + punpckhdq mm7, mm7 + + movd DWORD PTR [rbp+48], mm0 + movd DWORD PTR [rbp+52], mm1 + movd DWORD PTR [rbp+56], mm2 + movd DWORD PTR [rbp+60], mm3 + movd DWORD PTR [rbp+64], mm4 + movd DWORD PTR [rbp+68], mm5 + movd DWORD PTR [rbp+72], mm6 + movd DWORD PTR [rbp+76], mm7 + + emms + + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +pack_decorr_stereo_pass_x64win endp + +; These are assembly optimized versions of the following WavPack functions: +; +; void pack_decorr_stereo_pass_cont ( +; struct decorr_pass *dpp, +; int32_t *in_buffer, +; int32_t *out_buffer, +; int32_t sample_count); +; +; void pack_decorr_stereo_pass_cont_rev ( +; struct decorr_pass *dpp, +; int32_t *in_buffer, +; int32_t *out_buffer, +; int32_t sample_count); +; +; It performs a single pass of stereo decorrelation, transferring from the +; input buffer to the output buffer. Note that this version of the function +; requires that the up to 8 previous (depending on dpp->term) stereo samples +; are visible and correct. In other words, it ignores the "samples_*" +; fields in the decorr_pass structure and gets the history data directly +; from the source buffer. It does, however, return the appropriate history +; samples to the decorr_pass structure before returning. +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode and uses the MMX extensions to improve the +; performance by processing both stereo channels together. It is based on +; the original MMX code written by Joachim Henke that used MMX intrinsics +; called from C. Many thanks to Joachim for that! +; +; This version is for 64-bit Windows. 
Note that the two public functions +; are "leaf" functions that simply load rax with the direction and jump +; into the private common "frame" function. The arguments are passed in +; registers: +; +; struct decorr_pass *dpp rcx +; int32_t *in_buffer rdx +; int32_t *out_buffer r8 +; int32_t sample_count r9d +; +; During the processing loops, the following registers are used: +; +; rdi input buffer pointer +; rsi direction (-8 forward, +8 reverse) +; rbx delta from input to output buffer +; ecx sample count +; rdx sign (dir) * term * -8 (terms 1-8 only) +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 weight sums +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; +; stack usage: +; +; [rsp+0] = *dpp +; + +pack_decorr_stereo_pass_cont_rev_x64win: + mov rax, 8 ; get value for reverse direction & jump + jmp pack_decorr_stereo_pass_cont_common + +pack_decorr_stereo_pass_cont_x64win: + mov rax, -8 ; get value for forward direction & jump + jmp pack_decorr_stereo_pass_cont_common + +pack_decorr_stereo_pass_cont_common proc frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov [rsp], rcx ; [rsp] = *dpp + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + + mov rdi, rsi ; rdi = inbuffer + mov rsi, rax ; rsi = -direction + + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + + mov rax, [rsp] ; access dpp + mov eax, [rax+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + + mov rax, [rsp] ; access dpp + movq mm5, [rax+8] ; mm5 = weight_AB + movq mm4, [rax+88] ; mm4 = sum_AB + + mov rbx, rdx ; rbx = out_buffer (rdx) - in_buffer (rdi) + sub rbx, rdi + + mov rax, [rsp] ; *eax = dpp + movsxd rax, DWORD PTR [rax] ; get term and vector to correct loop + cmp al, 17 + je 
term_17_loop + cmp al, 18 + je term_18_loop + cmp al, -1 + je term_minus_1_loop + cmp al, -2 + je term_minus_2_loop + cmp al, -3 + je term_minus_3_loop + + sal rax, 3 + mov rdx, rax ; rdx = term * 8 to index correlation sample + test rsi, rsi ; test direction + jns default_term_loop + neg rdx + jmp default_term_loop + + align 64 + +default_term_loop: + movq mm3, [rdi+rdx] ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz default_term_loop + + mov rax, [rsp] ; access dpp + movq [rax+8], mm5 ; put weight_AB back + movq [rax+88], mm4 ; put sum_AB back + emms + + mov rdx, [rsp] ; access dpp with rdx + movsxd rcx, DWORD PTR [rdx] ; rcx = dpp->term + +default_store_samples: + dec rcx + add rdi, rsi ; back up one full sample + mov eax, [rdi+4] + mov [rdx+rcx*4+48], eax ; store samples_B [ecx] + mov eax, [rdi] + mov [rdx+rcx*4+16], eax ; store samples_A [ecx] + test rcx, rcx + jnz default_store_samples + jmp done + + align 64 + +term_17_loop: + movq mm3, [rdi+rsi] ; get previous calculated value + paddd mm3, mm3 + psubd mm3, [rdi+rsi*2] + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, 
mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_17_loop + + mov rax, [rsp] ; access dpp + movq [rax+8], mm5 ; put weight_AB back + movq [rax+88], mm4 ; put sum_AB back + emms + jmp term_1718_common_store + + align 64 + +term_18_loop: + movq mm3, [rdi+rsi] ; get previous calculated value + movq mm0, mm3 + psubd mm3, [rdi+rsi*2] + psrad mm3, 1 + paddd mm3, mm0 ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + dec ecx + paddd mm4, mm5 ; add weights to sum + jnz term_18_loop + + mov rax, [rsp] ; access dpp + movq [rax+8], mm5 ; put weight_AB back + movq [rax+88], mm4 ; put sum_AB back + emms + +term_1718_common_store: + + mov rax, [rsp] ; access dpp + add rdi, rsi ; back up a full sample + mov edx, [rdi+4] ; dpp->samples_B [0] = iptr [-1]; + mov [rax+48], edx 
+ mov edx, [rdi] ; dpp->samples_A [0] = iptr [-2]; + mov [rax+16], edx + add rdi, rsi ; back up another sample + mov edx, [rdi+4] ; dpp->samples_B [1] = iptr [-3]; + mov [rax+52], edx + mov edx, [rdi] ; dpp->samples_A [1] = iptr [-4]; + mov [rax+20], edx + jmp done + + align 64 + +term_minus_1_loop: + movq mm3, [rdi+rsi] ; mm3 = previous calculated value + movq mm2, [rdi] ; mm2 = left_right + psrlq mm3, 32 + punpckldq mm3, mm2 ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_minus_1_loop + + mov rax, [rsp] ; access dpp + movq [rax+8], mm5 ; put weight_AB back + movq [rax+88], mm4 ; put sum_AB back + emms + + add rdi, rsi ; back up a full sample + mov edx, [rdi+4] ; dpp->samples_A [0] = iptr [-1]; + mov rax, [rsp] + mov [rax+16], edx + jmp done + + align 64 + +term_minus_2_loop: + movq mm2, [rdi] ; mm2 = left_right + movq mm3, mm2 ; mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, [rdi+rsi] ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add 
shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_minus_2_loop + + mov rax, [rsp] ; access dpp + movq [rax+8], mm5 ; put weight_AB back + movq [rax+88], mm4 ; put sum_AB back + emms + + add rdi, rsi ; back up a full sample + mov edx, [rdi] ; dpp->samples_B [0] = iptr [-2]; + mov rax, [rsp] + mov [rax+48], edx + jmp done + + align 64 + +term_minus_3_loop: + movq mm0, [rdi+rsi] ; mm0 = previous calculated value + movq mm3, mm0 ; mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, mm0 ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [rdi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi+rbx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub rdi, rsi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_minus_3_loop + + mov rax, [rsp] ; access dpp 
+ movq [rax+8], mm5 ; put weight_AB back + movq [rax+88], mm4 ; put sum_AB back + emms + + add rdi, rsi ; back up a full sample + mov edx, [rdi+4] ; dpp->samples_A [0] = iptr [-1]; + mov rax, [rsp] + mov [rax+16], edx + mov edx, [rdi] ; dpp->samples_B [0] = iptr [-2]; + mov [rax+48], edx + +done: add rsp, 8 ; begin epilog by deallocating stack + pop rsi ; restore non-volatile registers & return + pop rdi + pop rbx + pop rbp + ret + +pack_decorr_stereo_pass_cont_common endp + +; This is an assembly optimized version of the following WavPack function: +; +; uint32_t decorr_mono_buffer (int32_t *buffer, +; struct decorr_pass *decorr_passes, +; int32_t num_terms, +; int32_t sample_count) +; +; Decorrelate a buffer of mono samples, in place, as specified by the array +; of decorr_pass structures. Note that this function does NOT return the +; dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +; the number of samples is not a multiple of MAX_TERM, these must be moved if +; they are to be used somewhere else. The magnitude of the output samples is +; accumulated and returned (see scan_max_magnitude() for more details). By +; using the overflow detection of the multiply instruction, this detects +; when the "long_math" variant is required. +; +; For the fastest possible operation with the four "common" decorrelation +; filters (i.e., fast, normal, high and very high) this function can be +; configured to include hardcoded versions of these filters that are created +; using macros. In that case, the passed filter is checked to make sure that +; it matches one of the four. If it doesn't, or if the hardcoded filters are +; not enabled, a "general" version of the decorrelation loop is used. This +; variable enables the hardcoded filters and can be disabled if there are +; problems with the code or macros: + + HARDCODED_FILTERS = 1 + +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode. 
This version is for the 64-bit Windows ABI and +; provides appropriate prologs and epilogs for stack unwinding. The +; arguments are passed in registers: +; +; int32_t *buffer rcx +; struct decorr_pass *dpp rdx +; int32_t num_terms r8 +; int32_t sample_count r9 +; +; stack usage: +; +; [rsp+8] = sample_count +; [rsp+0] = decorr_passes (unused in hardcoded filter case) +; +; register usage: +; +; ecx = sample being decorrelated +; esi = sample up counter +; rdi = *buffer +; rbp = *dpp +; r8 = magnitude accumulator +; r9 = dpp end ptr (unused in hardcoded filter case) +; + if HARDCODED_FILTERS +; +; This macro is used for checking the decorr_passes array to make sure that the terms match +; the hardcoded terms. The terms of these filters are the first element in the tables defined +; in decorr_tables.h (with the negative terms replaced with 1). +; + +chkterm macro term, rbp_offset + cmp BYTE PTR [rbp], term + jnz use_general_version + add rbp, rbp_offset + endm +; +; This macro processes the single specified term (with a fixed delta of 2) and updates the +; term pointer (rbp) with the specified offset when done. 
It assumes the following registers: +; +; ecx = sample being decorrelated +; esi = sample up counter (used for terms 1-8) +; rbp = decorr_pass pointer for this term (updated with "rbp_offset" when done) +; rax, rbx, rdx = scratch +; + +exeterm macro term, rbp_offset + local over, cont, done + + if term le 8 + mov eax, esi + and eax, 7 + mov ebx, [rbp+16+rax*4] + if term ne 8 + add eax, term + and eax, 7 + endif + mov [rbp+16+rax*4], ecx + + elseif term eq 17 + + mov edx, [rbp+16] ; handle term 17 + mov [rbp+16], ecx + lea ebx, [rdx+rdx] + sub ebx, [rbp+20] + mov [rbp+20], edx + + else + + mov edx, [rbp+16] ; handle term 18 + mov [rbp+16], ecx + lea ebx, [rdx+rdx*2] + sub ebx, [rbp+20] + sar ebx, 1 + mov [rbp+20], edx + + endif + + mov eax, [rbp+8] + imul eax, ebx ; 32-bit multiply is almost always enough + jo over ; but handle overflow if it happens + sar eax, 10 + sbb ecx, eax ; borrow flag provides rounding + jmp cont +over: mov eax, [rbp+8] ; perform 64-bit multiply on overflow + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx +cont: je done + test ebx, ebx + je done + xor ebx, ecx + sar ebx, 30 + or ebx, 1 ; this generates delta of 1 + sal ebx, 1 ; this generates delta of 2 + add [rbp+8], ebx +done: add rbp, rbp_offset + + endm + + endif ; end of macro definitions + +; entry points of function + +pack_decorr_mono_buffer_x64win proc public frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 24 ; allocate 24 bytes on stack & align to 16 bytes + end_prologue + + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + + mov [rsp+8], rcx ; [rsp+8] = sample count + mov [rsp], rsi ; [rsp+0] = decorr_passes + xor r8, r8 ; r8 = max magnitude mask + xor esi, esi ; up counter = 0 + + and ecx, ecx ; test & handle zero sample count & zero term count + jz mexit + and edx, edx + jz mexit + 
+ if HARDCODED_FILTERS + +; first check to make sure all the "deltas" are 2 + + mov rbp, [rsp] ; rbp is decorr_pass pointer + mov ebx, edx ; get term count +deltas: cmp BYTE PTR [rbp+4], 2 ; make sure all the deltas are 2 + jnz use_general_version ; if any aren't, use general case + add rbp, 96 + dec ebx + jnz deltas + + mov rbp, [rsp] ; rbp is decorr_pass pointer + cmp dl, 2 ; 2 terms is "fast" + jnz nfast + chkterm 18, 96 ; check "fast" terms + chkterm 17, -96 + jmp mono_fast_loop ; if both terms match, go execute filter + +nfast: cmp dl, 5 ; 5 terms is "normal" + jnz nnorm + chkterm 18, 96 ; check "normal" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 17, 96 + chkterm 3, 96*-4 + jmp mono_normal_loop ; if all terms match, go execute filter + +nnorm: cmp dl, 10 ; 10 terms is "high" + jnz nhigh + chkterm 18, 96 ; check "high" terms + chkterm 18, 96 + chkterm 18, 96 + chkterm 1, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 5, 96 + chkterm 1, 96 + chkterm 17, 96 + chkterm 4, 96*-9 + jmp mono_high_loop ; if all terms match, go execute filter + +nhigh: cmp dl, 16 ; 16 terms is "very high" + jnz use_general_version ; if none of these, use general version + chkterm 18, 96 ; else check "very high" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96 + chkterm 4, 96 + chkterm 7, 96 + chkterm 5, 96 + chkterm 3, 96 + chkterm 6, 96 + chkterm 8, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96*-15 + jmp mono_vhigh_loop ; if all terms match, go execute filter + + align 64 + +; hardcoded "fast" decorrelation loop + +mono_fast_loop: + mov ecx, [rdi+rsi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 17, -96 + + mov [rdi+rsi*4], ecx ; store completed sample + mov eax, ecx ; update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi ; increment sample index + cmp esi, [rsp+8] + jnz mono_fast_loop ; loop back for all samples + jmp mexit ; then exit + + align 64 + +; hardcoded "normal" 
decorrelation loop + +mono_normal_loop: + mov ecx, [rdi+rsi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 17, 96 + exeterm 3, 96*-4 + + mov [rdi+rsi*4], ecx ; store completed sample + mov eax, ecx ; update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi ; increment sample index + cmp esi, [rsp+8] + jnz mono_normal_loop ; loop back for all samples + jmp mexit ; then exit + + align 64 + +; hardcoded "high" decorrelation loop + +mono_high_loop: + mov ecx, [rdi+rsi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 18, 96 + exeterm 1, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 5, 96 + exeterm 1, 96 + exeterm 17, 96 + exeterm 4, 96*-9 + + mov [rdi+rsi*4], ecx ; store completed sample + mov eax, ecx ; update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi ; increment sample index + cmp esi, [rsp+8] + jnz mono_high_loop ; loop back for all samples + jmp mexit ; then exit + + align 64 + +; hardcoded "very high" decorrelation loop + +mono_vhigh_loop: + mov ecx, [rdi+rsi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 4, 96 + exeterm 7, 96 + exeterm 5, 96 + exeterm 3, 96 + exeterm 6, 96 + exeterm 8, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96*-15 + + mov [rdi+rsi*4], ecx ; store completed sample + mov eax, ecx ; update magnitude mask + cdq + xor eax, edx + or r8, rax + inc esi ; increment sample index + cmp esi, [rsp+8] + jnz mono_vhigh_loop ; loop back for all samples + jmp mexit ; then exit + + endif ; end of hardcoded filters configuration + +; if none of the hardcoded filters are applicable, or we aren't using them, fall through to here + +use_general_version: + mov rbp, [rsp] ; reload decorr_passes pointer to first term + imul rax, rdx, 96 + add rax, rbp ; r9 = terminating decorr_pass pointer + mov r9, rax + jmp 
decorrelate_loop + + align 64 + +decorrelate_loop: + mov ecx, [rdi+rsi*4] ; ecx is the sample we're decorrelating +nxterm: mov edx, [rbp] + cmp dl, 17 + jge @f + + mov eax, esi + and eax, 7 + mov ebx, [rbp+16+rax*4] + add eax, edx + and eax, 7 + mov [rbp+16+rax*4], ecx + jmp domult + + align 4 +@@: mov edx, [rbp+16] + mov [rbp+16], ecx + je @f ; flags still from "cmp dl, 17" (mov leaves them alone): term 17 jumps, term 18 falls through + lea ebx, [rdx+rdx*2] + sub ebx, [rbp+20] + sar ebx, 1 + mov [rbp+20], edx + jmp domult + + align 4 +@@: lea ebx, [rdx+rdx] + sub ebx, [rbp+20] + mov [rbp+20], edx + +domult: mov eax, [rbp+8] + mov edx, eax + imul eax, ebx + jo multov ; on overflow, jump to use 64-bit imul variant + sar eax, 10 + sbb ecx, eax + je @f + test ebx, ebx + je @f + xor ebx, ecx + sar ebx, 31 + xor edx, ebx + add edx, [rbp+4] ; xor/add/xor: edx = old [rbp+8] value plus or minus [rbp+4], sign taken from ebx + xor edx, ebx + mov [rbp+8], edx +@@: add rbp, 96 + cmp rbp, r9 + jnz nxterm + + mov [rdi+rsi*4], ecx ; store completed sample + mov eax, ecx ; update magnitude mask + cdq + xor eax, edx + or r8, rax + mov rbp, [rsp] ; reload decorr_passes pointer to first term + inc esi ; increment sample index + cmp esi, [rsp+8] + jnz decorrelate_loop + jmp mexit + + align 4 +multov: mov eax, [rbp+8] + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx + je @f + test ebx, ebx + je @f + xor ebx, ecx + sar ebx, 31 + mov eax, [rbp+8] + xor eax, ebx + add eax, [rbp+4] + xor eax, ebx + mov [rbp+8], eax +@@: add rbp, 96 + cmp rbp, r9 + jnz nxterm + + mov [rdi+rsi*4], ecx ; store completed sample + mov eax, ecx ; update magnitude mask + cdq + xor eax, edx + or r8, rax + mov rbp, [rsp] ; reload decorr_passes pointer to first term + inc esi ; increment sample index + cmp esi, [rsp+8] + jnz decorrelate_loop ; loop all the way back + +; common exit for entire function + +mexit: mov rax, r8 ; return max magnitude + add rsp, 24 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +pack_decorr_mono_buffer_x64win endp + + +; This is an assembly optimized version of the following WavPack function: +; +; void decorr_mono_pass_cont (int32_t
*out_buffer, +; int32_t *in_buffer, +; struct decorr_pass *dpp, +; int32_t sample_count); +; +; It performs a single pass of mono decorrelation, transferring from the +; input buffer to the output buffer. Note that this version of the function +; requires that the up to 8 previous (depending on dpp->term) mono samples +; are visible and correct. In other words, it ignores the "samples_*" +; fields in the decorr_pass structure and gets the history data directly +; from the source buffer. It does, however, return the appropriate history +; samples to the decorr_pass structure before returning. +; +; By using the overflow detection of the multiply instruction, it detects +; when the "long_math" variant is required and automatically does it. +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode. This version is for the 64-bit Windows ABI and +; provides appropriate prologs and epilogs for stack unwinding. The +; arguments are passed in registers: +; +; int32_t *out_buffer rcx +; int32_t *in_buffer rdx +; struct decorr_pass *dpp r8 +; int32_t sample_count r9 +; +; Stack usage: +; +; [rsp+0] = *dpp +; +; Register usage: +; +; rsi = source ptr +; rdi = destination ptr +; rcx = term * -4 (default terms) +; rcx = previous sample (terms 17 & 18) +; ebp = weight +; r8d = delta +; r9d = weight sum +; r10 = eptr +; + +pack_decorr_mono_pass_cont_x64win proc public frame + push_reg rbp + push_reg rbx + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + + mov [rsp], rdx ; [rsp+0] = *dpp (reloaded from here after the loops) + and ecx, ecx ; test & handle zero sample count + jz mono_done + + cld + mov r8d, [rdx+4] ; r8d = delta + mov ebp, [rdx+8] ; ebp = weight + mov r9d, [rdx+88] ; r9d = weight sum + lea r10, [rsi+rcx*4] ; r10 = eptr + mov ecx, [rsi-4] ; preload last sample + mov eax, [rdx]
; get term + cmp al, 17 + je mono_term_17_loop + cmp al, 18 + je mono_term_18_loop + + imul rcx, rax, -4 ; rcx is index to correlation sample + jmp mono_default_term_loop + + align 64 + +mono_default_term_loop: + mov edx, [rsi+rcx] + mov ebx, edx + imul edx, ebp + jo over + lodsd + sar edx, 10 + sbb eax, edx + jmp @f +over: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax ; edx = apply_weight (sam_A) + lodsd + sub eax, edx +@@: stosd + je @f + test ebx, ebx + je @f + xor eax, ebx + cdq + xor ebp, edx + add ebp, r8d + xor ebp, edx +@@: add r9d, ebp + cmp rsi, r10 + jnz mono_default_term_loop + + mov rdx, [rsp] ; rdx = *dpp + mov [rdx+8], ebp ; put weight back + mov [rdx+88], r9d ; put weight sum back + movsxd rcx, DWORD PTR [rdx] ; rcx = dpp->term + +mono_default_store_samples: + dec rcx + sub rsi, 4 ; back up one sample + mov eax, [rsi] + mov [rdx+rcx*4+16], eax ; store samples_A [ecx] + test rcx, rcx + jnz mono_default_store_samples + jmp mono_done + + align 64 + +mono_term_17_loop: + lea edx, [rcx+rcx] + sub edx, [rsi-8] ; ebx = sam_A + mov ebx, edx + imul edx, ebp + jo over17 + sar edx, 10 + lodsd + mov ecx, eax + sbb eax, edx + jmp @f +over17: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax ; edx = apply_weight (sam_A) + lodsd + mov ecx, eax + sub eax, edx +@@: stosd + je @f + test ebx, ebx + je @f + xor eax, ebx + cdq + xor ebp, edx + add ebp, r8d + xor ebp, edx +@@: add r9d, ebp + cmp rsi, r10 + jnz mono_term_17_loop + jmp mono_term_1718_exit + + align 64 + +mono_term_18_loop: + lea edx, [rcx+rcx*2] + sub edx, [rsi-8] + sar edx, 1 + mov ebx, edx ; ebx = sam_A + imul edx, ebp + jo over18 + sar edx, 10 + lodsd + mov ecx, eax + sbb eax, edx + jmp @f +over18: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax ; edx = apply_weight (sam_A) + lodsd + mov ecx, eax + sub eax, edx +@@: stosd + je @f + test ebx, ebx + je @f + xor eax, ebx + cdq + xor ebp, edx + add ebp, r8d + xor ebp, edx +@@: add r9d, ebp + cmp rsi, 
r10 + jnz mono_term_18_loop + +mono_term_1718_exit: + mov rdx, [rsp] ; rdx = *dpp + mov [rdx+8], ebp ; put weight back + mov [rdx+88], r9d ; put weight sum back + mov eax, [rsi-4] ; dpp->samples_A [0] = bptr [-1] + mov [rdx+16], eax + mov eax, [rsi-8] ; dpp->samples_A [1] = bptr [-2] + mov [rdx+20], eax + +mono_done: + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +pack_decorr_mono_pass_cont_x64win endp + + +; This is an assembly optimized version of the following WavPack function: +; +; uint32_t scan_max_magnitude (int32_t *buffer, int32_t sample_count); +; +; This function scans a buffer of signed 32-bit ints and returns the magnitude +; of the largest sample, with a power-of-two resolution. It might be more +; useful to return the actual maximum absolute value, but that implementation +; would be slower. Instead, this simply returns the "or" of all the values +; "xor"d with their own sign, like so: +; +; while (sample_count--) +; magnitude |= (*buffer < 0) ? ~*buffer++ : *buffer++; +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode and uses the MMX extensions to improve the +; performance by processing two samples together. +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode. This version is for the 64-bit Windows ABI and +; provides appropriate prologs and epilogs for stack unwinding. 
The +; arguments are passed in registers: +; +; int32_t *buffer rcx +; int32_t sample_count rdx +; +; During the processing loops, the following registers are used: +; +; rdi buffer pointer +; rsi termination buffer pointer +; ebx single magnitude accumulator +; mm0 dual magnitude accumulator +; mm1, mm2 scratch +; + +scan_max_magnitude_x64win proc public frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + + xor ebx, ebx ; clear magnitude accumulator + + mov eax, esi ; eax = count + and eax, 7 + mov ecx, eax ; ecx = leftover samples to "manually" scan at end + + shr esi, 3 ; esi = num of loops to process mmx (8 samples/loop) + shl esi, 5 ; esi = num of bytes to process mmx (32 bytes/loop) + jz nommx ; jump around if no mmx loops to do (< 8 samples) + + pxor mm0, mm0 ; clear dual magnitude accumulator + add rsi, rdi ; rsi = termination buffer pointer for mmx loop + jmp mmxlp + + align 64 + +mmxlp: movq mm1, [rdi] ; get stereo samples in mm1 & mm2 + movq mm2, mm1 + psrad mm1, 31 ; mm1 = sign (mm2) + pxor mm1, mm2 ; mm1 = absolute magnitude, or into result + por mm0, mm1 + + movq mm1, [rdi+8] ; do it again with 6 more samples + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + movq mm1, [rdi+16] + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + movq mm1, [rdi+24] + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + add rdi, 32 + cmp rdi, rsi + jnz mmxlp + + movd eax, mm0 ; ebx = "or" of high and low mm0 + punpckhdq mm0, mm0 + movd ebx, mm0 + or ebx, eax + emms + +nommx: and ecx, ecx ; any leftover samples to do? 
+ jz noleft + +leftlp: mov eax, [rdi] + cdq + xor eax, edx + or ebx, eax + add rdi, 4 + loop leftlp + +noleft: mov eax, ebx ; move magnitude to eax for return + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +scan_max_magnitude_x64win endp + + +; This is an assembly optimized version of the following WavPack function: +; +; uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit); +; +; This function scans a buffer of 32-bit ints and accumulates the total +; log2 value of all the samples. This is useful for determining maximum +; compression because the bitstream storage required for entropy coding +; is proportional to the base 2 log of the samples. +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode. This version is for the 64-bit Windows ABI and +; provides appropriate prologs and epilogs for stack unwinding. The +; arguments are passed in registers: +; +; int32_t *samples rcx +; uint32_t num_samples rdx +; int limit r8 +; +; During the processing loops, the following registers are used: +; +; r8 pointer to the 256-byte log fraction table +; rsi input buffer pointer +; edi sum accumulator +; ebx sample count +; ebp limit (if specified non-zero) +; eax,ecx,edx scratch +; + + align 256 + + .radix 16 + +log2_table: + byte 000, 001, 003, 004, 006, 007, 009, 00a, 00b, 00d, 00e, 010, 011, 012, 014, 015 + byte 016, 018, 019, 01a, 01c, 01d, 01e, 020, 021, 022, 024, 025, 026, 028, 029, 02a + byte 02c, 02d, 02e, 02f, 031, 032, 033, 034, 036, 037, 038, 039, 03b, 03c, 03d, 03e + byte 03f, 041, 042, 043, 044, 045, 047, 048, 049, 04a, 04b, 04d, 04e, 04f, 050, 051 + byte 052, 054, 055, 056, 057, 058, 059, 05a, 05c, 05d, 05e, 05f, 060, 061, 062, 063 + byte 064, 066, 067, 068, 069, 06a, 06b, 06c, 06d, 06e, 06f, 070, 071, 072, 074, 075 + byte 076, 077, 078, 079, 07a, 07b, 07c, 07d, 07e, 07f, 080, 081, 082, 083, 084, 085 + byte 086, 087, 088, 089, 08a, 08b, 08c, 08d, 08e, 08f, 090, 091, 092, 093, 094, 
095 + byte 096, 097, 098, 099, 09a, 09b, 09b, 09c, 09d, 09e, 09f, 0a0, 0a1, 0a2, 0a3, 0a4 + byte 0a5, 0a6, 0a7, 0a8, 0a9, 0a9, 0aa, 0ab, 0ac, 0ad, 0ae, 0af, 0b0, 0b1, 0b2, 0b2 + byte 0b3, 0b4, 0b5, 0b6, 0b7, 0b8, 0b9, 0b9, 0ba, 0bb, 0bc, 0bd, 0be, 0bf, 0c0, 0c0 + byte 0c1, 0c2, 0c3, 0c4, 0c5, 0c6, 0c6, 0c7, 0c8, 0c9, 0ca, 0cb, 0cb, 0cc, 0cd, 0ce + byte 0cf, 0d0, 0d0, 0d1, 0d2, 0d3, 0d4, 0d4, 0d5, 0d6, 0d7, 0d8, 0d8, 0d9, 0da, 0db + byte 0dc, 0dc, 0dd, 0de, 0df, 0e0, 0e0, 0e1, 0e2, 0e3, 0e4, 0e4, 0e5, 0e6, 0e7, 0e7 + byte 0e8, 0e9, 0ea, 0ea, 0eb, 0ec, 0ed, 0ee, 0ee, 0ef, 0f0, 0f1, 0f1, 0f2, 0f3, 0f4 + byte 0f4, 0f5, 0f6, 0f7, 0f7, 0f8, 0f9, 0f9, 0fa, 0fb, 0fc, 0fc, 0fd, 0fe, 0ff, 0ff + + .radix 10 + +log2buffer_x64win proc public frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + + mov ebx, esi ; ebx = num_samples + mov rsi, rdi ; rsi = *samples + xor edi, edi ; initialize sum + lea r8, log2_table + test ebx, ebx ; test count for zero + jz normal_exit + mov ebp, edx ; ebp = limit + test ebp, ebp ; we have separate loops for limit and no limit + jz no_limit_loop + jmp limit_loop + + align 64 + +limit_loop: + mov eax, [rsi] ; get next sample into eax + cdq ; edx = sign of sample (for abs) + add rsi, 4 + xor eax, edx + sub eax, edx + je L40 ; skip if sample was zero + mov edx, eax ; move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx ; ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] ; eax = number used bits in sample (1 - 32) + sub ecx, 8 ; ecx = shift right amount (-8 to 23) + ror edx, cl ; use rotate to do "signed" shift + sal eax, 8 ; move nbits to integer portion of log + movzx edx, dl ; dl = mantissa, look up log fraction in table + mov al, [r8+rdx] ; eax = 
combined integer and fraction for full log + add edi, eax ; add to running sum and compare to limit + cmp eax, ebp + jge limit_exceeded +L40: sub ebx, 1 ; loop back if more samples + jne limit_loop + jmp normal_exit + + align 64 + +no_limit_loop: + mov eax, [rsi] ; get next sample into eax + cdq ; edx = sign of sample (for abs) + add rsi, 4 + xor eax, edx + sub eax, edx + je L45 ; skip if sample was zero + mov edx, eax ; move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx ; ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] ; eax = number used bits in sample (1 - 32) + sub ecx, 8 ; ecx = shift right amount (-8 to 23) + ror edx, cl ; use rotate to do "signed" shift + sal eax, 8 ; move nbits to integer portion of log + movzx edx, dl ; dl = mantissa, look up log fraction in table + mov al, [r8+rdx] ; eax = combined integer and fraction for full log + add edi, eax ; add to running sum +L45: sub ebx, 1 + jne no_limit_loop + jmp normal_exit + +limit_exceeded: + mov edi, -1 ; return -1 to indicate limit hit +normal_exit: + mov eax, edi ; move sum accumulator into eax for return + + add rsp, 8 ; begin epilog by deallocating stack + pop rsi ; restore non-volatile registers & return + pop rdi + pop rbx + pop rbp + ret + +log2buffer_x64win endp + +asmcode ends + + end + diff --git a/third_party/wavpack/src/pack_x86.S b/third_party/wavpack/src/pack_x86.S new file mode 100644 index 0000000..31cf7a4 --- /dev/null +++ b/third_party/wavpack/src/pack_x86.S @@ -0,0 +1,1840 @@ +############################################################################ +## **** WAVPACK **** ## +## Hybrid Lossless Wavefile Compressor ## +## Copyright (c) 1998 - 2015 Conifer Software. ## +## All Rights Reserved. 
## +## Distributed under the BSD Software License (see license.txt) ## +############################################################################ + + .intel_syntax noprefix + .text + + .globl _pack_decorr_stereo_pass_x86 + .globl _pack_decorr_stereo_pass_cont_rev_x86 + .globl _pack_decorr_stereo_pass_cont_x86 + .globl _pack_decorr_mono_buffer_x86 + .globl _pack_decorr_mono_pass_cont_x86 + .globl _pack_cpu_has_feature_x86 + .globl _scan_max_magnitude_x86 + .globl _log2buffer_x86 + + .globl pack_decorr_stereo_pass_x86 + .globl pack_decorr_stereo_pass_cont_rev_x86 + .globl pack_decorr_stereo_pass_cont_x86 + .globl pack_decorr_mono_buffer_x86 + .globl pack_decorr_mono_pass_cont_x86 + .globl pack_cpu_has_feature_x86 + .globl scan_max_magnitude_x86 + .globl log2buffer_x86 + +# This module contains X86 assembly optimized versions of functions required +# to encode WavPack files. Note that the stereo versions of these functions +# use the MMX registers and instructions of the X86 processor, and so a +# helper function is provided to make a runtime check for that feature. + +# This is an assembly optimized version of the following WavPack function: +# +# void pack_decorr_stereo_pass ( +# struct decorr_pass *dpp, +# int32_t *buffer, +# int32_t sample_count); +# +# It performs a single pass of stereo decorrelation, in place, as specified +# by the decorr_pass structure. Note that this function does NOT return the +# dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +# the number of samples is not a multiple of MAX_TERM, these must be moved if +# they are to be used somewhere else. +# +# This is written to work on an IA-32 processor and uses the MMX extensions +# to improve the performance by processing both stereo channels together. +# It is based on the original MMX code written by Joachim Henke that used +# MMX intrinsics called from C. Many thanks to Joachim for that! 
+# +# An issue with using MMX for this is that the sample history array in the +# decorr_pass structure contains separate arrays for each channel while the +# MMX code wants there to be a single array of dual samples. The fix for +# this is to convert the data in the arrays on entry and exit, and this is +# made easy by the fact that the 8 MMX registers hold exactly the required +# amount of data (64 bytes)! +# +# This is written to work on an IA-32 processor. The arguments are on the +# stack at these locations (after 4 pushes, we do not use ebp as a base +# pointer): +# +# struct decorr_pass *dpp [esp+20] +# int32_t *buffer [esp+24] +# int32_t sample_count [esp+28] +# +# During the processing loops, the following registers are used: +# +# edi buffer pointer +# esi termination buffer pointer +# eax,ebx,edx used in default term to reduce calculation +# ebp decorr_pass pointer +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 0 (for pcmpeqd) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +_pack_decorr_stereo_pass_x86: +pack_decorr_stereo_pass_x86: + push ebp + push ebx + push edi + push esi + + mov ebp, [esp+20] # ebp = *dpp + mov edi, [esp+24] # edi = buffer + mov esi, [esp+28] # esi = sample_count + shl esi, 3 # esi = byte count (8 bytes per stereo sample pair) + jz bdone # NOTE(review): zero count reaches bdone before mm5 is loaded and before the samples_AB conversion below - confirm callers never pass 0 + add esi, edi # esi = termination buffer pointer + + // convert samples_A and samples_B array into samples_AB array for MMX + // (the MMX registers provide exactly enough storage to do this easily) + + movq mm0, [ebp+16] + punpckldq mm0, [ebp+48] + movq mm1, [ebp+16] + punpckhdq mm1, [ebp+48] + movq mm2, [ebp+24] + punpckldq mm2, [ebp+56] + movq mm3, [ebp+24] + punpckhdq mm3, [ebp+56] + movq mm4, [ebp+32] + punpckldq mm4, [ebp+64] + movq mm5, [ebp+32] + punpckhdq mm5, [ebp+64] + movq mm6, [ebp+40] + punpckldq mm6, [ebp+72] + movq mm7, [ebp+40] + punpckhdq mm7, [ebp+72] + + movq [ebp+16], mm0 + movq [ebp+24], mm1 + movq [ebp+32], mm2 + movq [ebp+40], mm3 + movq [ebp+48], mm4 + movq [ebp+56], mm5 + movq [ebp+64], mm6 + movq
[ebp+72], mm7 + + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + + mov eax, [ebp+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [ebp+8] # mm5 = weight_AB masked to 16-bit + + movq mm4, [ebp+16] # preload samples_AB[0] + + mov al, [ebp] # get term and vector to correct loop + cmp al, 17 + je buff_term_17_loop + cmp al, 18 + je buff_term_18_loop + cmp al, -1 + je buff_term_minus_1_loop + cmp al, -2 + je buff_term_minus_2_loop + cmp al, -3 + je buff_term_minus_3_loop + + pxor mm4, mm4 # mm4 = 0 (for pcmpeqd) + xor eax, eax + xor ebx, ebx + add bl, [ebp] + mov ecx, 7 + and ebx, ecx + jmp buff_default_term_loop + + .balign 64 + +buff_default_term_loop: + movq mm2, [edi] # mm2 = left_right + movq mm3, [ebp+16+eax*8] + inc eax + and eax, ecx + movq [ebp+16+ebx*8], mm2 + inc ebx + and ebx, ecx + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm4 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp edi, esi + jnz buff_default_term_loop + + jmp bdone + + .balign 64 + +buff_term_17_loop: + movq mm3, mm4 # get previous calculated value + paddd mm3, mm4 + psubd mm3, [ebp+24] + movq [ebp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [edi] # mm2 = left_right + movq 
mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_17_loop + + movq [ebp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_18_loop: + movq mm3, mm4 # get previous calculated value + psubd mm3, [ebp+24] + psrad mm3, 1 + paddd mm3, mm4 # mm3 = sam_AB + movq [ebp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [edi] # mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_18_loop + + movq [ebp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_minus_1_loop: + movq mm3, mm4 # mm3 = previous calculated value + movq mm2, [edi] # mm2 = left_right + movq mm4, mm2 + psrlq mm4, 32 + punpckldq mm3, mm2 # mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for 
rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_minus_1_loop + + movq [ebp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_minus_2_loop: + movq mm2, [edi] # mm2 = left_right + movq mm3, mm2 + psrlq mm3, 32 + por mm3, mm4 + punpckldq mm4, mm2 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_minus_2_loop + + movq [ebp+16], mm4 # post-store samples_AB[0] + jmp bdone + + .balign 64 + +buff_term_minus_3_loop: + movq mm2, [edi] # mm2 = left_right + movq mm3, mm4 # mm3 = previous calculated value + movq mm4, mm2 # mm0 = swap dwords of new data + psrlq mm4, 32 + punpckldq mm4, mm2 # mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + 
pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm2 # store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_minus_3_loop + + movq [ebp+16], mm4 # post-store samples_AB[0] + +bdone: pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + movq [ebp+8], mm5 # put weight_AB back + + // convert samples_AB array back into samples_A and samples_B + + movq mm0, [ebp+16] + movq mm1, [ebp+24] + movq mm2, [ebp+32] + movq mm3, [ebp+40] + movq mm4, [ebp+48] + movq mm5, [ebp+56] + movq mm6, [ebp+64] + movq mm7, [ebp+72] + + movd [ebp+16], mm0 + movd [ebp+20], mm1 + movd [ebp+24], mm2 + movd [ebp+28], mm3 + movd [ebp+32], mm4 + movd [ebp+36], mm5 + movd [ebp+40], mm6 + movd [ebp+44], mm7 + + punpckhdq mm0, mm0 + punpckhdq mm1, mm1 + punpckhdq mm2, mm2 + punpckhdq mm3, mm3 + punpckhdq mm4, mm4 + punpckhdq mm5, mm5 + punpckhdq mm6, mm6 + punpckhdq mm7, mm7 + + movd [ebp+48], mm0 + movd [ebp+52], mm1 + movd [ebp+56], mm2 + movd [ebp+60], mm3 + movd [ebp+64], mm4 + movd [ebp+68], mm5 + movd [ebp+72], mm6 + movd [ebp+76], mm7 + + emms + + pop esi + pop edi + pop ebx + pop ebp + ret + +# These are assembly optimized versions of the following WavPack functions: +# +# void pack_decorr_stereo_pass_cont ( +# struct decorr_pass *dpp, +# int32_t *in_buffer, +# int32_t *out_buffer, +# int32_t sample_count); +# +# void pack_decorr_stereo_pass_cont_rev
( +# struct decorr_pass *dpp, +# int32_t *in_buffer, +# int32_t *out_buffer, +# int32_t sample_count); +# +# It performs a single pass of stereo decorrelation, transfering from the +# input buffer to the output buffer. Note that this version of the function +# requires that the up to 8 previous (depending on dpp->term) stereo samples +# are visible and correct. In other words, it ignores the "samples_*" +# fields in the decorr_pass structure and gets the history data directly +# from the source buffer. It does, however, return the appropriate history +# samples to the decorr_pass structure before returning. +# +# This is written to work on an IA-32 processor and uses the MMX extensions +# to improve the performance by processing both stereo channels together. +# It is based on the original MMX code written by Joachim Henke that used +# MMX intrinsics called from C. Many thanks to Joachim for that! +# +# No additional stack space is used; all storage is done in registers. The +# arguments on entry: +# +# struct decorr_pass *dpp [ebp+8] +# int32_t *in_buffer [ebp+12] +# int32_t *out_buffer [ebp+16] +# int32_t sample_count [ebp+20] +# +# During the processing loops, the following registers are used: +# +# edi input buffer pointer +# esi direction (-8 forward, +8 reverse) +# ebx delta from input to output buffer +# ecx sample count +# edx sign (dir) * term * -8 (terms 1-8 only) +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 weight sums +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +_pack_decorr_stereo_pass_cont_rev_x86: +pack_decorr_stereo_pass_cont_rev_x86: + push ebp + mov ebp, esp + push ebx # save the registers that we need to + push esi + push edi + + mov esi, 8 # esi indicates direction (inverted) + jmp start + +_pack_decorr_stereo_pass_cont_x86: +pack_decorr_stereo_pass_cont_x86: + push ebp + mov ebp, esp + push ebx # save the registers that we need to + push esi + push edi + + mov esi, -8 # esi indicates 
direction (inverted) + +start: mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + + mov eax, [ebp+8] # access dpp + mov eax, [eax+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + + mov eax, [ebp+8] # access dpp + movq mm5, [eax+8] # mm5 = weight_AB + movq mm4, [eax+88] # mm4 = sum_AB + + mov edi, [ebp+12] # edi = in_buffer + mov ebx, [ebp+16] + sub ebx, edi # ebx = delta to output buffer + + mov ecx, [ebp+20] # ecx = sample_count + test ecx, ecx + jz done + + mov eax, [ebp+8] # *eax = dpp + mov eax, [eax] # get term and vector to correct loop + cmp eax, 17 + je term_17_loop + cmp eax, 18 + je term_18_loop + cmp eax, -1 + je term_minus_1_loop + cmp eax, -2 + je term_minus_2_loop + cmp eax, -3 + je term_minus_3_loop + + shl eax, 3 + mov edx, eax # edx = term * 8 to index correlation sample + test esi, esi # test direction + jns default_term_loop + neg edx + jmp default_term_loop + + .balign 64 + +default_term_loop: + movq mm3, [edi+edx] # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz default_term_loop + + mov eax, [ebp+8] # access dpp + movq [eax+8], mm5 # put weight_AB back + movq [eax+88], mm4 # put sum_AB back + emms + + mov edx, [ebp+8] # access dpp with edx + mov ecx, [edx] # ecx = dpp->term 
+ +default_store_samples: + dec ecx + add edi, esi # back up one full sample + mov eax, [edi+4] + mov [edx+ecx*4+48], eax # store samples_B [ecx] + mov eax, [edi] + mov [edx+ecx*4+16], eax # store samples_A [ecx] + test ecx, ecx + jnz default_store_samples + jmp done + + .balign 64 + +term_17_loop: + movq mm3, [edi+esi] # get previous calculated value + paddd mm3, mm3 + psubd mm3, [edi+esi*2] + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_17_loop + + mov eax, [ebp+8] # access dpp + movq [eax+8], mm5 # put weight_AB back + movq [eax+88], mm4 # put sum_AB back + emms + jmp term_1718_common_store + + .balign 64 + +term_18_loop: + movq mm3, [edi+esi] # get previous calculated value + movq mm0, mm3 + psubd mm3, [edi+esi*2] + psrad mm3, 1 + paddd mm3, mm0 # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 
# mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + dec ecx + paddd mm4, mm5 # add weights to sum + jnz term_18_loop + + mov eax, [ebp+8] # access dpp + movq [eax+8], mm5 # put weight_AB back + movq [eax+88], mm4 # put sum_AB back + emms + +term_1718_common_store: + + mov eax, [ebp+8] # access dpp + add edi, esi # back up a full sample + mov edx, [edi+4] # dpp->samples_B [0] = iptr [-1]; + mov [eax+48], edx + mov edx, [edi] # dpp->samples_A [0] = iptr [-2]; + mov [eax+16], edx + add edi, esi # back up another sample + mov edx, [edi+4] # dpp->samples_B [1] = iptr [-3]; + mov [eax+52], edx + mov edx, [edi] # dpp->samples_A [1] = iptr [-4]; + mov [eax+20], edx + jmp done + + .balign 64 + +term_minus_1_loop: + movq mm3, [edi+esi] # mm3 = previous calculated value + movq mm2, [edi] # mm2 = left_right + psrlq mm3, 32 + punpckldq mm3, mm2 # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_minus_1_loop + + mov eax, [ebp+8] # access dpp + movq [eax+8], mm5 # put weight_AB 
back + movq [eax+88], mm4 # put sum_AB back + emms + + add edi, esi # back up a full sample + mov edx, [edi+4] # dpp->samples_A [0] = iptr [-1]; + mov eax, [ebp+8] + mov [eax+16], edx + jmp done + + .balign 64 + +term_minus_2_loop: + movq mm2, [edi] # mm2 = left_right + movq mm3, mm2 # mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, [edi+esi] # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_minus_2_loop + + mov eax, [ebp+8] # access dpp + movq [eax+8], mm5 # put weight_AB back + movq [eax+88], mm4 # put sum_AB back + emms + + add edi, esi # back up a full sample + mov edx, [edi] # dpp->samples_B [0] = iptr [-2]; + mov eax, [ebp+8] + mov [eax+48], edx + jmp done + + .balign 64 + +term_minus_3_loop: + movq mm0, [edi+esi] # mm0 = previous calculated value + movq mm3, mm0 # mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, mm0 # mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 # add shifted sums + movq mm0, mm3 + 
movq [edi+ebx], mm2 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 # add weights to sum + dec ecx + jnz term_minus_3_loop + + mov eax, [ebp+8] # access dpp + movq [eax+8], mm5 # put weight_AB back + movq [eax+88], mm4 # put sum_AB back + emms + + add edi, esi # back up a full sample + mov edx, [edi+4] # dpp->samples_A [0] = iptr [-1]; + mov eax, [ebp+8] + mov [eax+16], edx + mov edx, [edi] # dpp->samples_B [0] = iptr [-2]; + mov [eax+48], edx + +done: pop edi + pop esi + pop ebx + leave + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# uint32_t decorr_mono_buffer (int32_t *buffer, +# struct decorr_pass *decorr_passes, +# int32_t num_terms, +# int32_t sample_count) +# +# Decorrelate a buffer of mono samples, in place, as specified by the array +# of decorr_pass structures. Note that this function does NOT return the +# dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +# the number of samples is not a multiple of MAX_TERM, these must be moved if +# they are to be used somewhere else. The magnitude of the output samples is +# accumulated and returned (see scan_max_magnitude() for more details). By +# using the overflow detection of the multiply instruction, this detects +# when the "long_math" varient is required. 
+# +# For the fastest possible operation with the four "common" decorrelation +# filters (i.e., fast, normal, high and very high) this function can be +# configured to include hardcoded versions of these filters that are created +# using macros. In that case, the passed filter is checked to make sure that +# it matches one of the four. If it doesn't, or if the hardcoded filters are +# not enabled, a "general" version of the decorrelation loop is used. This +# variable enables the hardcoded filters and can be disabled if there are +# problems with the code or macros: + + HARDCODED_FILTERS = 1 + +# This is written to work on an IA-32 processor. The arguments are on the +# stack at these locations (after 6 pushes, we do not use ebp as a base +# pointer): +# +# int32_t *buffer [esp+28] +# struct decorr_pass *dpp [esp+32] +# int32_t num_terms [esp+36] +# int32_t sample_count [esp+40] +# +# register usage: +# +# ecx = sample being decorrelated +# esi = sample up counter +# edi = *buffer +# ebp = *dpp +# +# stack usage: +# +# [esp+0] = dpp end ptr (unused in hardcoded filter case) +# [esp+4] = magnitude accumulator +# + .if HARDCODED_FILTERS +# +# This macro is used for checking the decorr_passes array to make sure that the terms match +# the hardcoded terms. The terms of these filters are the first element in the tables defined +# in decorr_tables.h (with the negative terms replaced with 1). +# + + .macro chkterm term ebp_offset + cmp BYTE PTR [ebp], \term + jnz use_general_version + add ebp, \ebp_offset + .endm +# +# This macro processes the single specified term (with a fixed delta of 2) and updates the +# term pointer (ebp) with the specified offset when done.
It assumes the following registers: +# +# ecx = sample being decorrelated +# esi = sample up counter (used for terms 1-8) +# ebp = decorr_pass pointer for this term (updated with "ebp_offset" when done) +# eax, ebx, edx = scratch +# + .macro exeterm term ebp_offset + + .if \term <= 8 + mov eax, esi + and eax, 7 + mov ebx, [ebp+16+eax*4] + .if \term != 8 + add eax, \term + and eax, 7 + .endif + mov [ebp+16+eax*4], ecx + + .elseif \term == 17 + + mov edx, [ebp+16] # handle term 17 + mov [ebp+16], ecx + lea ebx, [edx+edx] + sub ebx, [ebp+20] + mov [ebp+20], edx + + .else + + mov edx, [ebp+16] # handle term 18 + mov [ebp+16], ecx + lea ebx, [edx+edx*2] + sub ebx, [ebp+20] + sar ebx, 1 + mov [ebp+20], edx + + .endif + + mov eax, [ebp+8] + imul eax, ebx # 32-bit multiply is almost always enough + jo 1f # but handle overflow if it happens + sar eax, 10 + sbb ecx, eax # borrow flag provides rounding + jmp 2f +1: mov eax, [ebp+8] # perform 64-bit multiply on overflow + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx +2: je 3f + test ebx, ebx + je 3f + xor ebx, ecx + sar ebx, 30 + or ebx, 1 # this generates delta of 1 + shl ebx, 1 # this generates delta of 2 + add [ebp+8], ebx +3: add ebp, \ebp_offset + + .endm + + .endif # end of macro definitions + +# entry point of function + +_pack_decorr_mono_buffer_x86: +pack_decorr_mono_buffer_x86: + push ebp # save the registers that we need to + push ebx + push esi + push edi + xor eax, eax + push eax # this is magnitude accumulator + push eax # this will be dpp end ptr + + mov edi, [esp+28] # edi is buffer pointer + xor esi, esi # up counter = 0 + + cmp DWORD PTR [esp+40], 0 # test & handle zero sample count & zero term count + jz mexit + cmp DWORD PTR [esp+36], 0 + jz mexit + + .if HARDCODED_FILTERS + +# first check to make sure all the "deltas" are 2 + + mov ebp, [esp+32] # ebp is decorr_pass pointer + mov ebx, [esp+36] # get term count +deltas: cmp BYTE PTR [ebp+4], 2 # make sure all the deltas are 2 + jnz
use_general_version # if any aren't, use general case + add ebp, 96 + dec ebx + jnz deltas + + mov ebp, [esp+32] # ebp is decorr_pass pointer + mov edx, [esp+36] # get term count + cmp dl, 2 # 2 terms is "fast" + jnz nfast + chkterm 18, 96 # check "fast" terms + chkterm 17, -96 + jmp mono_fast_loop + +nfast: cmp dl, 5 # 5 terms is "normal" + jnz nnorm + chkterm 18, 96 # check "normal" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 17, 96 + chkterm 3, 96*-4 + jmp mono_normal_loop + +nnorm: cmp dl, 10 # 10 terms is "high" + jnz nhigh + chkterm 18, 96 # check "high" terms + chkterm 18, 96 + chkterm 18, 96 + chkterm 1, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 5, 96 + chkterm 1, 96 + chkterm 17, 96 + chkterm 4, 96*-9 + jmp mono_high_loop + +nhigh: cmp dl, 16 # 16 terms is "very high" + jnz use_general_version # if none of these, use general version + chkterm 18, 96 # else check "very high" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96 + chkterm 4, 96 + chkterm 7, 96 + chkterm 5, 96 + chkterm 3, 96 + chkterm 6, 96 + chkterm 8, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96*-15 + jmp mono_vhigh_loop + + .balign 64 + +mono_fast_loop: + mov ecx, [edi+esi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 17, -96 + + mov [edi+esi*4], ecx # store completed sample + mov eax, ecx # magnitude accumulator |= (sample < 0) ? ~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi # increment sample index + cmp esi, [esp+40] + jnz mono_fast_loop # loop back for all samples + jmp mexit + + .balign 64 + +mono_normal_loop: + mov ecx, [edi+esi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 17, 96 + exeterm 3, 96*-4 + + mov [edi+esi*4], ecx # store completed sample + mov eax, ecx # magnitude accumulator |= (sample < 0) ? 
~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi # increment sample index + cmp esi, [esp+40] + jnz mono_normal_loop # loop back for all samples + jmp mexit + + .balign 64 + +mono_high_loop: + mov ecx, [edi+esi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 18, 96 + exeterm 1, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 5, 96 + exeterm 1, 96 + exeterm 17, 96 + exeterm 4, 96*-9 + + mov [edi+esi*4], ecx # store completed sample + mov eax, ecx # magnitude accumulator |= (sample < 0) ? ~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi # increment sample index + cmp esi, [esp+40] + jnz mono_high_loop # loop back for all samples + jmp mexit + + .balign 64 + +mono_vhigh_loop: + mov ecx, [edi+esi*4] # ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 4, 96 + exeterm 7, 96 + exeterm 5, 96 + exeterm 3, 96 + exeterm 6, 96 + exeterm 8, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96*-15 + + mov [edi+esi*4], ecx # store completed sample + mov eax, ecx # magnitude accumulator |= (sample < 0) ? 
~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi # increment sample index + cmp esi, [esp+40] + jnz mono_vhigh_loop # loop back for all samples + jmp mexit + + .endif + +use_general_version: + mov ebp, [esp+32] + mov edx, [esp+36] # get number of terms + imul eax, edx, 96 # calculate & store termination check ptr + add eax, [esp+32] + mov [esp], eax + jmp decorrelate_loop + + .balign 64 + +decorrelate_loop: + mov ecx, [edi+esi*4] # ecx is the sample we're decorrelating +nxterm: mov edx, [ebp] + cmp dl, 17 + jge 3f + + mov eax, esi + and eax, 7 + mov ebx, [ebp+16+eax*4] + add eax, edx + and eax, 7 + mov [ebp+16+eax*4], ecx + jmp domult + + .balign 4 +3: mov edx, [ebp+16] + mov [ebp+16], ecx + je 4f + lea ebx, [edx+edx*2] + sub ebx, [ebp+20] + sar ebx, 1 + mov [ebp+20], edx + jmp domult + + .balign 4 +4: lea ebx, [edx+edx] + sub ebx, [ebp+20] + mov [ebp+20], edx + +domult: mov eax, [ebp+8] + mov edx, eax + imul eax, ebx + jo multov # on overflow, jump to use 64-bit imul varient + sar eax, 10 + sbb ecx, eax + je 2f + test ebx, ebx + je 2f + xor ebx, ecx + sar ebx, 31 + xor edx, ebx + add edx, [ebp+4] + xor edx, ebx + mov [ebp+8], edx +2: add ebp, 96 + cmp ebp, [esp] + jnz nxterm + + mov [edi+esi*4], ecx # store completed sample + mov eax, ecx # magnitude accumulator |= (sample < 0) ? ~sample : sample + cdq + xor eax, edx + or [esp+4], eax + mov ebp, [esp+32] # reload decorr_passes pointer to first term + inc esi # increment sample index + cmp esi, [esp+40] + jnz decorrelate_loop + jmp mexit + + .balign 4 +multov: mov eax, [ebp+8] + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx + je 2f + test ebx, ebx + je 2f + xor ebx, ecx + sar ebx, 31 + mov eax, [ebp+8] + xor eax, ebx + add eax, [ebp+4] + xor eax, ebx + mov [ebp+8], eax +2: add ebp, 96 + cmp ebp, [esp] + jnz nxterm + + mov [edi+esi*4], ecx # store completed sample + mov eax, ecx # magnitude accumulator |= (sample < 0) ? 
~sample : sample + cdq + xor eax, edx + or [esp+4], eax + mov ebp, [esp+32] # reload decorr_passes pointer to first term + inc esi # increment sample index + cmp esi, [esp+40] + jnz decorrelate_loop # loop all the way back this time + +mexit: pop eax # discard the dpp end ptr slot + pop eax # pop magnitude accumulator + pop edi + pop esi + pop ebx + pop ebp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# void decorr_mono_pass_cont (int32_t *out_buffer, +# int32_t *in_buffer, +# struct decorr_pass *dpp, +# int32_t sample_count); +# +# It performs a single pass of mono decorrelation, transferring from the +# input buffer to the output buffer. Note that this version of the function +# requires that the up to 8 previous (depending on dpp->term) mono samples +# are visible and correct. In other words, it ignores the "samples_*" +# fields in the decorr_pass structure and gets the history data directly +# from the source buffer. It does, however, return the appropriate history +# samples to the decorr_pass structure before returning. +# +# By using the overflow detection of the multiply instruction, it detects +# when the "long_math" variant is required and automatically does it. +# +# This is written to work on an IA-32 processor.
The arguments on entry: +# +# int32_t *out_buffer [ebp+8] +# int32_t *in_buffer [ebp+12] +# struct decorr_pass *dpp [ebp+16] +# int32_t sample_count [ebp+20] +# +# Register / stack usage: +# +# esi = source ptr +# edi = destination ptr +# ecx = term * -4 (default terms) +# ecx = previous sample (terms 17 & 18) +# ebp = weight +# [esp] = delta +# [esp+4] = weight sum +# [esp+8] = eptr +# + +_pack_decorr_mono_pass_cont_x86: +pack_decorr_mono_pass_cont_x86: + push ebp + mov ebp, esp + push ebx # save the registers that we need to + push esi + push edi + cld + + mov esi, [ebp+12] + mov edi, [ebp+8] + mov edx, [ebp+16] # edx = *dpp + mov ecx, [ebp+20] # ecx = sample count + mov ebp, [edx+8] # ebp = weight + lea eax, [esi+ecx*4] # calc & push eptr (access with [esp+8]) + push eax + mov eax, [edx+88] # push dpp->sum_A (access with [esp+4]) + push eax + mov eax, [edx+4] # push delta (access with [esp]) + push eax + test ecx, ecx # test for and handle zero count + jz mono_done + + cld # we use lodsd/stosd + mov ecx, [esi-4] # preload last sample + mov eax, [edx] # get term & branch for terms 17 & 18 + cmp eax, 17 + je mono_term_17_loop + cmp eax, 18 + je mono_term_18_loop + + imul ecx, eax, -4 # ecx is index to correlation sample now + jmp mono_default_term_loop + + .balign 64 + +mono_default_term_loop: + mov edx, [esi+ecx] + mov ebx, edx + imul edx, ebp + jo 1f + lodsd + sar edx, 10 + sbb eax, edx + jmp 2f +1: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax # edx = apply_weight (sam_A) + lodsd + sub eax, edx +2: stosd + je 3f + test ebx, ebx + je 3f + xor eax, ebx + cdq + xor ebp, edx + add ebp, [esp] + xor ebp, edx +3: add [esp+4], ebp + cmp esi, [esp+8] + jnz mono_default_term_loop + + mov ecx, ebp # ecx = weight + mov eax, [esp+4] # eax = weight sum + lea ebp, [esp+24] # restore ebp (we've pushed 6 DWORDS) + mov edx, [ebp+16] # edx = *dpp + mov [edx+8], ecx # put weight back + mov [edx+88], eax # put dpp->sum_A back + mov ecx, [edx] # ecx = dpp->term 
+ +mono_default_store_samples: + dec ecx + sub esi, 4 # back up one sample + mov eax, [esi] + mov [edx+ecx*4+16], eax # store samples_A [ecx] + test ecx, ecx + jnz mono_default_store_samples + jmp mono_done + + .balign 64 + +mono_term_17_loop: + lea edx, [ecx+ecx] + sub edx, [esi-8] # ebx = sam_A + mov ebx, edx + imul edx, ebp + jo 1f + sar edx, 10 + lodsd + mov ecx, eax + sbb eax, edx + jmp 2f +1: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax # edx = apply_weight (sam_A) + lodsd + mov ecx, eax + sub eax, edx +2: stosd + je 3f + test ebx, ebx + je 3f + xor eax, ebx + cdq + xor ebp, edx + add ebp, [esp] + xor ebp, edx +3: add [esp+4], ebp + cmp esi, [esp+8] + jnz mono_term_17_loop + jmp mono_term_1718_exit + + .balign 64 + +mono_term_18_loop: + lea edx, [ecx+ecx*2] + sub edx, [esi-8] + sar edx, 1 + mov ebx, edx # ebx = sam_A + imul edx, ebp + jo 1f + sar edx, 10 + lodsd + mov ecx, eax + sbb eax, edx + jmp 2f +1: mov eax, ebx + imul ebp + shl edx, 22 + shr eax, 10 + adc edx, eax # edx = apply_weight (sam_A) + lodsd + mov ecx, eax + sub eax, edx +2: stosd + je 3f + test ebx, ebx + je 3f + xor eax, ebx + cdq + xor ebp, edx + add ebp, [esp] + xor ebp, edx +3: add [esp+4], ebp + cmp esi, [esp+8] + jnz mono_term_18_loop + +mono_term_1718_exit: + mov ecx, ebp # ecx = weight + mov eax, [esp+4] # eax = weight sum + lea ebp, [esp+24] # restore ebp (we've pushed 6 DWORDS) + mov edx, [ebp+16] # edx = *dpp + mov [edx+8], ecx # put weight back + mov [edx+88], eax # put dpp->sum_A back + mov eax, [esi-4] # dpp->samples_A [0] = bptr [-1] + mov [edx+16], eax + mov eax, [esi-8] # dpp->samples_A [1] = bptr [-2] + mov [edx+20], eax + +mono_done: + add esp, 12 # deallocate stack space + pop edi # pop saved registers & return + pop esi + pop ebx + pop ebp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# uint32_t scan_max_magnitude (int32_t *buffer, int32_t sample_count); +# +# This function scans a buffer of signed 32-bit 
ints and returns the magnitude +# of the largest sample, with a power-of-two resolution. It might be more +# useful to return the actual maximum absolute value, but that implementation +# would be slower. Instead, this simply returns the "or" of all the values +# "xor"d with their own sign, like so: +# +# while (sample_count--) +# magnitude |= (*buffer < 0) ? ~*buffer++ : *buffer++; +# +# This is written to work on an IA-32 processor and uses the MMX extensions +# to improve the performance by processing two samples together. The arguments +# are on the stack at these locations (after 4 pushes, we do not use ebp as a +# base pointer): +# +# int32_t *buffer [esp+20] +# uint32_t sample_count [esp+24] +# +# During the processing loops, the following registers are used: +# +# edi buffer pointer +# esi termination buffer pointer +# ebx single magnitude accumulator +# mm0 dual magnitude accumulator +# mm1, mm2 scratch +# + +_scan_max_magnitude_x86: +scan_max_magnitude_x86: + push ebp + push ebx + push esi + push edi + + xor ebx, ebx # clear magnitude accumulator + mov edi, [esp+20] # edi = buffer pointer + + mov eax, [esp+24] # eax = count + and eax, 7 + mov ecx, eax # ecx = leftover samples to "manually" scan at end + + mov eax, [esp+24] # eax = count + shr eax, 3 # eax = num of loops to process mmx (8 samples/loop) + shl eax, 5 # eax = num of bytes to process mmx (32 bytes/loop) + jz nommx # jump around if no mmx loops to do (< 8 samples) + + pxor mm0, mm0 # clear dual magnitude accumulator + add eax, edi # esi = termination buffer pointer for mmx loop + mov esi, eax + jmp mmxlp + + .balign 64 + +mmxlp: movq mm1, [edi] # get stereo samples in mm1 & mm2 + movq mm2, mm1 + psrad mm1, 31 # mm1 = sign (mm2) + pxor mm1, mm2 # mm1 = absolute magnitude, or into result + por mm0, mm1 + + movq mm1, [edi+8] # do it again with 6 more samples + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + movq mm1, [edi+16] + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por 
mm0, mm1 + + movq mm1, [edi+24] + movq mm2, mm1 + psrad mm1, 31 + pxor mm1, mm2 + por mm0, mm1 + + add edi, 32 + cmp edi, esi + jnz mmxlp + + movd eax, mm0 # ebx = "or" of high and low mm0 + punpckhdq mm0, mm0 + movd ebx, mm0 + or ebx, eax + emms + +nommx: and ecx, ecx # any leftover samples to do? + jz noleft + +leftlp: mov eax, [edi] + cdq + xor eax, edx + or ebx, eax + add edi, 4 + loop leftlp + +noleft: mov eax, ebx # move magnitude to eax for return + pop edi + pop esi + pop ebx + pop ebp + ret + + +# This is an assembly optimized version of the following WavPack function: +# +# uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit); +# +# This function scans a buffer of 32-bit ints and accumulates the total +# log2 value of all the samples. This is useful for determining maximum +# compression because the bitstream storage required for entropy coding +# is proportional to the base 2 log of the samples. +# +# This is written to work on an IA-32 processor. The arguments are on the +# stack at these locations (after 4 pushes, we do not use ebp as a base +# pointer): +# +# int32_t *samples [esp+20] +# uint32_t num_samples [esp+24] +# int limit [esp+28] +# +# During the processing loops, the following registers are used: +# +# esi input buffer pointer +# edi sum accumulator +# ebx sample count +# ebp log2_table pointer +# eax,ecx,edx scratch +# + + .balign 256 + +log2_table: + .byte 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15 + .byte 0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a + .byte 0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e + .byte 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51 + .byte 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63 + .byte 0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 
0x6f, 0x70, 0x71, 0x72, 0x74, 0x75 + .byte 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85 + .byte 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95 + .byte 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4 + .byte 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2 + .byte 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0 + .byte 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce + .byte 0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb + .byte 0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7 + .byte 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4 + .byte 0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff + +_log2buffer_x86: +log2buffer_x86: + push ebp + push ebx + push esi + push edi + cld + +# These three instructions allow this to be PIC (position independent code). Having the hardcoded offset is +# certainly not ideal, but it will probably work everywhere. The actual desired expression (nexti - log2_table) +# would not compile on OS X. 
+ + call nexti # push address of nexti (return address) +nexti: pop ebp # pop address of nexti into ebp + sub ebp, 266 # offset to log2_table, should be (nexti - log2_table) + + mov esi, [esp+20] # esi = sample source pointer + xor edi, edi # edi = 0 (accumulator) + mov ebx, [esp+24] # ebx = num_samples + test ebx, ebx # exit now if none, sum = 0 + jz normal_exit + + mov eax, [esp+28] # eax = limit + test eax, eax # we have separate loops for limit and no limit + jz no_limit_loop + jmp limit_loop + + .balign 64 + +limit_loop: + mov eax, [esi] # get next sample into eax + cdq # edx = sign of sample (for abs) + add esi, 4 + xor eax, edx + sub eax, edx + je L40 # skip if sample was zero + mov edx, eax # move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx # ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] # eax = number used bits in sample (1 - 32) + sub ecx, 8 # ecx = shift right amount (-8 to 23) + ror edx, cl # use rotate to do "signed" shift + shl eax, 8 # move nbits to integer portion of log + movzx edx, dl # dl = mantissa, look up log fraction in table + mov al, [ebp+edx] # eax = combined integer and fraction for full log + add edi, eax # add to running sum and compare to limit + cmp eax, [esp+28] + jge limit_exceeded +L40: sub ebx, 1 # loop back if more samples + jne limit_loop + jmp normal_exit + + .balign 64 + +no_limit_loop: + mov eax, [esi] # get next sample into eax + cdq # edx = sign of sample (for abs) + add esi, 4 + xor eax, edx + sub eax, edx + je L45 # skip if sample was zero + mov edx, eax # move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx # ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] # eax = number used bits in sample (1 - 32) + sub ecx, 8 # ecx = shift right amount (-8 to 23) + ror edx, cl # use rotate to do "signed" shift + shl eax, 8 # move nbits to integer portion of log + movzx edx, dl # dl = mantissa, look up log fraction in table + mov al, [ebp+edx] # eax = combined integer and fraction 
for full log + add edi, eax # add to running sum +L45: sub ebx, 1 # loop back if more samples + jne no_limit_loop + jmp normal_exit + +limit_exceeded: + mov edi, -1 # -1 return means log limit exceeded +normal_exit: + mov eax, edi # move sum accumulator into eax for return + pop edi + pop esi + pop ebx + pop ebp + ret + +# Helper function to determine if specified CPU feature is available (used here for MMX). +# Input parameter is index of feature to be checked (EDX from CPUID(1) only, MMX = 23). +# Return value is the specified bit (0 or 1) or 0 if CPUID is not supported. + +_pack_cpu_has_feature_x86: +pack_cpu_has_feature_x86: + pushfd # save eflags + pushfd # push another copy + xor dword ptr [esp], 0x200000 # toggle ID bit on stack & pop it back into eflags + popfd + pushfd # store possibly modified eflags + pop eax # and pop back into eax + xor eax, [esp] # compare to original pushed eflags + popfd # restore original eflags + and eax, 0x200000 # eax is nonzero (0x200000) if eflags ID bit was changeable + jz oldcpu # return zero if CPUID is not available (wow!) + + push ebx # we must save ebx + mov eax, 1 # do cpuid (1) to get features into edx + cpuid + mov eax, edx # copy into eax for shift + mov cl, [esp+8] # get parameter and shift that bit index into LSB + sar eax, cl + and eax, 1 + pop ebx # restore ebx and return 0 or 1 + +oldcpu: ret # return value in eax + +#ifdef __ELF__ + .section .note.GNU-stack,"",@progbits +#endif + diff --git a/third_party/wavpack/src/pack_x86.asm b/third_party/wavpack/src/pack_x86.asm new file mode 100644 index 0000000..87b5f02 --- /dev/null +++ b/third_party/wavpack/src/pack_x86.asm @@ -0,0 +1,1827 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; **** WAVPACK **** ;; +;; Hybrid Lossless Wavefile Compressor ;; +;; Copyright (c) 1998 - 2015 Conifer Software. ;; +;; All Rights Reserved.
;; +;; Distributed under the BSD Software License (see license.txt) ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + .686 + .mmx + .model flat +asmcode segment page 'CODE' + public _pack_decorr_stereo_pass_x86 + public _pack_decorr_stereo_pass_cont_rev_x86 + public _pack_decorr_stereo_pass_cont_x86 + public _pack_decorr_mono_buffer_x86 + public _pack_decorr_mono_pass_cont_x86 + public _pack_cpu_has_feature_x86 + public _scan_max_magnitude_x86 + public _log2buffer_x86 + +; This module contains X86 assembly optimized versions of functions required +; to encode WavPack files. + +; This is an assembly optimized version of the following WavPack function: +; +; void pack_decorr_stereo_pass ( +; struct decorr_pass *dpp, +; int32_t *buffer, +; int32_t sample_count); +; +; It performs a single pass of stereo decorrelation, in place, as specified +; by the decorr_pass structure. Note that this function does NOT return the +; dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +; the number of samples is not a multiple of MAX_TERM, these must be moved if +; they are to be used somewhere else. +; +; This is written to work on an IA-32 processor and uses the MMX extensions +; to improve the performance by processing both stereo channels together. +; It is based on the original MMX code written by Joachim Henke that used +; MMX intrinsics called from C. Many thanks to Joachim for that! +; +; An issue with using MMX for this is that the sample history array in the +; decorr_pass structure contains separate arrays for each channel while the +; MMX code wants there to be a single array of dual samples. The fix for +; this is to convert the data in the arrays on entry and exit, and this is +; made easy by the fact that the 8 MMX registers hold exactly the required +; amount of data (64 bytes)! +; +; This is written to work on an IA-32 processor. 
The arguments are on the +; stack at these locations (after 4 pushes, we do not use ebp as a base +; pointer): +; +; struct decorr_pass *dpp [esp+20] +; int32_t *buffer [esp+24] +; int32_t sample_count [esp+28] +; +; During the processing loops, the following registers are used: +; +; edi buffer pointer +; esi termination buffer pointer +; eax,ebx,edx used in default term to reduce calculation +; ebp decorr_pass pointer +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 0 (for pcmpeqd) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +_pack_decorr_stereo_pass_x86: + push ebp + push ebx + push edi + push esi + + mov ebp, [esp+20] ; ebp = *dpp + mov edi, [esp+24] ; edi = buffer + mov esi, [esp+28] + sal esi, 3 + jz bdone + add esi, edi ; esi = termination buffer pointer + + ; convert samples_A and samples_B array into samples_AB array for MMX + ; (the MMX registers provide exactly enough storage to do this easily) + + movq mm0, [ebp+16] + punpckldq mm0, [ebp+48] + movq mm1, [ebp+16] + punpckhdq mm1, [ebp+48] + movq mm2, [ebp+24] + punpckldq mm2, [ebp+56] + movq mm3, [ebp+24] + punpckhdq mm3, [ebp+56] + movq mm4, [ebp+32] + punpckldq mm4, [ebp+64] + movq mm5, [ebp+32] + punpckhdq mm5, [ebp+64] + movq mm6, [ebp+40] + punpckldq mm6, [ebp+72] + movq mm7, [ebp+40] + punpckhdq mm7, [ebp+72] + + movq [ebp+16], mm0 + movq [ebp+24], mm1 + movq [ebp+32], mm2 + movq [ebp+40], mm3 + movq [ebp+48], mm4 + movq [ebp+56], mm5 + movq [ebp+64], mm6 + movq [ebp+72], mm7 + + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + + mov eax, [ebp+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [ebp+8] ; mm5 = weight_AB masked to 16-bit + + movq mm4, [ebp+16] ; preload samples_AB[0] + + mov al, [ebp] ; get term and vector to correct loop + cmp al, 17 + je buff_term_17_loop 
+ cmp al, 18 + je buff_term_18_loop + cmp al, -1 + je buff_term_minus_1_loop + cmp al, -2 + je buff_term_minus_2_loop + cmp al, -3 + je buff_term_minus_3_loop + + pxor mm4, mm4 ; mm4 = 0 (for pcmpeqd) + xor eax, eax + xor ebx, ebx + add bl, [ebp] + mov ecx, 7 + and ebx, ecx + jmp buff_default_term_loop + + align 64 + +buff_default_term_loop: + movq mm2, [edi] ; mm2 = left_right + movq mm3, [ebp+16+eax*8] + inc eax + and eax, ecx + movq [ebp+16+ebx*8], mm2 + inc ebx + and ebx, ecx + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm4 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp edi, esi + jnz buff_default_term_loop + + jmp bdone + + align 64 + +buff_term_17_loop: + movq mm3, mm4 ; get previous calculated value + paddd mm3, mm4 + psubd mm3, [ebp+24] + movq [ebp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [edi] ; mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, 
mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_17_loop + + movq [ebp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_18_loop: + movq mm3, mm4 ; get previous calculated value + psubd mm3, [ebp+24] + psrad mm3, 1 + paddd mm3, mm4 ; mm3 = sam_AB + movq [ebp+24], mm4 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + movq mm2, [edi] ; mm2 = left_right + movq mm4, mm2 + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_18_loop + + movq [ebp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_minus_1_loop: + movq mm3, mm4 ; mm3 = previous calculated value + movq mm2, [edi] ; mm2 = left_right + movq mm4, mm2 + psrlq mm4, 32 + punpckldq mm3, mm2 ; mm3 = sam_AB + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + 
paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_minus_1_loop + + movq [ebp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_minus_2_loop: + movq mm2, [edi] ; mm2 = left_right + movq mm3, mm2 + psrlq mm3, 32 + por mm3, mm4 + punpckldq mm4, mm2 + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_minus_2_loop + + movq [ebp+16], mm4 ; post-store samples_AB[0] + jmp bdone + + align 64 + +buff_term_minus_3_loop: + movq mm2, [edi] ; mm2 = left_right + movq mm3, mm4 ; mm3 = previous calculated value (this pass's sam_AB) + movq mm4, mm2 ; mm4 = swap dwords of new data + psrlq mm4, 32 + punpckldq mm4, mm2 ; mm4 = swapped pair, copied to mm3 as sam_AB on the next pass + + movq mm1, mm3 + paddd mm1, mm1 + psrlw mm1, 1 + pmaddwd mm1, mm5 + + movq mm0, mm3 + psrld mm0, 15 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm2 ; store result + pxor mm1, mm1 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; 
mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi + jnz buff_term_minus_3_loop + + movq [ebp+16], mm4 ; post-store samples_AB[0] + +bdone: pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + movq [ebp+8], mm5 ; put weight_AB back + + ; convert samples_AB array back into samples_A and samples_B (NOTE(review): the early "jz bdone" taken for a zero sample_count lands here before mm5 is loaded and before the arrays are interleaved, so this write-back looks unsafe for a zero count -- confirm callers never pass 0) + + movq mm0, [ebp+16] + movq mm1, [ebp+24] + movq mm2, [ebp+32] + movq mm3, [ebp+40] + movq mm4, [ebp+48] + movq mm5, [ebp+56] + movq mm6, [ebp+64] + movq mm7, [ebp+72] + + movd DWORD PTR [ebp+16], mm0 + movd DWORD PTR [ebp+20], mm1 + movd DWORD PTR [ebp+24], mm2 + movd DWORD PTR [ebp+28], mm3 + movd DWORD PTR [ebp+32], mm4 + movd DWORD PTR [ebp+36], mm5 + movd DWORD PTR [ebp+40], mm6 + movd DWORD PTR [ebp+44], mm7 + + punpckhdq mm0, mm0 + punpckhdq mm1, mm1 + punpckhdq mm2, mm2 + punpckhdq mm3, mm3 + punpckhdq mm4, mm4 + punpckhdq mm5, mm5 + punpckhdq mm6, mm6 + punpckhdq mm7, mm7 + + movd DWORD PTR [ebp+48], mm0 + movd DWORD PTR [ebp+52], mm1 + movd DWORD PTR [ebp+56], mm2 + movd DWORD PTR [ebp+60], mm3 + movd DWORD PTR [ebp+64], mm4 + movd DWORD PTR [ebp+68], mm5 + movd DWORD PTR [ebp+72], mm6 + movd DWORD PTR [ebp+76], mm7 + + emms + + pop esi + pop edi + pop ebx + pop ebp + ret + +; These are assembly optimized versions of the following WavPack functions: +; +; void pack_decorr_stereo_pass_cont ( +; struct decorr_pass *dpp, +; int32_t *in_buffer, +; int32_t *out_buffer, +; int32_t sample_count); +; +; void pack_decorr_stereo_pass_cont_rev ( +; struct decorr_pass *dpp, +; int32_t *in_buffer, +; int32_t *out_buffer, +; int32_t sample_count); +; +; It performs a single pass of stereo decorrelation, transferring from the +; input buffer to the output buffer. 
Note that this version of the function +; requires that the up to 8 previous (depending on dpp->term) stereo samples +; are visible and correct. In other words, it ignores the "samples_*" +; fields in the decorr_pass structure and gets the history data directly +; from the source buffer. It does, however, return the appropriate history +; samples to the decorr_pass structure before returning. +; +; This is written to work on an IA-32 processor and uses the MMX extensions +; to improve the performance by processing both stereo channels together. +; It is based on the original MMX code written by Joachim Henke that used +; MMX intrinsics called from C. Many thanks to Joachim for that! +; +; No additional stack space is used; all storage is done in registers. The +; arguments on entry: +; +; struct decorr_pass *dpp [ebp+8] +; int32_t *in_buffer [ebp+12] +; int32_t *out_buffer [ebp+16] +; int32_t sample_count [ebp+20] +; +; During the processing loops, the following registers are used: +; +; edi input buffer pointer +; esi direction (-8 forward, +8 reverse) +; ebx delta from input to output buffer +; ecx sample count +; edx sign (dir) * term * -8 (terms 1-8 only) +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 weight sums +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +_pack_decorr_stereo_pass_cont_rev_x86: + push ebp + mov ebp, esp + push ebx ; save the registers that we need to + push esi + push edi + + mov esi, 8 ; esi indicates direction (inverted) + jmp start + +_pack_decorr_stereo_pass_cont_x86: + push ebp + mov ebp, esp + push ebx ; save the registers that we need to + push esi + push edi + + mov esi, -8 ; esi indicates direction (inverted) + +start: mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + + mov eax, [ebp+8] ; access dpp + mov eax, [eax+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + + mov eax, [ebp+8] ; access dpp + movq mm5, [eax+8] ; mm5 = weight_AB + movq mm4, 
[eax+88] ; mm4 = sum_AB + + mov edi, [ebp+12] ; edi = in_buffer + mov ebx, [ebp+16] + sub ebx, edi ; ebx = delta to output buffer + + mov ecx, [ebp+20] ; ecx = sample_count + test ecx, ecx + jz done + + mov eax, [ebp+8] ; *eax = dpp + mov eax, [eax] ; get term and vector to correct loop + cmp eax, 17 + je term_17_loop + cmp eax, 18 + je term_18_loop + cmp eax, -1 + je term_minus_1_loop + cmp eax, -2 + je term_minus_2_loop + cmp eax, -3 + je term_minus_3_loop + + sal eax, 3 + mov edx, eax ; edx = term * 8 to index correlation sample + test esi, esi ; test direction + jns default_term_loop + neg edx + jmp default_term_loop + + align 64 + +default_term_loop: + movq mm3, [edi+edx] ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz default_term_loop + + mov eax, [ebp+8] ; access dpp + movq [eax+8], mm5 ; put weight_AB back + movq [eax+88], mm4 ; put sum_AB back + emms + + mov edx, [ebp+8] ; access dpp with edx + mov ecx, [edx] ; ecx = dpp->term + +default_store_samples: + dec ecx + add edi, esi ; back up one full sample + mov eax, [edi+4] + mov [edx+ecx*4+48], eax ; store samples_B [ecx] + mov eax, [edi] + mov [edx+ecx*4+16], eax ; store samples_A [ecx] + test ecx, ecx + jnz default_store_samples + jmp done + + align 64 + +term_17_loop: 
+ movq mm3, [edi+esi] ; get previous calculated value + paddd mm3, mm3 + psubd mm3, [edi+esi*2] + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_17_loop + + mov eax, [ebp+8] ; access dpp + movq [eax+8], mm5 ; put weight_AB back + movq [eax+88], mm4 ; put sum_AB back + emms + jmp term_1718_common_store + + align 64 + +term_18_loop: + movq mm3, [edi+esi] ; get previous calculated value + movq mm0, mm3 + psubd mm3, [edi+esi*2] + psrad mm3, 1 + paddd mm3, mm0 ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddd mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + dec ecx + paddd mm4, mm5 ; add weights to sum + jnz 
term_18_loop + + mov eax, [ebp+8] ; access dpp + movq [eax+8], mm5 ; put weight_AB back + movq [eax+88], mm4 ; put sum_AB back + emms + +term_1718_common_store: + + mov eax, [ebp+8] ; access dpp + add edi, esi ; back up a full sample + mov edx, [edi+4] ; dpp->samples_B [0] = iptr [-1]; + mov [eax+48], edx + mov edx, [edi] ; dpp->samples_A [0] = iptr [-2]; + mov [eax+16], edx + add edi, esi ; back up another sample + mov edx, [edi+4] ; dpp->samples_B [1] = iptr [-3]; + mov [eax+52], edx + mov edx, [edi] ; dpp->samples_A [1] = iptr [-4]; + mov [eax+20], edx + jmp done + + align 64 + +term_minus_1_loop: + movq mm3, [edi+esi] ; mm3 = previous calculated value + movq mm2, [edi] ; mm2 = left_right + psrlq mm3, 32 + punpckldq mm3, mm2 ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_minus_1_loop + + mov eax, [ebp+8] ; access dpp + movq [eax+8], mm5 ; put weight_AB back + movq [eax+88], mm4 ; put sum_AB back + emms + + add edi, esi ; back up a full sample + mov edx, [edi+4] ; dpp->samples_A [0] = iptr [-1]; + mov eax, [ebp+8] + mov [eax+16], edx + jmp done + + align 64 + +term_minus_2_loop: + movq mm2, [edi] ; mm2 = left_right + movq mm3, mm2 ; mm3 = swap dwords + 
psrlq mm3, 32 + punpckldq mm3, [edi+esi] ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_minus_2_loop + + mov eax, [ebp+8] ; access dpp + movq [eax+8], mm5 ; put weight_AB back + movq [eax+88], mm4 ; put sum_AB back + emms + + add edi, esi ; back up a full sample + mov edx, [edi] ; dpp->samples_B [0] = iptr [-2]; + mov eax, [ebp+8] + mov [eax+48], edx + jmp done + + align 64 + +term_minus_3_loop: + movq mm0, [edi+esi] ; mm0 = previous calculated value + movq mm3, mm0 ; mm3 = swap dwords + psrlq mm3, 32 + punpckldq mm3, mm0 ; mm3 = sam_AB + + movq mm1, mm3 + pslld mm1, 17 + psrld mm1, 17 + pmaddwd mm1, mm5 + + movq mm0, mm3 + pslld mm0, 1 + psrld mm0, 16 + pmaddwd mm0, mm5 + + movq mm2, [edi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + psubd mm2, mm0 + psubd mm2, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi+ebx], mm2 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + sub edi, esi + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; 
mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddd mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubd mm5, mm1 + pxor mm5, mm0 + paddd mm4, mm5 ; add weights to sum + dec ecx + jnz term_minus_3_loop + + mov eax, [ebp+8] ; access dpp + movq [eax+8], mm5 ; put weight_AB back + movq [eax+88], mm4 ; put sum_AB back + emms + + add edi, esi ; back up a full sample + mov edx, [edi+4] ; dpp->samples_A [0] = iptr [-1]; + mov eax, [ebp+8] + mov [eax+16], edx + mov edx, [edi] ; dpp->samples_B [0] = iptr [-2]; + mov [eax+48], edx + +done: pop edi + pop esi + pop ebx + leave + ret + + +; This is an assembly optimized version of the following WavPack function: +; +; uint32_t decorr_mono_buffer (int32_t *buffer, +; struct decorr_pass *decorr_passes, +; int32_t num_terms, +; int32_t sample_count) +; +; Decorrelate a buffer of mono samples, in place, as specified by the array +; of decorr_pass structures. Note that this function does NOT return the +; dpp->samples_X[] values in the "normalized" positions for terms 1-8, so if +; the number of samples is not a multiple of MAX_TERM, these must be moved if +; they are to be used somewhere else. The magnitude of the output samples is +; accumulated and returned (see scan_max_magnitude() for more details). By +; using the overflow detection of the multiply instruction, this detects +; when the "long_math" variant is required. +; +; For the fastest possible operation with the four "common" decorrelation +; filters (i.e., fast, normal, high and very high) this function can be +; configured to include hardcoded versions of these filters that are created +; using macros. In that case, the passed filter is checked to make sure that +; it matches one of the four. If it doesn't, or if the hardcoded filters are +; not enabled, a "general" version of the decorrelation loop is used. 
This +; variable enables the hardcoded filters and can be disabled if there are +; problems with the code or macros: + + HARDCODED_FILTERS = 1 + +; This is written to work on an IA-32 processor. The arguments are on the +; stack at these locations (after 6 pushes, we do not use ebp as a base +; pointer): +; +; int32_t *buffer [esp+28] +; struct decorr_pass *dpp [esp+32] +; int32_t num_terms [esp+36] +; int32_t sample_count [esp+40] +; +; register usage: +; +; ecx = sample being decorrelated +; esi = sample up counter +; edi = *buffer +; ebp = *dpp +; +; stack usage: +; +; [esp+0] = dpp end ptr (unused in hardcoded filter case) +; [esp+4] = magnitude accumulator +; + if HARDCODED_FILTERS +; +; This macro is used for checking the decorr_passes array to make sure that the terms match +; the hardcoded terms. The terms of these filters are the first element in the tables defined +; in decorr_tables.h (with the negative terms replaced with 1). +; + +chkterm macro term, ebp_offset + cmp BYTE PTR [ebp], term + jnz use_general_version + add ebp, ebp_offset + endm + +; +; This macro processes the single specified term (with a fixed delta of 2) and updates the +; term pointer (ebp) with the specified offset when done. 
It assumes the following registers: +; +; ecx = sample being decorrelated +; esi = sample up counter (used for terms 1-8) +; ebp = decorr_pass pointer for this term (updated with "ebp_offset" when done) +; eax, ebx, edx = scratch +; + +exeterm macro term, ebp_offset + local over, cont, done + + if term le 8 + mov eax, esi + and eax, 7 + mov ebx, [ebp+16+eax*4] + if term ne 8 + add eax, term + and eax, 7 + endif + mov [ebp+16+eax*4], ecx + + elseif term eq 17 + + mov edx, [ebp+16] ; handle term 17 + mov [ebp+16], ecx + lea ebx, [edx+edx] + sub ebx, [ebp+20] ; ebx = 2 * old [ebp+16] - old [ebp+20] + mov [ebp+20], edx + + else + + mov edx, [ebp+16] ; handle term 18 + mov [ebp+16], ecx + lea ebx, [edx+edx*2] + sub ebx, [ebp+20] + sar ebx, 1 ; ebx = (3 * old [ebp+16] - old [ebp+20]) >> 1 + mov [ebp+20], edx + + endif + + mov eax, [ebp+8] + imul eax, ebx ; 32-bit multiply is almost always enough + jo over ; but handle overflow if it happens + sar eax, 10 + sbb ecx, eax ; borrow flag provides rounding + jmp cont +over: mov eax, [ebp+8] ; perform 64-bit multiply on overflow + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx +cont: je done ; skip the weight update if the result in ecx is zero + test ebx, ebx + je done ; ...or if the correlation sample in ebx is zero + xor ebx, ecx + sar ebx, 30 + or ebx, 1 ; this generates delta of 1 + sal ebx, 1 ; this generates delta of 2 + add [ebp+8], ebx ; adjust the weight at [ebp+8] by +/-2 +done: add ebp, ebp_offset + + endm + + endif ; end of macro definitions + +; entry point of function + +_pack_decorr_mono_buffer_x86: + push ebp ; save the registers that we need to + push ebx + push esi + push edi + xor eax, eax + push eax ; this is magnitude accumulator + push eax ; this will be dpp end ptr + + mov edi, [esp+28] ; edi is buffer pointer + xor esi, esi ; up counter = 0 + + cmp DWORD PTR [esp+40], 0 ; test & handle zero sample count & zero term count + jz mexit + cmp DWORD PTR [esp+36], 0 + jz mexit + + if HARDCODED_FILTERS + +; first check to make sure all the "deltas" are 2 + + mov ebp, [esp+32] ; ebp is decorr_pass pointer + mov ebx, [esp+36] ; get term count +deltas: cmp BYTE PTR [ebp+4], 2 ; make sure all the deltas are 2 + jnz 
use_general_version ; if any aren't, use general case + add ebp, 96 + dec ebx + jnz deltas + + mov ebp, [esp+32] ; ebp is decorr_pass pointer + mov edx, [esp+36] ; get term count + cmp dl, 2 ; 2 terms is "fast" + jnz nfast + chkterm 18, 96 ; check "fast" terms + chkterm 17, -96 + jmp mono_fast_loop + +nfast: cmp dl, 5 ; 5 terms is "normal" + jnz nnorm + chkterm 18, 96 ; check "normal" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 17, 96 + chkterm 3, 96*-4 + jmp mono_normal_loop + +nnorm: cmp dl, 10 ; 10 terms is "high" + jnz nhigh + chkterm 18, 96 ; check "high" terms + chkterm 18, 96 + chkterm 18, 96 + chkterm 1, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 5, 96 + chkterm 1, 96 + chkterm 17, 96 + chkterm 4, 96*-9 + jmp mono_high_loop + +nhigh: cmp dl, 16 ; 16 terms is "very high" + jnz use_general_version ; if none of these, use general version + chkterm 18, 96 ; else check "very high" terms + chkterm 18, 96 + chkterm 2, 96 + chkterm 3, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96 + chkterm 4, 96 + chkterm 7, 96 + chkterm 5, 96 + chkterm 3, 96 + chkterm 6, 96 + chkterm 8, 96 + chkterm 1, 96 + chkterm 18, 96 + chkterm 2, 96*-15 + jmp mono_vhigh_loop + + align 64 + +mono_fast_loop: + mov ecx, [edi+esi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 17, -96 + + mov [edi+esi*4], ecx ; store completed sample + mov eax, ecx ; magnitude accumulator |= (sample < 0) ? ~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi ; increment sample index + cmp esi, [esp+40] + jnz mono_fast_loop ; loop back for all samples + jmp mexit + + align 64 + +mono_normal_loop: + mov ecx, [edi+esi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 17, 96 + exeterm 3, 96*-4 + + mov [edi+esi*4], ecx ; store completed sample + mov eax, ecx ; magnitude accumulator |= (sample < 0) ? 
~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi ; increment sample index + cmp esi, [esp+40] + jnz mono_normal_loop ; loop back for all samples + jmp mexit + + align 64 + +mono_high_loop: + mov ecx, [edi+esi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 18, 96 + exeterm 1, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 5, 96 + exeterm 1, 96 + exeterm 17, 96 + exeterm 4, 96*-9 + + mov [edi+esi*4], ecx ; store completed sample + mov eax, ecx ; magnitude accumulator |= (sample < 0) ? ~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi ; increment sample index + cmp esi, [esp+40] + jnz mono_high_loop ; loop back for all samples + jmp mexit + + align 64 + +mono_vhigh_loop: + mov ecx, [edi+esi*4] ; ecx is the sample we're decorrelating + + exeterm 18, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 3, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96 + exeterm 4, 96 + exeterm 7, 96 + exeterm 5, 96 + exeterm 3, 96 + exeterm 6, 96 + exeterm 8, 96 + exeterm 1, 96 + exeterm 18, 96 + exeterm 2, 96*-15 + + mov [edi+esi*4], ecx ; store completed sample + mov eax, ecx ; magnitude accumulator |= (sample < 0) ? 
~sample : sample + cdq + xor eax, edx + or [esp+4], eax + inc esi ; increment sample index + cmp esi, [esp+40] + jnz mono_vhigh_loop ; loop back for all samples + jmp mexit + + endif ; end of HARDCODED_FILTERS + +use_general_version: + mov ebp, [esp+32] + mov edx, [esp+36] ; get number of terms + imul eax, edx, 96 ; calculate & store termination check ptr + add eax, [esp+32] + mov [esp], eax + jmp decorrelate_loop + + align 64 + +decorrelate_loop: + mov ecx, [edi+esi*4] ; ecx is the sample we're decorrelating +nxterm: mov edx, [ebp] + cmp dl, 17 + jge @f + + mov eax, esi + and eax, 7 + mov ebx, [ebp+16+eax*4] + add eax, edx + and eax, 7 + mov [ebp+16+eax*4], ecx + jmp domult + + align 4 +@@: mov edx, [ebp+16] + mov [ebp+16], ecx + je @f + lea ebx, [edx+edx*2] + sub ebx, [ebp+20] + sar ebx, 1 + mov [ebp+20], edx + jmp domult + + align 4 +@@: lea ebx, [edx+edx] + sub ebx, [ebp+20] + mov [ebp+20], edx + +domult: mov eax, [ebp+8] + mov edx, eax + imul eax, ebx + jo multov ; on overflow, jump to use 64-bit imul varient + sar eax, 10 + sbb ecx, eax + je @f + test ebx, ebx + je @f + xor ebx, ecx + sar ebx, 31 + xor edx, ebx + add edx, [ebp+4] + xor edx, ebx + mov [ebp+8], edx +@@: add ebp, 96 + cmp ebp, [esp] + jnz nxterm + + mov [edi+esi*4], ecx ; store completed sample + mov eax, ecx ; magnitude accumulator |= (sample < 0) ? ~sample : sample + cdq + xor eax, edx + or [esp+4], eax + mov ebp, [esp+32] ; reload decorr_passes pointer to first term + inc esi ; increment sample index + cmp esi, [esp+40] + jnz decorrelate_loop + jmp mexit + + align 4 +multov: mov eax, [ebp+8] + imul ebx + shr eax, 10 + sbb ecx, eax + shl edx, 22 + sub ecx, edx + je @f + test ebx, ebx + je @f + xor ebx, ecx + sar ebx, 31 + mov eax, [ebp+8] + xor eax, ebx + add eax, [ebp+4] + xor eax, ebx + mov [ebp+8], eax +@@: add ebp, 96 + cmp ebp, [esp] + jnz nxterm + + mov [edi+esi*4], ecx ; store completed sample + mov eax, ecx ; magnitude accumulator |= (sample < 0) ? 
~sample : sample + cdq + xor eax, edx + or [esp+4], eax + mov ebp, [esp+32] ; reload decorr_passes pointer to first term + inc esi ; increment sample index + cmp esi, [esp+40] + jnz decorrelate_loop ; loop all the way back this time + +mexit: pop eax ; discard the dpp end ptr slot + pop eax ; pop magnitude accumulator (left in eax as the return value) + pop edi + pop esi + pop ebx + pop ebp + ret + + +; This is an assembly optimized version of the following WavPack function: +; +; void decorr_mono_pass_cont (int32_t *out_buffer, +; int32_t *in_buffer, +; struct decorr_pass *dpp, +; int32_t sample_count); +; +; It performs a single pass of mono decorrelation, transferring from the +; input buffer to the output buffer. Note that this version of the function +; requires that the up to 8 previous (depending on dpp->term) mono samples +; are visible and correct. In other words, it ignores the "samples_*" +; fields in the decorr_pass structure and gets the history data directly +; from the source buffer. It does, however, return the appropriate history +; samples to the decorr_pass structure before returning. +; +; By using the overflow detection of the multiply instruction, it detects +; when the "long_math" variant is required and automatically does it. +; +; This is written to work on an IA-32 processor. 
; The arguments on entry:
;
;   int32_t *out_buffer         [ebp+8]
;   int32_t *in_buffer          [ebp+12]
;   struct decorr_pass *dpp     [ebp+16]
;   int32_t sample_count        [ebp+20]
;
; Register / stack usage:
;
; esi = source ptr
; edi = destination ptr
; ecx = term * -4 (default terms)
; ecx = previous sample (terms 17 & 18)
; ebp = weight
; [esp] = delta
; [esp+4] = weight sum
; [esp+8] = eptr
;
; Fields of the decorr_pass structure are addressed by fixed offsets from
; dpp: +0 term, +4 delta, +8 weight, +16 samples_A [], +88 sum_A.
;
; Rounding in apply_weight: "sar edx, 10" leaves the last bit shifted out in
; the carry flag, so the "sbb" that follows subtracts (product >> 10) plus
; that bit, i.e. (product + 512) >> 10. The 64-bit overflow paths get the
; same rounding from "shr eax, 10" followed by "adc". None of lodsd, stosd
; or mov alter the flags, which is why they may sit between those pairs and
; before the "je" that tests the result of the subtraction.
;

_pack_decorr_mono_pass_cont_x86:
        push    ebp
        mov     ebp, esp
        push    ebx                         ; save the registers that we need to
        push    esi
        push    edi
        cld

        mov     esi, [ebp+12]               ; esi = in_buffer
        mov     edi, [ebp+8]                ; edi = out_buffer
        mov     edx, [ebp+16]               ; edx = dpp
        mov     ecx, [ebp+20]               ; ecx = sample count
        mov     ebp, [edx+8]                ; ebp = weight
        lea     eax, [esi+ecx*4]            ; calc & push eptr (access with [esp+8])
        push    eax
        mov     eax, [edx+88]               ; push dpp->sum_A (access with [esp+4])
        push    eax
        mov     eax, [edx+4]                ; push delta (access with [esp])
        push    eax
        test    ecx, ecx                    ; test for and handle zero count
        jz      mono_done

        cld                                 ; we use lodsd/stosd
        mov     ecx, [esi-4]                ; preload last sample
        mov     eax, [edx]                  ; get term & branch for terms 17 & 18
        cmp     eax, 17
        je      mono_term_17_loop
        cmp     eax, 18
        je      mono_term_18_loop

        imul    ecx, eax, -4                ; ecx is index to correlation sample now
        jmp     mono_default_term_loop

        align  64

mono_default_term_loop:
        mov     edx, [esi+ecx]              ; edx = sam_A = sample "term" positions back
        mov     ebx, edx                    ; ebx = sam_A (kept for the weight update)
        imul    edx, ebp                    ; 32-bit weight * sam_A
        jo      over                        ; product overflowed, redo with 64-bit result
        lodsd                               ; eax = next input sample
        sar     edx, 10                     ; carry = rounding bit (see note above)
        sbb     eax, edx                    ; eax = sample - apply_weight (sam_A)
        jmp     @f
over:   mov     eax, ebx
        imul    ebp                         ; edx:eax = 64-bit weight * sam_A
        shl     edx, 22
        shr     eax, 10
        adc     edx, eax                    ; edx = apply_weight (sam_A)
        lodsd
        sub     eax, edx
@@:     stosd                               ; store result (flags still from sbb/sub)
        je      @f                          ; no weight update if result is zero...
        test    ebx, ebx
        je      @f                          ; ...or if sam_A is zero
        xor     eax, ebx                    ; sign bit set if result and sam_A differ in sign
        cdq                                 ; edx = 0 (same sign) or -1 (different sign)
        xor     ebp, edx                    ; these three instructions add delta to the
        add     ebp, [esp]                  ; weight when edx = 0 and subtract it when
        xor     ebp, edx                    ; edx = -1
@@:     add     [esp+4], ebp                ; weight sum += weight
        cmp     esi, [esp+8]
        jnz     mono_default_term_loop

        mov     ecx, ebp                    ; ecx = weight
        mov     eax, [esp+4]                ; eax = weight sum
        lea     ebp, [esp+24]               ; restore ebp (we've pushed 6 DWORDS)
        mov     edx, [ebp+16]               ; edx = dpp
        mov     [edx+8], ecx                ; put weight back
        mov     [edx+88], eax               ; put dpp->sum_A back
        mov     ecx, [edx]                  ; ecx = dpp->term

mono_default_store_samples:
        dec     ecx
        sub     esi, 4                      ; back up one sample
        mov     eax, [esi]
        mov     [edx+ecx*4+16], eax         ; store samples_A [ecx]
        test    ecx, ecx
        jnz     mono_default_store_samples
        jmp     mono_done

        align  64

mono_term_17_loop:
        lea     edx, [ecx+ecx]              ; edx = 2 * previous sample
        sub     edx, [esi-8]                ; edx = sam_A = 2 * s[-1] - s[-2]
        mov     ebx, edx                    ; ebx = sam_A
        imul    edx, ebp
        jo      over17
        sar     edx, 10
        lodsd
        mov     ecx, eax                    ; current input becomes the previous sample
        sbb     eax, edx
        jmp     @f
over17: mov     eax, ebx
        imul    ebp
        shl     edx, 22
        shr     eax, 10
        adc     edx, eax                    ; edx = apply_weight (sam_A)
        lodsd
        mov     ecx, eax
        sub     eax, edx
@@:     stosd
        je      @f
        test    ebx, ebx
        je      @f
        xor     eax, ebx
        cdq
        xor     ebp, edx
        add     ebp, [esp]
        xor     ebp, edx
@@:     add     [esp+4], ebp
        cmp     esi, [esp+8]
        jnz     mono_term_17_loop
        jmp     mono_term_1718_exit

        align  64

mono_term_18_loop:
        lea     edx, [ecx+ecx*2]            ; edx = 3 * previous sample
        sub     edx, [esi-8]
        sar     edx, 1                      ; edx = (3 * s[-1] - s[-2]) >> 1
        mov     ebx, edx                    ; ebx = sam_A
        imul    edx, ebp
        jo      over18
        sar     edx, 10
        lodsd
        mov     ecx, eax
        sbb     eax, edx
        jmp     @f
over18: mov     eax, ebx
        imul    ebp
        shl     edx, 22
        shr     eax, 10
        adc     edx, eax                    ; edx = apply_weight (sam_A)
        lodsd
        mov     ecx, eax
        sub     eax, edx
@@:     stosd
        je      @f
        test    ebx, ebx
        je      @f
        xor     eax, ebx
        cdq
        xor     ebp, edx
        add     ebp, [esp]
        xor     ebp, edx
@@:     add     [esp+4], ebp
        cmp     esi, [esp+8]
        jnz     mono_term_18_loop

mono_term_1718_exit:
        mov     ecx, ebp                    ; ecx = weight
        mov     eax, [esp+4]                ; eax = weight sum
        lea     ebp, [esp+24]               ; restore ebp (we've pushed 6 DWORDS)
        mov     edx, [ebp+16]               ; edx = dpp
        mov     [edx+8], ecx                ; put weight back
        mov     [edx+88], eax               ; put dpp->sum_A back
        mov     eax, [esi-4]                ; dpp->samples_A [0] = bptr [-1]
        mov     [edx+16], eax
        mov     eax, [esi-8]                ; dpp->samples_A [1] = bptr [-2]
        mov     [edx+20], eax

mono_done:
        add     esp, 12                     ; deallocate stack space
        pop     edi                         ; pop saved registers & return
        pop     esi
        pop     ebx
        pop     ebp
        ret


; This is an assembly optimized version of the following WavPack function:
;
; uint32_t scan_max_magnitude (int32_t *buffer, int32_t sample_count);
;
; This function scans a buffer of
; signed 32-bit ints and returns the magnitude
; of the largest sample, with a power-of-two resolution. It might be more
; useful to return the actual maximum absolute value, but that implementation
; would be slower. Instead, this simply returns the "or" of all the values
; "xor"d with their own sign, like so:
;
;     while (sample_count--)
;         magnitude |= (*buffer < 0) ? ~*buffer++ : *buffer++;
;
; This is written to work on an IA-32 processor and uses the MMX extensions
; to improve the performance by processing two samples together. The arguments
; are on the stack at these locations (after 4 pushes, we do not use ebp as a
; base pointer):
;
;   int32_t *buffer             [esp+20]
;   uint32_t sample_count       [esp+24]
;
; During the processing loops, the following registers are used:
;
;   edi         buffer pointer
;   esi         termination buffer pointer
;   ebx         single magnitude accumulator
;   mm0         dual magnitude accumulator
;   mm1, mm2    scratch
;
; The MMX loop consumes 8 samples (32 bytes) per iteration; the remaining
; sample_count & 7 samples are handled one at a time afterwards.
;

_scan_max_magnitude_x86:
        push    ebp
        push    ebx
        push    esi
        push    edi

        xor     ebx, ebx                    ; clear magnitude accumulator
        mov     edi, [esp+20]               ; edi = buffer pointer

        mov     eax, [esp+24]               ; eax = count
        and     eax, 7
        mov     ecx, eax                    ; ecx = leftover samples to "manually" scan at end

        mov     eax, [esp+24]               ; eax = count
        shr     eax, 3                      ; eax = num of loops to process mmx (8 samples/loop)
        shl     eax, 5                      ; eax = num of bytes to process mmx (32 bytes/loop)
        jz      nommx                       ; jump around if no mmx loops to do (< 8 samples)

        pxor    mm0, mm0                    ; clear dual magnitude accumulator
        add     eax, edi                    ; esi = termination buffer pointer for mmx loop
        mov     esi, eax
        jmp     mmxlp

        align  64

mmxlp:  movq    mm1, [edi]                  ; get two consecutive samples in mm1 & mm2
        movq    mm2, mm1
        psrad   mm1, 31                     ; mm1 = sign (mm2)
        pxor    mm1, mm2                    ; mm1 = sample ^ sign for each dword, or into result
        por     mm0, mm1

        movq    mm1, [edi+8]                ; do it again with 6 more samples
        movq    mm2, mm1
        psrad   mm1, 31
        pxor    mm1, mm2
        por     mm0, mm1

        movq    mm1, [edi+16]
        movq    mm2, mm1
        psrad   mm1, 31
        pxor    mm1, mm2
        por     mm0, mm1

        movq    mm1, [edi+24]
        movq    mm2, mm1
        psrad   mm1, 31
        pxor    mm1, mm2
        por     mm0, mm1

        add     edi, 32
        cmp     edi, esi
        jnz     mmxlp

        movd    eax, mm0                    ; ebx = "or" of high and low mm0
        punpckhdq mm0, mm0
        movd    ebx, mm0
        or      ebx, eax
        emms                                ; leave MMX state before returning

nommx:  and     ecx, ecx                    ; any leftover samples to do?
        jz      noleft

leftlp: mov     eax, [edi]
        cdq                                 ; edx = sign of sample (0 or -1)
        xor     eax, edx                    ; eax = (sample < 0) ? ~sample : sample
        or      ebx, eax
        add     edi, 4
        loop    leftlp

noleft: mov     eax, ebx                    ; move magnitude to eax for return
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
082, 083, 084, 085 + byte 086, 087, 088, 089, 08a, 08b, 08c, 08d, 08e, 08f, 090, 091, 092, 093, 094, 095 + byte 096, 097, 098, 099, 09a, 09b, 09b, 09c, 09d, 09e, 09f, 0a0, 0a1, 0a2, 0a3, 0a4 + byte 0a5, 0a6, 0a7, 0a8, 0a9, 0a9, 0aa, 0ab, 0ac, 0ad, 0ae, 0af, 0b0, 0b1, 0b2, 0b2 + byte 0b3, 0b4, 0b5, 0b6, 0b7, 0b8, 0b9, 0b9, 0ba, 0bb, 0bc, 0bd, 0be, 0bf, 0c0, 0c0 + byte 0c1, 0c2, 0c3, 0c4, 0c5, 0c6, 0c6, 0c7, 0c8, 0c9, 0ca, 0cb, 0cb, 0cc, 0cd, 0ce + byte 0cf, 0d0, 0d0, 0d1, 0d2, 0d3, 0d4, 0d4, 0d5, 0d6, 0d7, 0d8, 0d8, 0d9, 0da, 0db + byte 0dc, 0dc, 0dd, 0de, 0df, 0e0, 0e0, 0e1, 0e2, 0e3, 0e4, 0e4, 0e5, 0e6, 0e7, 0e7 + byte 0e8, 0e9, 0ea, 0ea, 0eb, 0ec, 0ed, 0ee, 0ee, 0ef, 0f0, 0f1, 0f1, 0f2, 0f3, 0f4 + byte 0f4, 0f5, 0f6, 0f7, 0f7, 0f8, 0f9, 0f9, 0fa, 0fb, 0fc, 0fc, 0fd, 0fe, 0ff, 0ff + + .radix 10 + +_log2buffer_x86: + push ebp + push ebx + push esi + push edi + cld + + mov esi, [esp+20] ; esi = sample source pointer + xor edi, edi ; edi = 0 (accumulator) + mov ebx, [esp+24] ; ebx = num_samples + test ebx, ebx ; exit now if none, sum = 0 + jz normal_exit + +; These three instructions allow this to be PIC (position independent code). The purpose is to +; load the address of the log2_table into ebp regardless of where this is all loaded in memory. 
+ + call nexti ; push address of nexti (return address) +nexti: pop ebp ; pop address of nexti into ebp + sub ebp, nexti - log2_table ; offset to log2_table + + mov eax, [esp+28] ; eax = limit + test eax, eax ; we have separate loops for limit and no limit + jz no_limit_loop + jmp limit_loop + + align 64 + +limit_loop: + mov eax, [esi] ; get next sample into eax + cdq ; edx = sign of sample (for abs) + add esi, 4 + xor eax, edx + sub eax, edx + je L40 ; skip if sample was zero + mov edx, eax ; move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx ; ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] ; eax = number used bits in sample (1 - 32) + sub ecx, 8 ; ecx = shift right amount (-8 to 23) + ror edx, cl ; use rotate to do "signed" shift + sal eax, 8 ; move nbits to integer portion of log + movzx edx, dl ; dl = mantissa, look up log fraction in table + mov al, BYTE PTR [ebp+edx] ; eax = combined integer and fraction for full log + add edi, eax ; add to running sum and compare to limit + cmp eax, [esp+28] + jge limit_exceeded +L40: sub ebx, 1 ; loop back if more samples + jne limit_loop + jmp normal_exit + + align 64 + +no_limit_loop: + mov eax, [esi] ; get next sample into eax + cdq ; edx = sign of sample (for abs) + add esi, 4 + xor eax, edx + sub eax, edx + je L45 ; skip if sample was zero + mov edx, eax ; move to edx and apply rounding + shr eax, 9 + add edx, eax + bsr ecx, edx ; ecx = MSB set in sample (0 - 31) + lea eax, [ecx+1] ; eax = number used bits in sample (1 - 32) + sub ecx, 8 ; ecx = shift right amount (-8 to 23) + ror edx, cl ; use rotate to do "signed" shift + sal eax, 8 ; move nbits to integer portion of log + movzx edx, dl ; dl = mantissa, look up log fraction in table + mov al, BYTE PTR [ebp+edx] ; eax = combined integer and fraction for full log + add edi, eax ; add to running sum +L45: sub ebx, 1 ; loop back if more samples + jne no_limit_loop + jmp normal_exit + +limit_exceeded: + mov edi, -1 ; -1 return means log limit 
; Helper function to determine if specified CPU feature is available (used here for MMX).
; Input parameter is index of feature to be checked (EDX from CPUID(1) only, MMX = 23).
; Return value is the specified bit (0 or 1) or 0 if CPUID is not supported.
;
; CPUID support is detected by trying to toggle the ID flag (bit 21, mask
; 200000h) in eflags: the flag can only be changed on processors that
; implement the instruction. The single parameter is at [esp+4] on entry and
; therefore at [esp+8] once ebx has been pushed.

_pack_cpu_has_feature_x86:
        pushfd                              ; save eflags
        pushfd                              ; push another copy
        xor     dword ptr [esp], 200000h    ; toggle ID bit on stack & pop it back into eflags
        popfd
        pushfd                              ; store possibly modified eflags
        pop     eax                         ; and pop back into eax
        xor     eax, [esp]                  ; compare to original pushed eflags
        popfd                               ; restore original eflags
        and     eax, 200000h                ; eax is non-zero if eflags ID bit was changeable
        jz      oldcpu                      ; return zero if CPUID is not available (wow!)

        push    ebx                         ; we must save ebx (cpuid overwrites it)
        mov     eax, 1                      ; do cpuid (1) to get features into edx
        cpuid
        mov     eax, edx                    ; copy into eax for shift
        mov     cl, [esp+8]                 ; get parameter and shift that bit index into LSB
        sar     eax, cl
        and     eax, 1
        pop     ebx                         ; restore ebx and return 0 or 1

oldcpu: ret                                 ; return value in eax
#ifdef USE_NEXT8_OPTIMIZATION
// Lookup table, indexed by a byte value (0 - 255), giving the number of
// consecutive 1 bits in that byte counting up from the least significant
// bit (its "trailing ones"): 0x00 -> 0, 0x01 -> 1, 0x03 -> 2, 0x07 -> 3 ...
// 0xff -> 8. It lets the decoder read the unary run of ones that prefixes
// each entropy code a byte at a time instead of bit by bit.
static const char ones_count_table [] = {
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8
};
#endif
///////////////////////////// executable code ////////////////////////////////

static uint32_t __inline read_code (Bitstream *bs, uint32_t maxcode);

// Read the next word from the bitstream "wvbits" and return the value. This
// function can be used for hybrid or lossless streams, but since an
// optimized version is available for lossless this function would normally
// be used for hybrid only. If a hybrid lossless stream is being read then
// the "correction" offset is written at the specified pointer. A return value
// of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or
// some other error occurred.
//
// Parameters:
//   wps         stream whose "wvbits" (and, when open, "wvcbits") bitstreams
//               and entropy state in wps->w are read and updated
//   chan        index of the entropy_data entry in wps->w.c to use
//   correction  optional (may be NULL); set to 0 on entry and to the
//               correction offset when the "wvcbits" stream is open and the
//               channel has a non-zero error_limit
//
// The function works in four steps: (1) when both channels' first medians
// are below 2 and nothing is being held, handle a run of zero samples;
// (2) obtain "ones_count", the unary prefix of the code, using one of three
// compile-time variants; (3) turn ones_count into a [low, high] interval
// with the running medians; (4) read the position inside that interval and
// the sign bit.

int32_t FASTCALL get_word (WavpackStream *wps, int chan, int32_t *correction)
{
    register struct entropy_data *c = wps->w.c + chan;
    uint32_t ones_count, low, mid, high;
    int32_t value;
    int sign;

    if (!wps->wvbits.ptr)
        return WORD_EOF;

    if (correction)
        *correction = 0;

    // zero-run mode: only entered when median [0] of both channels is 0 or 1
    // and no holding_zero / holding_one state is pending

    if (!(wps->w.c [0].median [0] & ~1) && !wps->w.holding_zero && !wps->w.holding_one && !(wps->w.c [1].median [0] & ~1)) {
        uint32_t mask;
        int cbits;

        if (wps->w.zeros_acc) {
            // still inside a previously decoded run of zeros
            if (--wps->w.zeros_acc) {
                c->slow_level -= (c->slow_level + SLO) >> SLS;
                return 0;
            }
        }
        else {
            // the run length is coded as a unary count of 1 bits (cbits)...
            for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);

            if (cbits == 33)
                return WORD_EOF;

            // ...which is the value itself for 0 and 1, otherwise it is
            // followed by (cbits - 1) low-order bits with an implied top bit
            if (cbits < 2)
                wps->w.zeros_acc = cbits;
            else {
                for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1)
                    if (getbit (&wps->wvbits))
                        wps->w.zeros_acc |= mask;

                wps->w.zeros_acc |= mask;
            }

            if (wps->w.zeros_acc) {
                c->slow_level -= (c->slow_level + SLO) >> SLS;
                CLEAR (wps->w.c [0].median);
                CLEAR (wps->w.c [1].median);
                return 0;
            }
        }
    }

    if (wps->w.holding_zero)
        ones_count = wps->w.holding_zero = 0;
    else {
#ifdef USE_CTZ_OPTIMIZATION
        // variant 1: refill the shift register until it holds at least
        // LIMIT_ONES bits, then count its trailing ones with a single
        // bit-scan of the inverted register

        while (wps->wvbits.bc < LIMIT_ONES) {
            if (++(wps->wvbits.ptr) == wps->wvbits.end)
                wps->wvbits.wrap (&wps->wvbits);

            wps->wvbits.sr |= *(wps->wvbits.ptr) << wps->wvbits.bc;
            wps->wvbits.bc += sizeof (*(wps->wvbits.ptr)) * 8;
        }

#ifdef _WIN32
        { unsigned long res; _BitScanForward (&res, (unsigned long)~wps->wvbits.sr); ones_count = (uint32_t) res; }
#else
        ones_count = __builtin_ctz (~wps->wvbits.sr);
#endif

        if (ones_count >= LIMIT_ONES) {
            // the run may extend past the bits examined: consume them and
            // keep counting one bit at a time
            wps->wvbits.bc -= ones_count;
            wps->wvbits.sr >>= ones_count;

            for (; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);

            if (ones_count == (LIMIT_ONES + 1))
                return WORD_EOF;

            if (ones_count == LIMIT_ONES) {
                // escape: the remainder of the count uses the same
                // unary-plus-binary coding as the zero-run length above
                uint32_t mask;
                int cbits;

                for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);

                if (cbits == 33)
                    return WORD_EOF;

                if (cbits < 2)
                    ones_count = cbits;
                else {
                    for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
                        if (getbit (&wps->wvbits))
                            ones_count |= mask;

                    ones_count |= mask;
                }

                ones_count += LIMIT_ONES;
            }
        }
        else {
            // consume the ones and the 0 bit that terminated them
            wps->wvbits.bc -= ones_count + 1;
            wps->wvbits.sr >>= ones_count + 1;
        }
#elif defined (USE_NEXT8_OPTIMIZATION)
        // variant 2: look at the next 8 bits and use ones_count_table unless
        // all 8 are ones, in which case continue bit by bit

        int next8;

        if (wps->wvbits.bc < 8) {
            if (++(wps->wvbits.ptr) == wps->wvbits.end)
                wps->wvbits.wrap (&wps->wvbits);

            next8 = (wps->wvbits.sr |= *(wps->wvbits.ptr) << wps->wvbits.bc) & 0xff;
            wps->wvbits.bc += sizeof (*(wps->wvbits.ptr)) * 8;
        }
        else
            next8 = wps->wvbits.sr & 0xff;

        if (next8 == 0xff) {
            wps->wvbits.bc -= 8;
            wps->wvbits.sr >>= 8;

            for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);

            if (ones_count == (LIMIT_ONES + 1))
                return WORD_EOF;

            if (ones_count == LIMIT_ONES) {
                uint32_t mask;
                int cbits;

                for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);

                if (cbits == 33)
                    return WORD_EOF;

                if (cbits < 2)
                    ones_count = cbits;
                else {
                    for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
                        if (getbit (&wps->wvbits))
                            ones_count |= mask;

                    ones_count |= mask;
                }

                ones_count += LIMIT_ONES;
            }
        }
        else {
            wps->wvbits.bc -= (ones_count = ones_count_table [next8]) + 1;
            wps->wvbits.sr >>= ones_count + 1;
        }
#else
        // variant 3: plain bit-at-a-time count with the same escape coding

        for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);

        if (ones_count >= LIMIT_ONES) {
            uint32_t mask;
            int cbits;

            if (ones_count == (LIMIT_ONES + 1))
                return WORD_EOF;

            for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);

            if (cbits == 33)
                return WORD_EOF;

            if (cbits < 2)
                ones_count = cbits;
            else {
                for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
                    if (getbit (&wps->wvbits))
                        ones_count |= mask;

                ones_count |= mask;
            }

            ones_count += LIMIT_ONES;
        }
#endif

        // the low bit of the raw count is carried over to the next call as
        // holding_one / holding_zero; the remaining bits (plus one if a
        // "one" was being held) select the zone

        if (wps->w.holding_one) {
            wps->w.holding_one = ones_count & 1;
            ones_count = (ones_count >> 1) + 1;
        }
        else {
            wps->w.holding_one = ones_count & 1;
            ones_count >>= 1;
        }

        wps->w.holding_zero = ~wps->w.holding_one & 1;
    }

    if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
        update_error_limit (wps);

    // map ones_count to the interval [low, high] using the three medians,
    // adapting each median that was consulted (the GET_MED / INC_MEDn /
    // DEC_MEDn macros are defined outside this file section)

    if (ones_count == 0) {
        low = 0;
        high = GET_MED (0) - 1;
        DEC_MED0 ();
    }
    else {
        low = GET_MED (0);
        INC_MED0 ();

        if (ones_count == 1) {
            high = low + GET_MED (1) - 1;
            DEC_MED1 ();
        }
        else {
            low += GET_MED (1);
            INC_MED1 ();

            if (ones_count == 2) {
                high = low + GET_MED (2) - 1;
                DEC_MED2 ();
            }
            else {
                low += (ones_count - 2) * GET_MED (2);
                high = low + GET_MED (2) - 1;
                INC_MED2 ();
            }
        }
    }

    low &= 0x7fffffff;
    high &= 0x7fffffff;

    if (low > high)         // make sure high and low make sense
        high = low;

    mid = (high + low + 1) >> 1;

    // with no error limit the exact offset in the interval is read; otherwise
    // the interval is halved one bit at a time until it is no wider than
    // error_limit and its midpoint is used

    if (!c->error_limit)
        mid = read_code (&wps->wvbits, high - low) + low;
    else while (high - low > c->error_limit) {
        if (getbit (&wps->wvbits))
            mid = (high + (low = mid) + 1) >> 1;
        else
            mid = ((high = mid - 1) + low + 1) >> 1;
    }

    sign = getbit (&wps->wvbits);

    // the correction stream supplies the exact value inside the remaining
    // interval; report its signed distance from mid

    if (bs_is_open (&wps->wvcbits) && c->error_limit) {
        value = read_code (&wps->wvcbits, high - low) + low;

        if (correction)
            *correction = sign ? (mid - value) : (value - mid);
    }

    if (wps->wphdr.flags & HYBRID_BITRATE) {
        c->slow_level -= (c->slow_level + SLO) >> SLS;
        c->slow_level += wp_log2 (mid);
    }

    return sign ? ~mid : mid;   // negative values are stored as the one's complement
}
~low : low; + + if (++csamples == nsamples) + break; + + if (!(wps->wphdr.flags & MONO_DATA)) + c = wps->w.c + (csamples & 1); + } + + if (wps->w.c [0].median [0] < 2 && !wps->w.holding_one && wps->w.c [1].median [0] < 2) { + uint32_t mask; + int cbits; + + if (wps->w.zeros_acc) { + if (--wps->w.zeros_acc) { + buffer [csamples] = 0; + continue; + } + } + else { + for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); + + if (cbits == 33) + break; + + if (cbits < 2) + wps->w.zeros_acc = cbits; + else { + for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1) + if (getbit (bs)) + wps->w.zeros_acc |= mask; + + wps->w.zeros_acc |= mask; + } + + if (wps->w.zeros_acc) { + CLEAR (wps->w.c [0].median); + CLEAR (wps->w.c [1].median); + buffer [csamples] = 0; + continue; + } + } + } + +#ifdef USE_CTZ_OPTIMIZATION + while (bs->bc < LIMIT_ONES) { + if (++(bs->ptr) == bs->end) + bs->wrap (bs); + + bs->sr |= *(bs->ptr) << bs->bc; + bs->bc += sizeof (*(bs->ptr)) * 8; + } + +#ifdef _WIN32 + { unsigned long res; _BitScanForward (&res, (unsigned long)~wps->wvbits.sr); ones_count = (uint32_t) res; } +#else + ones_count = __builtin_ctz (~wps->wvbits.sr); +#endif + + if (ones_count >= LIMIT_ONES) { + bs->bc -= ones_count; + bs->sr >>= ones_count; + + for (; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count); + + if (ones_count == (LIMIT_ONES + 1)) + break; + + if (ones_count == LIMIT_ONES) { + uint32_t mask; + int cbits; + + for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); + + if (cbits == 33) + break; + + if (cbits < 2) + ones_count = cbits; + else { + for (mask = 1, ones_count = 0; --cbits; mask <<= 1) + if (getbit (bs)) + ones_count |= mask; + + ones_count |= mask; + } + + ones_count += LIMIT_ONES; + } + } + else { + bs->bc -= ones_count + 1; + bs->sr >>= ones_count + 1; + } +#elif defined (USE_NEXT8_OPTIMIZATION) + if (bs->bc < 8) { + if (++(bs->ptr) == bs->end) + bs->wrap (bs); + + next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff; + bs->bc += sizeof (*(bs->ptr)) * 8; 
+ } + else + next8 = bs->sr & 0xff; + + if (next8 == 0xff) { + bs->bc -= 8; + bs->sr >>= 8; + + for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count); + + if (ones_count == (LIMIT_ONES + 1)) + break; + + if (ones_count == LIMIT_ONES) { + uint32_t mask; + int cbits; + + for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); + + if (cbits == 33) + break; + + if (cbits < 2) + ones_count = cbits; + else { + for (mask = 1, ones_count = 0; --cbits; mask <<= 1) + if (getbit (bs)) + ones_count |= mask; + + ones_count |= mask; + } + + ones_count += LIMIT_ONES; + } + } + else { + bs->bc -= (ones_count = ones_count_table [next8]) + 1; + bs->sr >>= ones_count + 1; + } +#else + for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count); + + if (ones_count >= LIMIT_ONES) { + uint32_t mask; + int cbits; + + if (ones_count == (LIMIT_ONES + 1)) + break; + + for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); + + if (cbits == 33) + break; + + if (cbits < 2) + ones_count = cbits; + else { + for (mask = 1, ones_count = 0; --cbits; mask <<= 1) + if (getbit (bs)) + ones_count |= mask; + + ones_count |= mask; + } + + ones_count += LIMIT_ONES; + } +#endif + + low = wps->w.holding_one; + wps->w.holding_one = ones_count & 1; + wps->w.holding_zero = ~ones_count & 1; + ones_count = (ones_count >> 1) + low; + + if (ones_count == 0) { + low = 0; + high = GET_MED (0) - 1; + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (ones_count == 1) { + high = low + GET_MED (1) - 1; + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (ones_count == 2) { + high = low + GET_MED (2) - 1; + DEC_MED2 (); + } + else { + low += (ones_count - 2) * GET_MED (2); + high = low + GET_MED (2) - 1; + INC_MED2 (); + } + } + } + + low += read_code (bs, high - low); + buffer [csamples] = (getbit (bs)) ? ~low : low; + } + + return (wps->wphdr.flags & MONO_DATA) ? 
csamples : (csamples / 2); +} + +// Read a single unsigned value from the specified bitstream with a value +// from 0 to maxcode. If there are exactly a power of two number of possible +// codes then this will read a fixed number of bits; otherwise it reads the +// minimum number of bits and then determines whether another bit is needed +// to define the code. + +static uint32_t __inline read_code (Bitstream *bs, uint32_t maxcode) +{ + unsigned long local_sr; + uint32_t extras, code; + int bitcount; + + if (maxcode < 2) + return maxcode ? getbit (bs) : 0; + + bitcount = count_bits (maxcode); +#ifdef USE_BITMASK_TABLES + extras = bitset [bitcount] - maxcode - 1; +#else + extras = (1 << bitcount) - maxcode - 1; +#endif + + local_sr = bs->sr; + + while (bs->bc < bitcount) { + if (++(bs->ptr) == bs->end) + bs->wrap (bs); + + local_sr |= (long)*(bs->ptr) << bs->bc; + bs->bc += sizeof (*(bs->ptr)) * 8; + } + +#ifdef USE_BITMASK_TABLES + if ((code = local_sr & bitmask [bitcount - 1]) >= extras) +#else + if ((code = local_sr & ((1 << (bitcount - 1)) - 1)) >= extras) +#endif + code = (code << 1) - extras + ((local_sr >> (bitcount - 1)) & 1); + else + bitcount--; + + if (sizeof (local_sr) < 8 && bs->bc > sizeof (local_sr) * 8) { + bs->bc -= bitcount; + bs->sr = *(bs->ptr) >> (sizeof (*(bs->ptr)) * 8 - bs->bc); + } + else { + bs->bc -= bitcount; + bs->sr = local_sr >> bitcount; + } + + return code; +} diff --git a/third_party/wavpack/src/tag_utils.c b/third_party/wavpack/src/tag_utils.c new file mode 100644 index 0000000..f98e1dd --- /dev/null +++ b/third_party/wavpack/src/tag_utils.c @@ -0,0 +1,597 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// tag_utils.c + +// This module provides the high-level API for creating, reading and editing +// APEv2 tags on WavPack files. Read-only support is also provided for ID3v1 +// tags, but their use is not recommended. + +#include +#include + +#include "wavpack_local.h" + +#ifdef _WIN32 +#define stricmp(x,y) _stricmp(x,y) +#else +#define stricmp strcasecmp +#endif + +static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type); +static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size); +static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type); +static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size); +static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type); +static int write_tag_blockout (WavpackContext *wpc); +static int write_tag_reader (WavpackContext *wpc); +static void tagcpy (char *dest, char *src, int tag_size); +static int tagdata (char *src, int tag_size); + +//////////////////// Global functions part of external API ///////////////////////// + +// Count and return the total number of tag items in the specified file. + +int WavpackGetNumTagItems (WavpackContext *wpc) +{ + int i = 0; + + while (WavpackGetTagItemIndexed (wpc, i, NULL, 0)) + ++i; + + return i; +} + +// Count and return the total number of binary tag items in the specified file. This applies +// only to APEv2 tags and was implemented as a separate function to avoid breaking the old API. + +int WavpackGetNumBinaryTagItems (WavpackContext *wpc) +{ + int i = 0; + + while (WavpackGetBinaryTagItemIndexed (wpc, i, NULL, 0)) + ++i; + + return i; +} + +// Attempt to get the specified item from the specified file's ID3v1 or APEv2 +// tag. 
The "size" parameter specifies the amount of space available at "value", +// if the desired item will not fit in this space then ellipses (...) will +// be appended and the string terminated. Only text data are supported. The +// actual length of the string is returned (or 0 if no matching value found). +// Note that with APEv2 tags the length might not be the same as the number of +// characters because UTF-8 encoding is used. Also, APEv2 tags can have multiple +// (NULL separated) strings for a single value (this is why the length is +// returned). If this function is called with a NULL "value" pointer (or a +// zero "length") then only the actual length of the value data is returned +// (not counting the terminating NULL). This can be used to determine the +// actual memory to be allocated beforehand. + +int WavpackGetTagItem (WavpackContext *wpc, const char *item, char *value, int size) +{ + M_Tag *m_tag = &wpc->m_tag; + + if (value && size) + *value = 0; + + if (m_tag->ape_tag_hdr.ID [0] == 'A') + return get_ape_tag_item (m_tag, item, value, size, APE_TAG_TYPE_TEXT); + else if (m_tag->id3_tag.tag_id [0] == 'T') + return get_id3_tag_item (m_tag, item, value, size); + else + return 0; +} + +// Attempt to get the specified binary item from the specified file's APEv2 +// tag. The "size" parameter specifies the amount of space available at "value". +// If the desired item will not fit in this space then nothing will be copied +// and 0 will be returned, otherwise the actual size will be returned. If this +// function is called with a NULL "value" pointer (or a zero "length") then only +// the actual length of the value data is returned and can be used to determine +// the actual memory to be allocated beforehand. 
+ +int WavpackGetBinaryTagItem (WavpackContext *wpc, const char *item, char *value, int size) +{ + M_Tag *m_tag = &wpc->m_tag; + + if (value && size) + *value = 0; + + if (m_tag->ape_tag_hdr.ID [0] == 'A') + return get_ape_tag_item (m_tag, item, value, size, APE_TAG_TYPE_BINARY); + else + return 0; +} + +// This function looks up the tag item name by index and is used when the +// application wants to access all the items in the file's ID3v1 or APEv2 tag. +// Note that this function accesses only the item's name; WavpackGetTagItem() +// still must be called to get the actual value. The "size" parameter specifies +// the amount of space available at "item", if the desired item will not fit in +// this space then ellipses (...) will be appended and the string terminated. +// The actual length of the string is returned (or 0 if no item exists for +// index). If this function is called with a NULL "value" pointer (or a +// zero "length") then only the actual length of the item name is returned +// (not counting the terminating NULL). This can be used to determine the +// actual memory to be allocated beforehand. 
For binary tag values use the +// otherwise identical WavpackGetBinaryTagItemIndexed (); + +int WavpackGetTagItemIndexed (WavpackContext *wpc, int index, char *item, int size) +{ + M_Tag *m_tag = &wpc->m_tag; + + if (item && size) + *item = 0; + + if (m_tag->ape_tag_hdr.ID [0] == 'A') + return get_ape_tag_item_indexed (m_tag, index, item, size, APE_TAG_TYPE_TEXT); + else if (m_tag->id3_tag.tag_id [0] == 'T') + return get_id3_tag_item_indexed (m_tag, index, item, size); + else + return 0; +} + +int WavpackGetBinaryTagItemIndexed (WavpackContext *wpc, int index, char *item, int size) +{ + M_Tag *m_tag = &wpc->m_tag; + + if (item && size) + *item = 0; + + if (m_tag->ape_tag_hdr.ID [0] == 'A') + return get_ape_tag_item_indexed (m_tag, index, item, size, APE_TAG_TYPE_BINARY); + else + return 0; +} + +// These two functions are used to append APEv2 tags to WavPack files; one is +// for text values (UTF-8 encoded) and the other is for binary values. If no tag +// has been started, then an empty one will be allocated first. When finished, +// use WavpackWriteTag() to write the completed tag to the file. The purpose of +// the passed size parameter is obvious for binary values, but might not be for +// text values. Keep in mind that APEv2 text values can have multiple values +// that are NULL separated, so the size is required to know the extent of the +// value (although the final terminating NULL is not included in the passed +// size). If the specified item already exists, it will be replaced with the +// new value. ID3v1 tags are not supported. 
+ +int WavpackAppendTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize) +{ + while (WavpackDeleteTagItem (wpc, item)); + return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_TEXT); +} + +int WavpackAppendBinaryTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize) +{ + while (WavpackDeleteTagItem (wpc, item)); + return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_BINARY); +} + +// Delete the specified tag item from the APEv2 tag on the specified WavPack file +// (fields cannot be deleted from ID3v1 tags). A return value of TRUE indicates +// that the item was found and successfully deleted. + +int WavpackDeleteTagItem (WavpackContext *wpc, const char *item) +{ + M_Tag *m_tag = &wpc->m_tag; + + if (m_tag->ape_tag_hdr.ID [0] == 'A') { + unsigned char *p = m_tag->ape_tag_data; + unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr); + int i; + + for (i = 0; i < m_tag->ape_tag_hdr.item_count; ++i) { + int vsize, isize; + + vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 8; // skip flags because we don't need them + for (isize = 0; p[isize] && p + isize < q; ++isize); + + if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q) + break; + + if (isize && vsize && !stricmp (item, (char *) p)) { + unsigned char *d = p - 8; + + p += isize + vsize + 1; + + while (p < q) + *d++ = *p++; + + m_tag->ape_tag_hdr.length = (int32_t)(d - m_tag->ape_tag_data) + sizeof (APE_Tag_Hdr); + m_tag->ape_tag_hdr.item_count--; + return 1; + } + else + p += isize + vsize + 1; + } + } + + return 0; +} + +// Once a APEv2 tag has been created with WavpackAppendTag(), this function is +// used to write the completed tag to the end of the WavPack file. Note that +// this function uses the same "blockout" function that is used to write +// regular WavPack blocks, although that's where the similarity ends. 
It is also +// used to write tags that have been edited on existing files. + +int WavpackWriteTag (WavpackContext *wpc) +{ + if (wpc->blockout) // this is the case for creating fresh WavPack files + return write_tag_blockout (wpc); + else // otherwise we are editing existing tags (OPEN_EDIT_TAGS) + return write_tag_reader (wpc); +} + +////////////////////////// local static functions ///////////////////////////// + +static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type) +{ + unsigned char *p = m_tag->ape_tag_data; + unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr); + int i; + + for (i = 0; i < m_tag->ape_tag_hdr.item_count && q - p > 8; ++i) { + int vsize, flags, isize; + + vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; + flags = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; + for (isize = 0; p[isize] && p + isize < q; ++isize); + + if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q) + break; + + if (isize && vsize && !stricmp (item, (char *) p) && ((flags & 6) >> 1) == type) { + + if (!value || !size) + return vsize; + + if (type == APE_TAG_TYPE_BINARY) { + if (vsize <= size) { + memcpy (value, p + isize + 1, vsize); + return vsize; + } + else + return 0; + } + else if (vsize < size) { + memcpy (value, p + isize + 1, vsize); + value [vsize] = 0; + return vsize; + } + else if (size >= 4) { + memcpy (value, p + isize + 1, size - 1); + value [size - 4] = value [size - 3] = value [size - 2] = '.'; + value [size - 1] = 0; + return size - 1; + } + else + return 0; + } + else + p += isize + vsize + 1; + } + + return 0; +} + +static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size) +{ + char lvalue [64]; + int len; + + lvalue [0] = 0; + + if (!stricmp (item, "title")) + tagcpy (lvalue, m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title)); + else if (!stricmp (item, "artist")) + tagcpy (lvalue, m_tag->id3_tag.artist, sizeof 
(m_tag->id3_tag.artist)); + else if (!stricmp (item, "album")) + tagcpy (lvalue, m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album)); + else if (!stricmp (item, "year")) + tagcpy (lvalue, m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year)); + else if (!stricmp (item, "comment")) + tagcpy (lvalue, m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment)); + else if (!stricmp (item, "track") && m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28]) + sprintf (lvalue, "%d", m_tag->id3_tag.comment [29]); + else + return 0; + + len = (int) strlen (lvalue); + + if (!value || !size) + return len; + + if (len < size) { + strcpy (value, lvalue); + return len; + } + else if (size >= 4) { + strncpy (value, lvalue, size - 1); + value [size - 4] = value [size - 3] = value [size - 2] = '.'; + value [size - 1] = 0; + return size - 1; + } + else + return 0; +} + +static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type) +{ + unsigned char *p = m_tag->ape_tag_data; + unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr); + int i; + + for (i = 0; i < m_tag->ape_tag_hdr.item_count && index >= 0 && q - p > 8; ++i) { + int vsize, flags, isize; + + vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; + flags = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; + for (isize = 0; p[isize] && p + isize < q; ++isize); + + if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q) + break; + + if (isize && vsize && ((flags & 6) >> 1) == type && !index--) { + + if (!item || !size) + return isize; + + if (isize < size) { + memcpy (item, p, isize); + item [isize] = 0; + return isize; + } + else if (size >= 4) { + memcpy (item, p, size - 1); + item [size - 4] = item [size - 3] = item [size - 2] = '.'; + item [size - 1] = 0; + return size - 1; + } + else + return 0; + } + else + p += isize + vsize + 1; + } + + return 0; +} + +static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char 
*item, int size) +{ + char lvalue [16]; + int len; + + lvalue [0] = 0; + + if (tagdata (m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title)) && !index--) + strcpy (lvalue, "Title"); + else if (tagdata (m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist)) && !index--) + strcpy (lvalue, "Artist"); + else if (tagdata (m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album)) && !index--) + strcpy (lvalue, "Album"); + else if (tagdata (m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year)) && !index--) + strcpy (lvalue, "Year"); + else if (tagdata (m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment)) && !index--) + strcpy (lvalue, "Comment"); + else if (m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28] && !index--) + strcpy (lvalue, "Track"); + else + return 0; + + len = (int) strlen (lvalue); + + if (!item || !size) + return len; + + if (len < size) { + strcpy (item, lvalue); + return len; + } + else if (size >= 4) { + strncpy (item, lvalue, size - 1); + item [size - 4] = item [size - 3] = item [size - 2] = '.'; + item [size - 1] = 0; + return size - 1; + } + else + return 0; +} + +static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type) +{ + M_Tag *m_tag = &wpc->m_tag; + int isize = (int) strlen (item); + + if (!m_tag->ape_tag_hdr.ID [0]) { + strncpy (m_tag->ape_tag_hdr.ID, "APETAGEX", sizeof (m_tag->ape_tag_hdr.ID)); + m_tag->ape_tag_hdr.version = 2000; + m_tag->ape_tag_hdr.length = sizeof (m_tag->ape_tag_hdr); + m_tag->ape_tag_hdr.item_count = 0; + m_tag->ape_tag_hdr.flags = APE_TAG_CONTAINS_HEADER; // we will include header on tags we originate + } + + if (m_tag->ape_tag_hdr.ID [0] == 'A') { + int new_item_len = vsize + isize + 9, flags = type << 1; + unsigned char *p; + + if (m_tag->ape_tag_hdr.length + new_item_len > APE_TAG_MAX_LENGTH) { + strcpy (wpc->error_message, "APEv2 tag exceeds maximum allowed length!"); + return FALSE; + } + + m_tag->ape_tag_hdr.item_count++; + m_tag->ape_tag_hdr.length += 
new_item_len; + p = m_tag->ape_tag_data = (unsigned char*)realloc (m_tag->ape_tag_data, m_tag->ape_tag_hdr.length); + p += m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr) - new_item_len; + + *p++ = (unsigned char) vsize; + *p++ = (unsigned char) (vsize >> 8); + *p++ = (unsigned char) (vsize >> 16); + *p++ = (unsigned char) (vsize >> 24); + + *p++ = (unsigned char) flags; + *p++ = (unsigned char) (flags >> 8); + *p++ = (unsigned char) (flags >> 16); + *p++ = (unsigned char) (flags >> 24); + + strcpy ((char *) p, item); + p += isize + 1; + memcpy (p, value, vsize); + + return TRUE; + } + else + return FALSE; +} + +// Append the stored APEv2 tag to the file being created using the "blockout" function callback. + +static int write_tag_blockout (WavpackContext *wpc) +{ + M_Tag *m_tag = &wpc->m_tag; + int result = TRUE; + + if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count) { + + // only write header if it's specified in the flags + + if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) { + m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER; + WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)); + WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + } + + if (m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr)) + result = wpc->blockout (wpc->wv_out, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr)); + + m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER; // this is NOT header + WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)); + WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + } + + if (!result) + strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); + + return result; +} + +// Write the [potentially] edited tag to the existing 
WavPack file using the reader callback functions. + +static int write_tag_reader (WavpackContext *wpc) +{ + M_Tag *m_tag = &wpc->m_tag; + int32_t tag_size = 0; + int result; + + // before we write an edited (or new) tag into an existing file, make sure it's safe and possible + + if (m_tag->tag_begins_file) { + strcpy (wpc->error_message, "can't edit tags located at the beginning of files!"); + return FALSE; + } + + if (!wpc->reader->can_seek (wpc->wv_in)) { + strcpy (wpc->error_message, "can't edit tags on pipes or unseekable files!"); + return FALSE; + } + + if (!(wpc->open_flags & OPEN_EDIT_TAGS)) { + strcpy (wpc->error_message, "can't edit tags without OPEN_EDIT_TAGS flag!"); + return FALSE; + } + + if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count && + m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr)) + tag_size = m_tag->ape_tag_hdr.length; + + // only write header if it's specified in the flags + + if (tag_size && (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER)) + tag_size += sizeof (m_tag->ape_tag_hdr); + + result = !wpc->reader->set_pos_rel (wpc->wv_in, m_tag->tag_file_pos, SEEK_END); + + if (result && tag_size < -m_tag->tag_file_pos && !wpc->reader->truncate_here) { + int nullcnt = (int) (-m_tag->tag_file_pos - tag_size); + char zero [1] = { 0 }; + + while (nullcnt--) + wpc->reader->write_bytes (wpc->wv_in, &zero, 1); + } + + if (result && tag_size) { + if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) { + m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER; + WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr)); + WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + } + + result = (wpc->reader->write_bytes (wpc->wv_in, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr)); + 
m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER; // this is NOT header + WavpackNativeToLittleEndian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr)); + WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + } + + if (result && tag_size < -m_tag->tag_file_pos && wpc->reader->truncate_here) + result = !wpc->reader->truncate_here (wpc->wv_in); + + if (!result) + strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); + + return result; +} + +// Copy the specified ID3v1 tag value (with specified field size) from the +// source pointer to the destination, eliminating leading spaces and trailing +// spaces and nulls. + +static void tagcpy (char *dest, char *src, int tag_size) +{ + char *s1 = src, *s2 = src + tag_size - 1; + + if (*s2 && !s2 [-1]) + s2--; + + while (s1 <= s2) + if (*s1 == ' ') + ++s1; + else if (!*s2 || *s2 == ' ') + --s2; + else + break; + + while (*s1 && s1 <= s2) + *dest++ = *s1++; + + *dest = 0; +} + +static int tagdata (char *src, int tag_size) +{ + char *s1 = src, *s2 = src + tag_size - 1; + + if (*s2 && !s2 [-1]) + s2--; + + while (s1 <= s2) + if (*s1 == ' ') + ++s1; + else if (!*s2 || *s2 == ' ') + --s2; + else + break; + + return (*s1 && s1 <= s2); +} diff --git a/third_party/wavpack/src/tags.c b/third_party/wavpack/src/tags.c index 56403ec..21884ea 100644 --- a/third_party/wavpack/src/tags.c +++ b/third_party/wavpack/src/tags.c @@ -1,247 +1,23 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2009 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // All Rights Reserved. 
// // Distributed under the BSD Software License (see license.txt) // //////////////////////////////////////////////////////////////////////////// // tags.c -// This module provides support for reading and writing metadata tags. +// This module provides support for reading metadata tags (either ID3v1 or +// APEv2) from WavPack files. No actual creation or manipulation of the tags +// is done in this module; this is just internal code to load the tags into +// memory. The high-level API functions are in the tag_utils.c module. #include #include #include "wavpack_local.h" -#ifdef WIN32 -#define stricmp(x,y) _stricmp(x,y) -#define fileno _fileno -#else -#define stricmp strcasecmp -#endif - -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -#ifndef NO_TAGS - -static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type); -static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size); -static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type); -static int get_id3_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size); -static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type); -static int write_tag_blockout (WavpackContext *wpc); -static int write_tag_reader (WavpackContext *wpc); -static void tagcpy (char *dest, char *src, int tag_size); -static int tagdata (char *src, int tag_size); - -//////////////////// Global functions part of external API ///////////////////////// - -// Count and return the total number of tag items in the specified file. 
- -int WavpackGetNumTagItems (WavpackContext *wpc) -{ - int i = 0; - - while (WavpackGetTagItemIndexed (wpc, i, NULL, 0)) - ++i; - - return i; -} - -// Count and return the total number of binary tag items in the specified file. This applies -// only to APEv2 tags and was implemented as a separate function to avoid breaking the old API. - -int WavpackGetNumBinaryTagItems (WavpackContext *wpc) -{ - int i = 0; - - while (WavpackGetBinaryTagItemIndexed (wpc, i, NULL, 0)) - ++i; - - return i; -} - -// Attempt to get the specified item from the specified file's ID3v1 or APEv2 -// tag. The "size" parameter specifies the amount of space available at "value", -// if the desired item will not fit in this space then ellipses (...) will -// be appended and the string terminated. Only text data are supported. The -// actual length of the string is returned (or 0 if no matching value found). -// Note that with APEv2 tags the length might not be the same as the number of -// characters because UTF-8 encoding is used. Also, APEv2 tags can have multiple -// (NULL separated) strings for a single value (this is why the length is -// returned). If this function is called with a NULL "value" pointer (or a -// zero "length") then only the actual length of the value data is returned -// (not counting the terminating NULL). This can be used to determine the -// actual memory to be allocated beforehand. - -int WavpackGetTagItem (WavpackContext *wpc, const char *item, char *value, int size) -{ - M_Tag *m_tag = &wpc->m_tag; - - if (value && size) - *value = 0; - - if (m_tag->ape_tag_hdr.ID [0] == 'A') - return get_ape_tag_item (m_tag, item, value, size, APE_TAG_TYPE_TEXT); - else if (m_tag->id3_tag.tag_id [0] == 'T') - return get_id3_tag_item (m_tag, item, value, size); - else - return 0; -} - -// Attempt to get the specified binary item from the specified file's APEv2 -// tag. The "size" parameter specifies the amount of space available at "value". 
-// If the desired item will not fit in this space then nothing will be copied -// and 0 will be returned, otherwise the actual size will be returned. If this -// function is called with a NULL "value" pointer (or a zero "length") then only -// the actual length of the value data is returned and can be used to determine -// the actual memory to be allocated beforehand. - -int WavpackGetBinaryTagItem (WavpackContext *wpc, const char *item, char *value, int size) -{ - M_Tag *m_tag = &wpc->m_tag; - - if (value && size) - *value = 0; - - if (m_tag->ape_tag_hdr.ID [0] == 'A') - return get_ape_tag_item (m_tag, item, value, size, APE_TAG_TYPE_BINARY); - else - return 0; -} - -// This function looks up the tag item name by index and is used when the -// application wants to access all the items in the file's ID3v1 or APEv2 tag. -// Note that this function accesses only the item's name; WavpackGetTagItem() -// still must be called to get the actual value. The "size" parameter specifies -// the amount of space available at "item", if the desired item will not fit in -// this space then ellipses (...) will be appended and the string terminated. -// The actual length of the string is returned (or 0 if no item exists for -// index). If this function is called with a NULL "value" pointer (or a -// zero "length") then only the actual length of the item name is returned -// (not counting the terminating NULL). This can be used to determine the -// actual memory to be allocated beforehand. 
For binary tag values use the -// otherwise identical WavpackGetBinaryTagItemIndexed (); - -int WavpackGetTagItemIndexed (WavpackContext *wpc, int index, char *item, int size) -{ - M_Tag *m_tag = &wpc->m_tag; - - if (item && size) - *item = 0; - - if (m_tag->ape_tag_hdr.ID [0] == 'A') - return get_ape_tag_item_indexed (m_tag, index, item, size, APE_TAG_TYPE_TEXT); - else if (m_tag->id3_tag.tag_id [0] == 'T') - return get_id3_tag_item_indexed (m_tag, index, item, size); - else - return 0; -} - -int WavpackGetBinaryTagItemIndexed (WavpackContext *wpc, int index, char *item, int size) -{ - M_Tag *m_tag = &wpc->m_tag; - - if (item && size) - *item = 0; - - if (m_tag->ape_tag_hdr.ID [0] == 'A') - return get_ape_tag_item_indexed (m_tag, index, item, size, APE_TAG_TYPE_BINARY); - else - return 0; -} - -// These two functions are used to append APEv2 tags to WavPack files; one is -// for text values (UTF-8 encoded) and the other is for binary values. If no tag -// has been started, then an empty one will be allocated first. When finished, -// use WavpackWriteTag() to write the completed tag to the file. The purpose of -// the passed size parameter is obvious for binary values, but might not be for -// text values. Keep in mind that APEv2 text values can have multiple values -// that are NULL separated, so the size is required to know the extent of the -// value (although the final terminating NULL is not included in the passed -// size). If the specified item already exists, it will be replaced with the -// new value. ID3v1 tags are not supported. 
- -int WavpackAppendTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize) -{ - while (WavpackDeleteTagItem (wpc, item)); - return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_TEXT); -} - -int WavpackAppendBinaryTagItem (WavpackContext *wpc, const char *item, const char *value, int vsize) -{ - while (WavpackDeleteTagItem (wpc, item)); - return append_ape_tag_item (wpc, item, value, vsize, APE_TAG_TYPE_BINARY); -} - -// Delete the specified tag item from the APEv2 tag on the specified WavPack file -// (fields cannot be deleted from ID3v1 tags). A return value of TRUE indicates -// that the item was found and successfully deleted. - -int WavpackDeleteTagItem (WavpackContext *wpc, const char *item) -{ - M_Tag *m_tag = &wpc->m_tag; - - if (m_tag->ape_tag_hdr.ID [0] == 'A') { - unsigned char *p = m_tag->ape_tag_data; - unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr); - int i; - - for (i = 0; i < m_tag->ape_tag_hdr.item_count; ++i) { - int vsize, isize; - - vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 8; // skip flags because we don't need them - for (isize = 0; p[isize] && p + isize < q; ++isize); - - if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q) - break; - - if (isize && vsize && !stricmp (item, (char *) p)) { - unsigned char *d = p - 8; - - p += isize + vsize + 1; - - while (p < q) - *d++ = *p++; - - m_tag->ape_tag_hdr.length = (int32_t)(d - m_tag->ape_tag_data) + sizeof (APE_Tag_Hdr); - m_tag->ape_tag_hdr.item_count--; - return 1; - } - else - p += isize + vsize + 1; - } - } - - return 0; -} - -// Once a APEv2 tag has been created with WavpackAppendTag(), this function is -// used to write the completed tag to the end of the WavPack file. Note that -// this function uses the same "blockout" function that is used to write -// regular WavPack blocks, although that's where the similarity ends. 
It is also -// used to write tags that have been edited on existing files. - -int WavpackWriteTag (WavpackContext *wpc) -{ - if (wpc->blockout) // this is the case for creating fresh WavPack files - return write_tag_blockout (wpc); - else // otherwise we are editing existing tags (OPEN_EDIT_TAGS) - return write_tag_reader (wpc); -} - -//////// Utility functions provided to other modules (but not part of lib API) ///////// - // This function attempts to load an ID3v1 or APEv2 tag from the specified // file into the specified M_Tag structure. The ID3 tag fits in completely, // but an APEv2 tag is variable length and so space must be allocated here @@ -278,12 +54,12 @@ int load_tag (WavpackContext *wpc) if (wpc->reader->read_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (APE_Tag_Hdr)) == sizeof (APE_Tag_Hdr) && !strncmp (m_tag->ape_tag_hdr.ID, "APETAGEX", 8)) { - little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); if (m_tag->ape_tag_hdr.version == 2000 && m_tag->ape_tag_hdr.item_count && m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr) && m_tag->ape_tag_hdr.length <= APE_TAG_MAX_LENGTH && - (m_tag->ape_tag_data = malloc (m_tag->ape_tag_hdr.length)) != NULL) { + (m_tag->ape_tag_data = (unsigned char *)malloc (m_tag->ape_tag_hdr.length)) != NULL) { ape_tag_items = m_tag->ape_tag_hdr.item_count; ape_tag_length = m_tag->ape_tag_hdr.length; @@ -315,7 +91,7 @@ int load_tag (WavpackContext *wpc) return FALSE; // something's wrong... 
} - little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); + WavpackLittleEndianToNative (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); if (m_tag->ape_tag_hdr.version != 2000 || m_tag->ape_tag_hdr.item_count != ape_tag_items || m_tag->ape_tag_hdr.length != ape_tag_length) { @@ -401,366 +177,3 @@ void free_tag (M_Tag *m_tag) m_tag->ape_tag_data = NULL; } } - -////////////////////////// local static functions ///////////////////////////// - -static int get_ape_tag_item (M_Tag *m_tag, const char *item, char *value, int size, int type) -{ - unsigned char *p = m_tag->ape_tag_data; - unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr); - int i; - - for (i = 0; i < m_tag->ape_tag_hdr.item_count && q - p > 8; ++i) { - int vsize, flags, isize; - - vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; - flags = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; - for (isize = 0; p[isize] && p + isize < q; ++isize); - - if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q) - break; - - if (isize && vsize && !stricmp (item, (char *) p) && ((flags & 6) >> 1) == type) { - - if (!value || !size) - return vsize; - - if (type == APE_TAG_TYPE_BINARY) { - if (vsize <= size) { - memcpy (value, p + isize + 1, vsize); - return vsize; - } - else - return 0; - } - else if (vsize < size) { - memcpy (value, p + isize + 1, vsize); - value [vsize] = 0; - return vsize; - } - else if (size >= 4) { - memcpy (value, p + isize + 1, size - 1); - value [size - 4] = value [size - 3] = value [size - 2] = '.'; - value [size - 1] = 0; - return size - 1; - } - else - return 0; - } - else - p += isize + vsize + 1; - } - - return 0; -} - -static int get_id3_tag_item (M_Tag *m_tag, const char *item, char *value, int size) -{ - char lvalue [64]; - int len; - - lvalue [0] = 0; - - if (!stricmp (item, "title")) - tagcpy (lvalue, m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title)); - else if (!stricmp (item, "artist")) - tagcpy (lvalue, 
m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist)); - else if (!stricmp (item, "album")) - tagcpy (lvalue, m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album)); - else if (!stricmp (item, "year")) - tagcpy (lvalue, m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year)); - else if (!stricmp (item, "comment")) - tagcpy (lvalue, m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment)); - else if (!stricmp (item, "track") && m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28]) - sprintf (lvalue, "%d", m_tag->id3_tag.comment [29]); - else - return 0; - - len = (int) strlen (lvalue); - - if (!value || !size) - return len; - - if (len < size) { - strcpy (value, lvalue); - return len; - } - else if (size >= 4) { - strncpy (value, lvalue, size - 1); - value [size - 4] = value [size - 3] = value [size - 2] = '.'; - value [size - 1] = 0; - return size - 1; - } - else - return 0; -} - -static int get_ape_tag_item_indexed (M_Tag *m_tag, int index, char *item, int size, int type) -{ - unsigned char *p = m_tag->ape_tag_data; - unsigned char *q = p + m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr); - int i; - - for (i = 0; i < m_tag->ape_tag_hdr.item_count && index >= 0 && q - p > 8; ++i) { - int vsize, flags, isize; - - vsize = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; - flags = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); p += 4; - for (isize = 0; p[isize] && p + isize < q; ++isize); - - if (vsize < 0 || vsize > m_tag->ape_tag_hdr.length || p + isize + vsize + 1 > q) - break; - - if (isize && vsize && ((flags & 6) >> 1) == type && !index--) { - - if (!item || !size) - return isize; - - if (isize < size) { - memcpy (item, p, isize); - item [isize] = 0; - return isize; - } - else if (size >= 4) { - memcpy (item, p, size - 1); - item [size - 4] = item [size - 3] = item [size - 2] = '.'; - item [size - 1] = 0; - return size - 1; - } - else - return 0; - } - else - p += isize + vsize + 1; - } - - return 0; -} - -static int get_id3_tag_item_indexed (M_Tag 
*m_tag, int index, char *item, int size) -{ - char lvalue [16]; - int len; - - lvalue [0] = 0; - - if (tagdata (m_tag->id3_tag.title, sizeof (m_tag->id3_tag.title)) && !index--) - strcpy (lvalue, "Title"); - else if (tagdata (m_tag->id3_tag.artist, sizeof (m_tag->id3_tag.artist)) && !index--) - strcpy (lvalue, "Artist"); - else if (tagdata (m_tag->id3_tag.album, sizeof (m_tag->id3_tag.album)) && !index--) - strcpy (lvalue, "Album"); - else if (tagdata (m_tag->id3_tag.year, sizeof (m_tag->id3_tag.year)) && !index--) - strcpy (lvalue, "Year"); - else if (tagdata (m_tag->id3_tag.comment, sizeof (m_tag->id3_tag.comment)) && !index--) - strcpy (lvalue, "Comment"); - else if (m_tag->id3_tag.comment [29] && !m_tag->id3_tag.comment [28] && !index--) - strcpy (lvalue, "Track"); - else - return 0; - - len = (int) strlen (lvalue); - - if (!item || !size) - return len; - - if (len < size) { - strcpy (item, lvalue); - return len; - } - else if (size >= 4) { - strncpy (item, lvalue, size - 1); - item [size - 4] = item [size - 3] = item [size - 2] = '.'; - item [size - 1] = 0; - return size - 1; - } - else - return 0; -} - -static int append_ape_tag_item (WavpackContext *wpc, const char *item, const char *value, int vsize, int type) -{ - M_Tag *m_tag = &wpc->m_tag; - int isize = (int) strlen (item); - - if (!m_tag->ape_tag_hdr.ID [0]) { - strncpy (m_tag->ape_tag_hdr.ID, "APETAGEX", sizeof (m_tag->ape_tag_hdr.ID)); - m_tag->ape_tag_hdr.version = 2000; - m_tag->ape_tag_hdr.length = sizeof (m_tag->ape_tag_hdr); - m_tag->ape_tag_hdr.item_count = 0; - m_tag->ape_tag_hdr.flags = APE_TAG_CONTAINS_HEADER; // we will include header on tags we originate - } - - if (m_tag->ape_tag_hdr.ID [0] == 'A') { - int new_item_len = vsize + isize + 9, flags = type << 1; - unsigned char *p; - - if (m_tag->ape_tag_hdr.length + new_item_len > APE_TAG_MAX_LENGTH) { - strcpy (wpc->error_message, "APEv2 tag exceeds maximum allowed length!"); - return FALSE; - } - - m_tag->ape_tag_hdr.item_count++; - 
m_tag->ape_tag_hdr.length += new_item_len; - p = m_tag->ape_tag_data = realloc (m_tag->ape_tag_data, m_tag->ape_tag_hdr.length); - p += m_tag->ape_tag_hdr.length - sizeof (APE_Tag_Hdr) - new_item_len; - - *p++ = (unsigned char) vsize; - *p++ = (unsigned char) (vsize >> 8); - *p++ = (unsigned char) (vsize >> 16); - *p++ = (unsigned char) (vsize >> 24); - - *p++ = (unsigned char) flags; - *p++ = (unsigned char) (flags >> 8); - *p++ = (unsigned char) (flags >> 16); - *p++ = (unsigned char) (flags >> 24); - - strcpy ((char *) p, item); - p += isize + 1; - memcpy (p, value, vsize); - - return TRUE; - } - else - return FALSE; -} - -static int write_tag_blockout (WavpackContext *wpc) -{ - M_Tag *m_tag = &wpc->m_tag; - int result = TRUE; - - if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count) { - - // only write header if it's specified in the flags - - if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) { - m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER; - native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)); - little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - } - - if (m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr)) - result = wpc->blockout (wpc->wv_out, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr)); - - m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER; // this is NOT header - native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - result = wpc->blockout (wpc->wv_out, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)); - little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - } - - if (!result) - strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); - - return result; -} - -static int write_tag_reader (WavpackContext *wpc) -{ - M_Tag *m_tag = &wpc->m_tag; - int32_t tag_size = 0; - int result; - - // before we write an edited (or new) 
tag into an existing file, make sure it's safe and possible - - if (m_tag->tag_begins_file) { - strcpy (wpc->error_message, "can't edit tags located at the beginning of files!"); - return FALSE; - } - - if (!wpc->reader->can_seek (wpc->wv_in)) { - strcpy (wpc->error_message, "can't edit tags on pipes or unseekable files!"); - return FALSE; - } - - if (!(wpc->open_flags & OPEN_EDIT_TAGS)) { - strcpy (wpc->error_message, "can't edit tags without OPEN_EDIT_TAGS flag!"); - return FALSE; - } - - if (m_tag->ape_tag_hdr.ID [0] == 'A' && m_tag->ape_tag_hdr.item_count && - m_tag->ape_tag_hdr.length > sizeof (m_tag->ape_tag_hdr)) - tag_size = m_tag->ape_tag_hdr.length; - - // only write header if it's specified in the flags - - if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) - tag_size += sizeof (m_tag->ape_tag_hdr); - - result = !wpc->reader->set_pos_rel (wpc->wv_in, m_tag->tag_file_pos, SEEK_END); - - if (result && tag_size < -m_tag->tag_file_pos) { - int nullcnt = -m_tag->tag_file_pos - tag_size; - char zero [1] = { 0 }; - - while (nullcnt--) - wpc->reader->write_bytes (wpc->wv_in, &zero, 1); - } - - if (result && tag_size) { - if (m_tag->ape_tag_hdr.flags & APE_TAG_CONTAINS_HEADER) { - m_tag->ape_tag_hdr.flags |= APE_TAG_THIS_IS_HEADER; - native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr)); - little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - } - - result = (wpc->reader->write_bytes (wpc->wv_in, m_tag->ape_tag_data, m_tag->ape_tag_hdr.length - sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr)); - m_tag->ape_tag_hdr.flags &= ~APE_TAG_THIS_IS_HEADER; // this is NOT header - native_to_little_endian (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - result = (wpc->reader->write_bytes (wpc->wv_in, &m_tag->ape_tag_hdr, sizeof (m_tag->ape_tag_hdr)) == sizeof (m_tag->ape_tag_hdr)); - 
little_endian_to_native (&m_tag->ape_tag_hdr, APE_Tag_Hdr_Format); - } - - if (!result) - strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); - - return result; -} - -// Copy the specified ID3v1 tag value (with specified field size) from the -// source pointer to the destination, eliminating leading spaces and trailing -// spaces and nulls. - -static void tagcpy (char *dest, char *src, int tag_size) -{ - char *s1 = src, *s2 = src + tag_size - 1; - - if (*s2 && !s2 [-1]) - s2--; - - while (s1 <= s2) - if (*s1 == ' ') - ++s1; - else if (!*s2 || *s2 == ' ') - --s2; - else - break; - - while (*s1 && s1 <= s2) - *dest++ = *s1++; - - *dest = 0; -} - -static int tagdata (char *src, int tag_size) -{ - char *s1 = src, *s2 = src + tag_size - 1; - - if (*s2 && !s2 [-1]) - s2--; - - while (s1 <= s2) - if (*s1 == ' ') - ++s1; - else if (!*s2 || *s2 == ' ') - --s2; - else - break; - - return (*s1 && s1 <= s2); -} - -#endif - diff --git a/third_party/wavpack/src/unpack.c b/third_party/wavpack/src/unpack.c index b296723..c5ae9f7 100644 --- a/third_party/wavpack/src/unpack.c +++ b/third_party/wavpack/src/unpack.c @@ -1,468 +1,56 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // All Rights Reserved. // // Distributed under the BSD Software License (see license.txt) // //////////////////////////////////////////////////////////////////////////// // unpack.c -// This module actually handles the decompression of the audio data, except -// for the entropy decoding which is handled by the words? modules. For -// maximum efficiency, the conversion is isolated to tight loops that handle -// an entire buffer. +// This module actually handles the decompression of the audio data, except for +// the entropy decoding which is handled by the read_words.c module. 
For better +// efficiency, the conversion is isolated to tight loops that handle an entire +// buffer. + +#include +#include #include "wavpack_local.h" -#include -#include -#include -#include +#ifdef OPT_ASM_X86 + #define DECORR_STEREO_PASS_CONT unpack_decorr_stereo_pass_cont_x86 + #define DECORR_STEREO_PASS_CONT_AVAILABLE unpack_cpu_has_feature_x86(CPU_FEATURE_MMX) + #define DECORR_MONO_PASS_CONT unpack_decorr_mono_pass_cont_x86 +#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__)) + #define DECORR_STEREO_PASS_CONT unpack_decorr_stereo_pass_cont_x64win + #define DECORR_STEREO_PASS_CONT_AVAILABLE 1 + #define DECORR_MONO_PASS_CONT unpack_decorr_mono_pass_cont_x64win +#elif defined(OPT_ASM_X64) + #define DECORR_STEREO_PASS_CONT unpack_decorr_stereo_pass_cont_x64 + #define DECORR_STEREO_PASS_CONT_AVAILABLE 1 + #define DECORR_MONO_PASS_CONT unpack_decorr_mono_pass_cont_x64 +#elif defined(OPT_ASM_ARM) + #define DECORR_STEREO_PASS_CONT unpack_decorr_stereo_pass_cont_armv7 + #define DECORR_STEREO_PASS_CONT_AVAILABLE 1 + #define DECORR_MONO_PASS_CONT unpack_decorr_mono_pass_cont_armv7 +#endif -// This flag provides faster decoding speed at the expense of more code. The -// improvement applies to 16-bit stereo lossless only. +#ifdef DECORR_STEREO_PASS_CONT +extern void DECORR_STEREO_PASS_CONT (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count, int32_t long_math); +extern void DECORR_MONO_PASS_CONT (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count, int32_t long_math); +#endif -#define FAST_DECODE +// This flag provides the functionality of terminating the decoding and muting +// the output when a lossy sample appears to be corrupt. This is automatic +// for lossless files because a corrupt sample is unambigious, but for lossy +// data it might be possible for this to falsely trigger (although I have never +// seen it). 
#define LOSSY_MUTE -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - ///////////////////////////// executable code //////////////////////////////// -// This function initializes everything required to unpack a WavPack block -// and must be called before unpack_samples() is called to obtain audio data. -// It is assumed that the WavpackHeader has been read into the wps->wphdr -// (in the current WavpackStream) and that the entire block has been read at -// wps->blockbuff. If a correction file is available (wpc->wvc_flag = TRUE) -// then the corresponding correction block must be read into wps->block2buff -// and its WavpackHeader has overwritten the header at wps->wphdr. This is -// where all the metadata blocks are scanned including those that contain -// bitstream data. - -int unpack_init (WavpackContext *wpc) -{ - WavpackStream *wps = wpc->streams [wpc->current_stream]; - unsigned char *blockptr, *block2ptr; - WavpackMetadata wpmd; - - wps->mute_error = FALSE; - wps->crc = wps->crc_x = 0xffffffff; - CLEAR (wps->wvbits); - CLEAR (wps->wvcbits); - CLEAR (wps->wvxbits); - CLEAR (wps->decorr_passes); - CLEAR (wps->dc); - CLEAR (wps->w); - - if (!(wps->wphdr.flags & MONO_FLAG) && wpc->config.num_channels && wps->wphdr.block_samples && - (wpc->reduced_channels == 1 || wpc->config.num_channels == 1)) { - wps->mute_error = TRUE; - return FALSE; - } - - if ((wps->wphdr.flags & UNKNOWN_FLAGS) || (wps->wphdr.flags & MONO_DATA) == MONO_DATA) { - wps->mute_error = TRUE; - return FALSE; - } - - blockptr = wps->blockbuff + sizeof (WavpackHeader); - - while (read_metadata_buff (&wpmd, wps->blockbuff, &blockptr)) - if (!process_metadata (wpc, &wpmd)) { - wps->mute_error = TRUE; - return FALSE; - } - - if (wps->wphdr.block_samples && wpc->wvc_flag && wps->block2buff) { - block2ptr = 
wps->block2buff + sizeof (WavpackHeader); - - while (read_metadata_buff (&wpmd, wps->block2buff, &block2ptr)) - if (!process_metadata (wpc, &wpmd)) { - wps->mute_error = TRUE; - return FALSE; - } - } - - if (wps->wphdr.block_samples && !bs_is_open (&wps->wvbits)) { - if (bs_is_open (&wps->wvcbits)) - strcpy (wpc->error_message, "can't unpack correction files alone!"); - - wps->mute_error = TRUE; - return FALSE; - } - - if (wps->wphdr.block_samples && !bs_is_open (&wps->wvxbits)) { - if ((wps->wphdr.flags & INT32_DATA) && wps->int32_sent_bits) - wpc->lossy_blocks = TRUE; - - if ((wps->wphdr.flags & FLOAT_DATA) && - wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME)) - wpc->lossy_blocks = TRUE; - } - - if (wps->wphdr.block_samples) - wps->sample_index = wps->wphdr.block_index; - - return TRUE; -} - -// This function initialzes the main bitstream for audio samples, which must -// be in the "wv" file. - -int init_wv_bitstream (WavpackStream *wps, WavpackMetadata *wpmd) -{ - if (!wpmd->byte_length) - return FALSE; - - bs_open_read (&wps->wvbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length); - return TRUE; -} - -// This function initialzes the "correction" bitstream for audio samples, -// which currently must be in the "wvc" file. - -int init_wvc_bitstream (WavpackStream *wps, WavpackMetadata *wpmd) -{ - if (!wpmd->byte_length) - return FALSE; - - bs_open_read (&wps->wvcbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length); - return TRUE; -} - -// This function initialzes the "extra" bitstream for audio samples which -// contains the information required to losslessly decompress 32-bit float data -// or integer data that exceeds 24 bits. This bitstream is in the "wv" file -// for pure lossless data or the "wvc" file for hybrid lossless. This data -// would not be used for hybrid lossy mode. There is also a 32-bit CRC stored -// in the first 4 bytes of these blocks. 
- -int init_wvx_bitstream (WavpackStream *wps, WavpackMetadata *wpmd) -{ - unsigned char *cp = wpmd->data; - - if (wpmd->byte_length <= 4) - return FALSE; - - wps->crc_wvx = *cp++; - wps->crc_wvx |= (int32_t) *cp++ << 8; - wps->crc_wvx |= (int32_t) *cp++ << 16; - wps->crc_wvx |= (int32_t) *cp++ << 24; - - bs_open_read (&wps->wvxbits, cp, (unsigned char *) wpmd->data + wpmd->byte_length); - return TRUE; -} - -// Read decorrelation terms from specified metadata block into the -// decorr_passes array. The terms range from -3 to 8, plus 17 & 18; -// other values are reserved and generate errors for now. The delta -// ranges from 0 to 7 with all values valid. Note that the terms are -// stored in the opposite order in the decorr_passes array compared -// to packing. - -int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int termcnt = wpmd->byte_length; - unsigned char *byteptr = wpmd->data; - struct decorr_pass *dpp; - - if (termcnt > MAX_NTERMS) - return FALSE; - - wps->num_terms = termcnt; - - for (dpp = wps->decorr_passes + termcnt - 1; termcnt--; dpp--) { - dpp->term = (int)(*byteptr & 0x1f) - 5; - dpp->delta = (*byteptr++ >> 5) & 0x7; - - if (!dpp->term || dpp->term < -3 || (dpp->term > MAX_TERM && dpp->term < 17) || dpp->term > 18) - return FALSE; - } - - return TRUE; -} - -// Read decorrelation weights from specified metadata block into the -// decorr_passes array. The weights range +/-1024, but are rounded and -// truncated to fit in signed chars for metadata storage. Weights are -// separate for the two channels and are specified from the "last" term -// (first during encode). Unspecified weights are set to zero. 
- -int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int termcnt = wpmd->byte_length, tcount; - char *byteptr = wpmd->data; - struct decorr_pass *dpp; - - if (!(wps->wphdr.flags & MONO_DATA)) - termcnt /= 2; - - if (termcnt > wps->num_terms) - return FALSE; - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - dpp->weight_A = dpp->weight_B = 0; - - while (--dpp >= wps->decorr_passes && termcnt--) { - dpp->weight_A = restore_weight (*byteptr++); - - if (!(wps->wphdr.flags & MONO_DATA)) - dpp->weight_B = restore_weight (*byteptr++); - } - - return TRUE; -} - -// Read decorrelation samples from specified metadata block into the -// decorr_passes array. The samples are signed 32-bit values, but are -// converted to signed log2 values for storage in metadata. Values are -// stored for both channels and are specified from the "last" term -// (first during encode) with unspecified samples set to zero. The -// number of samples stored varies with the actual term value, so -// those must obviously come first in the metadata. - -int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) -{ - unsigned char *byteptr = wpmd->data; - unsigned char *endptr = byteptr + wpmd->byte_length; - struct decorr_pass *dpp; - int tcount; - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { - CLEAR (dpp->samples_A); - CLEAR (dpp->samples_B); - } - - if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) { - if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) - return FALSE; - - wps->dc.error [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->dc.error [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - } - } - - while (dpp-- > wps->decorr_passes && byteptr < endptr) - if (dpp->term > MAX_TERM) { - if (byteptr + (wps->wphdr.flags & MONO_DATA ? 
4 : 8) > endptr) - return FALSE; - - dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - dpp->samples_A [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - - if (!(wps->wphdr.flags & MONO_DATA)) { - dpp->samples_B [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - dpp->samples_B [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - } - } - else if (dpp->term < 0) { - if (byteptr + 4 > endptr) - return FALSE; - - dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - dpp->samples_B [0] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - } - else { - int m = 0, cnt = dpp->term; - - while (cnt--) { - if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) - return FALSE; - - dpp->samples_A [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - dpp->samples_B [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - } - - m++; - } - } - - return byteptr == endptr; -} - -// Read the shaping weights from specified metadata block into the -// WavpackStream structure. Note that there must be two values (even -// for mono streams) and that the values are stored in the same -// manner as decorrelation weights. These would normally be read from -// the "correction" file and are used for lossless reconstruction of -// hybrid data. - -int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd) -{ - if (wpmd->byte_length == 2) { - char *byteptr = wpmd->data; - - wps->dc.shaping_acc [0] = (int32_t) restore_weight (*byteptr++) << 16; - wps->dc.shaping_acc [1] = (int32_t) restore_weight (*byteptr++) << 16; - return TRUE; - } - else if (wpmd->byte_length >= (wps->wphdr.flags & MONO_DATA ? 
4 : 8)) { - unsigned char *byteptr = wpmd->data; - - wps->dc.error [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - wps->dc.shaping_acc [0] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->dc.error [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - wps->dc.shaping_acc [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - } - - if (wpmd->byte_length == (wps->wphdr.flags & MONO_DATA ? 6 : 12)) { - wps->dc.shaping_delta [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - - if (!(wps->wphdr.flags & MONO_DATA)) - wps->dc.shaping_delta [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - } - - return TRUE; - } - - return FALSE; -} - -// Read the int32 data from the specified metadata into the specified stream. -// This data is used for integer data that has more than 24 bits of magnitude -// or, in some cases, used to eliminate redundant bits from any audio stream. - -int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - char *byteptr = wpmd->data; - - if (bytecnt != 4) - return FALSE; - - wps->int32_sent_bits = *byteptr++; - wps->int32_zeros = *byteptr++; - wps->int32_ones = *byteptr++; - wps->int32_dups = *byteptr; - - return TRUE; -} - -// Read multichannel information from metadata. The first byte is the total -// number of channels and the following bytes represent the channel_mask -// as described for Microsoft WAVEFORMATEX. 
- -int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length, shift = 0; - unsigned char *byteptr = wpmd->data; - uint32_t mask = 0; - - if (!bytecnt || bytecnt > 6) - return FALSE; - - if (!wpc->config.num_channels) { - - if (bytecnt == 6) { - wpc->config.num_channels = (byteptr [0] | ((byteptr [2] & 0xf) << 8)) + 1; - wpc->max_streams = (byteptr [1] | ((byteptr [2] & 0xf0) << 4)) + 1; - - if (wpc->config.num_channels < wpc->max_streams) - return FALSE; - - byteptr += 3; - mask = *byteptr++; - mask |= (uint32_t) *byteptr++ << 8; - mask |= (uint32_t) *byteptr << 16; - } - else { - wpc->config.num_channels = *byteptr++; - - while (--bytecnt) { - mask |= (uint32_t) *byteptr++ << shift; - shift += 8; - } - } - - if (wpc->config.num_channels > wpc->max_streams * 2) - return FALSE; - - wpc->config.channel_mask = mask; - } - - return TRUE; -} - -// Read configuration information from metadata. - -int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - unsigned char *byteptr = wpmd->data; - - if (bytecnt >= 3) { - wpc->config.flags &= 0xff; - wpc->config.flags |= (int32_t) *byteptr++ << 8; - wpc->config.flags |= (int32_t) *byteptr++ << 16; - wpc->config.flags |= (int32_t) *byteptr++ << 24; - - if (bytecnt >= 4 && (wpc->config.flags & CONFIG_EXTRA_MODE)) - wpc->config.xmode = *byteptr; - } - - return TRUE; -} - -// Read non-standard sampling rate from metadata. - -int read_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - unsigned char *byteptr = wpmd->data; - - if (bytecnt == 3) { - wpc->config.sample_rate = (int32_t) *byteptr++; - wpc->config.sample_rate |= (int32_t) *byteptr++ << 8; - wpc->config.sample_rate |= (int32_t) *byteptr++ << 16; - } - - return TRUE; -} - -// Read wrapper data from metadata. 
Currently, this consists of the RIFF -// header and trailer that wav files contain around the audio data but could -// be used for other formats as well. Because WavPack files contain all the -// information required for decoding and playback, this data can probably -// be ignored except when an exact wavefile restoration is needed. - -int read_wrapper_data (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - if ((wpc->open_flags & OPEN_WRAPPER) && wpc->wrapper_bytes < MAX_WRAPPER_BYTES) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + wpmd->byte_length); - memcpy (wpc->wrapper_data + wpc->wrapper_bytes, wpmd->data, wpmd->byte_length); - wpc->wrapper_bytes += wpmd->byte_length; - } - - return TRUE; -} - -#ifndef NO_UNPACK - // This monster actually unpacks the WavPack bitstream(s) into the specified // buffer as 32-bit integers or floats (depending on orignal data). Lossy // samples will be clipped to their original limits (i.e. 8-bit samples are @@ -480,11 +68,7 @@ int read_wrapper_data (WavpackContext *wpc, WavpackMetadata *wpmd) // occurs or the end of the block is reached. 
static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass_i (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass_1717 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass_1718 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass_1818 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass_nn (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); static void fixup_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count) @@ -496,8 +80,14 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co struct decorr_pass *dpp; int tcount, m = 0; - if (wps->sample_index + sample_count > wps->wphdr.block_index + wps->wphdr.block_samples) - sample_count = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; + // don't attempt to decode past the end of the block, but watch out for overflow! 
+ + if (wps->sample_index + sample_count > GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples && + GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index < sample_count) + sample_count = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index); + + if (GET_BLOCK_INDEX (wps->wphdr) > wps->sample_index || wps->wphdr.block_samples < sample_count) + wps->mute_error = TRUE; if (wps->mute_error) { if (wpc->reduced_channels == 1 || wpc->config.num_channels == 1 || (flags & MONO_FLAG)) @@ -510,7 +100,7 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co } if ((flags & HYBRID_FLAG) && !wps->block2buff) - mute_limit *= 2; + mute_limit = (mute_limit * 2) + 128; //////////////// handle lossless or hybrid lossy mono data ///////////////// @@ -529,40 +119,40 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co else i = get_words_lossless (wps, buffer, sample_count); - for (bptr = buffer; bptr < eptr;) { - read_word = *bptr; - +#ifdef DECORR_MONO_PASS_CONT + if (sample_count < 16) + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + decorr_mono_pass (dpp, buffer, sample_count); + else for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { - int32_t sam, temp; - int k; + int pre_samples = (dpp->term > MAX_TERM) ? 
2 : dpp->term; - if (dpp->term > MAX_TERM) { - if (dpp->term & 1) - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - else - sam = dpp->samples_A [0] + ((dpp->samples_A [0] - dpp->samples_A [1]) >> 1); + decorr_mono_pass (dpp, buffer, pre_samples); - dpp->samples_A [1] = dpp->samples_A [0]; - k = 0; - } - else { - sam = dpp->samples_A [m]; - k = (m + dpp->term) & (MAX_TERM - 1); - } - - temp = apply_weight (dpp->weight_A, sam) + read_word; - update_weight (dpp->weight_A, dpp->delta, sam, read_word); - dpp->samples_A [k] = read_word = temp; + DECORR_MONO_PASS_CONT (dpp, buffer + pre_samples, sample_count - pre_samples, + ((flags & MAG_MASK) >> MAG_LSB) > 15); } +#else + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + decorr_mono_pass (dpp, buffer, sample_count); +#endif - if (labs (read_word) > mute_limit) { +#ifndef LOSSY_MUTE + if (!(flags & HYBRID_FLAG)) +#endif + for (bptr = buffer; bptr < eptr; ++bptr) { + if (labs (bptr [0]) > mute_limit) { i = (uint32_t)(bptr - buffer); break; } - m = (m + 1) & (MAX_TERM - 1); - crc += (crc << 1) + (*bptr++ = read_word); + crc = crc * 3 + bptr [0]; } +#ifndef LOSSY_MUTE + else + for (bptr = buffer; bptr < eptr; ++bptr) + crc = crc * 3 + bptr [0]; +#endif } /////////////// handle lossless or hybrid lossy stereo data /////////////// @@ -583,36 +173,27 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co else i = get_words_lossless (wps, buffer, sample_count); -#ifdef FAST_DECODE - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - if (((flags & MAG_MASK) >> MAG_LSB) >= 16) +#ifdef DECORR_STEREO_PASS_CONT + if (sample_count < 16 || !DECORR_STEREO_PASS_CONT_AVAILABLE) { + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) decorr_stereo_pass (dpp, buffer, sample_count); - else if (tcount && dpp [0].term == 17 && dpp [1].term == 17) { - decorr_stereo_pass_1717 (dpp, buffer, sample_count); - tcount--; - dpp++; + + m = 
sample_count & (MAX_TERM - 1); + } + else + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { + int pre_samples = (dpp->term < 0 || dpp->term > MAX_TERM) ? 2 : dpp->term; + + decorr_stereo_pass (dpp, buffer, pre_samples); + + DECORR_STEREO_PASS_CONT (dpp, buffer + pre_samples * 2, sample_count - pre_samples, + ((flags & MAG_MASK) >> MAG_LSB) >= 16); } - else if (tcount && dpp [0].term == 17 && dpp [1].term == 18) { - decorr_stereo_pass_1718 (dpp, buffer, sample_count); - tcount--; - dpp++; - } - else if (tcount && dpp [0].term == 18 && dpp [1].term == 18) { - decorr_stereo_pass_1818 (dpp, buffer, sample_count); - tcount--; - dpp++; - } - else if (tcount && dpp [0].term >= 1 && dpp [0].term <= 7 && - dpp [1].term >= 1 && dpp [1].term <= 7) { - decorr_stereo_pass_nn (dpp, buffer, sample_count); - tcount--; - dpp++; - } - else - decorr_stereo_pass_i (dpp, buffer, sample_count); #else for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) decorr_stereo_pass (dpp, buffer, sample_count); + + m = sample_count & (MAX_TERM - 1); #endif if (flags & JOINT_STEREO) @@ -624,13 +205,14 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co for (bptr = buffer; bptr < eptr; bptr += 2) crc += (crc << 3) + (bptr [0] << 1) + bptr [0] + bptr [1]; +#ifndef LOSSY_MUTE + if (!(flags & HYBRID_FLAG)) +#endif for (bptr = buffer; bptr < eptr; bptr += 16) if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) { i = (uint32_t)(bptr - buffer) / 2; break; } - - m = sample_count & (MAX_TERM - 1); } /////////////////// handle hybrid lossless mono data //////////////////// @@ -686,10 +268,9 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co crc += (crc << 1) + read_word; -#ifdef LOSSY_MUTE if (labs (read_word) > mute_limit) break; -#endif + *bptr++ = read_word; } @@ -858,10 +439,9 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co right = right_c; } 
-#ifdef LOSSY_MUTE if (labs (left) > mute_limit || labs (right) > mute_limit) break; -#endif + crc += (crc << 3) + (left << 1) + left + right; *bptr++ = left; *bptr++ = right; @@ -917,6 +497,67 @@ int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_co return i; } +// General function to perform mono decorrelation pass on specified buffer +// (although since this is the reverse function it might technically be called +// "correlation" instead). This version handles all sample resolutions and +// weight deltas. The dpp->samples_X[] data is returned normalized for term +// values 1-8. + +static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) +{ + int32_t delta = dpp->delta, weight_A = dpp->weight_A; + int32_t *bptr, *eptr = buffer + sample_count, sam_A; + int m, k; + + switch (dpp->term) { + + case 17: + for (bptr = buffer; bptr < eptr; bptr++) { + sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1]; + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [0]; + } + + break; + + case 18: + for (bptr = buffer; bptr < eptr; bptr++) { + sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [0]; + } + + break; + + default: + for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr++) { + sam_A = dpp->samples_A [m]; + dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [k]; + m = (m + 1) & (MAX_TERM - 1); + k = (k + 1) & (MAX_TERM - 1); + } + + if (m) { + int32_t temp_samples [MAX_TERM]; + + memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A)); + + for (k = 0; k < 
MAX_TERM; k++, m++) + dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)]; + } + + break; + } + + dpp->weight_A = weight_A; +} + // General function to perform stereo decorrelation pass on specified buffer // (although since this is the reverse function it might technically be called // "correlation" instead). This version handles all sample resolutions and @@ -1028,245 +669,6 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_ } } -#ifdef FAST_DECODE - -// This function is a specialized version of decorr_stereo_pass() that works -// only with lower resolution data (<= 16-bit), but is otherwise identical. - -static void decorr_stereo_pass_i (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t *bptr, *eptr = buffer + (sample_count * 2); - int m, k; - - switch (dpp->term) { - case 17: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam, tmp; - - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - bptr [0] = dpp->samples_A [0] = apply_weight_i (dpp->weight_A, sam) + (tmp = bptr [0]); - update_weight (dpp->weight_A, dpp->delta, sam, tmp); - - sam = 2 * dpp->samples_B [0] - dpp->samples_B [1]; - dpp->samples_B [1] = dpp->samples_B [0]; - bptr [1] = dpp->samples_B [0] = apply_weight_i (dpp->weight_B, sam) + (tmp = bptr [1]); - update_weight (dpp->weight_B, dpp->delta, sam, tmp); - } - - break; - - case 18: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam, tmp; - - sam = dpp->samples_A [0] + ((dpp->samples_A [0] - dpp->samples_A [1]) >> 1); - dpp->samples_A [1] = dpp->samples_A [0]; - bptr [0] = dpp->samples_A [0] = apply_weight_i (dpp->weight_A, sam) + (tmp = bptr [0]); - update_weight (dpp->weight_A, dpp->delta, sam, tmp); - - sam = dpp->samples_B [0] + ((dpp->samples_B [0] - dpp->samples_B [1]) >> 1); - dpp->samples_B [1] = dpp->samples_B [0]; - bptr [1] = dpp->samples_B [0] = apply_weight_i (dpp->weight_B, sam) + (tmp = bptr [1]); - 
update_weight (dpp->weight_B, dpp->delta, sam, tmp); - } - - break; - - default: - for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam; - - sam = dpp->samples_A [m]; - dpp->samples_A [k] = apply_weight_i (dpp->weight_A, sam) + bptr [0]; - update_weight (dpp->weight_A, dpp->delta, sam, bptr [0]); - bptr [0] = dpp->samples_A [k]; - - sam = dpp->samples_B [m]; - dpp->samples_B [k] = apply_weight_i (dpp->weight_B, sam) + bptr [1]; - update_weight (dpp->weight_B, dpp->delta, sam, bptr [1]); - bptr [1] = dpp->samples_B [k]; - - m = (m + 1) & (MAX_TERM - 1); - k = (k + 1) & (MAX_TERM - 1); - } - - break; - - case -1: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam; - - sam = bptr [0] + apply_weight_i (dpp->weight_A, dpp->samples_A [0]); - update_weight_clip (dpp->weight_A, dpp->delta, dpp->samples_A [0], bptr [0]); - bptr [0] = sam; - dpp->samples_A [0] = bptr [1] + apply_weight_i (dpp->weight_B, sam); - update_weight_clip (dpp->weight_B, dpp->delta, sam, bptr [1]); - bptr [1] = dpp->samples_A [0]; - } - - break; - - case -2: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam; - - sam = bptr [1] + apply_weight_i (dpp->weight_B, dpp->samples_B [0]); - update_weight_clip (dpp->weight_B, dpp->delta, dpp->samples_B [0], bptr [1]); - bptr [1] = sam; - dpp->samples_B [0] = bptr [0] + apply_weight_i (dpp->weight_A, sam); - update_weight_clip (dpp->weight_A, dpp->delta, sam, bptr [0]); - bptr [0] = dpp->samples_B [0]; - } - - break; - - case -3: - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam_A, sam_B; - - sam_A = bptr [0] + apply_weight_i (dpp->weight_A, dpp->samples_A [0]); - update_weight_clip (dpp->weight_A, dpp->delta, dpp->samples_A [0], bptr [0]); - sam_B = bptr [1] + apply_weight_i (dpp->weight_B, dpp->samples_B [0]); - update_weight_clip (dpp->weight_B, dpp->delta, dpp->samples_B [0], bptr [1]); - bptr [0] = dpp->samples_B [0] = sam_A; - bptr [1] = dpp->samples_A [0] = sam_B; - } - - 
break; - } -} - -// These functions are specialized versions of decorr_stereo_pass() that work -// only with lower resolution data (<= 16-bit) and handle the equivalent of -// *two* decorrelation passes. By combining two passes we save a read and write -// of the sample data and some overhead dealing with buffer pointers and looping. -// -// The cases handled are: -// 17,17 -- standard "fast" mode before version 4.40 -// 17,18 -- standard "fast" mode starting with 4.40 -// 18,18 -- used in the default and higher modes -// [1-7],[1-7] -- common in "high" and "very high" modes - -static void decorr_stereo_pass_1718 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t *bptr, *eptr = buffer + (sample_count * 2); - - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam; - - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = apply_weight_i (dpp->weight_A, sam) + bptr [0]; - update_weight (dpp->weight_A, dpp->delta, sam, bptr [0]); - - sam = (dpp+1)->samples_A [0] + (((dpp+1)->samples_A [0] - (dpp+1)->samples_A [1]) >> 1); - (dpp+1)->samples_A [1] = (dpp+1)->samples_A [0]; - bptr [0] = (dpp+1)->samples_A [0] = apply_weight_i ((dpp+1)->weight_A, sam) + dpp->samples_A [0]; - update_weight ((dpp+1)->weight_A, (dpp+1)->delta, sam, dpp->samples_A [0]); - - sam = 2 * dpp->samples_B [0] - dpp->samples_B [1]; - dpp->samples_B [1] = dpp->samples_B [0]; - dpp->samples_B [0] = apply_weight_i (dpp->weight_B, sam) + bptr [1]; - update_weight (dpp->weight_B, dpp->delta, sam, bptr [1]); - - sam = (dpp+1)->samples_B [0] + (((dpp+1)->samples_B [0] - (dpp+1)->samples_B [1]) >> 1); - (dpp+1)->samples_B [1] = (dpp+1)->samples_B [0]; - bptr [1] = (dpp+1)->samples_B [0] = apply_weight_i ((dpp+1)->weight_B, sam) + dpp->samples_B [0]; - update_weight ((dpp+1)->weight_B, (dpp+1)->delta, sam, dpp->samples_B [0]); - } -} - -static void decorr_stereo_pass_1717 (struct decorr_pass *dpp, int32_t *buffer, 
int32_t sample_count) -{ - int32_t *bptr, *eptr = buffer + (sample_count * 2); - - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam; - - sam = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = apply_weight_i (dpp->weight_A, sam) + bptr [0]; - update_weight (dpp->weight_A, dpp->delta, sam, bptr [0]); - - sam = 2 * (dpp+1)->samples_A [0] - (dpp+1)->samples_A [1]; - (dpp+1)->samples_A [1] = (dpp+1)->samples_A [0]; - bptr [0] = (dpp+1)->samples_A [0] = apply_weight_i ((dpp+1)->weight_A, sam) + dpp->samples_A [0]; - update_weight ((dpp+1)->weight_A, (dpp+1)->delta, sam, dpp->samples_A [0]); - - sam = 2 * dpp->samples_B [0] - dpp->samples_B [1]; - dpp->samples_B [1] = dpp->samples_B [0]; - dpp->samples_B [0] = apply_weight_i (dpp->weight_B, sam) + bptr [1]; - update_weight (dpp->weight_B, dpp->delta, sam, bptr [1]); - - sam = 2 * (dpp+1)->samples_B [0] - (dpp+1)->samples_B [1]; - (dpp+1)->samples_B [1] = (dpp+1)->samples_B [0]; - bptr [1] = (dpp+1)->samples_B [0] = apply_weight_i ((dpp+1)->weight_B, sam) + dpp->samples_B [0]; - update_weight ((dpp+1)->weight_B, (dpp+1)->delta, sam, dpp->samples_B [0]); - } -} - -static void decorr_stereo_pass_1818 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t *bptr, *eptr = buffer + (sample_count * 2); - - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t sam; - - sam = dpp->samples_A [0] + ((dpp->samples_A [0] - dpp->samples_A [1]) >> 1); - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = apply_weight_i (dpp->weight_A, sam) + bptr [0]; - update_weight (dpp->weight_A, dpp->delta, sam, bptr [0]); - - sam = (dpp+1)->samples_A [0] + (((dpp+1)->samples_A [0] - (dpp+1)->samples_A [1]) >> 1); - (dpp+1)->samples_A [1] = (dpp+1)->samples_A [0]; - bptr [0] = (dpp+1)->samples_A [0] = apply_weight_i ((dpp+1)->weight_A, sam) + dpp->samples_A [0]; - update_weight ((dpp+1)->weight_A, (dpp+1)->delta, sam, dpp->samples_A [0]); - - 
sam = dpp->samples_B [0] + ((dpp->samples_B [0] - dpp->samples_B [1]) >> 1); - dpp->samples_B [1] = dpp->samples_B [0]; - dpp->samples_B [0] = apply_weight_i (dpp->weight_B, sam) + bptr [1]; - update_weight (dpp->weight_B, dpp->delta, sam, bptr [1]); - - sam = (dpp+1)->samples_B [0] + (((dpp+1)->samples_B [0] - (dpp+1)->samples_B [1]) >> 1); - (dpp+1)->samples_B [1] = (dpp+1)->samples_B [0]; - bptr [1] = (dpp+1)->samples_B [0] = apply_weight_i ((dpp+1)->weight_B, sam) + dpp->samples_B [0]; - update_weight ((dpp+1)->weight_B, (dpp+1)->delta, sam, dpp->samples_B [0]); - } -} - -static void decorr_stereo_pass_nn (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t *bptr, *eptr = buffer + (sample_count * 2); - int m, k, j; - - m = 0; - k = dpp->term & (MAX_TERM - 1); - j = (dpp+1)->term & (MAX_TERM - 1); - - for (bptr = buffer; bptr < eptr; bptr += 2) { - int32_t tmp; - - dpp->samples_A [k] = apply_weight_i (dpp->weight_A, dpp->samples_A [m]) + (tmp = bptr [0]); - update_weight (dpp->weight_A, dpp->delta, dpp->samples_A [m], tmp); - - bptr [0] = (dpp+1)->samples_A [j] = apply_weight_i ((dpp+1)->weight_A, (dpp+1)->samples_A [m]) + (tmp = dpp->samples_A [k]); - update_weight ((dpp+1)->weight_A, (dpp+1)->delta, (dpp+1)->samples_A [m], tmp); - - dpp->samples_B [k] = apply_weight_i (dpp->weight_B, dpp->samples_B [m]) + (tmp = bptr [1]); - update_weight (dpp->weight_B, dpp->delta, dpp->samples_B [m], tmp); - - bptr [1] = (dpp+1)->samples_B [j] = apply_weight_i ((dpp+1)->weight_B, (dpp+1)->samples_B [m]) + (tmp = dpp->samples_B [k]); - update_weight ((dpp+1)->weight_B, (dpp+1)->delta, (dpp+1)->samples_B [m], tmp); - - m = (m + 1) & (MAX_TERM - 1); - k = (k + 1) & (MAX_TERM - 1); - j = (j + 1) & (MAX_TERM - 1); - } -} - -#endif - // This is a helper function for unpack_samples() that applies several final // operations. 
First, if the data is 32-bit float data, then that conversion // is done in the float.c module (whether lossy or lossless) and we return. @@ -1413,5 +815,3 @@ int check_crc_error (WavpackContext *wpc) return result; } - -#endif diff --git a/third_party/wavpack/src/unpack3.c b/third_party/wavpack/src/unpack3.c index d877a79..dccf085 100644 --- a/third_party/wavpack/src/unpack3.c +++ b/third_party/wavpack/src/unpack3.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // All Rights Reserved. // // Distributed under the BSD Software License (see license.txt) // //////////////////////////////////////////////////////////////////////////// @@ -12,387 +12,24 @@ // not including "raw" files. As these modes are all obsolete and are no // longer written, this code will not be fully documented other than the // global functions. However, full documenation is provided in the version -// 3.97 source code. +// 3.97 source code. Note that this module does only the low-level sample +// unpacking; the actual opening of the file (and obtaining information +// from it) is handled in the unpack3_open.c module. 
+ +#ifdef ENABLE_LEGACY #include -#include #include -#include #include "wavpack_local.h" #include "unpack3.h" #define ATTEMPT_ERROR_MUTING -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -static void unpack_init3 (WavpackStream3 *wps); -static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader *reader, void *id); -static void bs_close_read3 (Bitstream3 *bs); -#ifndef NO_SEEKING -static void bs_restore3 (Bitstream3 *bs); -#endif - -// This provides an extension to the WavpackOpenFileRead () function contained -// in the wputils.c module. It is assumed that an 'R' had been read as the -// first character of the file/stream (indicating a non-raw pre version 4.0 -// WavPack file) and had been pushed back onto the stream (or simply seeked -// back to). - -WavpackContext *open_file3 (WavpackContext *wpc, char *error) -{ - RiffChunkHeader RiffChunkHeader; - ChunkHeader ChunkHeader; - WavpackHeader3 wphdr; - WavpackStream3 *wps; - WaveHeader3 wavhdr; - - CLEAR (wavhdr); - wpc->stream3 = wps = (WavpackStream3 *) malloc (sizeof (WavpackStream3)); - CLEAR (*wps); - - if (wpc->reader->read_bytes (wpc->wv_in, &RiffChunkHeader, sizeof (RiffChunkHeader)) != - sizeof (RiffChunkHeader)) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - if (!strncmp (RiffChunkHeader.ckID, "RIFF", 4) && !strncmp (RiffChunkHeader.formType, "WAVE", 4)) { - - if (wpc->open_flags & OPEN_WRAPPER) { - wpc->wrapper_data = malloc (wpc->wrapper_bytes = sizeof (RiffChunkHeader)); - memcpy (wpc->wrapper_data, &RiffChunkHeader, sizeof (RiffChunkHeader)); - } - - // If the first chunk is a wave RIFF header, then read the various chunks - // until we get to the "data" chunk (and WavPack header should follow). 
If - // the first chunk is not a RIFF, then we assume a "raw" WavPack file and - // the WavPack header must be first. - - while (1) { - - if (wpc->reader->read_bytes (wpc->wv_in, &ChunkHeader, sizeof (ChunkHeader)) != - sizeof (ChunkHeader)) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - else { - if (wpc->open_flags & OPEN_WRAPPER) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + sizeof (ChunkHeader)); - memcpy (wpc->wrapper_data + wpc->wrapper_bytes, &ChunkHeader, sizeof (ChunkHeader)); - wpc->wrapper_bytes += sizeof (ChunkHeader); - } - - little_endian_to_native (&ChunkHeader, ChunkHeaderFormat); - - if (!strncmp (ChunkHeader.ckID, "fmt ", 4)) { - - if (ChunkHeader.ckSize < sizeof (wavhdr) || - wpc->reader->read_bytes (wpc->wv_in, &wavhdr, sizeof (wavhdr)) != sizeof (wavhdr)) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - else if (wpc->open_flags & OPEN_WRAPPER) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + sizeof (wavhdr)); - memcpy (wpc->wrapper_data + wpc->wrapper_bytes, &wavhdr, sizeof (wavhdr)); - wpc->wrapper_bytes += sizeof (wavhdr); - } - - little_endian_to_native (&wavhdr, WaveHeader3Format); - - if (ChunkHeader.ckSize > sizeof (wavhdr)) { - uint32_t bytes_to_skip = (ChunkHeader.ckSize + 1 - sizeof (wavhdr)) & ~1L; - - if (bytes_to_skip > 1024 * 1024) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - if (wpc->open_flags & OPEN_WRAPPER) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes_to_skip); - wpc->reader->read_bytes (wpc->wv_in, wpc->wrapper_data + wpc->wrapper_bytes, bytes_to_skip); - wpc->wrapper_bytes += bytes_to_skip; - } - else { - unsigned char *temp = malloc (bytes_to_skip); - wpc->reader->read_bytes (wpc->wv_in, temp, bytes_to_skip); - free (temp); - } - } - } - else if (!strncmp (ChunkHeader.ckID, "data", 4)) - 
break; - else if ((ChunkHeader.ckSize + 1) & ~1L) { - uint32_t bytes_to_skip = (ChunkHeader.ckSize + 1) & ~1L; - - if (bytes_to_skip > 1024 * 1024) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - if (wpc->open_flags & OPEN_WRAPPER) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes_to_skip); - wpc->reader->read_bytes (wpc->wv_in, wpc->wrapper_data + wpc->wrapper_bytes, bytes_to_skip); - wpc->wrapper_bytes += bytes_to_skip; - } - else { - unsigned char *temp = malloc (bytes_to_skip); - wpc->reader->read_bytes (wpc->wv_in, temp, bytes_to_skip); - free (temp); - } - } - } - } - } - else { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - if (wavhdr.FormatTag != 1 || !wavhdr.NumChannels || wavhdr.NumChannels > 2 || - !wavhdr.SampleRate || wavhdr.BitsPerSample < 16 || wavhdr.BitsPerSample > 24 || - wavhdr.BlockAlign / wavhdr.NumChannels > 3 || wavhdr.BlockAlign % wavhdr.NumChannels || - wavhdr.BlockAlign / wavhdr.NumChannels < (wavhdr.BitsPerSample + 7) / 8) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - wpc->total_samples = ChunkHeader.ckSize / wavhdr.NumChannels / - ((wavhdr.BitsPerSample > 16) ? 
3 : 2); - - if (wpc->reader->read_bytes (wpc->wv_in, &wphdr, 10) != 10) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - if (((char *) &wphdr) [8] == 2 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10, 2) != 2)) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - else if (((char *) &wphdr) [8] == 3 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10, - sizeof (wphdr) - 10) != sizeof (wphdr) - 10)) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - little_endian_to_native (&wphdr, WavpackHeader3Format); - - // make sure this is a version we know about - - if (strncmp (wphdr.ckID, "wvpk", 4) || wphdr.version < 1 || wphdr.version > 3) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - // Because I ran out of flag bits in the WavPack header, an amazingly ugly - // kludge was forced upon me! 
This code takes care of preparing the flags - // field for internal use and checking for unknown formats we can't decode - - if (wphdr.version == 3) { - - if (wphdr.flags & EXTREME_DECORR) { - - if ((wphdr.flags & NOT_STORED_FLAGS) || - ((wphdr.bits) && - (((wphdr.flags & NEW_HIGH_FLAG) && - (wphdr.flags & (FAST_FLAG | HIGH_FLAG))) || - (wphdr.flags & CROSS_DECORR)))) { - if (error) strcpy (error, "not a valid WavPack file!"); - return WavpackCloseFile (wpc); - } - - if (wphdr.flags & CANCEL_EXTREME) - wphdr.flags &= ~(EXTREME_DECORR | CANCEL_EXTREME); - } - else - wphdr.flags &= ~CROSS_DECORR; - } - - // check to see if we should look for a "correction" file, and if so try - // to open it for reading, then set WVC_FLAG accordingly - - if (wpc->wvc_in && wphdr.version == 3 && wphdr.bits && (wphdr.flags & NEW_HIGH_FLAG)) { - wpc->file2len = wpc->reader->get_length (wpc->wvc_in); - wphdr.flags |= WVC_FLAG; - wpc->wvc_flag = TRUE; - } - else - wphdr.flags &= ~WVC_FLAG; - - // check WavPack version to handle special requirements of versions - // before 3.0 that had smaller headers - - if (wphdr.version < 3) { - wphdr.total_samples = wpc->total_samples; - wphdr.flags = wavhdr.NumChannels == 1 ? MONO_FLAG : 0; - wphdr.shift = 16 - wavhdr.BitsPerSample; - - if (wphdr.version == 1) - wphdr.bits = 0; - } - - wpc->config.sample_rate = wavhdr.SampleRate; - wpc->config.num_channels = wavhdr.NumChannels; - wpc->config.channel_mask = 5 - wavhdr.NumChannels; - - if (wphdr.flags & MONO_FLAG) - wpc->config.flags |= CONFIG_MONO_FLAG; - - if (wphdr.flags & EXTREME_DECORR) - wpc->config.flags |= CONFIG_HIGH_FLAG; - - if (wphdr.bits) { - if (wphdr.flags & NEW_HIGH_FLAG) - wpc->config.flags |= CONFIG_HYBRID_FLAG; - else - wpc->config.flags |= CONFIG_LOSSY_MODE; - } - else if (!(wphdr.flags & HIGH_FLAG)) - wpc->config.flags |= CONFIG_FAST_FLAG; - - wpc->config.bytes_per_sample = (wphdr.flags & BYTES_3) ? 
3 : 2; - wpc->config.bits_per_sample = wavhdr.BitsPerSample; - - memcpy (&wps->wphdr, &wphdr, sizeof (wphdr)); - wps->wvbits.bufsiz = wps->wvcbits.bufsiz = 1024 * 1024; - return wpc; -} - -// return currently decoded sample index - -uint32_t get_sample_index3 (WavpackContext *wpc) -{ - WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; - - return (wps) ? wps->sample_index : (uint32_t) -1; -} - -int get_version3 (WavpackContext *wpc) -{ - WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; - - return (wps) ? wps->wphdr.version : 0; -} - -void free_stream3 (WavpackContext *wpc) -{ - WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; - - if (wps) { -#ifndef NO_SEEKING - if (wps->unpack_data) - free (wps->unpack_data); -#endif - if (wps->wphdr.flags & WVC_FLAG) - bs_close_read3 (&wps->wvcbits); - - bs_close_read3 (&wps->wvbits); - - free (wps); - } -} - -static void bs_read3 (Bitstream3 *bs) -{ - uint32_t bytes_read; - - bytes_read = bs->reader->read_bytes (bs->id, bs->buf, bs->bufsiz); - bs->end = bs->buf + bytes_read; - bs->fpos += bytes_read; - - if (bs->end == bs->buf) { - memset (bs->buf, -1, bs->bufsiz); - bs->end += bs->bufsiz; - } - - bs->ptr = bs->buf; -} - -// Open the specified BitStream and associate with the specified file. The -// "bufsiz" field of the structure must be preset with the desired buffer -// size and the file's read pointer must be set to where the desired bit -// data is located. A return value of TRUE indicates an error in -// allocating buffer space. - -static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader *reader, void *id) -{ - bs->fpos = (bs->reader = reader)->get_pos (bs->id = id); - - if (!bs->buf) - bs->buf = (unsigned char *) malloc (bs->bufsiz); - - bs->end = bs->buf + bs->bufsiz; - bs->ptr = bs->end - 1; - bs->sr = bs->bc = 0; - bs->error = bs->buf ? 
0 : 1; - bs->wrap = bs_read3; - return bs->error; -} - -#ifndef NO_SEEKING - -// This function is called after a call to unpack_restore() has restored -// the BitStream structure to a previous state and causes any required data -// to be read from the file. This function is NOT supported for overlapped -// operation. - -static void bs_restore3 (Bitstream3 *bs) -{ - uint32_t bytes_to_read = (uint32_t)(bs->end - bs->ptr - 1), bytes_read; - - bs->reader->set_pos_abs (bs->id, bs->fpos - bytes_to_read); - - if (bytes_to_read > 0) { - - bytes_read = bs->reader->read_bytes (bs->id, bs->ptr + 1, bytes_to_read); - - if (bytes_to_read != bytes_read) - bs->end = bs->ptr + 1 + bytes_read; - } -} - -#endif - -// This function is called to release any resources used by the BitStream -// and position the file pointer to the first byte past the read bits. - -static void bs_close_read3 (Bitstream3 *bs) -{ - if (bs->buf) { - free (bs->buf); - CLEAR (*bs); - } -} - -static uint32_t bs_unused_bytes (Bitstream3 *bs) -{ - if (bs->bc < 8) { - bs->bc += 8; - bs->ptr++; - } - - return (uint32_t)(bs->end - bs->ptr); -} - -static unsigned char *bs_unused_data (Bitstream3 *bs) -{ - if (bs->bc < 8) { - bs->bc += 8; - bs->ptr++; - } - - return bs->ptr; -} - -#ifndef NO_UNPACK +static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader64 *reader, void *id); +static uint32_t bs_unused_bytes (Bitstream3 *bs); +static unsigned char *bs_unused_data (Bitstream3 *bs); +static void init_words3 (WavpackStream3 *wps); //////////////////////////////// local macros ///////////////////////////////// @@ -426,13 +63,13 @@ static const signed char extreme_terms [] = { 1,1,1,2,4,-1,1,2,3,6,-2,8,5,7,4,1, static const signed char default_terms [] = { 1,1,1,-1,2,1,-2 }; static const signed char simple_terms [] = { 1,1,1,1 }; +///////////////////////////// executable code //////////////////////////////// + // This function initializes everything required to unpack WavPack // bitstreams and must be called before 
any unpacking is performed. Note // that the (WavpackHeader3 *) in the WavpackStream3 struct must be valid. -static void init_words3 (WavpackStream3 *wps); - -static void unpack_init3 (WavpackStream3 *wps) +void unpack_init3 (WavpackStream3 *wps) { int flags = wps->wphdr.flags; struct decorr_pass *dpp; @@ -461,9 +98,6 @@ static void unpack_init3 (WavpackStream3 *wps) #ifndef NO_SEEKING -#define SAVE(destin, item) { memcpy (destin, &item, sizeof (item)); destin = (char *) destin + sizeof (item); } -#define RESTORE(item, source) { memcpy (&item, source, sizeof (item)); source = (char *) source + sizeof (item); } - // This function returns the size (in bytes) required to save the unpacking // context. Note that the (WavpackHeader3 *) in the WavpackStream3 struct // must be valid. @@ -504,7 +138,7 @@ static int unpack_size (WavpackStream3 *wps) } if (flags & (HIGH_FLAG | NEW_HIGH_FLAG)) - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { if (dpp->term > 0) { byte_sum += sizeof (dpp->samples_A [0]) * dpp->term; byte_sum += sizeof (dpp->weight_A); @@ -518,6 +152,7 @@ static int unpack_size (WavpackStream3 *wps) byte_sum += sizeof (dpp->samples_A [0]) + sizeof (dpp->samples_B [0]); byte_sum += sizeof (dpp->weight_A) + sizeof (dpp->weight_B); } + } return byte_sum; } @@ -572,7 +207,7 @@ static void *unpack_save (WavpackStream3 *wps, void *destin) } if (flags & (HIGH_FLAG | NEW_HIGH_FLAG)) - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { if (dpp->term > 0) { int count = dpp->term; int index = wps->dc.m; @@ -602,177 +237,11 @@ static void *unpack_save (WavpackStream3 *wps, void *destin) SAVE (destin, dpp->samples_A [0]); SAVE (destin, dpp->samples_B [0]); } + } return destin; } -// This function restores the unpacking context from the specified pointer -// and returns the 
updated pointer. After this call, unpack_samples() will -// continue where it left off immediately before unpack_save() was called. -// If the WavPack files and bitstreams might have been closed and reopened, -// then the "keep_resources" flag should be set to avoid using the "old" -// resources that were originally saved (and are probably now invalid). - -static void *unpack_restore (WavpackStream3 *wps, void *source, int keep_resources) -{ - int flags = wps->wphdr.flags, tcount; - struct decorr_pass *dpp; - FILE *temp_file; - unsigned char *temp_buf; - - unpack_init3 (wps); - temp_file = wps->wvbits.id; - temp_buf = wps->wvbits.buf; - RESTORE (wps->wvbits, source); - - if (keep_resources) { - wps->wvbits.id = temp_file; - wps->wvbits.ptr += temp_buf - wps->wvbits.buf; - wps->wvbits.end += temp_buf - wps->wvbits.buf; - wps->wvbits.buf = temp_buf; - } - - bs_restore3 (&wps->wvbits); - - if (flags & WVC_FLAG) { - temp_file = wps->wvcbits.id; - temp_buf = wps->wvcbits.buf; - RESTORE (wps->wvcbits, source); - - if (keep_resources) { - wps->wvcbits.id = temp_file; - wps->wvcbits.ptr += temp_buf - wps->wvcbits.buf; - wps->wvcbits.end += temp_buf - wps->wvcbits.buf; - wps->wvcbits.buf = temp_buf; - } - - bs_restore3 (&wps->wvcbits); - } - - if (wps->wphdr.version == 3) { - if (wps->wphdr.bits) { - RESTORE (wps->w4, source); - } - else { - RESTORE (wps->w1, source); - } - - RESTORE (wps->w3, source); - RESTORE (wps->dc.crc, source); - } - else - RESTORE (wps->w2, source); - - if (wps->wphdr.bits) { - RESTORE (wps->dc.error, source); - } - else { - RESTORE (wps->dc.sum_level, source); - RESTORE (wps->dc.left_level, source); - RESTORE (wps->dc.right_level, source); - RESTORE (wps->dc.diff_level, source); - } - - if (flags & OVER_20) { - RESTORE (wps->dc.last_extra_bits, source); - RESTORE (wps->dc.extra_bits_count, source); - } - - if (!(flags & EXTREME_DECORR)) { - RESTORE (wps->dc.sample, source); - RESTORE (wps->dc.weight, source); - } - - if (flags & (HIGH_FLAG | 
NEW_HIGH_FLAG)) - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - if (dpp->term > 0) { - int count = dpp->term; - int index = wps->dc.m; - - RESTORE (dpp->weight_A, source); - - while (count--) { - RESTORE (dpp->samples_A [index], source); - index = (index + 1) & (MAX_TERM - 1); - } - - if (!(flags & MONO_FLAG)) { - count = dpp->term; - index = wps->dc.m; - - RESTORE (dpp->weight_B, source); - - while (count--) { - RESTORE (dpp->samples_B [index], source); - index = (index + 1) & (MAX_TERM - 1); - } - } - } - else { - RESTORE (dpp->weight_A, source); - RESTORE (dpp->weight_B, source); - RESTORE (dpp->samples_A [0], source); - RESTORE (dpp->samples_B [0], source); - } - - return source; -} - -// This is an extension for WavpackSeekSample (). Note that because WavPack -// files created prior to version 4.0 are not inherently seekable, this -// function could take a long time if a forward seek is requested to an -// area that has not been played (or seeked through) yet. - - -int seek_sample3 (WavpackContext *wpc, uint32_t desired_index) -{ - int points_index = desired_index / ((wpc->total_samples >> 8) + 1); - WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; - - if (desired_index >= wpc->total_samples) - return FALSE; - - while (points_index) - if (wps->index_points [points_index].saved && - wps->index_points [points_index].sample_index <= desired_index) - break; - else - points_index--; - - if (wps->index_points [points_index].saved) - if (wps->index_points [points_index].sample_index > wps->sample_index || - wps->sample_index > desired_index) { - wps->sample_index = wps->index_points [points_index].sample_index; - unpack_restore (wps, wps->unpack_data + points_index * wps->unpack_size, TRUE); - } - - if (desired_index > wps->sample_index) { - int32_t *buffer = (int32_t *) malloc (1024 * (wps->wphdr.flags & MONO_FLAG ? 
4 : 8)); - uint32_t samples_to_skip = desired_index - wps->sample_index; - - while (1) { - if (samples_to_skip > 1024) { - if (unpack_samples3 (wpc, buffer, 1024) == 1024) - samples_to_skip -= 1024; - else - break; - } - else { - samples_to_skip -= unpack_samples3 (wpc, buffer, samples_to_skip); - break; - } - } - - free (buffer); - - if (samples_to_skip) - return FALSE; - } - - return TRUE; -} - - #endif // This monster actually unpacks the WavPack bitstream(s) into the specified @@ -797,7 +266,7 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; int shift = wps->wphdr.shift, flags = wps->wphdr.flags, min_weight = 0, m = wps->dc.m, tcount; #ifndef NO_SEEKING - int points_index = wps->sample_index / ((wpc->total_samples >> 8) + 1); + int points_index = wps->sample_index / (((uint32_t) wpc->total_samples >> 8) + 1); #endif int32_t min_value, max_value, min_shifted, max_shifted; int32_t correction [2], crc = wps->dc.crc; @@ -820,7 +289,7 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c #endif if (wps->sample_index + sample_count > wpc->total_samples) - sample_count = wpc->total_samples - wps->sample_index; + sample_count = (uint32_t) (wpc->total_samples - wps->sample_index); if (!sample_count) return 0; @@ -1705,22 +1174,22 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c wpc->crc_errors++; if (wpc->open_flags & OPEN_WRAPPER) { - unsigned char *temp = malloc (1024); + unsigned char *temp = (unsigned char *)malloc (1024); uint32_t bcount; if (bs_unused_bytes (&wps->wvbits)) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bs_unused_bytes (&wps->wvbits)); + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + bs_unused_bytes (&wps->wvbits)); memcpy (wpc->wrapper_data + wpc->wrapper_bytes, bs_unused_data (&wps->wvbits), bs_unused_bytes (&wps->wvbits)); 
wpc->wrapper_bytes += bs_unused_bytes (&wps->wvbits); } while (1) { - bcount = wpc->reader->read_bytes (wpc->wv_in, temp, sizeof (temp)); + bcount = wpc->reader->read_bytes (wpc->wv_in, temp, 1024); if (!bcount) break; - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + bcount); + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + bcount); memcpy (wpc->wrapper_data + wpc->wrapper_bytes, temp, bcount); wpc->wrapper_bytes += bcount; } @@ -1733,7 +1202,7 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c for (c = 0; c < 16 && wpc->wrapper_data [c] == 0xff; ++c); if (c == 16) { - memcpy (wpc->wrapper_data, wpc->wrapper_data + 16, wpc->wrapper_bytes - 16); + memmove (wpc->wrapper_data, wpc->wrapper_data + 16, wpc->wrapper_bytes - 16); wpc->wrapper_bytes -= 16; } else { @@ -1753,12 +1222,6 @@ int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_c return i; } -///////////////////////////// local table storage //////////////////////////// - -extern const uint32_t bitset []; -extern const uint32_t bitmask []; -extern const char nbits_table []; - // This function initializes everything required to receive words with this // module and must be called BEFORE any other function in this module. @@ -1775,18 +1238,6 @@ static void init_words3 (WavpackStream3 *wps) wps->w4.bitrate = (wps->wphdr.bits / 2) - 768; } -// This macro counts the number of bits that are required to specify the -// unsigned 32-bit value, counting from the LSB to the most significant bit -// that is set. Return range is 0 - 32. - -#define count_bits(av) ( \ - (av) < (1 << 8) ? nbits_table [av] : \ - ( \ - (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \ - ((av) < (1L << 24) ? 
nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \ - ) \ -) - static int32_t FASTCALL get_word1 (WavpackStream3 *wps, int chan) { uint32_t tmp1, tmp2, avalue; @@ -2033,7 +1484,7 @@ static int32_t FASTCALL get_word3 (WavpackStream3 *wps, int chan) } } -static int FASTCALL _log2 (uint32_t avalue); +static int FASTCALL wp3_log2 (uint32_t avalue); static int32_t FASTCALL get_word4 (WavpackStream3 *wps, int chan, int32_t *correction) { @@ -2076,22 +1527,22 @@ static int32_t FASTCALL get_word4 (WavpackStream3 *wps, int chan, int32_t *corre int slow_log_0, slow_log_1, balance; if (wps->wphdr.flags & MONO_FLAG) { - wps->w4.bits_acc [0] += wps->w4.bitrate + _log2 (wps->w4.fast_level [0]) - _log2 (wps->w4.slow_level [0]) + (3 << 8); + wps->w4.bits_acc [0] += wps->w4.bitrate + wp3_log2 (wps->w4.fast_level [0]) - wp3_log2 (wps->w4.slow_level [0]) + (3 << 8); if (wps->w4.bits_acc [0] < 0) wps->w4.bits_acc [0] = 0; } else { - slow_log_0 = _log2 (wps->w4.slow_level [0]); - slow_log_1 = _log2 (wps->w4.slow_level [1]); + slow_log_0 = wp3_log2 (wps->w4.slow_level [0]); + slow_log_1 = wp3_log2 (wps->w4.slow_level [1]); if (wps->wphdr.flags & JOINT_STEREO) balance = (slow_log_1 - slow_log_0 + 257) >> 1; else balance = (slow_log_1 - slow_log_0 + 1) >> 1; - wps->w4.bits_acc [0] += wps->w4.bitrate - balance + _log2 (wps->w4.fast_level [0]) - slow_log_0 + (3 << 8); - wps->w4.bits_acc [1] += wps->w4.bitrate + balance + _log2 (wps->w4.fast_level [1]) - slow_log_1 + (3 << 8); + wps->w4.bits_acc [0] += wps->w4.bitrate - balance + wp3_log2 (wps->w4.fast_level [0]) - slow_log_0 + (3 << 8); + wps->w4.bits_acc [1] += wps->w4.bitrate + balance + wp3_log2 (wps->w4.fast_level [1]) - slow_log_1 + (3 << 8); if (wps->w4.bits_acc [0] + wps->w4.bits_acc [1] < 0) wps->w4.bits_acc [0] = wps->w4.bits_acc [1] = 0; @@ -2171,7 +1622,7 @@ static int32_t FASTCALL get_word4 (WavpackStream3 *wps, int chan, int32_t *corre // fraction) from the supplied value. 
Using logarithms makes comparing // signal level values and calculating fractional bitrates much easier. -static int FASTCALL _log2 (uint32_t avalue) +static int FASTCALL wp3_log2 (uint32_t avalue) { int dbits; @@ -2191,5 +1642,62 @@ static int FASTCALL _log2 (uint32_t avalue) } } -#endif +static void bs_read3 (Bitstream3 *bs) +{ + uint32_t bytes_read; + + bytes_read = bs->reader->read_bytes (bs->id, bs->buf, bs->bufsiz); + bs->end = bs->buf + bytes_read; + bs->fpos += bytes_read; + + if (bs->end == bs->buf) { + memset (bs->buf, -1, bs->bufsiz); + bs->end += bs->bufsiz; + } + + bs->ptr = bs->buf; +} + +// Open the specified BitStream and associate with the specified file. The +// "bufsiz" field of the structure must be preset with the desired buffer +// size and the file's read pointer must be set to where the desired bit +// data is located. A return value of TRUE indicates an error in +// allocating buffer space. + +static int bs_open_read3 (Bitstream3 *bs, WavpackStreamReader64 *reader, void *id) +{ + bs->fpos = (bs->reader = reader)->get_pos (bs->id = id); + + if (!bs->buf) + bs->buf = (unsigned char *) malloc (bs->bufsiz); + + bs->end = bs->buf + bs->bufsiz; + bs->ptr = bs->end - 1; + bs->sr = bs->bc = 0; + bs->error = bs->buf ? 0 : 1; + bs->wrap = bs_read3; + return bs->error; +} + +static uint32_t bs_unused_bytes (Bitstream3 *bs) +{ + if (bs->bc < 8) { + bs->bc += 8; + bs->ptr++; + } + + return (uint32_t)(bs->end - bs->ptr); +} + +static unsigned char *bs_unused_data (Bitstream3 *bs) +{ + if (bs->bc < 8) { + bs->bc += 8; + bs->ptr++; + } + + return bs->ptr; +} + +#endif // ENABLE_LEGACY diff --git a/third_party/wavpack/src/unpack3.h b/third_party/wavpack/src/unpack3.h index cf3ca0e..ae351f8 100644 --- a/third_party/wavpack/src/unpack3.h +++ b/third_party/wavpack/src/unpack3.h @@ -12,9 +12,9 @@ // decoding old (versions 1, 2 & 3) WavPack files. 
typedef struct { - unsigned short FormatTag, NumChannels; + uint16_t FormatTag, NumChannels; uint32_t SampleRate, BytesPerSecond; - unsigned short BlockAlign, BitsPerSample; + uint16_t BlockAlign, BitsPerSample; } WaveHeader3; #define WaveHeader3Format "SSLLSS" @@ -22,9 +22,9 @@ typedef struct { typedef struct { char ckID [4]; int32_t ckSize; - short version; - short bits; // added for version 2.00 - short flags, shift; // added for version 3.00 + int16_t version; + int16_t bits; // added for version 2.00 + int16_t flags, shift; // added for version 3.00 int32_t total_samples, crc, crc2; char extension [4], extra_bc, extras [3]; } WavpackHeader3; @@ -62,8 +62,9 @@ typedef struct { typedef struct bs3 { void (*wrap)(struct bs3 *bs); unsigned char *buf, *end, *ptr; - uint32_t bufsiz, fpos, sr; - WavpackStreamReader *reader; + uint32_t bufsiz, sr; + int64_t fpos; + WavpackStreamReader64 *reader; int error, bc; void *id; } Bitstream3; @@ -111,3 +112,8 @@ typedef struct { int bits_acc [2], bitrate; } w4; } WavpackStream3; + +#define SAVE(destin, item) { memcpy (destin, &item, sizeof (item)); destin = (char *) destin + sizeof (item); } +#define RESTORE(item, source) { memcpy (&item, source, sizeof (item)); source = (char *) source + sizeof (item); } + +void unpack_init3 (WavpackStream3 *wps); diff --git a/third_party/wavpack/src/unpack3_open.c b/third_party/wavpack/src/unpack3_open.c new file mode 100644 index 0000000..1572aaf --- /dev/null +++ b/third_party/wavpack/src/unpack3_open.c @@ -0,0 +1,289 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// unpack3_open.c + +// This module provides an extension to the open_utils.c module for handling +// WavPack files prior to version 4.0, not including "raw" files. As these +// modes are all obsolete and are no longer written, this code will not be +// fully documented other than the global functions. However, full documentation +// is provided in the version 3.97 source code. Note that this module only +// provides the functionality of opening the files and obtaining information +// from them; the actual audio decoding is located in the unpack3.c module. + +#ifdef ENABLE_LEGACY + +#include +#include + +#include "wavpack_local.h" +#include "unpack3.h" + +#define ATTEMPT_ERROR_MUTING + +// This provides an extension to the WavpackOpenFileRead () function contained +// in the wputils.c module. It is assumed that an 'R' had been read as the +// first character of the file/stream (indicating a non-raw pre version 4.0 +// WavPack file) and had been pushed back onto the stream (or simply seeked +// back to).
+ +WavpackContext *open_file3 (WavpackContext *wpc, char *error) +{ + RiffChunkHeader RiffChunkHeader; + ChunkHeader ChunkHeader; + WavpackHeader3 wphdr; + WavpackStream3 *wps; + WaveHeader3 wavhdr; + + CLEAR (wavhdr); + wpc->stream3 = wps = (WavpackStream3 *) malloc (sizeof (WavpackStream3)); + CLEAR (*wps); + + if (wpc->reader->read_bytes (wpc->wv_in, &RiffChunkHeader, sizeof (RiffChunkHeader)) != + sizeof (RiffChunkHeader)) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + if (!strncmp (RiffChunkHeader.ckID, "RIFF", 4) && !strncmp (RiffChunkHeader.formType, "WAVE", 4)) { + + if (wpc->open_flags & OPEN_WRAPPER) { + wpc->wrapper_data = (unsigned char *)malloc (wpc->wrapper_bytes = sizeof (RiffChunkHeader)); + memcpy (wpc->wrapper_data, &RiffChunkHeader, sizeof (RiffChunkHeader)); + } + + // If the first chunk is a wave RIFF header, then read the various chunks + // until we get to the "data" chunk (and WavPack header should follow). If + // the first chunk is not a RIFF, then we assume a "raw" WavPack file and + // the WavPack header must be first. 
+ + while (1) { + + if (wpc->reader->read_bytes (wpc->wv_in, &ChunkHeader, sizeof (ChunkHeader)) != + sizeof (ChunkHeader)) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + else { + if (wpc->open_flags & OPEN_WRAPPER) { + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + sizeof (ChunkHeader)); + memcpy (wpc->wrapper_data + wpc->wrapper_bytes, &ChunkHeader, sizeof (ChunkHeader)); + wpc->wrapper_bytes += sizeof (ChunkHeader); + } + + WavpackLittleEndianToNative (&ChunkHeader, ChunkHeaderFormat); + + if (!strncmp (ChunkHeader.ckID, "fmt ", 4)) { + + if (ChunkHeader.ckSize < sizeof (wavhdr) || + wpc->reader->read_bytes (wpc->wv_in, &wavhdr, sizeof (wavhdr)) != sizeof (wavhdr)) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + else if (wpc->open_flags & OPEN_WRAPPER) { + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + sizeof (wavhdr)); + memcpy (wpc->wrapper_data + wpc->wrapper_bytes, &wavhdr, sizeof (wavhdr)); + wpc->wrapper_bytes += sizeof (wavhdr); + } + + WavpackLittleEndianToNative (&wavhdr, WaveHeader3Format); + + if (ChunkHeader.ckSize > sizeof (wavhdr)) { + uint32_t bytes_to_skip = (ChunkHeader.ckSize + 1 - sizeof (wavhdr)) & ~1L; + + if (bytes_to_skip > 1024 * 1024) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + if (wpc->open_flags & OPEN_WRAPPER) { + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes_to_skip); + wpc->reader->read_bytes (wpc->wv_in, wpc->wrapper_data + wpc->wrapper_bytes, bytes_to_skip); + wpc->wrapper_bytes += bytes_to_skip; + } + else { + unsigned char *temp = (unsigned char *)malloc (bytes_to_skip); + wpc->reader->read_bytes (wpc->wv_in, temp, bytes_to_skip); + free (temp); + } + } + } + else if (!strncmp (ChunkHeader.ckID, "data", 4)) + break; + else if ((ChunkHeader.ckSize + 
1) & ~1L) { + uint32_t bytes_to_skip = (ChunkHeader.ckSize + 1) & ~1L; + + if (bytes_to_skip > 1024 * 1024) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + if (wpc->open_flags & OPEN_WRAPPER) { + wpc->wrapper_data = (unsigned char *)realloc (wpc->wrapper_data, wpc->wrapper_bytes + bytes_to_skip); + wpc->reader->read_bytes (wpc->wv_in, wpc->wrapper_data + wpc->wrapper_bytes, bytes_to_skip); + wpc->wrapper_bytes += bytes_to_skip; + } + else { + unsigned char *temp = (unsigned char *)malloc (bytes_to_skip); + wpc->reader->read_bytes (wpc->wv_in, temp, bytes_to_skip); + free (temp); + } + } + } + } + } + else { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + if (wavhdr.FormatTag != 1 || !wavhdr.NumChannels || wavhdr.NumChannels > 2 || + !wavhdr.SampleRate || wavhdr.BitsPerSample < 16 || wavhdr.BitsPerSample > 24 || + wavhdr.BlockAlign / wavhdr.NumChannels > 3 || wavhdr.BlockAlign % wavhdr.NumChannels || + wavhdr.BlockAlign / wavhdr.NumChannels < (wavhdr.BitsPerSample + 7) / 8) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + wpc->total_samples = ChunkHeader.ckSize / wavhdr.NumChannels / + ((wavhdr.BitsPerSample > 16) ? 
3 : 2); + + if (wpc->reader->read_bytes (wpc->wv_in, &wphdr, 10) != 10) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + if (((char *) &wphdr) [8] == 2 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10, 2) != 2)) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + else if (((char *) &wphdr) [8] == 3 && (wpc->reader->read_bytes (wpc->wv_in, ((char *) &wphdr) + 10, + sizeof (wphdr) - 10) != sizeof (wphdr) - 10)) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + WavpackLittleEndianToNative (&wphdr, WavpackHeader3Format); + + // make sure this is a version we know about + + if (strncmp (wphdr.ckID, "wvpk", 4) || wphdr.version < 1 || wphdr.version > 3) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + // Because I ran out of flag bits in the WavPack header, an amazingly ugly + // kludge was forced upon me! 
This code takes care of preparing the flags + // field for internal use and checking for unknown formats we can't decode + + if (wphdr.version == 3) { + + if (wphdr.flags & EXTREME_DECORR) { + + if ((wphdr.flags & NOT_STORED_FLAGS) || + ((wphdr.bits) && + (((wphdr.flags & NEW_HIGH_FLAG) && + (wphdr.flags & (FAST_FLAG | HIGH_FLAG))) || + (wphdr.flags & CROSS_DECORR)))) { + if (error) strcpy (error, "not a valid WavPack file!"); + return WavpackCloseFile (wpc); + } + + if (wphdr.flags & CANCEL_EXTREME) + wphdr.flags &= ~(EXTREME_DECORR | CANCEL_EXTREME); + } + else + wphdr.flags &= ~CROSS_DECORR; + } + + // check to see if we should look for a "correction" file, and if so try + // to open it for reading, then set WVC_FLAG accordingly + + if (wpc->wvc_in && wphdr.version == 3 && wphdr.bits && (wphdr.flags & NEW_HIGH_FLAG)) { + wpc->file2len = wpc->reader->get_length (wpc->wvc_in); + wphdr.flags |= WVC_FLAG; + wpc->wvc_flag = TRUE; + } + else + wphdr.flags &= ~WVC_FLAG; + + // check WavPack version to handle special requirements of versions + // before 3.0 that had smaller headers + + if (wphdr.version < 3) { + wphdr.total_samples = (int32_t) wpc->total_samples; + wphdr.flags = wavhdr.NumChannels == 1 ? MONO_FLAG : 0; + wphdr.shift = 16 - wavhdr.BitsPerSample; + + if (wphdr.version == 1) + wphdr.bits = 0; + } + + wpc->config.sample_rate = wavhdr.SampleRate; + wpc->config.num_channels = wavhdr.NumChannels; + wpc->config.channel_mask = 5 - wavhdr.NumChannels; + + if (wphdr.flags & MONO_FLAG) + wpc->config.flags |= CONFIG_MONO_FLAG; + + if (wphdr.flags & EXTREME_DECORR) + wpc->config.flags |= CONFIG_HIGH_FLAG; + + if (wphdr.bits) { + if (wphdr.flags & NEW_HIGH_FLAG) + wpc->config.flags |= CONFIG_HYBRID_FLAG; + else + wpc->config.flags |= CONFIG_LOSSY_MODE; + } + else if (!(wphdr.flags & HIGH_FLAG)) + wpc->config.flags |= CONFIG_FAST_FLAG; + + wpc->config.bytes_per_sample = (wphdr.flags & BYTES_3) ? 
3 : 2; + wpc->config.bits_per_sample = wavhdr.BitsPerSample; + + memcpy (&wps->wphdr, &wphdr, sizeof (wphdr)); + wps->wvbits.bufsiz = wps->wvcbits.bufsiz = 1024 * 1024; + return wpc; +} + +// return currently decoded sample index + +uint32_t get_sample_index3 (WavpackContext *wpc) +{ + WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; + + return (wps) ? wps->sample_index : (uint32_t) -1; +} + +int get_version3 (WavpackContext *wpc) +{ + WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; + + return (wps) ? wps->wphdr.version : 0; +} + +void free_stream3 (WavpackContext *wpc) +{ + WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; + + if (wps) { +#ifndef NO_SEEKING + if (wps->unpack_data) + free (wps->unpack_data); +#endif + if ((wps->wphdr.flags & WVC_FLAG) && wps->wvcbits.buf) + free (wps->wvcbits.buf); + + if (wps->wvbits.buf) + free (wps->wvbits.buf); + + free (wps); + } +} + +#endif // ENABLE_LEGACY diff --git a/third_party/wavpack/src/unpack3_seek.c b/third_party/wavpack/src/unpack3_seek.c new file mode 100644 index 0000000..f1ed27f --- /dev/null +++ b/third_party/wavpack/src/unpack3_seek.c @@ -0,0 +1,212 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// unpack3_seek.c + +// This module provides seeking support for WavPack files prior to version 4.0. + +#ifdef ENABLE_LEGACY +#ifndef NO_SEEKING + +#include +#include + +#include "wavpack_local.h" +#include "unpack3.h" + +static void *unpack_restore (WavpackStream3 *wps, void *source, int keep_resources); +static void bs_restore3 (Bitstream3 *bs); + +// This is an extension for WavpackSeekSample (). 
Note that because WavPack +// files created prior to version 4.0 are not inherently seekable, this +// function could take a long time if a forward seek is requested to an +// area that has not been played (or seeked through) yet. + +int seek_sample3 (WavpackContext *wpc, uint32_t desired_index) +{ + int points_index = desired_index / (((uint32_t) wpc->total_samples >> 8) + 1); + WavpackStream3 *wps = (WavpackStream3 *) wpc->stream3; + + if (desired_index >= wpc->total_samples) + return FALSE; + + while (points_index) + if (wps->index_points [points_index].saved && + wps->index_points [points_index].sample_index <= desired_index) + break; + else + points_index--; + + if (wps->index_points [points_index].saved) + if (wps->index_points [points_index].sample_index > wps->sample_index || + wps->sample_index > desired_index) { + wps->sample_index = wps->index_points [points_index].sample_index; + unpack_restore (wps, wps->unpack_data + points_index * wps->unpack_size, TRUE); + } + + if (desired_index > wps->sample_index) { + int32_t *buffer = (int32_t *) malloc (1024 * (wps->wphdr.flags & MONO_FLAG ? 4 : 8)); + uint32_t samples_to_skip = desired_index - wps->sample_index; + + while (1) { + if (samples_to_skip > 1024) { + if (unpack_samples3 (wpc, buffer, 1024) == 1024) + samples_to_skip -= 1024; + else + break; + } + else { + samples_to_skip -= unpack_samples3 (wpc, buffer, samples_to_skip); + break; + } + } + + free (buffer); + + if (samples_to_skip) + return FALSE; + } + + return TRUE; +} + +// This function restores the unpacking context from the specified pointer +// and returns the updated pointer. After this call, unpack_samples() will +// continue where it left off immediately before unpack_save() was called. +// If the WavPack files and bitstreams might have been closed and reopened, +// then the "keep_resources" flag should be set to avoid using the "old" +// resources that were originally saved (and are probably now invalid). 
+ +static void *unpack_restore (WavpackStream3 *wps, void *source, int keep_resources) +{ + int flags = wps->wphdr.flags, tcount; + struct decorr_pass *dpp; + FILE *temp_file; + unsigned char *temp_buf; + + unpack_init3 (wps); + temp_file = wps->wvbits.id; + temp_buf = wps->wvbits.buf; + RESTORE (wps->wvbits, source); + + if (keep_resources) { + wps->wvbits.id = temp_file; + wps->wvbits.ptr += temp_buf - wps->wvbits.buf; + wps->wvbits.end += temp_buf - wps->wvbits.buf; + wps->wvbits.buf = temp_buf; + } + + bs_restore3 (&wps->wvbits); + + if (flags & WVC_FLAG) { + temp_file = wps->wvcbits.id; + temp_buf = wps->wvcbits.buf; + RESTORE (wps->wvcbits, source); + + if (keep_resources) { + wps->wvcbits.id = temp_file; + wps->wvcbits.ptr += temp_buf - wps->wvcbits.buf; + wps->wvcbits.end += temp_buf - wps->wvcbits.buf; + wps->wvcbits.buf = temp_buf; + } + + bs_restore3 (&wps->wvcbits); + } + + if (wps->wphdr.version == 3) { + if (wps->wphdr.bits) { + RESTORE (wps->w4, source); + } + else { + RESTORE (wps->w1, source); + } + + RESTORE (wps->w3, source); + RESTORE (wps->dc.crc, source); + } + else + RESTORE (wps->w2, source); + + if (wps->wphdr.bits) { + RESTORE (wps->dc.error, source); + } + else { + RESTORE (wps->dc.sum_level, source); + RESTORE (wps->dc.left_level, source); + RESTORE (wps->dc.right_level, source); + RESTORE (wps->dc.diff_level, source); + } + + if (flags & OVER_20) { + RESTORE (wps->dc.last_extra_bits, source); + RESTORE (wps->dc.extra_bits_count, source); + } + + if (!(flags & EXTREME_DECORR)) { + RESTORE (wps->dc.sample, source); + RESTORE (wps->dc.weight, source); + } + + if (flags & (HIGH_FLAG | NEW_HIGH_FLAG)) + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { + if (dpp->term > 0) { + int count = dpp->term; + int index = wps->dc.m; + + RESTORE (dpp->weight_A, source); + + while (count--) { + RESTORE (dpp->samples_A [index], source); + index = (index + 1) & (MAX_TERM - 1); + } + + if (!(flags & MONO_FLAG)) { + count = 
dpp->term; + index = wps->dc.m; + + RESTORE (dpp->weight_B, source); + + while (count--) { + RESTORE (dpp->samples_B [index], source); + index = (index + 1) & (MAX_TERM - 1); + } + } + } + else { + RESTORE (dpp->weight_A, source); + RESTORE (dpp->weight_B, source); + RESTORE (dpp->samples_A [0], source); + RESTORE (dpp->samples_B [0], source); + } + } + + return source; +} + +// This function is called after a call to unpack_restore() has restored +// the BitStream structure to a previous state and causes any required data +// to be read from the file. This function is NOT supported for overlapped +// operation. + +static void bs_restore3 (Bitstream3 *bs) +{ + uint32_t bytes_to_read = (uint32_t)(bs->end - bs->ptr - 1), bytes_read; + + bs->reader->set_pos_abs (bs->id, bs->fpos - bytes_to_read); + + if (bytes_to_read > 0) { + + bytes_read = bs->reader->read_bytes (bs->id, bs->ptr + 1, bytes_to_read); + + if (bytes_to_read != bytes_read) + bs->end = bs->ptr + 1 + bytes_read; + } +} + +#endif // NO_SEEKING +#endif // ENABLE_LEGACY diff --git a/third_party/wavpack/src/unpack_armv7.S b/third_party/wavpack/src/unpack_armv7.S new file mode 100644 index 0000000..f423de3 --- /dev/null +++ b/third_party/wavpack/src/unpack_armv7.S @@ -0,0 +1,887 @@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@ **** WAVPACK **** @@ +@@ Hybrid Lossless Wavefile Compressor @@ +@@ Copyright (c) 1998 - 2015 Conifer Software. @@ +@@ All Rights Reserved. 
@@ +@@ Distributed under the BSD Software License (see license.txt) @@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + + .text + .align + .global unpack_decorr_stereo_pass_cont_armv7 + .global unpack_decorr_mono_pass_cont_armv7 + +/* This is an assembly optimized version of the following WavPack function: + * + * void decorr_stereo_pass_cont (struct decorr_pass *dpp, + * int32_t *buffer, + * int32_t sample_count, + * int32_t long_math); + * + * It performs a single pass of stereo decorrelation on the provided buffer. + * Note that this version of the function requires that up to 8 previous stereo + * samples are visible and correct. In other words, it ignores the "samples_*" + * fields in the decorr_pass structure and gets the history data directly + * from the buffer. It does, however, return the appropriate history samples + * to the decorr_pass structure before returning. + * + * This should work on all ARM architectures. This version of the code + * checks the magnitude of the decorrelation sample with a pair of shifts + * to avoid possible overflow (and therefore ignores the "long_math" arg). + * Previously I used the SSAT instruction for this, but then discovered that + * SSAT is not universally available (although on the armv7 I'm testing on + * it is slightly faster than the shifts). + * + * A mono version follows below.
+ */ + +/* + * on entry: + * + * r0 = struct decorr_pass *dpp + * r1 = int32_t *buffer + * r2 = int32_t sample_count + * r3 = int32_t long_math + */ + +unpack_decorr_stereo_pass_cont_armv7: + + stmfd sp!, {r4 - r8, r10, r11, lr} + + mov r5, r0 @ r5 = dpp + mov r11, #512 @ r11 = 512 for rounding + ldr r6, [r0, #4] @ r6 = dpp->delta + ldr r4, [r0, #8] @ r4 = dpp->weight_A + ldr r0, [r0, #12] @ r0 = dpp->weight_B + cmp r2, #0 @ exit if no samples to process + beq common_exit + + add r7, r1, r2, asl #3 @ r7 = buffer ending position + ldr r2, [r5, #0] @ r2 = dpp->term + cmp r2, #0 + bmi minus_term + + ldr lr, [r1, #-16] @ load 2 sample history from buffer + ldr r10, [r1, #-12] @ for terms 2, 17, and 18 + ldr r8, [r1, #-8] + ldr r3, [r1, #-4] + cmp r2, #17 + beq term_17_loop + cmp r2, #18 + beq term_18_loop + cmp r2, #2 + beq term_2_loop + b term_default_loop @ else handle default (1-8, except 2) + +minus_term: + mov r10, #1024 @ r10 = -1024 for weight clipping + rsb r10, r10, #0 @ (only used for negative terms) + cmn r2, #1 + beq term_minus_1 + cmn r2, #2 + beq term_minus_2 + cmn r2, #3 + beq term_minus_3 + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = 17 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous right sample + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous left sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_17_loop: + rsb ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev + mov lr, r8 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mov r8, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r8, asr #11 @ and comparing, branch to 64-bit math if different + 
bne S117 + cmp ip, #0 + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + b S118 + +S117: mov r8, #0 @ use 64-bit multiply to avoid overflow + smlal r11, r8, r4, ip + add r8, r2, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S118: strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq S325 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S325: rsb ip, r10, r3, asl #1 @ do same thing for right channel + mov r10, r3 + ldr r2, [r1], #4 + mov r3, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r3, asr #11 @ and comparing, branch to 64-bit math if different + bne S119 + cmp ip, #0 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + b S120 + +S119: mov r3, #0 + smlal r11, r3, r0, ip + add r3, r2, r3, lsl #22 + add r3, r3, r11, lsr #10 + mov r11, #512 + +S120: strne r3, [r1, #-4] + cmpne r2, #0 + beq S329 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +S329: cmp r7, r1 @ loop back if more samples to do + bhi term_17_loop + b store_1718 @ common exit for terms 17 & 18 + +/* + ****************************************************************************** + * Loop to handle term = 18 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous right sample + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous left sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_18_loop: + sub ip, r8, lr @ decorr value = + mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 + add ip, r8, ip, asr #1 + ldr r2, [r1], #4 @ get sample & update pointer + mov r8, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S121 
+ cmp ip, #0 + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + b S122 + +S121: mov r8, #0 @ use 64-bit multiply to avoid overflow + smlal r11, r8, r4, ip + add r8, r2, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S122: strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq S337 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S337: sub ip, r3, r10 @ do same thing for right channel + mov r10, r3 + add ip, r3, ip, asr #1 + ldr r2, [r1], #4 + mov r3, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r3, asr #11 @ and comparing, branch to 64-bit math if different + bne S123 + cmp ip, #0 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + b S124 + +S123: mov r3, #0 + smlal r11, r3, r0, ip + add r3, r2, r3, lsl #22 + add r3, r3, r11, lsr #10 + mov r11, #512 + +S124: strne r3, [r1, #-4] + cmpne r2, #0 + beq S341 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +S341: cmp r7, r1 @ loop back if more samples to do + bhi term_18_loop + +/* common exit for terms 17 & 18 */ + +store_1718: + str r3, [r5, #48] @ store sample history into struct + str r8, [r5, #16] + str r10, [r5, #52] + str lr, [r5, #20] + b common_exit @ and return + +/* + ****************************************************************************** + * Loop to handle term = 2 condition + * (note that this case can be handled by the default term handler (1-8), but + * this special case is faster because it doesn't have to read memory twice) + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous right sample + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous left sample + * r7 = eptr pc = + ******************************************************************************* + */ + 
+term_2_loop: + mov ip, lr @ get decorrelation value + mov lr, r8 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mov r8, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S125 + cmp ip, #0 + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + b S126 + +S125: mov r8, #0 @ use 64-bit multiply to avoid overflow + smlal r11, r8, r4, ip + add r8, r2, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S126: strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq S225 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S225: mov ip, r10 @ do same thing for right channel + mov r10, r3 + ldr r2, [r1], #4 + mov r3, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r3, asr #11 @ and comparing, branch to 64-bit math if different + bne S127 + cmp ip, #0 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + b S128 + +S127: mov r3, #0 + smlal r11, r3, r0, ip + add r3, r2, r3, lsl #22 + add r3, r3, r11, lsr #10 + mov r11, #512 + +S128: strne r3, [r1, #-4] + cmpne r2, #0 + beq S229 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +S229: cmp r7, r1 @ loop back if more samples to do + bhi term_2_loop + b default_term_exit @ this exit updates all dpp->samples + +/* + ****************************************************************************** + * Loop to handle default term condition + * + * r0 = dpp->weight_B r8 = result accumulator + * r1 = bptr r9 = + * r2 = dpp->term r10 = + * r3 = decorrelation value r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + ******************************************************************************* + */ + +term_default_loop: + ldr ip, [r1] @ get original sample + ldr r3, [r1, -r2, asl #3] @ get 
decorrelation value based on term + mov r8, r3, lsl #11 @ check magnitude by shifting left then right + cmp r3, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S135 + cmp r3, #0 + mla r8, r3, r4, r11 @ mult decorr value by weight, round, + add r8, ip, r8, asr #10 @ shift and add to new sample + b S136 + +S135: mov r8, #0 @ use 64-bit multiply to avoid overflow + smlal r11, r8, r4, r3 + add r8, ip, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S136: str r8, [r1], #4 @ store update sample + cmpne ip, #0 + beq S350 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S350: ldr ip, [r1] @ do the same thing for right channel + ldr r3, [r1, -r2, asl #3] + mov r8, r3, lsl #11 @ check magnitude by shifting left then right + cmp r3, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S137 + cmp r3, #0 + mla r8, r3, r0, r11 + add r8, ip, r8, asr #10 + b S138 + +S137: mov r8, #0 + smlal r11, r8, r0, r3 + add r8, ip, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S138: str r8, [r1], #4 + cmpne ip, #0 + beq S354 + teq ip, r3 + submi r0, r0, r6 + addpl r0, r0, r6 + +S354: cmp r7, r1 @ loop back if more samples to do + bhi term_default_loop + +/* + * This exit is used by terms 1-8 to store the previous "term" samples (up to 8) + * into the decorr pass structure history + */ + +default_term_exit: + ldr r2, [r5, #0] @ r2 = dpp->term + +S358: sub r2, r2, #1 + sub r1, r1, #8 + ldr r3, [r1, #4] @ get right sample and store in dpp->samples_B [r2] + add r6, r5, #48 + str r3, [r6, r2, asl #2] + ldr r3, [r1, #0] @ get left sample and store in dpp->samples_A [r2] + add r6, r5, #16 + str r3, [r6, r2, asl #2] + cmp r2, #0 + bne S358 + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -1 condition + * + * r0 = dpp->weight_B r8 = + * r1 = bptr r9 = + * r2 = intermediate result r10 = -1024 (for clipping) + * r3 = previous 
right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = updated left sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_1: + ldr r3, [r1, #-4] + +term_minus_1_loop: + ldr ip, [r1] @ for left channel the decorrelation value + @ is the previous right sample (in r3) + mov lr, r3, lsl #11 @ check magnitude by shifting left then right + cmp r3, lr, asr #11 @ and comparing, branch to 64-bit math if different + bne S142 + cmp r3, #0 + mla r2, r3, r4, r11 + add lr, ip, r2, asr #10 + b S143 + +S142: mov lr, #0 @ use 64-bit multiply to avoid overflow + smlal r11, lr, r4, r3 + add lr, ip, lr, lsl #22 + add lr, lr, r11, lsr #10 + mov r11, #512 + +S143: str lr, [r1], #8 + cmpne ip, #0 + beq S361 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #1024 + movgt r4, #1024 + cmp r4, r10 + movlt r4, r10 + +S361: ldr r2, [r1, #-4] @ for right channel the decorrelation value + @ is the just updated left sample (in lr) + mov r3, lr, lsl #11 @ check magnitude by shifting left then right + cmp lr, r3, asr #11 @ and comparing, branch to 64-bit math if different + bne S144 + cmp lr, #0 + mla r3, lr, r0, r11 + add r3, r2, r3, asr #10 + b S145 + +S144: mov r3, #0 + smlal r11, r3, r0, lr + add r3, r2, r3, lsl #22 + add r3, r3, r11, lsr #10 + mov r11, #512 + +S145: strne r3, [r1, #-4] + cmpne r2, #0 + beq S369 + teq r2, lr + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #1024 @ then clip weight to +/-1024 + movgt r0, #1024 + cmp r0, r10 + movlt r0, r10 + +S369: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_1_loop + + str r3, [r5, #16] @ else store right sample and exit + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -2 condition + * (note that the channels are processed in the reverse order here) + * + * r0 = 
dpp->weight_B r8 = + * r1 = bptr r9 = + * r2 = intermediate result r10 = -1024 (for clipping) + * r3 = previous left sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = updated right sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_2: + ldr r3, [r1, #-8] + +term_minus_2_loop: + ldr ip, [r1, #4] @ for right channel the decorrelation value + @ is the previous left sample (in r3) + mov lr, r3, lsl #11 @ check magnitude by shifting left then right + cmp r3, lr, asr #11 @ and comparing, branch to 64-bit math if different + bne S146 + cmp r3, #0 + mla r2, r3, r0, r11 + add lr, ip, r2, asr #10 + b S147 + +S146: mov lr, #0 @ use 64-bit multiply to avoid overflow + smlal r11, lr, r0, r3 + add lr, ip, lr, lsl #22 + add lr, lr, r11, lsr #10 + mov r11, #512 + +S147: strne lr, [r1, #4] + cmpne ip, #0 + beq S380 + teq ip, r3 @ update weight based on signs + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #1024 @ then clip weight to +/-1024 + movgt r0, #1024 + cmp r0, r10 + movlt r0, r10 + +S380: ldr r2, [r1, #0] @ for left channel the decorrelation value + @ is the just updated right sample (in lr) + mov r3, lr, lsl #11 @ check magnitude by shifting left then right + cmp lr, r3, asr #11 @ and comparing, branch to 64-bit math if different + bne S148 + cmp lr, #0 + mla r3, lr, r4, r11 + add r3, r2, r3, asr #10 + b S149 + +S148: mov r3, #0 + smlal r11, r3, r4, lr + add r3, r2, r3, lsl #22 + add r3, r3, r11, lsr #10 + mov r11, #512 + +S149: str r3, [r1], #8 + cmpne r2, #0 + beq S388 + teq r2, lr + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #1024 + movgt r4, #1024 + cmp r4, r10 + movlt r4, r10 + +S388: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_2_loop + + str r3, [r5, #48] @ else store left channel and exit + b common_exit + +/* + ****************************************************************************** + * Loop to 
handle term = -3 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current left sample r10 = -1024 (for clipping) + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = intermediate result + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_3: + ldr r3, [r1, #-4] @ load previous samples + ldr r8, [r1, #-8] + +term_minus_3_loop: + ldr ip, [r1] + mov r2, r3, lsl #11 @ check magnitude by shifting left then right + cmp r3, r2, asr #11 @ and comparing, branch to 64-bit math if different + bne S160 + cmp r3, #0 + mla r2, r3, r4, r11 + add r2, ip, r2, asr #10 + b S161 + +S160: mov r2, #0 @ use 64-bit multiply to avoid overflow + smlal r11, r2, r4, r3 + add r2, ip, r2, lsl #22 + add r2, r2, r11, lsr #10 + mov r11, #512 + +S161: str r2, [r1], #4 + cmpne ip, #0 + beq S399 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #1024 @ then clip weight to +/-1024 + movgt r4, #1024 + cmp r4, r10 + movlt r4, r10 + +S399: mov ip, r8 @ ip = previous left we use now + mov r8, r2 @ r8 = current left we use next time + ldr r2, [r1], #4 + mov r3, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r3, asr #11 @ and comparing, branch to 64-bit math if different + bne S162 + cmp ip, #0 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + b S163 + +S162: mov r3, #0 + smlal r11, r3, r0, ip + add r3, r2, r3, lsl #22 + add r3, r3, r11, lsr #10 + mov r11, #512 + +S163: strne r3, [r1, #-4] + cmpne r2, #0 + beq S407 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #1024 + movgt r0, #1024 + cmp r0, r10 + movlt r0, r10 + +S407: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_3_loop + + str r3, [r5, #16] @ else store previous samples & exit + str r8, [r5, #48] + +/* + * Before finally exiting we must store weights back for next time + */ + 
+common_exit: + str r4, [r5, #8] + str r0, [r5, #12] + ldmfd sp!, {r4 - r8, r10, r11, pc} + + + +/* This is a mono version of the function above. It does not handle negative terms. + * + * void decorr_mono_pass_cont (struct decorr_pass *dpp, + * int32_t *buffer, + * int32_t sample_count, + * int32_t long_math); + * on entry: + * + * r0 = struct decorr_pass *dpp + * r1 = int32_t *buffer + * r2 = int32_t sample_count + * r3 = int32_t long_math + */ + +unpack_decorr_mono_pass_cont_armv7: + + stmfd sp!, {r4 - r8, r11, lr} + + mov r5, r0 @ r5 = dpp + mov r11, #512 @ r11 = 512 for rounding + ldr r6, [r0, #4] @ r6 = dpp->delta + ldr r4, [r0, #8] @ r4 = dpp->weight_A + cmp r2, #0 @ exit if no samples to process + beq mono_common_exit + + add r7, r1, r2, asl #2 @ r7 = buffer ending position + ldr r2, [r5, #0] @ r2 = dpp->term + + ldr lr, [r1, #-8] @ load 2 sample history from buffer + ldr r8, [r1, #-4] + cmp r2, #17 + beq mono_term_17_loop + cmp r2, #18 + beq mono_term_18_loop + cmp r2, #2 + beq mono_term_2_loop + b mono_term_default_loop @ else handle default (1-8, except 2) + +/* + ****************************************************************************** + * Loop to handle term = 17 condition + * + * r0 = r8 = previous sample + * r1 = bptr r9 = + * r2 = current sample r10 = + * r3 = r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous sample + * r7 = eptr pc = + ******************************************************************************* + */ + +mono_term_17_loop: + rsb ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev + mov lr, r8 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mov r8, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S717 + cmp ip, #0 + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, 
and add to new sample + b S718 + +S717: mov r8, #0 + smlal r11, r8, r4, ip + add r8, r2, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S718: strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq S129 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S129: cmp r7, r1 @ loop back if more samples to do + bhi mono_term_17_loop + b mono_store_1718 @ common exit for terms 17 & 18 + +/* + ****************************************************************************** + * Loop to handle term = 18 condition + * + * r0 = r8 = previous sample + * r1 = bptr r9 = + * r2 = current sample r10 = + * r3 = r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous sample + * r7 = eptr pc = + ******************************************************************************* + */ + +mono_term_18_loop: + sub ip, r8, lr @ decorr value = + mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 + add ip, r8, ip, asr #1 + ldr r2, [r1], #4 @ get sample & update pointer + mov r8, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S817 + cmp ip, #0 + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + b S818 + +S817: mov r8, #0 + smlal r11, r8, r4, ip + add r8, r2, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S818: strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq S141 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S141: cmp r7, r1 @ loop back if more samples to do + bhi mono_term_18_loop + +/* common exit for terms 17 & 18 */ + +mono_store_1718: + str r8, [r5, #16] @ store sample history into struct + str lr, [r5, #20] + b mono_common_exit @ and return + +/* + 
****************************************************************************** + * Loop to handle term = 2 condition + * (note that this case can be handled by the default term handler (1-8), but + * this special case is faster because it doesn't have to read memory twice) + * + * r0 = r8 = previous sample + * r1 = bptr r9 = + * r2 = current sample r10 = + * r3 = r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous sample + * r7 = eptr pc = + ******************************************************************************* + */ + +mono_term_2_loop: + mov ip, lr @ get decorrelation value + mov lr, r8 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mov r8, ip, lsl #11 @ check magnitude by shifting left then right + cmp ip, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S917 + cmp ip, #0 + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + b S918 + +S917: mov r8, #0 + smlal r11, r8, r4, ip + add r8, r2, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S918: strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq S029 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S029: cmp r7, r1 @ loop back if more samples to do + bhi mono_term_2_loop + b mono_default_term_exit @ this exit updates all dpp->samples + +/* + ****************************************************************************** + * Loop to handle default term condition + * + * r0 = r8 = result accumulator + * r1 = bptr r9 = + * r2 = dpp->term r10 = + * r3 = decorrelation value r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + ******************************************************************************* + */ + +mono_term_default_loop: + ldr ip, [r1] @ get 
original sample + ldr r3, [r1, -r2, asl #2] @ get decorrelation value based on term + mov r8, r3, lsl #11 @ check magnitude by shifting left then right + cmp r3, r8, asr #11 @ and comparing, branch to 64-bit math if different + bne S617 + mla r8, r3, r4, r11 @ mult decorr value by weight, round, + add r8, ip, r8, asr #10 @ shift and add to new sample + b S618 + +S617: mov r8, #0 + smlal r11, r8, r4, r3 + add r8, ip, r8, lsl #22 + add r8, r8, r11, lsr #10 + mov r11, #512 + +S618: str r8, [r1], #4 @ store update sample + cmp r3, #0 + cmpne ip, #0 + beq S154 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +S154: cmp r7, r1 @ loop back if more samples to do + bhi mono_term_default_loop + +/* + * This exit is used by terms 1-8 to store the previous "term" samples (up to 8) + * into the decorr pass structure history + */ + +mono_default_term_exit: + ldr r2, [r5, #0] @ r2 = dpp->term + +S158: sub r2, r2, #1 + sub r1, r1, #4 + ldr r3, [r1, #0] @ get sample and store in dpp->samples_A [r2] + add r6, r5, #16 + str r3, [r6, r2, asl #2] + cmp r2, #0 + bne S158 + b mono_common_exit + +/* + * Before finally exiting we must store weight back for next time + */ + +mono_common_exit: + str r4, [r5, #8] + ldmfd sp!, {r4 - r8, r11, pc} + +#ifdef __ELF__ + .section .note.GNU-stack,"",%progbits +#endif + diff --git a/third_party/wavpack/src/unpack_dsd.c b/third_party/wavpack/src/unpack_dsd.c new file mode 100644 index 0000000..11aa04f --- /dev/null +++ b/third_party/wavpack/src/unpack_dsd.c @@ -0,0 +1,616 @@ +//////////////////////////////////////////////////////////////////////////// +// **** DSDPACK **** // +// Lossless DSD (Direct Stream Digital) Audio Compressor // +// Copyright (c) 2013 - 2016 David Bryant. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// unpack_dsd.c + +// This module actually handles the uncompression of the DSD audio data. + +#ifdef ENABLE_DSD + +#include +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +// This function initializes the main range-encoded data for DSD audio samples + +static int init_dsd_block_fast (WavpackStream *wps, WavpackMetadata *wpmd); +static int init_dsd_block_high (WavpackStream *wps, WavpackMetadata *wpmd); +static int decode_fast (WavpackStream *wps, int32_t *output, int sample_count); +static int decode_high (WavpackStream *wps, int32_t *output, int sample_count); + +int init_dsd_block (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + + if (wpmd->byte_length < 2) + return FALSE; + + wps->dsd.byteptr = (unsigned char *)wpmd->data; + wps->dsd.endptr = wps->dsd.byteptr + wpmd->byte_length; + wpc->dsd_multiplier = 1 << *wps->dsd.byteptr++; + wps->dsd.mode = *wps->dsd.byteptr++; + + if (!wps->dsd.mode) { + if (wps->dsd.endptr - wps->dsd.byteptr != wps->wphdr.block_samples * (wps->wphdr.flags & MONO_DATA ? 1 : 2)) { + return FALSE; + } + + wps->dsd.ready = 1; + return TRUE; + } + + if (wps->dsd.mode == 1) + return init_dsd_block_fast (wps, wpmd); + else if (wps->dsd.mode == 3) + return init_dsd_block_high (wps, wpmd); + else + return FALSE; +} + +int32_t unpack_dsd_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + uint32_t flags = wps->wphdr.flags; + + // don't attempt to decode past the end of the block, but watch out for overflow! 
+ + if (wps->sample_index + sample_count > GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples && + GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index < sample_count) + sample_count = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index); + + if (GET_BLOCK_INDEX (wps->wphdr) > wps->sample_index || wps->wphdr.block_samples < sample_count) + wps->mute_error = TRUE; + + if (!wps->mute_error) { + if (!wps->dsd.mode) { + int total_samples = sample_count * ((flags & MONO_DATA) ? 1 : 2); + int32_t *bptr = buffer; + + if (wps->dsd.endptr - wps->dsd.byteptr < total_samples) + total_samples = (int)(wps->dsd.endptr - wps->dsd.byteptr); + + while (total_samples--) + wps->crc += (wps->crc << 1) + (*bptr++ = *wps->dsd.byteptr++); + } + else if (wps->dsd.mode == 1) { + if (!decode_fast (wps, buffer, sample_count)) + wps->mute_error = TRUE; + } + else if (!decode_high (wps, buffer, sample_count)) + wps->mute_error = TRUE; + } + + if (wps->mute_error) { + int samples_to_null; + if (wpc->reduced_channels == 1 || wpc->config.num_channels == 1 || (flags & MONO_FLAG)) + samples_to_null = sample_count; + else + samples_to_null = sample_count * 2; + + while (samples_to_null--) + *buffer++ = 0x55; + + wps->sample_index += sample_count; + return sample_count; + } + + if (flags & FALSE_STEREO) { + int32_t *dptr = buffer + sample_count * 2; + int32_t *sptr = buffer + sample_count; + int32_t c = sample_count; + + while (c--) { + *--dptr = *--sptr; + *--dptr = *sptr; + } + } + + wps->sample_index += sample_count; + + return sample_count; +} + +/*------------------------------------------------------------------------------------------------------------------------*/ + +// #define DSD_BYTE_READY(low,high) (((low) >> 24) == ((high) >> 24)) +// #define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) >> 24)) +#define DSD_BYTE_READY(low,high) (!(((low) ^ (high)) & 0xff000000)) +#define MAX_HISTORY_BITS 5 + +static int init_dsd_block_fast 
(WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char history_bits, max_probability; + int total_summed_probabilities = 0, i; + + if (wps->dsd.byteptr == wps->dsd.endptr) + return FALSE; + + history_bits = *wps->dsd.byteptr++; + + if (wps->dsd.byteptr == wps->dsd.endptr || history_bits > MAX_HISTORY_BITS) + return FALSE; + + wps->dsd.history_bins = 1 << history_bits; + + wps->dsd.value_lookup = (unsigned char **)malloc (sizeof (*wps->dsd.value_lookup) * wps->dsd.history_bins); + memset (wps->dsd.value_lookup, 0, sizeof (*wps->dsd.value_lookup) * wps->dsd.history_bins); + wps->dsd.summed_probabilities = (int16_t (*)[256])malloc (sizeof (*wps->dsd.summed_probabilities) * wps->dsd.history_bins); + wps->dsd.probabilities = (unsigned char (*)[256])malloc (sizeof (*wps->dsd.probabilities) * wps->dsd.history_bins); + + max_probability = *wps->dsd.byteptr++; + + if (max_probability < 0xff) { + unsigned char *outptr = (unsigned char *) wps->dsd.probabilities; + unsigned char *outend = outptr + sizeof (*wps->dsd.probabilities) * wps->dsd.history_bins; + + while (outptr < outend && wps->dsd.byteptr < wps->dsd.endptr) { + int code = *wps->dsd.byteptr++; + + if (code > max_probability) { + int zcount = code - max_probability; + + while (outptr < outend && zcount--) + *outptr++ = 0; + } + else if (code) + *outptr++ = code; + else + break; + } + + if (outptr < outend || (wps->dsd.byteptr < wps->dsd.endptr && *wps->dsd.byteptr++)) + return FALSE; + } + else if (wps->dsd.endptr - wps->dsd.byteptr > (int) sizeof (*wps->dsd.probabilities) * wps->dsd.history_bins) { + memcpy (wps->dsd.probabilities, wps->dsd.byteptr, sizeof (*wps->dsd.probabilities) * wps->dsd.history_bins); + wps->dsd.byteptr += sizeof (*wps->dsd.probabilities) * wps->dsd.history_bins; + } + else + return FALSE; + + for (wps->dsd.p0 = 0; wps->dsd.p0 < wps->dsd.history_bins; ++wps->dsd.p0) { + int32_t sum_values; + unsigned char *vp; + + for (sum_values = i = 0; i < 256; ++i) + wps->dsd.summed_probabilities 
[wps->dsd.p0] [i] = sum_values += wps->dsd.probabilities [wps->dsd.p0] [i]; + + if (sum_values) { + total_summed_probabilities += sum_values; + vp = wps->dsd.value_lookup [wps->dsd.p0] = (unsigned char *)malloc (sum_values); + + for (i = 0; i < 256; i++) { + int c = wps->dsd.probabilities [wps->dsd.p0] [i]; + + while (c--) + *vp++ = i; + } + } + } + + if (wps->dsd.endptr - wps->dsd.byteptr < 4 || total_summed_probabilities > wps->dsd.history_bins * 1280) + return FALSE; + + for (i = 4; i--;) + wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++; + + wps->dsd.p0 = wps->dsd.p1 = 0; + wps->dsd.low = 0; wps->dsd.high = 0xffffffff; + wps->dsd.ready = 1; + + return TRUE; +} + +static int decode_fast (WavpackStream *wps, int32_t *output, int sample_count) +{ + int total_samples = sample_count; + + if (!(wps->wphdr.flags & MONO_DATA)) + total_samples *= 2; + + while (total_samples--) { + int mult, index, code, i; + + if (!wps->dsd.summed_probabilities [wps->dsd.p0] [255]) + return 0; + + mult = (wps->dsd.high - wps->dsd.low) / wps->dsd.summed_probabilities [wps->dsd.p0] [255]; + + if (!mult) { + if (wps->dsd.endptr - wps->dsd.byteptr >= 4) + for (i = 4; i--;) + wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++; + + wps->dsd.low = 0; + wps->dsd.high = 0xffffffff; + mult = wps->dsd.high / wps->dsd.summed_probabilities [wps->dsd.p0] [255]; + + if (!mult) + return 0; + } + + index = (wps->dsd.value - wps->dsd.low) / mult; + + if (index >= wps->dsd.summed_probabilities [wps->dsd.p0] [255]) + return 0; + + if ((*output++ = code = wps->dsd.value_lookup [wps->dsd.p0] [index])) + wps->dsd.low += wps->dsd.summed_probabilities [wps->dsd.p0] [code-1] * mult; + + wps->dsd.high = wps->dsd.low + wps->dsd.probabilities [wps->dsd.p0] [code] * mult - 1; + wps->crc += (wps->crc << 1) + code; + + if (wps->wphdr.flags & MONO_DATA) + wps->dsd.p0 = code & (wps->dsd.history_bins-1); + else { + wps->dsd.p0 = wps->dsd.p1; + wps->dsd.p1 = code & (wps->dsd.history_bins-1); + } + + 
while (DSD_BYTE_READY (wps->dsd.high, wps->dsd.low) && wps->dsd.byteptr < wps->dsd.endptr) { + wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++; + wps->dsd.high = (wps->dsd.high << 8) | 0xff; + wps->dsd.low <<= 8; + } + } + + return sample_count; +} + +/*------------------------------------------------------------------------------------------------------------------------*/ + +#define PTABLE_BITS 8 +#define PTABLE_BINS (1<> 8; c--;) + value += (DOWN - value) >> DECAY; + + for (i = 0; i < PTABLE_BINS/2; ++i) { + table [i] = value; + table [PTABLE_BINS-1-i] = 0x100ffff - value; + + if (value > 0x010000) { + rate += (rate * rate_s + 128) >> 8; + + for (c = (rate + 64) >> 7; c--;) + value += (DOWN - value) >> DECAY; + } + } +} + +static int init_dsd_block_high (WavpackStream *wps, WavpackMetadata *wpmd) +{ + uint32_t flags = wps->wphdr.flags; + int channel, rate_i, rate_s, i; + + if (wps->dsd.endptr - wps->dsd.byteptr < ((flags & MONO_DATA) ? 13 : 20)) + return FALSE; + + rate_i = *wps->dsd.byteptr++; + rate_s = *wps->dsd.byteptr++; + + if (rate_s != RATE_S) + return FALSE; + + wps->dsd.ptable = (int32_t *)malloc (PTABLE_BINS * sizeof (*wps->dsd.ptable)); + init_ptable (wps->dsd.ptable, rate_i, rate_s); + + for (channel = 0; channel < ((flags & MONO_DATA) ? 
1 : 2); ++channel) { + DSDfilters *sp = wps->dsd.filters + channel; + + sp->filter1 = *wps->dsd.byteptr++ << (PRECISION - 8); + sp->filter2 = *wps->dsd.byteptr++ << (PRECISION - 8); + sp->filter3 = *wps->dsd.byteptr++ << (PRECISION - 8); + sp->filter4 = *wps->dsd.byteptr++ << (PRECISION - 8); + sp->filter5 = *wps->dsd.byteptr++ << (PRECISION - 8); + sp->filter6 = 0; + sp->factor = *wps->dsd.byteptr++ & 0xff; + sp->factor |= (*wps->dsd.byteptr++ << 8) & 0xff00; + sp->factor = (sp->factor << 16) >> 16; + } + + wps->dsd.high = 0xffffffff; + wps->dsd.low = 0x0; + + for (i = 4; i--;) + wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++; + + wps->dsd.ready = 1; + + return TRUE; +} + +static int decode_high (WavpackStream *wps, int32_t *output, int sample_count) +{ + int total_samples = sample_count, stereo = (wps->wphdr.flags & MONO_DATA) ? 0 : 1; + DSDfilters *sp = wps->dsd.filters; + + while (total_samples--) { + int bitcount = 8; + + sp [0].value = sp [0].filter1 - sp [0].filter5 + ((sp [0].filter6 * sp [0].factor) >> 2); + + if (stereo) + sp [1].value = sp [1].filter1 - sp [1].filter5 + ((sp [1].filter6 * sp [1].factor) >> 2); + + while (bitcount--) { + int32_t *pp = wps->dsd.ptable + ((sp [0].value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK); + uint32_t split = wps->dsd.low + ((wps->dsd.high - wps->dsd.low) >> 8) * (*pp >> 16); + + if (wps->dsd.value <= split) { + wps->dsd.high = split; + *pp += (UP - *pp) >> DECAY; + sp [0].filter0 = -1; + } + else { + wps->dsd.low = split + 1; + *pp += (DOWN - *pp) >> DECAY; + sp [0].filter0 = 0; + } + + while (DSD_BYTE_READY (wps->dsd.high, wps->dsd.low) && wps->dsd.byteptr < wps->dsd.endptr) { + wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++; + wps->dsd.high = (wps->dsd.high << 8) | 0xff; + wps->dsd.low <<= 8; + } + + sp [0].value += sp [0].filter6 << 3; + sp [0].byte = (sp [0].byte << 1) | (sp [0].filter0 & 1); + sp [0].factor += (((sp [0].value ^ sp [0].filter0) >> 31) | 1) & ((sp [0].value ^ (sp 
[0].value - (sp [0].filter6 << 4))) >> 31); + sp [0].filter1 += ((sp [0].filter0 & VALUE_ONE) - sp [0].filter1) >> 6; + sp [0].filter2 += ((sp [0].filter0 & VALUE_ONE) - sp [0].filter2) >> 4; + sp [0].filter3 += (sp [0].filter2 - sp [0].filter3) >> 4; + sp [0].filter4 += (sp [0].filter3 - sp [0].filter4) >> 4; + sp [0].value = (sp [0].filter4 - sp [0].filter5) >> 4; + sp [0].filter5 += sp [0].value; + sp [0].filter6 += (sp [0].value - sp [0].filter6) >> 3; + sp [0].value = sp [0].filter1 - sp [0].filter5 + ((sp [0].filter6 * sp [0].factor) >> 2); + + if (!stereo) + continue; + + pp = wps->dsd.ptable + ((sp [1].value >> (PRECISION - PRECISION_USE)) & PTABLE_MASK); + split = wps->dsd.low + ((wps->dsd.high - wps->dsd.low) >> 8) * (*pp >> 16); + + if (wps->dsd.value <= split) { + wps->dsd.high = split; + *pp += (UP - *pp) >> DECAY; + sp [1].filter0 = -1; + } + else { + wps->dsd.low = split + 1; + *pp += (DOWN - *pp) >> DECAY; + sp [1].filter0 = 0; + } + + while (DSD_BYTE_READY (wps->dsd.high, wps->dsd.low) && wps->dsd.byteptr < wps->dsd.endptr) { + wps->dsd.value = (wps->dsd.value << 8) | *wps->dsd.byteptr++; + wps->dsd.high = (wps->dsd.high << 8) | 0xff; + wps->dsd.low <<= 8; + } + + sp [1].value += sp [1].filter6 << 3; + sp [1].byte = (sp [1].byte << 1) | (sp [1].filter0 & 1); + sp [1].factor += (((sp [1].value ^ sp [1].filter0) >> 31) | 1) & ((sp [1].value ^ (sp [1].value - (sp [1].filter6 << 4))) >> 31); + sp [1].filter1 += ((sp [1].filter0 & VALUE_ONE) - sp [1].filter1) >> 6; + sp [1].filter2 += ((sp [1].filter0 & VALUE_ONE) - sp [1].filter2) >> 4; + sp [1].filter3 += (sp [1].filter2 - sp [1].filter3) >> 4; + sp [1].filter4 += (sp [1].filter3 - sp [1].filter4) >> 4; + sp [1].value = (sp [1].filter4 - sp [1].filter5) >> 4; + sp [1].filter5 += sp [1].value; + sp [1].filter6 += (sp [1].value - sp [1].filter6) >> 3; + sp [1].value = sp [1].filter1 - sp [1].filter5 + ((sp [1].filter6 * sp [1].factor) >> 2); + } + + wps->crc += (wps->crc << 1) + (*output++ = sp [0].byte 
& 0xff); + sp [0].factor -= (sp [0].factor + 512) >> 10; + + if (stereo) { + wps->crc += (wps->crc << 1) + (*output++ = wps->dsd.filters [1].byte & 0xff); + wps->dsd.filters [1].factor -= (wps->dsd.filters [1].factor + 512) >> 10; + } + } + + return sample_count; +} + +/*------------------------------------------------------------------------------------------------------------------------*/ + +#if 0 + +// 80 term DSD decimation filter +// < 1 dB down at 20 kHz +// > 108 dB stopband attenuation (fs/16) + +static const int32_t decm_filter [] = { + 4, 17, 56, 147, 336, 693, 1320, 2359, + 4003, 6502, 10170, 15392, 22623, 32389, 45275, 61920, + 82994, 109174, 141119, 179431, 224621, 277068, 336983, 404373, + 479004, 560384, 647741, 740025, 835917, 933849, 1032042, 1128551, + 1221329, 1308290, 1387386, 1456680, 1514425, 1559128, 1589610, 1605059, + 1605059, 1589610, 1559128, 1514425, 1456680, 1387386, 1308290, 1221329, + 1128551, 1032042, 933849, 835917, 740025, 647741, 560384, 479004, + 404373, 336983, 277068, 224621, 179431, 141119, 109174, 82994, + 61920, 45275, 32389, 22623, 15392, 10170, 6502, 4003, + 2359, 1320, 693, 336, 147, 56, 17, 4, +}; + +#define NUM_FILTER_TERMS 80 + +#else + +// 56 term decimation filter +// < 0.5 dB down at 20 kHz +// > 100 dB stopband attenuation (fs/12) + +static const int32_t decm_filter [] = { + 4, 17, 56, 147, 336, 692, 1315, 2337, + 3926, 6281, 9631, 14216, 20275, 28021, 37619, 49155, + 62616, 77870, 94649, 112551, 131049, 149507, 167220, 183448, + 197472, 208636, 216402, 220385, 220385, 216402, 208636, 197472, + 183448, 167220, 149507, 131049, 112551, 94649, 77870, 62616, + 49155, 37619, 28021, 20275, 14216, 9631, 6281, 3926, + 2337, 1315, 692, 336, 147, 56, 17, 4, +}; + +#define NUM_FILTER_TERMS 56 + +#endif + +#define HISTORY_BYTES ((NUM_FILTER_TERMS+7)/8) + +typedef struct { + unsigned char delay [HISTORY_BYTES]; +} DecimationChannel; + +typedef struct { + int32_t conv_tables [HISTORY_BYTES] [256]; + DecimationChannel *chans; + 
int num_channels; +} DecimationContext; + +void *decimate_dsd_init (int num_channels) +{ + DecimationContext *context = (DecimationContext *)malloc (sizeof (DecimationContext)); + double filter_sum = 0, filter_scale; + int skipped_terms, i, j; + + if (!context) + return context; + + memset (context, 0, sizeof (*context)); + context->num_channels = num_channels; + context->chans = (DecimationChannel *)malloc (num_channels * sizeof (DecimationChannel)); + + if (!context->chans) { + free (context); + return NULL; + } + + for (i = 0; i < NUM_FILTER_TERMS; ++i) + filter_sum += decm_filter [i]; + + filter_scale = ((1 << 23) - 1) / filter_sum * 16.0; + // fprintf (stderr, "convolution, %d terms, %f sum, %f scale\n", NUM_FILTER_TERMS, filter_sum, filter_scale); + + for (skipped_terms = i = 0; i < NUM_FILTER_TERMS; ++i) { + int scaled_term = (int) floor (decm_filter [i] * filter_scale + 0.5); + + if (scaled_term) { + for (j = 0; j < 256; ++j) + if (j & (0x80 >> (i & 0x7))) + context->conv_tables [i >> 3] [j] += scaled_term; + else + context->conv_tables [i >> 3] [j] -= scaled_term; + } + else + skipped_terms++; + } + + // fprintf (stderr, "%d terms skipped\n", skipped_terms); + + decimate_dsd_reset (context); + + return context; +} + +void decimate_dsd_reset (void *decimate_context) +{ + DecimationContext *context = (DecimationContext *) decimate_context; + int chan = 0, i; + + if (!context) + return; + + for (chan = 0; chan < context->num_channels; ++chan) + for (i = 0; i < HISTORY_BYTES; ++i) + context->chans [chan].delay [i] = 0x55; +} + +void decimate_dsd_run (void *decimate_context, int32_t *samples, int num_samples) +{ + DecimationContext *context = (DecimationContext *) decimate_context; + int chan = 0; + + if (!context) + return; + + while (num_samples) { + DecimationChannel *sp = context->chans + chan; + int sum = 0; + +#if (HISTORY_BYTES == 10) + sum += context->conv_tables [0] [sp->delay [0] = sp->delay [1]]; + sum += context->conv_tables [1] [sp->delay [1] = 
sp->delay [2]]; + sum += context->conv_tables [2] [sp->delay [2] = sp->delay [3]]; + sum += context->conv_tables [3] [sp->delay [3] = sp->delay [4]]; + sum += context->conv_tables [4] [sp->delay [4] = sp->delay [5]]; + sum += context->conv_tables [5] [sp->delay [5] = sp->delay [6]]; + sum += context->conv_tables [6] [sp->delay [6] = sp->delay [7]]; + sum += context->conv_tables [7] [sp->delay [7] = sp->delay [8]]; + sum += context->conv_tables [8] [sp->delay [8] = sp->delay [9]]; + sum += context->conv_tables [9] [sp->delay [9] = *samples]; +#elif (HISTORY_BYTES == 7) + sum += context->conv_tables [0] [sp->delay [0] = sp->delay [1]]; + sum += context->conv_tables [1] [sp->delay [1] = sp->delay [2]]; + sum += context->conv_tables [2] [sp->delay [2] = sp->delay [3]]; + sum += context->conv_tables [3] [sp->delay [3] = sp->delay [4]]; + sum += context->conv_tables [4] [sp->delay [4] = sp->delay [5]]; + sum += context->conv_tables [5] [sp->delay [5] = sp->delay [6]]; + sum += context->conv_tables [6] [sp->delay [6] = *samples]; +#else + int i; + + for (i = 0; i < HISTORY_BYTES-1; ++i) + sum += context->conv_tables [i] [sp->delay [i] = sp->delay [i+1]]; + + sum += context->conv_tables [i] [sp->delay [i] = *samples]; +#endif + + *samples++ = sum >> 4; + + if (++chan == context->num_channels) { + num_samples--; + chan = 0; + } + } +} + +void decimate_dsd_destroy (void *decimate_context) +{ + DecimationContext *context = (DecimationContext *) decimate_context; + + if (!context) + return; + + if (context->chans) + free (context->chans); + + free (context); +} + +#endif // ENABLE_DSD diff --git a/third_party/wavpack/src/unpack_floats.c b/third_party/wavpack/src/unpack_floats.c new file mode 100644 index 0000000..cc045dd --- /dev/null +++ b/third_party/wavpack/src/unpack_floats.c @@ -0,0 +1,134 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 
Conifer Software. //
//                          All Rights Reserved.                          //
//      Distributed under the BSD Software License (see license.txt)      //
////////////////////////////////////////////////////////////////////////////

// unpack_floats.c

// This module deals with the restoration of floating-point data. Note that no
// floating point math is involved here...the values are only processed with
// the macros that directly access the mantissa, exponent, and sign fields.
// That's why we use the f32 type instead of the built-in float type.

// NOTE(review): the header name on the next line is missing in this copy
// (apparently lost together with its angle brackets) -- restore it from the
// upstream source before building.
#include

#include "wavpack_local.h"

static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values);

// Convert "num_values" decoded integers at "values" into f32 bit patterns,
// in place. When the stream's extension bitstream (wps->wvxbits) is not open
// the work is delegated to float_values_nowvx(); otherwise extra bits are
// pulled from that bitstream as directed by wps->float_flags, and a running
// checksum of every produced value is accumulated into wps->crc_x.

void float_values (WavpackStream *wps, int32_t *values, int32_t num_values)
{
    uint32_t crc = wps->crc_x;

    if (!bs_is_open (&wps->wvxbits)) {
        float_values_nowvx (wps, values, num_values);
        return;
    }

    while (num_values--) {
        int shift_count = 0, exp = wps->float_max_exp;
        f32 outval = 0;
        uint32_t temp;

        if (*values == 0) {
            // a zero integer may stand for a value stored entirely in the
            // extension stream: one flag bit, then 23 mantissa bits, then
            // (only when float_max_exp >= 25) 8 exponent bits, then the sign
            if (wps->float_flags & FLOAT_ZEROS_SENT) {
                if (getbit (&wps->wvxbits)) {
                    getbits (&temp, 23, &wps->wvxbits);
                    set_mantissa (outval, temp);

                    if (exp >= 25) {
                        getbits (&temp, 8, &wps->wvxbits);
                        set_exponent (outval, temp);
                    }

                    set_sign (outval, getbit (&wps->wvxbits));
                }
                else if (wps->float_flags & FLOAT_NEG_ZEROS)
                    set_sign (outval, getbit (&wps->wvxbits));
            }
        }
        else {
            *values <<= wps->float_shift;

            // work on the magnitude; the sign goes straight into the result
            if (*values < 0) {
                *values = -*values;
                set_sign (outval, 1);
            }

            // a magnitude of exactly 0x1000000 selects exponent 255 with an
            // optional explicit mantissa (presumably the inf/NaN encodings --
            // confirm against the encoder)
            if (*values == 0x1000000) {
                if (getbit (&wps->wvxbits)) {
                    getbits (&temp, 23, &wps->wvxbits);
                    set_mantissa (outval, temp);
                }

                set_exponent (outval, 255);
            }
            else {
                // normalize: shift left until bit 23 is set or the exponent
                // counts down to zero, remembering how many low bits opened up
                if (exp)
                    while (!(*values & 0x800000) && --exp) {
                        shift_count++;
                        *values <<= 1;
                    }

                // fill the vacated low bits: all ones (SHIFT_ONES, or
                // SHIFT_SAME with a set flag bit), or the literal bits read
                // from the extension stream (SHIFT_SENT); otherwise they stay zero
                if (shift_count) {
                    if ((wps->float_flags & FLOAT_SHIFT_ONES) ||
                        ((wps->float_flags & FLOAT_SHIFT_SAME) && getbit (&wps->wvxbits)))
                            *values |= ((1 << shift_count) - 1);
                    else if (wps->float_flags & FLOAT_SHIFT_SENT) {
                        getbits (&temp, shift_count, &wps->wvxbits);
                        *values |= temp & ((1 << shift_count) - 1);
                    }
                }

                set_mantissa (outval, *values);
                set_exponent (outval, exp);
            }
        }

        // fold the three fields of the result into the running checksum, then
        // overwrite the integer slot with the f32 bit pattern
        crc = crc * 27 + get_mantissa (outval) * 9 + get_exponent (outval) * 3 + get_sign (outval);
        * (f32 *) values++ = outval;
    }

    wps->crc_x = crc;
}

// Same in-place conversion as float_values(), for the case where no extension
// bitstream is available: nothing is read from wps->wvxbits and wps->crc_x is
// not touched. Zero inputs produce an all-zero f32.

static void float_values_nowvx (WavpackStream *wps, int32_t *values, int32_t num_values)
{
    while (num_values--) {
        int shift_count = 0, exp = wps->float_max_exp;
        f32 outval = 0;

        if (*values) {
            *values <<= wps->float_shift;

            if (*values < 0) {
                *values = -*values;
                set_sign (outval, 1);
            }

            if (*values >= 0x1000000) {
                // too large for the mantissa: shift right (raising the
                // exponent) while any of bits 24..27 are still set
                while (*values & 0xf000000) {
                    *values >>= 1;
                    ++exp;
                }
            }
            else if (exp) {
                // normalize upward exactly as float_values() does
                while (!(*values & 0x800000) && --exp) {
                    shift_count++;
                    *values <<= 1;
                }

                // only the "fill with ones" policy can be honored without
                // the extension bitstream
                if (shift_count && (wps->float_flags & FLOAT_SHIFT_ONES))
                    *values |= ((1 << shift_count) - 1);
            }

            set_mantissa (outval, *values);
            set_exponent (outval, exp);
        }

        * (f32 *) values++ = outval;
    }
}
diff --git a/third_party/wavpack/src/unpack_seek.c b/third_party/wavpack/src/unpack_seek.c
new file mode 100644
index 0000000..f3ab081
--- /dev/null
+++ b/third_party/wavpack/src/unpack_seek.c
@@ -0,0 +1,375 @@
////////////////////////////////////////////////////////////////////////////
//                           **** WAVPACK ****                            //
//                  Hybrid Lossless Wavefile Compressor                   //
//              Copyright (c) 1998 - 2013 Conifer Software.               //
//                          All Rights Reserved.                          //
//      Distributed under the BSD Software License (see license.txt)      //
////////////////////////////////////////////////////////////////////////////

// unpack_seek.c

// This module provides the high-level API for unpacking audio data from
// a specific sample index (i.e., seeking).
+ +#ifndef NO_SEEKING + +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +static int64_t find_sample (WavpackContext *wpc, void *infile, int64_t header_pos, int64_t sample); + +// Seek to the specifed sample index, returning TRUE on success. Note that +// files generated with version 4.0 or newer will seek almost immediately. +// Older files can take quite long if required to seek through unplayed +// portions of the file, but will create a seek map so that reverse seeks +// (or forward seeks to already scanned areas) will be very fast. After a +// FALSE return the file should not be accessed again (other than to close +// it); this is a fatal error. + +int WavpackSeekSample (WavpackContext *wpc, uint32_t sample) +{ + return WavpackSeekSample64 (wpc, sample); +} + +int WavpackSeekSample64 (WavpackContext *wpc, int64_t sample) +{ + WavpackStream *wps = wpc->streams ? wpc->streams [wpc->current_stream = 0] : NULL; + uint32_t bcount, samples_to_skip, samples_to_decode = 0; + int32_t *buffer; + + if (wpc->total_samples == -1 || sample >= wpc->total_samples || + !wpc->reader->can_seek (wpc->wv_in) || (wpc->open_flags & OPEN_STREAMING) || + (wpc->wvc_flag && !wpc->reader->can_seek (wpc->wvc_in))) + return FALSE; + +#ifdef ENABLE_LEGACY + if (wpc->stream3) + return seek_sample3 (wpc, (uint32_t) sample); +#endif + +#ifdef ENABLE_DSD + if (wpc->decimation_context) { // the decimation code needs some context to be sample accurate + if (sample < 16) { + samples_to_decode = (uint32_t) sample; + sample = 0; + } + else { + samples_to_decode = 16; + sample -= 16; + } + } +#endif + + if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || sample < GET_BLOCK_INDEX (wps->wphdr) || + sample >= GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) { + + free_streams (wpc); + wpc->filepos = find_sample (wpc, wpc->wv_in, wpc->filepos, sample); + + if (wpc->filepos == -1) + return 
FALSE; + + if (wpc->wvc_flag) { + wpc->file2pos = find_sample (wpc, wpc->wvc_in, 0, sample); + + if (wpc->file2pos == -1) + return FALSE; + } + } + + if (!wps->blockbuff) { + wpc->reader->set_pos_abs (wpc->wv_in, wpc->filepos); + wpc->reader->read_bytes (wpc->wv_in, &wps->wphdr, sizeof (WavpackHeader)); + WavpackLittleEndianToNative (&wps->wphdr, WavpackHeaderFormat); + wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8); + memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader)); + + if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + sizeof (WavpackHeader), wps->wphdr.ckSize - 24) != + wps->wphdr.ckSize - 24) { + free_streams (wpc); + return FALSE; + } + + // render corrupt blocks harmless + if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) { + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + wps->wphdr.block_samples = 0; + memcpy (wps->blockbuff, &wps->wphdr, 32); + } + + SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index); + memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader)); + wps->init_done = FALSE; + + if (wpc->wvc_flag) { + wpc->reader->set_pos_abs (wpc->wvc_in, wpc->file2pos); + wpc->reader->read_bytes (wpc->wvc_in, &wps->wphdr, sizeof (WavpackHeader)); + WavpackLittleEndianToNative (&wps->wphdr, WavpackHeaderFormat); + wps->block2buff = (unsigned char *)malloc (wps->wphdr.ckSize + 8); + memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader)); + + if (wpc->reader->read_bytes (wpc->wvc_in, wps->block2buff + sizeof (WavpackHeader), wps->wphdr.ckSize - 24) != + wps->wphdr.ckSize - 24) { + free_streams (wpc); + return FALSE; + } + + // render corrupt blocks harmless + if (!WavpackVerifySingleBlock (wps->block2buff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) { + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + wps->wphdr.block_samples = 0; + memcpy (wps->block2buff, &wps->wphdr, 32); + } + + SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - 
wpc->initial_index); + memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader)); + } + + if (!wps->init_done && !unpack_init (wpc)) { + free_streams (wpc); + return FALSE; + } + + wps->init_done = TRUE; + } + + while (!wpc->reduced_channels && !(wps->wphdr.flags & FINAL_BLOCK)) { + if (++wpc->current_stream == wpc->num_streams) { + + if (wpc->num_streams == wpc->max_streams) { + free_streams (wpc); + return FALSE; + } + + wpc->streams = (WavpackStream **)realloc (wpc->streams, (wpc->num_streams + 1) * sizeof (wpc->streams [0])); + wps = wpc->streams [wpc->num_streams++] = (WavpackStream *)malloc (sizeof (WavpackStream)); + CLEAR (*wps); + bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); + + if (bcount == (uint32_t) -1) { + free_streams (wpc); + return FALSE; + } + + wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8); + memcpy (wps->blockbuff, &wps->wphdr, 32); + + if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != + wps->wphdr.ckSize - 24) { + free_streams (wpc); + return FALSE; + } + + // render corrupt blocks harmless + if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) { + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + wps->wphdr.block_samples = 0; + memcpy (wps->blockbuff, &wps->wphdr, 32); + } + + wps->init_done = FALSE; + + if (wpc->wvc_flag && !read_wvc_block (wpc)) { + free_streams (wpc); + return FALSE; + } + + if (!wps->init_done && !unpack_init (wpc)) { + free_streams (wpc); + return FALSE; + } + + wps->init_done = TRUE; + } + else + wps = wpc->streams [wpc->current_stream]; + } + + if (sample < wps->sample_index) { + for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) + if (!unpack_init (wpc)) + return FALSE; + else + wpc->streams [wpc->current_stream]->init_done = TRUE; + } + + samples_to_skip = (uint32_t) (sample - wps->sample_index); + + if (samples_to_skip > 131072) { + free_streams (wpc); + return 
FALSE; + } + + if (samples_to_skip) { + buffer = (int32_t *)malloc (samples_to_skip * 8); + + for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) +#ifdef ENABLE_DSD + if (wpc->streams [wpc->current_stream]->wphdr.flags & DSD_FLAG) + unpack_dsd_samples (wpc, buffer, samples_to_skip); + else +#endif + unpack_samples (wpc, buffer, samples_to_skip); + + free (buffer); + } + + wpc->current_stream = 0; + +#ifdef ENABLE_DSD + if (wpc->decimation_context) + decimate_dsd_reset (wpc->decimation_context); + + if (samples_to_decode) { + buffer = (int32_t *)malloc (samples_to_decode * wpc->config.num_channels * 4); + + if (buffer) { + WavpackUnpackSamples (wpc, buffer, samples_to_decode); + free (buffer); + } + } +#endif + + return TRUE; +} + +// Find a valid WavPack header, searching either from the current file position +// (or from the specified position if not -1) and store it (endian corrected) +// at the specified pointer. The return value is the exact file position of the +// header, although we may have actually read past it. Because this function +// is used for seeking to a specific audio sample, it only considers blocks +// that contain audio samples for the initial stream to be valid. 
+ +#define BUFSIZE 4096 + +static int64_t find_header (WavpackStreamReader64 *reader, void *id, int64_t filepos, WavpackHeader *wphdr) +{ + unsigned char *buffer = (unsigned char *)malloc (BUFSIZE), *sp = buffer, *ep = buffer; + + if (filepos != (uint32_t) -1 && reader->set_pos_abs (id, filepos)) { + free (buffer); + return -1; + } + + while (1) { + int bleft; + + if (sp < ep) { + bleft = (int)(ep - sp); + memcpy (buffer, sp, bleft); + ep -= (sp - buffer); + sp = buffer; + } + else { + if (sp > ep) + if (reader->set_pos_rel (id, (int32_t)(sp - ep), SEEK_CUR)) { + free (buffer); + return -1; + } + + sp = ep = buffer; + bleft = 0; + } + + ep += reader->read_bytes (id, ep, BUFSIZE - bleft); + + if (ep - sp < 32) { + free (buffer); + return -1; + } + + while (sp + 32 <= ep) + if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' && + !(*++sp & 1) && sp [2] < 16 && !sp [3] && (sp [2] || sp [1] || *sp >= 24) && sp [5] == 4 && + sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff) && sp [18] < 3 && !sp [19]) { + memcpy (wphdr, sp - 4, sizeof (*wphdr)); + WavpackLittleEndianToNative (wphdr, WavpackHeaderFormat); + + if (wphdr->block_samples && (wphdr->flags & INITIAL_BLOCK)) { + free (buffer); + return reader->get_pos (id) - (ep - sp + 4); + } + + if (wphdr->ckSize > 1024) + sp += wphdr->ckSize - 1024; + } + } +} + +// Find the WavPack block that contains the specified sample. If "header_pos" +// is zero, then no information is assumed except the total number of samples +// in the file and its size in bytes. If "header_pos" is non-zero then we +// assume that it is the file position of the valid header image contained in +// the first stream and we can limit our search to either the portion above +// or below that point. If a .wvc file is being used, then this must be called +// for that file also. 
+ +static int64_t find_sample (WavpackContext *wpc, void *infile, int64_t header_pos, int64_t sample) +{ + WavpackStream *wps = wpc->streams [wpc->current_stream]; + int64_t file_pos1 = 0, file_pos2 = wpc->reader->get_length (infile); + int64_t sample_pos1 = 0, sample_pos2 = wpc->total_samples; + double ratio = 0.96; + int file_skip = 0; + + if (sample >= wpc->total_samples) + return -1; + + if (header_pos && wps->wphdr.block_samples) { + if (GET_BLOCK_INDEX (wps->wphdr) > sample) { + sample_pos2 = GET_BLOCK_INDEX (wps->wphdr); + file_pos2 = header_pos; + } + else if (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples <= sample) { + sample_pos1 = GET_BLOCK_INDEX (wps->wphdr); + file_pos1 = header_pos; + } + else + return header_pos; + } + + while (1) { + double bytes_per_sample; + int64_t seek_pos; + + bytes_per_sample = (double) file_pos2 - file_pos1; + bytes_per_sample /= sample_pos2 - sample_pos1; + seek_pos = file_pos1 + (file_skip ? 32 : 0); + seek_pos += (int64_t)(bytes_per_sample * (sample - sample_pos1) * ratio); + seek_pos = find_header (wpc->reader, infile, seek_pos, &wps->wphdr); + + if (seek_pos != (int64_t) -1) + SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index); + + if (seek_pos == (int64_t) -1 || seek_pos >= file_pos2) { + if (ratio > 0.0) { + if ((ratio -= 0.24) < 0.0) + ratio = 0.0; + } + else + return -1; + } + else if (GET_BLOCK_INDEX (wps->wphdr) > sample) { + sample_pos2 = GET_BLOCK_INDEX (wps->wphdr); + file_pos2 = seek_pos; + } + else if (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples <= sample) { + + if (seek_pos == file_pos1) + file_skip = 1; + else { + sample_pos1 = GET_BLOCK_INDEX (wps->wphdr); + file_pos1 = seek_pos; + } + } + else + return seek_pos; + } +} + +#endif + diff --git a/third_party/wavpack/src/unpack_utils.c b/third_party/wavpack/src/unpack_utils.c new file mode 100644 index 0000000..ce7d7da --- /dev/null +++ b/third_party/wavpack/src/unpack_utils.c @@ -0,0 +1,411 @@ 
+//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// unpack_utils.c + +// This module provides the high-level API for unpacking audio data from +// WavPack files. It manages the buffers used to interleave the data passed +// back to the application from the individual streams. The actual audio +// stream decompression is handled in the unpack.c module. + +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +// Unpack the specified number of samples from the current file position. +// Note that "samples" here refers to "complete" samples, which would be +// 2 longs for stereo files or even more for multichannel files, so the +// required memory at "buffer" is 4 * samples * num_channels bytes. The +// audio data is returned right-justified in 32-bit longs in the endian +// mode native to the executing processor. So, if the original data was +// 16-bit, then the values returned would be +/-32k. Floating point data +// can also be returned if the source was floating point data (and this +// can be optionally normalized to +/-1.0 by using the appropriate flag +// in the call to WavpackOpenFileInput ()). The actual number of samples +// unpacked is returned, which should be equal to the number requested unless +// the end of fle is encountered or an error occurs. After all samples have +// been unpacked then 0 will be returned. + +uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples) +{ + WavpackStream *wps = wpc->streams ? 
wpc->streams [wpc->current_stream = 0] : NULL; + int num_channels = wpc->config.num_channels, file_done = FALSE; + uint32_t bcount, samples_unpacked = 0, samples_to_unpack; + int32_t *bptr = buffer; + +#ifdef ENABLE_LEGACY + if (wpc->stream3) + return unpack_samples3 (wpc, buffer, samples); +#endif + + while (samples) { + + // if the current block has no audio, or it's not the first block of a multichannel + // sequence, or the sample we're on is past the last sample in this block...we need + // to free up the streams and read the next block + + if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || + wps->sample_index >= GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) { + + int64_t nexthdrpos; + + if (wpc->wrapper_bytes >= MAX_WRAPPER_BYTES) + break; + + free_streams (wpc); + nexthdrpos = wpc->reader->get_pos (wpc->wv_in); + bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); + + if (bcount == (uint32_t) -1) + break; + + wpc->filepos = nexthdrpos + bcount; + + // allocate the memory for the entire raw block and read it in + + wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8); + + if (!wps->blockbuff) + break; + + memcpy (wps->blockbuff, &wps->wphdr, 32); + + if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != + wps->wphdr.ckSize - 24) { + strcpy (wpc->error_message, "can't read all of last block!"); + wps->wphdr.block_samples = 0; + wps->wphdr.ckSize = 24; + break; + } + + // render corrupt blocks harmless + if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) { + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + wps->wphdr.block_samples = 0; + memcpy (wps->blockbuff, &wps->wphdr, 32); + } + + // potentially adjusting block_index must be done AFTER verifying block + + if (wpc->open_flags & OPEN_STREAMING) + SET_BLOCK_INDEX (wps->wphdr, wps->sample_index = 0); + else + SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - 
wpc->initial_index); + + memcpy (wps->blockbuff, &wps->wphdr, 32); + wps->init_done = FALSE; // we have not yet called unpack_init() for this block + + // if this block has audio, but not the sample index we were expecting, flag an error + + if (wps->wphdr.block_samples && wps->sample_index != GET_BLOCK_INDEX (wps->wphdr)) + wpc->crc_errors++; + + // if this block has audio, and we're in hybrid lossless mode, read the matching wvc block + + if (wps->wphdr.block_samples && wpc->wvc_flag) + read_wvc_block (wpc); + + // if the block does NOT have any audio, call unpack_init() to process non-audio stuff + + if (!wps->wphdr.block_samples) { + if (!wps->init_done && !unpack_init (wpc)) + wpc->crc_errors++; + + wps->init_done = TRUE; + } + } + + // if the current block has no audio, or it's not the first block of a multichannel + // sequence, or the sample we're on is past the last sample in this block...we need + // to loop back and read the next block + + if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || + wps->sample_index >= GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) + continue; + + // There seems to be some missing data, like a block was corrupted or something. + // If it's not too much data, just fill in with silence here and loop back. + + if (wps->sample_index < GET_BLOCK_INDEX (wps->wphdr)) { + int32_t zvalue = (wps->wphdr.flags & DSD_FLAG) ? 0x55 : 0; + + samples_to_unpack = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) - wps->sample_index); + + if (!samples_to_unpack || samples_to_unpack > 262144) { + strcpy (wpc->error_message, "discontinuity found, aborting file!"); + wps->wphdr.block_samples = 0; + wps->wphdr.ckSize = 24; + break; + } + + if (samples_to_unpack > samples) + samples_to_unpack = samples; + + wps->sample_index += samples_to_unpack; + samples_unpacked += samples_to_unpack; + samples -= samples_to_unpack; + + samples_to_unpack *= (wpc->reduced_channels ? 
wpc->reduced_channels : num_channels); + + while (samples_to_unpack--) + *bptr++ = zvalue; + + continue; + } + + // calculate number of samples to process from this block, then initialize the decoder for + // this block if we haven't already + + samples_to_unpack = (uint32_t) (GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples - wps->sample_index); + + if (samples_to_unpack > samples) + samples_to_unpack = samples; + + if (!wps->init_done && !unpack_init (wpc)) + wpc->crc_errors++; + + wps->init_done = TRUE; + + // if this block is not the final block of a multichannel sequence (and we're not truncating + // to stereo), then enter this conditional block...otherwise we just unpack the samples directly + + if (!wpc->reduced_channels && !(wps->wphdr.flags & FINAL_BLOCK)) { + int32_t *temp_buffer = (int32_t *)malloc (samples_to_unpack * 8), *src, *dst; + int offset = 0; // offset to next channel in sequence (0 to num_channels - 1) + uint32_t samcnt; + + // since we are getting samples from multiple bocks in a multichannel sequence, we must + // allocate a temporary buffer to unpack to so that we can re-interleave the samples + + if (!temp_buffer) + break; + + // loop through all the streams... + + while (1) { + + // if the stream has not been allocated and corresponding block read, do that here... 
+ + if (wpc->current_stream == wpc->num_streams) { + wpc->streams = (WavpackStream **)realloc (wpc->streams, (wpc->num_streams + 1) * sizeof (wpc->streams [0])); + + if (!wpc->streams) + break; + + wps = wpc->streams [wpc->num_streams++] = (WavpackStream *)malloc (sizeof (WavpackStream)); + + if (!wps) + break; + + CLEAR (*wps); + bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); + + if (bcount == (uint32_t) -1) { + wpc->streams [0]->wphdr.block_samples = 0; + wpc->streams [0]->wphdr.ckSize = 24; + file_done = TRUE; + break; + } + + wps->blockbuff = (unsigned char *)malloc (wps->wphdr.ckSize + 8); + + if (!wps->blockbuff) + break; + + memcpy (wps->blockbuff, &wps->wphdr, 32); + + if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != + wps->wphdr.ckSize - 24) { + wpc->streams [0]->wphdr.block_samples = 0; + wpc->streams [0]->wphdr.ckSize = 24; + file_done = TRUE; + break; + } + + // render corrupt blocks harmless + if (!WavpackVerifySingleBlock (wps->blockbuff, !(wpc->open_flags & OPEN_NO_CHECKSUM))) { + wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; + wps->wphdr.block_samples = 0; + memcpy (wps->blockbuff, &wps->wphdr, 32); + } + + // potentially adjusting block_index must be done AFTER verifying block + + if (wpc->open_flags & OPEN_STREAMING) + SET_BLOCK_INDEX (wps->wphdr, wps->sample_index = 0); + else + SET_BLOCK_INDEX (wps->wphdr, GET_BLOCK_INDEX (wps->wphdr) - wpc->initial_index); + + memcpy (wps->blockbuff, &wps->wphdr, 32); + + // if this block has audio, and we're in hybrid lossless mode, read the matching wvc block + + if (wpc->wvc_flag) + read_wvc_block (wpc); + + // initialize the unpacker for this block + + if (!unpack_init (wpc)) + wpc->crc_errors++; + + wps->init_done = TRUE; + } + else + wps = wpc->streams [wpc->current_stream]; + + // unpack the correct number of samples (either mono or stereo) into the temp buffer + +#ifdef ENABLE_DSD + if (wps->wphdr.flags & DSD_FLAG) + unpack_dsd_samples (wpc, 
src = temp_buffer, samples_to_unpack); + else +#endif + unpack_samples (wpc, src = temp_buffer, samples_to_unpack); + + samcnt = samples_to_unpack; + dst = bptr + offset; + + // if the block is mono, copy the samples from the single channel into the destination + // using num_channels as the stride + + if (wps->wphdr.flags & MONO_FLAG) { + while (samcnt--) { + dst [0] = *src++; + dst += num_channels; + } + + offset++; + } + + // if the block is stereo, and we don't have room for two more channels, just copy one + // and flag an error + + else if (offset == num_channels - 1) { + while (samcnt--) { + dst [0] = src [0]; + dst += num_channels; + src += 2; + } + + wpc->crc_errors++; + offset++; + } + + // otherwise copy the stereo samples into the destination + + else { + while (samcnt--) { + dst [0] = *src++; + dst [1] = *src++; + dst += num_channels; + } + + offset += 2; + } + + // check several clues that we're done with this set of blocks and exit if we are; else do next stream + + if ((wps->wphdr.flags & FINAL_BLOCK) || wpc->current_stream == wpc->max_streams - 1 || offset == num_channels) + break; + else + wpc->current_stream++; + } + + // if we didn't get all the channels we expected, mute the buffer and flag an error + + if (offset != num_channels) { + if (wps->wphdr.flags & DSD_FLAG) { + int samples_to_zero = samples_to_unpack * num_channels; + int32_t *zptr = bptr; + + while (samples_to_zero--) + *zptr++ = 0x55; + } + else + memset (bptr, 0, samples_to_unpack * num_channels * 4); + + wpc->crc_errors++; + } + + // go back to the first stream (we're going to leave them all loaded for now because they might have more samples) + // and free the temp buffer + + wps = wpc->streams [wpc->current_stream = 0]; + free (temp_buffer); + } + // catch the error situation where we have only one channel but run into a stereo block + // (this avoids overwriting the caller's buffer) + else if (!(wps->wphdr.flags & MONO_FLAG) && (num_channels == 1 || wpc->reduced_channels == 1)) 
{ + memset (bptr, 0, samples_to_unpack * sizeof (*bptr)); + wps->sample_index += samples_to_unpack; + wpc->crc_errors++; + } +#ifdef ENABLE_DSD + else if (wps->wphdr.flags & DSD_FLAG) + unpack_dsd_samples (wpc, bptr, samples_to_unpack); +#endif + else + unpack_samples (wpc, bptr, samples_to_unpack); + + if (file_done) { + strcpy (wpc->error_message, "can't read all of last block!"); + break; + } + + if (wpc->reduced_channels) + bptr += samples_to_unpack * wpc->reduced_channels; + else + bptr += samples_to_unpack * num_channels; + + samples_unpacked += samples_to_unpack; + samples -= samples_to_unpack; + + // if we just finished a block, check for a calculated crc error + // (and back up the streams a little if possible in case we passed a header) + + if (wps->sample_index == GET_BLOCK_INDEX (wps->wphdr) + wps->wphdr.block_samples) { + if (check_crc_error (wpc)) { + int32_t *zptr = bptr, zvalue = (wps->wphdr.flags & DSD_FLAG) ? 0x55 : 0; + uint32_t samples_to_zero = wps->wphdr.block_samples; + + if (samples_to_zero > samples_to_unpack) + samples_to_zero = samples_to_unpack; + + samples_to_zero *= (wpc->reduced_channels ? wpc->reduced_channels : num_channels); + + while (samples_to_zero--) + *--zptr = zvalue; + + if (wps->blockbuff && wpc->reader->can_seek (wpc->wv_in)) { + int32_t rseek = ((WavpackHeader *) wps->blockbuff)->ckSize / 3; + wpc->reader->set_pos_rel (wpc->wv_in, (rseek > 16384) ? -16384 : -rseek, SEEK_CUR); + } + + if (wpc->wvc_flag && wps->block2buff && wpc->reader->can_seek (wpc->wvc_in)) { + int32_t rseek = ((WavpackHeader *) wps->block2buff)->ckSize / 3; + wpc->reader->set_pos_rel (wpc->wvc_in, (rseek > 16384) ? 
-16384 : -rseek, SEEK_CUR); + } + + wpc->crc_errors++; + } + } + + if (wpc->total_samples != -1 && wps->sample_index == wpc->total_samples) + break; + } + +#ifdef ENABLE_DSD + if (wpc->decimation_context) + decimate_dsd_run (wpc->decimation_context, buffer, samples_unpacked); +#endif + + return samples_unpacked; +} diff --git a/third_party/wavpack/src/unpack_x64.S b/third_party/wavpack/src/unpack_x64.S new file mode 100644 index 0000000..f9657cf --- /dev/null +++ b/third_party/wavpack/src/unpack_x64.S @@ -0,0 +1,957 @@ +############################################################################ +## **** WAVPACK **** ## +## Hybrid Lossless Wavefile Compressor ## +## Copyright (c) 1998 - 2015 Conifer Software. ## +## All Rights Reserved. ## +## Distributed under the BSD Software License (see license.txt) ## +############################################################################ + + .intel_syntax noprefix + .text + + .globl _unpack_decorr_stereo_pass_cont_x64win + .globl _unpack_decorr_mono_pass_cont_x64win + + .globl unpack_decorr_stereo_pass_cont_x64win + .globl unpack_decorr_mono_pass_cont_x64win + + .globl _unpack_decorr_stereo_pass_cont_x64 + .globl _unpack_decorr_mono_pass_cont_x64 + + .globl unpack_decorr_stereo_pass_cont_x64 + .globl unpack_decorr_mono_pass_cont_x64 + +# This is an assembly optimized version of the following WavPack function: +# +# void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp, +# int32_t *buffer, +# int32_t sample_count, +# int32_t long_math; +# +# It performs a single pass of stereo decorrelation on the provided buffer. +# Note that this version of the function requires that up to 8 previous +# stereo samples are visible and correct. In other words, it ignores the +# "samples_*" fields in the decorr_pass structure and gets the history data +# directly from the buffer. It does, however, return the appropriate history +# samples to the decorr_pass structure before returning. 
+# +# The "long_math" argument is used to specify that a 32-bit multiply is +# not enough for the "apply_weight" operation (although in this case it +# would only apply to the -1 and -2 terms because the MMX code does not have +# this limitation) but we ignore the parameter and use the overflow detection +# of the "imul" instruction to switch automatically to the "long_math" loop. +# +# This is written to work on an X86-64 processor (also called the AMD64) +# running in 64-bit mode and generally uses the MMX extensions to improve +# the performance by processing both stereo channels together. Unfortunately +# this is not easily used for terms -1 and -2, so these terms are handled +# sequentially with regular assembler code. +# +# This version has entry points for both the System V ABI and the Windows +# X64 ABI. It does not use the "red zone" or the "shadow area"; it saves the +# non-volatile registers for both ABIs on the stack and allocates another +# 8 bytes on the stack to store the dpp pointer. Note that it does NOT +# provide unwind data for the Windows ABI (the unpack_x64.asm module for +# MSVC does). 
The arguments are passed in registers: +# +# System V Windows +# rdi rcx struct decorr_pass *dpp +# rsi rdx int32_t *buffer +# edx r8 int32_t sample_count +# ecx r9 int32_t long_math +# +# registers after entry: +# +# rdi bptr +# rsi eptr +# +# stack usage: +# +# [rsp+0] = *dpp +# + +_unpack_decorr_stereo_pass_cont_x64win: +unpack_decorr_stereo_pass_cont_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp entry # jump into common portion + +_unpack_decorr_stereo_pass_cont_x64: +unpack_decorr_stereo_pass_cont_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +entry: mov [rsp], rdi # store dpp* at [rsp] + and edx, edx # if sample_count is zero, do nothing + jz done + + mov rdi, rsi # rdi = bptr + lea rsi, [rdi+rdx*8] # rsi = eptr + + mov rax, [rsp] # get term from dpp struct & vector to handler + mov eax, [rax] + cmp al, 17 + je term_17_entry + cmp al, 18 + je term_18_entry + cmp al, -1 + je term_minus_1_entry + cmp al, -2 + je term_minus_2_entry + cmp al, -3 + je term_minus_3_entry + +# +# registers in default term loop: +# +# rbx term * -8 (for indexing correlation sample) +# rdi bptr +# rsi eptr +# +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation sample +# mm4 zero (for pcmpeqd) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +default_term_entry: + imul rbx, rax, -8 # set RBX to term * -8 + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov rdx, [rsp] # set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] # mm5 = weight_AB masked to 16 bits + pxor mm4, mm4 # mm4 = zero (for pcmpeqd) + jmp default_term_loop + + .balign 64 +default_term_loop: + movq 
mm3, [rdi+rbx] # mm3 = sam_AB + movq mm1, mm3 + movq mm0, mm3 + paddd mm1, mm1 + psrld mm0, 15 + psrlw mm1, 1 + pmaddwd mm0, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm0, mm2 + paddd mm0, mm1 # add shifted sums + movq [rdi], mm0 # store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm4 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi # compare bptr and eptr to see if we're done + jb default_term_loop + + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov rdx, [rsp] # point to dpp + movq [rdx+8], mm5 # put weight_AB back + emms + + mov ecx, [rdx] # ecx = dpp->term + +default_store_samples: + dec ecx + sub rdi, 8 # back up one full sample + mov eax, [rdi+4] + mov [rdx+rcx*4+48], eax # store samples_B [ecx] + mov eax, [rdi] + mov [rdx+rcx*4+16], eax # store samples_A [ecx] + test ecx, ecx + jnz default_store_samples + jmp done + +# +# registers in term 17 & 18 loops: +# +# rdi bptr +# rsi eptr +# +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 last calculated values (so we don't need to reload) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +term_17_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov rdx, [rsp] # set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] # mm5 = weight_AB masked to 16 bits + movq mm4, [rdi-8] # preload last calculated values in mm4 + jmp term_17_loop + + .balign 64 
+term_17_loop: + paddd mm4, mm4 + psubd mm4, [rdi-16] # mm3 = sam_AB + movq mm3, mm4 + movq mm1, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] # mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm4 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi # compare bptr and eptr to see if we're done + jb term_17_loop + jmp term_1718_exit # terms 17 & 18 treat samples_AB[] the same + +term_18_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov rdx, [rsp] # set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] # mm5 = weight_AB masked to 16 bits + movq mm4, [rdi-8] # preload last calculated values in mm4 + jmp term_18_loop + + .balign 64 +term_18_loop: + movq mm3, mm4 + psubd mm3, [rdi-16] + psrad mm3, 1 + paddd mm3, mm4 # mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] # mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 # add shifted sums + movq mm0, mm3 + movq [rdi], mm4 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd 
mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi # compare bptr and eptr to see if we're done + jb term_18_loop + +term_1718_exit: + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov rdx, [rsp] # point to dpp + movq [rdx+8], mm5 # put weight_AB back + emms + + mov eax, [rdi-4] # dpp->samples_B [0] = bptr [-1]; + mov [rdx+48], eax + mov eax, [rdi-8] # dpp->samples_A [0] = bptr [-2]; + mov [rdx+16], eax + mov eax, [rdi-12] # dpp->samples_B [1] = bptr [-3]; + mov [rdx+52], eax + mov eax, [rdi-16] # dpp->samples_A [1] = bptr [-4]; + mov [rdx+20], eax + jmp done + +# +# registers in term -1 & -2 loops: +# +# eax,ebx,edx scratch +# ecx weight_A +# ebp weight_B +# rdi bptr +# rsi eptr +# r8d delta +# + +term_minus_1_entry: + cld + mov rdx, [rsp] # point to dpp + mov ecx, [rdx+8] # ecx = weight_A + mov ebp, [rdx+12] # ebp = weight_B + mov r8d, [rdx+4] # r8d = delta + mov eax, [rdi-4] + jmp term_minus_1_loop + + .balign 64 +term_minus_1_loop: + mov ebx, eax + imul eax, ecx + mov edx, [rdi] + jo OV11 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L182 + test edx, edx + je L182 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L183 + mov ecx, edx +L183: xor ecx, ebx +L182: mov ebx, eax + imul eax, ebp + mov edx, [rdi] + jo OV12 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L187 + test edx, edx + je L187 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L188 + mov ebp, edx +L188: xor ebp, ebx +L187: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb term_minus_1_loop + jmp term_minus_1_done + +OV11: mov eax, ebx # restore previous sample into eax + jmp long_term_minus_1_loop + +OV12: mov eax, ebx # restore previous sample into 
eax + jmp L282 + + .balign 64 +long_term_minus_1_loop: + mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + je L282 + test edx, edx + je L282 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L283 + mov ecx, edx +L283: xor ecx, ebx +L282: mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + je L287 + test edx, edx + je L287 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L288 + mov ebp, edx +L288: xor ebp, ebx +L287: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb long_term_minus_1_loop + +term_minus_1_done: + mov rdx, [rsp] # point to dpp + mov [rdx+8], ecx # store weights back + mov [rdx+12], ebp + mov eax, [rdi-4] # dpp->samples_A [0] = bptr [-1]; + mov [rdx+16], eax + jmp done + +term_minus_2_entry: + mov rdx, [rsp] # point to dpp + mov ecx, [rdx+8] # ecx = weight_A + mov ebp, [rdx+12] # ebp = weight_B + mov r8d, [rdx+4] # r8d = delta + mov eax, [rdi-8] + jmp term_minus_2_loop + + .balign 64 +term_minus_2_loop: + mov ebx, eax + imul eax, ebp + mov edx, [rdi+4] + jo OV21 + sar eax, 10 + adc eax, edx + mov [rdi+4], eax + test ebx, ebx + je L194 + test edx, edx + je L194 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L195 + mov ebp, edx +L195: xor ebp, ebx +L194: mov ebx, eax + imul eax, ecx + mov edx, [rdi] + jo OV22 + sar eax, 10 + adc eax, edx + mov [rdi], eax + test ebx, ebx + je L199 + test edx, edx + je L199 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L200 + mov ecx, edx +L200: xor ecx, ebx +L199: add rdi, 8 + cmp rdi, rsi # compare bptr and eptr to see if we're done + jb term_minus_2_loop + jmp term_minus_2_done + +OV21: mov eax, ebx # restore 
previous sample into eax + jmp long_term_minus_2_loop + +OV22: mov eax, ebx # restore previous sample into eax + jmp L294 + + .balign 64 +long_term_minus_2_loop: + mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi+4] + add eax, edx + mov [rdi+4], eax + test ebx, ebx + je L294 + test edx, edx + je L294 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L295 + mov ebp, edx +L295: xor ebp, ebx +L294: mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + mov [rdi], eax + test ebx, ebx + je L299 + test edx, edx + je L299 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L300 + mov ecx, edx +L300: xor ecx, ebx +L299: add rdi, 8 + cmp rdi, rsi # compare bptr and eptr to see if we're done + jb long_term_minus_2_loop + +term_minus_2_done: + mov rdx, [rsp] # point to dpp + mov [rdx+8], ecx # store weights back + mov [rdx+12], ebp + mov eax, [rdi-8] # dpp->samples_B [0] = bptr [-2]; + mov [rdx+48], eax + jmp done + +# +# registers in term -3 loop: +# +# rdi bptr +# rsi eptr +# +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 last calculated values (so we don't need to reload) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +term_minus_3_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov rdx, [rsp] # set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] # mm5 = weight_AB masked to 16 bits + movq mm4, [rdi-8] + jmp term_minus_3_loop + + .balign 64 +term_minus_3_loop: + movq mm3, mm4 + psrlq mm3, 32 + punpckldq mm3, mm4 # mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + pslld mm1, 1 + psrld mm4, 15 + psrlw mm1, 1 + 
pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] # mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 # add shifted sums + movq [rdi], mm4 # store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi # compare bptr and eptr to see if we're done + jb term_minus_3_loop + + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov rdx, [rsp] # point to dpp + movq [rdx+8], mm5 # put weight_AB back + emms + + mov edx, [rdi-4] # dpp->samples_A [0] = bptr [-1]; + mov rax, [rsp] + mov [rax+16], edx + mov edx, [rdi-8] # dpp->samples_B [0] = bptr [-2]; + mov [rax+48], edx + +done: add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +####################################################################################################################### +# +# This is the mono version of the above function. It does not use MMX and does not handle negative terms. 
+# +# void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp, +# int32_t *buffer, +# int32_t sample_count, +# int32_t long_math); +# arguments on entry: +# +# System V Windows +# rdi rcx struct decorr_pass *dpp +# rsi rdx int32_t *buffer +# edx r8 int32_t sample_count +# ecx r9 int32_t long_math +# +# registers after entry: +# +# rdi bptr +# rsi eptr +# +# stack usage: +# +# [rsp+0] = *dpp +# + +_unpack_decorr_mono_pass_cont_x64win: +unpack_decorr_mono_pass_cont_x64win: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + + mov rdi, rcx # copy params from win regs to Linux regs + mov rsi, rdx # so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + jmp mentry # jump into common portion + +_unpack_decorr_mono_pass_cont_x64: +unpack_decorr_mono_pass_cont_x64: + push rbp + push rbx + push rdi + push rsi + sub rsp, 8 + +mentry: mov [rsp], rdi # store dpp* into [rsp] + and edx, edx # if sample_count is zero, do nothing + jz mono_done + + cld # we use stosd + mov rdi, rsi # rdi = bptr + lea rsi, [rdi+rdx*4] # rsi = eptr + + mov rax, [rsp] # get term from dpp struct & vector to handler + mov eax, [rax] + cmp al, 17 + je mono_17_entry + cmp al, 18 + je mono_18_entry + +# +# registers during default term processing loop: +# rdi active buffer pointer +# rsi end of buffer pointer +# r8d delta +# ecx weight_A +# ebx term * -4 +# eax,edx scratch +# + +default_mono_entry: + imul rbx, rax, -4 # set rbx to term * -4 for decorrelation index + mov rdx, [rsp] + mov ecx, [rdx+8] # ecx = weight, r8d = delta + mov r8d, [rdx+4] + jmp default_mono_loop + +# +# registers during processing loop for terms 17 & 18: +# rdi active buffer pointer +# rsi end of buffer pointer +# r8d delta +# ecx weight_A +# ebp previously calculated value +# ebx calculated correlation sample +# eax,edx scratch +# + +mono_17_entry: + mov rdx, [rsp] # rdx = dpp* + mov ecx, [rdx+8] # ecx = weight, r8d = delta + mov r8d, [rdx+4] + mov ebp, [rdi-4] + jmp mono_17_loop + +mono_18_entry: + mov rdx, 
[rsp] # rdx = dpp* + mov ecx, [rdx+8] # ecx = weight, r8d = delta + mov r8d, [rdx+4] + mov ebp, [rdi-4] + jmp mono_18_loop + + .balign 64 +default_mono_loop: + mov eax, [rdi+rbx] + imul eax, ecx + mov edx, [rdi] + jo long_default_mono_loop + sar eax, 10 + adc eax, edx + mov [rdi], eax + mov eax, [rdi+rbx] + add rdi, 4 + test edx, edx + je L100 + test eax, eax + je L100 + xor eax, edx + cdq + xor ecx, edx + add ecx, r8d + xor ecx, edx +L100: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb default_mono_loop + jmp default_mono_done + + .balign 64 +long_default_mono_loop: + mov eax, [rdi+rbx] + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + mov [rdi], eax + mov eax, [rdi+rbx] + add rdi, 4 + test edx, edx + je L101 + test eax, eax + je L101 + xor eax, edx + cdq + xor ecx, edx + add ecx, r8d + xor ecx, edx +L101: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb long_default_mono_loop + +default_mono_done: + mov rdx, [rsp] # edx = dpp* + mov [rdx+8], ecx # store weight_A back + mov ecx, [rdx] # ecx = dpp->term + +default_mono_store_samples: + dec ecx + sub rdi, 4 # back up one full sample + mov eax, [rdi] + mov [rdx+rcx*4+16], eax # store samples_A [ecx] + test ecx, ecx + jnz default_mono_store_samples + jmp mono_done + + .balign 64 +mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [rdi-8] + mov eax, ecx + imul eax, ebx + mov edx, [rdi] + jo long_mono_17_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L117 + test edx, edx + je L117 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L117: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb mono_17_loop + jmp mono_1718_exit + + .balign 64 +long_mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [rdi-8] + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L217 + test edx, edx + je L217 + xor ebx, edx + sar ebx, 
31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L217: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb long_mono_17_loop + jmp mono_1718_exit + + .balign 64 +mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [rdi-8] + sar ebx, 1 + mov eax, ecx + imul eax, ebx + mov edx, [rdi] + jo long_mono_18_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L118 + test edx, edx + je L118 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L118: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb mono_18_loop + jmp mono_1718_exit + + .balign 64 +long_mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [rdi-8] + sar ebx, 1 + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L218 + test edx, edx + je L218 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L218: cmp rdi, rsi # compare bptr and eptr to see if we're done + jb long_mono_18_loop + +mono_1718_exit: + mov rdx, [rsp] # edx = dpp* + mov [rdx+8], ecx # store weight_A back + mov eax, [rdi-4] # dpp->samples_A [0] = bptr [-1]; + mov [rdx+16], eax + mov eax, [rdi-8] # dpp->samples_A [1] = bptr [-2]; + mov [rdx+20], eax + +mono_done: + add rsp, 8 + pop rsi + pop rdi + pop rbx + pop rbp + ret + +#ifdef __ELF__ + .section .note.GNU-stack,"",@progbits +#endif + diff --git a/third_party/wavpack/src/unpack_x64.asm b/third_party/wavpack/src/unpack_x64.asm new file mode 100644 index 0000000..a4df18a --- /dev/null +++ b/third_party/wavpack/src/unpack_x64.asm @@ -0,0 +1,930 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; **** WAVPACK **** ;; +;; Hybrid Lossless Wavefile Compressor ;; +;; Copyright (c) 1998 - 2015 Conifer Software. ;; +;; All Rights Reserved. 
;; +;; Distributed under the BSD Software License (see license.txt) ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + include + +asmcode segment page 'CODE' + +; This is an assembly optimized version of the following WavPack function: +; +; void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp, +; int32_t *buffer, +; int32_t sample_count, +; int32_t long_math); +; +; It performs a single pass of stereo decorrelation on the provided buffer. +; Note that this version of the function requires that up to 8 previous +; stereo samples are visible and correct. In other words, it ignores the +; "samples_*" fields in the decorr_pass structure and gets the history data +; directly from the buffer. It does, however, return the appropriate history +; samples to the decorr_pass structure before returning. +; +; The "long_math" argument is used to specify that a 32-bit multiply is +; not enough for the "apply_weight" operation (although in this case it +; would only apply to the -1 and -2 terms because the MMX code does not have +; this limitation) but we ignore the parameter and use the overflow detection +; of the "imul" instruction to switch automatically to the "long_math" loop. +; +; This is written to work on an X86-64 processor (also called the AMD64) +; running in 64-bit mode and generally uses the MMX extensions to improve +; the performance by processing both stereo channels together. Unfortunately +; this is not easily used for terms -1 and -2, so these terms are handled +; sequentially with regular assembler code. +; +; This version is for 64-bit Windows. 
The arguments are passed in registers: +; +; rcx struct decorr_pass *dpp +; rdx int32_t *buffer +; r8d int32_t sample_count +; r9d int32_t long_math +; +; registers after entry: +; +; rdi bptr +; rsi eptr +; ecx long_math (only used for terms -1 and -2) +; +; stack usage: +; +; [rsp+0] = *dpp +; + +unpack_decorr_stereo_pass_cont_x64win proc public frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov [rsp], rcx ; [rsp] = *dpp + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + + and edx, edx ; if sample_count is zero, do nothing + jz done + + mov rdi, rsi ; rdi = bptr + lea rsi, [rdi+rdx*8] ; rsi = eptr + + mov rax, [rsp] ; get term from dpp struct & vector to handler + mov eax, [rax] + cmp al, 17 + je term_17_entry + cmp al, 18 + je term_18_entry + cmp al, -1 + je term_minus_1_entry + cmp al, -2 + je term_minus_2_entry + cmp al, -3 + je term_minus_3_entry + +; +; registers in default term loop: +; +; rbx term * -8 (for indexing correlation sample) +; rdi bptr +; rsi eptr +; +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation sample +; mm4 zero (for pcmpeqd) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +default_term_entry: + imul rbx, rax, -8 ; set RBX to term * -8 + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov rdx, [rsp] ; set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] ; mm5 = weight_AB masked to 16 bits + pxor mm4, mm4 ; mm4 = zero (for pcmpeqd) + jmp default_term_loop + + align 64 +default_term_loop: + movq mm3, [rdi+rbx] ; mm3 = sam_AB + movq mm1, mm3 + movq 
mm0, mm3 + paddd mm1, mm1 + psrld mm0, 15 + psrlw mm1, 1 + pmaddwd mm0, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] ; mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm0, mm2 + paddd mm0, mm1 ; add shifted sums + movq [rdi], mm0 ; store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pcmpeqd mm2, mm4 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb default_term_loop + + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov rdx, [rsp] ; point to dpp + movq [rdx+8], mm5 ; put weight_AB back + emms + + mov ecx, [rdx] ; ecx = dpp->term + +default_store_samples: + dec ecx + sub rdi, 8 ; back up one full sample + mov eax, [rdi+4] + mov [rdx+rcx*4+48], eax ; store samples_B [ecx] + mov eax, [rdi] + mov [rdx+rcx*4+16], eax ; store samples_A [ecx] + test ecx, ecx + jnz default_store_samples + jmp done + +; +; registers in term 17 & 18 loops: +; +; rdi bptr +; rsi eptr +; +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 last calculated values (so we don't need to reload) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +term_17_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov rdx, [rsp] ; set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] ; mm5 = weight_AB masked to 16 bits + movq mm4, [rdi-8] ; preload last calculated values in mm4 + jmp term_17_loop + + align 64 +term_17_loop: + paddd mm4, mm4 + psubd mm4, [rdi-16] ; 
mm3 = sam_AB + movq mm3, mm4 + movq mm1, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] ; mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm4 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb term_17_loop + jmp term_1718_exit ; terms 17 & 18 treat samples_AB[] the same + +term_18_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov rdx, [rsp] ; set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] ; mm5 = weight_AB masked to 16 bits + movq mm4, [rdi-8] ; preload last calculated values in mm4 + jmp term_18_loop + + align 64 +term_18_loop: + movq mm3, mm4 + psubd mm3, [rdi-16] + psrad mm3, 1 + paddd mm3, mm4 ; mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] ; mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 ; add shifted sums + movq mm0, mm3 + movq [rdi], mm4 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; 
mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb term_18_loop + +term_1718_exit: + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov rdx, [rsp] ; point to dpp + movq [rdx+8], mm5 ; put weight_AB back + emms + + mov eax, [rdi-4] ; dpp->samples_B [0] = bptr [-1]; + mov [rdx+48], eax + mov eax, [rdi-8] ; dpp->samples_A [0] = bptr [-2]; + mov [rdx+16], eax + mov eax, [rdi-12] ; dpp->samples_B [1] = bptr [-3]; + mov [rdx+52], eax + mov eax, [rdi-16] ; dpp->samples_A [1] = bptr [-4]; + mov [rdx+20], eax + jmp done + +; +; registers in term -1 & -2 loops: +; +; eax,ebx,edx scratch +; ecx weight_A +; ebp weight_B +; rdi bptr +; rsi eptr +; r8d delta +; + +term_minus_1_entry: + cld + mov rdx, [rsp] ; point to dpp + mov ecx, [rdx+8] ; ecx = weight_A + mov ebp, [rdx+12] ; ebp = weight_B + mov r8d, [rdx+4] ; r8d = delta + mov eax, [rdi-4] + jmp term_minus_1_loop + + align 64 +term_minus_1_loop: + mov ebx, eax + imul eax, ecx + mov edx, [rdi] + jo OV11 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L182 + test edx, edx + je L182 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L183 + mov ecx, edx +L183: xor ecx, ebx +L182: mov ebx, eax + imul eax, ebp + mov edx, [rdi] + jo OV12 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L187 + test edx, edx + je L187 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L188 + mov ebp, edx +L188: xor ebp, ebx +L187: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb term_minus_1_loop + jmp term_minus_1_done + +OV11: mov eax, ebx ; restore previous sample into eax + jmp long_term_minus_1_loop + +OV12: mov eax, ebx ; restore previous sample into eax + jmp L282 + + align 64 +long_term_minus_1_loop: + mov 
ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + je L282 + test edx, edx + je L282 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L283 + mov ecx, edx +L283: xor ecx, ebx +L282: mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + je L287 + test edx, edx + je L287 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L288 + mov ebp, edx +L288: xor ebp, ebx +L287: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb long_term_minus_1_loop + +term_minus_1_done: + mov rdx, [rsp] ; point to dpp + mov [rdx+8], ecx ; store weights back + mov [rdx+12], ebp + mov eax, [rdi-4] ; dpp->samples_A [0] = bptr [-1]; + mov [rdx+16], eax + jmp done + +term_minus_2_entry: + mov rdx, [rsp] ; point to dpp + mov ecx, [rdx+8] ; ecx = weight_A + mov ebp, [rdx+12] ; ebp = weight_B + mov r8d, [rdx+4] ; r8d = delta + mov eax, [rdi-8] + jmp term_minus_2_loop + + align 64 +term_minus_2_loop: + mov ebx, eax + imul eax, ebp + mov edx, [rdi+4] + jo OV21 + sar eax, 10 + adc eax, edx + mov [rdi+4], eax + test ebx, ebx + je L194 + test edx, edx + je L194 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L195 + mov ebp, edx +L195: xor ebp, ebx +L194: mov ebx, eax + imul eax, ecx + mov edx, [rdi] + jo OV22 + sar eax, 10 + adc eax, edx + mov [rdi], eax + test ebx, ebx + je L199 + test edx, edx + je L199 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L200 + mov ecx, edx +L200: xor ecx, ebx +L199: add rdi, 8 + cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb term_minus_2_loop + jmp term_minus_2_done + +OV21: mov eax, ebx ; restore previous sample into eax + jmp long_term_minus_2_loop + +OV22: mov 
eax, ebx ; restore previous sample into eax + jmp L294 + + align 64 +long_term_minus_2_loop: + mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi+4] + add eax, edx + mov [rdi+4], eax + test ebx, ebx + je L294 + test edx, edx + je L294 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, r8d + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L295 + mov ebp, edx +L295: xor ebp, ebx +L294: mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + mov [rdi], eax + test ebx, ebx + je L299 + test edx, edx + je L299 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L300 + mov ecx, edx +L300: xor ecx, ebx +L299: add rdi, 8 + cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb long_term_minus_2_loop + +term_minus_2_done: + mov rdx, [rsp] ; point to dpp + mov [rdx+8], ecx ; store weights back + mov [rdx+12], ebp + mov eax, [rdi-8] ; dpp->samples_B [0] = bptr [-2]; + mov [rdx+48], eax + jmp done + +; +; registers in term -3 loop: +; +; rdi bptr +; rsi eptr +; +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 last calculated values (so we don't need to reload) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +term_minus_3_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov rdx, [rsp] ; set RDX to *dpp + mov eax, [rdx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [rdx+8] ; mm5 = weight_AB masked to 16 bits + movq mm4, [rdi-8] + jmp term_minus_3_loop + + align 64 +term_minus_3_loop: + movq mm3, mm4 + psrlq mm3, 32 + punpckldq mm3, mm4 ; mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + pslld mm1, 1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [rdi] ; mm2 = left_right 
+ pslld mm4, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 ; add shifted sums + movq [rdi], mm4 ; store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add rdi, 8 + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 + cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb term_minus_3_loop + + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov rdx, [rsp] ; point to dpp + movq [rdx+8], mm5 ; put weight_AB back + emms + + mov edx, [rdi-4] ; dpp->samples_A [0] = bptr [-1]; + mov rax, [rsp] + mov [rax+16], edx + mov edx, [rdi-8] ; dpp->samples_B [0] = bptr [-2]; + mov [rax+48], edx + +done: add rsp, 8 ; begin epilog by deallocating stack + pop rsi ; restore non-volatile registers & return + pop rdi + pop rbx + pop rbp + ret + +unpack_decorr_stereo_pass_cont_x64win endp + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; This is the mono version of the above function. It does not use MMX and does not +; handle negative terms (since they don't apply to mono), but is otherwise similar. 
+; +; void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp, +; int32_t *buffer, +; int32_t sample_count, +; int32_t long_math); +; arguments on entry: +; +; rcx struct decorr_pass *dpp +; rdx int32_t *buffer +; r8d int32_t sample_count +; r9d int32_t long_math +; +; registers after entry: +; +; rdi bptr +; rsi eptr +; ecx long_math +; +; stack usage: +; +; [rsp+0] = *dpp +; + +unpack_decorr_mono_pass_cont_x64win proc public frame + push_reg rbp ; save non-volatile registers on stack + push_reg rbx ; (alphabetically) + push_reg rdi + push_reg rsi + alloc_stack 8 ; allocate 8 bytes on stack & align to 16 bytes + end_prologue + + mov [rsp], rcx ; [rsp] = *dpp + mov rdi, rcx ; copy params from win regs to Linux regs + mov rsi, rdx ; so we can leave following code similar + mov rdx, r8 + mov rcx, r9 + + and edx, edx ; if sample_count is zero, do nothing + jz mono_done + + cld + mov rdi, rsi ; rdi = bptr + lea rsi, [rdi+rdx*4] ; rsi = eptr + + mov rax, [rsp] ; get term from dpp struct & vector to handler + mov eax, [rax] + cmp al, 17 + je mono_17_entry + cmp al, 18 + je mono_18_entry + +; +; registers during default term processing loop: +; rdi active buffer pointer +; rsi end of buffer pointer +; r8d delta +; ecx weight_A +; rbx term * -4 +; eax,edx scratch +; + +default_mono_entry: + imul rbx, rax, -4 ; set rbx to term * -4 for decorrelation index + mov rdx, [rsp] + mov ecx, [rdx+8] ; ecx = weight, r8d = delta + mov r8d, [rdx+4] + jmp default_mono_loop + +; +; registers during processing loop for terms 17 & 18: +; rdi active buffer pointer +; rsi end of buffer pointer +; r8d delta +; ecx weight_A +; ebp previously calculated value +; ebx calculated correlation sample +; eax,edx scratch +; + +mono_17_entry: + mov rdx, [rsp] ; rdx = dpp* + mov ecx, [rdx+8] ; ecx = weight, r8d = delta + mov r8d, [rdx+4] + mov ebp, [rdi-4] + jmp mono_17_loop + +mono_18_entry: + mov rdx, [rsp] ; rdx = dpp* + mov ecx, [rdx+8] ; ecx = weight, r8d = delta + mov r8d, [rdx+4] + mov ebp,
[rdi-4] + jmp mono_18_loop + + align 64 +default_mono_loop: + mov eax, [rdi+rbx] + imul eax, ecx + mov edx, [rdi] + jo long_default_mono_loop + sar eax, 10 + adc eax, edx + mov [rdi], eax + mov eax, [rdi+rbx] + add rdi, 4 + test edx, edx + je L100 + test eax, eax + je L100 + xor eax, edx + cdq + xor ecx, edx + add ecx, r8d + xor ecx, edx +L100: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb default_mono_loop + jmp default_mono_done + + align 64 +long_default_mono_loop: + mov eax, [rdi+rbx] + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + mov [rdi], eax + mov eax, [rdi+rbx] + add rdi, 4 + test edx, edx + je L101 + test eax, eax + je L101 + xor eax, edx + cdq + xor ecx, edx + add ecx, r8d + xor ecx, edx +L101: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb long_default_mono_loop + +default_mono_done: + mov rdx, [rsp] ; edx = dpp* + mov [rdx+8], ecx ; store weight_A back + mov ecx, [rdx] ; ecx = dpp->term + +default_mono_store_samples: + dec ecx + sub rdi, 4 ; back up one full sample + mov eax, [rdi] + mov [rdx+rcx*4+16], eax ; store samples_A [ecx] + test ecx, ecx + jnz default_mono_store_samples + jmp mono_done + + align 64 +mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [rdi-8] + mov eax, ecx + imul eax, ebx + mov edx, [rdi] + jo long_mono_17_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L117 + test edx, edx + je L117 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L117: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb mono_17_loop + jmp mono_1718_exit + + align 64 +long_mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [rdi-8] + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L217 + test edx, edx + je L217 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L217: cmp rdi, rsi ; compare bptr and eptr to see if 
we're done + jb long_mono_17_loop + jmp mono_1718_exit + + align 64 +mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [rdi-8] + sar ebx, 1 + mov eax, ecx + imul eax, ebx + mov edx, [rdi] + jo long_mono_18_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L118 + test edx, edx + je L118 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L118: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb mono_18_loop + jmp mono_1718_exit + + align 64 +long_mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [rdi-8] + sar ebx, 1 + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [rdi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L218 + test edx, edx + je L218 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, r8d + xor ecx, ebx +L218: cmp rdi, rsi ; compare bptr and eptr to see if we're done + jb long_mono_18_loop + +mono_1718_exit: + mov rdx, [rsp] ; edx = dpp* + mov [rdx+8], ecx ; store weight_A back + mov eax, [rdi-4] ; dpp->samples_A [0] = bptr [-1]; + mov [rdx+16], eax + mov eax, [rdi-8] ; dpp->samples_A [1] = bptr [-2]; + mov [rdx+20], eax + +mono_done: + add rsp, 8 ; begin epilog by deallocating stack + pop rsi ; restore non-volatile registers & return + pop rdi + pop rbx + pop rbp + ret + +unpack_decorr_mono_pass_cont_x64win endp + +asmcode ends + + end + + diff --git a/third_party/wavpack/src/unpack_x86.S b/third_party/wavpack/src/unpack_x86.S new file mode 100644 index 0000000..104515b --- /dev/null +++ b/third_party/wavpack/src/unpack_x86.S @@ -0,0 +1,970 @@ +############################################################################ +## **** WAVPACK **** ## +## Hybrid Lossless Wavefile Compressor ## +## Copyright (c) 1998 - 2015 Conifer Software. ## +## All Rights Reserved. 
## +## Distributed under the BSD Software License (see license.txt) ## +############################################################################ + + .intel_syntax noprefix + .text + + .globl _unpack_decorr_stereo_pass_cont_x86 + .globl _unpack_decorr_mono_pass_cont_x86 + .globl _unpack_cpu_has_feature_x86 + + .globl unpack_decorr_stereo_pass_cont_x86 + .globl unpack_decorr_mono_pass_cont_x86 + .globl unpack_cpu_has_feature_x86 + + +# This module contains X86 assembly optimized versions of functions required +# to decode WavPack files. Note that the stereo versions of these functions +# use the MMX registers and instructions of the X86 processor, and so a +# helper function is provided to make a runtime check for that feature. + +# This is an assembly optimized version of the following WavPack function: +# +# void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp, +# int32_t *buffer, +# int32_t sample_count, +# int32_t long_math; +# +# It performs a single pass of stereo decorrelation on the provided buffer. +# Note that this version of the function requires that up to 8 previous +# stereo samples are visible and correct. In other words, it ignores the +# "samples_*" fields in the decorr_pass structure and gets the history data +# directly from the buffer. It does, however, return the appropriate history +# samples to the decorr_pass structure before returning. +# +# The "long_math" argument is used to specify that a 32-bit multiply is +# not enough for the "apply_weight" operation (although in this case it +# would only apply to the -1 and -2 terms because the MMX code does not have +# this limitation) but we ignore the parameter and use the overflow detection +# of the "imul" instruction to switch automatically to the "long_math" loop. +# +# This is written to work on an IA-32 processor and uses the MMX extensions +# to improve the performance by processing both stereo channels together. 
+# For terms -1 and -2 the MMX extensions are not usable, and so these are +# performed independently without them. +# +# arguments on entry: +# +# struct decorr_pass *dpp [ebp+8] +# int32_t *buffer [ebp+12] +# int32_t sample_count [ebp+16] +# int32_t long_math [ebp+20] +# +# registers after entry: +# +# edi bptr +# esi eptr +# +# on stack (used for terms -1 and -2 only): +# +# int32_t delta DWORD [esp] +# + +_unpack_decorr_stereo_pass_cont_x86: +unpack_decorr_stereo_pass_cont_x86: + push ebp + mov ebp, esp + push ebx + push esi + push edi + + mov edx, [ebp+8] # copy delta from dpp to top of stack + mov eax, [edx+4] + push eax + + mov edi, [ebp+12] # edi = buffer + mov eax, [ebp+16] # get sample_count and multiply by 8 + shl eax, 3 + jz done # exit now if there's nothing to do + + add eax, edi # else add to buffer point to make eptr + mov esi, eax + + mov eax, [ebp+8] # get term from dpp and vector appropriately + mov eax, [eax] + cmp eax, 17 + je term_17_entry + cmp eax, 18 + je term_18_entry + cmp eax, -1 + je term_minus_1_entry + cmp eax, -2 + je term_minus_2_entry + cmp eax, -3 + je term_minus_3_entry + +# +# registers during default term processing loop: +# edi active buffer pointer +# esi end of buffer pointer +# +# MMX: +# mm0, mm1 scratch +# mm2 original sample values +# mm3 correlation samples +# mm4 zero (for pcmpeqd) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +default_term_entry: + imul ebx, eax, -8 # set ebx to term * -8 for decorrelation index + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov edx, [ebp+8] # edx = *dpp + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] # mm5 = weight_AB masked to 16 bits + pxor mm4, mm4 # mm4 = zero (for pcmpeqd) + jmp default_term_loop + + .balign 64 +default_term_loop: + movq mm3, [edi+ebx] # mm3 =
sam_AB + movq mm1, mm3 + movq mm0, mm3 + paddd mm1, mm1 + psrld mm0, 15 + psrlw mm1, 1 + pmaddwd mm0, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] # mm2 = left_right + pslld mm0, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm0, mm2 + paddd mm0, mm1 # add shifted sums + movq [edi], mm0 # store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm4 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp edi, esi # compare bptr and eptr to see if we're done + jb default_term_loop + + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] # point to dpp + movq [eax+8], mm5 # put weight_AB back + emms + mov edx, [ebp+8] # access dpp with edx + mov ecx, [edx] # ecx = dpp->term + +default_store_samples: + dec ecx + sub edi, 8 # back up one full sample + mov eax, [edi+4] + mov [edx+ecx*4+48], eax # store samples_B [ecx] + mov eax, [edi] + mov [edx+ecx*4+16], eax # store samples_A [ecx] + test ecx, ecx + jnz default_store_samples + + jmp done + +# +# registers during processing loop for terms 17 & 18: +# edi active buffer pointer +# esi end of buffer pointer +# +# MMX: +# mm0, mm1 scratch +# mm2 original sample values +# mm3 calculated correlation samples +# mm4 last calculated values (so we don't need to reload) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +term_17_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov edx, [ebp+8] # point to dpp & get delta + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] # mm5 = weight_AB masked to 16 
bits + movq mm4, [edi-8] # preload previous calculated values + jmp term_17_loop + + .balign 64 +term_17_loop: + paddd mm4, mm4 + psubd mm4, [edi-16] # mm3 = sam_AB + movq mm3, mm4 + movq mm1, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] # mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 # add shifted sums + movq mm0, mm3 + movq [edi], mm4 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp edi, esi # compare bptr and eptr to see if we're done + jb term_17_loop + + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] # point to dpp + movq [eax+8], mm5 # put weight_AB back + emms + jmp term_1718_exit + +term_18_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov edx, [ebp+8] # point to dpp & get delta + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] # mm5 = weight_AB masked to 16 bits + movq mm4, [edi-8] # preload previous calculated value + jmp term_18_loop + + .balign 64 +term_18_loop: + movq mm3, mm4 + psubd mm3, [edi-16] + psrad mm3, 1 + paddd mm3, mm4 # mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] # mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 # add shifted sums + 
movq mm0, mm3 + movq [edi], mm4 # store result + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 # and add to weight_AB + pxor mm5, mm0 + cmp edi, esi # compare bptr and eptr to see if we're done + jb term_18_loop + + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] # point to dpp + movq [eax+8], mm5 # put weight_AB back + emms + +term_1718_exit: + mov edx, [edi-4] # dpp->samples_B [0] = bptr [-1]; + mov eax, [ebp+8] + mov [eax+48], edx + mov edx, [edi-8] # dpp->samples_A [0] = bptr [-2]; + mov [eax+16], edx + mov edx, [edi-12] # dpp->samples_B [1] = bptr [-3]; + mov [eax+52], edx + mov edx, [edi-16] # dpp->samples_A [1] = bptr [-4]; + mov [eax+20], edx + jmp done + +# +# registers in term -1 & -2 loops: +# +# eax,ebx,edx scratch +# ecx weight_A +# ebp weight_B +# edi bptr +# esi eptr +# + +term_minus_1_entry: + cld # we use stosd here... 
+ mov eax, [ebp+8] # point to dpp + mov ecx, [eax+8] # ecx = weight_A and ebp = weight_B + mov ebp, [eax+12] + mov eax, [edi-4] + jmp term_minus_1_loop + + .balign 64 +term_minus_1_loop: + mov ebx, eax + imul eax, ecx + mov edx, [edi] + jo OV11 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L182 + test edx, edx + je L182 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L183 + mov ecx, edx +L183: xor ecx, ebx +L182: mov ebx, eax + imul eax, ebp + mov edx, [edi] + jo OV12 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L189 + test edx, edx + je L189 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L188 + mov ebp, edx +L188: xor ebp, ebx +L189: cmp edi, esi # compare bptr and eptr to see if we're done + jb term_minus_1_loop + jmp term_minus_1_done + +OV11: mov eax, ebx # restore previous sample into eax + jmp long_term_minus_1_loop + +OV12: mov eax, ebx # restore previous sample into eax + jmp L282 + + .balign 64 +long_term_minus_1_loop: + mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + je L282 + test edx, edx + je L282 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L283 + mov ecx, edx +L283: xor ecx, ebx +L282: mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + je L289 + test edx, edx + je L289 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L288 + mov ebp, edx +L288: xor ebp, ebx +L289: cmp edi, esi # compare bptr and eptr to see if we're done + jb long_term_minus_1_loop + +term_minus_1_done: + mov edx, ebp + mov ebp, esp # restore ebp (we've pushed 4 DWORDS) + add ebp, 16 + mov eax, [ebp+8] # point to dpp + mov [eax+8], ecx + mov [eax+12], 
edx + mov edx, [edi-4] # dpp->samples_A [0] = bptr [-1] + mov [eax+16], edx + jmp done + + +term_minus_2_entry: + mov eax, [ebp+8] # point to dpp + mov ecx, [eax+8] # ecx = weight_A and ebp = weight_B + mov ebp, [eax+12] + mov eax, [edi-8] + jmp term_minus_2_loop + + .balign 64 +term_minus_2_loop: + mov ebx, eax + imul eax, ebp + mov edx, [edi+4] + jo OV21 + sar eax, 10 + adc eax, edx + mov [edi+4], eax + test ebx, ebx + je L194 + test edx, edx + je L194 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L195 + mov ebp, edx +L195: xor ebp, ebx +L194: mov ebx, eax + imul eax, ecx + mov edx, [edi] + jo OV22 + sar eax, 10 + adc eax, edx + mov [edi], eax + add edi, 8 + test ebx, ebx + je L201 + test edx, edx + je L201 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L200 + mov ecx, edx +L200: xor ecx, ebx +L201: cmp edi, esi # compare bptr and eptr to see if we're done + jb term_minus_2_loop + jmp term_minus_2_done + +OV21: mov eax, ebx # restore previous sample into eax + jmp long_term_minus_2_loop + +OV22: mov eax, ebx # restore previous sample into eax + jmp L294 + + .balign 64 +long_term_minus_2_loop: + mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi+4] + add eax, edx + mov [edi+4], eax + test ebx, ebx + je L294 + test edx, edx + je L294 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L295 + mov ebp, edx +L295: xor ebp, ebx +L294: mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + mov [edi], eax + add edi, 8 + test ebx, ebx + je L301 + test edx, edx + je L301 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L300 + mov ecx, edx +L300: xor ecx, ebx +L301: cmp edi, esi # compare bptr and eptr to see if we're done + jb 
long_term_minus_2_loop + +term_minus_2_done: + mov edx, ebp + lea ebp, [esp+16] # restore ebp (we've pushed 4 DWORDS) + mov eax, [ebp+8] # point to dpp + mov [eax+8], ecx + mov [eax+12], edx + mov edx, [edi-8] # dpp->samples_B [0] = bptr [-2]; + mov [eax+48], edx + jmp done + +# +# registers during processing loop for term -3: +# edi active buffer pointer +# esi end of buffer pointer +# +# MMX: +# mm0, mm1 scratch +# mm2 original sample values +# mm3 calculated correlation samples +# mm4 last calculated values (so we don't need to reload) +# mm5 weights +# mm6 delta +# mm7 512 (for rounding) +# + +term_minus_3_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 # mm7 = round (512) + mov edx, [ebp+8] # point to dpp & get delta + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 # mm6 = delta (0-7) + mov eax, 0xFFFF # mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 # mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] # mm5 = weight_AB masked to 16 bits + movq mm4, [edi-8] # preload previous calculated values + jmp term_minus_3_loop + + .balign 64 +term_minus_3_loop: + movq mm3, mm4 # mm3 = swap dwords (mm4) + psrlq mm3, 32 + punpckldq mm3, mm4 # mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + pslld mm1, 1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] # mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 # add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 # add shifted sums + movq [edi], mm4 # store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 # mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pxor mm1, mm1 # mm1 = zero + pcmpeqd mm2, mm1 # mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 # mm3 = 1s if sam_AB was zero + por mm2, mm3 # mm2 = 1s if either was zero + pandn mm2, mm6 # mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 # and add to weight_AB + 
psubw mm5, mm1 + pxor mm5, mm0 + cmp edi, esi # compare bptr and eptr to see if we're done + jb term_minus_3_loop + + pslld mm5, 16 # sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] # point to dpp + movq [eax+8], mm5 # put weight_AB back + emms + mov edx, [edi-4] # dpp->samples_A [0] = bptr [-1]; + mov eax, [ebp+8] + mov [eax+16], edx + mov edx, [edi-8] # dpp->samples_B [0] = bptr [-2]; + mov [eax+48], edx + +done: pop eax # pop delta & saved regs + pop edi + pop esi + pop ebx + pop ebp + ret + +####################################################################################################################### +# +# This is the mono version of the above function. It does not use MMX and does not handle negative terms. +# +# void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp, +# int32_t *buffer, +# int32_t sample_count, +# int32_t long_math); +# arguments on entry: +# +# struct decorr_pass *dpp [ebp+8] +# int32_t *buffer [ebp+12] +# int32_t sample_count [ebp+16] +# int32_t long_math [ebp+20] +# +# registers after entry: +# +# edi bptr +# esi eptr +# +# on stack: +# +# int32_t delta DWORD [esp] +# + +_unpack_decorr_mono_pass_cont_x86: +unpack_decorr_mono_pass_cont_x86: + push ebp + mov ebp, esp + push ebx + push esi + push edi + cld + + mov edx, [ebp+8] # copy delta from dpp to local stack + mov eax, [edx+4] + push eax + + mov edi, [ebp+12] # edi = buffer + mov eax, [ebp+16] # get sample_count and multiply by 4 + shl eax, 2 + jz mono_done # exit now if there's nothing to do + lea esi, [edi+eax] # else add to buffer point to make eptr + + mov eax, [ebp+8] # get term from dpp and vector appropriately + mov eax, [eax] + cmp eax, 17 + je mono_17_entry + cmp eax, 18 + je mono_18_entry + +# +# registers during default term processing loop: +# edi active buffer pointer +# esi end of buffer pointer +# ecx weight_A +# ebp free +# ebx term * -4 +# eax,edx scratch +# + +default_mono_entry: + imul ebx, eax, -4 # set ebx to term * -4 for
decorrelation index + mov edx, [ebp+8] # edx = dpp* + mov ecx, [edx+8] # ecx = weight + jmp default_mono_loop + +# +# registers during processing loop for terms 17 & 18: +# edi active buffer pointer +# esi end of buffer pointer +# ecx weight_A +# ebp previously calculated value +# ebx calculated correlation sample +# eax,edx scratch +# + +mono_17_entry: + mov edx, [ebp+8] # edx = dpp* + mov ecx, [edx+8] # ecx = weight_A + mov ebp, [edi-4] + jmp mono_17_loop + +mono_18_entry: + mov edx, [ebp+8] # edx = dpp* + mov ecx, [edx+8] # ecx = weight_A + mov ebp, [edi-4] + jmp mono_18_loop + + .balign 64 +default_mono_loop: + mov eax, [edi+ebx] + imul eax, ecx + mov edx, [edi] + jo long_default_mono_loop + sar eax, 10 + adc eax, edx + mov [edi], eax + mov eax, [edi+ebx] + add edi, 4 + test edx, edx + je L100 + test eax, eax + je L100 + xor eax, edx + cdq + xor ecx, edx + add ecx, [esp] + xor ecx, edx +L100: cmp edi, esi # compare bptr and eptr to see if we're done + jb default_mono_loop + jmp default_mono_done + + .balign 64 +long_default_mono_loop: + mov eax, [edi+ebx] + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + mov [edi], eax + mov eax, [edi+ebx] + add edi, 4 + test edx, edx + je L101 + test eax, eax + je L101 + xor eax, edx + cdq + xor ecx, edx + add ecx, [esp] + xor ecx, edx +L101: cmp edi, esi # compare bptr and eptr to see if we're done + jb long_default_mono_loop + +default_mono_done: + mov edx, [ebp+8] # edx = dpp* + mov [edx+8], ecx # store weight_A back + mov ecx, [edx] # ecx = dpp->term + +default_mono_store_samples: + dec ecx + sub edi, 4 # back up one full sample + mov eax, [edi] + mov [edx+ecx*4+16], eax # store samples_A [ecx] + test ecx, ecx + jnz default_mono_store_samples + jmp mono_done + + .balign 64 +mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [edi-8] + mov eax, ecx + imul eax, ebx + mov edx, [edi] + jo long_mono_17_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L117 + test edx, 
edx + je L117 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L117: cmp edi, esi # compare bptr and eptr to see if we're done + jb mono_17_loop + jmp mono_1718_exit + + .balign 64 +long_mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [edi-8] + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L217 + test edx, edx + je L217 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L217: cmp edi, esi # compare bptr and eptr to see if we're done + jb long_mono_17_loop + jmp mono_1718_exit + + .balign 64 +mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [edi-8] + sar ebx, 1 + mov eax, ecx + imul eax, ebx + mov edx, [edi] + jo long_mono_18_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L118 + test edx, edx + je L118 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L118: cmp edi, esi # compare bptr and eptr to see if we're done + jb mono_18_loop + jmp mono_1718_exit + + .balign 64 +long_mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [edi-8] + sar ebx, 1 + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L218 + test edx, edx + je L218 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L218: cmp edi, esi # compare bptr and eptr to see if we're done + jb long_mono_18_loop + +mono_1718_exit: + lea ebp, [esp+16] # restore ebp (we've pushed 4 DWORDS) + mov edx, [ebp+8] # edx = dpp* + mov [edx+8], ecx # store weight_A back + mov eax, [edi-4] # dpp->samples_A [0] = bptr [-1]; + mov [edx+16], eax + mov eax, [edi-8] # dpp->samples_A [1] = bptr [-2]; + mov [edx+20], eax + +mono_done: + pop eax # pop delta & saved regs + pop edi + pop esi + pop ebx + pop ebp + ret + +# Helper function to determine if specified 
CPU feature is available (used here for MMX). +# Input parameter is index of feature to be checked (EDX from CPUID(1) only, MMX = 23). +# Return value is the specified bit (0 or 1) or 0 if CPUID is not supported. + +_unpack_cpu_has_feature_x86: +unpack_cpu_has_feature_x86: + pushfd # save eflags + pushfd # push another copy + xor dword ptr [esp], 0x200000 # toggle ID bit on stack & pop it back into eflags + popfd + pushfd # store possibly modified eflags + pop eax # and pop back into eax + xor eax, [esp] # compare to original pushed eflags + popfd # restore original eflags + and eax, 0x200000 # eax is non-zero if eflags ID bit was changeable + jz oldcpu # return zero if CPUID is not available (wow!) + + push ebx # we must save ebx (cpuid overwrites it) + mov eax, 1 # do cpuid (1) to get features into edx + cpuid + mov eax, edx # copy into eax for shift + mov cl, [esp+8] # get parameter and shift that bit index into LSB + sar eax, cl + and eax, 1 + pop ebx # restore ebx and return 0 or 1 + +oldcpu: ret # return value in eax + +#ifdef __ELF__ + .section .note.GNU-stack,"",@progbits +#endif + diff --git a/third_party/wavpack/src/unpack_x86.asm b/third_party/wavpack/src/unpack_x86.asm new file mode 100644 index 0000000..1d99155 --- /dev/null +++ b/third_party/wavpack/src/unpack_x86.asm @@ -0,0 +1,958 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; **** WAVPACK **** ;; +;; Hybrid Lossless Wavefile Compressor ;; +;; Copyright (c) 1998 - 2015 Conifer Software. ;; +;; All Rights Reserved.
;; +;; Distributed under the BSD Software License (see license.txt) ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + .686 + .mmx + .model flat +asmcode segment page 'CODE' + public _unpack_decorr_stereo_pass_cont_x86 + public _unpack_decorr_mono_pass_cont_x86 + public _unpack_cpu_has_feature_x86 + +; This is an assembly optimized version of the following WavPack function: +; +; void unpack_decorr_stereo_pass_cont (struct decorr_pass *dpp, +; int32_t *buffer, +; int32_t sample_count, +; int32_t long_math; +; +; It performs a single pass of stereo decorrelation on the provided buffer. +; Note that this version of the function requires that up to 8 previous +; stereo samples are visible and correct. In other words, it ignores the +; "samples_*" fields in the decorr_pass structure and gets the history data +; directly from the buffer. It does, however, return the appropriate history +; samples to the decorr_pass structure before returning. +; +; The "long_math" argument is used to specify that a 32-bit multiply is +; not enough for the "apply_weight" operation (although in this case it +; would only apply to the -1 and -2 terms because the MMX code does not have +; this limitation) but we ignore the parameter and use the overflow detection +; of the "imul" instruction to switch automatically to the "long_math" loop. +; +; This is written to work on an IA-32 processor and uses the MMX extensions +; to improve the performance by processing both stereo channels together. +; For terms -1 and -2 the MMX extensions are not usable, and so these are +; performed independently without them. 
+; +; arguments on entry: +; +; struct decorr_pass *dpp [ebp+8] +; int32_t *buffer [ebp+12] +; int32_t sample_count [ebp+16] +; int32_t long_math [ebp+20] +; +; registers after entry: +; +; edi bptr +; esi eptr +; +; on stack (used for terms -1 and -2 only): +; +; int32_t delta DWORD [esp] +; + +_unpack_decorr_stereo_pass_cont_x86: + push ebp + mov ebp, esp + push ebx + push esi + push edi + + mov edx, [ebp+8] ; copy delta from dpp to top of stack + mov eax, [edx+4] + push eax + + mov edi, [ebp+12] ; edi = buffer + mov eax, [ebp+16] ; get sample_count and multiply by 8 (bytes per stereo sample) + sal eax, 3 + jz done ; exit now if there's nothing to do + + add eax, edi ; else add to buffer point to make eptr + mov esi, eax + + mov eax, [ebp+8] ; get term from dpp and vector appropriately + mov eax, [eax] + cmp eax, 17 + je term_17_entry + cmp eax, 18 + je term_18_entry + cmp eax, -1 + je term_minus_1_entry + cmp eax, -2 + je term_minus_2_entry + cmp eax, -3 + je term_minus_3_entry + +; +; registers during default term processing loop: +; edi active buffer pointer +; esi end of buffer pointer +; +; MMX: +; mm0, mm1 scratch +; mm2 original sample values +; mm3 correlation samples +; mm4 zero (for pcmpeqd) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +default_term_entry: + imul ebx, eax, -8 ; set ebx to term * -8 for decorrelation index + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov edx, [ebp+8] ; edx = dpp + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] ; mm5 = weight_AB masked to 16 bits + pxor mm4, mm4 ; mm4 = zero (for pcmpeqd) + jmp default_term_loop + + align 64 +default_term_loop: + movq mm3, [edi+ebx] ; mm3 = sam_AB + movq mm1, mm3 + movq mm0, mm3 + paddd mm1, mm1 + psrld mm0, 15 + psrlw mm1, 1 + pmaddwd mm0, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] ; mm2 = 
left_right + pslld mm0, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm0, mm2 + paddd mm0, mm1 ; add shifted sums + movq [edi], mm0 ; store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pcmpeqd mm2, mm4 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm4 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp edi, esi ; compare bptr and eptr to see if we're done + jb default_term_loop + + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] ; point to dpp + movq [eax+8], mm5 ; put weight_AB back + emms + mov edx, [ebp+8] ; access dpp with edx + mov ecx, [edx] ; ecx = dpp->term + +default_store_samples: + dec ecx + sub edi, 8 ; back up one full sample + mov eax, [edi+4] + mov [edx+ecx*4+48], eax ; store samples_B [ecx] + mov eax, [edi] + mov [edx+ecx*4+16], eax ; store samples_A [ecx] + test ecx, ecx + jnz default_store_samples + + jmp done + +; +; registers during processing loop for terms 17 & 18: +; edi active buffer pointer +; esi end of buffer pointer +; +; MMX: +; mm0, mm1 scratch +; mm2 original sample values +; mm3 calculated correlation samples +; mm4 last calculated values (so we don't need to reload) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +term_17_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov edx, [ebp+8] ; point to dpp & get delta + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] ; mm5 = weight_AB masked to 16 bits + movq mm4, [edi-8] ; preload previous calculated values + jmp term_17_loop + + align 64 +term_17_loop: + paddd mm4, mm4 + psubd mm4, [edi-16] ; 
mm3 = sam_AB + movq mm3, mm4 + movq mm1, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] ; mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm4 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp edi, esi ; compare bptr and eptr to see if we're done + jb term_17_loop + + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] ; point to dpp + movq [eax+8], mm5 ; put weight_AB back + emms + jmp term_1718_exit + +term_18_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov edx, [ebp+8] ; point to dpp & get delta + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] ; mm5 = weight_AB masked to 16 bits + movq mm4, [edi-8] ; preload previous calculated value + jmp term_18_loop + + align 64 +term_18_loop: + movq mm3, mm4 + psubd mm3, [edi-16] + psrad mm3, 1 + paddd mm3, mm4 ; mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + paddd mm1, mm1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] ; mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 ; add shifted sums + movq mm0, mm3 + movq [edi], mm4 ; store result + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pxor mm1, mm1 ; mm1 = 
zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pxor mm5, mm0 + paddw mm5, mm2 ; and add to weight_AB + pxor mm5, mm0 + cmp edi, esi ; compare bptr and eptr to see if we're done + jb term_18_loop + + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] ; point to dpp + movq [eax+8], mm5 ; put weight_AB back + emms + +term_1718_exit: + mov edx, [edi-4] ; dpp->samples_B [0] = bptr [-1]; + mov eax, [ebp+8] + mov [eax+48], edx + mov edx, [edi-8] ; dpp->samples_A [0] = bptr [-2]; + mov [eax+16], edx + mov edx, [edi-12] ; dpp->samples_B [1] = bptr [-3]; + mov [eax+52], edx + mov edx, [edi-16] ; dpp->samples_A [1] = bptr [-4]; + mov [eax+20], edx + jmp done + +; +; registers in term -1 & -2 loops: +; +; eax,ebx,edx scratch +; ecx weight_A +; ebp weight_B +; edi bptr +; esi eptr +; + +term_minus_1_entry: + cld ; we use stosd here... 
+ mov eax, [ebp+8] ; point to dpp + mov ecx, [eax+8] ; ecx = weight_A and ebp = weight_B + mov ebp, [eax+12] + mov eax, [edi-4] + jmp term_minus_1_loop + + align 64 +term_minus_1_loop: + mov ebx, eax + imul eax, ecx + mov edx, [edi] + jo OV11 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L182 + test edx, edx + je L182 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L183 + mov ecx, edx +L183: xor ecx, ebx +L182: mov ebx, eax + imul eax, ebp + mov edx, [edi] + jo OV12 + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + je L189 + test edx, edx + je L189 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L188 + mov ebp, edx +L188: xor ebp, ebx +L189: cmp edi, esi ; compare bptr and eptr to see if we're done + jb term_minus_1_loop + jmp term_minus_1_done + +OV11: mov eax, ebx ; restore previous sample into eax + jmp long_term_minus_1_loop + +OV12: mov eax, ebx ; restore previous sample into eax + jmp L282 + + align 64 +long_term_minus_1_loop: + mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + je L282 + test edx, edx + je L282 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L283 + mov ecx, edx +L283: xor ecx, ebx +L282: mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + je L289 + test edx, edx + je L289 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L288 + mov ebp, edx +L288: xor ebp, ebx +L289: cmp edi, esi ; compare bptr and eptr to see if we're done + jb long_term_minus_1_loop + +term_minus_1_done: + mov edx, ebp + mov ebp, esp ; restore ebp (we've pushed 4 DWORDS) + add ebp, 16 + mov eax, [ebp+8] ; point to dpp + mov [eax+8], ecx + mov [eax+12], edx + 
mov edx, [edi-4] ; dpp->samples_A [0] = bptr [-1] + mov [eax+16], edx + jmp done + + +term_minus_2_entry: + mov eax, [ebp+8] ; point to dpp + mov ecx, [eax+8] ; ecx = weight_A and ebp = weight_B + mov ebp, [eax+12] + mov eax, [edi-8] + jmp term_minus_2_loop + + align 64 +term_minus_2_loop: + mov ebx, eax + imul eax, ebp + mov edx, [edi+4] + jo OV21 + sar eax, 10 + adc eax, edx + mov [edi+4], eax + test ebx, ebx + je L194 + test edx, edx + je L194 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L195 + mov ebp, edx +L195: xor ebp, ebx +L194: mov ebx, eax + imul eax, ecx + mov edx, [edi] + jo OV22 + sar eax, 10 + adc eax, edx + mov [edi], eax + add edi, 8 + test ebx, ebx + je L201 + test edx, edx + je L201 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L200 + mov ecx, edx +L200: xor ecx, ebx +L201: cmp edi, esi ; compare bptr and eptr to see if we're done + jb term_minus_2_loop + jmp term_minus_2_done + +OV21: mov eax, ebx ; restore previous sample into eax + jmp long_term_minus_2_loop + +OV22: mov eax, ebx ; restore previous sample into eax + jmp L294 + + align 64 +long_term_minus_2_loop: + mov ebx, eax + imul ebp + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi+4] + add eax, edx + mov [edi+4], eax + test ebx, ebx + je L294 + test edx, edx + je L294 + xor ebx, edx + sar ebx, 31 + xor ebp, ebx + add ebp, [esp] + mov edx, 1024 + add edx, ebx + cmp ebp, edx + jle L295 + mov ebp, edx +L295: xor ebp, ebx +L294: mov ebx, eax + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + mov [edi], eax + add edi, 8 + test ebx, ebx + je L301 + test edx, edx + je L301 + xor ebx, edx + sar ebx, 31 + xor ecx, ebx + add ecx, [esp] + mov edx, 1024 + add edx, ebx + cmp ecx, edx + jle L300 + mov ecx, edx +L300: xor ecx, ebx +L301: cmp edi, esi ; compare bptr and eptr to see if we're done + jb long_term_minus_2_loop + 
+term_minus_2_done: + mov edx, ebp + lea ebp, [esp+16] ; restore ebp (we've pushed 4 DWORDS) + mov eax, [ebp+8] ; point to dpp + mov [eax+8], ecx + mov [eax+12], edx + mov edx, [edi-8] ; dpp->samples_B [0] = bptr [-2]; + mov [eax+48], edx + jmp done + +; +; registers during processing loop for term -3: +; edi active buffer pointer +; esi end of buffer pointer +; +; MMX: +; mm0, mm1 scratch +; mm2 original sample values +; mm3 calculated correlation samples +; mm4 last calculated values (so we don't need to reload) +; mm5 weights +; mm6 delta +; mm7 512 (for rounding) +; + +term_minus_3_entry: + mov eax, 512 + movd mm7, eax + punpckldq mm7, mm7 ; mm7 = round (512) + mov edx, [ebp+8] ; point to dpp & get delta + mov eax, [edx+4] + movd mm6, eax + punpckldq mm6, mm6 ; mm6 = delta (0-7) + mov eax, 0FFFFh ; mask high weights to zero for PMADDWD + movd mm5, eax + punpckldq mm5, mm5 ; mm5 = weight mask 0x0000FFFF0000FFFF + pand mm5, [edx+8] ; mm5 = weight_AB masked to 16 bits + movq mm4, [edi-8] ; preload previous calculated values + jmp term_minus_3_loop + + align 64 +term_minus_3_loop: + movq mm3, mm4 ; mm3 = swap dwords (mm4) + psrlq mm3, 32 + punpckldq mm3, mm4 ; mm3 = sam_AB + movq mm1, mm3 + movq mm4, mm3 + pslld mm1, 1 + psrld mm4, 15 + psrlw mm1, 1 + pmaddwd mm4, mm5 + pmaddwd mm1, mm5 + movq mm2, [edi] ; mm2 = left_right + pslld mm4, 5 + paddd mm1, mm7 ; add 512 for rounding + psrad mm1, 10 + paddd mm4, mm2 + paddd mm4, mm1 ; add shifted sums + movq [edi], mm4 ; store result + movq mm0, mm3 + pxor mm0, mm2 + psrad mm0, 31 ; mm0 = sign (sam_AB ^ left_right) + add edi, 8 + pxor mm1, mm1 ; mm1 = zero + pcmpeqd mm2, mm1 ; mm2 = 1s if left_right was zero + pcmpeqd mm3, mm1 ; mm3 = 1s if sam_AB was zero + por mm2, mm3 ; mm2 = 1s if either was zero + pandn mm2, mm6 ; mask delta with zeros check + pcmpeqd mm1, mm1 + psubd mm1, mm7 + psubd mm1, mm7 + psubd mm1, mm0 + pxor mm5, mm0 + paddw mm5, mm1 + paddusw mm5, mm2 ; and add to weight_AB + psubw mm5, mm1 + pxor mm5, mm0 
+ cmp edi, esi ; compare bptr and eptr to see if we're done + jb term_minus_3_loop + + pslld mm5, 16 ; sign-extend 16-bit weights back to dwords + psrad mm5, 16 + mov eax, [ebp+8] ; point to dpp + movq [eax+8], mm5 ; put weight_AB back + emms + mov edx, [edi-4] ; dpp->samples_A [0] = bptr [-1]; + mov eax, [ebp+8] + mov [eax+16], edx + mov edx, [edi-8] ; dpp->samples_B [0] = bptr [-2]; + mov [eax+48], edx + +done: pop eax ; pop delta & saved regs + pop edi + pop esi + pop ebx + pop ebp + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; This is the mono version of the above function. It does not use MMX and does not handle negative terms. +; +; void unpack_decorr_mono_pass_cont (struct decorr_pass *dpp, +; int32_t *buffer, +; int32_t sample_count, +; int32_t long_math); +; arguments on entry: +; +; struct decorr_pass *dpp [ebp+8] +; int32_t *buffer [ebp+12] +; int32_t sample_count [ebp+16] +; int32_t long_math [ebp+20] +; +; registers after entry: +; +; edi bptr +; esi eptr +; +; on stack: +; +; int32_t delta DWORD [esp] +; + +_unpack_decorr_mono_pass_cont_x86: + push ebp + mov ebp, esp + push ebx + push esi + push edi + cld + + mov edx, [ebp+8] ; copy delta from dpp to local stack + mov eax, [edx+4] + push eax + + mov edi, [ebp+12] ; edi = buffer + mov eax, [ebp+16] ; get sample_count and multiply by 4 + sal eax, 2 + jz mono_done ; exit now if there's nothing to do + lea esi, [edi+eax] ; else add to buffer point to make eptr + + mov eax, [ebp+8] ; get term from dpp and vector appropriately + mov eax, [eax] + cmp eax, 17 + je mono_17_entry + cmp eax, 18 + je mono_18_entry + +; +; registers during default term processing loop: +; edi active buffer pointer +; esi end of buffer pointer +; ecx weight_A +; ebp free +; ebx term * -4 +; eax,edx scratch +; + +default_mono_entry: + imul ebx, eax, -4 ; set ebx to term * -4 for decorrelation index + mov edx, [ebp+8] ; edx = dpp* + mov ecx, 
[edx+8] ; ecx = weight + jmp default_mono_loop + +; +; registers during processing loop for terms 17 & 18: +; edi active buffer pointer +; esi end of buffer pointer +; ecx weight_A +; ebp previously calculated value +; ebx calculated correlation sample +; eax,edx scratch +; + +mono_17_entry: + mov edx, [ebp+8] ; edx = dpp* + mov ecx, [edx+8] ; ecx = weight_A + mov ebp, [edi-4] + jmp mono_17_loop + +mono_18_entry: + mov edx, [ebp+8] ; edx = dpp* + mov ecx, [edx+8] ; ecx = weight_A + mov ebp, [edi-4] + jmp mono_18_loop + + align 64 +default_mono_loop: + mov eax, [edi+ebx] + imul eax, ecx + mov edx, [edi] + jo long_default_mono_loop + sar eax, 10 + adc eax, edx + mov [edi], eax + mov eax, [edi+ebx] + add edi, 4 + test edx, edx + je L100 + test eax, eax + je L100 + xor eax, edx + cdq + xor ecx, edx + add ecx, [esp] + xor ecx, edx +L100: cmp edi, esi ; compare bptr and eptr to see if we're done + jb default_mono_loop + jmp default_mono_done + + align 64 +long_default_mono_loop: + mov eax, [edi+ebx] + imul ecx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + mov [edi], eax + mov eax, [edi+ebx] + add edi, 4 + test edx, edx + je L101 + test eax, eax + je L101 + xor eax, edx + cdq + xor ecx, edx + add ecx, [esp] + xor ecx, edx +L101: cmp edi, esi ; compare bptr and eptr to see if we're done + jb long_default_mono_loop + +default_mono_done: + mov edx, [ebp+8] ; edx = dpp* + mov [edx+8], ecx ; store weight_A back + mov ecx, [edx] ; ecx = dpp->term + +default_mono_store_samples: + dec ecx + sub edi, 4 ; back up one full sample + mov eax, [edi] + mov [edx+ecx*4+16], eax ; store samples_A [ecx] + test ecx, ecx + jnz default_mono_store_samples + jmp mono_done + + align 64 +mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [edi-8] + mov eax, ecx + imul eax, ebx + mov edx, [edi] + jo long_mono_17_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L117 + test edx, edx + je L117 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor 
eax, ebx + sub eax, ebx + add ecx, eax +L117: cmp edi, esi ; compare bptr and eptr to see if we're done + jb mono_17_loop + jmp mono_1718_exit + + align 64 +long_mono_17_loop: + lea ebx, [ebp+ebp] + sub ebx, [edi-8] + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L217 + test edx, edx + je L217 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L217: cmp edi, esi ; compare bptr and eptr to see if we're done + jb long_mono_17_loop + jmp mono_1718_exit + + align 64 +mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [edi-8] + sar ebx, 1 + mov eax, ecx + imul eax, ebx + mov edx, [edi] + jo long_mono_18_loop + sar eax, 10 + adc eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L118 + test edx, edx + je L118 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L118: cmp edi, esi ; compare bptr and eptr to see if we're done + jb mono_18_loop + jmp mono_1718_exit + + align 64 +long_mono_18_loop: + lea ebx, [ebp+ebp*2] + sub ebx, [edi-8] + sar ebx, 1 + mov eax, ecx + imul ebx + shl edx, 22 + shr eax, 10 + adc eax, edx + mov edx, [edi] + add eax, edx + stosd + test ebx, ebx + mov ebp, eax + je L218 + test edx, edx + je L218 + mov eax, [esp] + xor ebx, edx + sar ebx, 31 + xor eax, ebx + sub eax, ebx + add ecx, eax +L218: cmp edi, esi ; compare bptr and eptr to see if we're done + jb long_mono_18_loop + +mono_1718_exit: + lea ebp, [esp+16] ; restore ebp (we've pushed 4 DWORDS) + mov edx, [ebp+8] ; edx = dpp* + mov [edx+8], ecx ; store weight_A back + mov eax, [edi-4] ; dpp->samples_A [0] = bptr [-1]; + mov [edx+16], eax + mov eax, [edi-8] ; dpp->samples_A [1] = bptr [-2]; + mov [edx+20], eax + +mono_done: + pop eax ; pop delta & saved regs + pop edi + pop esi + pop ebx + pop ebp + ret + +; Helper function to determine if specified CPU feature is available (used here for MMX). 
+; Input parameter is index of feature to be checked (EDX from CPUID(1) only, MMX = 23). +; Return value is the specified bit (0 or 1) or 0 if CPUID is not supported. + +_unpack_cpu_has_feature_x86: + pushfd ; save eflags + pushfd ; push another copy + xor dword ptr [esp], 200000h ; toggle ID bit on stack & pop it back into eflags + popfd + pushfd ; store possibly modified eflags + pop eax ; and pop back into eax + xor eax, [esp] ; compare to original pushed eflags + popfd ; restore original eflags + and eax, 200000h ; eax != 0 (200000h) if eflags ID bit was changeable + jz oldcpu ; return zero if CPUID is not available (wow!) + + push ebx ; we must save ebx + mov eax, 1 ; do cpuid (1) to get features into edx + cpuid + mov eax, edx ; copy into eax for shift + mov cl, [esp+8] ; get parameter and shift that bit index into LSB + sar eax, cl + and eax, 1 + pop ebx ; restore ebx and return 0 or 1 + +oldcpu: ret ; return value in eax + +asmcode ends + + end + diff --git a/third_party/wavpack/src/wavpack_local.h b/third_party/wavpack/src/wavpack_local.h index 5c69108..fc75628 100644 --- a/third_party/wavpack/src/wavpack_local.h +++ b/third_party/wavpack/src/wavpack_local.h @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////// // **** WAVPACK **** // // Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // +// Copyright (c) 1998 - 2013 Conifer Software. // // All Rights Reserved. 
// // Distributed under the BSD Software License (see license.txt) // //////////////////////////////////////////////////////////////////////////// @@ -11,19 +11,17 @@ #ifndef WAVPACK_LOCAL_H #define WAVPACK_LOCAL_H -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -#if defined(WIN32) +#if defined(_WIN32) +#define strdup(x) _strdup(x) #define FASTCALL __fastcall #else #define FASTCALL #endif -#if defined(WIN32) || \ - (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) -#define BITSTREAM_SHORTS // use "shorts" for reading/writing bitstreams +#if defined(_WIN32) || \ + (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \ + (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) +#define BITSTREAM_SHORTS // use 16-bit "shorts" for reading/writing bitstreams (instead of chars) // (only works on little-endian machines) #endif @@ -31,7 +29,7 @@ // This header file contains all the definitions required by WavPack. -#if defined(_WIN32) && !defined(__MINGW32__) +#if defined(_MSC_VER) && _MSC_VER < 1600 #include typedef unsigned __int64 uint64_t; typedef unsigned __int32 uint32_t; @@ -41,14 +39,13 @@ typedef __int64 int64_t; typedef __int32 int32_t; typedef __int16 int16_t; typedef __int8 int8_t; -typedef float float32_t; #else -#include +#include #endif // Because the C99 specification states that "The order of allocation of -// bit-fields within a unit (high-order to low-order or low-order to -// high-order) is implementation-defined" (6.7.2.1), I decided to change +// bit-fields within a unit (high-order to low-order or low-order to +// high-order) is implementation-defined" (6.7.2.1), I decided to change // the representation of floating-point values from a structure of // bit-fields to a 32-bit integer with access macros. 
Note that the WavPack // library doesn't use any floating-point math to implement compression of @@ -58,6 +55,7 @@ typedef float float32_t; typedef int32_t f32; #define get_mantissa(f) ((f) & 0x7fffff) +#define get_magnitude(f) ((f) & 0x7fffffff) #define get_exponent(f) (((f) >> 23) & 0xff) #define get_sign(f) (((f) >> 31) & 0x1) @@ -92,7 +90,8 @@ typedef struct { #define APE_TAG_MAX_LENGTH (1024 * 1024 * 16) typedef struct { - int32_t tag_file_pos, tag_begins_file; + int64_t tag_file_pos; + int tag_begins_file; ID3_Tag id3_tag; APE_Tag_Hdr ape_tag_hdr; unsigned char *ape_tag_data; @@ -115,12 +114,12 @@ typedef struct { #define ChunkHeaderFormat "4L" typedef struct { - unsigned short FormatTag, NumChannels; + uint16_t FormatTag, NumChannels; uint32_t SampleRate, BytesPerSecond; - unsigned short BlockAlign, BitsPerSample; - unsigned short cbSize, ValidBitsPerSample; + uint16_t BlockAlign, BitsPerSample; + uint16_t cbSize, ValidBitsPerSample; int32_t ChannelMask; - unsigned short SubFormat; + uint16_t SubFormat; char GUID [14]; } WaveHeader; @@ -135,13 +134,43 @@ typedef struct { typedef struct { char ckID [4]; uint32_t ckSize; - short version; - unsigned char track_no, index_no; + int16_t version; + unsigned char block_index_u8; + unsigned char total_samples_u8; uint32_t total_samples, block_index, block_samples, flags, crc; } WavpackHeader; #define WavpackHeaderFormat "4LS2LLLLL" +// Macros to access the 40-bit block_index field + +#define GET_BLOCK_INDEX(hdr) ( (int64_t) (hdr).block_index + ((int64_t) (hdr).block_index_u8 << 32) ) + +#define SET_BLOCK_INDEX(hdr,value) do { \ + int64_t tmp = (value); \ + (hdr).block_index = (uint32_t) tmp; \ + (hdr).block_index_u8 = \ + (unsigned char) (tmp >> 32); \ +} while (0) + +// Macros to access the 40-bit total_samples field, which is complicated by the fact that +// all 1's in the lower 32 bits indicates "unknown" (regardless of upper 8 bits) + +#define GET_TOTAL_SAMPLES(hdr) ( ((hdr).total_samples == (uint32_t) -1) ? 
-1 : \ + (int64_t) (hdr).total_samples + ((int64_t) (hdr).total_samples_u8 << 32) - (hdr).total_samples_u8 ) + +#define SET_TOTAL_SAMPLES(hdr,value) do { \ + int64_t tmp = (value); \ + if (tmp < 0) \ + (hdr).total_samples = (uint32_t) -1; \ + else { \ + tmp += (tmp / (int64_t) 0xffffffff); \ + (hdr).total_samples = (uint32_t) tmp; \ + (hdr).total_samples_u8 = \ + (unsigned char) (tmp >> 32); \ + } \ +} while (0) + // or-values for "flags" #define BYTES_STORED 3 // 1-4 bytes/sample @@ -169,17 +198,21 @@ typedef struct { #define SRATE_MASK (0xfL << SRATE_LSB) #define FALSE_STEREO 0x40000000 // block is stereo, but data is mono - -#define IGNORED_FLAGS 0x18000000 // reserved, but ignore if encountered #define NEW_SHAPING 0x20000000 // use IIR filter for negative shaping -#define UNKNOWN_FLAGS 0x80000000 // also reserved, but refuse decode if - // encountered #define MONO_DATA (MONO_FLAG | FALSE_STEREO) +// Introduced in WavPack 5.0: +#define HAS_CHECKSUM 0x10000000 // block contains a trailing checksum +#define DSD_FLAG 0x80000000 // block is encoded DSD (1-bit PCM) + +#define IGNORED_FLAGS 0x08000000 // reserved, but ignore if encountered +#define UNKNOWN_FLAGS 0x00000000 // we no longer have any of these spares + #define MIN_STREAM_VERS 0x402 // lowest stream version we'll decode #define MAX_STREAM_VERS 0x410 // highest stream version we'll decode or encode -#define CUR_STREAM_VERS 0x407 // stream version we are [normally] writing now + // (only stream version to support mono optimization) +#define CUR_STREAM_VERS 0x407 // universally compatible stream version //////////////////////////// WavPack Metadata ///////////////////////////////// @@ -211,14 +244,20 @@ typedef struct { #define ID_WVC_BITSTREAM 0xb #define ID_WVX_BITSTREAM 0xc #define ID_CHANNEL_INFO 0xd +#define ID_DSD_BLOCK 0xe #define ID_RIFF_HEADER (ID_OPTIONAL_DATA | 0x1) #define ID_RIFF_TRAILER (ID_OPTIONAL_DATA | 0x2) -#define ID_REPLAY_GAIN (ID_OPTIONAL_DATA | 0x3) -#define ID_CUESHEET 
(ID_OPTIONAL_DATA | 0x4) +#define ID_ALT_HEADER (ID_OPTIONAL_DATA | 0x3) +#define ID_ALT_TRAILER (ID_OPTIONAL_DATA | 0x4) #define ID_CONFIG_BLOCK (ID_OPTIONAL_DATA | 0x5) #define ID_MD5_CHECKSUM (ID_OPTIONAL_DATA | 0x6) #define ID_SAMPLE_RATE (ID_OPTIONAL_DATA | 0x7) +#define ID_ALT_EXTENSION (ID_OPTIONAL_DATA | 0x8) +#define ID_ALT_MD5_CHECKSUM (ID_OPTIONAL_DATA | 0x9) +#define ID_NEW_CONFIG_BLOCK (ID_OPTIONAL_DATA | 0xa) +#define ID_CHANNEL_IDENTITIES (ID_OPTIONAL_DATA | 0xb) +#define ID_BLOCK_CHECKSUM (ID_OPTIONAL_DATA | 0xf) ///////////////////////// WavPack Configuration /////////////////////////////// @@ -255,6 +294,7 @@ typedef struct { #define CONFIG_CREATE_EXE 0x40000 // create executable #define CONFIG_CREATE_WVC 0x80000 // create correction file #define CONFIG_OPTIMIZE_WVC 0x100000 // maximize bybrid compression +#define CONFIG_COMPATIBLE_WRITE 0x400000 // write files for decoders < 4.3 #define CONFIG_CALC_NOISE 0x800000 // calc noise in hybrid mode #define CONFIG_LOSSY_MODE 0x1000000 // obsolete (for information) #define CONFIG_EXTRA_MODE 0x2000000 // extra processing mode @@ -264,6 +304,8 @@ typedef struct { #define CONFIG_PAIR_UNDEF_CHANS 0x20000000 // encode undefined channels in stereo pairs #define CONFIG_OPTIMIZE_MONO 0x80000000 // optimize for mono streams posing as stereo +#define QMODE_DSD_AUDIO 0x30 // if either of these is set in qmode (version 5.0) + /* * These config flags were never actually used, or are no longer used, or are * used for something else now. 
They may be used in the future for what they @@ -305,7 +347,7 @@ typedef struct { typedef struct bs { #ifdef BITSTREAM_SHORTS - unsigned short *buf, *end, *ptr; + uint16_t *buf, *end, *ptr; #else unsigned char *buf, *end, *ptr; #endif @@ -320,8 +362,10 @@ typedef struct bs { #define MAX_NTERMS 16 #define MAX_TERM 8 +// Note that this structure is directly accessed in assembly files, so modify with care + struct decorr_pass { - int term, delta, weight_A, weight_B; + int32_t term, delta, weight_A, weight_B; int32_t samples_A [MAX_TERM], samples_B [MAX_TERM]; int32_t aweight_A, aweight_B; int32_t sum_A, sum_B; @@ -342,6 +386,10 @@ struct words_data { struct entropy_data c [2]; }; +typedef struct { + int32_t value, filter0, filter1, filter2, filter3, filter4, filter5, filter6, factor, byte; +} DSDfilters; + typedef struct { WavpackHeader wphdr; struct words_data w; @@ -350,9 +398,10 @@ typedef struct { unsigned char *block2buff, *block2end; int32_t *sample_buffer; + int64_t sample_index; int bits, num_terms, mute_error, joint_stereo, false_stereo, shift; int num_decorrs, num_passes, best_decorr, mask_decorr; - uint32_t sample_index, crc, crc_x, crc_wvx; + uint32_t crc, crc_x, crc_wvx; Bitstream wvbits, wvcbits, wvxbits; int init_done, wvc_skip; float delta_decay; @@ -363,12 +412,22 @@ typedef struct { struct { int32_t shaping_acc [2], shaping_delta [2], error [2]; double noise_sum, noise_ave, noise_max; - short *shaping_data, *shaping_array; + int16_t *shaping_data, *shaping_array; int32_t shaping_samples; } dc; struct decorr_pass decorr_passes [MAX_NTERMS], analysis_pass; const WavpackDecorrSpec *decorr_specs; + + struct { + unsigned char *byteptr, *endptr, (*probabilities) [256], **value_lookup, mode, ready; + int history_bins, p0, p1; + int16_t (*summed_probabilities) [256]; + uint32_t low, high, value; + DSDfilters filters [2]; + int32_t *ptable; + } dsd; + } WavpackStream; // flags for float_flags: @@ -399,6 +458,22 @@ typedef struct { int32_t (*write_bytes)(void 
*id, void *data, int32_t bcount); } WavpackStreamReader; +// Extended version of structure for handling large files and added +// functionality for truncating and closing files + +typedef struct { + int32_t (*read_bytes)(void *id, void *data, int32_t bcount); + int32_t (*write_bytes)(void *id, void *data, int32_t bcount); + int64_t (*get_pos)(void *id); // new signature for large files + int (*set_pos_abs)(void *id, int64_t pos); // new signature for large files + int (*set_pos_rel)(void *id, int64_t delta, int mode); // new signature for large files + int (*push_back_byte)(void *id, int c); + int64_t (*get_length)(void *id); // new signature for large files + int (*can_seek)(void *id); + int (*truncate_here)(void *id); // new function to truncate file at current position + int (*close)(void *id); // new function to close file +} WavpackStreamReader64; + typedef int (*WavpackBlockOutput)(void *id, void *data, int32_t bcount); typedef struct { @@ -414,12 +489,13 @@ typedef struct { WavpackBlockOutput blockout; void *wv_out, *wvc_out; - WavpackStreamReader *reader; + WavpackStreamReader64 *reader; void *wv_in, *wvc_in; - uint32_t filelen, file2len, filepos, file2pos, total_samples, crc_errors, first_flags; - int wvc_flag, open_flags, norm_offset, reduced_channels, lossy_blocks, close_files; - uint32_t block_samples, ave_block_samples, block_boundary, max_samples, acc_samples, initial_index, riff_trailer_bytes; + int64_t filelen, file2len, filepos, file2pos, total_samples, initial_index; + uint32_t crc_errors, first_flags; + int wvc_flag, open_flags, norm_offset, reduced_channels, lossy_blocks, version_five; + uint32_t block_samples, ave_block_samples, block_boundary, max_samples, acc_samples, riff_trailer_bytes; int riff_header_added, riff_header_created; M_Tag m_tag; @@ -427,6 +503,13 @@ typedef struct { WavpackStream **streams; void *stream3; + // these items were added in 5.0 to support alternate file types (especially CAF & DSD) + unsigned char file_format, 
*channel_reordering, *channel_identities; + uint32_t channel_layout, dsd_multiplier; + void *decimation_context; + char file_extension [8]; + + void (*close_callback)(void *wpc); char error_message [80]; } WavpackContext; @@ -434,6 +517,11 @@ typedef struct { #define CLEAR(destin) memset (&destin, 0, sizeof (destin)); +//////////////////////////////// decorrelation ////////////////////////////// +// modules: pack.c, unpack.c, unpack_floats.c, extra1.c, extra2.c + +// #define SKIP_DECORRELATION // experimental switch to disable all decorrelation on encode + // These macros implement the weight application and update operations // that are at the heart of the decorrelation loops. Note that there are // sometimes two and even three versions of each macro. Theses should be @@ -449,15 +537,17 @@ typedef struct { #if 1 // PERFCOND - apply decorrelation weight when 32-bit overflow is possible #define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \ (((sample & ~0xffff) >> 9) * weight) + 1) >> 1) +#elif 1 +#define apply_weight_f(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) #else #define apply_weight_f(weight, sample) ((int32_t)floor(((double) weight * sample + 512.0) / 1024.0)) #endif -#if 1 // PERFCOND - universal version that checks input magnitude (or simply uses 64-bit ints) -#define apply_weight(weight, sample) (sample != (short) sample ? \ +#if 1 // PERFCOND - universal version that checks input magnitude or always uses long version +#define apply_weight(weight, sample) (sample != (int16_t) sample ? \ apply_weight_f (weight, sample) : apply_weight_i (weight, sample)) #else -#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) +#define apply_weight(weight, sample) (apply_weight_f (weight, sample)) #endif #if 1 // PERFCOND @@ -471,9 +561,6 @@ typedef struct { if (source && result) (source ^ result) < 0 ? 
(weight -= delta) : (weight += delta); #endif -#define update_weight_d2(weight, delta, source, result) \ - if (source && result) weight -= (((source ^ result) >> 29) & 4) - 2; - #define update_weight_clip(weight, delta, source, result) \ if (source && result) { \ const int32_t s = (source ^ result) >> 31; \ @@ -481,29 +568,59 @@ typedef struct { weight = (weight ^ s) - s; \ } -#define update_weight_clip_d2(weight, delta, source, result) \ - if (source && result) { \ - const int32_t s = (source ^ result) >> 31; \ - if ((weight = (weight ^ s) + (2 - s)) > 1024) weight = 1024; \ - weight = (weight ^ s) - s; \ - } +void pack_init (WavpackContext *wpc); +int pack_block (WavpackContext *wpc, int32_t *buffer); +void send_general_metadata (WavpackContext *wpc); +void free_metadata (WavpackMetadata *wpmd); +int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end); +double WavpackGetEncodedNoise (WavpackContext *wpc, double *peak); +int unpack_init (WavpackContext *wpc); +int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd); +int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd); +int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd); +int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd); +int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); +int check_crc_error (WavpackContext *wpc); +int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values); +void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values); +void float_values (WavpackStream *wps, int32_t *values, int32_t num_values); +void dynamic_noise_shaping (WavpackContext *wpc, int32_t *buffer, int shortening_allowed); +void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples); +void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples); -// bits.c +////////////////////////// DSD related (including 
decimation) ////////////////////////// +// modules: pack_dsd.c unpack_dsd.c -void bs_open_read (Bitstream *bs, void *buffer_start, void *buffer_end); -void bs_open_write (Bitstream *bs, void *buffer_start, void *buffer_end); -uint32_t bs_close_read (Bitstream *bs); -uint32_t bs_close_write (Bitstream *bs); +void pack_dsd_init (WavpackContext *wpc); +int pack_dsd_block (WavpackContext *wpc, int32_t *buffer); +int init_dsd_block (WavpackContext *wpc, WavpackMetadata *wpmd); +int32_t unpack_dsd_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); -int DoReadFile (FILE *hFile, void *lpBuffer, uint32_t nNumberOfBytesToRead, uint32_t *lpNumberOfBytesRead); -int DoWriteFile (FILE *hFile, void *lpBuffer, uint32_t nNumberOfBytesToWrite, uint32_t *lpNumberOfBytesWritten); -uint32_t DoGetFileSize (FILE *hFile), DoGetFilePosition (FILE *hFile); -int DoSetFilePositionRelative (FILE *hFile, int32_t pos, int mode); -int DoSetFilePositionAbsolute (FILE *hFile, uint32_t pos); -int DoUngetc (int c, FILE *hFile), DoDeleteFile (char *filename); -int DoCloseHandle (FILE *hFile), DoTruncateFile (FILE *hFile); +void *decimate_dsd_init (int num_channels); +void decimate_dsd_reset (void *decimate_context); +void decimate_dsd_run (void *decimate_context, int32_t *samples, int num_samples); +void decimate_dsd_destroy (void *decimate_context); + +///////////////////////////////// CPU feature detection //////////////////////////////// + +int unpack_cpu_has_feature_x86 (int findex), pack_cpu_has_feature_x86 (int findex); + +#define CPU_FEATURE_MMX 23 + +///////////////////////////// pre-4.0 version decoding //////////////////////////// +// modules: unpack3.c, unpack3_open.c, unpack3_seek.c + +WavpackContext *open_file3 (WavpackContext *wpc, char *error); +int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); +int seek_sample3 (WavpackContext *wpc, uint32_t desired_index); +uint32_t get_sample_index3 (WavpackContext *wpc); +void free_stream3 
(WavpackContext *wpc); +int get_version3 (WavpackContext *wpc); + +////////////////////////////// bitstream macros & functions ///////////////////////////// #define bs_is_open(bs) ((bs)->ptr != NULL) +uint32_t bs_close_read (Bitstream *bs); #define getbit(bs) ( \ (((bs)->bc) ? \ @@ -564,56 +681,51 @@ int DoCloseHandle (FILE *hFile), DoTruncateFile (FILE *hFile); } while ((bs)->bc >= sizeof (*((bs)->ptr)) * 8); \ } while (0) -void little_endian_to_native (void *data, char *format); -void native_to_little_endian (void *data, char *format); +///////////////////////////// entropy encoder / decoder //////////////////////////// +// modules: entropy_utils.c, read_words.c, write_words.c -// pack.c +// these control the time constant "slow_level" which is used for hybrid mode +// that controls bitrate as a function of residual level (HYBRID_BITRATE). +#define SLS 8 +#define SLO ((1 << (SLS - 1))) -void pack_init (WavpackContext *wpc); -int pack_block (WavpackContext *wpc, int32_t *buffer); -double WavpackGetEncodedNoise (WavpackContext *wpc, double *peak); +#define LIMIT_ONES 16 // maximum consecutive 1s sent for "div" data -// unpack.c +// these control the time constant of the 3 median level breakpoints +#define DIV0 128 // 5/7 of samples +#define DIV1 64 // 10/49 of samples +#define DIV2 32 // 20/343 of samples -int unpack_init (WavpackContext *wpc); -int init_wv_bitstream (WavpackStream *wps, WavpackMetadata *wpmd); -int init_wvc_bitstream (WavpackStream *wps, WavpackMetadata *wpmd); -int init_wvx_bitstream (WavpackStream *wps, WavpackMetadata *wpmd); -int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd); -int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd); -int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd); -int read_shaping_info (WavpackStream *wps, WavpackMetadata *wpmd); -int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd); -int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd); -int 
read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd); -int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd); -int read_sample_rate (WavpackContext *wpc, WavpackMetadata *wpmd); -int read_wrapper_data (WavpackContext *wpc, WavpackMetadata *wpmd); -int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); -int check_crc_error (WavpackContext *wpc); +// this macro retrieves the specified median breakpoint (without frac; min = 1) +#define GET_MED(med) (((c->median [med]) >> 4) + 1) -// unpack3.c +// These macros update the specified median breakpoints. Note that the median +// is incremented when the sample is higher than the median, else decremented. +// They are designed so that the median will never drop below 1 and the value +// is essentially stationary if there are 2 increments for every 5 decrements. -WavpackContext *open_file3 (WavpackContext *wpc, char *error); -int32_t unpack_samples3 (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); -int seek_sample3 (WavpackContext *wpc, uint32_t desired_index); -uint32_t get_sample_index3 (WavpackContext *wpc); -void free_stream3 (WavpackContext *wpc); -int get_version3 (WavpackContext *wpc); +#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5) +#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2) +#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5) +#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2) +#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5) +#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2) -// metadata.c stuff - -int read_metadata_buff (WavpackMetadata *wpmd, unsigned char *blockbuff, unsigned char **buffptr); -int write_metadata_block (WavpackContext *wpc); -int copy_metadata (WavpackMetadata *wpmd, unsigned char *buffer_start, unsigned char *buffer_end); -int add_to_metadata (WavpackContext *wpc, void 
*data, uint32_t bcount, unsigned char id); -int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd); -void free_metadata (WavpackMetadata *wpmd); - -// words.c stuff +#ifdef HAVE___BUILTIN_CLZ +#define count_bits(av) ((av) ? 32 - __builtin_clz (av) : 0) +#elif defined (_WIN64) +static __inline int count_bits (uint32_t av) { unsigned long res; return _BitScanReverse (&res, av) ? (int)(res + 1) : 0; } +#else +#define count_bits(av) ( \ + (av) < (1 << 8) ? nbits_table [av] : \ + ( \ + (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \ + ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \ + ) \ +) +#endif void init_words (WavpackStream *wps); -void word_set_bitrate (WavpackStream *wps); void write_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd); void write_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd); int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd); @@ -625,34 +737,39 @@ int32_t get_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsample void flush_word (WavpackStream *wps); int32_t nosend_word (WavpackStream *wps, int32_t value, int chan); void scan_word (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir); +void update_error_limit (WavpackStream *wps); -int log2s (int32_t value); -int32_t exp2s (int log); -uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit); +extern const uint32_t bitset [32]; +extern const uint32_t bitmask [32]; +extern const char nbits_table [256]; + +int wp_log2s (int32_t value); +int32_t wp_exp2s (int log); +int FASTCALL wp_log2 (uint32_t avalue); + +#ifdef OPT_ASM_X86 +#define LOG2BUFFER log2buffer_x86 +#elif defined(OPT_ASM_X64) && (defined (_WIN64) || defined(__CYGWIN__) || defined(__MINGW64__)) +#define LOG2BUFFER log2buffer_x64win +#elif defined(OPT_ASM_X64) +#define LOG2BUFFER log2buffer_x64 +#else +#define LOG2BUFFER log2buffer +#endif + +uint32_t LOG2BUFFER (int32_t *samples, uint32_t num_samples, int 
limit); signed char store_weight (int weight); int restore_weight (signed char weight); #define WORD_EOF ((int32_t)(1L << 31)) -// float.c - -void write_float_info (WavpackStream *wps, WavpackMetadata *wpmd); -int scan_float_data (WavpackStream *wps, f32 *values, int32_t num_values); -void send_float_data (WavpackStream *wps, f32 *values, int32_t num_values); -int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd); -void float_values (WavpackStream *wps, int32_t *values, int32_t num_values); void WavpackFloatNormalize (int32_t *values, int32_t num_values, int delta_exp); -// extra?.c - -// void analyze_stereo (WavpackContext *wpc, int32_t *samples); -// void analyze_mono (WavpackContext *wpc, int32_t *samples); -void execute_stereo (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples); -void execute_mono (WavpackContext *wpc, int32_t *samples, int no_history, int do_samples); - -// wputils.c +/////////////////////////// high-level unpacking API and support //////////////////////////// +// modules: open_utils.c, unpack_utils.c, unpack_seek.c, unpack_floats.c +WavpackContext *WavpackOpenFileInputEx64 (WavpackStreamReader64 *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset); WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset); WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset); @@ -664,6 +781,16 @@ WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int f #define OPEN_STREAMING 0x20 // "streaming" mode blindly unpacks blocks // w/o regard to header file position info #define OPEN_EDIT_TAGS 0x40 // allow editing of tags +#define OPEN_FILE_UTF8 0x80 // assume filenames are UTF-8 encoded, not ANSI (Windows only) + +// new for version 5 + +#define OPEN_DSD_NATIVE 0x100 // open DSD files as bitstreams + // (returned as 8-bit "samples" stored in 32-bit words) 
+#define OPEN_DSD_AS_PCM 0x200 // open DSD files as 24-bit PCM (decimated 8x) +#define OPEN_ALT_TYPES 0x400 // application is aware of alternate file types & qmode + // (just affects retrieving wrappers & MD5 checksums) +#define OPEN_NO_CHECKSUM 0x800 // don't verify block checksums before decoding int WavpackGetMode (WavpackContext *wpc); @@ -682,15 +809,38 @@ int WavpackGetMode (WavpackContext *wpc); #define MODE_XMODE 0x7000 // mask for extra level (1-6, 0=unknown) #define MODE_DNS 0x8000 -char *WavpackGetErrorMessage (WavpackContext *wpc); +int WavpackGetQualifyMode (WavpackContext *wpc); int WavpackGetVersion (WavpackContext *wpc); uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples); -uint32_t WavpackGetNumSamples (WavpackContext *wpc); -uint32_t WavpackGetSampleIndex (WavpackContext *wpc); -int WavpackGetNumErrors (WavpackContext *wpc); -int WavpackLossyBlocks (WavpackContext *wpc); int WavpackSeekSample (WavpackContext *wpc, uint32_t sample); -WavpackContext *WavpackCloseFile (WavpackContext *wpc); +int WavpackSeekSample64 (WavpackContext *wpc, int64_t sample); +int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]); + +int WavpackVerifySingleBlock (unsigned char *buffer, int verify_checksum); +uint32_t read_next_header (WavpackStreamReader64 *reader, void *id, WavpackHeader *wphdr); +int read_wvc_block (WavpackContext *wpc); + +/////////////////////////// high-level packing API and support //////////////////////////// +// modules: pack_utils.c, pack_floats.c + +WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id); +int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples); +int WavpackSetConfiguration64 (WavpackContext *wpc, WavpackConfig *config, int64_t total_samples, const unsigned char *chan_ids); +int WavpackPackInit (WavpackContext *wpc); +int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount); +int 
WavpackPackSamples (WavpackContext *wpc, int32_t *sample_buffer, uint32_t sample_count); +int WavpackFlushSamples (WavpackContext *wpc); +int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]); +void WavpackSeekTrailingWrapper (WavpackContext *wpc); +void WavpackUpdateNumSamples (WavpackContext *wpc, void *first_block); +void *WavpackGetWrapperLocation (void *first_block, uint32_t *size); + +/////////////////////////////////// common utilities //////////////////////////////////// +// module: common_utils.c + +extern const uint32_t sample_rates [16]; +uint32_t WavpackGetLibraryVersion (void); +const char *WavpackGetLibraryVersionString (void); uint32_t WavpackGetSampleRate (WavpackContext *wpc); int WavpackGetBitsPerSample (WavpackContext *wpc); int WavpackGetBytesPerSample (WavpackContext *wpc); @@ -698,34 +848,33 @@ int WavpackGetNumChannels (WavpackContext *wpc); int WavpackGetChannelMask (WavpackContext *wpc); int WavpackGetReducedChannels (WavpackContext *wpc); int WavpackGetFloatNormExp (WavpackContext *wpc); -int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]); +uint32_t WavpackGetNumSamples (WavpackContext *wpc); +int64_t WavpackGetNumSamples64 (WavpackContext *wpc); +uint32_t WavpackGetSampleIndex (WavpackContext *wpc); +int64_t WavpackGetSampleIndex64 (WavpackContext *wpc); +char *WavpackGetErrorMessage (WavpackContext *wpc); +int WavpackGetNumErrors (WavpackContext *wpc); +int WavpackLossyBlocks (WavpackContext *wpc); uint32_t WavpackGetWrapperBytes (WavpackContext *wpc); unsigned char *WavpackGetWrapperData (WavpackContext *wpc); void WavpackFreeWrapper (WavpackContext *wpc); -void WavpackSeekTrailingWrapper (WavpackContext *wpc); double WavpackGetProgress (WavpackContext *wpc); uint32_t WavpackGetFileSize (WavpackContext *wpc); +int64_t WavpackGetFileSize64 (WavpackContext *wpc); double WavpackGetRatio (WavpackContext *wpc); double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc); double 
WavpackGetInstantBitrate (WavpackContext *wpc); - -WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id); -int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples); -int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount); -int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]); -int WavpackPackInit (WavpackContext *wpc); -int WavpackPackSamples (WavpackContext *wpc, int32_t *sample_buffer, uint32_t sample_count); -int WavpackFlushSamples (WavpackContext *wpc); -void WavpackUpdateNumSamples (WavpackContext *wpc, void *first_block); -void *WavpackGetWrapperLocation (void *first_block, uint32_t *size); - +WavpackContext *WavpackCloseFile (WavpackContext *wpc); void WavpackLittleEndianToNative (void *data, char *format); void WavpackNativeToLittleEndian (void *data, char *format); +void WavpackBigEndianToNative (void *data, char *format); +void WavpackNativeToBigEndian (void *data, char *format); -uint32_t WavpackGetLibraryVersion (void); -const char *WavpackGetLibraryVersionString (void); +void install_close_callback (WavpackContext *wpc, void cb_func (void *wpc)); +void free_streams (WavpackContext *wpc); -// tags.c +/////////////////////////////////// tag utilities //////////////////////////////////// +// modules: tags.c, tag_utils.c int WavpackGetNumTagItems (WavpackContext *wpc); int WavpackGetTagItem (WavpackContext *wpc, const char *item, char *value, int size); @@ -742,58 +891,5 @@ void free_tag (M_Tag *m_tag); int valid_tag (M_Tag *m_tag); int editable_tag (M_Tag *m_tag); -///////////////////////////// SIMD helper macros ///////////////////////////// - -#ifdef OPT_MMX - -#if defined (__GNUC__) && !defined (__INTEL_COMPILER) -//directly map to gcc's native builtins for faster code - -#if __GNUC__ < 4 -typedef int __di __attribute__ ((__mode__ (__DI__))); -typedef int __m64 __attribute__ ((__mode__ (__V2SI__))); -typedef int __v4hi __attribute__ 
((__mode__ (__V4HI__))); -#define _m_paddsw(m1, m2) (__m64) __builtin_ia32_paddsw ((__v4hi) m1, (__v4hi) m2) -#define _m_pand(m1, m2) (__m64) __builtin_ia32_pand ((__di) m1, (__di) m2) -#define _m_pandn(m1, m2) (__m64) __builtin_ia32_pandn ((__di) m1, (__di) m2) -#define _m_pmaddwd(m1, m2) __builtin_ia32_pmaddwd ((__v4hi) m1, (__v4hi) m2) -#define _m_por(m1, m2) (__m64) __builtin_ia32_por ((__di) m1, (__di) m2) -#define _m_pxor(m1, m2) (__m64) __builtin_ia32_pxor ((__di) m1, (__di) m2) -#else -typedef int __m64 __attribute__ ((__vector_size__ (8))); -typedef short __m64_16 __attribute__ ((__vector_size__ (8))); -#define _m_paddsw(m1, m2) (__m64) __builtin_ia32_paddsw ((__m64_16) m1, (__m64_16) m2) -#define _m_pand(m1, m2) __builtin_ia32_pand (m1, m2) -#define _m_pandn(m1, m2) __builtin_ia32_pandn (m1, m2) -#define _m_pmaddwd(m1, m2) __builtin_ia32_pmaddwd ((__m64_16) m1, (__m64_16) m2) -#define _m_por(m1, m2) __builtin_ia32_por (m1, m2) -#define _m_pxor(m1, m2) __builtin_ia32_pxor (m1, m2) #endif -#define _m_paddd(m1, m2) __builtin_ia32_paddd (m1, m2) -#define _m_pcmpeqd(m1, m2) __builtin_ia32_pcmpeqd (m1, m2) - -#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4 || __has_builtin(__builtin_ia32_pslldi) -# define _m_pslldi(m1, m2) __builtin_ia32_pslldi ((__m64)m1, m2) -# define _m_psradi(m1, m2) __builtin_ia32_psradi ((__m64)m1, m2) -# define _m_psrldi(m1, m2) __builtin_ia32_psrldi ((__m64)m1, m2) -#else -# define _m_pslldi(m1, m2) __builtin_ia32_pslld (m1, m2) -# define _m_psradi(m1, m2) __builtin_ia32_psrad (m1, m2) -# define _m_psrldi(m1, m2) __builtin_ia32_psrld (m1, m2) -#endif - -#define _m_psubd(m1, m2) __builtin_ia32_psubd (m1, m2) -#define _m_punpckhdq(m1, m2) __builtin_ia32_punpckhdq (m1, m2) -#define _m_punpckldq(m1, m2) __builtin_ia32_punpckldq (m1, m2) -#define _mm_empty() __builtin_ia32_emms () -#define _mm_set_pi32(m1, m2) { m2, m1 } -#define _mm_set1_pi32(m) { m, m } - -#else -#include -#endif - -#endif //OPT_MMX - -#endif diff --git 
a/third_party/wavpack/src/wavpack_version.h b/third_party/wavpack/src/wavpack_version.h index ed6e241..6acf274 100644 --- a/third_party/wavpack/src/wavpack_version.h +++ b/third_party/wavpack/src/wavpack_version.h @@ -11,9 +11,9 @@ #ifndef WAVPACK_VERSION_H #define WAVPACK_VERSION_H -#define LIBWAVPACK_MAJOR 4 -#define LIBWAVPACK_MINOR 70 +#define LIBWAVPACK_MAJOR 5 +#define LIBWAVPACK_MINOR 1 #define LIBWAVPACK_MICRO 0 -#define LIBWAVPACK_VERSION_STRING "4.70.0" +#define LIBWAVPACK_VERSION_STRING "5.1.0" #endif diff --git a/third_party/wavpack/src/words.c b/third_party/wavpack/src/words.c deleted file mode 100644 index 368b07a..0000000 --- a/third_party/wavpack/src/words.c +++ /dev/null @@ -1,1525 +0,0 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// words.c - -// This module provides entropy word encoding and decoding functions using -// a variation on the Rice method. This was introduced in version 3.93 -// because it allows splitting the data into a "lossy" stream and a -// "correction" stream in a very efficient manner and is therefore ideal -// for the "hybrid" mode. For 4.0, the efficiency of this method was -// significantly improved by moving away from the normal Rice restriction of -// using powers of two for the modulus divisions and now the method can be -// used for both hybrid and pure lossless encoding. - -// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%), -// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the -// previous. Using standard Rice coding on this data would result in 1.4 -// bits per sample average (not counting sign bit). 
However, there is a -// very simple encoding that is over 99% efficient with this data and -// results in about 1.22 bits per sample. - -#include "wavpack_local.h" - -#include -#include - -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -//////////////////////////////// local macros ///////////////////////////////// - -#define USE_NEXT8_OPTIMIZATION // we normally want this, but code is easier to understand without it - -#define LIMIT_ONES 16 // maximum consecutive 1s sent for "div" data - -// these control the time constant "slow_level" which is used for hybrid mode -// that controls bitrate as a function of residual level (HYBRID_BITRATE). -#define SLS 8 -#define SLO ((1 << (SLS - 1))) - -// these control the time constant of the 3 median level breakpoints -#define DIV0 128 // 5/7 of samples -#define DIV1 64 // 10/49 of samples -#define DIV2 32 // 20/343 of samples - -// this macro retrieves the specified median breakpoint (without frac; min = 1) -#define GET_MED(med) (((c->median [med]) >> 4) + 1) - -// These macros update the specified median breakpoints. Note that the median -// is incremented when the sample is higher than the median, else decremented. -// They are designed so that the median will never drop below 1 and the value -// is essentially stationary if there are 2 increments for every 5 decrements. 
- -#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5) -#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2) -#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5) -#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2) -#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5) -#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2) - -#define count_bits(av) ( \ - (av) < (1 << 8) ? nbits_table [av] : \ - ( \ - (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \ - ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \ - ) \ -) - -///////////////////////////// local table storage //////////////////////////// - -const uint32_t bitset [] = { - 1L << 0, 1L << 1, 1L << 2, 1L << 3, - 1L << 4, 1L << 5, 1L << 6, 1L << 7, - 1L << 8, 1L << 9, 1L << 10, 1L << 11, - 1L << 12, 1L << 13, 1L << 14, 1L << 15, - 1L << 16, 1L << 17, 1L << 18, 1L << 19, - 1L << 20, 1L << 21, 1L << 22, 1L << 23, - 1L << 24, 1L << 25, 1L << 26, 1L << 27, - 1L << 28, 1L << 29, 1L << 30, 1L << 31 -}; - -const uint32_t bitmask [] = { - (1L << 0) - 1, (1L << 1) - 1, (1L << 2) - 1, (1L << 3) - 1, - (1L << 4) - 1, (1L << 5) - 1, (1L << 6) - 1, (1L << 7) - 1, - (1L << 8) - 1, (1L << 9) - 1, (1L << 10) - 1, (1L << 11) - 1, - (1L << 12) - 1, (1L << 13) - 1, (1L << 14) - 1, (1L << 15) - 1, - (1L << 16) - 1, (1L << 17) - 1, (1L << 18) - 1, (1L << 19) - 1, - (1L << 20) - 1, (1L << 21) - 1, (1L << 22) - 1, (1L << 23) - 1, - (1L << 24) - 1, (1L << 25) - 1, (1L << 26) - 1, (1L << 27) - 1, - (1L << 28) - 1, (1L << 29) - 1, (1L << 30) - 1, 0x7fffffff -}; - -const char nbits_table [] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 0 - 15 - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 16 - 31 - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 32 - 47 - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 48 - 63 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, // 64 - 79 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 80 - 95 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 96 - 111 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 112 - 127 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128 - 143 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 144 - 159 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 160 - 175 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 176 - 191 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 192 - 207 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 208 - 223 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 224 - 239 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 // 240 - 255 -}; - -static const unsigned char log2_table [] = { - 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15, - 0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, - 0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e, - 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51, - 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, - 0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75, - 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, - 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, - 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, - 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2, - 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0, - 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce, - 0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 
0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb, - 0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7, - 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4, - 0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff -}; - -static const unsigned char exp2_table [] = { - 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, - 0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16, - 0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23, - 0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d, - 0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b, - 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, - 0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a, - 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, - 0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, - 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, - 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4, - 0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9, - 0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff -}; - -#ifdef USE_NEXT8_OPTIMIZATION -static 
const char ones_count_table [] = { - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 -}; -#endif - -///////////////////////////// executable code //////////////////////////////// - -static int FASTCALL mylog2 (uint32_t avalue); - -// Initialize entropy encoder for the specified stream. In lossless mode there -// are no parameters to select; in hybrid mode the bitrate mode and value need -// be initialized. - -#ifndef NO_PACK - -void init_words (WavpackStream *wps) -{ - CLEAR (wps->w); - - if (wps->wphdr.flags & HYBRID_FLAG) - word_set_bitrate (wps); -} - -// Set up parameters for hybrid mode based on header flags and "bits" field. -// This is currently only set up for the HYBRID_BITRATE mode in which the -// allowed error varies with the residual level (from "slow_level"). The -// simpler mode (which is not used yet) has the error level directly -// controlled from the metadata. - -void word_set_bitrate (WavpackStream *wps) -{ - int bitrate_0, bitrate_1; - - if (wps->wphdr.flags & HYBRID_BITRATE) { - if (wps->wphdr.flags & FALSE_STEREO) - bitrate_0 = (wps->bits * 2 - 512) < 568 ? 0 : (wps->bits * 2 - 512) - 568; - else - bitrate_0 = wps->bits < 568 ? 0 : wps->bits - 568; - - if (!(wps->wphdr.flags & MONO_DATA)) { - - if (wps->wphdr.flags & HYBRID_BALANCE) - bitrate_1 = (wps->wphdr.flags & JOINT_STEREO) ? 
256 : 0; - else { - bitrate_1 = bitrate_0; - - if (wps->wphdr.flags & JOINT_STEREO) { - if (bitrate_0 < 128) { - bitrate_1 += bitrate_0; - bitrate_0 = 0; - } - else { - bitrate_0 -= 128; - bitrate_1 += 128; - } - } - } - } - else - bitrate_1 = 0; - } - else - bitrate_0 = bitrate_1 = 0; - - wps->w.bitrate_acc [0] = (int32_t) bitrate_0 << 16; - wps->w.bitrate_acc [1] = (int32_t) bitrate_1 << 16; -} - -// Allocates the correct space in the metadata structure and writes the -// current median values to it. Values are converted from 32-bit unsigned -// to our internal 16-bit mylog2 values, and read_entropy_vars () is called -// to read the values back because we must compensate for the loss through -// the log function. - -void write_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd) -{ - unsigned char *byteptr; - int temp; - - byteptr = wpmd->data = malloc (12); - wpmd->id = ID_ENTROPY_VARS; - - *byteptr++ = temp = mylog2 (wps->w.c [0].median [0]); - *byteptr++ = temp >> 8; - *byteptr++ = temp = mylog2 (wps->w.c [0].median [1]); - *byteptr++ = temp >> 8; - *byteptr++ = temp = mylog2 (wps->w.c [0].median [2]); - *byteptr++ = temp >> 8; - - if (!(wps->wphdr.flags & MONO_DATA)) { - *byteptr++ = temp = mylog2 (wps->w.c [1].median [0]); - *byteptr++ = temp >> 8; - *byteptr++ = temp = mylog2 (wps->w.c [1].median [1]); - *byteptr++ = temp >> 8; - *byteptr++ = temp = mylog2 (wps->w.c [1].median [2]); - *byteptr++ = temp >> 8; - } - - wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data); - read_entropy_vars (wps, wpmd); -} - -// Allocates enough space in the metadata structure and writes the current -// high word of the bitrate accumulator and the slow_level values to it. The -// slow_level values are converted from 32-bit unsigned to our internal 16-bit -// mylog2 values. Afterward, read_entropy_vars () is called to read the values -// back because we must compensate for the loss through the log function and -// the truncation of the bitrate. 
- -void write_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd) -{ - unsigned char *byteptr; - int temp; - - word_set_bitrate (wps); - byteptr = wpmd->data = malloc (512); - wpmd->id = ID_HYBRID_PROFILE; - - if (wps->wphdr.flags & HYBRID_BITRATE) { - *byteptr++ = temp = log2s (wps->w.c [0].slow_level); - *byteptr++ = temp >> 8; - - if (!(wps->wphdr.flags & MONO_DATA)) { - *byteptr++ = temp = log2s (wps->w.c [1].slow_level); - *byteptr++ = temp >> 8; - } - } - - *byteptr++ = temp = wps->w.bitrate_acc [0] >> 16; - *byteptr++ = temp >> 8; - - if (!(wps->wphdr.flags & MONO_DATA)) { - *byteptr++ = temp = wps->w.bitrate_acc [1] >> 16; - *byteptr++ = temp >> 8; - } - - if (wps->w.bitrate_delta [0] | wps->w.bitrate_delta [1]) { - *byteptr++ = temp = log2s (wps->w.bitrate_delta [0]); - *byteptr++ = temp >> 8; - - if (!(wps->wphdr.flags & MONO_DATA)) { - *byteptr++ = temp = log2s (wps->w.bitrate_delta [1]); - *byteptr++ = temp >> 8; - } - } - - wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data); - read_hybrid_profile (wps, wpmd); -} - -#endif - -// Read the median log2 values from the specifed metadata structure, convert -// them back to 32-bit unsigned values and store them. If length is not -// exactly correct then we flag and return an error. - -int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd) -{ - unsigned char *byteptr = wpmd->data; - - if (wpmd->byte_length != ((wps->wphdr.flags & MONO_DATA) ? 
6 : 12)) - return FALSE; - - wps->w.c [0].median [0] = exp2s (byteptr [0] + (byteptr [1] << 8)); - wps->w.c [0].median [1] = exp2s (byteptr [2] + (byteptr [3] << 8)); - wps->w.c [0].median [2] = exp2s (byteptr [4] + (byteptr [5] << 8)); - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.c [1].median [0] = exp2s (byteptr [6] + (byteptr [7] << 8)); - wps->w.c [1].median [1] = exp2s (byteptr [8] + (byteptr [9] << 8)); - wps->w.c [1].median [2] = exp2s (byteptr [10] + (byteptr [11] << 8)); - } - - return TRUE; -} - -// Read the hybrid related values from the specifed metadata structure, convert -// them back to their internal formats and store them. The extended profile -// stuff is not implemented yet, so return an error if we get more data than -// we know what to do with. - -int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd) -{ - unsigned char *byteptr = wpmd->data; - unsigned char *endptr = byteptr + wpmd->byte_length; - - if (wps->wphdr.flags & HYBRID_BITRATE) { - if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) - return FALSE; - - wps->w.c [0].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8)); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.c [1].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8)); - byteptr += 2; - } - } - - if (byteptr + (wps->wphdr.flags & MONO_DATA ? 2 : 4) > endptr) - return FALSE; - - wps->w.bitrate_acc [0] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.bitrate_acc [1] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; - byteptr += 2; - } - - if (byteptr < endptr) { - if (byteptr + (wps->wphdr.flags & MONO_DATA ? 
2 : 4) > endptr) - return FALSE; - - wps->w.bitrate_delta [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.bitrate_delta [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - } - - if (byteptr < endptr) - return FALSE; - } - else - wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0; - - return TRUE; -} - -// This function is called during both encoding and decoding of hybrid data to -// update the "error_limit" variable which determines the maximum sample error -// allowed in the main bitstream. In the HYBRID_BITRATE mode (which is the only -// currently implemented) this is calculated from the slow_level values and the -// bitrate accumulators. Note that the bitrate accumulators can be changing. - -static void update_error_limit (WavpackStream *wps) -{ - int bitrate_0 = (wps->w.bitrate_acc [0] += wps->w.bitrate_delta [0]) >> 16; - - if (wps->wphdr.flags & MONO_DATA) { - if (wps->wphdr.flags & HYBRID_BITRATE) { - int slow_log_0 = (wps->w.c [0].slow_level + SLO) >> SLS; - - if (slow_log_0 - bitrate_0 > -0x100) - wps->w.c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100); - else - wps->w.c [0].error_limit = 0; - } - else - wps->w.c [0].error_limit = exp2s (bitrate_0); - } - else { - int bitrate_1 = (wps->w.bitrate_acc [1] += wps->w.bitrate_delta [1]) >> 16; - - if (wps->wphdr.flags & HYBRID_BITRATE) { - int slow_log_0 = (wps->w.c [0].slow_level + SLO) >> SLS; - int slow_log_1 = (wps->w.c [1].slow_level + SLO) >> SLS; - - if (wps->wphdr.flags & HYBRID_BALANCE) { - int balance = (slow_log_1 - slow_log_0 + bitrate_1 + 1) >> 1; - - if (balance > bitrate_0) { - bitrate_1 = bitrate_0 * 2; - bitrate_0 = 0; - } - else if (-balance > bitrate_0) { - bitrate_0 = bitrate_0 * 2; - bitrate_1 = 0; - } - else { - bitrate_1 = bitrate_0 + balance; - bitrate_0 = bitrate_0 - balance; - } - } - - if (slow_log_0 - bitrate_0 > -0x100) - wps->w.c [0].error_limit = exp2s (slow_log_0 - 
bitrate_0 + 0x100); - else - wps->w.c [0].error_limit = 0; - - if (slow_log_1 - bitrate_1 > -0x100) - wps->w.c [1].error_limit = exp2s (slow_log_1 - bitrate_1 + 0x100); - else - wps->w.c [1].error_limit = 0; - } - else { - wps->w.c [0].error_limit = exp2s (bitrate_0); - wps->w.c [1].error_limit = exp2s (bitrate_1); - } - } -} - -#ifndef NO_PACK - -// This function writes the specified word to the open bitstream "wvbits" and, -// if the bitstream "wvcbits" is open, writes any correction data there. This -// function will work for either lossless or hybrid but because a version -// optimized for lossless exits below, it would normally be used for the hybrid -// mode only. The return value is the actual value stored to the stream (even -// if a correction file is being created) and is used as feedback to the -// predictor. - -int32_t FASTCALL send_word (WavpackStream *wps, int32_t value, int chan) -{ - struct entropy_data *c = wps->w.c + chan; - uint32_t ones_count, low, mid, high; - int sign = (value < 0) ? 
1 : 0; - - if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) { - if (wps->w.zeros_acc) { - if (value) - flush_word (wps); - else { - c->slow_level -= (c->slow_level + SLO) >> SLS; - wps->w.zeros_acc++; - return 0; - } - } - else if (value) - putbit_0 (&wps->wvbits); - else { - c->slow_level -= (c->slow_level + SLO) >> SLS; - CLEAR (wps->w.c [0].median); - CLEAR (wps->w.c [1].median); - wps->w.zeros_acc = 1; - return 0; - } - } - - if (sign) - value = ~value; - - if ((wps->wphdr.flags & HYBRID_FLAG) && !chan) - update_error_limit (wps); - - if (value < (int32_t) GET_MED (0)) { - ones_count = low = 0; - high = GET_MED (0) - 1; - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (value - low < GET_MED (1)) { - ones_count = 1; - high = low + GET_MED (1) - 1; - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (value - low < GET_MED (2)) { - ones_count = 2; - high = low + GET_MED (2) - 1; - DEC_MED2 (); - } - else { - ones_count = 2 + (value - low) / GET_MED (2); - low += (ones_count - 2) * GET_MED (2); - high = low + GET_MED (2) - 1; - INC_MED2 (); - } - } - } - - mid = (high + low + 1) >> 1; - - if (wps->w.holding_zero) { - if (ones_count) - wps->w.holding_one++; - - flush_word (wps); - - if (ones_count) { - wps->w.holding_zero = 1; - ones_count--; - } - else - wps->w.holding_zero = 0; - } - else - wps->w.holding_zero = 1; - - wps->w.holding_one = ones_count * 2; - - if (!c->error_limit) { - if (high != low) { - uint32_t maxcode = high - low, code = value - low; - int bitcount = count_bits (maxcode); - uint32_t extras = bitset [bitcount] - maxcode - 1; - - if (code < extras) { - wps->w.pend_data |= code << wps->w.pend_count; - wps->w.pend_count += bitcount - 1; - } - else { - wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count; - wps->w.pend_count += bitcount - 1; - wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++; - } - } - - mid = value; - } - else - while (high 
- low > c->error_limit) - if (value < (int32_t) mid) { - mid = ((high = mid - 1) + low + 1) >> 1; - wps->w.pend_count++; - } - else { - mid = (high + (low = mid) + 1) >> 1; - wps->w.pend_data |= bitset [wps->w.pend_count++]; - } - - wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++); - - if (!wps->w.holding_zero) - flush_word (wps); - - if (bs_is_open (&wps->wvcbits) && c->error_limit) { - uint32_t code = value - low, maxcode = high - low; - int bitcount = count_bits (maxcode); - uint32_t extras = bitset [bitcount] - maxcode - 1; - - if (bitcount) { - if (code < extras) - putbits (code, bitcount - 1, &wps->wvcbits); - else { - putbits ((code + extras) >> 1, bitcount - 1, &wps->wvcbits); - putbit ((code + extras) & 1, &wps->wvcbits); - } - } - } - - if (wps->wphdr.flags & HYBRID_BITRATE) { - c->slow_level -= (c->slow_level + SLO) >> SLS; - c->slow_level += mylog2 (mid); - } - - return sign ? ~mid : mid; -} - -// This function is an optimized version of send_word() that only handles -// lossless (error_limit == 0) and sends an entire buffer of either mono or -// stereo data rather than a single sample. Unlike the generalized -// send_word(), it does not return values because it always encodes -// the exact value passed. - -void send_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsamples) -{ - struct entropy_data *c = wps->w.c; - int32_t value, csamples; - - if (!(wps->wphdr.flags & MONO_DATA)) - nsamples *= 2; - - for (csamples = 0; csamples < nsamples; ++csamples) { - int sign = ((value = *buffer++) < 0) ? 
1 : 0; - uint32_t ones_count, low, high; - - if (!(wps->wphdr.flags & MONO_DATA)) - c = wps->w.c + (csamples & 1); - - if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) { - if (wps->w.zeros_acc) { - if (value) - flush_word (wps); - else { - wps->w.zeros_acc++; - continue; - } - } - else if (value) - putbit_0 (&wps->wvbits); - else { - CLEAR (wps->w.c [0].median); - CLEAR (wps->w.c [1].median); - wps->w.zeros_acc = 1; - continue; - } - } - - if (sign) - value = ~value; - - if (value < (int32_t) GET_MED (0)) { - ones_count = low = 0; - high = GET_MED (0) - 1; - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (value - low < GET_MED (1)) { - ones_count = 1; - high = low + GET_MED (1) - 1; - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (value - low < GET_MED (2)) { - ones_count = 2; - high = low + GET_MED (2) - 1; - DEC_MED2 (); - } - else { - ones_count = 2 + (value - low) / GET_MED (2); - low += (ones_count - 2) * GET_MED (2); - high = low + GET_MED (2) - 1; - INC_MED2 (); - } - } - } - - if (wps->w.holding_zero) { - if (ones_count) - wps->w.holding_one++; - - flush_word (wps); - - if (ones_count) { - wps->w.holding_zero = 1; - ones_count--; - } - else - wps->w.holding_zero = 0; - } - else - wps->w.holding_zero = 1; - - wps->w.holding_one = ones_count * 2; - - if (high != low) { - uint32_t maxcode = high - low, code = value - low; - int bitcount = count_bits (maxcode); - uint32_t extras = bitset [bitcount] - maxcode - 1; - - if (code < extras) { - wps->w.pend_data |= code << wps->w.pend_count; - wps->w.pend_count += bitcount - 1; - } - else { - wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count; - wps->w.pend_count += bitcount - 1; - wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++; - } - } - - wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++); - - if (!wps->w.holding_zero) - flush_word (wps); - } -} - -// Used by send_word() and 
send_word_lossless() to actually send most the -// accumulated data onto the bitstream. This is also called directly from -// clients when all words have been sent. - -void flush_word (WavpackStream *wps) -{ - if (wps->w.zeros_acc) { - int cbits = count_bits (wps->w.zeros_acc); - - while (cbits--) - putbit_1 (&wps->wvbits); - - putbit_0 (&wps->wvbits); - - while (wps->w.zeros_acc > 1) { - putbit (wps->w.zeros_acc & 1, &wps->wvbits); - wps->w.zeros_acc >>= 1; - } - - wps->w.zeros_acc = 0; - } - - if (wps->w.holding_one) { -#ifdef LIMIT_ONES - if (wps->w.holding_one >= LIMIT_ONES) { - int cbits; - - putbits ((1L << LIMIT_ONES) - 1, LIMIT_ONES + 1, &wps->wvbits); - wps->w.holding_one -= LIMIT_ONES; - cbits = count_bits (wps->w.holding_one); - - while (cbits--) - putbit_1 (&wps->wvbits); - - putbit_0 (&wps->wvbits); - - while (wps->w.holding_one > 1) { - putbit (wps->w.holding_one & 1, &wps->wvbits); - wps->w.holding_one >>= 1; - } - - wps->w.holding_zero = 0; - } - else - putbits (bitmask [wps->w.holding_one], wps->w.holding_one, &wps->wvbits); - - wps->w.holding_one = 0; -#else - do { - putbit_1 (&wps->wvbits); - } while (--wps->w.holding_one); -#endif - } - - if (wps->w.holding_zero) { - putbit_0 (&wps->wvbits); - wps->w.holding_zero = 0; - } - - if (wps->w.pend_count) { - putbits (wps->w.pend_data, wps->w.pend_count, &wps->wvbits); - wps->w.pend_data = wps->w.pend_count = 0; - } -} - -// This function is similar to send_word() except that no data is actually -// written to any stream, but it does return the value that would have been -// sent to a hybrid stream. It is used to determine beforehand how much noise -// will be added to samples. - -int32_t nosend_word (WavpackStream *wps, int32_t value, int chan) -{ - struct entropy_data *c = wps->w.c + chan; - uint32_t ones_count, low, mid, high; - int sign = (value < 0) ? 
1 : 0; - - if (sign) - value = ~value; - - if ((wps->wphdr.flags & HYBRID_FLAG) && !chan) - update_error_limit (wps); - - if (value < (int32_t) GET_MED (0)) { - low = 0; - high = GET_MED (0) - 1; - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (value - low < GET_MED (1)) { - high = low + GET_MED (1) - 1; - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (value - low < GET_MED (2)) { - high = low + GET_MED (2) - 1; - DEC_MED2 (); - } - else { - ones_count = 2 + (value - low) / GET_MED (2); - low += (ones_count - 2) * GET_MED (2); - high = low + GET_MED (2) - 1; - INC_MED2 (); - } - } - } - - mid = (high + low + 1) >> 1; - - if (!c->error_limit) - mid = value; - else - while (high - low > c->error_limit) - if (value < (int32_t) mid) - mid = ((high = mid - 1) + low + 1) >> 1; - else - mid = (high + (low = mid) + 1) >> 1; - - c->slow_level -= (c->slow_level + SLO) >> SLS; - c->slow_level += mylog2 (mid); - - return sign ? ~mid : mid; -} - -// This function is used to scan some number of samples to set the variables -// "slow_level" and the "median" array. In pure symetrical encoding mode this -// would not be needed because these values would simply be continued from the -// previous block. However, in the -X modes and the 32-bit modes we cannot do -// this because parameters may change between blocks and the variables might -// not apply. This function can work in mono or stereo and can scan a block -// in either direction. 
- -void scan_word (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir) -{ - uint32_t flags = wps->wphdr.flags, value, low; - struct entropy_data *c = wps->w.c; - int chan; - - init_words (wps); - - if (flags & MONO_DATA) { - if (dir < 0) { - samples += (num_samples - 1); - dir = -1; - } - else - dir = 1; - } - else { - if (dir < 0) { - samples += (num_samples - 1) * 2; - dir = -2; - } - else - dir = 2; - } - - while (num_samples--) { - - value = labs (samples [chan = 0]); - - if (flags & HYBRID_BITRATE) { - wps->w.c [0].slow_level -= (wps->w.c [0].slow_level + SLO) >> SLS; - wps->w.c [0].slow_level += mylog2 (value); - } - - if (value < GET_MED (0)) { - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (value - low < GET_MED (1)) { - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (value - low < GET_MED (2)) { - DEC_MED2 (); - } - else { - INC_MED2 (); - } - } - } - - if (!(flags & MONO_DATA)) { - value = labs (samples [chan = 1]); - c++; - - if (wps->wphdr.flags & HYBRID_BITRATE) { - wps->w.c [1].slow_level -= (wps->w.c [1].slow_level + SLO) >> SLS; - wps->w.c [1].slow_level += mylog2 (value); - } - - if (value < GET_MED (0)) { - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (value - low < GET_MED (1)) { - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (value - low < GET_MED (2)) { - DEC_MED2 (); - } - else { - INC_MED2 (); - } - } - } - - c--; - } - - samples += dir; - } -} - -#endif - -#ifndef NO_UNPACK - -static uint32_t FASTCALL read_code (Bitstream *bs, uint32_t maxcode); - -// Read the next word from the bitstream "wvbits" and return the value. This -// function can be used for hybrid or lossless streams, but since an -// optimized version is available for lossless this function would normally -// be used for hybrid only. If a hybrid lossless stream is being read then -// the "correction" offset is written at the specified pointer. 
A return value -// of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or -// some other error occurred. - -int32_t FASTCALL get_word (WavpackStream *wps, int chan, int32_t *correction) -{ - register struct entropy_data *c = wps->w.c + chan; - uint32_t ones_count, low, mid, high; - int next8, sign; - int32_t value; - - if (correction) - *correction = 0; - - if (!(wps->w.c [0].median [0] & ~1) && !wps->w.holding_zero && !wps->w.holding_one && !(wps->w.c [1].median [0] & ~1)) { - uint32_t mask; - int cbits; - - if (wps->w.zeros_acc) { - if (--wps->w.zeros_acc) { - c->slow_level -= (c->slow_level + SLO) >> SLS; - return 0; - } - } - else { - for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits); - - if (cbits == 33) - return WORD_EOF; - - if (cbits < 2) - wps->w.zeros_acc = cbits; - else { - for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1) - if (getbit (&wps->wvbits)) - wps->w.zeros_acc |= mask; - - wps->w.zeros_acc |= mask; - } - - if (wps->w.zeros_acc) { - c->slow_level -= (c->slow_level + SLO) >> SLS; - CLEAR (wps->w.c [0].median); - CLEAR (wps->w.c [1].median); - return 0; - } - } - } - - if (wps->w.holding_zero) - ones_count = wps->w.holding_zero = 0; - else { -#ifdef USE_NEXT8_OPTIMIZATION - if (wps->wvbits.bc < 8) { - if (++(wps->wvbits.ptr) == wps->wvbits.end) - wps->wvbits.wrap (&wps->wvbits); - - next8 = (wps->wvbits.sr |= *(wps->wvbits.ptr) << wps->wvbits.bc) & 0xff; - wps->wvbits.bc += sizeof (*(wps->wvbits.ptr)) * 8; - } - else - next8 = wps->wvbits.sr & 0xff; - - if (next8 == 0xff) { - wps->wvbits.bc -= 8; - wps->wvbits.sr >>= 8; - - for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count); - - if (ones_count == (LIMIT_ONES + 1)) - return WORD_EOF; - - if (ones_count == LIMIT_ONES) { - uint32_t mask; - int cbits; - - for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits); - - if (cbits == 33) - return WORD_EOF; - - if (cbits < 2) - ones_count = cbits; - else { - for (mask = 1, 
ones_count = 0; --cbits; mask <<= 1) - if (getbit (&wps->wvbits)) - ones_count |= mask; - - ones_count |= mask; - } - - ones_count += LIMIT_ONES; - } - } - else { - wps->wvbits.bc -= (ones_count = ones_count_table [next8]) + 1; - wps->wvbits.sr >>= ones_count + 1; - } -#else - for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count); - - if (ones_count >= LIMIT_ONES) { - uint32_t mask; - int cbits; - - if (ones_count == (LIMIT_ONES + 1)) - return WORD_EOF; - - for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits); - - if (cbits == 33) - return WORD_EOF; - - if (cbits < 2) - ones_count = cbits; - else { - for (mask = 1, ones_count = 0; --cbits; mask <<= 1) - if (getbit (&wps->wvbits)) - ones_count |= mask; - - ones_count |= mask; - } - - ones_count += LIMIT_ONES; - } -#endif - - if (wps->w.holding_one) { - wps->w.holding_one = ones_count & 1; - ones_count = (ones_count >> 1) + 1; - } - else { - wps->w.holding_one = ones_count & 1; - ones_count >>= 1; - } - - wps->w.holding_zero = ~wps->w.holding_one & 1; - } - - if ((wps->wphdr.flags & HYBRID_FLAG) && !chan) - update_error_limit (wps); - - if (ones_count == 0) { - low = 0; - high = GET_MED (0) - 1; - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (ones_count == 1) { - high = low + GET_MED (1) - 1; - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (ones_count == 2) { - high = low + GET_MED (2) - 1; - DEC_MED2 (); - } - else { - low += (ones_count - 2) * GET_MED (2); - high = low + GET_MED (2) - 1; - INC_MED2 (); - } - } - } - - low &= 0x7fffffff; - high &= 0x7fffffff; - mid = (high + low + 1) >> 1; - - if (!c->error_limit) - mid = read_code (&wps->wvbits, high - low) + low; - else while (high - low > c->error_limit) { - if (getbit (&wps->wvbits)) - mid = (high + (low = mid) + 1) >> 1; - else - mid = ((high = mid - 1) + low + 1) >> 1; - } - - sign = getbit (&wps->wvbits); - - if (bs_is_open (&wps->wvcbits) && c->error_limit) { - 
value = read_code (&wps->wvcbits, high - low) + low; - - if (correction) - *correction = sign ? (mid - value) : (value - mid); - } - - if (wps->wphdr.flags & HYBRID_BITRATE) { - c->slow_level -= (c->slow_level + SLO) >> SLS; - c->slow_level += mylog2 (mid); - } - - return sign ? ~mid : mid; -} - -// This is an optimized version of get_word() that is used for lossless only -// (error_limit == 0). Also, rather than obtaining a single sample, it can be -// used to obtain an entire buffer of either mono or stereo samples. - -int32_t get_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsamples) -{ - struct entropy_data *c = wps->w.c; - uint32_t ones_count, low, high; - Bitstream *bs = &wps->wvbits; - int32_t csamples; - - if (!(wps->wphdr.flags & MONO_DATA)) - nsamples *= 2; - - for (csamples = 0; csamples < nsamples; ++csamples) { - if (!(wps->wphdr.flags & MONO_DATA)) - c = wps->w.c + (csamples & 1); - - if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && !wps->w.holding_one && wps->w.c [1].median [0] < 2) { - uint32_t mask; - int cbits; - - if (wps->w.zeros_acc) { - if (--wps->w.zeros_acc) { - *buffer++ = 0; - continue; - } - } - else { - for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); - - if (cbits == 33) - break; - - if (cbits < 2) - wps->w.zeros_acc = cbits; - else { - for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1) - if (getbit (bs)) - wps->w.zeros_acc |= mask; - - wps->w.zeros_acc |= mask; - } - - if (wps->w.zeros_acc) { - CLEAR (wps->w.c [0].median); - CLEAR (wps->w.c [1].median); - *buffer++ = 0; - continue; - } - } - } - - if (wps->w.holding_zero) - ones_count = wps->w.holding_zero = 0; - else { -#ifdef USE_NEXT8_OPTIMIZATION - int next8; - - if (bs->bc < 8) { - if (++(bs->ptr) == bs->end) - bs->wrap (bs); - - next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff; - bs->bc += sizeof (*(bs->ptr)) * 8; - } - else - next8 = bs->sr & 0xff; - - if (next8 == 0xff) { - bs->bc -= 8; - bs->sr >>= 8; - - for (ones_count = 8; ones_count < 
(LIMIT_ONES + 1) && getbit (bs); ++ones_count); - - if (ones_count == (LIMIT_ONES + 1)) - break; - - if (ones_count == LIMIT_ONES) { - uint32_t mask; - int cbits; - - for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); - - if (cbits == 33) - break; - - if (cbits < 2) - ones_count = cbits; - else { - for (mask = 1, ones_count = 0; --cbits; mask <<= 1) - if (getbit (bs)) - ones_count |= mask; - - ones_count |= mask; - } - - ones_count += LIMIT_ONES; - } - } - else { - bs->bc -= (ones_count = ones_count_table [next8]) + 1; - bs->sr >>= ones_count + 1; - } -#else - for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count); - - if (ones_count >= LIMIT_ONES) { - uint32_t mask; - int cbits; - - if (ones_count == (LIMIT_ONES + 1)) - break; - - for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); - - if (cbits == 33) - break; - - if (cbits < 2) - ones_count = cbits; - else { - for (mask = 1, ones_count = 0; --cbits; mask <<= 1) - if (getbit (bs)) - ones_count |= mask; - - ones_count |= mask; - } - - ones_count += LIMIT_ONES; - } -#endif - if (wps->w.holding_one) { - wps->w.holding_one = ones_count & 1; - ones_count = (ones_count >> 1) + 1; - } - else { - wps->w.holding_one = ones_count & 1; - ones_count >>= 1; - } - - wps->w.holding_zero = ~wps->w.holding_one & 1; - } - - if (ones_count == 0) { - low = 0; - high = GET_MED (0) - 1; - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (ones_count == 1) { - high = low + GET_MED (1) - 1; - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (ones_count == 2) { - high = low + GET_MED (2) - 1; - DEC_MED2 (); - } - else { - low += (ones_count - 2) * GET_MED (2); - high = low + GET_MED (2) - 1; - INC_MED2 (); - } - } - } - - low += read_code (bs, high - low); - *buffer++ = (getbit (bs)) ? ~low : low; - } - - return (wps->wphdr.flags & MONO_DATA) ? 
csamples : (csamples / 2); -} - -// Read a single unsigned value from the specified bitstream with a value -// from 0 to maxcode. If there are exactly a power of two number of possible -// codes then this will read a fixed number of bits; otherwise it reads the -// minimum number of bits and then determines whether another bit is needed -// to define the code. - -static uint32_t FASTCALL read_code (Bitstream *bs, uint32_t maxcode) -{ - uint32_t extras, code; - int bitcount; - - if (maxcode < 2) - return maxcode ? getbit (bs) : 0; - - bitcount = count_bits (maxcode); - extras = bitset [bitcount] - maxcode - 1; - - while (bs->bc < bitcount) { - if (++(bs->ptr) == bs->end) - bs->wrap (bs); - - bs->sr |= *(bs->ptr) << bs->bc; - bs->bc += sizeof (*(bs->ptr)) * 8; - } - - if ((code = bs->sr & bitmask [bitcount - 1]) >= extras) - code = (code << 1) - extras + ((bs->sr >> (bitcount - 1)) & 1); - else - bitcount--; - - if (bs->bc > 32) { - bs->bc -= bitcount; - bs->sr = *(bs->ptr) >> (sizeof (*(bs->ptr)) * 8 - bs->bc); - } - else { - bs->sr >>= bitcount; - bs->bc -= bitcount; - } - - return code; -} - -#endif - -// The concept of a base 2 logarithm is used in many parts of WavPack. It is -// a way of sufficiently accurately representing 32-bit signed and unsigned -// values storing only 16 bits (actually fewer). It is also used in the hybrid -// mode for quickly comparing the relative magnitude of large values (i.e. -// division) and providing smooth exponentials using only addition. - -// These are not strict logarithms in that they become linear around zero and -// can therefore represent both zero and negative values. They have 8 bits -// of precision and in "roundtrip" conversions the total error never exceeds 1 -// part in 225 except for the cases of +/-115 and +/-195 (which error by 1). - - -// This function returns the log2 for the specified 32-bit unsigned value. -// The maximum value allowed is about 0xff800000 and returns 8447. 
- -static int FASTCALL mylog2 (uint32_t avalue) -{ - int dbits; - - if ((avalue += avalue >> 9) < (1 << 8)) { - dbits = nbits_table [avalue]; - return (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff]; - } - else { - if (avalue < (1L << 16)) - dbits = nbits_table [avalue >> 8] + 8; - else if (avalue < (1L << 24)) - dbits = nbits_table [avalue >> 16] + 16; - else - dbits = nbits_table [avalue >> 24] + 24; - - return (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff]; - } -} - -// This function scans a buffer of longs and accumulates the total log2 value -// of all the samples. This is useful for determining maximum compression -// because the bitstream storage required for entropy coding is proportional -// to the base 2 log of the samples. - -uint32_t log2buffer (int32_t *samples, uint32_t num_samples, int limit) -{ - uint32_t result = 0, avalue; - int dbits; - - while (num_samples--) { - avalue = abs (*samples++); - - if ((avalue += avalue >> 9) < (1 << 8)) { - dbits = nbits_table [avalue]; - result += (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff]; - } - else { - if (avalue < (1L << 16)) - dbits = nbits_table [avalue >> 8] + 8; - else if (avalue < (1L << 24)) - dbits = nbits_table [avalue >> 16] + 16; - else - dbits = nbits_table [avalue >> 24] + 24; - - result += dbits = (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff]; - - if (limit && dbits >= limit) - return (uint32_t) -1; - } - } - - return result; -} - -// This function returns the log2 for the specified 32-bit signed value. -// All input values are valid and the return values are in the range of -// +/- 8192. - -int log2s (int32_t value) -{ - return (value < 0) ? -mylog2 (-value) : mylog2 (value); -} - -// This function returns the original integer represented by the supplied -// logarithm (at least within the provided accuracy). The log is signed, -// but since a full 32-bit value is returned this can be used for unsigned -// conversions as well (i.e. 
the input range is -8192 to +8447). - -int32_t exp2s (int log) -{ - uint32_t value; - - if (log < 0) - return -exp2s (-log); - - value = exp2_table [log & 0xff] | 0x100; - - if ((log >>= 8) <= 9) - return value >> (9 - log); - else - return value << (log - 9); -} - -// These two functions convert internal weights (which are normally +/-1024) -// to and from an 8-bit signed character version for storage in metadata. The -// weights are clipped here in the case that they are outside that range. - -signed char store_weight (int weight) -{ - if (weight > 1024) - weight = 1024; - else if (weight < -1024) - weight = -1024; - - if (weight > 0) - weight -= (weight + 64) >> 7; - - return (weight + 4) >> 3; -} - -int restore_weight (signed char weight) -{ - int result; - - if ((result = (int) weight << 3) > 0) - result += (result + 64) >> 7; - - return result; -} diff --git a/third_party/wavpack/src/wputils.c b/third_party/wavpack/src/wputils.c deleted file mode 100644 index 5d30f1e..0000000 --- a/third_party/wavpack/src/wputils.c +++ /dev/null @@ -1,2350 +0,0 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// wputils.c - -// This module provides a high-level interface to reading and writing WavPack -// files. WavPack input files can be opened as standard "C" streams using a -// provided filename. However, an alternate entry uses stream-reading -// callbacks to make using another file I/O method easy. Note that in this -// case the user application is responsible for finding and opening the .wvc -// file if the use of them is desired. 
- -// For writing WavPack files there are no I/O routines used; a callback for -// writing completed blocks is provided. - -#include -#include -#include -#include -#include - -#if defined (WIN32) || defined (__OS2__) -#include -#endif - -#ifndef LIBWAVPACK_VERSION_STRING -#include "wavpack_version.h" -#endif - -#include "wavpack_local.h" - -#ifdef WIN32 -#define stricmp(x,y) _stricmp(x,y) -#define fileno _fileno -#else -#define stricmp strcasecmp -#endif - -#ifdef DEBUG_ALLOC -#define malloc malloc_db -#define realloc realloc_db -#define free free_db -void *malloc_db (uint32_t size); -void *realloc_db (void *ptr, uint32_t size); -void free_db (void *ptr); -int32_t dump_alloc (void); -#endif - -static void free_streams (WavpackContext *wpc); - -///////////////////////////// local table storage //////////////////////////// - -static const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050, - 24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 }; - -///////////////////////////// executable code //////////////////////////////// - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - -static uint32_t read_next_header (WavpackStreamReader *reader, void *id, WavpackHeader *wphdr); -static uint32_t seek_final_index (WavpackStreamReader *reader, void *id); -static int read_wvc_block (WavpackContext *wpc); - -// This code provides an interface between the reader callback mechanism that -// WavPack uses internally and the standard fstream C library. 
- -#ifndef NO_USE_FSTREAMS - -static int32_t read_bytes (void *id, void *data, int32_t bcount) -{ - return (int32_t) fread (data, 1, bcount, (FILE*) id); -} - -static uint32_t get_pos (void *id) -{ - return ftell ((FILE*) id); -} - -static int set_pos_abs (void *id, uint32_t pos) -{ - return fseek (id, pos, SEEK_SET); -} - -static int set_pos_rel (void *id, int32_t delta, int mode) -{ - return fseek (id, delta, mode); -} - -static int push_back_byte (void *id, int c) -{ - return ungetc (c, id); -} - -static uint32_t get_length (void *id) -{ - FILE *file = id; - struct stat statbuf; - - if (!file || fstat (fileno (file), &statbuf) || !(statbuf.st_mode & S_IFREG)) - return 0; - - return statbuf.st_size; -} - -static int can_seek (void *id) -{ - FILE *file = id; - struct stat statbuf; - - return file && !fstat (fileno (file), &statbuf) && (statbuf.st_mode & S_IFREG); -} - -static int32_t write_bytes (void *id, void *data, int32_t bcount) -{ - return (int32_t) fwrite (data, 1, bcount, (FILE*) id); -} - -static WavpackStreamReader freader = { - read_bytes, get_pos, set_pos_abs, set_pos_rel, push_back_byte, get_length, can_seek, - write_bytes -}; - -// This function attempts to open the specified WavPack file for reading. If -// this fails for any reason then an appropriate message is copied to "error" -// (which must accept 80 characters) and NULL is returned, otherwise a -// pointer to a WavpackContext structure is returned (which is used to call -// all other functions in this module). A filename beginning with "-" is -// assumed to be stdin. The "flags" argument has the following bit mask -// values to specify details of the open operation: - -// OPEN_WVC: attempt to open/read "correction" file -// OPEN_TAGS: attempt to read ID3v1 / APEv2 tags (requires seekable file) -// OPEN_WRAPPER: make audio wrapper available (i.e. 
RIFF) to caller -// OPEN_2CH_MAX: open only first stream of multichannel file (usually L/R) -// OPEN_NORMALIZE: normalize floating point data to +/- 1.0 (w/ offset exp) -// OPEN_STREAMING: blindly unpacks blocks w/o regard to header file position -// OPEN_EDIT_TAGS: allow editing of tags (file must be writable) - -// Version 4.2 of the WavPack library adds the OPEN_STREAMING flag. This is -// essentially a "raw" mode where the library will simply decode any blocks -// fed it through the reader callback, regardless of where those blocks came -// from in a stream. The only requirement is that complete WavPack blocks are -// fed to the decoder (and this may require multiple blocks in multichannel -// mode) and that complete blocks are decoded (even if all samples are not -// actually required). All the blocks must contain the same number of channels -// and bit resolution, and the correction data must be either present or not. -// All other parameters may change from block to block (like lossy/lossless). -// Obviously, in this mode any seeking must be performed by the application -// (and again, decoding must start at the beginning of the block containing -// the seek sample). - -WavpackContext *WavpackOpenFileInput (const char *infilename, char *error, int flags, int norm_offset) -{ - char *file_mode = (flags & OPEN_EDIT_TAGS) ? "r+b" : "rb"; - FILE *wv_id, *wvc_id; - WavpackContext *wpc; - - if (*infilename == '-') { - wv_id = stdin; -#if defined(WIN32) - _setmode (fileno (stdin), O_BINARY); -#endif -#if defined(__OS2__) - setmode (fileno (stdin), O_BINARY); -#endif - } - else if ((wv_id = fopen (infilename, file_mode)) == NULL) { - if (error) strcpy (error, (flags & OPEN_EDIT_TAGS) ? 
"can't open file for editing" : "can't open file"); - return NULL; - } - - if (wv_id != stdin && (flags & OPEN_WVC)) { - char *in2filename = malloc (strlen (infilename) + 10); - - strcpy (in2filename, infilename); - strcat (in2filename, "c"); - wvc_id = fopen (in2filename, "rb"); - free (in2filename); - } - else - wvc_id = NULL; - - wpc = WavpackOpenFileInputEx (&freader, wv_id, wvc_id, error, flags, norm_offset); - - if (!wpc) { - if (wv_id) - fclose (wv_id); - - if (wvc_id) - fclose (wvc_id); - } - else - wpc->close_files = TRUE; - - return wpc; -} - -#endif - -// This function is identical to WavpackOpenFileInput() except that instead -// of providing a filename to open, the caller provides a pointer to a set of -// reader callbacks and instances of up to two streams. The first of these -// streams is required and contains the regular WavPack data stream; the second -// contains the "correction" file if desired. Unlike the standard open -// function which handles the correction file transparently, in this case it -// is the responsibility of the caller to be aware of correction files. 
- -WavpackContext *WavpackOpenFileInputEx (WavpackStreamReader *reader, void *wv_id, void *wvc_id, char *error, int flags, int norm_offset) -{ - WavpackContext *wpc = malloc (sizeof (WavpackContext)); - WavpackStream *wps; - int num_blocks = 0; - unsigned char first_byte; - uint32_t bcount; - - if (!wpc) { - if (error) strcpy (error, "can't allocate memory"); - return NULL; - } - - CLEAR (*wpc); - wpc->wv_in = wv_id; - wpc->wvc_in = wvc_id; - wpc->reader = reader; - wpc->total_samples = (uint32_t) -1; - wpc->norm_offset = norm_offset; - wpc->max_streams = OLD_MAX_STREAMS; // use this until overwritten with actual number - wpc->open_flags = flags; - - wpc->filelen = wpc->reader->get_length (wpc->wv_in); - -#ifndef NO_TAGS - if ((flags & (OPEN_TAGS | OPEN_EDIT_TAGS)) && wpc->reader->can_seek (wpc->wv_in)) { - load_tag (wpc); - wpc->reader->set_pos_abs (wpc->wv_in, 0); - - if ((flags & OPEN_EDIT_TAGS) && !editable_tag (&wpc->m_tag)) { - if (error) strcpy (error, "can't edit tags located at the beginning of files!"); - return WavpackCloseFile (wpc); - } - } -#endif - -#ifndef VER4_ONLY - if (wpc->reader->read_bytes (wpc->wv_in, &first_byte, 1) != 1) { - if (error) strcpy (error, "can't read all of WavPack file!"); - return WavpackCloseFile (wpc); - } - - wpc->reader->push_back_byte (wpc->wv_in, first_byte); - - if (first_byte == 'R') - return open_file3 (wpc, error); -#endif - - wpc->streams = malloc ((wpc->num_streams = 1) * sizeof (wpc->streams [0])); - wpc->streams [0] = wps = malloc (sizeof (WavpackStream)); - CLEAR (*wps); - - while (!wps->wphdr.block_samples) { - - wpc->filepos = wpc->reader->get_pos (wpc->wv_in); - bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); - - if (bcount == (uint32_t) -1 || - (!wps->wphdr.block_samples && num_blocks++ > 16)) { - if (error) strcpy (error, "not compatible with this version of WavPack file!"); - return WavpackCloseFile (wpc); - } - - wpc->filepos += bcount; - wps->blockbuff = malloc (wps->wphdr.ckSize + 8); 
- memcpy (wps->blockbuff, &wps->wphdr, 32); - - if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != wps->wphdr.ckSize - 24) { - if (error) strcpy (error, "can't read all of WavPack file!"); - return WavpackCloseFile (wpc); - } - - wps->init_done = FALSE; - - if (wps->wphdr.block_samples && !(flags & OPEN_STREAMING)) { - if (wps->wphdr.block_index || wps->wphdr.total_samples == (uint32_t) -1) { - wpc->initial_index = wps->wphdr.block_index; - wps->wphdr.block_index = 0; - - if (wpc->reader->can_seek (wpc->wv_in)) { - uint32_t pos_save = wpc->reader->get_pos (wpc->wv_in); - uint32_t final_index = seek_final_index (wpc->reader, wpc->wv_in); - - if (final_index != (uint32_t) -1) - wpc->total_samples = final_index - wpc->initial_index; - - wpc->reader->set_pos_abs (wpc->wv_in, pos_save); - } - } - else - wpc->total_samples = wps->wphdr.total_samples; - } - - if (wpc->wvc_in && wps->wphdr.block_samples && (wps->wphdr.flags & HYBRID_FLAG)) { - wpc->file2len = wpc->reader->get_length (wpc->wvc_in); - wpc->wvc_flag = TRUE; - } - - if (wpc->wvc_flag && !read_wvc_block (wpc)) { - if (error) strcpy (error, "not compatible with this version of correction file!"); - return WavpackCloseFile (wpc); - } - - if (!wps->init_done && !unpack_init (wpc)) { - if (error) strcpy (error, wpc->error_message [0] ? 
wpc->error_message : - "not compatible with this version of WavPack file!"); - - return WavpackCloseFile (wpc); - } - - wps->init_done = TRUE; - } - - wpc->config.flags &= ~0xff; - wpc->config.flags |= wps->wphdr.flags & 0xff; - wpc->config.bytes_per_sample = (wps->wphdr.flags & BYTES_STORED) + 1; - wpc->config.float_norm_exp = wps->float_norm_exp; - - wpc->config.bits_per_sample = (wpc->config.bytes_per_sample * 8) - - ((wps->wphdr.flags & SHIFT_MASK) >> SHIFT_LSB); - - if (!wpc->config.sample_rate) { - if (!wps->wphdr.block_samples || (wps->wphdr.flags & SRATE_MASK) == SRATE_MASK) - wpc->config.sample_rate = 44100; - else - wpc->config.sample_rate = sample_rates [(wps->wphdr.flags & SRATE_MASK) >> SRATE_LSB]; - } - - if (!wpc->config.num_channels) { - wpc->config.num_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2; - wpc->config.channel_mask = 0x5 - wpc->config.num_channels; - } - - if ((flags & OPEN_2CH_MAX) && !(wps->wphdr.flags & FINAL_BLOCK)) - wpc->reduced_channels = (wps->wphdr.flags & MONO_FLAG) ? 
1 : 2; - - return wpc; -} - -// This function obtains general information about an open input file and -// returns a mask with the following bit values: - -// MODE_WVC: a .wvc file has been found and will be used for lossless -// MODE_LOSSLESS: file is lossless (either pure or hybrid) -// MODE_HYBRID: file is hybrid mode (either lossy or lossless) -// MODE_FLOAT: audio data is 32-bit ieee floating point -// MODE_VALID_TAG: file conatins a valid ID3v1 or APEv2 tag -// MODE_HIGH: file was created in "high" mode (information only) -// MODE_FAST: file was created in "fast" mode (information only) -// MODE_EXTRA: file was created using "extra" mode (information only) -// MODE_APETAG: file contains a valid APEv2 tag -// MODE_SFX: file was created as a "self-extracting" executable -// MODE_VERY_HIGH: file was created in the "very high" mode (or in -// the "high" mode prior to 4.4) -// MODE_MD5: file contains an MD5 checksum -// MODE_XMODE: level used for extra mode (1-6, 0=unknown) -// MODE_DNS: dynamic noise shaping - -int WavpackGetMode (WavpackContext *wpc) -{ - int mode = 0; - - if (wpc) { - if (wpc->config.flags & CONFIG_HYBRID_FLAG) - mode |= MODE_HYBRID; - else if (!(wpc->config.flags & CONFIG_LOSSY_MODE)) - mode |= MODE_LOSSLESS; - - if (wpc->wvc_flag) - mode |= (MODE_LOSSLESS | MODE_WVC); - - if (wpc->lossy_blocks) - mode &= ~MODE_LOSSLESS; - - if (wpc->config.flags & CONFIG_FLOAT_DATA) - mode |= MODE_FLOAT; - - if (wpc->config.flags & (CONFIG_HIGH_FLAG | CONFIG_VERY_HIGH_FLAG)) { - mode |= MODE_HIGH; - - if ((wpc->config.flags & CONFIG_VERY_HIGH_FLAG) || - (wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.version < 0x405)) - mode |= MODE_VERY_HIGH; - } - - if (wpc->config.flags & CONFIG_FAST_FLAG) - mode |= MODE_FAST; - - if (wpc->config.flags & CONFIG_EXTRA_MODE) - mode |= (MODE_EXTRA | (wpc->config.xmode << 12)); - - if (wpc->config.flags & CONFIG_CREATE_EXE) - mode |= MODE_SFX; - - if (wpc->config.flags & CONFIG_MD5_CHECKSUM) - mode |= MODE_MD5; - 
- if ((wpc->config.flags & CONFIG_HYBRID_FLAG) && (wpc->config.flags & CONFIG_DYNAMIC_SHAPING) && - wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.version >= 0x407) - mode |= MODE_DNS; - -#ifndef NO_TAGS - if (valid_tag (&wpc->m_tag)) { - mode |= MODE_VALID_TAG; - - if (valid_tag (&wpc->m_tag) == 'A') - mode |= MODE_APETAG; - } -#endif - } - - return mode; -} - -// This function returns the major version number of the WavPack program -// (or library) that created the open file. Currently, this can be 1 to 4. -// Minor versions are not recorded in WavPack files. - -int WavpackGetVersion (WavpackContext *wpc) -{ - if (wpc) { -#ifndef VER4_ONLY - if (wpc->stream3) - return get_version3 (wpc); -#endif - return 4; - } - - return 0; -} - -#endif - -// This function returns a pointer to a string describing the last error -// generated by WavPack. - -char *WavpackGetErrorMessage (WavpackContext *wpc) -{ - return wpc->error_message; -} - -#ifndef NO_UNPACK - -// Unpack the specified number of samples from the current file position. -// Note that "samples" here refers to "complete" samples, which would be -// 2 longs for stereo files or even more for multichannel files, so the -// required memory at "buffer" is 4 * samples * num_channels bytes. The -// audio data is returned right-justified in 32-bit longs in the endian -// mode native to the executing processor. So, if the original data was -// 16-bit, then the values returned would be +/-32k. Floating point data -// can also be returned if the source was floating point data (and this -// can be optionally normalized to +/-1.0 by using the appropriate flag -// in the call to WavpackOpenFileInput ()). The actual number of samples -// unpacked is returned, which should be equal to the number requested unless -// the end of fle is encountered or an error occurs. After all samples have -// been unpacked then 0 will be returned. 
- -uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples) -{ - WavpackStream *wps = wpc->streams ? wpc->streams [wpc->current_stream = 0] : NULL; - uint32_t bcount, samples_unpacked = 0, samples_to_unpack; - int num_channels = wpc->config.num_channels; - int file_done = FALSE; - -#ifndef VER4_ONLY - if (wpc->stream3) - return unpack_samples3 (wpc, buffer, samples); -#endif - - while (samples) { - if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || - wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) { - - uint32_t nexthdrpos; - - if (wpc->wrapper_bytes >= MAX_WRAPPER_BYTES) - break; - - free_streams (wpc); - nexthdrpos = wpc->reader->get_pos (wpc->wv_in); - bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); - - if (bcount == (uint32_t) -1) - break; - - wpc->filepos = nexthdrpos; - - if (wpc->open_flags & OPEN_STREAMING) - wps->wphdr.block_index = wps->sample_index = 0; - else - wps->wphdr.block_index -= wpc->initial_index; - - wpc->filepos += bcount; - wps->blockbuff = malloc (wps->wphdr.ckSize + 8); - memcpy (wps->blockbuff, &wps->wphdr, 32); - - if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != - wps->wphdr.ckSize - 24) { - strcpy (wpc->error_message, "can't read all of last block!"); - wps->wphdr.block_samples = 0; - wps->wphdr.ckSize = 24; - break; - } - - wps->init_done = FALSE; - - if (wps->wphdr.block_samples && wps->sample_index != wps->wphdr.block_index) - wpc->crc_errors++; - - if (wps->wphdr.block_samples && wpc->wvc_flag) - read_wvc_block (wpc); - - if (!wps->wphdr.block_samples) { - if (!wps->init_done && !unpack_init (wpc)) - wpc->crc_errors++; - - wps->init_done = TRUE; - } - } - - if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || - wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) - continue; - - if (wps->sample_index < wps->wphdr.block_index) { - samples_to_unpack = 
wps->wphdr.block_index - wps->sample_index; - - if (samples_to_unpack > 262144) { - strcpy (wpc->error_message, "discontinuity found, aborting file!"); - wps->wphdr.block_samples = 0; - wps->wphdr.ckSize = 24; - break; - } - - if (samples_to_unpack > samples) - samples_to_unpack = samples; - - wps->sample_index += samples_to_unpack; - samples_unpacked += samples_to_unpack; - samples -= samples_to_unpack; - - if (wpc->reduced_channels) - samples_to_unpack *= wpc->reduced_channels; - else - samples_to_unpack *= num_channels; - - while (samples_to_unpack--) - *buffer++ = 0; - - continue; - } - - samples_to_unpack = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; - - if (samples_to_unpack > samples) - samples_to_unpack = samples; - - if (!wps->init_done && !unpack_init (wpc)) - wpc->crc_errors++; - - wps->init_done = TRUE; - - if (!wpc->reduced_channels && !(wps->wphdr.flags & FINAL_BLOCK)) { - int32_t *temp_buffer = malloc (samples_to_unpack * 8), *src, *dst; - int offset = 0; - uint32_t samcnt; - - while (1) { - if (wpc->current_stream == wpc->num_streams) { - wpc->streams = realloc (wpc->streams, (wpc->num_streams + 1) * sizeof (wpc->streams [0])); - wps = wpc->streams [wpc->num_streams++] = malloc (sizeof (WavpackStream)); - CLEAR (*wps); - bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); - - if (bcount == (uint32_t) -1) { - wpc->streams [0]->wphdr.block_samples = 0; - wpc->streams [0]->wphdr.ckSize = 24; - file_done = TRUE; - break; - } - - if (wpc->open_flags & OPEN_STREAMING) - wps->wphdr.block_index = wps->sample_index = 0; - else - wps->wphdr.block_index -= wpc->initial_index; - - wps->blockbuff = malloc (wps->wphdr.ckSize + 8); - memcpy (wps->blockbuff, &wps->wphdr, 32); - - if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != - wps->wphdr.ckSize - 24) { - wpc->streams [0]->wphdr.block_samples = 0; - wpc->streams [0]->wphdr.ckSize = 24; - file_done = TRUE; - break; - } - - 
wps->init_done = FALSE; - - if (wpc->wvc_flag) - read_wvc_block (wpc); - - if (!wps->init_done && !unpack_init (wpc)) - wpc->crc_errors++; - - wps->init_done = TRUE; - } - else - wps = wpc->streams [wpc->current_stream]; - - unpack_samples (wpc, src = temp_buffer, samples_to_unpack); - samcnt = samples_to_unpack; - dst = buffer + offset; - - if (wps->wphdr.flags & MONO_FLAG) { - while (samcnt--) { - dst [0] = *src++; - dst += num_channels; - } - - offset++; - } - else if (offset == num_channels - 1) { - while (samcnt--) { - dst [0] = src [0]; - dst += num_channels; - src += 2; - } - - wpc->crc_errors++; - offset++; - } - else { - while (samcnt--) { - dst [0] = *src++; - dst [1] = *src++; - dst += num_channels; - } - - offset += 2; - } - - if ((wps->wphdr.flags & FINAL_BLOCK) || wpc->current_stream == wpc->max_streams - 1 || offset == num_channels) - break; - else - wpc->current_stream++; - } - - wps = wpc->streams [wpc->current_stream = 0]; - free (temp_buffer); - } - else - unpack_samples (wpc, buffer, samples_to_unpack); - - if (file_done) { - strcpy (wpc->error_message, "can't read all of last block!"); - break; - } - - if (wpc->reduced_channels) - buffer += samples_to_unpack * wpc->reduced_channels; - else - buffer += samples_to_unpack * num_channels; - - samples_unpacked += samples_to_unpack; - samples -= samples_to_unpack; - - if (wps->sample_index == wps->wphdr.block_index + wps->wphdr.block_samples) { - if (check_crc_error (wpc) && wps->blockbuff) { - - if (wpc->reader->can_seek (wpc->wv_in)) { - int32_t rseek = ((WavpackHeader *) wps->blockbuff)->ckSize / 3; - wpc->reader->set_pos_rel (wpc->wv_in, (rseek > 16384) ? -16384 : -rseek, SEEK_CUR); - } - - if (wpc->wvc_flag && wps->block2buff && wpc->reader->can_seek (wpc->wvc_in)) { - int32_t rseek = ((WavpackHeader *) wps->block2buff)->ckSize / 3; - wpc->reader->set_pos_rel (wpc->wvc_in, (rseek > 16384) ? 
-16384 : -rseek, SEEK_CUR); - } - - wpc->crc_errors++; - } - } - - if (wpc->total_samples != (uint32_t) -1 && wps->sample_index == wpc->total_samples) - break; - } - - return samples_unpacked; -} - -#ifndef NO_SEEKING - -static uint32_t find_sample (WavpackContext *wpc, void *infile, uint32_t header_pos, uint32_t sample); - -// Seek to the specifed sample index, returning TRUE on success. Note that -// files generated with version 4.0 or newer will seek almost immediately. -// Older files can take quite long if required to seek through unplayed -// portions of the file, but will create a seek map so that reverse seeks -// (or forward seeks to already scanned areas) will be very fast. After a -// FALSE return the file should not be accessed again (other than to close -// it); this is a fatal error. - -int WavpackSeekSample (WavpackContext *wpc, uint32_t sample) -{ - WavpackStream *wps = wpc->streams ? wpc->streams [wpc->current_stream = 0] : NULL; - uint32_t bcount, samples_to_skip; - int32_t *buffer; - - if (wpc->total_samples == (uint32_t) -1 || sample >= wpc->total_samples || - !wpc->reader->can_seek (wpc->wv_in) || (wpc->open_flags & OPEN_STREAMING) || - (wpc->wvc_flag && !wpc->reader->can_seek (wpc->wvc_in))) - return FALSE; - -#ifndef VER4_ONLY - if (wpc->stream3) - return seek_sample3 (wpc, sample); -#endif - - if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || sample < wps->wphdr.block_index || - sample >= wps->wphdr.block_index + wps->wphdr.block_samples) { - - free_streams (wpc); - wpc->filepos = find_sample (wpc, wpc->wv_in, wpc->filepos, sample); - - if (wpc->filepos == (uint32_t) -1) - return FALSE; - - if (wpc->wvc_flag) { - wpc->file2pos = find_sample (wpc, wpc->wvc_in, 0, sample); - - if (wpc->file2pos == (uint32_t) -1) - return FALSE; - } - } - - if (!wps->blockbuff) { - wpc->reader->set_pos_abs (wpc->wv_in, wpc->filepos); - wpc->reader->read_bytes (wpc->wv_in, &wps->wphdr, sizeof (WavpackHeader)); - little_endian_to_native 
(&wps->wphdr, WavpackHeaderFormat); - wps->wphdr.block_index -= wpc->initial_index; - wps->blockbuff = malloc (wps->wphdr.ckSize + 8); - memcpy (wps->blockbuff, &wps->wphdr, sizeof (WavpackHeader)); - - if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + sizeof (WavpackHeader), wps->wphdr.ckSize - 24) != - wps->wphdr.ckSize - 24) { - free_streams (wpc); - return FALSE; - } - - wps->init_done = FALSE; - - if (wpc->wvc_flag) { - wpc->reader->set_pos_abs (wpc->wvc_in, wpc->file2pos); - wpc->reader->read_bytes (wpc->wvc_in, &wps->wphdr, sizeof (WavpackHeader)); - little_endian_to_native (&wps->wphdr, WavpackHeaderFormat); - wps->wphdr.block_index -= wpc->initial_index; - wps->block2buff = malloc (wps->wphdr.ckSize + 8); - memcpy (wps->block2buff, &wps->wphdr, sizeof (WavpackHeader)); - - if (wpc->reader->read_bytes (wpc->wvc_in, wps->block2buff + sizeof (WavpackHeader), wps->wphdr.ckSize - 24) != - wps->wphdr.ckSize - 24) { - free_streams (wpc); - return FALSE; - } - } - - if (!wps->init_done && !unpack_init (wpc)) { - free_streams (wpc); - return FALSE; - } - - wps->init_done = TRUE; - } - - while (!wpc->reduced_channels && !(wps->wphdr.flags & FINAL_BLOCK)) { - if (++wpc->current_stream == wpc->num_streams) { - - if (wpc->num_streams == wpc->max_streams) { - free_streams (wpc); - return FALSE; - } - - wpc->streams = realloc (wpc->streams, (wpc->num_streams + 1) * sizeof (wpc->streams [0])); - wps = wpc->streams [wpc->num_streams++] = malloc (sizeof (WavpackStream)); - CLEAR (*wps); - bcount = read_next_header (wpc->reader, wpc->wv_in, &wps->wphdr); - - if (bcount == (uint32_t) -1) { - free_streams (wpc); - return FALSE; - } - - wps->blockbuff = malloc (wps->wphdr.ckSize + 8); - memcpy (wps->blockbuff, &wps->wphdr, 32); - - if (wpc->reader->read_bytes (wpc->wv_in, wps->blockbuff + 32, wps->wphdr.ckSize - 24) != - wps->wphdr.ckSize - 24) { - free_streams (wpc); - return FALSE; - } - - wps->init_done = FALSE; - - if (wpc->wvc_flag && !read_wvc_block (wpc)) { - 
free_streams (wpc); - return FALSE; - } - - if (!wps->init_done && !unpack_init (wpc)) { - free_streams (wpc); - return FALSE; - } - - wps->init_done = TRUE; - } - else - wps = wpc->streams [wpc->current_stream]; - } - - if (sample < wps->sample_index) { - for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) - if (!unpack_init (wpc)) - return FALSE; - else - wpc->streams [wpc->current_stream]->init_done = TRUE; - } - - samples_to_skip = sample - wps->sample_index; - - if (samples_to_skip > 131072) { - free_streams (wpc); - return FALSE; - } - - if (samples_to_skip) { - buffer = malloc (samples_to_skip * 8); - - for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) - unpack_samples (wpc, buffer, samples_to_skip); - - free (buffer); - } - - wpc->current_stream = 0; - return TRUE; -} - -#endif - -#endif - -#ifndef NO_PACK - -// Open context for writing WavPack files. The returned context pointer is used -// in all following calls to the library. The "blockout" function will be used -// to store the actual completed WavPack blocks and will be called with the id -// pointers containing user defined data (one for the wv file and one for the -// wvc file). A return value of NULL indicates that memory could not be -// allocated for the context. - -WavpackContext *WavpackOpenFileOutput (WavpackBlockOutput blockout, void *wv_id, void *wvc_id) -{ - WavpackContext *wpc = malloc (sizeof (WavpackContext)); - - if (!wpc) - return NULL; - - CLEAR (*wpc); - wpc->blockout = blockout; - wpc->wv_out = wv_id; - wpc->wvc_out = wvc_id; - return wpc; -} - -// Set configuration for writing WavPack files. This must be done before -// sending any actual samples, however it is okay to send wrapper or other -// metadata before calling this. 
The "config" structure contains the following -// required information: - -// config->bytes_per_sample see WavpackGetBytesPerSample() for info -// config->bits_per_sample see WavpackGetBitsPerSample() for info -// config->channel_mask Microsoft standard (mono = 4, stereo = 3) -// config->num_channels self evident -// config->sample_rate self evident - -// In addition, the following fields and flags may be set: - -// config->flags: -// -------------- -// o CONFIG_HYBRID_FLAG select hybrid mode (must set bitrate) -// o CONFIG_JOINT_STEREO select joint stereo (must set override also) -// o CONFIG_JOINT_OVERRIDE override default joint stereo selection -// o CONFIG_HYBRID_SHAPE select hybrid noise shaping (set override & -// shaping_weight != 0.0) -// o CONFIG_SHAPE_OVERRIDE override default hybrid noise shaping -// (set CONFIG_HYBRID_SHAPE and shaping_weight) -// o CONFIG_FAST_FLAG "fast" compression mode -// o CONFIG_HIGH_FLAG "high" compression mode -// o CONFIG_BITRATE_KBPS hybrid bitrate is kbps, not bits / sample -// o CONFIG_CREATE_WVC create correction file -// o CONFIG_OPTIMIZE_WVC maximize bybrid compression (-cc option) -// o CONFIG_CALC_NOISE calc noise in hybrid mode -// o CONFIG_EXTRA_MODE extra processing mode (slow!) 
-// o CONFIG_SKIP_WVX no wvx stream for floats & large ints -// o CONFIG_MD5_CHECKSUM specify if you plan to store MD5 signature -// o CONFIG_CREATE_EXE specify if you plan to prepend sfx module -// o CONFIG_OPTIMIZE_MONO detect and optimize for mono files posing as -// stereo (uses a more recent stream format that -// is not compatible with decoders < 4.3) - -// config->bitrate hybrid bitrate in either bits/sample or kbps -// config->shaping_weight hybrid noise shaping coefficient override -// config->block_samples force samples per WavPack block (0 = use deflt) -// config->float_norm_exp select floating-point data (127 for +/-1.0) -// config->xmode extra mode processing value override - -// If the number of samples to be written is known then it should be passed -// here. If the duration is not known then pass -1. In the case that the size -// is not known (or the writing is terminated early) then it is suggested that -// the application retrieve the first block written and let the library update -// the total samples indication. A function is provided to do this update and -// it should be done to the "correction" file also. If this cannot be done -// (because a pipe is being used, for instance) then a valid WavPack will still -// be created, but when applications want to access that file they will have -// to seek all the way to the end to determine the actual duration. Also, if -// a RIFF header has been included then it should be updated as well or the -// WavPack file will not be directly unpackable to a valid wav file (although -// it will still be usable by itself). A return of FALSE indicates an error. 
- -int WavpackSetConfiguration (WavpackContext *wpc, WavpackConfig *config, uint32_t total_samples) -{ - uint32_t flags = (config->bytes_per_sample - 1), bps = 0, shift = 0; - uint32_t chan_mask = config->channel_mask; - int num_chans = config->num_channels; - int i; - - wpc->total_samples = total_samples; - wpc->config.sample_rate = config->sample_rate; - wpc->config.num_channels = config->num_channels; - wpc->config.channel_mask = config->channel_mask; - wpc->config.bits_per_sample = config->bits_per_sample; - wpc->config.bytes_per_sample = config->bytes_per_sample; - wpc->config.block_samples = config->block_samples; - wpc->config.flags = config->flags; - - if (config->flags & CONFIG_VERY_HIGH_FLAG) - wpc->config.flags |= CONFIG_HIGH_FLAG; - - if (config->float_norm_exp) { - wpc->config.float_norm_exp = config->float_norm_exp; - wpc->config.flags |= CONFIG_FLOAT_DATA; - flags |= FLOAT_DATA; - } - else - shift = (config->bytes_per_sample * 8) - config->bits_per_sample; - - for (i = 0; i < 15; ++i) - if (wpc->config.sample_rate == sample_rates [i]) - break; - - flags |= i << SRATE_LSB; - flags |= shift << SHIFT_LSB; - - if (config->flags & CONFIG_HYBRID_FLAG) { - flags |= HYBRID_FLAG | HYBRID_BITRATE | HYBRID_BALANCE; - - if (!(wpc->config.flags & CONFIG_SHAPE_OVERRIDE)) { - wpc->config.flags |= CONFIG_HYBRID_SHAPE | CONFIG_AUTO_SHAPING; - flags |= HYBRID_SHAPE | NEW_SHAPING; - } - else if (wpc->config.flags & CONFIG_HYBRID_SHAPE) { - wpc->config.shaping_weight = config->shaping_weight; - flags |= HYBRID_SHAPE | NEW_SHAPING; - } - - if (wpc->config.flags & CONFIG_OPTIMIZE_WVC) - flags |= CROSS_DECORR; - - if (config->flags & CONFIG_BITRATE_KBPS) { - bps = (uint32_t) floor (config->bitrate * 256000.0 / config->sample_rate / config->num_channels + 0.5); - - if (bps > (64 << 8)) - bps = 64 << 8; - } - else - bps = (uint32_t) floor (config->bitrate * 256.0 + 0.5); - } - else - flags |= CROSS_DECORR; - - if (!(config->flags & CONFIG_JOINT_OVERRIDE) || (config->flags & 
CONFIG_JOINT_STEREO)) - flags |= JOINT_STEREO; - - if (config->flags & CONFIG_CREATE_WVC) - wpc->wvc_flag = TRUE; - - wpc->stream_version = (config->flags & CONFIG_OPTIMIZE_MONO) ? MAX_STREAM_VERS : CUR_STREAM_VERS; - - for (wpc->current_stream = 0; num_chans; wpc->current_stream++) { - WavpackStream *wps = malloc (sizeof (WavpackStream)); - uint32_t stereo_mask, mono_mask; - int pos, chans = 0; - - wpc->streams = realloc (wpc->streams, (wpc->current_stream + 1) * sizeof (wpc->streams [0])); - wpc->streams [wpc->current_stream] = wps; - CLEAR (*wps); - - for (pos = 1; pos <= 18; ++pos) { - stereo_mask = 3 << (pos - 1); - mono_mask = 1 << (pos - 1); - - if ((chan_mask & stereo_mask) == stereo_mask && (mono_mask & 0x251)) { - chan_mask &= ~stereo_mask; - chans = 2; - break; - } - else if (chan_mask & mono_mask) { - chan_mask &= ~mono_mask; - chans = 1; - break; - } - } - - if (!chans) { - if (config->flags & CONFIG_PAIR_UNDEF_CHANS) - chans = num_chans > 1 ? 2 : 1; - else - chans = 1; - } - - num_chans -= chans; - - if (num_chans && wpc->current_stream == NEW_MAX_STREAMS - 1) - break; - - memcpy (wps->wphdr.ckID, "wvpk", 4); - wps->wphdr.ckSize = sizeof (WavpackHeader) - 8; - wps->wphdr.total_samples = wpc->total_samples; - wps->wphdr.version = wpc->stream_version; - wps->wphdr.flags = flags; - wps->bits = bps; - - if (!wpc->current_stream) - wps->wphdr.flags |= INITIAL_BLOCK; - - if (!num_chans) - wps->wphdr.flags |= FINAL_BLOCK; - - if (chans == 1) { - wps->wphdr.flags &= ~(JOINT_STEREO | CROSS_DECORR | HYBRID_BALANCE); - wps->wphdr.flags |= MONO_FLAG; - } - } - - wpc->num_streams = wpc->current_stream; - wpc->current_stream = 0; - - if (num_chans) { - strcpy (wpc->error_message, "too many channels!"); - return FALSE; - } - - if (config->flags & CONFIG_EXTRA_MODE) - wpc->config.xmode = config->xmode ? 
config->xmode : 1; - - return TRUE; -} - -// Prepare to actually pack samples by determining the size of the WavPack -// blocks and allocating sample buffers and initializing each stream. Call -// after WavpackSetConfiguration() and before WavpackPackSamples(). A return -// of FALSE indicates an error. - -int WavpackPackInit (WavpackContext *wpc) -{ - if (wpc->metabytes > 16384) // 16384 bytes still leaves plenty of room for audio - write_metadata_block (wpc); // in this block (otherwise write a special one) - - if (wpc->config.flags & CONFIG_HIGH_FLAG) - wpc->block_samples = wpc->config.sample_rate; - else if (!(wpc->config.sample_rate % 2)) - wpc->block_samples = wpc->config.sample_rate / 2; - else - wpc->block_samples = wpc->config.sample_rate; - - while (wpc->block_samples * wpc->config.num_channels > 150000) - wpc->block_samples /= 2; - - while (wpc->block_samples * wpc->config.num_channels < 40000) - wpc->block_samples *= 2; - - if (wpc->config.block_samples) { - if ((wpc->config.flags & CONFIG_MERGE_BLOCKS) && - wpc->block_samples > (uint32_t) wpc->config.block_samples) { - wpc->block_boundary = wpc->config.block_samples; - wpc->block_samples /= wpc->config.block_samples; - wpc->block_samples *= wpc->config.block_samples; - } - else - wpc->block_samples = wpc->config.block_samples; - } - - wpc->ave_block_samples = wpc->block_samples; - wpc->max_samples = wpc->block_samples + (wpc->block_samples >> 1); - - for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) { - WavpackStream *wps = wpc->streams [wpc->current_stream]; - - wps->sample_buffer = malloc (wpc->max_samples * (wps->wphdr.flags & MONO_FLAG ? 4 : 8)); - pack_init (wpc); - } - - return TRUE; -} - -// Pack the specified samples. Samples must be stored in longs in the native -// endian format of the executing processor. The number of samples specified -// indicates composite samples (sometimes called "frames"). 
So, the actual -// number of data points would be this "sample_count" times the number of -// channels. Note that samples are accumulated here until enough exist to -// create a complete WavPack block (or several blocks for multichannel audio). -// If an application wants to break a block at a specific sample, then it must -// simply call WavpackFlushSamples() to force an early termination. Completed -// WavPack blocks are send to the function provided in the initial call to -// WavpackOpenFileOutput(). A return of FALSE indicates an error. - -static int pack_streams (WavpackContext *wpc, uint32_t block_samples); -static int create_riff_header (WavpackContext *wpc); - -int WavpackPackSamples (WavpackContext *wpc, int32_t *sample_buffer, uint32_t sample_count) -{ - int nch = wpc->config.num_channels; - - while (sample_count) { - int32_t *source_pointer = sample_buffer; - unsigned int samples_to_copy; - - if (!wpc->riff_header_added && !wpc->riff_header_created && !create_riff_header (wpc)) - return FALSE; - - if (wpc->acc_samples + sample_count > wpc->max_samples) - samples_to_copy = wpc->max_samples - wpc->acc_samples; - else - samples_to_copy = sample_count; - - for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) { - WavpackStream *wps = wpc->streams [wpc->current_stream]; - int32_t *dptr, *sptr, cnt; - - dptr = wps->sample_buffer + wpc->acc_samples * (wps->wphdr.flags & MONO_FLAG ? 
1 : 2); - sptr = source_pointer; - cnt = samples_to_copy; - - if (wps->wphdr.flags & MONO_FLAG) { - while (cnt--) { - *dptr++ = *sptr; - sptr += nch; - } - - source_pointer++; - } - else { - while (cnt--) { - *dptr++ = sptr [0]; - *dptr++ = sptr [1]; - sptr += nch; - } - - source_pointer += 2; - } - } - - sample_buffer += samples_to_copy * nch; - sample_count -= samples_to_copy; - - if ((wpc->acc_samples += samples_to_copy) == wpc->max_samples && - !pack_streams (wpc, wpc->block_samples)) - return FALSE; - } - - return TRUE; -} - -// Flush all accumulated samples into WavPack blocks. This is normally called -// after all samples have been sent to WavpackPackSamples(), but can also be -// called to terminate a WavPack block at a specific sample (in other words it -// is possible to continue after this operation). This is also called to -// dump non-audio blocks like those holding metadata for various purposes. -// A return of FALSE indicates an error. - -int WavpackFlushSamples (WavpackContext *wpc) -{ - while (wpc->acc_samples) { - uint32_t block_samples; - - if (wpc->acc_samples > wpc->block_samples) - block_samples = wpc->acc_samples / 2; - else - block_samples = wpc->acc_samples; - - if (!pack_streams (wpc, block_samples)) - return FALSE; - } - - if (wpc->metacount) - write_metadata_block (wpc); - - return TRUE; -} - -// Note: The following function is no longer required because a proper wav -// header is now automatically generated for the application. However, if the -// application wants to generate its own header or wants to include additional -// chunks, then this function can still be used in which case the automatic -// wav header generation is suppressed. - -// Add wrapper (currently RIFF only) to WavPack blocks. This should be called -// before sending any audio samples for the RIFF header or after all samples -// have been sent for any RIFF trailer. 
WavpackFlushSamples() should be called -// between sending the last samples and calling this for trailer data to make -// sure that headers and trailers don't get mixed up in very short files. If -// the exact contents of the RIFF header are not known because, for example, -// the file duration is uncertain or trailing chunks are possible, simply write -// a "dummy" header of the correct length. When all data has been written it -// will be possible to read the first block written and update the header -// directly. An example of this can be found in the Audition filter. A -// return of FALSE indicates an error. - -int WavpackAddWrapper (WavpackContext *wpc, void *data, uint32_t bcount) -{ - uint32_t index = WavpackGetSampleIndex (wpc); - unsigned char meta_id; - - if (!index || index == (uint32_t) -1) { - wpc->riff_header_added = TRUE; - meta_id = ID_RIFF_HEADER; - } - else { - wpc->riff_trailer_bytes += bcount; - meta_id = ID_RIFF_TRAILER; - } - - return add_to_metadata (wpc, data, bcount, meta_id); -} - -// Store computed MD5 sum in WavPack metadata. Note that the user must compute -// the 16 byte sum; it is not done here. A return of FALSE indicates an error. - -int WavpackStoreMD5Sum (WavpackContext *wpc, unsigned char data [16]) -{ - return add_to_metadata (wpc, data, 16, ID_MD5_CHECKSUM); -} - -static int create_riff_header (WavpackContext *wpc) -{ - RiffChunkHeader riffhdr; - ChunkHeader datahdr, fmthdr; - WaveHeader wavhdr; - - uint32_t total_samples = wpc->total_samples, total_data_bytes; - int32_t channel_mask = wpc->config.channel_mask; - int32_t sample_rate = wpc->config.sample_rate; - int bytes_per_sample = wpc->config.bytes_per_sample; - int bits_per_sample = wpc->config.bits_per_sample; - int format = (wpc->config.float_norm_exp) ? 
3 : 1; - int num_channels = wpc->config.num_channels; - int wavhdrsize = 16; - - wpc->riff_header_created = TRUE; - - if (format == 3 && wpc->config.float_norm_exp != 127) { - strcpy (wpc->error_message, "can't create valid RIFF wav header for non-normalized floating data!"); - return FALSE; - } - - if (total_samples == (uint32_t) -1) - total_samples = 0x7ffff000 / (bytes_per_sample * num_channels); - - total_data_bytes = total_samples * bytes_per_sample * num_channels; - - CLEAR (wavhdr); - - wavhdr.FormatTag = format; - wavhdr.NumChannels = num_channels; - wavhdr.SampleRate = sample_rate; - wavhdr.BytesPerSecond = sample_rate * num_channels * bytes_per_sample; - wavhdr.BlockAlign = bytes_per_sample * num_channels; - wavhdr.BitsPerSample = bits_per_sample; - - if (num_channels > 2 || channel_mask != 0x5 - num_channels) { - wavhdrsize = sizeof (wavhdr); - wavhdr.cbSize = 22; - wavhdr.ValidBitsPerSample = bits_per_sample; - wavhdr.SubFormat = format; - wavhdr.ChannelMask = channel_mask; - wavhdr.FormatTag = 0xfffe; - wavhdr.BitsPerSample = bytes_per_sample * 8; - wavhdr.GUID [4] = 0x10; - wavhdr.GUID [6] = 0x80; - wavhdr.GUID [9] = 0xaa; - wavhdr.GUID [11] = 0x38; - wavhdr.GUID [12] = 0x9b; - wavhdr.GUID [13] = 0x71; - } - - strncpy (riffhdr.ckID, "RIFF", sizeof (riffhdr.ckID)); - strncpy (riffhdr.formType, "WAVE", sizeof (riffhdr.formType)); - riffhdr.ckSize = sizeof (riffhdr) + wavhdrsize + sizeof (datahdr) + total_data_bytes; - strncpy (fmthdr.ckID, "fmt ", sizeof (fmthdr.ckID)); - fmthdr.ckSize = wavhdrsize; - - strncpy (datahdr.ckID, "data", sizeof (datahdr.ckID)); - datahdr.ckSize = total_data_bytes; - - // write the RIFF chunks up to just before the data starts - - native_to_little_endian (&riffhdr, ChunkHeaderFormat); - native_to_little_endian (&fmthdr, ChunkHeaderFormat); - native_to_little_endian (&wavhdr, WaveHeaderFormat); - native_to_little_endian (&datahdr, ChunkHeaderFormat); - - return add_to_metadata (wpc, &riffhdr, sizeof (riffhdr), ID_RIFF_HEADER) 
&& - add_to_metadata (wpc, &fmthdr, sizeof (fmthdr), ID_RIFF_HEADER) && - add_to_metadata (wpc, &wavhdr, wavhdrsize, ID_RIFF_HEADER) && - add_to_metadata (wpc, &datahdr, sizeof (datahdr), ID_RIFF_HEADER); -} - -static int pack_streams (WavpackContext *wpc, uint32_t block_samples) -{ - uint32_t max_blocksize, bcount; - unsigned char *outbuff, *outend, *out2buff, *out2end; - int result = TRUE; - - if ((wpc->config.flags & CONFIG_FLOAT_DATA) && !(wpc->config.flags & CONFIG_SKIP_WVX)) - max_blocksize = block_samples * 16 + 4096; - else - max_blocksize = block_samples * 10 + 4096; - - out2buff = (wpc->wvc_flag) ? malloc (max_blocksize) : NULL; - out2end = out2buff + max_blocksize; - outbuff = malloc (max_blocksize); - outend = outbuff + max_blocksize; - - for (wpc->current_stream = 0; wpc->current_stream < wpc->num_streams; wpc->current_stream++) { - WavpackStream *wps = wpc->streams [wpc->current_stream]; - uint32_t flags = wps->wphdr.flags; - - flags &= ~MAG_MASK; - flags += (1 << MAG_LSB) * ((flags & BYTES_STORED) * 8 + 7); - - wps->wphdr.block_index = wps->sample_index; - wps->wphdr.block_samples = block_samples; - wps->wphdr.flags = flags; - wps->block2buff = out2buff; - wps->block2end = out2end; - wps->blockbuff = outbuff; - wps->blockend = outend; - - result = pack_block (wpc, wps->sample_buffer); - wps->blockbuff = wps->block2buff = NULL; - - if (wps->wphdr.block_samples != block_samples) - block_samples = wps->wphdr.block_samples; - - if (!result) { - strcpy (wpc->error_message, "output buffer overflowed!"); - break; - } - - bcount = ((WavpackHeader *) outbuff)->ckSize + 8; - native_to_little_endian ((WavpackHeader *) outbuff, WavpackHeaderFormat); - result = wpc->blockout (wpc->wv_out, outbuff, bcount); - - if (!result) { - strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); - break; - } - - wpc->filelen += bcount; - - if (out2buff) { - bcount = ((WavpackHeader *) out2buff)->ckSize + 8; - native_to_little_endian ((WavpackHeader *) 
out2buff, WavpackHeaderFormat); - result = wpc->blockout (wpc->wvc_out, out2buff, bcount); - - if (!result) { - strcpy (wpc->error_message, "can't write WavPack data, disk probably full!"); - break; - } - - wpc->file2len += bcount; - } - - if (wpc->acc_samples != block_samples) - memmove (wps->sample_buffer, wps->sample_buffer + block_samples * (flags & MONO_FLAG ? 1 : 2), - (wpc->acc_samples - block_samples) * sizeof (int32_t) * (flags & MONO_FLAG ? 1 : 2)); - } - - wpc->current_stream = 0; - wpc->ave_block_samples = (wpc->ave_block_samples * 0x7 + block_samples + 0x4) >> 3; - wpc->acc_samples -= block_samples; - free (outbuff); - - if (out2buff) - free (out2buff); - - return result; -} - -// Given the pointer to the first block written (to either a .wv or .wvc file), -// update the block with the actual number of samples written. If the wav -// header was generated by the library, then it is updated also. This should -// be done if WavpackSetConfiguration() was called with an incorrect number -// of samples (or -1). It is the responsibility of the application to read and -// rewrite the block. An example of this can be found in the Audition filter. 
- -void WavpackUpdateNumSamples (WavpackContext *wpc, void *first_block) -{ - uint32_t wrapper_size; - - little_endian_to_native (first_block, WavpackHeaderFormat); - ((WavpackHeader *) first_block)->total_samples = WavpackGetSampleIndex (wpc); - - /* note that since the RIFF wrapper will not necessarily be properly aligned, - we copy it into a newly allocated buffer before modifying it */ - - if (wpc->riff_header_created) { - if (WavpackGetWrapperLocation (first_block, &wrapper_size)) { - uint32_t data_size = WavpackGetSampleIndex (wpc) * WavpackGetNumChannels (wpc) * WavpackGetBytesPerSample (wpc); - RiffChunkHeader *riffhdr; - ChunkHeader *datahdr; - void *wrapper_buff; - - riffhdr = wrapper_buff = malloc (wrapper_size); - memcpy (wrapper_buff, WavpackGetWrapperLocation (first_block, NULL), wrapper_size); - datahdr = (ChunkHeader *)((char *) riffhdr + wrapper_size - sizeof (ChunkHeader)); - - if (!strncmp (riffhdr->ckID, "RIFF", 4)) { - little_endian_to_native (riffhdr, ChunkHeaderFormat); - riffhdr->ckSize = wrapper_size + data_size - 8 + wpc->riff_trailer_bytes; - native_to_little_endian (riffhdr, ChunkHeaderFormat); - } - - if (!strncmp (datahdr->ckID, "data", 4)) { - little_endian_to_native (datahdr, ChunkHeaderFormat); - datahdr->ckSize = data_size; - native_to_little_endian (datahdr, ChunkHeaderFormat); - } - - memcpy (WavpackGetWrapperLocation (first_block, NULL), wrapper_buff, wrapper_size); - free (wrapper_buff); - } - } - - native_to_little_endian (first_block, WavpackHeaderFormat); -} - -// Note: The following function is no longer required because the wav header -// automatically generated for the application will also be updated by -// WavpackUpdateNumSamples (). However, if the application wants to generate -// its own header or wants to include additional chunks, then this function -// still must be used to update the application generated header. 
- -// Given the pointer to the first block written to a WavPack file, this -// function returns the location of the stored RIFF header that was originally -// written with WavpackAddWrapper(). This would normally be used to update -// the wav header to indicate that a different number of samples was actually -// written or if additional RIFF chunks are written at the end of the file. -// The "size" parameter can be set to non-NULL to obtain the exact size of the -// RIFF header, and the function will return FALSE if the header is not found -// in the block's metadata (or it is not a valid WavPack block). It is the -// responsibility of the application to read and rewrite the block. An example -// of this can be found in the Audition filter. - -static void *find_metadata (void *wavpack_block, int desired_id, uint32_t *size); - -void *WavpackGetWrapperLocation (void *first_block, uint32_t *size) -{ - void *loc; - - little_endian_to_native (first_block, WavpackHeaderFormat); - loc = find_metadata (first_block, ID_RIFF_HEADER, size); - native_to_little_endian (first_block, WavpackHeaderFormat); - - return loc; -} - -static void *find_metadata (void *wavpack_block, int desired_id, uint32_t *size) -{ - WavpackHeader *wphdr = wavpack_block; - unsigned char *dp, meta_id, c1, c2; - int32_t bcount, meta_bc; - - if (strncmp (wphdr->ckID, "wvpk", 4)) - return NULL; - - bcount = wphdr->ckSize - sizeof (WavpackHeader) + 8; - dp = (unsigned char *)(wphdr + 1); - - while (bcount >= 2) { - meta_id = *dp++; - c1 = *dp++; - - meta_bc = c1 << 1; - bcount -= 2; - - if (meta_id & ID_LARGE) { - if (bcount < 2) - break; - - c1 = *dp++; - c2 = *dp++; - meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); - bcount -= 2; - } - - if ((meta_id & ID_UNIQUE) == desired_id) { - if ((bcount - meta_bc) >= 0) { - if (size) - *size = meta_bc - ((meta_id & ID_ODD_SIZE) ? 
1 : 0); - - return dp; - } - else - return NULL; - } - - bcount -= meta_bc; - dp += meta_bc; - } - - return NULL; -} - -#endif - -// Get total number of samples contained in the WavPack file, or -1 if unknown - -uint32_t WavpackGetNumSamples (WavpackContext *wpc) -{ - return wpc ? wpc->total_samples : (uint32_t) -1; -} - -// Get the current sample index position, or -1 if unknown - -uint32_t WavpackGetSampleIndex (WavpackContext *wpc) -{ - if (wpc) { -#if !defined(VER4_ONLY) && !defined(NO_UNPACK) - if (wpc->stream3) - return get_sample_index3 (wpc); - else if (wpc->streams && wpc->streams [0]) - return wpc->streams [0]->sample_index; -#else - if (wpc->streams && wpc->streams [0]) - return wpc->streams [0]->sample_index; -#endif - } - - return (uint32_t) -1; -} - -// Get the number of errors encountered so far - -int WavpackGetNumErrors (WavpackContext *wpc) -{ - return wpc ? wpc->crc_errors : 0; -} - -// return TRUE if any uncorrected lossy blocks were actually written or read - -int WavpackLossyBlocks (WavpackContext *wpc) -{ - return wpc ? wpc->lossy_blocks : 0; -} - -// Calculate the progress through the file as a double from 0.0 (for begin) -// to 1.0 (for done). A return value of -1.0 indicates that the progress is -// unknown. - -double WavpackGetProgress (WavpackContext *wpc) -{ - if (wpc && wpc->total_samples != (uint32_t) -1 && wpc->total_samples != 0) - return (double) WavpackGetSampleIndex (wpc) / wpc->total_samples; - else - return -1.0; -} - -// Return the total size of the WavPack file(s) in bytes. - -uint32_t WavpackGetFileSize (WavpackContext *wpc) -{ - return wpc ? wpc->filelen + wpc->file2len : 0; -} - -// Calculate the ratio of the specified WavPack file size to the size of the -// original audio data as a double greater than 0.0 and (usually) smaller than -// 1.0. A value greater than 1.0 represents "negative" compression and a -// return value of 0.0 indicates that the ratio cannot be determined. 
- -double WavpackGetRatio (WavpackContext *wpc) -{ - if (wpc && wpc->total_samples != (uint32_t) -1 && wpc->filelen) { - double output_size = (double) wpc->total_samples * wpc->config.num_channels * - wpc->config.bytes_per_sample; - double input_size = (double) wpc->filelen + wpc->file2len; - - if (output_size >= 1.0 && input_size >= 1.0) - return input_size / output_size; - } - - return 0.0; -} - -// Calculate the average bitrate of the WavPack file in bits per second. A -// return of 0.0 indicates that the bitrate cannot be determined. An option is -// provided to use (or not use) any attendant .wvc file. - -double WavpackGetAverageBitrate (WavpackContext *wpc, int count_wvc) -{ - if (wpc && wpc->total_samples != (uint32_t) -1 && wpc->filelen) { - double output_time = (double) wpc->total_samples / wpc->config.sample_rate; - double input_size = (double) wpc->filelen + (count_wvc ? wpc->file2len : 0); - - if (output_time >= 0.1 && input_size >= 1.0) - return input_size * 8.0 / output_time; - } - - return 0.0; -} - -#ifndef NO_UNPACK - -// Calculate the bitrate of the current WavPack file block in bits per second. -// This can be used for an "instant" bit display and gets updated from about -// 1 to 4 times per second. A return of 0.0 indicates that the bitrate cannot -// be determined. 
- -double WavpackGetInstantBitrate (WavpackContext *wpc) -{ - if (wpc && wpc->stream3) - return WavpackGetAverageBitrate (wpc, TRUE); - - if (wpc && wpc->streams && wpc->streams [0] && wpc->streams [0]->wphdr.block_samples) { - double output_time = (double) wpc->streams [0]->wphdr.block_samples / wpc->config.sample_rate; - double input_size = 0; - int si; - - for (si = 0; si < wpc->num_streams; ++si) { - if (wpc->streams [si]->blockbuff) - input_size += ((WavpackHeader *) wpc->streams [si]->blockbuff)->ckSize; - - if (wpc->streams [si]->block2buff) - input_size += ((WavpackHeader *) wpc->streams [si]->block2buff)->ckSize; - } - - if (output_time > 0.0 && input_size >= 1.0) - return input_size * 8.0 / output_time; - } - - return 0.0; -} - -#endif - -// Close the specified WavPack file and release all resources used by it. -// Returns NULL. - -WavpackContext *WavpackCloseFile (WavpackContext *wpc) -{ - if (wpc->streams) { - free_streams (wpc); - - if (wpc->streams [0]) - free (wpc->streams [0]); - - free (wpc->streams); - } - -#if !defined(VER4_ONLY) && !defined(NO_UNPACK) - if (wpc->stream3) - free_stream3 (wpc); -#endif - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - if (wpc->close_files) { -#ifndef NO_USE_FSTREAMS - if (wpc->wv_in != NULL) - fclose (wpc->wv_in); - - if (wpc->wvc_in != NULL) - fclose (wpc->wvc_in); -#endif - } - - WavpackFreeWrapper (wpc); -#endif - -#ifndef NO_TAGS - free_tag (&wpc->m_tag); -#endif - - free (wpc); - - return NULL; -} - -// Returns the sample rate of the specified WavPack file - -uint32_t WavpackGetSampleRate (WavpackContext *wpc) -{ - return wpc ? wpc->config.sample_rate : 44100; -} - -// Returns the number of channels of the specified WavPack file. Note that -// this is the actual number of channels contained in the file even if the -// OPEN_2CH_MAX flag was specified when the file was opened. - -int WavpackGetNumChannels (WavpackContext *wpc) -{ - return wpc ? 
wpc->config.num_channels : 2; -} - -// Returns the standard Microsoft channel mask for the specified WavPack -// file. A value of zero indicates that there is no speaker assignment -// information. - -int WavpackGetChannelMask (WavpackContext *wpc) -{ - return wpc ? wpc->config.channel_mask : 0; -} - -// Return the normalization value for floating point data (valid only -// if floating point data is present). A value of 127 indicates that -// the floating point range is +/- 1.0. Higher values indicate a -// larger floating point range. - -int WavpackGetFloatNormExp (WavpackContext *wpc) -{ - return wpc->config.float_norm_exp; -} - -// Returns the actual number of valid bits per sample contained in the -// original file, which may or may not be a multiple of 8. Floating data -// always has 32 bits, integers may be from 1 to 32 bits each. When this -// value is not a multiple of 8, then the "extra" bits are located in the -// LSBs of the results. That is, values are right justified when unpacked -// into ints, but are left justified in the number of bytes used by the -// original data. - -int WavpackGetBitsPerSample (WavpackContext *wpc) -{ - return wpc ? wpc->config.bits_per_sample : 16; -} - -// Returns the number of bytes used for each sample (1 to 4) in the original -// file. This is required information for the user of this module because the -// audio data is returned in the LOWER bytes of the long buffer and must be -// left-shifted 8, 16, or 24 bits if normalized longs are required. - -int WavpackGetBytesPerSample (WavpackContext *wpc) -{ - return wpc ? wpc->config.bytes_per_sample : 2; -} - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - -// If the OPEN_2CH_MAX flag is specified when opening the file, this function -// will return the actual number of channels decoded from the file (which may -// or may not be less than the actual number of channels, but will always be -// 1 or 2). 
Normally, this will be the front left and right channels of a -// multichannel file. - -int WavpackGetReducedChannels (WavpackContext *wpc) -{ - if (wpc) - return wpc->reduced_channels ? wpc->reduced_channels : wpc->config.num_channels; - else - return 2; -} - -// These routines are used to access (and free) header and trailer data that -// was retrieved from the Wavpack file. The header will be available before -// the samples are decoded and the trailer will be available after all samples -// have been read. - -uint32_t WavpackGetWrapperBytes (WavpackContext *wpc) -{ - return wpc ? wpc->wrapper_bytes : 0; -} - -unsigned char *WavpackGetWrapperData (WavpackContext *wpc) -{ - return wpc ? wpc->wrapper_data : NULL; -} - -void WavpackFreeWrapper (WavpackContext *wpc) -{ - if (wpc && wpc->wrapper_data) { - free (wpc->wrapper_data); - wpc->wrapper_data = NULL; - wpc->wrapper_bytes = 0; - } -} - -// Normally the trailing wrapper will not be available when a WavPack file is first -// opened for reading because it is stored in the final block of the file. This -// function forces a seek to the end of the file to pick up any trailing wrapper -// stored there (then use WavPackGetWrapper**() to obtain). This can obviously only -// be used for seekable files (not pipes) and is not available for pre-4.0 WavPack -// files. - -static void seek_riff_trailer (WavpackContext *wpc); - -void WavpackSeekTrailingWrapper (WavpackContext *wpc) -{ - if ((wpc->open_flags & OPEN_WRAPPER) && - wpc->reader->can_seek (wpc->wv_in) && !wpc->stream3) { - uint32_t pos_save = wpc->reader->get_pos (wpc->wv_in); - - seek_riff_trailer (wpc); - wpc->reader->set_pos_abs (wpc->wv_in, pos_save); - } -} - -// Get any MD5 checksum stored in the metadata (should be called after reading -// last sample or an extra seek will occur). A return value of FALSE indicates -// that no MD5 checksum was stored. 
- -static int seek_md5 (WavpackStreamReader *reader, void *id, unsigned char data [16]); - -int WavpackGetMD5Sum (WavpackContext *wpc, unsigned char data [16]) -{ - if (wpc->config.flags & CONFIG_MD5_CHECKSUM) { - if (wpc->config.md5_read) { - memcpy (data, wpc->config.md5_checksum, 16); - return TRUE; - } - else if (wpc->reader->can_seek (wpc->wv_in)) { - uint32_t pos_save = wpc->reader->get_pos (wpc->wv_in); - - wpc->config.md5_read = seek_md5 (wpc->reader, wpc->wv_in, wpc->config.md5_checksum); - wpc->reader->set_pos_abs (wpc->wv_in, pos_save); - - if (wpc->config.md5_read) { - memcpy (data, wpc->config.md5_checksum, 16); - return TRUE; - } - else - return FALSE; - } - } - - return FALSE; -} - -#endif - -// Free all memory allocated for raw WavPack blocks (for all allocated streams) -// and free all additonal streams. This does not free the default stream ([0]) -// which is always kept around. - -static void free_streams (WavpackContext *wpc) -{ - int si = wpc->num_streams; - - while (si--) { - if (wpc->streams [si]->blockbuff) { - free (wpc->streams [si]->blockbuff); - wpc->streams [si]->blockbuff = NULL; - } - - if (wpc->streams [si]->block2buff) { - free (wpc->streams [si]->block2buff); - wpc->streams [si]->block2buff = NULL; - } - - if (wpc->streams [si]->sample_buffer) { - free (wpc->streams [si]->sample_buffer); - wpc->streams [si]->sample_buffer = NULL; - } - - if (wpc->streams [si]->dc.shaping_data) { - free (wpc->streams [si]->dc.shaping_data); - wpc->streams [si]->dc.shaping_data = NULL; - } - - if (si) { - wpc->num_streams--; - free (wpc->streams [si]); - wpc->streams [si] = NULL; - } - } - - wpc->current_stream = 0; -} - -#if !defined(NO_UNPACK) || defined(INFO_ONLY) - -// Read from current file position until a valid 32-byte WavPack 4.0 header is -// found and read into the specified pointer. The number of bytes skipped is -// returned. If no WavPack header is found within 1 meg, then a -1 is returned -// to indicate the error. 
No additional bytes are read past the header and it -// is returned in the processor's native endian mode. Seeking is not required. - -static uint32_t read_next_header (WavpackStreamReader *reader, void *id, WavpackHeader *wphdr) -{ - unsigned char buffer [sizeof (*wphdr)], *sp = buffer + sizeof (*wphdr), *ep = sp; - uint32_t bytes_skipped = 0; - int bleft; - - while (1) { - if (sp < ep) { - bleft = (int)(ep - sp); - memcpy (buffer, sp, bleft); - } - else - bleft = 0; - - if (reader->read_bytes (id, buffer + bleft, sizeof (*wphdr) - bleft) != sizeof (*wphdr) - bleft) - return -1; - - sp = buffer; - - if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' && - !(*++sp & 1) && sp [2] < 16 && !sp [3] && (sp [2] || sp [1] || *sp >= 24) && sp [5] == 4 && - sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff) && sp [18] < 3 && !sp [19]) { - memcpy (wphdr, buffer, sizeof (*wphdr)); - little_endian_to_native (wphdr, WavpackHeaderFormat); - return bytes_skipped; - } - - while (sp < ep && *sp != 'w') - sp++; - - if ((bytes_skipped += (uint32_t)(sp - buffer)) > 1024 * 1024) - return -1; - } -} - -// This function is used to seek to end of a file to determine its actual -// length in samples by reading the last header block containing data. -// Currently, all WavPack files contain the sample length in the first block -// containing samples, however this might not always be the case. Obviously, -// this function requires a seekable file or stream and leaves the file -// pointer undefined. A return value of -1 indicates the length could not -// be determined. 
- -static uint32_t seek_final_index (WavpackStreamReader *reader, void *id) -{ - uint32_t result = (uint32_t) -1, bcount; - WavpackHeader wphdr; - unsigned char *tempbuff; - - if (reader->get_length (id) > 1200000L) - reader->set_pos_rel (id, -1048576L, SEEK_END); - else - reader->set_pos_abs (id, 0); - - while (1) { - bcount = read_next_header (reader, id, &wphdr); - - if (bcount == (uint32_t) -1) - return result; - - tempbuff = malloc (wphdr.ckSize + 8); - memcpy (tempbuff, &wphdr, 32); - - if (reader->read_bytes (id, tempbuff + 32, wphdr.ckSize - 24) != wphdr.ckSize - 24) { - free (tempbuff); - return result; - } - - free (tempbuff); - - if (wphdr.block_samples && (wphdr.flags & FINAL_BLOCK)) - result = wphdr.block_index + wphdr.block_samples; - } -} - -static int seek_md5 (WavpackStreamReader *reader, void *id, unsigned char data [16]) -{ - unsigned char meta_id, c1, c2; - uint32_t bcount, meta_bc; - WavpackHeader wphdr; - - if (reader->get_length (id) > 1200000L) - reader->set_pos_rel (id, -1048576L, SEEK_END); - - while (1) { - bcount = read_next_header (reader, id, &wphdr); - - if (bcount == (uint32_t) -1) - return FALSE; - - bcount = wphdr.ckSize - sizeof (WavpackHeader) + 8; - - while (bcount >= 2) { - if (reader->read_bytes (id, &meta_id, 1) != 1 || - reader->read_bytes (id, &c1, 1) != 1) - return FALSE; - - meta_bc = c1 << 1; - bcount -= 2; - - if (meta_id & ID_LARGE) { - if (bcount < 2 || reader->read_bytes (id, &c1, 1) != 1 || - reader->read_bytes (id, &c2, 1) != 1) - return FALSE; - - meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); - bcount -= 2; - } - - if (meta_id == ID_MD5_CHECKSUM) - return (meta_bc == 16 && bcount >= 16 && - reader->read_bytes (id, data, 16) == 16); - - reader->set_pos_rel (id, meta_bc, SEEK_CUR); - bcount -= meta_bc; - } - } -} - -static void seek_riff_trailer (WavpackContext *wpc) -{ - WavpackStreamReader *reader = wpc->reader; - void *id = wpc->wv_in; - unsigned char meta_id, c1, c2; - uint32_t bcount, meta_bc; - 
WavpackHeader wphdr; - - if (reader->get_length (id) > 1200000L) - reader->set_pos_rel (id, -1048576L, SEEK_END); - - while (1) { - bcount = read_next_header (reader, id, &wphdr); - - if (bcount == (uint32_t) -1) - return; - - bcount = wphdr.ckSize - sizeof (WavpackHeader) + 8; - - while (bcount >= 2) { - if (reader->read_bytes (id, &meta_id, 1) != 1 || - reader->read_bytes (id, &c1, 1) != 1) - return; - - meta_bc = c1 << 1; - bcount -= 2; - - if (meta_id & ID_LARGE) { - if (bcount < 2 || reader->read_bytes (id, &c1, 1) != 1 || - reader->read_bytes (id, &c2, 1) != 1) - return; - - meta_bc += ((uint32_t) c1 << 9) + ((uint32_t) c2 << 17); - bcount -= 2; - } - - if ((meta_id & ID_UNIQUE) == ID_RIFF_TRAILER) { - wpc->wrapper_data = realloc (wpc->wrapper_data, wpc->wrapper_bytes + meta_bc); - - if (reader->read_bytes (id, wpc->wrapper_data + wpc->wrapper_bytes, meta_bc) == meta_bc) - wpc->wrapper_bytes += meta_bc; - else - return; - } - else - reader->set_pos_rel (id, meta_bc, SEEK_CUR); - - bcount -= meta_bc; - } - } -} - -// Compare the regular wv file block header to a potential matching wvc -// file block header and return action code based on analysis: -// -// 0 = use wvc block (assuming rest of block is readable) -// 1 = bad match; try to read next wvc block -// -1 = bad match; ignore wvc file for this block and backup fp (if -// possible) and try to use this block next time - -static int match_wvc_header (WavpackHeader *wv_hdr, WavpackHeader *wvc_hdr) -{ - if (wv_hdr->block_index == wvc_hdr->block_index && - wv_hdr->block_samples == wvc_hdr->block_samples) { - int wvi = 0, wvci = 0; - - if (wv_hdr->flags == wvc_hdr->flags) - return 0; - - if (wv_hdr->flags & INITIAL_BLOCK) - wvi -= 1; - - if (wv_hdr->flags & FINAL_BLOCK) - wvi += 1; - - if (wvc_hdr->flags & INITIAL_BLOCK) - wvci -= 1; - - if (wvc_hdr->flags & FINAL_BLOCK) - wvci += 1; - - return (wvci - wvi < 0) ? 
1 : -1; - } - - if ((int32_t)(wvc_hdr->block_index - wv_hdr->block_index) < 0) - return 1; - else - return -1; -} - -// Read the wvc block that matches the regular wv block that has been -// read for the current stream. If an exact match is not found then -// we either keep reading or back up and (possibly) use the block -// later. The skip_wvc flag is set if not matching wvc block is found -// so that we can still decode using only the lossy version (although -// we flag this as an error). A return of FALSE indicates a serious -// error (not just that we missed one wvc block). - -static int read_wvc_block (WavpackContext *wpc) -{ - WavpackStream *wps = wpc->streams [wpc->current_stream]; - uint32_t bcount, file2pos; - WavpackHeader wphdr; - int compare_result; - - while (1) { - file2pos = wpc->reader->get_pos (wpc->wvc_in); - bcount = read_next_header (wpc->reader, wpc->wvc_in, &wphdr); - - if (bcount == (uint32_t) -1) { - wps->wvc_skip = TRUE; - wpc->crc_errors++; - return FALSE; - } - - if (wpc->open_flags & OPEN_STREAMING) - wphdr.block_index = wps->sample_index = 0; - else - wphdr.block_index -= wpc->initial_index; - - if (wphdr.flags & INITIAL_BLOCK) - wpc->file2pos = file2pos + bcount; - - compare_result = match_wvc_header (&wps->wphdr, &wphdr); - - if (!compare_result) { - wps->block2buff = malloc (wphdr.ckSize + 8); - memcpy (wps->block2buff, &wphdr, 32); - - if (wpc->reader->read_bytes (wpc->wvc_in, wps->block2buff + 32, wphdr.ckSize - 24) != - wphdr.ckSize - 24 || (wphdr.flags & UNKNOWN_FLAGS)) { - free (wps->block2buff); - wps->block2buff = NULL; - wps->wvc_skip = TRUE; - wpc->crc_errors++; - return FALSE; - } - - wps->wvc_skip = FALSE; - memcpy (&wps->wphdr, &wphdr, 32); - return TRUE; - } - else if (compare_result == -1) { - wps->wvc_skip = TRUE; - wpc->reader->set_pos_rel (wpc->wvc_in, -32, SEEK_CUR); - wpc->crc_errors++; - return TRUE; - } - } -} - -#ifndef NO_SEEKING - -// Find a valid WavPack header, searching either from the current file position 
-// (or from the specified position if not -1) and store it (endian corrected) -// at the specified pointer. The return value is the exact file position of the -// header, although we may have actually read past it. Because this function -// is used for seeking to a specific audio sample, it only considers blocks -// that contain audio samples for the initial stream to be valid. - -#define BUFSIZE 4096 - -static uint32_t find_header (WavpackStreamReader *reader, void *id, uint32_t filepos, WavpackHeader *wphdr) -{ - unsigned char *buffer = malloc (BUFSIZE), *sp = buffer, *ep = buffer; - - if (filepos != (uint32_t) -1 && reader->set_pos_abs (id, filepos)) { - free (buffer); - return -1; - } - - while (1) { - int bleft; - - if (sp < ep) { - bleft = (int)(ep - sp); - memcpy (buffer, sp, bleft); - ep -= (sp - buffer); - sp = buffer; - } - else { - if (sp > ep) - if (reader->set_pos_rel (id, (int32_t)(sp - ep), SEEK_CUR)) { - free (buffer); - return -1; - } - - sp = ep = buffer; - bleft = 0; - } - - ep += reader->read_bytes (id, ep, BUFSIZE - bleft); - - if (ep - sp < 32) { - free (buffer); - return -1; - } - - while (sp + 32 <= ep) - if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' && - !(*++sp & 1) && sp [2] < 16 && !sp [3] && (sp [2] || sp [1] || *sp >= 24) && sp [5] == 4 && - sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff) && sp [18] < 3 && !sp [19]) { - memcpy (wphdr, sp - 4, sizeof (*wphdr)); - little_endian_to_native (wphdr, WavpackHeaderFormat); - - if (wphdr->block_samples && (wphdr->flags & INITIAL_BLOCK)) { - free (buffer); - return (uint32_t) (reader->get_pos (id) - (ep - sp + 4)); - } - - if (wphdr->ckSize > 1024) - sp += wphdr->ckSize - 1024; - } - } -} - -// Find the WavPack block that contains the specified sample. If "header_pos" -// is zero, then no information is assumed except the total number of samples -// in the file and its size in bytes. 
If "header_pos" is non-zero then we -// assume that it is the file position of the valid header image contained in -// the first stream and we can limit our search to either the portion above -// or below that point. If a .wvc file is being used, then this must be called -// for that file also. - -static uint32_t find_sample (WavpackContext *wpc, void *infile, uint32_t header_pos, uint32_t sample) -{ - WavpackStream *wps = wpc->streams [wpc->current_stream]; - uint32_t file_pos1 = 0, file_pos2 = wpc->reader->get_length (infile); - uint32_t sample_pos1 = 0, sample_pos2 = wpc->total_samples; - double ratio = 0.96; - int file_skip = 0; - - if (sample >= wpc->total_samples) - return -1; - - if (header_pos && wps->wphdr.block_samples) { - if (wps->wphdr.block_index > sample) { - sample_pos2 = wps->wphdr.block_index; - file_pos2 = header_pos; - } - else if (wps->wphdr.block_index + wps->wphdr.block_samples <= sample) { - sample_pos1 = wps->wphdr.block_index; - file_pos1 = header_pos; - } - else - return header_pos; - } - - while (1) { - double bytes_per_sample; - uint32_t seek_pos; - - bytes_per_sample = file_pos2 - file_pos1; - bytes_per_sample /= sample_pos2 - sample_pos1; - seek_pos = file_pos1 + (file_skip ? 
32 : 0); - seek_pos += (uint32_t)(bytes_per_sample * (sample - sample_pos1) * ratio); - seek_pos = find_header (wpc->reader, infile, seek_pos, &wps->wphdr); - - if (seek_pos != (uint32_t) -1) - wps->wphdr.block_index -= wpc->initial_index; - - if (seek_pos == (uint32_t) -1 || seek_pos >= file_pos2) { - if (ratio > 0.0) { - if ((ratio -= 0.24) < 0.0) - ratio = 0.0; - } - else - return -1; - } - else if (wps->wphdr.block_index > sample) { - sample_pos2 = wps->wphdr.block_index; - file_pos2 = seek_pos; - } - else if (wps->wphdr.block_index + wps->wphdr.block_samples <= sample) { - - if (seek_pos == file_pos1) - file_skip = 1; - else { - sample_pos1 = wps->wphdr.block_index; - file_pos1 = seek_pos; - } - } - else - return seek_pos; - } -} - -#endif - -#endif - -void WavpackLittleEndianToNative (void *data, char *format) -{ - little_endian_to_native (data, format); -} - -void WavpackNativeToLittleEndian (void *data, char *format) -{ - native_to_little_endian (data, format); -} - -uint32_t WavpackGetLibraryVersion (void) -{ - return (LIBWAVPACK_MAJOR<<16) - |(LIBWAVPACK_MINOR<<8) - |(LIBWAVPACK_MICRO<<0); -} - -const char *WavpackGetLibraryVersionString (void) -{ - return LIBWAVPACK_VERSION_STRING; -} - diff --git a/third_party/wavpack/src/write_words.c b/third_party/wavpack/src/write_words.c new file mode 100644 index 0000000..6e6c6b6 --- /dev/null +++ b/third_party/wavpack/src/write_words.c @@ -0,0 +1,688 @@ +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2013 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// write_words.c + +// This module provides entropy word encoding functions using +// a variation on the Rice method. 
This was introduced in version 3.93 +// because it allows splitting the data into a "lossy" stream and a +// "correction" stream in a very efficient manner and is therefore ideal +// for the "hybrid" mode. For 4.0, the efficiency of this method was +// significantly improved by moving away from the normal Rice restriction of +// using powers of two for the modulus divisions and now the method can be +// used for both hybrid and pure lossless encoding. + +// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%), +// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the +// previous. Using standard Rice coding on this data would result in 1.4 +// bits per sample average (not counting sign bit). However, there is a +// very simple encoding that is over 99% efficient with this data and +// results in about 1.22 bits per sample. + +#include +#include + +#include "wavpack_local.h" + +///////////////////////////// executable code //////////////////////////////// + +// Initialize entropy encoder for the specified stream. In lossless mode there +// are no parameters to select; in hybrid mode the bitrate mode and value need +// be initialized. + +static void word_set_bitrate (WavpackStream *wps); + +void init_words (WavpackStream *wps) +{ + CLEAR (wps->w); + + if (wps->wphdr.flags & HYBRID_FLAG) + word_set_bitrate (wps); +} + +// Set up parameters for hybrid mode based on header flags and "bits" field. +// This is currently only set up for the HYBRID_BITRATE mode in which the +// allowed error varies with the residual level (from "slow_level"). The +// simpler mode (which is not used yet) has the error level directly +// controlled from the metadata. + +static void word_set_bitrate (WavpackStream *wps) +{ + int bitrate_0, bitrate_1; + + if (wps->wphdr.flags & HYBRID_BITRATE) { + if (wps->wphdr.flags & FALSE_STEREO) + bitrate_0 = (wps->bits * 2 - 512) < 568 ? 0 : (wps->bits * 2 - 512) - 568; + else + bitrate_0 = wps->bits < 568 ? 
0 : wps->bits - 568; + + if (!(wps->wphdr.flags & MONO_DATA)) { + + if (wps->wphdr.flags & HYBRID_BALANCE) + bitrate_1 = (wps->wphdr.flags & JOINT_STEREO) ? 256 : 0; + else { + bitrate_1 = bitrate_0; + + if (wps->wphdr.flags & JOINT_STEREO) { + if (bitrate_0 < 128) { + bitrate_1 += bitrate_0; + bitrate_0 = 0; + } + else { + bitrate_0 -= 128; + bitrate_1 += 128; + } + } + } + } + else + bitrate_1 = 0; + } + else + bitrate_0 = bitrate_1 = 0; + + wps->w.bitrate_acc [0] = (int32_t) bitrate_0 << 16; + wps->w.bitrate_acc [1] = (int32_t) bitrate_1 << 16; +} + +// Allocates the correct space in the metadata structure and writes the +// current median values to it. Values are converted from 32-bit unsigned +// to our internal 16-bit wp_log2 values, and read_entropy_vars () is called +// to read the values back because we must compensate for the loss through +// the log function. + +void write_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char *byteptr; + int temp; + + byteptr = wpmd->data = malloc (12); + wpmd->id = ID_ENTROPY_VARS; + + *byteptr++ = temp = wp_log2 (wps->w.c [0].median [0]); + *byteptr++ = temp >> 8; + *byteptr++ = temp = wp_log2 (wps->w.c [0].median [1]); + *byteptr++ = temp >> 8; + *byteptr++ = temp = wp_log2 (wps->w.c [0].median [2]); + *byteptr++ = temp >> 8; + + if (!(wps->wphdr.flags & MONO_DATA)) { + *byteptr++ = temp = wp_log2 (wps->w.c [1].median [0]); + *byteptr++ = temp >> 8; + *byteptr++ = temp = wp_log2 (wps->w.c [1].median [1]); + *byteptr++ = temp >> 8; + *byteptr++ = temp = wp_log2 (wps->w.c [1].median [2]); + *byteptr++ = temp >> 8; + } + + wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data); + read_entropy_vars (wps, wpmd); +} + +// Allocates enough space in the metadata structure and writes the current +// high word of the bitrate accumulator and the slow_level values to it. The +// slow_level values are converted from 32-bit unsigned to our internal 16-bit +// wp_log2 values. 
Afterward, read_entropy_vars () is called to read the values +// back because we must compensate for the loss through the log function and +// the truncation of the bitrate. + +void write_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd) +{ + unsigned char *byteptr; + int temp; + + word_set_bitrate (wps); + byteptr = wpmd->data = malloc (512); + wpmd->id = ID_HYBRID_PROFILE; + + if (wps->wphdr.flags & HYBRID_BITRATE) { + *byteptr++ = temp = wp_log2s (wps->w.c [0].slow_level); + *byteptr++ = temp >> 8; + + if (!(wps->wphdr.flags & MONO_DATA)) { + *byteptr++ = temp = wp_log2s (wps->w.c [1].slow_level); + *byteptr++ = temp >> 8; + } + } + + *byteptr++ = temp = wps->w.bitrate_acc [0] >> 16; + *byteptr++ = temp >> 8; + + if (!(wps->wphdr.flags & MONO_DATA)) { + *byteptr++ = temp = wps->w.bitrate_acc [1] >> 16; + *byteptr++ = temp >> 8; + } + + if (wps->w.bitrate_delta [0] | wps->w.bitrate_delta [1]) { + *byteptr++ = temp = wp_log2s (wps->w.bitrate_delta [0]); + *byteptr++ = temp >> 8; + + if (!(wps->wphdr.flags & MONO_DATA)) { + *byteptr++ = temp = wp_log2s (wps->w.bitrate_delta [1]); + *byteptr++ = temp >> 8; + } + } + + wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data); + read_hybrid_profile (wps, wpmd); +} + +// This function writes the specified word to the open bitstream "wvbits" and, +// if the bitstream "wvcbits" is open, writes any correction data there. This +// function will work for either lossless or hybrid but because a version +// optimized for lossless exits below, it would normally be used for the hybrid +// mode only. The return value is the actual value stored to the stream (even +// if a correction file is being created) and is used as feedback to the +// predictor. + +int32_t FASTCALL send_word (WavpackStream *wps, int32_t value, int chan) +{ + struct entropy_data *c = wps->w.c + chan; + uint32_t ones_count, low, mid, high; + int sign = (value < 0) ? 
1 : 0; + + if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) { + if (wps->w.zeros_acc) { + if (value) + flush_word (wps); + else { + c->slow_level -= (c->slow_level + SLO) >> SLS; + wps->w.zeros_acc++; + return 0; + } + } + else if (value) + putbit_0 (&wps->wvbits); + else { + c->slow_level -= (c->slow_level + SLO) >> SLS; + CLEAR (wps->w.c [0].median); + CLEAR (wps->w.c [1].median); + wps->w.zeros_acc = 1; + return 0; + } + } + + if (sign) + value = ~value; + + if ((wps->wphdr.flags & HYBRID_FLAG) && !chan) + update_error_limit (wps); + + if (value < (int32_t) GET_MED (0)) { + ones_count = low = 0; + high = GET_MED (0) - 1; + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (value - low < GET_MED (1)) { + ones_count = 1; + high = low + GET_MED (1) - 1; + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (value - low < GET_MED (2)) { + ones_count = 2; + high = low + GET_MED (2) - 1; + DEC_MED2 (); + } + else { + ones_count = 2 + (value - low) / GET_MED (2); + low += (ones_count - 2) * GET_MED (2); + high = low + GET_MED (2) - 1; + INC_MED2 (); + } + } + } + + mid = (high + low + 1) >> 1; + + if (wps->w.holding_zero) { + if (ones_count) + wps->w.holding_one++; + + flush_word (wps); + + if (ones_count) { + wps->w.holding_zero = 1; + ones_count--; + } + else + wps->w.holding_zero = 0; + } + else + wps->w.holding_zero = 1; + + wps->w.holding_one = ones_count * 2; + + if (!c->error_limit) { + if (high != low) { + uint32_t maxcode = high - low, code = value - low; + int bitcount = count_bits (maxcode); + uint32_t extras = bitset [bitcount] - maxcode - 1; + + if (code < extras) { + wps->w.pend_data |= code << wps->w.pend_count; + wps->w.pend_count += bitcount - 1; + } + else { + wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count; + wps->w.pend_count += bitcount - 1; + wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++; + } + } + + mid = value; + } + else + while (high 
- low > c->error_limit) + if (value < (int32_t) mid) { + mid = ((high = mid - 1) + low + 1) >> 1; + wps->w.pend_count++; + } + else { + mid = (high + (low = mid) + 1) >> 1; + wps->w.pend_data |= bitset [wps->w.pend_count++]; + } + + wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++); + + if (!wps->w.holding_zero) + flush_word (wps); + + if (bs_is_open (&wps->wvcbits) && c->error_limit) { + uint32_t code = value - low, maxcode = high - low; + int bitcount = count_bits (maxcode); + uint32_t extras = bitset [bitcount] - maxcode - 1; + + if (bitcount) { + if (code < extras) + putbits (code, bitcount - 1, &wps->wvcbits); + else { + putbits ((code + extras) >> 1, bitcount - 1, &wps->wvcbits); + putbit ((code + extras) & 1, &wps->wvcbits); + } + } + } + + if (wps->wphdr.flags & HYBRID_BITRATE) { + c->slow_level -= (c->slow_level + SLO) >> SLS; + c->slow_level += wp_log2 (mid); + } + + return sign ? ~mid : mid; +} + +// This function is an optimized version of send_word() that only handles +// lossless (error_limit == 0) and sends an entire buffer of either mono or +// stereo data rather than a single sample. Unlike the generalized +// send_word(), it does not return values because it always encodes +// the exact value passed. + +void send_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsamples) +{ + struct entropy_data *c = wps->w.c; + int32_t value, csamples; + + if (!(wps->wphdr.flags & MONO_DATA)) + nsamples *= 2; + + for (csamples = 0; csamples < nsamples; ++csamples) { + int sign = ((value = *buffer++) < 0) ? 
1 : 0; + uint32_t ones_count, low, high; + + if (!(wps->wphdr.flags & MONO_DATA)) + c = wps->w.c + (csamples & 1); + + if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) { + if (wps->w.zeros_acc) { + if (value) + flush_word (wps); + else { + wps->w.zeros_acc++; + continue; + } + } + else if (value) + putbit_0 (&wps->wvbits); + else { + CLEAR (wps->w.c [0].median); + CLEAR (wps->w.c [1].median); + wps->w.zeros_acc = 1; + continue; + } + } + + if (sign) + value = ~value; + + if (value < (int32_t) GET_MED (0)) { + ones_count = low = 0; + high = GET_MED (0) - 1; + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (value - low < GET_MED (1)) { + ones_count = 1; + high = low + GET_MED (1) - 1; + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (value - low < GET_MED (2)) { + ones_count = 2; + high = low + GET_MED (2) - 1; + DEC_MED2 (); + } + else { + ones_count = 2 + (value - low) / GET_MED (2); + low += (ones_count - 2) * GET_MED (2); + high = low + GET_MED (2) - 1; + INC_MED2 (); + } + } + } + + if (wps->w.holding_zero) { + if (ones_count) + wps->w.holding_one++; + + flush_word (wps); + + if (ones_count) { + wps->w.holding_zero = 1; + ones_count--; + } + else + wps->w.holding_zero = 0; + } + else + wps->w.holding_zero = 1; + + wps->w.holding_one = ones_count * 2; + + if (high != low) { + uint32_t maxcode = high - low, code = value - low; + int bitcount = count_bits (maxcode); + uint32_t extras = bitset [bitcount] - maxcode - 1; + + if (code < extras) { + wps->w.pend_data |= code << wps->w.pend_count; + wps->w.pend_count += bitcount - 1; + } + else { + wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count; + wps->w.pend_count += bitcount - 1; + wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++; + } + } + + wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++); + + if (!wps->w.holding_zero) + flush_word (wps); + } +} + +// Used by send_word() and 
send_word_lossless() to actually send most the +// accumulated data onto the bitstream. This is also called directly from +// clients when all words have been sent. + +void flush_word (WavpackStream *wps) +{ + if (wps->w.zeros_acc) { + int cbits = count_bits (wps->w.zeros_acc); + + while (cbits--) + putbit_1 (&wps->wvbits); + + putbit_0 (&wps->wvbits); + + while (wps->w.zeros_acc > 1) { + putbit (wps->w.zeros_acc & 1, &wps->wvbits); + wps->w.zeros_acc >>= 1; + } + + wps->w.zeros_acc = 0; + } + + if (wps->w.holding_one) { +#ifdef LIMIT_ONES + if (wps->w.holding_one >= LIMIT_ONES) { + int cbits; + + putbits ((1L << LIMIT_ONES) - 1, LIMIT_ONES + 1, &wps->wvbits); + wps->w.holding_one -= LIMIT_ONES; + cbits = count_bits (wps->w.holding_one); + + while (cbits--) + putbit_1 (&wps->wvbits); + + putbit_0 (&wps->wvbits); + + while (wps->w.holding_one > 1) { + putbit (wps->w.holding_one & 1, &wps->wvbits); + wps->w.holding_one >>= 1; + } + + wps->w.holding_zero = 0; + } + else + putbits (bitmask [wps->w.holding_one], wps->w.holding_one, &wps->wvbits); + + wps->w.holding_one = 0; +#else + do { + putbit_1 (&wps->wvbits); + } while (--wps->w.holding_one); +#endif + } + + if (wps->w.holding_zero) { + putbit_0 (&wps->wvbits); + wps->w.holding_zero = 0; + } + + if (wps->w.pend_count) { + putbits (wps->w.pend_data, wps->w.pend_count, &wps->wvbits); + wps->w.pend_data = wps->w.pend_count = 0; + } +} + +// This function is similar to send_word() except that no data is actually +// written to any stream, but it does return the value that would have been +// sent to a hybrid stream. It is used to determine beforehand how much noise +// will be added to samples. + +int32_t nosend_word (WavpackStream *wps, int32_t value, int chan) +{ + struct entropy_data *c = wps->w.c + chan; + uint32_t ones_count, low, mid, high; + int sign = (value < 0) ? 
1 : 0; + + if (sign) + value = ~value; + + if ((wps->wphdr.flags & HYBRID_FLAG) && !chan) + update_error_limit (wps); + + if (value < (int32_t) GET_MED (0)) { + low = 0; + high = GET_MED (0) - 1; + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (value - low < GET_MED (1)) { + high = low + GET_MED (1) - 1; + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (value - low < GET_MED (2)) { + high = low + GET_MED (2) - 1; + DEC_MED2 (); + } + else { + ones_count = 2 + (value - low) / GET_MED (2); + low += (ones_count - 2) * GET_MED (2); + high = low + GET_MED (2) - 1; + INC_MED2 (); + } + } + } + + mid = (high + low + 1) >> 1; + + if (!c->error_limit) + mid = value; + else + while (high - low > c->error_limit) + if (value < (int32_t) mid) + mid = ((high = mid - 1) + low + 1) >> 1; + else + mid = (high + (low = mid) + 1) >> 1; + + c->slow_level -= (c->slow_level + SLO) >> SLS; + c->slow_level += wp_log2 (mid); + + return sign ? ~mid : mid; +} + +// This function is used to scan some number of samples to set the variables +// "slow_level" and the "median" array. In pure symetrical encoding mode this +// would not be needed because these values would simply be continued from the +// previous block. However, in the -X modes and the 32-bit modes we cannot do +// this because parameters may change between blocks and the variables might +// not apply. This function can work in mono or stereo and can scan a block +// in either direction. 
+ +static void scan_word_pass (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir) +{ + uint32_t flags = wps->wphdr.flags, value, low; + struct entropy_data *c = wps->w.c; + int chan; + + if (flags & MONO_DATA) { + if (dir < 0) { + samples += (num_samples - 1); + dir = -1; + } + else + dir = 1; + } + else { + if (dir < 0) { + samples += (num_samples - 1) * 2; + dir = -2; + } + else + dir = 2; + } + + while (num_samples--) { + + value = labs (samples [chan = 0]); + + if (flags & HYBRID_BITRATE) { + wps->w.c [0].slow_level -= (wps->w.c [0].slow_level + SLO) >> SLS; + wps->w.c [0].slow_level += wp_log2 (value); + } + + if (value < GET_MED (0)) { + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (value - low < GET_MED (1)) { + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (value - low < GET_MED (2)) { + DEC_MED2 (); + } + else { + INC_MED2 (); + } + } + } + + if (!(flags & MONO_DATA)) { + value = labs (samples [chan = 1]); + c++; + + if (wps->wphdr.flags & HYBRID_BITRATE) { + wps->w.c [1].slow_level -= (wps->w.c [1].slow_level + SLO) >> SLS; + wps->w.c [1].slow_level += wp_log2 (value); + } + + if (value < GET_MED (0)) { + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (value - low < GET_MED (1)) { + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (value - low < GET_MED (2)) { + DEC_MED2 (); + } + else { + INC_MED2 (); + } + } + } + + c--; + } + + samples += dir; + } +} + +// Wrapper for scan_word_pass() than ensures that at least 2048 samples are processed by +// potentially making multiple passes through the data. See description of scan_word_pass() +// for more details. 
+ +void scan_word (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir) +{ + init_words (wps); + + if (num_samples) { + int passes = (2048 + num_samples - 1) / num_samples; // i.e., ceil (2048.0 / num_samples) + + while (passes--) + scan_word_pass (wps, samples, num_samples, dir); + } +} +