summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am5
-rw-r--r--configure.in13
-rw-r--r--libdemac/SOURCES15
-rw-r--r--libdemac/crc.c120
-rw-r--r--libdemac/decoder.c193
-rw-r--r--libdemac/decoder.h40
-rw-r--r--libdemac/demac.h45
-rw-r--r--libdemac/demac_config.h112
-rw-r--r--libdemac/entropy.c464
-rw-r--r--libdemac/entropy.h40
-rw-r--r--libdemac/filter.c252
-rw-r--r--libdemac/filter.h50
-rw-r--r--libdemac/filter_1280_15.c27
-rw-r--r--libdemac/filter_16_11.c27
-rw-r--r--libdemac/filter_256_13.c27
-rw-r--r--libdemac/filter_32_10.c27
-rw-r--r--libdemac/filter_64_11.c27
-rw-r--r--libdemac/parser.c402
-rw-r--r--libdemac/parser.h137
-rw-r--r--libdemac/predictor-arm.S694
-rw-r--r--libdemac/predictor-cf.S659
-rw-r--r--libdemac/predictor.c271
-rw-r--r--libdemac/predictor.h38
-rw-r--r--libdemac/vector_math16_armv5te.h312
-rw-r--r--libdemac/vector_math16_armv6.h289
-rw-r--r--libdemac/vector_math16_cf.h326
-rw-r--r--libdemac/vector_math32_armv4.h205
-rw-r--r--libdemac/vector_math_generic.h160
-rw-r--r--playlist.c5
-rw-r--r--plugins/ape/Makefile.am2
-rw-r--r--plugins/ape/ape.c1
31 files changed, 4977 insertions, 8 deletions
diff --git a/Makefile.am b/Makefile.am
index 844ce894..3213626a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,10 @@
## Process this file with automake to produce Makefile.in
ACLOCAL_AMFLAGS = -I m4
-SUBDIRS = gme/Game_Music_Emu-0.5.2 gme/Game_Music_Emu-0.5.2/gme sid/sidplay-libs-2.1.0 dumb pixmaps plugins/lastfm plugins/ape
+test HAVE_LIBMAC=1 && APE_DIR="plugins/ape"
+
+SUBDIRS = gme/Game_Music_Emu-0.5.2 gme/Game_Music_Emu-0.5.2/gme sid/sidplay-libs-2.1.0 dumb pixmaps\
+ plugins/lastfm ${APE_DIR}
dumbpath=@top_srcdir@/dumb
sidpath=@top_srcdir@/sid/sidplay-libs-2.1.0
diff --git a/configure.in b/configure.in
index 2ea01239..64350a4b 100644
--- a/configure.in
+++ b/configure.in
@@ -17,11 +17,9 @@ LT_INIT
AC_CONFIG_MACRO_DIR([m4])
AC_C_BIGENDIAN
AC_DEFINE([PREFIX], [], [Installation prefix])
-if test "x$prefix" = "xNONE" ; then
- AC_DEFINE_UNQUOTED(PREFIX, "${ac_default_prefix}")
-else
- AC_DEFINE_UNQUOTED(PREFIX, "${prefix}")
-fi
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+AC_DEFINE_UNQUOTED(PREFIX, "${prefix}")
CFLAGS="$CFLAGS -D_GNU_SOURCE -O2 -DHAVE_UNIX"
CPPFLAGS="$CFLAGS"
@@ -35,6 +33,11 @@ AC_CHECK_LIB([FLAC], [main],,AC_MSG_ERROR([flac not found]))
AC_SUBST(DEPS_CFLAGS)
AC_SUBST(DEPS_LIBS)
+# ape plugin
+AC_CHECK_LIB([mac], [main], [HAVE_LIBMAC=1 ; APE_LIBS="-lmac"])
+AC_SUBST(HAVE_LIBMAC)
+AC_SUBST(APE_LIBS)
+
AC_OUTPUT([
Makefile
pixmaps/Makefile
diff --git a/libdemac/SOURCES b/libdemac/SOURCES
new file mode 100644
index 00000000..5a448237
--- /dev/null
+++ b/libdemac/SOURCES
@@ -0,0 +1,15 @@
+crc.c
+predictor.c
+#ifdef CPU_ARM
+predictor-arm.S
+#elif defined CPU_COLDFIRE
+predictor-cf.S
+#endif
+entropy.c
+decoder.c
+parser.c
+filter_1280_15.c
+filter_16_11.c
+filter_256_13.c
+filter_32_10.c
+filter_64_11.c
diff --git a/libdemac/crc.c b/libdemac/crc.c
new file mode 100644
index 00000000..30a49c8d
--- /dev/null
+++ b/libdemac/crc.c
@@ -0,0 +1,120 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: crc.c 19643 2009-01-02 21:43:52Z bertrik $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include "demac.h"
+
+static const uint32_t crctab32[] =
+{
+ 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
+ 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+ 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
+ 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+ 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
+ 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+ 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
+ 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+ 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
+ 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+ 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
+ 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+ 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
+ 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+ 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
+ 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+
+ 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
+ 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+ 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
+ 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+ 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
+ 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+ 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
+ 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+ 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
+ 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+ 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
+ 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+ 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
+ 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+ 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
+ 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+
+ 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
+ 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+ 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
+ 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+ 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
+ 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+ 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
+ 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+ 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
+ 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+ 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
+ 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+ 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
+ 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+ 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
+ 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+
+ 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
+ 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+ 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
+ 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+ 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
+ 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+ 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
+ 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+ 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
+ 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+ 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
+ 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+ 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
+ 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+ 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
+ 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+};
+/* Return the starting value for a new CRC computation (all 32 bits set). */
+uint32_t ape_initcrc(void)
+{
+ return 0xffffffff;
+}
+
+/* Fold `count` bytes from `block` (WAV-format audio data) into the running CRC and return it */
+uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc)
+{
+    while (count-- > 0) /* a zero or negative count leaves crc untouched */
+        crc = (crc >> 8) ^ crctab32[(crc & 0xff) ^ *block++];
+
+    return crc;
+}
+/* Finalise a running CRC: invert all 32 bits, then shift right by one bit. */
+uint32_t ape_finishcrc(uint32_t crc)
+{
+ crc ^= 0xffffffff;
+ crc >>= 1; /* the returned value therefore fits in 31 bits */
+
+ return crc;
+}
+
diff --git a/libdemac/decoder.c b/libdemac/decoder.c
new file mode 100644
index 00000000..d6327de2
--- /dev/null
+++ b/libdemac/decoder.c
@@ -0,0 +1,193 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: decoder.c 19552 2008-12-21 23:49:02Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "demac.h"
+#include "predictor.h"
+#include "entropy.h"
+#include "filter.h"
+#include "demac_config.h"
+
+/* Statically allocate the filter buffers */
+
+static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2]
+ IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */
+static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
+ IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */
+
+/* This is only needed for "insane" files, and no current Rockbox targets
+ can hope to decode them in realtime, although the Gigabeat S comes close. */
+static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2]
+ IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16)));
+ /* 17408 or 34816 bytes */
+/* Start a new frame: initialise the entropy decoder, the predictor and the filters for this compression type. */
+void init_frame_decoder(struct ape_ctx_t* ape_ctx,
+ unsigned char* inbuffer, int* firstbyte,
+ int* bytesconsumed)
+{
+ init_entropy_decoder(ape_ctx, inbuffer, firstbyte, bytesconsumed);
+ //printf("CRC=0x%08x\n",ape_ctx->CRC);
+ //printf("Flags=0x%08x\n",ape_ctx->frameflags);
+
+ init_predictor_decoder(&ape_ctx->predictor);
+
+ switch (ape_ctx->compressiontype) /* any other value: no filter is initialised */
+ {
+ case 2000:
+ init_filter_16_11(filterbuf32);
+ break;
+
+ case 3000:
+ init_filter_64_11(filterbuf256);
+ break;
+
+ case 4000:
+ init_filter_256_13(filterbuf256);
+ init_filter_32_10(filterbuf32);
+ break;
+
+ case 5000:
+ init_filter_1280_15(filterbuf1280);
+ init_filter_256_13(filterbuf256);
+ init_filter_16_11(filterbuf32);
+ }
+}
+/* Decode `count` samples per channel into decoded0 (and decoded1 when there are two channels); always returns 0. */
+int ICODE_ATTR_DEMAC decode_chunk(struct ape_ctx_t* ape_ctx,
+ unsigned char* inbuffer, int* firstbyte,
+ int* bytesconsumed,
+ int32_t* decoded0, int32_t* decoded1,
+ int count)
+{
+ int32_t left, right;
+#ifdef ROCKBOX
+ int scale = (APE_OUTPUT_DEPTH - ape_ctx->bps);
+ #define SCALE(x) ((x) << scale)
+#else
+ #define SCALE(x) (x)
+#endif
+
+ if ((ape_ctx->channels==1) || ((ape_ctx->frameflags
+ & (APE_FRAMECODE_PSEUDO_STEREO|APE_FRAMECODE_STEREO_SILENCE))
+ == APE_FRAMECODE_PSEUDO_STEREO)) {
+
+ entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
+ decoded0, NULL, count);
+
+ if (ape_ctx->frameflags & APE_FRAMECODE_MONO_SILENCE) {
+ /* We are pure silence, so we're done. */
+ return 0;
+ }
+
+ switch (ape_ctx->compressiontype) /* any other value: no filter is applied */
+ {
+ case 2000:
+ apply_filter_16_11(ape_ctx->fileversion,decoded0,NULL,count);
+ break;
+
+ case 3000:
+ apply_filter_64_11(ape_ctx->fileversion,decoded0,NULL,count);
+ break;
+
+ case 4000:
+ apply_filter_32_10(ape_ctx->fileversion,decoded0,NULL,count);
+ apply_filter_256_13(ape_ctx->fileversion,decoded0,NULL,count);
+ break;
+
+ case 5000:
+ apply_filter_16_11(ape_ctx->fileversion,decoded0,NULL,count);
+ apply_filter_256_13(ape_ctx->fileversion,decoded0,NULL,count);
+ apply_filter_1280_15(ape_ctx->fileversion,decoded0,NULL,count);
+ }
+
+ /* Now apply the predictor decoding */
+ predictor_decode_mono(&ape_ctx->predictor,decoded0,count);
+
+ if (ape_ctx->channels==2) {
+ /* Pseudo-stereo - copy left channel to right channel */
+ while (count--)
+ {
+ left = *decoded0;
+ *(decoded1++) = *(decoded0++) = SCALE(left);
+ }
+ }
+#ifdef ROCKBOX
+ else {
+ /* Scale to output depth */
+ while (count--)
+ {
+ left = *decoded0;
+ *(decoded0++) = SCALE(left);
+ }
+ }
+#endif
+ } else { /* Stereo */
+ entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
+ decoded0, decoded1, count);
+
+ if ((ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)
+ == APE_FRAMECODE_STEREO_SILENCE) {
+ /* We are pure silence, so we're done. */
+ return 0;
+ }
+
+ /* Apply filters - compression type 1000 doesn't have any */
+ switch (ape_ctx->compressiontype)
+ {
+ case 2000:
+ apply_filter_16_11(ape_ctx->fileversion,decoded0,decoded1,count);
+ break;
+
+ case 3000:
+ apply_filter_64_11(ape_ctx->fileversion,decoded0,decoded1,count);
+ break;
+
+ case 4000:
+ apply_filter_32_10(ape_ctx->fileversion,decoded0,decoded1,count);
+ apply_filter_256_13(ape_ctx->fileversion,decoded0,decoded1,count);
+ break;
+
+ case 5000:
+ apply_filter_16_11(ape_ctx->fileversion,decoded0,decoded1,count);
+ apply_filter_256_13(ape_ctx->fileversion,decoded0,decoded1,count);
+ apply_filter_1280_15(ape_ctx->fileversion,decoded0,decoded1,count);
+ }
+
+ /* Now apply the predictor decoding */
+ predictor_decode_stereo(&ape_ctx->predictor,decoded0,decoded1,count);
+
+ /* Decorrelate and scale to output depth */
+ while (count--)
+ {
+ left = *decoded1 - (*decoded0 / 2);
+ right = left + *decoded0;
+
+ *(decoded0++) = SCALE(left);
+ *(decoded1++) = SCALE(right);
+ }
+ }
+ return 0;
+}
diff --git a/libdemac/decoder.h b/libdemac/decoder.h
new file mode 100644
index 00000000..e8810652
--- /dev/null
+++ b/libdemac/decoder.h
@@ -0,0 +1,40 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: decoder.h 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_DECODER_H
+#define _APE_DECODER_H
+
+#include <inttypes.h>
+#include "parser.h"
+
+void init_frame_decoder(struct ape_ctx_t* ape_ctx,
+ unsigned char* inbuffer, int* firstbyte,
+ int* bytesconsumed);
+
+int decode_chunk(struct ape_ctx_t* ape_ctx,
+ unsigned char* inbuffer, int* firstbyte,
+ int* bytesconsumed,
+ int32_t* decoded0, int32_t* decoded1,
+ int count);
+#endif
diff --git a/libdemac/demac.h b/libdemac/demac.h
new file mode 100644
index 00000000..9a699a6e
--- /dev/null
+++ b/libdemac/demac.h
@@ -0,0 +1,45 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: demac.h 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_DEMAC_H /* own guard: decoder.h already uses _APE_DECODER_H */
+#define _APE_DEMAC_H
+
+#include <inttypes.h>
+#include "parser.h"
+
+void init_frame_decoder(struct ape_ctx_t* ape_ctx,
+                        unsigned char* inbuffer, int* firstbyte,
+                        int* bytesconsumed);
+
+int decode_chunk(struct ape_ctx_t* ape_ctx,
+                 unsigned char* inbuffer, int* firstbyte,
+                 int* bytesconsumed,
+                 int32_t* decoded0, int32_t* decoded1,
+                 int count);
+/* CRC helpers implemented in crc.c */
+uint32_t ape_initcrc(void);
+uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc);
+uint32_t ape_finishcrc(uint32_t crc);
+
+#endif /* _APE_DEMAC_H */
diff --git a/libdemac/demac_config.h b/libdemac/demac_config.h
new file mode 100644
index 00000000..8f3ad1e0
--- /dev/null
+++ b/libdemac/demac_config.h
@@ -0,0 +1,112 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: demac_config.h 19199 2008-11-24 18:40:49Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _DEMAC_CONFIG_H
+#define _DEMAC_CONFIG_H
+
+/* Build-time choices for libdemac.
+ * Note that this file is included by both .c and .S files. */
+
+#ifdef ROCKBOX
+
+#include "config.h"
+
+#ifndef __ASSEMBLER__
+#include "codeclib.h"
+#include <codecs.h>
+#endif
+
+#define APE_OUTPUT_DEPTH 29
+
+/* On ARMv4, using 32 bit ints for the filters is faster. */
+#if defined(CPU_ARM) && (ARM_ARCH == 4)
+#define FILTER_BITS 32
+#endif
+
+#if CONFIG_CPU == PP5002
+/* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */
+#define ICODE_SECTION_DEMAC_ARM .icode
+#define ICODE_ATTR_DEMAC ICODE_ATTR
+#define IBSS_ATTR_DEMAC_INSANEBUF
+#elif CONFIG_CPU == PP5020
+/* Not enough IRAM for the insane filter buffer. */
+#define ICODE_SECTION_DEMAC_ARM .text
+#define ICODE_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC_INSANEBUF
+#else
+#define ICODE_SECTION_DEMAC_ARM .text
+#define ICODE_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR
+#endif
+
+#else /* !ROCKBOX */
+
+#define APE_OUTPUT_DEPTH (ape_ctx->bps)
+
+#define IBSS_ATTR
+#define IBSS_ATTR_DEMAC_INSANEBUF
+#define ICONST_ATTR
+#define ICODE_ATTR
+#define ICODE_ATTR_DEMAC
+
+/* Use to give gcc hints on which branch is most likely taken */
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define LIKELY(x) __builtin_expect(!!(x), 1)
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+#endif /* !ROCKBOX */
+
+/* Defaults */
+
+#ifndef UDIV32
+#define UDIV32(a, b) ((a) / (b)) /* default: plain division; arguments parenthesised so expressions expand safely */
+#endif
+
+#ifndef FILTER_HISTORY_SIZE
+#define FILTER_HISTORY_SIZE 512
+#endif
+
+#ifndef PREDICTOR_HISTORY_SIZE
+#define PREDICTOR_HISTORY_SIZE 512
+#endif
+
+#ifndef FILTER_BITS
+#define FILTER_BITS 16
+#endif
+
+
+#ifndef __ASSEMBLER__
+#include <inttypes.h>
+#if FILTER_BITS == 32
+typedef int32_t filter_int;
+#elif FILTER_BITS == 16
+typedef int16_t filter_int;
+#endif
+#endif
+
+#endif /* _DEMAC_CONFIG_H */
diff --git a/libdemac/entropy.c b/libdemac/entropy.c
new file mode 100644
index 00000000..df6bb7fd
--- /dev/null
+++ b/libdemac/entropy.c
@@ -0,0 +1,464 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: entropy.c 19552 2008-12-21 23:49:02Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "parser.h"
+#include "entropy.h"
+#include "demac_config.h"
+
+#define MODEL_ELEMENTS 64
+
+/*
+ The following counts arrays for use with the range decoder are
+ hard-coded in the Monkey's Audio decoder.
+*/
+
+static const int counts_3970[65] ICONST_ATTR =
+{
+ 0,14824,28224,39348,47855,53994,58171,60926,
+ 62682,63786,64463,64878,65126,65276,65365,65419,
+ 65450,65469,65480,65487,65491,65493,65494,65495,
+ 65496,65497,65498,65499,65500,65501,65502,65503,
+ 65504,65505,65506,65507,65508,65509,65510,65511,
+ 65512,65513,65514,65515,65516,65517,65518,65519,
+ 65520,65521,65522,65523,65524,65525,65526,65527,
+ 65528,65529,65530,65531,65532,65533,65534,65535,
+ 65536
+};
+
+/* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */
+static const int counts_diff_3970[64] ICONST_ATTR =
+{
+ 14824,13400,11124,8507,6139,4177,2755,1756,
+ 1104,677,415,248,150,89,54,31,
+ 19,11,7,4,2,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1
+};
+
+static const int counts_3980[65] ICONST_ATTR =
+{
+ 0,19578,36160,48417,56323,60899,63265,64435,
+ 64971,65232,65351,65416,65447,65466,65476,65482,
+ 65485,65488,65490,65491,65492,65493,65494,65495,
+ 65496,65497,65498,65499,65500,65501,65502,65503,
+ 65504,65505,65506,65507,65508,65509,65510,65511,
+ 65512,65513,65514,65515,65516,65517,65518,65519,
+ 65520,65521,65522,65523,65524,65525,65526,65527,
+ 65528,65529,65530,65531,65532,65533,65534,65535,
+ 65536
+};
+
+/* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */
+
+static const int counts_diff_3980[64] ICONST_ATTR =
+{
+ 19578,16582,12257,7906,4576,2366,1170,536,
+ 261,119,65,31,19,10,6,3,
+ 3,2,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1
+};
+
+/*
+
+Range decoder adapted from rangecod.c included in:
+
+ http://www.compressconsult.com/rangecoder/rngcod13.zip
+
+ rangecod.c range encoding
+
+ (c) Michael Schindler
+ 1997, 1998, 1999, 2000
+ http://www.compressconsult.com/
+ michael@compressconsult.com
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+
+The encoding functions were removed, and functions turned into "static
+inline" functions. Some minor cosmetic changes were made (e.g. turning
+pre-processor symbols into upper-case, removing the rc parameter from
+each function (and the RNGC macro)).
+
+*/
+
+/* BITSTREAM READING FUNCTIONS */
+
+/* We deal with the input data one byte at a time - to ensure
+ functionality on CPUs of any endianness regardless of any requirements
+ for aligned reads.
+*/
+
+static unsigned char* bytebuffer IBSS_ATTR;
+static int bytebufferoffset IBSS_ATTR;
+/* Advance to the next input byte; assuming the offset is in 0..3 it counts 3,2,1,0 within each 4-byte group. */
+static inline void skip_byte(void)
+{
+ bytebufferoffset--;
+ bytebuffer += bytebufferoffset & 4; /* offset went below 0: bit 2 is set, so move to the next 4-byte group */
+ bytebufferoffset &= 3; /* -1 wraps to 3; 0..2 are unchanged */
+}
+/* Return the byte at the current input position, then advance with skip_byte(). */
+static inline int read_byte(void)
+{
+ int ch = bytebuffer[bytebufferoffset];
+
+ skip_byte();
+
+ return ch;
+}
+
+/* RANGE DECODING FUNCTIONS */
+
+/* SIZE OF RANGE ENCODING CODE VALUES. */
+
+#define CODE_BITS 32
+#define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1))
+#define SHIFT_BITS (CODE_BITS - 9)
+#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
+#define BOTTOM_VALUE (TOP_VALUE >> 8)
+
+struct rangecoder_t
+{
+ uint32_t low; /* low end of interval */
+ uint32_t range; /* length of interval */
+ uint32_t help; /* bytes_to_follow resp. intermediate value */
+ unsigned int buffer; /* buffer for input/output */
+};
+
+static struct rangecoder_t rc IBSS_ATTR;
+
+/* Start the decoder: read one byte and seed low with its top EXTRA_BITS bits, range with 1 << EXTRA_BITS */
+static inline void range_start_decoding(void)
+{
+ rc.buffer = read_byte();
+ rc.low = rc.buffer >> (8 - EXTRA_BITS);
+ rc.range = (uint32_t) 1 << EXTRA_BITS;
+}
+/* Read input bytes until range exceeds BOTTOM_VALUE; each byte read shifts low and range left by 8 bits. */
+static inline void range_dec_normalize(void)
+{
+ while (rc.range <= BOTTOM_VALUE)
+ {
+ rc.buffer = (rc.buffer << 8) | read_byte();
+ rc.low = (rc.low << 8) | ((rc.buffer >> 1) & 0xff); /* new low byte straddles the last two input bytes */
+ rc.range <<= 8;
+ }
+}
+
+/* Calculate cumulative frequency for next symbol. Does NO update!*/
+/* tot_f is the total frequency */
+/* or: totf is (code_value)1<<shift */
+/* returns the cumulative frequency */
+static inline int range_decode_culfreq(int tot_f)
+{
+ range_dec_normalize();
+ rc.help = UDIV32(rc.range, tot_f); /* kept for the following range_decode_update() */
+ return UDIV32(rc.low, rc.help);
+}
+/* As range_decode_culfreq() for a total frequency of 1 << shift: the first division becomes a right shift. */
+static inline int range_decode_culshift(int shift)
+{
+ range_dec_normalize();
+ rc.help = rc.range >> shift;
+ return UDIV32(rc.low, rc.help);
+}
+
+
+/* Update decoding state, using the rc.help value stored by the preceding range_decode_cul*() call */
+/* sy_f is the interval length (frequency of the symbol) */
+/* lt_f is the lower end (frequency sum of < symbols) */
+static inline void range_decode_update(int sy_f, int lt_f)
+{
+ rc.low -= rc.help * lt_f;
+ rc.range = rc.help * sy_f;
+}
+
+
+/* Decode a byte without modelling: an 8-bit total where every value has frequency 1 */
+static inline unsigned char decode_byte(void)
+{ int tmp = range_decode_culshift(8);
+ range_decode_update( 1,tmp);
+ return tmp;
+}
+/* Decode a raw 16-bit value without modelling; unsigned so values >= 0x8000 are not sign-extended for callers */
+static inline unsigned short range_decode_short(void)
+{   int tmp = range_decode_culshift(16);
+    range_decode_update( 1,tmp);
+    return tmp;
+}
+
+/* Decode n bits (n <= 16) without modelling - based on range_decode_short */
+static inline int range_decode_bits(int n)
+{ int tmp = range_decode_culshift(n); /* total is 1 << n */
+ range_decode_update( 1,tmp); /* every value has frequency 1 */
+ return tmp;
+}
+
+
+/* Finish decoding */
+static inline void range_done_decoding(void)
+{ range_dec_normalize(); /* normalize to use up all bytes */
+}
+
+/*
+ range_get_symbol_* functions based on main decoding loop in simple_d.c from
+ http://www.compressconsult.com/rangecoder/rngcod13.zip
+ (c) Michael Schindler
+*/
+/* Decode one symbol in 0..MODEL_ELEMENTS-1 using the counts_3980 / counts_diff_3980 model. */
+static inline int range_get_symbol_3980(void)
+{
+    int symbol, cf;
+
+    cf = range_decode_culshift(16);
+
+    /* linear search (a binary search would be better); the index bound stops a corrupt cf >= 65536 running off the tables */
+    for (symbol = 0; symbol < MODEL_ELEMENTS - 1 && counts_3980[symbol+1] <= cf; symbol++);
+
+    range_decode_update(counts_diff_3980[symbol],counts_3980[symbol]);
+
+    return symbol;
+}
+/* Decode one symbol in 0..MODEL_ELEMENTS-1 using the counts_3970 / counts_diff_3970 model. */
+static inline int range_get_symbol_3970(void)
+{
+    int symbol, cf;
+
+    cf = range_decode_culshift(16);
+
+    /* linear search (a binary search would be better); the index bound stops a corrupt cf >= 65536 running off the tables */
+    for (symbol = 0; symbol < MODEL_ELEMENTS - 1 && counts_3970[symbol+1] <= cf; symbol++);
+
+    range_decode_update(counts_diff_3970[symbol],counts_3970[symbol]);
+
+    return symbol;
+}
+
+/* MAIN DECODING FUNCTIONS */
+
+struct rice_t
+{
+ uint32_t k;
+ uint32_t ksum;
+};
+
+static struct rice_t riceX IBSS_ATTR;
+static struct rice_t riceY IBSS_ATTR;
+/* Adapt the Rice state after decoding x: update the running sum ksum, then move k by at most one step. */
+static inline void update_rice(struct rice_t* rice, int x)
+{
+ rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
+
+ if (UNLIKELY(rice->k == 0)) {
+ rice->k = 1;
+ } else {
+ uint32_t lim = 1 << (rice->k + 4); /* k is kept only while lim <= ksum < 2 * lim */
+ if (UNLIKELY(rice->ksum < lim)) {
+ rice->k--;
+ } else if (UNLIKELY(rice->ksum >= 2 * lim)) {
+ rice->k++;
+ }
+ }
+}
+/* Decode one signed sample (path used for file versions > 3970) and update the given Rice state. */
+static inline int entropy_decode3980(struct rice_t* rice)
+{
+ int base, x, pivot, overflow;
+
+ pivot = rice->ksum >> 5;
+ if (UNLIKELY(pivot == 0))
+ pivot=1;
+
+ overflow = range_get_symbol_3980();
+
+ if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) {
+ overflow = range_decode_short() << 16; /* escape symbol: the real overflow follows as two 16-bit halves */
+ overflow |= range_decode_short();
+ }
+
+ if (pivot >= 0x10000) {
+ /* Codepath for 24-bit streams */
+ int nbits, lo_bits, base_hi, base_lo;
+
+ /* Count the number of bits in pivot */
+ nbits = 17; /* We know there must be at least 17 bits */
+ while ((pivot >> nbits) > 0) { nbits++; }
+
+ /* base_lo is the low (nbits-16) bits of base
+ base_hi is the high 16 bits of base
+ */
+ lo_bits = (nbits - 16);
+
+ base_hi = range_decode_culfreq((pivot >> lo_bits) + 1);
+ range_decode_update(1, base_hi);
+
+ base_lo = range_decode_culshift(lo_bits);
+ range_decode_update(1, base_lo);
+
+ base = (base_hi << lo_bits) + base_lo;
+ } else {
+ /* Codepath for 16-bit streams */
+ base = range_decode_culfreq(pivot);
+ range_decode_update(1, base);
+ }
+
+ x = base + (overflow * pivot);
+ update_rice(rice, x);
+
+ /* Convert to signed: odd x maps to a positive value, even x to zero or a negative value */
+ if (x & 1)
+ return (x >> 1) + 1;
+ else
+ return -(x >> 1);
+}
+
+/* Decode one signed sample (path used for file versions <= 3970) and update the given Rice state. */
+static inline int entropy_decode3970(struct rice_t* rice)
+{
+ int x, tmpk;
+
+ int overflow = range_get_symbol_3970();
+
+ if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) {
+ tmpk = range_decode_bits(5); /* escape symbol: the bit count is read explicitly */
+ overflow = 0;
+ } else {
+ tmpk = (rice->k < 1) ? 0 : rice->k - 1;
+ }
+
+ if (tmpk <= 16) {
+ x = range_decode_bits(tmpk);
+ } else {
+ x = range_decode_short(); /* low 16 bits first, then the remaining high bits */
+ x |= (range_decode_bits(tmpk - 16) << 16);
+ }
+ x += (overflow << tmpk);
+
+ update_rice(rice, x);
+
+ /* Convert to signed: odd x maps to a positive value, even x to zero or a negative value */
+ if (x & 1)
+ return (x >> 1) + 1;
+ else
+ return -(x >> 1);
+}
+/* Start a frame: read the CRC (and frame flags if present), reset the Rice state, start the range decoder. */
+void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
+ unsigned char* inbuffer, int* firstbyte,
+ int* bytesconsumed)
+{
+ bytebuffer = inbuffer;
+ bytebufferoffset = *firstbyte;
+
+ /* Read the CRC */
+ ape_ctx->CRC = read_byte();
+ ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
+ ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
+ ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
+
+ /* Read the frame flags if they exist */
+ ape_ctx->frameflags = 0;
+ if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) {
+ ape_ctx->CRC &= ~0x80000000; /* bit 31 only signals that flags follow; clear it from the CRC */
+
+ ape_ctx->frameflags = read_byte();
+ ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
+ ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
+ ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
+ }
+ /* Keep a count of the blocks decoded in this frame */
+ ape_ctx->blocksdecoded = 0;
+
+ /* Initialise the rice structs */
+ riceX.k = 10;
+ riceX.ksum = (1 << riceX.k) * 16;
+ riceY.k = 10;
+ riceY.ksum = (1 << riceY.k) * 16;
+
+ /* The first 8 bits of input are ignored. */
+ skip_byte();
+
+ range_start_decoding();
+
+ /* Return the new state of the buffer */
+ *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer;
+ *firstbyte = bytebufferoffset;
+}
+/* Entropy-decode blockstodecode samples per channel; decoded1 may be NULL when only one channel is decoded. */
+void ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx,
+ unsigned char* inbuffer, int* firstbyte,
+ int* bytesconsumed,
+ int32_t* decoded0, int32_t* decoded1,
+ int blockstodecode)
+{
+ bytebuffer = inbuffer;
+ bytebufferoffset = *firstbyte;
+
+ ape_ctx->blocksdecoded += blockstodecode;
+
+ if ((ape_ctx->frameflags & APE_FRAMECODE_LEFT_SILENCE)
+ && ((ape_ctx->frameflags & APE_FRAMECODE_RIGHT_SILENCE)
+ || (decoded1 == NULL))) {
+ /* We are pure silence, just memset the output buffer. */
+ memset(decoded0, 0, blockstodecode * sizeof(int32_t));
+ if (decoded1 != NULL)
+ memset(decoded1, 0, blockstodecode * sizeof(int32_t));
+ } else {
+ if (ape_ctx->fileversion > 3970) {
+ while (LIKELY(blockstodecode--)) {
+ *(decoded0++) = entropy_decode3980(&riceY); /* decoded0 uses riceY, decoded1 uses riceX */
+ if (decoded1 != NULL)
+ *(decoded1++) = entropy_decode3980(&riceX);
+ }
+ } else {
+ while (LIKELY(blockstodecode--)) {
+ *(decoded0++) = entropy_decode3970(&riceY);
+ if (decoded1 != NULL)
+ *(decoded1++) = entropy_decode3970(&riceX);
+ }
+ }
+ }
+
+ if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks) /* whole frame decoded */
+ {
+ range_done_decoding();
+ }
+
+ /* Return the new state of the buffer */
+ *bytesconsumed = bytebuffer - inbuffer;
+ *firstbyte = bytebufferoffset;
+}
diff --git a/libdemac/entropy.h b/libdemac/entropy.h
new file mode 100644
index 00000000..ffef6211
--- /dev/null
+++ b/libdemac/entropy.h
@@ -0,0 +1,40 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: entropy.h 19236 2008-11-26 18:01:18Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_ENTROPY_H
+#define _APE_ENTROPY_H
+
+#include <inttypes.h>
+
+/* Forward declaration so that the prototypes below refer to the
+   file-scope struct type even when this header is included before the
+   header that defines it.  Without it the struct would be declared
+   inside the parameter list and be a distinct, prototype-local type. */
+struct ape_ctx_t;
+
+/* Prepare the entropy decoder for a new frame.  inbuffer/firstbyte give
+   the input position; *bytesconsumed and *firstbyte are updated to the
+   position after the frame preamble. */
+void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
+                          unsigned char* inbuffer, int* firstbyte,
+                          int* bytesconsumed);
+
+/* Decode blockstodecode blocks into decoded0 (and decoded1 unless it is
+   NULL), updating *bytesconsumed and *firstbyte to the new input
+   position. */
+void entropy_decode(struct ape_ctx_t* ape_ctx,
+                    unsigned char* inbuffer, int* firstbyte,
+                    int* bytesconsumed,
+                    int32_t* decoded0, int32_t* decoded1,
+                    int blockstodecode);
+
+#endif
diff --git a/libdemac/filter.c b/libdemac/filter.c
new file mode 100644
index 00000000..9f1abfb8
--- /dev/null
+++ b/libdemac/filter.c
@@ -0,0 +1,252 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter.c 19556 2008-12-22 08:33:49Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <string.h>
+#include <inttypes.h>
+
+#include "demac.h"
+#include "filter.h"
+#include "demac_config.h"
+
+#if FILTER_BITS == 32
+
+#if defined(CPU_ARM) && (ARM_ARCH == 4)
+#include "vector_math32_armv4.h"
+#else
+#include "vector_math_generic.h"
+#endif
+
+#else /* FILTER_BITS == 16 */
+
+#ifdef CPU_COLDFIRE
+#include "vector_math16_cf.h"
+#elif defined(CPU_ARM) && (ARM_ARCH >= 6)
+#include "vector_math16_armv6.h"
+#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
+/* Assume all our ARMv5 targets are ARMv5te(j) */
+#include "vector_math16_armv5te.h"
+#else
+#include "vector_math_generic.h"
+#endif
+
+#endif /* FILTER_BITS */
+
+/* State of one adaptive filter instance (filter0 and filter1 below each
+ hold one).  All pointers point into the single buffer that is passed
+ to do_init_filter(). */
+struct filter_t {
+ filter_int* coeffs; /* ORDER entries */
+
+ /* We store all the filter delays in a single buffer */
+ filter_int* history_end; /* coeffs + ORDER*3 + FILTER_HISTORY_SIZE; reaching it triggers the wrap-around copy */
+
+ filter_int* delay; /* next output-history slot to write; starts at coeffs + ORDER*3 */
+ filter_int* adaptcoeffs; /* next adaption slot to write; starts at coeffs + ORDER*2 */
+
+ int avg; /* running average of |res|; only updated by do_apply_filter_3980() */
+};
+
+/* We name the functions according to the ORDER and FRACBITS
+ pre-processor symbols and build multiple .o files from this .c file
+ - this increases code-size but gives the compiler more scope for
+ optimising the individual functions, as well as replacing a lot of
+ variables with constants.
+
+ Only FRACBITS == 11 is disambiguated by ORDER (16 or 64); the
+ FRACBITS 13, 10 and 15 branches pick their name from FRACBITS alone.
+ Any other combination leaves INIT_FILTER and APPLY_FILTER undefined,
+ so the function definitions further down will not compile as intended.
+*/
+
+#if FRACBITS == 11
+ #if ORDER == 16
+ #define INIT_FILTER init_filter_16_11
+ #define APPLY_FILTER apply_filter_16_11
+ #elif ORDER == 64
+ #define INIT_FILTER init_filter_64_11
+ #define APPLY_FILTER apply_filter_64_11
+ #endif
+#elif FRACBITS == 13
+ #define INIT_FILTER init_filter_256_13
+ #define APPLY_FILTER apply_filter_256_13
+#elif FRACBITS == 10
+ #define INIT_FILTER init_filter_32_10
+ #define APPLY_FILTER apply_filter_32_10
+#elif FRACBITS == 15
+ #define INIT_FILTER init_filter_1280_15
+ #define APPLY_FILTER apply_filter_1280_15
+#endif
+
+/* Some macros to handle the fixed-point stuff */
+
+/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
+   integer (rounding to nearest).  The argument is parenthesised so that
+   an expression containing low-precedence operators (e.g. a | b) is
+   rounded as a whole. */
+#define FP_HALF (1 << (FRACBITS - 1)) /* 0.5 in fixed-point format. */
+#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS) /* round(x) */
+
+/* Clamp x to the int16_t range.  The generic version evaluates x more
+   than once, so the argument must not have side effects; it is fully
+   parenthesised so it can be used inside larger expressions. */
+#if defined(CPU_ARM) && (ARM_ARCH >= 6)
+#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
+#else
+#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : (((x) >> 31) ^ 0x7FFF))
+#endif
+
+/* Apply the filter with state f to count entries in data[]
+ (the variant APPLY_FILTER selects for fileversion >= 3980).
+
+ Every entry of data[] is replaced in place by itself plus the rounded
+ filter output.  The coefficients, both history pointers and the running
+ average in f are updated for every entry. */
+
+static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
+ int32_t* data, int count)
+{
+ int res;
+ int absres;
+
+#ifdef PREPARE_SCALARPRODUCT
+ PREPARE_SCALARPRODUCT
+#endif
+
+ while(LIKELY(count--))
+ {
+ /* Rounded filter output.  scalarproduct() comes from the selected
+ vector_math header; presumably the dot product of the ORDER
+ coefficients with the ORDER newest delay entries - confirm there. */
+ res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
+
+ /* Adapt the coefficients from the previous ORDER adaption entries;
+ the direction depends on the sign of the input and a zero input
+ leaves the coefficients untouched. */
+ if (LIKELY(*data != 0)) {
+ if (*data < 0)
+ vector_add(f->coeffs, f->adaptcoeffs - ORDER);
+ else
+ vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
+ }
+
+ res += *data;
+
+ *data++ = res;
+
+ /* Update the output history */
+ *f->delay++ = SATURATE(res);
+
+ /* Version 3.98 and later files */
+
+ /* Update the adaption coefficients */
+ absres = (res < 0 ? -res : res);
+
+ /* Each mask below selects what is bit 31 of res (its sign bit when
+ int is 32 bits wide), so the stored value is -N for res >= 0 and
+ +N for res < 0, with N = 32, 16 or 8 depending on how |res|
+ compares with the running average. */
+ if (UNLIKELY(absres > 3 * f->avg))
+ *f->adaptcoeffs = ((res >> 25) & 64) - 32;
+ else if (3 * absres > 4 * f->avg)
+ *f->adaptcoeffs = ((res >> 26) & 32) - 16;
+ else if (LIKELY(absres > 0))
+ *f->adaptcoeffs = ((res >> 27) & 16) - 8;
+ else
+ *f->adaptcoeffs = 0;
+
+ /* Move the running average 1/16 of the way towards |res| */
+ f->avg += (absres - f->avg) / 16;
+
+ /* Halve the adaption entries 1, 2 and 8 positions back */
+ f->adaptcoeffs[-1] >>= 1;
+ f->adaptcoeffs[-2] >>= 1;
+ f->adaptcoeffs[-8] >>= 1;
+
+ f->adaptcoeffs++;
+
+ /* Have we filled the history buffer? */
+ if (UNLIKELY(f->delay == f->history_end)) {
+ /* Copy the newest ORDER*2 entries down to just after the
+ coefficients and restart both pointers at the positions
+ do_init_filter() gave them. */
+ memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
+ (ORDER*2) * sizeof(filter_int));
+ f->adaptcoeffs = f->coeffs + ORDER*2;
+ f->delay = f->coeffs + ORDER*3;
+ }
+ }
+}
+
+/* Apply the filter with state f to count entries in data[]
+ (the variant APPLY_FILTER selects for fileversion < 3980).
+
+ Same structure as do_apply_filter_3980(), but the adaption entry has a
+ fixed magnitude and f->avg is not used. */
+static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
+ int32_t* data, int count)
+{
+ int res;
+
+#ifdef PREPARE_SCALARPRODUCT
+ PREPARE_SCALARPRODUCT
+#endif
+
+ while(LIKELY(count--))
+ {
+ /* Rounded filter output; FP_TO_INT converts from
+ (32-FRACBITS).FRACBITS fixed-point format to an integer. */
+ res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
+
+ /* Adapt the coefficients; direction follows the sign of the input,
+ a zero input leaves them untouched. */
+ if (LIKELY(*data != 0)) {
+ if (*data < 0)
+ vector_add(f->coeffs, f->adaptcoeffs - ORDER);
+ else
+ vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
+ }
+
+ /* res has already been rounded to an integer above; add the
+ input value to it and store the sum back in place */
+ res += *data;
+
+ *data++ = res;
+
+ /* Update the output history */
+ *f->delay++ = SATURATE(res);
+
+ /* Version ??? to < 3.98 files (untested) */
+ /* The mask selects what is bit 31 of res (its sign bit when int is
+ 32 bits wide): -4 for res > 0, +4 for res < 0, 0 for res == 0. */
+ f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
+ /* Halve the adaption entries 4 and 8 positions back */
+ f->adaptcoeffs[-4] >>= 1;
+ f->adaptcoeffs[-8] >>= 1;
+
+ f->adaptcoeffs++;
+
+ /* Have we filled the history buffer? */
+ if (UNLIKELY(f->delay == f->history_end)) {
+ /* Copy the newest ORDER*2 entries down to just after the
+ coefficients and restart both pointers. */
+ memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
+ (ORDER*2) * sizeof(filter_int));
+ f->adaptcoeffs = f->coeffs + ORDER*2;
+ f->delay = f->coeffs + ORDER*3;
+ }
+ }
+}
+
+/* One filter state per channel */
+static struct filter_t filter0 IBSS_ATTR;
+static struct filter_t filter1 IBSS_ATTR;
+
+/* Bind filter state f to the storage at buf and reset it.
+
+   Layout inside buf: ORDER coefficients, then the shared history area.
+   Only the first ORDER*3 entries are cleared. */
+static void do_init_filter(struct filter_t* f, filter_int* buf)
+{
+    filter_int* const base = buf;
+
+    /* Zero coefficients and the initial part of the history buffer */
+    memset(base, 0, ORDER*3 * sizeof(filter_int));
+
+    f->coeffs = base;
+    f->adaptcoeffs = base + ORDER*2;
+    f->delay = base + ORDER*3;
+    f->history_end = base + ORDER*3 + FILTER_HISTORY_SIZE;
+
+    /* The running average starts from zero */
+    f->avg = 0;
+}
+
+/* Reset both channel filters.  buf is split in two consecutive regions of
+   ORDER*3 + FILTER_HISTORY_SIZE entries, one per filter. */
+void INIT_FILTER(filter_int* buf)
+{
+    filter_int* const second_half = buf + ORDER*3 + FILTER_HISTORY_SIZE;
+
+    do_init_filter(&filter0, buf);
+    do_init_filter(&filter1, second_half);
+}
+
+/* Run the filter over count entries of data0 and, unless data1 is NULL,
+   of data1 as well.  fileversion selects the 3.98+ or the older
+   adaption scheme. */
+void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int32_t* data0,
+                                   int32_t* data1, int count)
+{
+    const int have_second = (data1 != NULL);
+
+    if (fileversion < 3980) {
+        do_apply_filter_3970(&filter0, data0, count);
+        if (have_second)
+            do_apply_filter_3970(&filter1, data1, count);
+        return;
+    }
+
+    do_apply_filter_3980(&filter0, data0, count);
+    if (have_second)
+        do_apply_filter_3980(&filter1, data1, count);
+}
diff --git a/libdemac/filter.h b/libdemac/filter.h
new file mode 100644
index 00000000..946c76c5
--- /dev/null
+++ b/libdemac/filter.h
@@ -0,0 +1,50 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter.h 19236 2008-11-26 18:01:18Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_FILTER_H
+#define _APE_FILTER_H
+
+#include "demac_config.h"
+
+/* One init/apply pair per ORDER_FRACBITS combination; each pair is
+ produced by compiling filter.c with the matching ORDER and FRACBITS
+ (see filter_<ORDER>_<FRACBITS>.c).
+
+ init_filter_*: buf is split between the two channel filters, each of
+ which uses ORDER*3 + FILTER_HISTORY_SIZE filter_int entries.
+
+ apply_filter_*: filters count entries of decoded0 in place and, unless
+ decoded1 is NULL, of decoded1 too.  fileversion >= 3980 selects the
+ newer adaption scheme. */
+
+void init_filter_16_11(filter_int* buf);
+void apply_filter_16_11(int fileversion, int32_t* decoded0,
+ int32_t* decoded1, int count);
+
+void init_filter_64_11(filter_int* buf);
+void apply_filter_64_11(int fileversion, int32_t* decoded0,
+ int32_t* decoded1, int count);
+
+void init_filter_32_10(filter_int* buf);
+void apply_filter_32_10(int fileversion, int32_t* decoded0,
+ int32_t* decoded1, int count);
+
+void init_filter_256_13(filter_int* buf);
+void apply_filter_256_13(int fileversion, int32_t* decoded0,
+ int32_t* decoded1, int count);
+
+void init_filter_1280_15(filter_int* buf);
+void apply_filter_1280_15(int fileversion, int32_t* decoded0,
+ int32_t* decoded1, int count);
+
+#endif
diff --git a/libdemac/filter_1280_15.c b/libdemac/filter_1280_15.c
new file mode 100644
index 00000000..d7916385
--- /dev/null
+++ b/libdemac/filter_1280_15.c
@@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter_1280_15.c 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Instantiate filter.c as init_filter_1280_15 / apply_filter_1280_15 */
+#define ORDER 1280
+#define FRACBITS 15
+#include "filter.c"
diff --git a/libdemac/filter_16_11.c b/libdemac/filter_16_11.c
new file mode 100644
index 00000000..b33e09eb
--- /dev/null
+++ b/libdemac/filter_16_11.c
@@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter_16_11.c 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Instantiate filter.c as init_filter_16_11 / apply_filter_16_11 */
+#define ORDER 16
+#define FRACBITS 11
+#include "filter.c"
diff --git a/libdemac/filter_256_13.c b/libdemac/filter_256_13.c
new file mode 100644
index 00000000..2dd9d06d
--- /dev/null
+++ b/libdemac/filter_256_13.c
@@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter_256_13.c 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Instantiate filter.c as init_filter_256_13 / apply_filter_256_13 */
+#define ORDER 256
+#define FRACBITS 13
+#include "filter.c"
diff --git a/libdemac/filter_32_10.c b/libdemac/filter_32_10.c
new file mode 100644
index 00000000..bdafb876
--- /dev/null
+++ b/libdemac/filter_32_10.c
@@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter_32_10.c 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Instantiate filter.c as init_filter_32_10 / apply_filter_32_10 */
+#define ORDER 32
+#define FRACBITS 10
+#include "filter.c"
diff --git a/libdemac/filter_64_11.c b/libdemac/filter_64_11.c
new file mode 100644
index 00000000..26c85f19
--- /dev/null
+++ b/libdemac/filter_64_11.c
@@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: filter_64_11.c 19743 2009-01-10 21:10:56Z zagor $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Instantiate filter.c as init_filter_64_11 / apply_filter_64_11 */
+#define ORDER 64
+#define FRACBITS 11
+#include "filter.c"
diff --git a/libdemac/parser.c b/libdemac/parser.c
new file mode 100644
index 00000000..948640d7
--- /dev/null
+++ b/libdemac/parser.c
@@ -0,0 +1,402 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: parser.c 13601 2007-06-09 00:58:15Z dave $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+#ifndef ROCKBOX
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+#include "parser.h"
+
+/* Larger of a and b.  Each argument is evaluated twice, so do not pass
+ expressions with side effects.  Any earlier definition is replaced. */
+#ifdef APE_MAX
+#undef APE_MAX
+#endif
+#define APE_MAX(a,b) ((a)>(b)?(a):(b))
+
+
+/* Read a little-endian signed 16-bit value from buf[0..1]. */
+static inline int16_t get_int16(unsigned char* buf)
+{
+    const int low = buf[0];
+    const int high = buf[1];
+
+    return (int16_t)((high << 8) | low);
+}
+
+/* Read a little-endian unsigned 16-bit value from buf[0..1]. */
+static inline uint16_t get_uint16(unsigned char* buf)
+{
+    const int low = buf[0];
+    const int high = buf[1];
+
+    return (uint16_t)((high << 8) | low);
+}
+
+/* Read a little-endian unsigned 32-bit value from buf[0..3].
+
+   Every byte is widened to uint32_t before it is shifted: shifting the
+   int-promoted top byte left by 24 would overflow a signed int (undefined
+   behaviour) whenever that byte is 0x80 or larger. */
+static inline uint32_t get_uint32(unsigned char* buf)
+{
+    return (uint32_t)buf[0]
+         | ((uint32_t)buf[1] << 8)
+         | ((uint32_t)buf[2] << 16)
+         | ((uint32_t)buf[3] << 24);
+}
+
+
+/* Parse an APE file header that is already in memory at buf.
+
+ ape_ctx is zeroed first, so every field not assigned below (including
+ seektable and maxseekpoints) is 0/NULL on return.  Returns 0 on success
+ and -1 if buf does not start with the "MAC " magic.
+
+ There is no buffer-length parameter: the caller has to supply enough
+ bytes for every offset read here, which for version >= 3980 files
+ depends on the descriptorlength stored in the file itself. */
+int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx)
+{
+ unsigned char* header;
+
+ memset(ape_ctx,0,sizeof(struct ape_ctx_t));
+ /* TODO: Skip any leading junk such as id3v2 tags */
+ ape_ctx->junklength = 0;
+
+ memcpy(ape_ctx->magic, buf, 4);
+ if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
+ {
+ return -1;
+ }
+
+ ape_ctx->fileversion = get_int16(buf + 4);
+
+ if (ape_ctx->fileversion >= 3980)
+ {
+ /* Descriptor block at fixed offsets, followed by the header block
+ at offset descriptorlength */
+ ape_ctx->padding1 = get_int16(buf + 6);
+ ape_ctx->descriptorlength = get_uint32(buf + 8);
+ ape_ctx->headerlength = get_uint32(buf + 12);
+ ape_ctx->seektablelength = get_uint32(buf + 16);
+ ape_ctx->wavheaderlength = get_uint32(buf + 20);
+ ape_ctx->audiodatalength = get_uint32(buf + 24);
+ ape_ctx->audiodatalength_high = get_uint32(buf + 28);
+ ape_ctx->wavtaillength = get_uint32(buf + 32);
+ memcpy(ape_ctx->md5, buf + 36, 16);
+
+ header = buf + ape_ctx->descriptorlength;
+
+ /* Read header data */
+ ape_ctx->compressiontype = get_uint16(header + 0);
+ ape_ctx->formatflags = get_uint16(header + 2);
+ ape_ctx->blocksperframe = get_uint32(header + 4);
+ ape_ctx->finalframeblocks = get_uint32(header + 8);
+ ape_ctx->totalframes = get_uint32(header + 12);
+ ape_ctx->bps = get_uint16(header + 16);
+ ape_ctx->channels = get_uint16(header + 18);
+ ape_ctx->samplerate = get_uint32(header + 20);
+
+ ape_ctx->seektablefilepos = ape_ctx->junklength +
+ ape_ctx->descriptorlength +
+ ape_ctx->headerlength;
+
+ ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
+ ape_ctx->headerlength + ape_ctx->seektablelength +
+ ape_ctx->wavheaderlength;
+ } else {
+ /* Older layout: a single 32-byte header, optionally followed by a
+ 4-byte peak level and a 4-byte seek element count */
+ ape_ctx->headerlength = 32;
+ ape_ctx->compressiontype = get_uint16(buf + 6);
+ ape_ctx->formatflags = get_uint16(buf + 8);
+ ape_ctx->channels = get_uint16(buf + 10);
+ ape_ctx->samplerate = get_uint32(buf + 12);
+ ape_ctx->wavheaderlength = get_uint32(buf + 16);
+ ape_ctx->totalframes = get_uint32(buf + 24);
+ ape_ctx->finalframeblocks = get_uint32(buf + 28);
+
+ if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
+ {
+ ape_ctx->headerlength += 4;
+ }
+
+ if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
+ {
+ /* The stored value is an element count; convert it to bytes */
+ ape_ctx->seektablelength = get_uint32(buf + ape_ctx->headerlength);
+ ape_ctx->seektablelength *= sizeof(int32_t);
+ ape_ctx->headerlength += 4;
+ } else {
+ ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
+ }
+
+ /* Sample width is implied by the format flags; 16 is the default */
+ if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
+ ape_ctx->bps = 8;
+ else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
+ ape_ctx->bps = 24;
+ else
+ ape_ctx->bps = 16;
+
+ /* Frame size is implied by the file version / compression type */
+ if (ape_ctx->fileversion >= 3950)
+ ape_ctx->blocksperframe = 73728 * 4;
+ else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
+ ape_ctx->blocksperframe = 73728;
+ else
+ ape_ctx->blocksperframe = 9216;
+
+ ape_ctx->seektablefilepos = ape_ctx->junklength + ape_ctx->headerlength +
+ ape_ctx->wavheaderlength;
+
+ ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->headerlength +
+ ape_ctx->wavheaderlength + ape_ctx->seektablelength;
+ }
+
+ /* All frames but the last hold blocksperframe blocks */
+ ape_ctx->totalsamples = ape_ctx->finalframeblocks;
+ if (ape_ctx->totalframes > 1)
+ ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
+
+ /* NOTE(review): maxseekpoints is always 0 here because of the memset at
+ the top, so APE_MAX always yields seektablelength / sizeof(int32_t).
+ Confirm whether a minimum against a caller-provided limit was meant. */
+ ape_ctx->numseekpoints = APE_MAX(ape_ctx->maxseekpoints,
+ ape_ctx->seektablelength / sizeof(int32_t));
+
+ return 0;
+}
+
+
+#ifndef ROCKBOX
+/* Helper functions */
+
+/* Read a little-endian unsigned 16-bit value from fd into *x.
+   Returns 0 on success, -1 (leaving *x untouched) if two bytes could
+   not be read in a single read() call. */
+static int read_uint16(int fd, uint16_t* x)
+{
+    unsigned char raw[2];
+
+    if (read(fd, raw, 2) != 2)
+        return -1;
+
+    *x = raw[0] | (raw[1] << 8);
+    return 0;
+}
+
+/* Read a little-endian signed 16-bit value from fd into *x.
+   Returns 0 on success, -1 (leaving *x untouched) on a short read. */
+static int read_int16(int fd, int16_t* x)
+{
+    uint16_t raw;
+    const int status = read_uint16(fd, &raw);
+
+    if (status == 0)
+        *x = (int16_t)raw;
+
+    return status;
+}
+
+/* Read a little-endian unsigned 32-bit value from fd into *x.
+   Returns 0 on success, -1 (leaving *x untouched) if four bytes could
+   not be read in a single read() call.
+
+   Every byte is widened to uint32_t before it is shifted: shifting the
+   int-promoted top byte left by 24 would overflow a signed int (undefined
+   behaviour) whenever that byte is 0x80 or larger. */
+static int read_uint32(int fd, uint32_t* x)
+{
+    unsigned char tmp[4];
+    int n;
+
+    n = read(fd,tmp,4);
+
+    if (n != 4)
+        return -1;
+
+    *x = (uint32_t)tmp[0]
+       | ((uint32_t)tmp[1] << 8)
+       | ((uint32_t)tmp[2] << 16)
+       | ((uint32_t)tmp[3] << 24);
+
+    return 0;
+}
+
+/* Skip count bytes forward in fd.  A zero-length skip is a no-op, so
+   descriptors that cannot seek still work when nothing has to be skipped.
+   Returns 0 on success, -1 if the seek fails. */
+static int ape_skip_bytes(int fd, uint32_t count)
+{
+    if (count == 0)
+        return 0;
+
+    return (lseek(fd, (off_t)count, SEEK_CUR) == (off_t)-1) ? -1 : 0;
+}
+
+/* Parse the header of the APE file open on fd and load its seek table.
+
+   Returns 0 on success and -1 on a short read, a bad magic, a failed
+   skip or an allocation failure.  On success with a non-empty seek table
+   ape_ctx->seektable points to malloc()ed memory owned by the caller;
+   in every other case it is NULL, never dangling or indeterminate.
+   Fields that the file format version does not provide are left
+   untouched. */
+int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx)
+{
+    uint32_t entries;
+    uint32_t idx;
+    int n;
+
+    /* Give the seek table pointer a defined value on every exit path */
+    ape_ctx->seektable = NULL;
+
+    /* TODO: Skip any leading junk such as id3v2 tags */
+    ape_ctx->junklength = 0;
+
+    /* Best effort: a descriptor that cannot seek is simply read from its
+       current position. */
+    lseek(fd,ape_ctx->junklength,SEEK_SET);
+
+    n = read(fd,&ape_ctx->magic,4);
+    if (n != 4) return -1;
+
+    if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
+    {
+        return -1;
+    }
+
+    if (read_int16(fd,&ape_ctx->fileversion) < 0)
+        return -1;
+
+    if (ape_ctx->fileversion >= 3980)
+    {
+        if (read_int16(fd,&ape_ctx->padding1) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->descriptorlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->headerlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->audiodatalength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->audiodatalength_high) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
+            return -1;
+        if (read(fd,&ape_ctx->md5,16) != 16)
+            return -1;
+
+        /* Skip any unknown bytes at the end of the descriptor. This is for future
+           compatibility.  A failed skip would misalign every later read,
+           so it is reported as an error. */
+        if (ape_ctx->descriptorlength > 52)
+        {
+            if (ape_skip_bytes(fd, ape_ctx->descriptorlength - 52) < 0)
+                return -1;
+        }
+
+        /* Read header data */
+        if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->formatflags) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->blocksperframe) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->totalframes) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->bps) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->channels) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->samplerate) < 0)
+            return -1;
+    } else {
+        ape_ctx->descriptorlength = 0;
+        ape_ctx->headerlength = 32;
+
+        if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->formatflags) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->channels) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->samplerate) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->totalframes) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
+            return -1;
+
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
+        {
+            /* Skip the peak level */
+            if (ape_skip_bytes(fd, 4) < 0)
+                return -1;
+            ape_ctx->headerlength += 4;
+        }
+
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
+        {
+            /* The stored value is an element count; convert it to bytes */
+            if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
+                return -1;
+            ape_ctx->headerlength += 4;
+            ape_ctx->seektablelength *= sizeof(int32_t);
+        } else {
+            ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
+        }
+
+        /* Sample width is implied by the format flags; 16 is the default */
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
+            ape_ctx->bps = 8;
+        else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
+            ape_ctx->bps = 24;
+        else
+            ape_ctx->bps = 16;
+
+        /* Frame size is implied by the file version / compression type */
+        if (ape_ctx->fileversion >= 3950)
+            ape_ctx->blocksperframe = 73728 * 4;
+        else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
+            ape_ctx->blocksperframe = 73728;
+        else
+            ape_ctx->blocksperframe = 9216;
+
+        /* Skip any stored wav header */
+        if (!(ape_ctx->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER))
+        {
+            if (ape_skip_bytes(fd, ape_ctx->wavheaderlength) < 0)
+                return -1;
+        }
+    }
+
+    /* All frames but the last hold blocksperframe blocks */
+    ape_ctx->totalsamples = ape_ctx->finalframeblocks;
+    if (ape_ctx->totalframes > 1)
+        ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
+
+    if (ape_ctx->seektablelength > 0)
+    {
+        entries = ape_ctx->seektablelength / sizeof(uint32_t);
+
+        ape_ctx->seektable = malloc(ape_ctx->seektablelength);
+        if (ape_ctx->seektable == NULL)
+            return -1;
+
+        for (idx = 0; idx < entries; idx++)
+        {
+            if (read_uint32(fd,&ape_ctx->seektable[idx]) < 0)
+            {
+                /* Do not leave a dangling pointer in the context */
+                free(ape_ctx->seektable);
+                ape_ctx->seektable = NULL;
+                return -1;
+            }
+        }
+    }
+
+    ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
+                          ape_ctx->headerlength + ape_ctx->seektablelength +
+                          ape_ctx->wavheaderlength;
+
+    return 0;
+}
+
+/* Print every parsed header field, the seek table and the derived values
+   to stdout.
+
+   32-bit unsigned fields are printed with PRIu32 so that values above
+   INT_MAX are shown correctly.  The seek table is listed only when it is
+   actually loaded (ape_parseheaderbuf() leaves the pointer NULL) and has
+   exactly one entry per frame; otherwise "No seektable" is printed. */
+void ape_dumpinfo(struct ape_ctx_t* ape_ctx)
+{
+    const uint32_t entries = ape_ctx->seektablelength / sizeof(uint32_t);
+    uint32_t idx;
+    int i;
+
+    printf("Descriptor Block:\n\n");
+    printf("magic = \"%c%c%c%c\"\n",
+           ape_ctx->magic[0],ape_ctx->magic[1],
+           ape_ctx->magic[2],ape_ctx->magic[3]);
+    printf("fileversion = %d\n",ape_ctx->fileversion);
+    printf("descriptorlength = %" PRIu32 "\n",ape_ctx->descriptorlength);
+    printf("headerlength = %" PRIu32 "\n",ape_ctx->headerlength);
+    printf("seektablelength = %" PRIu32 "\n",ape_ctx->seektablelength);
+    printf("wavheaderlength = %" PRIu32 "\n",ape_ctx->wavheaderlength);
+    printf("audiodatalength = %" PRIu32 "\n",ape_ctx->audiodatalength);
+    printf("audiodatalength_high = %" PRIu32 "\n",ape_ctx->audiodatalength_high);
+    printf("wavtaillength = %" PRIu32 "\n",ape_ctx->wavtaillength);
+    printf("md5 = ");
+    for (i = 0; i < 16; i++)
+        printf("%02x",ape_ctx->md5[i]);
+    printf("\n");
+
+    printf("\nHeader Block:\n\n");
+
+    printf("compressiontype = %d\n",ape_ctx->compressiontype);
+    printf("formatflags = %d\n",ape_ctx->formatflags);
+    printf("blocksperframe = %" PRIu32 "\n",ape_ctx->blocksperframe);
+    printf("finalframeblocks = %" PRIu32 "\n",ape_ctx->finalframeblocks);
+    printf("totalframes = %" PRIu32 "\n",ape_ctx->totalframes);
+    printf("bps = %d\n",ape_ctx->bps);
+    printf("channels = %d\n",ape_ctx->channels);
+    printf("samplerate = %" PRIu32 "\n",ape_ctx->samplerate);
+
+    printf("\nSeektable\n\n");
+    if (ape_ctx->seektable == NULL || entries != ape_ctx->totalframes)
+    {
+        printf("No seektable\n");
+    }
+    else
+    {
+        for (idx = 0; idx < entries; idx++)
+        {
+            /* Every entry but the last also shows the distance to the
+               next entry, i.e. the size of the frame in bytes. */
+            if (idx + 1 < entries) {
+                printf("%8" PRIu32 " %" PRIu32 " (%" PRIu32 " bytes)\n",idx,
+                       ape_ctx->seektable[idx],
+                       ape_ctx->seektable[idx+1]-ape_ctx->seektable[idx]);
+            } else {
+                printf("%8" PRIu32 " %" PRIu32 "\n",idx,ape_ctx->seektable[idx]);
+            }
+        }
+    }
+    printf("\nCalculated information:\n\n");
+    printf("junklength = %" PRIu32 "\n",ape_ctx->junklength);
+    printf("firstframe = %" PRIu32 "\n",ape_ctx->firstframe);
+    printf("totalsamples = %" PRIu32 "\n",ape_ctx->totalsamples);
+}
+
+#endif /* !ROCKBOX */
diff --git a/libdemac/parser.h b/libdemac/parser.h
new file mode 100644
index 00000000..0cfa665d
--- /dev/null
+++ b/libdemac/parser.h
@@ -0,0 +1,137 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: parser.h 19552 2008-12-21 23:49:02Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_PARSER_H
+#define _APE_PARSER_H
+
+#include <inttypes.h>
+#include "demac_config.h"
+
+/* The earliest and latest file formats supported by this library */
+#define APE_MIN_VERSION 3970
+#define APE_MAX_VERSION 3990
+
+#define MAC_FORMAT_FLAG_8_BIT 1 // is 8-bit [OBSOLETE]
+#define MAC_FORMAT_FLAG_CRC 2 // uses the new CRC32 error detection [OBSOLETE]
+#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL 4 // uint32 nPeakLevel after the header [OBSOLETE]
+#define MAC_FORMAT_FLAG_24_BIT 8 // is 24-bit [OBSOLETE]
+#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS 16 // has the number of seek elements after the peak level
+#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER 32 // create the wave header on decompression (not stored)
+
+
+/* Special frame codes:
+
+ MONO_SILENCE - All PCM samples in frame are zero (mono streams only)
+ LEFT_SILENCE - All PCM samples for left channel in frame are zero (stereo streams)
+ RIGHT_SILENCE - All PCM samples for right channel in frame are zero (stereo streams)
+ PSEUDO_STEREO - Left and Right channels are identical
+
+*/
+
+#define APE_FRAMECODE_MONO_SILENCE 1
+#define APE_FRAMECODE_LEFT_SILENCE 1 /* same as mono */
+#define APE_FRAMECODE_RIGHT_SILENCE 2
+#define APE_FRAMECODE_STEREO_SILENCE 3 /* combined */
+#define APE_FRAMECODE_PSEUDO_STEREO 4
+
+#define PREDICTOR_ORDER 8
+/* Total size of all predictor histories - 50 * sizeof(int32_t) */
+#define PREDICTOR_SIZE 50
+
+
+/* NOTE: This struct is used in predictor-arm.S - any updates need to
+ be reflected there. */
+
+/* Predictor state, embedded in struct ape_ctx_t as its predictor member.
+ predictor-arm.S hard-codes byte offsets for these fields (buf at 0,
+ YlastA at 4, ... historybuffer at 100), which matches this exact field
+ order when the pointer and int32_t are both 4 bytes wide. */
+struct predictor_t
+{
+ /* Filter histories */
+ int32_t* buf;
+
+ int32_t YlastA;
+ int32_t XlastA;
+
+ /* NOTE: The order of the next four fields is important for
+ predictor-arm.S */
+ int32_t YfilterB;
+ int32_t XfilterA;
+ int32_t XfilterB;
+ int32_t YfilterA;
+
+ /* Adaption co-efficients */
+ int32_t YcoeffsA[4];
+ int32_t XcoeffsA[4];
+ int32_t YcoeffsB[5];
+ int32_t XcoeffsB[5];
+ int32_t historybuffer[PREDICTOR_HISTORY_SIZE + PREDICTOR_SIZE];
+};
+
+/* Everything known about one APE stream: the parsed descriptor and header
+ fields, values derived from them, the seek table and the running
+ decoder state.  Filled in by ape_parseheader() or
+ ape_parseheaderbuf(). */
+struct ape_ctx_t
+{
+ /* Derived fields */
+ uint32_t junklength; /* currently always set to 0 by both parsers */
+ uint32_t firstframe; /* sum of junk, descriptor, header, seek table and wav header lengths */
+ uint32_t totalsamples; /* finalframeblocks + blocksperframe * (totalframes - 1) */
+
+ /* Info from Descriptor Block */
+ char magic[4]; /* must be "MAC " */
+ int16_t fileversion;
+ int16_t padding1;
+ uint32_t descriptorlength;
+ uint32_t headerlength;
+ uint32_t seektablelength; /* in bytes */
+ uint32_t wavheaderlength;
+ uint32_t audiodatalength;
+ uint32_t audiodatalength_high;
+ uint32_t wavtaillength;
+ uint8_t md5[16];
+
+ /* Info from Header Block */
+ uint16_t compressiontype;
+ uint16_t formatflags; /* MAC_FORMAT_FLAG_* bits */
+ uint32_t blocksperframe;
+ uint32_t finalframeblocks;
+ uint32_t totalframes;
+ uint16_t bps;
+ uint16_t channels;
+ uint32_t samplerate;
+
+ /* Seektable */
+ uint32_t* seektable; /* Seektable buffer */
+ uint32_t maxseekpoints; /* Max seekpoints we can store (size of seektable buffer) */
+ uint32_t numseekpoints; /* Number of seekpoints */
+ int seektablefilepos; /* Location in .ape file of seektable */
+
+ /* Decoder state */
+ uint32_t CRC;
+ int frameflags; /* APE_FRAMECODE_* bits of the current frame */
+ int currentframeblocks;
+ int blocksdecoded; /* blocks decoded so far in the current frame */
+ struct predictor_t predictor;
+};
+
+/* ape_parseheader: parse the header from file descriptor fd and load the
+ seek table into malloc()ed memory; returns 0 on success, -1 on error.
+ ape_parseheaderbuf: parse the header from memory at buf; zeroes ape_ctx
+ first and returns -1 if the "MAC " magic is missing.
+ ape_dumpinfo: print the parsed fields to stdout.
+ ape_parseheader and ape_dumpinfo are only compiled when ROCKBOX is not
+ defined. */
+int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx);
+int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx);
+void ape_dumpinfo(struct ape_ctx_t* ape_ctx);
+
+#endif
diff --git a/libdemac/predictor-arm.S b/libdemac/predictor-arm.S
new file mode 100644
index 00000000..bfb96738
--- /dev/null
+++ b/libdemac/predictor-arm.S
@@ -0,0 +1,694 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: predictor-arm.S 21916 2009-07-17 09:17:54Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+#include "demac_config.h"
+
+ .section ICODE_SECTION_DEMAC_ARM,"ax",%progbits
+
+ .align 2
+
+/* NOTE: Keep in sync with parser.h. The first group are byte offsets from p->buf */
+
+#define YDELAYA 200
+#define YDELAYB 168
+#define XDELAYA 136
+#define XDELAYB 104
+#define YADAPTCOEFFSA 72
+#define XADAPTCOEFFSA 56
+#define YADAPTCOEFFSB 40
+#define XADAPTCOEFFSB 20
+
+/* struct predictor_t member byte offsets (buf is assumed to be 4 bytes): */
+#define buf 0 /* int32_t* buf */
+
+#define YlastA 4 /* int32_t YlastA; */
+#define XlastA 8 /* int32_t XlastA; */
+
+#define YfilterB 12 /* int32_t YfilterB; */
+#define XfilterA 16 /* int32_t XfilterA; */
+
+#define XfilterB 20 /* int32_t XfilterB; */
+#define YfilterA 24 /* int32_t YfilterA; */
+
+#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
+#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
+#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
+#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
+
+#define historybuffer 100 /* int32_t historybuffer[] */
+
+@ Macro for loading the 2 consecutive words at [base, offset] into reg1 and reg2,
+@ for various ARM versions. reg1 must be even and reg2 must be reg1+1.
+
+.macro LDR2OFS reg1, reg2, base, offset
+#if ARM_ARCH >= 6
+ ldrd \reg1, [\base, \offset]
+#else /* ARM_ARCH < 6 */
+#ifdef CPU_ARM7TDMI
+ add \reg1, \base, \offset /* reg1 doubles as the address temporary */
+ ldmia \reg1, {\reg1, \reg2}
+#else /* ARM9 (v4 and v5) is faster this way */
+ ldr \reg1, [\base, \offset]
+ ldr \reg2, [\base, \offset+4]
+#endif
+#endif /* ARM_ARCH */
+.endm
+
+@ Macro for storing reg1 and reg2 to the 2 consecutive words at [base, offset],
+@ for various ARM versions. reg1 must be even and reg2 must be reg1+1.
+
+.macro STR2OFS reg1, reg2, base, offset
+#if ARM_ARCH >= 6
+ strd \reg1, [\base, \offset]
+#else /* ARM_ARCH < 6: two single-word stores */
+ str \reg1, [\base, \offset]
+ str \reg2, [\base, \offset+4]
+#endif
+.endm
+
+ .global predictor_decode_stereo
+ .type predictor_decode_stereo,%function
+
+@ Register usage:
+@
+@ r0-r11 - scratch
+@ r12 - struct predictor_t* p
+@ r14 - int32_t* p->buf
+
+@ void predictor_decode_stereo(struct predictor_t* p,
+@ int32_t* decoded0,
+@ int32_t* decoded1,
+@ int count) - must be >= 1: only tested after a sample pair is decoded
+
+predictor_decode_stereo:
+ stmdb sp!, {r1-r11, lr} @ r1-r3 are pushed so the arguments live on the stack
+
+ @ r1 (decoded0) is [sp]
+ @ r2 (decoded1) is [sp, #4]
+ @ r3 (count) is [sp, #8]
+
+ mov r12, r0 @ r12 := p
+ ldr r14, [r0] @ r14 := p->buf
+
+loop:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y
+
+@ Predictor Y, Filter A
+
+ ldr r11, [r12, #YlastA] @ r11 := p->YlastA
+
+ add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
+ ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
+ @ r3 := p->buf[YDELAYA-2]
+ @ r10 := p->buf[YDELAYA-1]
+
+ add r6, r12, #YcoeffsA
+ ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0]
+ @ r7 := p->YcoeffsA[1]
+ @ r8 := p->YcoeffsA[2]
+ @ r9 := p->YcoeffsA[3]
+
+ subs r10, r11, r10 @ r10 := r11 - r10
+
+ STR2OFS r10, r11, r14, #YDELAYA-4
+ @ p->buf[YDELAYA-1] = r10
+ @ p->buf[YDELAYA] = r11
+
+ mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
+ mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+ mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+ mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+ @ flags were set above, in the subs instruction
+ mvngt r10, #0
+ movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+ cmp r11, #0
+ mvngt r11, #0
+ movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+ STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
+ @ p->buf[YADAPTCOEFFSA-1] := r10
+ @ p->buf[YADAPTCOEFFSA] := r11
+
+ @ NOTE: r0 now contains predictionA - don't overwrite.
+
+@ Predictor Y, Filter B
+
+ LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB
+ @ r7 := p->XfilterA
+
+ add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4]
+ ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4]
+ @ r3 := p->buf[YDELAYB-3]
+ @ r4 := p->buf[YDELAYB-2]
+ @ r10 := p->buf[YDELAYB-1]
+
+ rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
+ sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5)
+
+ str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA)
+
+ add r5, r12, #YcoeffsB
+ ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0]
+ @ r6 := p->YcoeffsB[1]
+ @ r7 := p->YcoeffsB[2]
+ @ r8 := p->YcoeffsB[3]
+ @ r9 := p->YcoeffsB[4]
+
+ subs r10, r11, r10 @ r10 := r11 - r10
+
+ STR2OFS r10, r11, r14, #YDELAYB-4
+ @ p->buf[YDELAYB-1] = r10
+ @ p->buf[YDELAYB] = r11
+
+ mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
+ mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
+ mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
+ mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
+ mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
+
+ @ flags were set above, in the subs instruction
+ mvngt r10, #0
+ movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+ cmp r11, #0
+ mvngt r11, #0
+ movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+ STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4
+ @ p->buf[YADAPTCOEFFSB-1] := r10
+ @ p->buf[YADAPTCOEFFSB] := r11
+
+ @ r0 still contains predictionA
+ @ r1 contains predictionB
+
+ @ Finish Predictor Y
+
+ ldr r2, [sp] @ r2 := decoded0
+ add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1)
+ ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA
+ ldr r3, [r2] @ r3 := *decoded0
+ rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
+ add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
+ str r1, [r12, #YlastA] @ p->YlastA := r1
+ add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
+ str r1, [r12, #YfilterA] @ p->YfilterA := r1
+
+ @ r1 contains p->YfilterA
+ @ r2 contains decoded0
+ @ r3 contains *decoded0
+
+ @ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4]
+ @ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
+
+ str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA)
+ str r2, [sp] @ save decoded0
+ cmp r3, #0
+ beq 3f
+
+ add r2, r14, #YADAPTCOEFFSB-16
+ ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4]
+ @ r3 := p->buf[YADAPTCOEFFSB-3]
+ @ r4 := p->buf[YADAPTCOEFFSB-2]
+ blt 1f
+
+ @ *decoded0 > 0
+
+ sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
+ sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
+ sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
+ sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
+ sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
+
+ add r0, r12, #YcoeffsB
+ stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
+
+ add r1, r12, #YcoeffsA
+ ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
+ @ r3 := p->YcoeffsA[1]
+ @ r4 := p->YcoeffsA[2]
+ @ r5 := p->YcoeffsA[3]
+
+ add r6, r14, #YADAPTCOEFFSA-12
+ ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
+ @ r7 := p->buf[YADAPTCOEFFSA-2]
+ @ r8 := p->buf[YADAPTCOEFFSA-1]
+ @ r9 := p->buf[YADAPTCOEFFSA]
+
+ sub r5, r5, r6 @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+ sub r4, r4, r7 @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+ sub r3, r3, r8 @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+ sub r2, r2, r9 @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+
+ b 2f
+
+
+1: @ *decoded0 < 0
+
+ add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
+ add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
+ add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
+ add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
+ add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
+
+ add r0, r12, #YcoeffsB
+ stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
+
+ add r1, r12, #YcoeffsA
+ ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
+ @ r3 := p->YcoeffsA[1]
+ @ r4 := p->YcoeffsA[2]
+ @ r5 := p->YcoeffsA[3]
+
+ add r6, r14, #YADAPTCOEFFSA-12
+ ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
+ @ r7 := p->buf[YADAPTCOEFFSA-2]
+ @ r8 := p->buf[YADAPTCOEFFSA-1]
+ @ r9 := p->buf[YADAPTCOEFFSA]
+
+ add r5, r5, r6 @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+ add r4, r4, r7 @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+ add r3, r3, r8 @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+ add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+
+2:
+ stmia r1, {r2 - r5} @ Save p->YcoeffsA
+
+3:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X
+
+@ Predictor X, Filter A
+
+ ldr r11, [r12, #XlastA] @ r11 := p->XlastA
+
+ add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3]
+ ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3]
+ @ r3 := p->buf[XDELAYA-2]
+ @ r10 := p->buf[XDELAYA-1]
+
+ add r6, r12, #XcoeffsA
+ ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0]
+ @ r7 := p->XcoeffsA[1]
+ @ r8 := p->XcoeffsA[2]
+ @ r9 := p->XcoeffsA[3]
+
+ subs r10, r11, r10 @ r10 := r11 - r10
+
+ STR2OFS r10, r11, r14, #XDELAYA-4
+ @ p->buf[XDELAYA-1] = r10
+ @ p->buf[XDELAYA] = r11
+
+ mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
+ mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
+ mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
+ mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
+
+ @ flags were set above, in the subs instruction
+ mvngt r10, #0
+ movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+ cmp r11, #0
+ mvngt r11, #0
+ movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+ STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4
+ @ p->buf[XADAPTCOEFFSA-1] := r10
+ @ p->buf[XADAPTCOEFFSA] := r11
+
+ @ NOTE: r0 now contains predictionA - don't overwrite.
+
+@ Predictor X, Filter B
+
+ LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB
+ @ r7 := p->YfilterA
+
+ add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4]
+ ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4]
+ @ r3 := p->buf[XDELAYB-3]
+ @ r4 := p->buf[XDELAYB-2]
+ @ r10 := p->buf[XDELAYB-1]
+
+ rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
+ sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5)
+
+ str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA)
+
+ add r5, r12, #XcoeffsB
+ ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0]
+ @ r6 := p->XcoeffsB[1]
+ @ r7 := p->XcoeffsB[2]
+ @ r8 := p->XcoeffsB[3]
+ @ r9 := p->XcoeffsB[4]
+
+ subs r10, r11, r10 @ r10 := r11 - r10
+
+ STR2OFS r10, r11, r14, #XDELAYB-4
+ @ p->buf[XDELAYB-1] = r10
+ @ p->buf[XDELAYB] = r11
+
+ mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
+ mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
+ mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
+ mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
+ mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
+
+ @ flags were set above, in the subs instruction
+ mvngt r10, #0
+ movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+ cmp r11, #0
+ mvngt r11, #0
+ movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+ STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4
+ @ p->buf[XADAPTCOEFFSB-1] := r10
+ @ p->buf[XADAPTCOEFFSB] := r11
+
+ @ r0 still contains predictionA
+ @ r1 contains predictionB
+
+ @ Finish Predictor X
+
+ ldr r2, [sp, #4] @ r2 := decoded1
+ add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1)
+ ldr r4, [r12, #XfilterA] @ r4 := p->XfilterA
+ ldr r3, [r2] @ r3 := *decoded1
+ rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
+ add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
+ str r1, [r12, #XlastA] @ p->XlastA := r1
+ add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
+ str r1, [r12, #XfilterA] @ p->XfilterA := r1
+
+ @ r1 contains p->XfilterA
+ @ r2 contains decoded1
+ @ r3 contains *decoded1
+
+ @ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4]
+ @ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
+
+ str r1, [r2], #4 @ *(decoded1++) := r1 (p->XfilterA)
+ str r2, [sp, #4] @ save decoded1
+ cmp r3, #0
+ beq 3f
+
+ add r2, r14, #XADAPTCOEFFSB-16
+ ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4]
+ @ r3 := p->buf[XADAPTCOEFFSB-3]
+ @ r4 := p->buf[XADAPTCOEFFSB-2]
+ blt 1f
+
+ @ *decoded1 > 0
+
+ sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
+ sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
+ sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
+ sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
+ sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
+
+ add r0, r12, #XcoeffsB
+ stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
+
+ add r1, r12, #XcoeffsA
+ ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
+ @ r3 := p->XcoeffsA[1]
+ @ r4 := p->XcoeffsA[2]
+ @ r5 := p->XcoeffsA[3]
+
+ add r6, r14, #XADAPTCOEFFSA-12
+ ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
+ @ r7 := p->buf[XADAPTCOEFFSA-2]
+ @ r8 := p->buf[XADAPTCOEFFSA-1]
+ @ r9 := p->buf[XADAPTCOEFFSA]
+
+ sub r5, r5, r6 @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
+ sub r4, r4, r7 @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
+ sub r3, r3, r8 @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
+ sub r2, r2, r9 @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
+
+ b 2f
+
+
+1: @ *decoded1 < 0
+
+ add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
+ add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
+ add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
+ add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
+ add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
+
+ add r0, r12, #XcoeffsB
+ stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
+
+ add r1, r12, #XcoeffsA
+ ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
+ @ r3 := p->XcoeffsA[1]
+ @ r4 := p->XcoeffsA[2]
+ @ r5 := p->XcoeffsA[3]
+
+ add r6, r14, #XADAPTCOEFFSA-12
+ ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
+ @ r7 := p->buf[XADAPTCOEFFSA-2]
+ @ r8 := p->buf[XADAPTCOEFFSA-1]
+ @ r9 := p->buf[XADAPTCOEFFSA]
+
+ add r5, r5, r6 @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
+ add r4, r4, r7 @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
+ add r3, r3, r8 @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
+ add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
+
+2:
+ stmia r1, {r2 - r5} @ Save p->XcoeffsA
+
+3:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
+
+ add r14, r14, #4 @ p->buf++
+
+ add r11, r12, #historybuffer @ r11 := &p->historybuffer[0]
+
+ sub r10, r14, #PREDICTOR_HISTORY_SIZE*4
+ @ r10 := p->buf - PREDICTOR_HISTORY_SIZE
+
+ ldr r0, [sp, #8]
+ cmp r10, r11
+ beq move_hist @ The history buffer is full, we need to do a memmove
+
+ @ Check loop count
+ subs r0, r0, #1
+ strne r0, [sp, #8]
+ bne loop
+
+done:
+ str r14, [r12] @ Save value of p->buf
+ add sp, sp, #12 @ Don't bother restoring r1-r3
+ ldmia sp!, {r4 - r11, pc}
+
+move_hist:
+ @ dest = r11 (p->historybuffer)
+ @ src = r14 (p->buf)
+ @ n = 200
+
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+
+ ldr r0, [sp, #8]
+ add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]
+
+ @ Check loop count
+ subs r0, r0, #1
+ strne r0, [sp, #8]
+ bne loop
+
+ b done
+ .size predictor_decode_stereo, .-predictor_decode_stereo
+
+ .global predictor_decode_mono
+ .type predictor_decode_mono,%function
+
+@ Register usage:
+@
+@ r0-r11 - scratch
+@ r12 - struct predictor_t* p
+@ r14 - int32_t* p->buf
+
+@ void predictor_decode_mono(struct predictor_t* p,
+@ int32_t* decoded0,
+@ int count) - must be >= 1: only tested after a sample is decoded
+
+predictor_decode_mono:
+ stmdb sp!, {r1, r2, r4-r11, lr} @ r1, r2 are pushed so the arguments live on the stack
+
+ @ r1 (decoded0) is [sp]
+ @ r2 (count) is [sp, #4]
+
+ mov r12, r0 @ r12 := p
+ ldr r14, [r0] @ r14 := p->buf
+
+loopm:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR
+
+ ldr r11, [r12, #YlastA] @ r11 := p->YlastA
+
+ add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
+ ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
+ @ r3 := p->buf[YDELAYA-2]
+ @ r10 := p->buf[YDELAYA-1]
+
+ add r5, r12, #YcoeffsA @ r5 := &p->YcoeffsA[0]
+ ldmia r5, {r6 - r9} @ r6 := p->YcoeffsA[0]
+ @ r7 := p->YcoeffsA[1]
+ @ r8 := p->YcoeffsA[2]
+ @ r9 := p->YcoeffsA[3]
+
+ subs r10, r11, r10 @ r10 := r11 - r10
+
+ STR2OFS r10, r11, r14, #YDELAYA-4
+ @ p->buf[YDELAYA-1] = r10
+ @ p->buf[YDELAYA] = r11
+
+ mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
+ mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+ mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+ mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+ @ flags were set above, in the subs instruction
+ mvngt r10, #0
+ movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+ cmp r11, #0
+ mvngt r11, #0
+ movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+ STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
+ @ p->buf[YADAPTCOEFFSA-1] := r10
+ @ p->buf[YADAPTCOEFFSA] := r11
+
+ ldr r2, [sp] @ r2 := decoded0
+ ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA
+ ldr r3, [r2] @ r3 := *decoded0
+ rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
+ add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
+ str r1, [r12, #YlastA] @ p->YlastA := r1
+ add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
+ str r1, [r12, #YfilterA] @ p->YfilterA := r1
+
+ @ r1 contains p->YfilterA
+ @ r2 contains decoded0
+ @ r3 contains *decoded0
+
+ @ r6, r7, r8, r9 contain p->YcoeffsA[0..3]
+ @ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA]
+
+ str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA)
+ str r2, [sp] @ save decoded0
+ cmp r3, #0
+ beq 3f
+
+ LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12
+ @ r2 := p->buf[YADAPTCOEFFSA-3]
+ @ r3 := p->buf[YADAPTCOEFFSA-2]
+ blt 1f
+
+ @ *decoded0 > 0
+
+ sub r6, r6, r11 @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+ sub r7, r7, r10 @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+ sub r9, r9, r2 @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+ sub r8, r8, r3 @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+
+ b 2f
+
+1: @ *decoded0 < 0
+
+ add r6, r6, r11 @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+ add r7, r7, r10 @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+ add r9, r9, r2 @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+ add r8, r8, r3 @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+
+2:
+ stmia r5, {r6 - r9} @ Save p->YcoeffsA
+
+3:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
+
+ add r14, r14, #4 @ p->buf++
+
+ add r11, r12, #historybuffer @ r11 := &p->historybuffer[0]
+
+ sub r10, r14, #PREDICTOR_HISTORY_SIZE*4
+ @ r10 := p->buf - PREDICTOR_HISTORY_SIZE
+
+ ldr r0, [sp, #4]
+ cmp r10, r11
+ beq move_histm @ The history buffer is full, we need to do a memmove
+
+ @ Check loop count
+ subs r0, r0, #1
+ strne r0, [sp, #4]
+ bne loopm
+
+donem:
+ str r14, [r12] @ Save value of p->buf
+ add sp, sp, #8 @ Don't bother restoring r1, r2
+ ldmia sp!, {r4 - r11, pc}
+
+move_histm:
+ @ dest = r11 (p->historybuffer)
+ @ src = r14 (p->buf)
+ @ n = 200
+
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+ ldmia r14!, {r0-r9} @ 40 bytes
+ stmia r11!, {r0-r9}
+
+ ldr r0, [sp, #4]
+ add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]
+
+ @ Check loop count
+ subs r0, r0, #1
+ strne r0, [sp, #4]
+ bne loopm
+
+ b donem
+ .size predictor_decode_mono, .-predictor_decode_mono
diff --git a/libdemac/predictor-cf.S b/libdemac/predictor-cf.S
new file mode 100644
index 00000000..341e57f1
--- /dev/null
+++ b/libdemac/predictor-cf.S
@@ -0,0 +1,659 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: predictor-cf.S 19296 2008-12-02 02:26:04Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+Coldfire predictor copyright (C) 2007 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+#include "demac_config.h"
+
+/* NOTE: Keep in sync with parser.h. The first group are byte offsets from p->buf */
+
+#define YDELAYA 200
+#define YDELAYB 168
+#define XDELAYA 136
+#define XDELAYB 104
+#define YADAPTCOEFFSA 72
+#define XADAPTCOEFFSA 56
+#define YADAPTCOEFFSB 40
+#define XADAPTCOEFFSB 20
+
+/* struct predictor_t member byte offsets (buf is assumed to be 4 bytes): */
+#define buf 0 /* int32_t* buf */
+
+#define YlastA 4 /* int32_t YlastA; */
+#define XlastA 8 /* int32_t XlastA; */
+
+#define YfilterB 12 /* int32_t YfilterB; */
+#define XfilterA 16 /* int32_t XfilterA; */
+
+#define XfilterB 20 /* int32_t XfilterB; */
+#define YfilterA 24 /* int32_t YfilterA; */
+
+#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
+#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
+#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
+#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
+
+#define historybuffer 100 /* int32_t historybuffer[] */
+
+
+ .text
+
+ .align 2
+
+ .global predictor_decode_stereo
+ .type predictor_decode_stereo,@function
+
+| void predictor_decode_stereo(struct predictor_t* p,
+| int32_t* decoded0,
+| int32_t* decoded1,
+| int count)
+
+predictor_decode_stereo:
+ lea.l (-12*4,%sp), %sp
+ movem.l %d2-%d7/%a2-%a6, (4,%sp)
+
+ movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0
+ | %a4 = decoded1
+ move.l %a5, (%sp) | (%sp) = count
+
+ move.l #0, %macsr | signed integer mode
+ move.l (12*4+4,%sp), %a6 | %a6 = p
+ move.l (%a6), %a5 | %a5 = p->buf
+
+.loop:
+
+ | ***** PREDICTOR Y *****
+
+ | Predictor Y, Filter A
+
+ move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
+
+ movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
+ | %d1 = p->buf[YDELAYA-2]
+ | %d2 = p->buf[YDELAYA-1]
+
+ move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
+
+ sub.l %d3, %d2
+ neg.l %d2 | %d2 = %d3 - %d2
+
+ move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
+
+ movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
+ | %d5 = p->YcoeffsA[1]
+ | %d6 = p->YcoeffsA[2]
+ | %d7 = p->YcoeffsA[3]
+
+ mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
+ mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+ mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+ mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+ tst.l %d2
+ beq.s 1f
+ spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
+ extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
+ or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
+1: | %d2 = SIGN(%d2)
+ move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
+
+ tst.l %d3
+ beq.s 1f
+ spl.b %d3
+ extb.l %d3
+ or.l #1, %d3
+1: | %d3 = SIGN(%d3)
+ move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
+
+ | Predictor Y, Filter B
+
+ movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB
+ | %d3 = p->XfilterA
+ move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3
+
+ move.l %d2, %d1 | %d1 = %d2
+ lsl.l #5, %d2 | %d2 = %d2 * 32
+ sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
+ asr.l #5, %d2 | %d2 >>= 5
+ sub.l %d2, %d3 | %d3 -= %d2
+
+ movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4]
+ | %d5 = p->buf[YDELAYB-3]
+ | %d6 = p->buf[YDELAYB-2]
+ | %d7 = p->buf[YDELAYB-1]
+ sub.l %d3, %d7
+ neg.l %d7 | %d7 = %d3 - %d7
+
+ move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
+
+ movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0]
+ | %d2 = p->YcoeffsB[1]
+ | %a0 = p->YcoeffsB[2]
+ | %a1 = p->YcoeffsB[3]
+ | %a2 = p->YcoeffsB[4]
+
+ mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0]
+ mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
+ mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
+ mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
+ mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
+
+ move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3
+
+ tst.l %d7
+ beq.s 1f
+ spl.b %d7
+ extb.l %d7
+ or.l #1, %d7
+1: | %d7 = SIGN(%d7)
+ move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
+ tst.l %d3
+ beq.s 1f
+ spl.b %d3
+ extb.l %d3
+ or.l #1, %d3
+1: | %d3 = SIGN(%d3)
+ move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3
+
+ | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4]
+ | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
+
+ move.l (%a3), %d0 | %d0 = *decoded0
+ beq.s 3f
+
+ movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4]
+ | %d5 = p->buf[YADAPTCOEFFSB-3]
+ | %d6 = p->buf[YADAPTCOEFFSB-2]
+
+ bmi.s 1f | flags still valid here
+
+ | *decoded0 > 0
+
+ sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
+ sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
+ sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
+ sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
+ sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
+
+ movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
+
+ movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
+ | %d5 = p->YcoeffsA[1]
+ | %d6 = p->YcoeffsA[2]
+ | %d7 = p->YcoeffsA[3]
+
+ movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
+ | %d2 = p->buf[YADAPTCOEFFSA-3]
+ | %a0 = p->buf[YADAPTCOEFFSA-2]
+ | %a1 = p->buf[YADAPTCOEFFSA-1]
+ | %a2 = p->buf[YADAPTCOEFFSA]
+
+ sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+ sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+ sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+ sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+
+ bra.s 2f
+
+1: | *decoded0 < 0
+
+ add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
+ add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
+ add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
+ add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
+ add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
+
+ movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
+
+ movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
+ | %d5 = p->YcoeffsA[1]
+ | %d6 = p->YcoeffsA[2]
+ | %d7 = p->YcoeffsA[3]
+
+ movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
+ | %d2 = p->buf[YADAPTCOEFFSA-3]
+ | %a0 = p->buf[YADAPTCOEFFSA-2]
+ | %a1 = p->buf[YADAPTCOEFFSA-1]
+ | %a2 = p->buf[YADAPTCOEFFSA]
+
+ add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+ add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+ add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+ add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+
+2:
+ movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
+
+3:
+ | Finish Predictor Y
+
+ movclr.l %acc0, %d1 | %d1 = predictionA
+ movclr.l %acc1, %d2 | %d2 = predictionB
+ asr.l #1, %d2
+ add.l %d2, %d1 | %d1 += (%d2 >> 1)
+ asr.l #8, %d1
+ asr.l #2, %d1 | %d1 >>= 10
+ add.l %d0, %d1 | %d1 += %d0
+ move.l %d1, (YlastA,%a6) | p->YlastA = %d1
+
+ move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
+ move.l %d2, %d0
+ lsl.l #5, %d2
+ sub.l %d0, %d2 | %d2 = 31 * %d2
+ asr.l #5, %d2 | %d2 >>= 5
+ add.l %d1, %d2
+ move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
+
+ | *decoded0 stored 2 instructions down, avoiding pipeline stall
+
+ | ***** PREDICTOR X *****
+
+ | Predictor X, Filter A
+
+ move.l (XlastA,%a6), %d3 | %d3 = p->XlastA
+
+ move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA)
+
+ movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3]
+ | %d1 = p->buf[XDELAYA-2]
+ | %d2 = p->buf[XDELAYA-1]
+
+ move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3
+
+ sub.l %d3, %d2
+ neg.l %d2 | %d2 = %d3 -%d2
+
+ move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
+
+ movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
+ | %d5 = p->XcoeffsA[1]
+ | %d6 = p->XcoeffsA[2]
+ | %d7 = p->XcoeffsA[3]
+
+ mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0]
+ mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
+ mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
+ mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
+
+ tst.l %d2
+ beq.s 1f
+ spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
+ extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
+ or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
+1: | %d2 = SIGN(%d2)
+ move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2
+
+ tst.l %d3
+ beq.s 1f
+ spl.b %d3
+ extb.l %d3
+ or.l #1, %d3
+1: | %d3 = SIGN(%d3)
+ move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3
+
+ | Predictor X, Filter B
+
+ movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB
+ | %d3 = p->YfilterA
+ move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3
+
+ move.l %d2, %d1 | %d1 = %d2
+ lsl.l #5, %d2 | %d2 = %d2 * 32
+ sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
+ asr.l #5, %d2 | %d2 >>= 5
+ sub.l %d2, %d3 | %d3 -= %d2
+
+ movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4]
+ | %d5 = p->buf[XDELAYB-3]
+ | %d6 = p->buf[XDELAYB-2]
+ | %d7 = p->buf[XDELAYB-1]
+ sub.l %d3, %d7
+ neg.l %d7 | %d7 = %d3 - %d7
+
+ move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
+
+ movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0]
+ | %d2 = p->XcoeffsB[1]
+ | %a0 = p->XcoeffsB[2]
+ | %a1 = p->XcoeffsB[3]
+ | %a2 = p->XcoeffsB[4]
+
+ mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0]
+ mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
+ mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
+ mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
+ mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
+
+ move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3
+
+ tst.l %d7
+ beq.s 1f
+ spl.b %d7
+ extb.l %d7
+ or.l #1, %d7
+1: | %d7 = SIGN(%d7)
+ move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
+
+ tst.l %d3
+ beq.s 1f
+ spl.b %d3
+ extb.l %d3
+ or.l #1, %d3
+1: | %d3 = SIGN(%d3)
+ move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3
+
+ | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4]
+ | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
+
+ move.l (%a4), %d0 | %d0 = *decoded1
+ beq.s 3f
+
+ movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4]
+ | %d5 = p->buf[XADAPTCOEFFSB-3]
+ | %d6 = p->buf[XADAPTCOEFFSB-2]
+
+ bmi.s 1f | flags still valid here
+
+ | *decoded1 > 0
+
+ sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
+ sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
+ sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
+ sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
+ sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
+
+ movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
+
+ movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
+ | %d5 = p->XcoeffsA[1]
+ | %d6 = p->XcoeffsA[2]
+ | %d7 = p->XcoeffsA[3]
+
+ movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
+ | %d2 = p->buf[XADAPTCOEFFSA-3]
+ | %a0 = p->buf[XADAPTCOEFFSA-2]
+ | %a1 = p->buf[XADAPTCOEFFSA-1]
+ | %a2 = p->buf[XADAPTCOEFFSA]
+
+ sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
+ sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
+ sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
+ sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
+
+ bra.s 2f
+
+1: | *decoded1 < 0
+
+ add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
+ add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
+ add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
+ add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
+ add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
+
+ movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
+
+ movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
+ | %d5 = p->XcoeffsA[1]
+ | %d6 = p->XcoeffsA[2]
+ | %d7 = p->XcoeffsA[3]
+
+ movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
+ | %d2 = p->buf[XADAPTCOEFFSA-3]
+ | %a0 = p->buf[XADAPTCOEFFSA-2]
+ | %a1 = p->buf[XADAPTCOEFFSA-1]
+ | %a2 = p->buf[XADAPTCOEFFSA]
+
+ add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
+ add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
+ add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
+ add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
+
+2:
+ movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
+
+3:
+ | Finish Predictor X
+
+ movclr.l %acc0, %d1 | %d1 = predictionA
+ movclr.l %acc1, %d2 | %d2 = predictionB
+ asr.l #1, %d2
+ add.l %d2, %d1 | %d1 += (%d2 >> 1)
+ asr.l #8, %d1
+ asr.l #2, %d1 | %d1 >>= 10
+ add.l %d0, %d1 | %d1 += %d0
+ move.l %d1, (XlastA,%a6) | p->XlastA = %d1
+
+ move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA
+ move.l %d2, %d0
+ lsl.l #5, %d2
+ sub.l %d0, %d2 | %d2 = 31 * %d2
+ asr.l #5, %d2 | %d2 >>= 5
+ add.l %d1, %d2
+ move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2
+
+ | *decoded1 stored 3 instructions down, avoiding pipeline stall
+
+ | ***** COMMON *****
+
+ addq.l #4, %a5 | p->buf++
+ lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2
+ | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
+
+ move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA)
+
+ cmp.l %a2, %a5
+ beq.s .move_hist | History buffer is full, we need to do a memmove
+
+ subq.l #1, (%sp) | decrease loop count
+ bne.w .loop
+
+.done:
+ move.l %a5, (%a6) | Save value of p->buf
+ movem.l (4,%sp), %d2-%d7/%a2-%a6
+ lea.l (12*4,%sp), %sp
+ rts
+
+.move_hist:
+ lea.l (historybuffer,%a6), %a2
+
+ | dest = %a2 (p->historybuffer)
+ | src = %a5 (p->buf)
+ | n = 200
+
+ movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (%a2)
+ movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (40,%a2)
+ movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (80,%a2)
+ movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (120,%a2)
+ movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (160,%a2)
+
+ move.l %a2, %a5 | p->buf = &p->historybuffer[0]
+
+ subq.l #1, (%sp) | decrease loop count
+ bne.w .loop
+
+ bra.s .done
+ .size predictor_decode_stereo, .-predictor_decode_stereo
+
+
+ .global predictor_decode_mono
+ .type predictor_decode_mono,@function
+
+| void predictor_decode_mono(struct predictor_t* p,
+| int32_t* decoded0,
+| int count)
+
+predictor_decode_mono:
+ lea.l (-11*4,%sp), %sp
+ movem.l %d2-%d7/%a2-%a6, (%sp)
+
+ move.l #0, %macsr | signed integer mode
+
+ move.l (11*4+4,%sp), %a6 | %a6 = p
+ move.l (11*4+8,%sp), %a4 | %a4 = decoded0
+ move.l (11*4+12,%sp), %d7 | %d7 = count
+ move.l (%a6), %a5 | %a5 = p->buf
+
+ move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
+
+.loopm:
+
+ | ***** PREDICTOR *****
+
+ movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
+ | %d1 = p->buf[YDELAYA-2]
+ | %d2 = p->buf[YDELAYA-1]
+
+ move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
+
+ sub.l %d3, %d2
+ neg.l %d2 | %d2 = %d3 - %d2
+
+ move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
+
+ movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0]
+ | %a1 = p->YcoeffsA[1]
+ | %a2 = p->YcoeffsA[2]
+ | %a3 = p->YcoeffsA[3]
+
+ mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
+ mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+ mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+ mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+ tst.l %d2
+ beq.s 1f
+ spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
+ extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
+ or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
+1: | %d2 = SIGN(%d2)
+ move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
+
+ tst.l %d3
+ beq.s 1f
+ spl.b %d3
+ extb.l %d3
+ or.l #1, %d3
+1: | %d3 = SIGN(%d3)
+ move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
+
+ move.l (%a4), %d0 | %d0 = *decoded0
+ beq.s 3f
+
+ movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3]
+ | %d5 = p->buf[YADAPTCOEFFSA-2]
+
+ bmi.s 1f | flags still valid here
+
+ | *decoded0 > 0
+
+ sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+ sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+ sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+ sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+
+ bra.s 2f
+
+1: | *decoded0 < 0
+
+ add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+ add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+ add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+ add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+
+2:
+ movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[]
+
+3:
+ | Finish Predictor
+
+ movclr.l %acc0, %d3 | %d3 = predictionA
+ asr.l #8, %d3
+ asr.l #2, %d3 | %d3 >>= 10
+ add.l %d0, %d3 | %d3 += %d0
+
+ move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
+ move.l %d2, %d0
+ lsl.l #5, %d2
+ sub.l %d0, %d2 | %d2 = 31 * %d2
+ asr.l #5, %d2 | %d2 >>= 5
+ add.l %d3, %d2
+ move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
+
+ | *decoded0 stored 3 instructions down, avoiding pipeline stall
+
+ | ***** COMMON *****
+
+ addq.l #4, %a5 | p->buf++
+ lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3
+ | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
+
+ move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA)
+
+ cmp.l %a3, %a5
+ beq.s .move_histm | History buffer is full, we need to do a memmove
+
+ subq.l #1, %d7 | decrease loop count
+ bne.w .loopm
+
+ move.l %d3, (YlastA,%a6) | p->YlastA = %d3
+
+.donem:
+ move.l %a5, (%a6) | Save value of p->buf
+ movem.l (%sp), %d2-%d7/%a2-%a6
+ lea.l (11*4,%sp), %sp
+ rts
+
+.move_histm:
+ move.l %d3, (YlastA,%a6) | p->YlastA = %d3 (%d3 is reused by the copy)
+
+ lea.l (historybuffer,%a6), %a3
+
+ | dest = %a3 (p->historybuffer)
+ | src = %a5 (p->buf)
+ | n = 200 bytes (5 x 40)
+
+ movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes
+ movem.l %d0-%d6/%a0-%a2, (%a3)
+ movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes
+ movem.l %d0-%d6/%a0-%a2, (40,%a3)
+ movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes
+ movem.l %d0-%d6/%a0-%a2, (80,%a3)
+ movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes
+ movem.l %d0-%d6/%a0-%a2, (120,%a3)
+ movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes
+ movem.l %d0-%d6/%a0-%a2, (160,%a3)
+
+ move.l %a3, %a5 | p->buf = &p->historybuffer[0]
+
+ move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
+
+ subq.l #1, %d7 | decrease loop count
+ bne.w .loopm
+
+ bra.s .donem
+ .size predictor_decode_mono, .-predictor_decode_mono
diff --git a/libdemac/predictor.c b/libdemac/predictor.c
new file mode 100644
index 00000000..7d914b5f
--- /dev/null
+++ b/libdemac/predictor.c
@@ -0,0 +1,271 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: predictor.c 19375 2008-12-09 23:20:59Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "parser.h"
+#include "predictor.h"
+#include "demac_config.h"
+
+/* 0 if x is zero, -1 if x is positive, 1 if x is negative (x evaluated twice) */
+#define SIGN(x) ((x) ? (((x) > 0) ? -1 : 1) : 0)
+
+static const int32_t initial_coeffs[4] = {
+ 360, 317, -109, 98
+};
+
+#define YDELAYA (18 + PREDICTOR_ORDER*4)
+#define YDELAYB (18 + PREDICTOR_ORDER*3)
+#define XDELAYA (18 + PREDICTOR_ORDER*2)
+#define XDELAYB (18 + PREDICTOR_ORDER)
+
+#define YADAPTCOEFFSA (18)
+#define XADAPTCOEFFSA (14)
+#define YADAPTCOEFFSB (10)
+#define XADAPTCOEFFSB (5)
+
+void init_predictor_decoder(struct predictor_t* p)
+{
+ /* Zero the first PREDICTOR_SIZE history entries and rewind p->buf to them */
+ memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
+ p->buf = p->historybuffer;
+
+ /* A co-efficients start from initial_coeffs, B co-efficients from zero */
+ memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs));
+ memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs));
+ memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB));
+ memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB));
+
+ p->YfilterA = 0;
+ p->YfilterB = 0;
+ p->YlastA = 0;
+
+ p->XfilterA = 0;
+ p->XfilterB = 0;
+ p->XlastA = 0;
+}
+
+#if !defined(CPU_ARM) && !defined(CPU_COLDFIRE)
+void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p,
+ int32_t* decoded0,
+ int32_t* decoded1,
+ int count)
+{
+ int32_t predictionA, predictionB;
+
+ while (LIKELY(count--))
+ {
+ /* Predictor Y: residual is *decoded0, B stage is fed from p->XfilterA */
+ p->buf[YDELAYA] = p->YlastA;
+ p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
+
+ p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
+ p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
+
+ predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) +
+ (p->buf[YDELAYA-1] * p->YcoeffsA[1]) +
+ (p->buf[YDELAYA-2] * p->YcoeffsA[2]) +
+ (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
+
+ /* B input: XfilterA minus 31/32 of the XfilterA saved in YfilterB */
+ p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5);
+ p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]);
+ p->YfilterB = p->XfilterA;
+
+ p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1];
+ p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]);
+
+ predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) +
+ (p->buf[YDELAYB-1] * p->YcoeffsB[1]) +
+ (p->buf[YDELAYB-2] * p->YcoeffsB[2]) +
+ (p->buf[YDELAYB-3] * p->YcoeffsB[3]) +
+ (p->buf[YDELAYB-4] * p->YcoeffsB[4]);
+
+ p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10);
+ p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5);
+
+ /* Predictor X: residual is *decoded1, B stage uses the new p->YfilterA */
+
+ p->buf[XDELAYA] = p->XlastA;
+ p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]);
+ p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1];
+ p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]);
+
+ predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) +
+ (p->buf[XDELAYA-1] * p->XcoeffsA[1]) +
+ (p->buf[XDELAYA-2] * p->XcoeffsA[2]) +
+ (p->buf[XDELAYA-3] * p->XcoeffsA[3]);
+
+ /* B input: YfilterA minus 31/32 of the YfilterA saved in XfilterB */
+ p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5);
+ p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]);
+ p->XfilterB = p->YfilterA;
+ p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1];
+ p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]);
+
+ predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) +
+ (p->buf[XDELAYB-1] * p->XcoeffsB[1]) +
+ (p->buf[XDELAYB-2] * p->XcoeffsB[2]) +
+ (p->buf[XDELAYB-3] * p->XcoeffsB[3]) +
+ (p->buf[XDELAYB-4] * p->XcoeffsB[4]);
+
+ p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10);
+ p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5);
+
+ if (LIKELY(*decoded0 != 0))
+ {
+ if (*decoded0 > 0)
+ {
+ p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
+ p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
+ p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
+ p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
+
+ p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB];
+ p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1];
+ p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2];
+ p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3];
+ p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4];
+ }
+ else
+ {
+ p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
+ p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
+ p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
+ p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
+
+ p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB];
+ p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1];
+ p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2];
+ p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3];
+ p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4];
+ }
+ }
+
+ *(decoded0++) = p->YfilterA;
+
+ if (LIKELY(*decoded1 != 0))
+ {
+ if (*decoded1 > 0)
+ {
+ p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA];
+ p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1];
+ p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2];
+ p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3];
+
+ p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB];
+ p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1];
+ p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2];
+ p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3];
+ p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4];
+ }
+ else
+ {
+ p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA];
+ p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1];
+ p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2];
+ p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3];
+
+ p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB];
+ p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1];
+ p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2];
+ p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3];
+ p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4];
+ }
+ }
+
+ *(decoded1++) = p->XfilterA;
+
+ /* Combined: slide the shared history window forward by one entry */
+ p->buf++;
+
+ /* History full? Move the PREDICTOR_SIZE entries at p->buf to the start */
+ if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) {
+ memmove(p->historybuffer, p->buf,
+ PREDICTOR_SIZE * sizeof(int32_t));
+ p->buf = p->historybuffer;
+ }
+ }
+}
+
+void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p,
+ int32_t* decoded0,
+ int count)
+{
+ int32_t predictionA, currentA, A;
+
+ currentA = p->YlastA;
+
+ while (LIKELY(count--))
+ {
+ A = *decoded0;
+
+ p->buf[YDELAYA] = currentA;
+ p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
+
+ predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) +
+ (p->buf[YDELAYA-1] * p->YcoeffsA[1]) +
+ (p->buf[YDELAYA-2] * p->YcoeffsA[2]) +
+ (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
+
+ currentA = A + (predictionA >> 10);
+
+ p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
+ p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
+
+ if (LIKELY(A != 0))
+ {
+ if (A > 0)
+ {
+ p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
+ p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
+ p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
+ p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
+ }
+ else
+ {
+ p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
+ p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
+ p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
+ p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
+ }
+ }
+
+ p->buf++;
+
+ /* History full? Move the PREDICTOR_SIZE entries at p->buf to the start */
+ if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) {
+ memmove(p->historybuffer, p->buf,
+ PREDICTOR_SIZE * sizeof(int32_t));
+ p->buf = p->historybuffer;
+ }
+
+ p->YfilterA = currentA + ((p->YfilterA * 31) >> 5);
+ *(decoded0++) = p->YfilterA;
+ }
+
+ p->YlastA = currentA;
+}
+#endif
diff --git a/libdemac/predictor.h b/libdemac/predictor.h
new file mode 100644
index 00000000..ccf2b39b
--- /dev/null
+++ b/libdemac/predictor.h
@@ -0,0 +1,38 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: predictor.h 19236 2008-11-26 18:01:18Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_PREDICTOR_H
+#define _APE_PREDICTOR_H
+
+#include <inttypes.h>
+#include "parser.h"
+#include "filter.h"
+
+void init_predictor_decoder(struct predictor_t* p);
+void predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0,
+ int32_t* decoded1, int count);
+void predictor_decode_mono(struct predictor_t* p, int32_t* decoded0,
+ int count);
+
+#endif
diff --git a/libdemac/vector_math16_armv5te.h b/libdemac/vector_math16_armv5te.h
new file mode 100644
index 00000000..81a5cb6e
--- /dev/null
+++ b/libdemac/vector_math16_armv5te.h
@@ -0,0 +1,312 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: vector_math16_armv5te.h 19260 2008-11-28 23:50:22Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+ARMv5te vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* v1[i] += v2[i], 16 elements per loop pass. Fetches data as 32 bit words
+ * and *requires* v1 to be 32 bit aligned (else data abort, or incorrect
+ * results if ARM aligncheck is disabled). v2 may be only 16 bit aligned. */
+static inline void vector_add(int16_t* v1, int16_t* v2)
+{
+#if ORDER > 16
+ int cnt = ORDER>>4;
+#endif
+
+#define ADDHALFREGS(sum, s1) /* Adds register */ \
+ "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight. */ \
+ "add r8 , " #s1 ", " #sum ", lsl #16 \n" /* Clobbers 's1' */ \
+ "add " #sum ", " #s1 ", " #sum ", lsr #16 \n" /* and r8. */ \
+ "mov " #sum ", " #sum ", lsl #16 \n" \
+ "orr " #sum ", " #sum ", r8 , lsr #16 \n"
+
+#define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \
+ "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \
+ "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \
+ "mov " #sum ", " #sum ", lsl #16 \n" \
+ "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n"
+
+ asm volatile (
+ "tst %[v2], #2 \n"
+ "beq 20f \n"
+
+ "10: \n"
+ "ldrh r4, [%[v2]], #2 \n"
+ "mov r4, r4, lsl #16 \n"
+ "1: \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r5-r8} \n"
+ ADDHALFXREGS(r0, r4, r5)
+ ADDHALFXREGS(r1, r5, r6)
+ ADDHALFXREGS(r2, r6, r7)
+ ADDHALFXREGS(r3, r7, r8)
+ "stmia %[v1]!, {r0-r3} \n"
+ "mov r4, r8 \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r5-r8} \n"
+ ADDHALFXREGS(r0, r4, r5)
+ ADDHALFXREGS(r1, r5, r6)
+ ADDHALFXREGS(r2, r6, r7)
+ ADDHALFXREGS(r3, r7, r8)
+ "stmia %[v1]!, {r0-r3} \n"
+#if ORDER > 16
+ "mov r4, r8 \n"
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ "b 99f \n"
+
+ "20: \n"
+ "1: \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ ADDHALFREGS(r0, r4)
+ ADDHALFREGS(r1, r5)
+ ADDHALFREGS(r2, r6)
+ ADDHALFREGS(r3, r7)
+ "stmia %[v1]!, {r0-r3} \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ ADDHALFREGS(r0, r4)
+ ADDHALFREGS(r1, r5)
+ ADDHALFREGS(r2, r6)
+ ADDHALFREGS(r3, r7)
+ "stmia %[v1]!, {r0-r3} \n"
+#if ORDER > 16
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 16
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers ("cc": tst/subs alter the condition flags) */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "r8", "cc", "memory"
+ );
+}
+
+/* v1[i] -= v2[i], 16 elements per loop pass. Fetches data as 32 bit words
+ * and *requires* v1 to be 32 bit aligned (else data abort, or incorrect
+ * results if ARM aligncheck is disabled). v2 may be only 16 bit aligned. */
+static inline void vector_sub(int16_t* v1, int16_t* v2)
+{
+#if ORDER > 16
+ int cnt = ORDER>>4;
+#endif
+
+#define SUBHALFREGS(dif, s1) /* Subtracts register */ \
+ "sub r8 , " #dif ", " #s1 "\n" /* halves straight. */ \
+ "and r8 , r8 , r9 \n" /* Needs r9 = 0x0000ffff, */ \
+ "mov " #dif ", " #dif ", lsr #16 \n" /* clobbers r8. */ \
+ "sub " #dif ", " #dif ", " #s1 ", lsr #16 \n" \
+ "orr " #dif ", r8 , " #dif ", lsl #16 \n"
+
+#define SUBHALFXREGS(dif, s1, s2) /* Subtracts register */ \
+ "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \
+ "and " #s1 ", " #s1 ", r9 \n" /* Needs r9 = 0x0000ffff, */ \
+ "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* clobbers 's1'. */ \
+ "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n"
+
+ asm volatile (
+ "mov r9, #0xff \n"
+ "orr r9, r9, #0xff00 \n"
+ "tst %[v2], #2 \n"
+ "beq 20f \n"
+
+ "10: \n"
+ "ldrh r4, [%[v2]], #2 \n"
+ "mov r4, r4, lsl #16 \n"
+ "1: \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r5-r8} \n"
+ SUBHALFXREGS(r0, r4, r5)
+ SUBHALFXREGS(r1, r5, r6)
+ SUBHALFXREGS(r2, r6, r7)
+ SUBHALFXREGS(r3, r7, r8)
+ "stmia %[v1]!, {r0-r3} \n"
+ "mov r4, r8 \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r5-r8} \n"
+ SUBHALFXREGS(r0, r4, r5)
+ SUBHALFXREGS(r1, r5, r6)
+ SUBHALFXREGS(r2, r6, r7)
+ SUBHALFXREGS(r3, r7, r8)
+ "stmia %[v1]!, {r0-r3} \n"
+#if ORDER > 16
+ "mov r4, r8 \n"
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ "b 99f \n"
+
+ "20: \n"
+ "1: \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ SUBHALFREGS(r0, r4)
+ SUBHALFREGS(r1, r5)
+ SUBHALFREGS(r2, r6)
+ SUBHALFREGS(r3, r7)
+ "stmia %[v1]!, {r0-r3} \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ SUBHALFREGS(r0, r4)
+ SUBHALFREGS(r1, r5)
+ SUBHALFREGS(r2, r6)
+ SUBHALFREGS(r3, r7)
+ "stmia %[v1]!, {r0-r3} \n"
+#if ORDER > 16
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 16
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers ("cc": tst/subs alter the condition flags) */
+ "r0", "r1", "r2", "r3", "r4", "r5",
+ "r6", "r7", "r8", "r9", "cc", "memory"
+ );
+}
+
+/* Returns the sum of v1[i] * v2[i]. Fetches data as 32 bit words and
+ * *requires* v1 to be 32 bit aligned (else data abort, or incorrect
+ * results if ARM aligncheck is disabled). v2 may be only 16 bit aligned. */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+ int res;
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+#if ORDER > 16
+#define MLA_BLOCKS "3"
+#else
+#define MLA_BLOCKS "1"
+#endif
+
+ asm volatile (
+#if ORDER > 32
+ "mov %[res], #0 \n"
+#endif
+ "tst %[v2], #2 \n"
+ "beq 20f \n"
+
+ "10: \n"
+ "ldrh r7, [%[v2]], #2 \n"
+#if ORDER > 32
+ "mov r7, r7, lsl #16 \n"
+ "1: \n"
+ "ldmia %[v1]!, {r0-r3} \n"
+ "smlabt %[res], r0, r7, %[res] \n"
+#else
+ "ldmia %[v1]!, {r0-r3} \n"
+ "smulbb %[res], r0, r7 \n"
+#endif
+ "ldmia %[v2]!, {r4-r7} \n"
+ "smlatb %[res], r0, r4, %[res] \n"
+ "smlabt %[res], r1, r4, %[res] \n"
+ "smlatb %[res], r1, r5, %[res] \n"
+ "smlabt %[res], r2, r5, %[res] \n"
+ "smlatb %[res], r2, r6, %[res] \n"
+ "smlabt %[res], r3, r6, %[res] \n"
+ "smlatb %[res], r3, r7, %[res] \n"
+
+ ".rept " MLA_BLOCKS "\n"
+ "ldmia %[v1]!, {r0-r3} \n"
+ "smlabt %[res], r0, r7, %[res] \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ "smlatb %[res], r0, r4, %[res] \n"
+ "smlabt %[res], r1, r4, %[res] \n"
+ "smlatb %[res], r1, r5, %[res] \n"
+ "smlabt %[res], r2, r5, %[res] \n"
+ "smlatb %[res], r2, r6, %[res] \n"
+ "smlabt %[res], r3, r6, %[res] \n"
+ "smlatb %[res], r3, r7, %[res] \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ "b 99f \n"
+
+ "20: \n"
+ "1: \n"
+ "ldmia %[v1]!, {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+#if ORDER > 32
+ "smlabb %[res], r0, r4, %[res] \n"
+#else
+ "smulbb %[res], r0, r4 \n"
+#endif
+ "smlatt %[res], r0, r4, %[res] \n"
+ "smlabb %[res], r1, r5, %[res] \n"
+ "smlatt %[res], r1, r5, %[res] \n"
+ "smlabb %[res], r2, r6, %[res] \n"
+ "smlatt %[res], r2, r6, %[res] \n"
+ "smlabb %[res], r3, r7, %[res] \n"
+ "smlatt %[res], r3, r7, %[res] \n"
+
+ ".rept " MLA_BLOCKS "\n"
+ "ldmia %[v1]!, {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ "smlabb %[res], r0, r4, %[res] \n"
+ "smlatt %[res], r0, r4, %[res] \n"
+ "smlabb %[res], r1, r5, %[res] \n"
+ "smlatt %[res], r1, r5, %[res] \n"
+ "smlabb %[res], r2, r6, %[res] \n"
+ "smlatt %[res], r2, r6, %[res] \n"
+ "smlabb %[res], r3, r7, %[res] \n"
+ "smlatt %[res], r3, r7, %[res] \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2),
+ [res]"=r"(res)
+ : /* inputs */
+ : /* clobbers ("cc": flags change; "memory": *v1 and *v2 are read) */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "cc", "memory"
+ );
+ return res;
+}
diff --git a/libdemac/vector_math16_armv6.h b/libdemac/vector_math16_armv6.h
new file mode 100644
index 00000000..f6505f42
--- /dev/null
+++ b/libdemac/vector_math16_armv6.h
@@ -0,0 +1,289 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: vector_math16_armv6.h 19198 2008-11-24 18:40:43Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+ARMv6 vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* v1[i] += v2[i] using sadd16. Fetches data as 32 bit words and
+ * *requires* v1 to be 32 bit aligned (else data abort, or incorrect
+ * results if ARM aligncheck is disabled). v2 may be only 16 bit aligned. */
+static inline void vector_add(int16_t* v1, int16_t* v2)
+{
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+#if ORDER > 16
+#define ADD_SUB_BLOCKS "4"
+#else
+#define ADD_SUB_BLOCKS "2"
+#endif
+
+ asm volatile (
+ "tst %[v2], #2 \n"
+ "beq 20f \n"
+
+ "10: \n"
+ "bic %[v2], %[v2], #2 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+ "1: \n"
+ ".rept " ADD_SUB_BLOCKS "\n"
+ "ldmia %[v2]!, {r6-r7} \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "mov r5, r5, ror #16 \n"
+ "pkhtb r4, r5, r4, asr #16 \n"
+ "sadd16 r0, r0, r4 \n"
+ "pkhbt r5, r5, r6, lsl #16 \n"
+ "sadd16 r1, r1, r5 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+ "mov r7, r7, ror #16 \n"
+ "pkhtb r6, r7, r6, asr #16 \n"
+ "sadd16 r2, r2, r6 \n"
+ "pkhbt r7, r7, r4, lsl #16 \n"
+ "sadd16 r3, r3, r7 \n"
+ "stmia %[v1]!, {r0-r3} \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ "b 99f \n"
+
+ "20: \n"
+ "1: \n"
+ ".rept " ADD_SUB_BLOCKS "\n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "sadd16 r0, r0, r4 \n"
+ "sadd16 r1, r1, r5 \n"
+ "sadd16 r2, r2, r6 \n"
+ "sadd16 r3, r3, r7 \n"
+ "stmia %[v1]!, {r0-r3} \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers ("cc": tst/subs alter the condition flags) */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "cc", "memory"
+ );
+}
+
+/* v1[i] -= v2[i] using ssub16. Fetches data as 32 bit words and
+ * *requires* v1 to be 32 bit aligned (else data abort, or incorrect
+ * results if ARM aligncheck is disabled). v2 may be only 16 bit aligned. */
+static inline void vector_sub(int16_t* v1, int16_t* v2)
+{
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+ asm volatile (
+ "tst %[v2], #2 \n"
+ "beq 20f \n"
+
+ "10: \n"
+ "bic %[v2], %[v2], #2 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+ "1: \n"
+ ".rept " ADD_SUB_BLOCKS "\n"
+ "ldmia %[v2]!, {r6-r7} \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "mov r5, r5, ror #16 \n"
+ "pkhtb r4, r5, r4, asr #16 \n"
+ "ssub16 r0, r0, r4 \n"
+ "pkhbt r5, r5, r6, lsl #16 \n"
+ "ssub16 r1, r1, r5 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+ "mov r7, r7, ror #16 \n"
+ "pkhtb r6, r7, r6, asr #16 \n"
+ "ssub16 r2, r2, r6 \n"
+ "pkhbt r7, r7, r4, lsl #16 \n"
+ "ssub16 r3, r3, r7 \n"
+ "stmia %[v1]!, {r0-r3} \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ "b 99f \n"
+
+ "20: \n"
+ "1: \n"
+ ".rept " ADD_SUB_BLOCKS "\n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ "ldmia %[v1], {r0-r3} \n"
+ "ssub16 r0, r0, r4 \n"
+ "ssub16 r1, r1, r5 \n"
+ "ssub16 r2, r2, r6 \n"
+ "ssub16 r3, r3, r7 \n"
+ "stmia %[v1]!, {r0-r3} \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers ("cc": tst/subs alter the condition flags) */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "cc", "memory"
+ );
+}
+
+/* Returns the sum of v1[i] * v2[i]. Fetches data as 32 bit words and
+ * *requires* v1 to be 32 bit aligned (else data abort, or incorrect
+ * results if ARM aligncheck is disabled). v2 may be only 16 bit aligned. */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+ int res;
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+#if ORDER > 16
+#define MLA_BLOCKS "3"
+#else
+#define MLA_BLOCKS "1"
+#endif
+
+ asm volatile (
+#if ORDER > 32
+ "mov %[res], #0 \n"
+#endif
+ "tst %[v2], #2 \n"
+ "beq 20f \n"
+
+ "10: \n"
+ "bic %[v2], %[v2], #2 \n"
+ "ldmia %[v2]!, {r5-r7} \n"
+ "ldmia %[v1]!, {r0-r1} \n"
+ "1: \n"
+ "pkhbt r8, r6, r5 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+#if ORDER > 32
+ "smladx %[res], r0, r8, %[res] \n"
+#else
+ "smuadx %[res], r0, r8 \n"
+#endif
+ ".rept " MLA_BLOCKS "\n"
+ "pkhbt r8, r7, r6 \n"
+ "ldmia %[v1]!, {r2-r3} \n"
+ "smladx %[res], r1, r8, %[res] \n"
+ "pkhbt r8, r4, r7 \n"
+ "ldmia %[v2]!, {r6-r7} \n"
+ "smladx %[res], r2, r8, %[res] \n"
+ "pkhbt r8, r5, r4 \n"
+ "ldmia %[v1]!, {r0-r1} \n"
+ "smladx %[res], r3, r8, %[res] \n"
+ "pkhbt r8, r6, r5 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+ "smladx %[res], r0, r8, %[res] \n"
+ ".endr \n"
+
+ "pkhbt r8, r7, r6 \n"
+ "ldmia %[v1]!, {r2-r3} \n"
+ "smladx %[res], r1, r8, %[res] \n"
+ "pkhbt r8, r4, r7 \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "ldmneia %[v2]!, {r6-r7} \n"
+ "smladx %[res], r2, r8, %[res] \n"
+ "pkhbt r8, r5, r4 \n"
+ "ldmneia %[v1]!, {r0-r1} \n"
+ "smladx %[res], r3, r8, %[res] \n"
+ "bne 1b \n"
+#else
+ "pkhbt r5, r5, r4 \n"
+ "smladx %[res], r2, r8, %[res] \n"
+ "smladx %[res], r3, r5, %[res] \n"
+#endif
+ "b 99f \n"
+
+ "20: \n"
+ "ldmia %[v1]!, {r0-r1} \n"
+ "ldmia %[v2]!, {r5-r7} \n"
+ "1: \n"
+ "ldmia %[v1]!, {r2-r3} \n"
+#if ORDER > 32
+ "smlad %[res], r0, r5, %[res] \n"
+#else
+ "smuad %[res], r0, r5 \n"
+#endif
+ ".rept " MLA_BLOCKS "\n"
+ "ldmia %[v2]!, {r4-r5} \n"
+ "smlad %[res], r1, r6, %[res] \n"
+ "ldmia %[v1]!, {r0-r1} \n"
+ "smlad %[res], r2, r7, %[res] \n"
+ "ldmia %[v2]!, {r6-r7} \n"
+ "smlad %[res], r3, r4, %[res] \n"
+ "ldmia %[v1]!, {r2-r3} \n"
+ "smlad %[res], r0, r5, %[res] \n"
+ ".endr \n"
+
+ "ldmia %[v2]!, {r4-r5} \n"
+ "smlad %[res], r1, r6, %[res] \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "ldmneia %[v1]!, {r0-r1} \n"
+ "smlad %[res], r2, r7, %[res] \n"
+ "ldmneia %[v2]!, {r6-r7} \n"
+ "smlad %[res], r3, r4, %[res] \n"
+ "bne 1b \n"
+#else
+ "smlad %[res], r2, r7, %[res] \n"
+ "smlad %[res], r3, r4, %[res] \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2),
+ [res]"=r"(res)
+ : /* inputs */
+ : /* clobbers ("cc": flags change; "memory": *v1 and *v2 are read) */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "r8", "cc", "memory"
+ );
+ return res;
+}
diff --git a/libdemac/vector_math16_cf.h b/libdemac/vector_math16_cf.h
new file mode 100644
index 00000000..e51767b8
--- /dev/null
+++ b/libdemac/vector_math16_cf.h
@@ -0,0 +1,326 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: vector_math16_cf.h 19144 2008-11-19 21:31:33Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+Coldfire vector math copyright (C) 2007 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Adds each 16 bit element of v2 to the matching element of v1 and stores
+ * the sums back into v1; v2 is only read.
+ *
+ * One pass of the loop body handles 16 elements (two groups of four 32 bit
+ * words). With ORDER > 16 the body is repeated ORDER>>4 times, otherwise it
+ * runs exactly once.
+ *
+ * Bit 1 of the v2 address selects the code path: when it is clear, label 20
+ * adds word against word (ADDHALFREGS); when it is set, label 10 first
+ * fetches a single 16 bit element and then pairs up halves of neighbouring
+ * v2 words (ADDHALFXREGS).
+ *
+ * NOTE(review): on the label 10 path the movem.l loads from v2 appear to
+ * fetch one 16 bit element past the last one that is consumed - confirm
+ * that callers leave readable memory after v2.
+ *
+ * This version fetches data as 32 bit words, and *recommends* v1 to be
+ * 32 bit aligned, otherwise performance will suffer. */
+static inline void vector_add(int16_t* v1, int16_t* v2)
+{
+#if ORDER > 16
+ int cnt = ORDER>>4; /* number of loop passes, 16 elements each */
+#endif
+
+#define ADDHALFREGS(s1, sum) /* Add register halves straight. */ \
+ "move.l " #s1 ", %%d4 \n" /* 's1' can be an A or D reg. */ \
+ "add.l " #sum ", " #s1 "\n" /* 'sum' must be a D reg. */ \
+ "clr.w %%d4 \n" /* 's1' and %%d4 are clobbered! */ \
+ "add.l %%d4 , " #sum "\n" \
+ "move.w " #s1 ", " #sum "\n"
+
+#define ADDHALFXREGS(s1, s2, sum) /* Add register halves across. */ \
+ "clr.w " #sum " \n" /* Needs 'sum' pre-swapped, swaps */ \
+ "add.l " #s1 ", " #sum "\n" /* 's2', and clobbers 's1'. */ \
+ "swap " #s2 " \n" /* 's1' can be an A or D reg. */ \
+ "add.l " #s2 ", " #s1 "\n" /* 'sum' and 's2' must be D regs. */ \
+ "move.w " #s1 ", " #sum "\n"
+
+ asm volatile (
+ "move.l %[v2], %%d0 \n"
+ "and.l #2, %%d0 \n" /* isolate bit 1 of the v2 address */
+ "jeq 20f \n" /* clear: take the word-against-word path */
+
+ "10: \n"
+ "move.w (%[v2])+, %%d0 \n" /* first v2 element on its own */
+ "swap %%d0 \n" /* move it to the upper half ("pre-swapped") */
+ "1: \n"
+ "movem.l (%[v1]), %%a0-%%a3 \n"
+ "movem.l (%[v2]), %%d1-%%d4 \n"
+ ADDHALFXREGS(%%a0, %%d1, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ ADDHALFXREGS(%%a1, %%d2, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ ADDHALFXREGS(%%a2, %%d3, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ ADDHALFXREGS(%%a3, %%d4, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+ "lea.l (16, %[v2]), %[v2] \n"
+ "move.l %%d4, %%d0 \n" /* swapped %d4 seeds the next group's 'sum' */
+
+ "movem.l (%[v1]), %%a0-%%a3 \n"
+ "movem.l (%[v2]), %%d1-%%d4 \n"
+ ADDHALFXREGS(%%a0, %%d1, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ ADDHALFXREGS(%%a1, %%d2, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ ADDHALFXREGS(%%a2, %%d3, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ ADDHALFXREGS(%%a3, %%d4, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+#if ORDER > 16
+ "lea.l (16, %[v2]), %[v2] \n"
+ "move.l %%d4, %%d0 \n"
+
+ "subq.l #1, %[cnt] \n"
+ "jne 1b \n"
+#endif
+ "jra 99f \n"
+
+ "20: \n"
+ "1: \n"
+ "movem.l (%[v2]), %%a0-%%a3 \n"
+ "movem.l (%[v1]), %%d0-%%d3 \n"
+ ADDHALFREGS(%%a0, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ ADDHALFREGS(%%a1, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ ADDHALFREGS(%%a2, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ ADDHALFREGS(%%a3, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+ "lea.l (16, %[v2]), %[v2] \n"
+
+ "movem.l (%[v2]), %%a0-%%a3 \n"
+ "movem.l (%[v1]), %%d0-%%d3 \n"
+ ADDHALFREGS(%%a0, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ ADDHALFREGS(%%a1, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ ADDHALFREGS(%%a2, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ ADDHALFREGS(%%a3, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+#if ORDER > 16
+ "lea.l (16, %[v2]), %[v2] \n"
+
+ "subq.l #1, %[cnt] \n"
+ "jne 1b \n"
+#endif
+ "99: \n"
+ : /* outputs */
+#if ORDER > 16
+ [cnt]"+d"(cnt),
+#endif
+ [v1] "+a"(v1),
+ [v2] "+a"(v2)
+ : /* inputs */
+ : /* clobbers */
+ "d0", "d1", "d2", "d3", "d4",
+ "a0", "a1", "a2", "a3", "memory"
+ );
+}
+
+/* Subtracts each 16 bit element of v2 from the matching element of v1 and
+ * stores the differences back into v1; v2 is only read.
+ *
+ * One pass of the loop body handles 16 elements (two groups of four 32 bit
+ * words). With ORDER > 16 the body is repeated ORDER>>4 times, otherwise it
+ * runs exactly once.
+ *
+ * Bit 1 of the v2 address selects the code path: when it is clear, label 20
+ * subtracts word against word (SUBHALFREGS); when it is set, label 10 first
+ * fetches a single 16 bit element and then pairs up halves of neighbouring
+ * v2 words (SUBHALFXREGS).
+ *
+ * NOTE(review): on the label 10 path the movem.l loads from v2 appear to
+ * fetch one 16 bit element past the last one that is consumed - confirm
+ * that callers leave readable memory after v2.
+ *
+ * This version fetches data as 32 bit words, and *recommends* v1 to be
+ * 32 bit aligned, otherwise performance will suffer. */
+static inline void vector_sub(int16_t* v1, int16_t* v2)
+{
+#if ORDER > 16
+ int cnt = ORDER>>4; /* number of loop passes, 16 elements each */
+#endif
+
+#define SUBHALFREGS(min, sub, dif) /* Subtract register halves straight. */ \
+ "move.l " #min ", " #dif "\n" /* 'min' can be an A or D reg */ \
+ "sub.l " #sub ", " #min "\n" /* 'sub' and 'dif' must be D regs */ \
+ "clr.w " #sub "\n" /* 'min' and 'sub' are clobbered! */ \
+ "sub.l " #sub ", " #dif "\n" \
+ "move.w " #min ", " #dif "\n"
+
+#define SUBHALFXREGS(min, s2, s1d) /* Subtract register halves across. */ \
+ "clr.w " #s1d "\n" /* Needs 's1d' pre-swapped, swaps */ \
+ "sub.l " #s1d ", " #min "\n" /* 's2' and clobbers 'min'. */ \
+ "move.l " #min ", " #s1d "\n" /* 'min' can be an A or D reg, */ \
+ "swap " #s2 "\n" /* 's2' and 's1d' must be D regs. */ \
+ "sub.l " #s2 ", " #min "\n" \
+ "move.w " #min ", " #s1d "\n"
+
+ asm volatile (
+ "move.l %[v2], %%d0 \n"
+ "and.l #2, %%d0 \n" /* isolate bit 1 of the v2 address */
+ "jeq 20f \n" /* clear: take the word-against-word path */
+
+ "10: \n"
+ "move.w (%[v2])+, %%d0 \n" /* first v2 element on its own */
+ "swap %%d0 \n" /* move it to the upper half ("pre-swapped") */
+ "1: \n"
+ "movem.l (%[v2]), %%d1-%%d4 \n"
+ "movem.l (%[v1]), %%a0-%%a3 \n"
+ SUBHALFXREGS(%%a0, %%d1, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ SUBHALFXREGS(%%a1, %%d2, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ SUBHALFXREGS(%%a2, %%d3, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ SUBHALFXREGS(%%a3, %%d4, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+ "lea.l (16, %[v2]), %[v2] \n"
+ "move.l %%d4, %%d0 \n" /* swapped %d4 seeds the next group's 's1d' */
+
+ "movem.l (%[v2]), %%d1-%%d4 \n"
+ "movem.l (%[v1]), %%a0-%%a3 \n"
+ SUBHALFXREGS(%%a0, %%d1, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ SUBHALFXREGS(%%a1, %%d2, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ SUBHALFXREGS(%%a2, %%d3, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ SUBHALFXREGS(%%a3, %%d4, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+#if ORDER > 16
+ "lea.l (16, %[v2]), %[v2] \n"
+ "move.l %%d4, %%d0 \n"
+
+ "subq.l #1, %[cnt] \n"
+ "bne.w 1b \n"
+#endif
+ "jra 99f \n"
+
+ "20: \n"
+ "1: \n"
+ "movem.l (%[v2]), %%d1-%%d4 \n"
+ "movem.l (%[v1]), %%a0-%%a3 \n"
+ SUBHALFREGS(%%a0, %%d1, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ SUBHALFREGS(%%a1, %%d2, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ SUBHALFREGS(%%a2, %%d3, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ SUBHALFREGS(%%a3, %%d4, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+ "lea.l (16, %[v2]), %[v2] \n"
+
+ "movem.l (%[v2]), %%d1-%%d4 \n"
+ "movem.l (%[v1]), %%a0-%%a3 \n"
+ SUBHALFREGS(%%a0, %%d1, %%d0)
+ "move.l %%d0, (%[v1])+ \n"
+ SUBHALFREGS(%%a1, %%d2, %%d1)
+ "move.l %%d1, (%[v1])+ \n"
+ SUBHALFREGS(%%a2, %%d3, %%d2)
+ "move.l %%d2, (%[v1])+ \n"
+ SUBHALFREGS(%%a3, %%d4, %%d3)
+ "move.l %%d3, (%[v1])+ \n"
+#if ORDER > 16
+ "lea.l (16, %[v2]), %[v2] \n"
+
+ "subq.l #1, %[cnt] \n"
+ "bne.w 1b \n"
+#endif
+
+ "99: \n"
+ : /* outputs */
+#if ORDER > 16
+ [cnt]"+d"(cnt),
+#endif
+ [v1] "+a"(v1),
+ [v2] "+a"(v2)
+ : /* inputs */
+ : /* clobbers */
+ "d0", "d1", "d2", "d3", "d4",
+ "a0", "a1", "a2", "a3", "memory"
+ );
+}
+
+#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
+
+/* Returns the sum of the products v1[i] * v2[i] of 16 bit elements, built up
+ * in EMAC accumulator acc0.
+ *
+ * One pass of the loop body performs (MAC_BLOCKS + 1) * 4 multiply-
+ * accumulates: 32 when ORDER > 16, 16 otherwise. With ORDER > 32 the body is
+ * repeated ORDER>>5 times; the loop counter lives in the register that later
+ * receives the result (cnt is tied to res by a matching constraint).
+ *
+ * Bit 1 of the v2 address selects the code path: when it is clear, label 20
+ * multiplies matching halves of v1 and v2 words; when it is set, label 10
+ * first fetches a single 16 bit element of v2 and multiplies across halves.
+ *
+ * acc0 is not cleared on entry; the closing movclr.l reads it and clears it
+ * again. NOTE(review): presumably acc0 is already zero before the first
+ * call - confirm against the EMAC setup code.
+ *
+ * NOTE(review): the loads are interleaved with the mac.w instructions, so
+ * the final pass appears to fetch slightly beyond the elements it consumes
+ * - confirm that callers leave readable memory after v1 and v2.
+ *
+ * The asm reads the arrays behind v1 and v2 without naming them as memory
+ * operands, so "memory" is listed as a clobber to keep the compiler from
+ * moving stores to those arrays across the asm statement.
+ *
+ * This version fetches data as 32 bit words, and *recommends* v1 to be
+ * 32 bit aligned, otherwise performance will suffer. It also needs EMAC
+ * in signed integer mode - call above macro before use. */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+ int res;
+#if ORDER > 32
+ int cnt = ORDER>>5; /* number of loop passes, 32 products each */
+#endif
+
+#if ORDER > 16
+#define MAC_BLOCKS "7"
+#else
+#define MAC_BLOCKS "3"
+#endif
+
+ asm volatile (
+ "move.l %[v2], %%d0 \n"
+ "and.l #2, %%d0 \n" /* isolate bit 1 of the v2 address */
+ "jeq 20f \n" /* clear: take the word-against-word path */
+
+ "10: \n"
+ "move.l (%[v1])+, %%d0 \n"
+ "move.w (%[v2])+, %%d1 \n" /* first v2 element on its own */
+ "1: \n"
+ ".rept " MAC_BLOCKS "\n"
+ "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ ".endr \n"
+
+ "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+#if ORDER > 32
+ "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "subq.l #1, %[res] \n" /* %[res] holds cnt until the final movclr.l */
+ "bne.w 1b \n"
+#else
+ "mac.w %%d0l, %%d1u, %%acc0 \n"
+#endif
+ "jra 99f \n"
+
+ "20: \n"
+ "move.l (%[v1])+, %%d0 \n"
+ "move.l (%[v2])+, %%d1 \n"
+ "1: \n"
+ ".rept " MAC_BLOCKS "\n"
+ "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
+ "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ ".endr \n"
+
+ "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
+ "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+#if ORDER > 32
+ "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "subq.l #1, %[res] \n" /* %[res] holds cnt until the final movclr.l */
+ "bne.w 1b \n"
+#else
+ "mac.w %%d2u, %%d1u, %%acc0 \n"
+ "mac.w %%d2l, %%d1l, %%acc0 \n"
+#endif
+
+ "99: \n"
+ "movclr.l %%acc0, %[res] \n" /* fetch the sum and clear acc0 */
+ : /* outputs */
+ [v1]"+a"(v1),
+ [v2]"+a"(v2),
+ [res]"=d"(res)
+ : /* inputs */
+#if ORDER > 32
+ [cnt]"[res]"(cnt)
+#endif
+ : /* clobbers */
+ "d0", "d1", "d2", "memory"
+ );
+ return res;
+}
diff --git a/libdemac/vector_math32_armv4.h b/libdemac/vector_math32_armv4.h
new file mode 100644
index 00000000..47bc5e94
--- /dev/null
+++ b/libdemac/vector_math32_armv4.h
@@ -0,0 +1,205 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: vector_math32_armv4.h 19144 2008-11-19 21:31:33Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+ARMv4 vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+/* Adds each 32 bit element of v2 to the matching element of v1 and stores
+ * the sums back into v1; v2 is only read.
+ *
+ * Every .rept block handles four elements, so one pass covers 32 elements
+ * when ORDER > 16 and 16 otherwise. With ORDER > 32 the pass is repeated
+ * ORDER>>5 times.
+ *
+ * ADD_SUB_BLOCKS is #defined inside this function body and is also used by
+ * the asm in vector_sub. */
+static inline void vector_add(int32_t* v1, int32_t* v2)
+{
+#if ORDER > 32
+ int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
+#endif
+
+#if ORDER > 16
+#define ADD_SUB_BLOCKS "8"
+#else
+#define ADD_SUB_BLOCKS "4"
+#endif
+
+ asm volatile (
+ "1: \n"
+ ".rept " ADD_SUB_BLOCKS "\n"
+ "ldmia %[v1], {r0-r3} \n" /* no writeback: v1 advances on the store */
+ "ldmia %[v2]!, {r4-r7} \n"
+ "add r0, r0, r4 \n"
+ "add r1, r1, r5 \n"
+ "add r2, r2, r6 \n"
+ "add r3, r3, r7 \n"
+ "stmia %[v1]!, {r0-r3} \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "memory"
+ );
+}
+/* Subtracts each 32 bit element of v2 from the matching element of v1 and
+ * stores the differences back into v1; v2 is only read.
+ *
+ * Every .rept block handles four elements and the block is repeated
+ * ADD_SUB_BLOCKS times per pass. With ORDER > 32 the pass is repeated
+ * ORDER>>5 times.
+ *
+ * ADD_SUB_BLOCKS is not defined here; in this file the #define sits in the
+ * body of vector_add, so that function has to come first. */
+static inline void vector_sub(int32_t* v1, int32_t* v2)
+{
+#if ORDER > 32
+ int cnt = ORDER>>5; /* number of loop passes */
+#endif
+
+ asm volatile (
+ "1: \n"
+ ".rept " ADD_SUB_BLOCKS "\n"
+ "ldmia %[v1], {r0-r3} \n" /* no writeback: v1 advances on the store */
+ "ldmia %[v2]!, {r4-r7} \n"
+ "sub r0, r0, r4 \n"
+ "sub r1, r1, r5 \n"
+ "sub r2, r2, r6 \n"
+ "sub r3, r3, r7 \n"
+ "stmia %[v1]!, {r0-r3} \n"
+ ".endr \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "memory"
+ );
+}
+
+/* Returns the sum of the products v1[i] * v2[i] of 32 bit elements, using
+ * mul/mla so only the low 32 bits of every product and of the sum are kept.
+ *
+ * ORDER > 16: one pass performs 32 multiply-accumulates, with the loads
+ * from v1 and v2 interleaved between them. With ORDER > 32 the result is
+ * zeroed first and the pass is repeated ORDER>>5 times; otherwise the first
+ * product is formed with mul, so no zeroing is needed.
+ * ORDER <= 16: four groups of four products, 16 in total.
+ *
+ * NOTE(review): with ORDER > 32 every pass ends by loading the first two v2
+ * words of the following pass (r6, r7) unconditionally, so the final pass
+ * appears to read two words past the elements consumed - confirm that
+ * callers leave readable memory after v2.
+ *
+ * The asm reads the arrays behind v1 and v2 without naming them as memory
+ * operands, so "memory" is listed as a clobber to keep the compiler from
+ * moving stores to those arrays across the asm statement. */
+static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
+{
+ int res;
+#if ORDER > 32
+ int cnt = ORDER>>5; /* number of loop passes, 32 products each */
+#endif
+
+ asm volatile (
+#if ORDER > 16
+#if ORDER > 32
+ "mov %[res], #0 \n"
+#endif
+ "ldmia %[v2]!, {r6-r7} \n" /* preload the first two v2 words */
+ "1: \n"
+ "ldmia %[v1]!, {r0,r1,r3-r5} \n"
+#if ORDER > 32
+ "mla %[res], r6, r0, %[res] \n"
+#else
+ "mul %[res], r6, r0 \n" /* single pass: first product initialises res */
+#endif
+ "mla %[res], r7, r1, %[res] \n"
+ "ldmia %[v2]!, {r0-r2,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+ "ldmia %[v1]!, {r0-r4} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+ "ldmia %[v2]!, {r0,r1,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "ldmia %[v1]!, {r0-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+ "ldmia %[v2]!, {r0-r2,r6,r7} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+ "ldmia %[v1]!, {r0,r1,r3-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "ldmia %[v2]!, {r0-r2,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+ "ldmia %[v1]!, {r0-r4} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+ "ldmia %[v2]!, {r0,r1,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "ldmia %[v1]!, {r0-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+#if ORDER > 32
+ "ldmia %[v2]!, {r0-r2,r6,r7} \n" /* r6, r7 feed the next pass */
+#else
+ "ldmia %[v2]!, {r0-r2} \n"
+#endif
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+#else /* ORDER <= 16 */
+ "ldmia %[v1]!, {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ "mul %[res], r4, r0 \n" /* first product initialises res */
+ "mla %[res], r5, r1, %[res] \n"
+ "mla %[res], r6, r2, %[res] \n"
+ "mla %[res], r7, r3, %[res] \n"
+
+ ".rept 3 \n"
+ "ldmia %[v1]!, {r0-r3} \n"
+ "ldmia %[v2]!, {r4-r7} \n"
+ "mla %[res], r4, r0, %[res] \n"
+ "mla %[res], r5, r1, %[res] \n"
+ "mla %[res], r6, r2, %[res] \n"
+ "mla %[res], r7, r3, %[res] \n"
+ ".endr \n"
+#endif /* ORDER <= 16 */
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2),
+ [res]"=r"(res)
+ : /* inputs */
+ : /* clobbers */
+ "r0", "r1", "r2", "r3",
+ "r4", "r5", "r6", "r7", "memory"
+#if ORDER > 16
+ ,"r8"
+#endif
+ );
+ return res;
+}
diff --git a/libdemac/vector_math_generic.h b/libdemac/vector_math_generic.h
new file mode 100644
index 00000000..b2dab1e2
--- /dev/null
+++ b/libdemac/vector_math_generic.h
@@ -0,0 +1,160 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: vector_math_generic.h 19144 2008-11-19 21:31:33Z amiconn $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include "demac_config.h"
+/* Adds each element of v2 to the matching element of v1, in place; v2 is
+ * only read. filter_int is not defined in this header.
+ *
+ * The statements are unrolled by hand: the braced block handles 16 elements
+ * when ORDER <= 16 and 32 otherwise. With ORDER > 32 the block becomes the
+ * body of a loop that runs ORDER>>5 times. */
+static inline void vector_add(filter_int* v1, filter_int* v2)
+{
+#if ORDER > 32
+ int order = (ORDER >> 5); /* passes of 32 elements each */
+ while (order--)
+#endif
+ { /* without the loop header above this is a plain block, run once */
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+#if ORDER > 16 /* second half of a 32 element pass */
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+ *v1++ += *v2++;
+#endif
+ }
+}
+/* Subtracts each element of v2 from the matching element of v1, in place;
+ * v2 is only read. filter_int is not defined in this header.
+ *
+ * The statements are unrolled by hand: the braced block handles 16 elements
+ * when ORDER <= 16 and 32 otherwise. With ORDER > 32 the block becomes the
+ * body of a loop that runs ORDER>>5 times. */
+static inline void vector_sub(filter_int* v1, filter_int* v2)
+{
+#if ORDER > 32
+ int order = (ORDER >> 5); /* passes of 32 elements each */
+ while (order--)
+#endif
+ { /* without the loop header above this is a plain block, run once */
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+#if ORDER > 16 /* second half of a 32 element pass */
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+ *v1++ -= *v2++;
+#endif
+ }
+}
+/* Returns the sum of the products v1[i] * v2[i]; neither vector is
+ * modified. The sum is accumulated in an int and returned as int32_t.
+ * filter_int is not defined in this header.
+ *
+ * The statements are unrolled by hand: the braced block adds 16 products
+ * when ORDER <= 16 and 32 otherwise. With ORDER > 32 the block becomes the
+ * body of a loop that runs ORDER>>5 times. */
+static inline int32_t scalarproduct(filter_int* v1, filter_int* v2)
+{
+ int res = 0;
+
+#if ORDER > 32
+ int order = (ORDER >> 5); /* passes of 32 products each */
+ while (order--)
+#endif
+ { /* without the loop header above this is a plain block, run once */
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+#if ORDER > 16 /* second half of a 32 product pass */
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+ res += *v1++ * *v2++;
+#endif
+ }
+ return res;
+}
diff --git a/playlist.c b/playlist.c
index e53ccc2c..08c0e410 100644
--- a/playlist.c
+++ b/playlist.c
@@ -178,6 +178,8 @@ pl_insert_cue_from_buffer (playItem_t *after, const char *fname, const uint8_t *
}
}
else if (!strncmp (p, "FILE ", 5)) {
+ // ignore
+#if 0
pl_get_qvalue_from_cue (p + 5, file);
// copy directory name
char dirname[1024];
@@ -201,6 +203,7 @@ pl_insert_cue_from_buffer (playItem_t *after, const char *fname, const uint8_t *
// copy full name in place of relative name
strcpy (file, dirname);
// printf ("ended up as: %s\n", file);
+#endif
}
else if (!strncmp (p, "TRACK ", 6)) {
pl_get_value_from_cue (p + 6, track);
@@ -242,7 +245,7 @@ pl_insert_cue_from_buffer (playItem_t *after, const char *fname, const uint8_t *
playItem_t *it = malloc (sizeof (playItem_t));
memset (it, 0, sizeof (playItem_t));
it->decoder = decoder;
- it->fname = strdup (file);
+ it->fname = strdup (fname);
it->tracknum = atoi (track);
it->timestart = tstart;
it->timeend = -1; // will be filled by next read, or by decoder
diff --git a/plugins/ape/Makefile.am b/plugins/ape/Makefile.am
index b7d0b805..07b43270 100644
--- a/plugins/ape/Makefile.am
+++ b/plugins/ape/Makefile.am
@@ -3,5 +3,5 @@ pkglib_LTLIBRARIES = ape.la
ape_la_SOURCES = ape.c apewrapper.cpp apewrapper.h
ape_la_LDFLAGS = -module
-ape_la_LIBADD = $(LDADD) $(APE_DEPS_LIBS) -lmac
+ape_la_LIBADD = $(LDADD) $(APE_LIBS)
AM_CFLAGS = $(APE_DEPS_CFLAGS) -std=c99
diff --git a/plugins/ape/ape.c b/plugins/ape/ape.c
index 175d1127..619d8f1c 100644
--- a/plugins/ape/ape.c
+++ b/plugins/ape/ape.c
@@ -46,6 +46,7 @@ static int
ape_init (DB_playItem_t *it) {
ape_dec = ape_decompress_create (it->fname);
if (!ape_dec) {
+ printf ("ape_decompress_create failed for file %s\n", it->fname);
return -1;
}
WAVEFORMATEX wfe;