diff options
author | waker <wakeroid@gmail.com> | 2009-09-01 19:59:19 +0200 |
---|---|---|
committer | waker <wakeroid@gmail.com> | 2009-09-01 19:59:19 +0200 |
commit | 96c45759a327b0176894fd3ab5aa04ad8b3202fc (patch) | |
tree | 62ab5346972d4b2287cb50032c34282f3ca24af2 /plugins/demac | |
parent | 72d88ede43ceea5479965b45d482f0f1817bf6b6 (diff) |
fixed apedec (mac plugin) license; moved libdemac; started work on demac plugin
Diffstat (limited to 'plugins/demac')
28 files changed, 5022 insertions, 0 deletions
diff --git a/plugins/demac/Makefile.am b/plugins/demac/Makefile.am new file mode 100644 index 00000000..e1296c5e --- /dev/null +++ b/plugins/demac/Makefile.am @@ -0,0 +1,6 @@ +demacdir = $(libdir)/$(PACKAGE) +pkglib_LTLIBRARIES = demac.la +demac_la_SOURCES = demac.c +demac_la_LDFLAGS = -module +#demac_la_LIBADD = $(LDADD) $(demac_LIBS) +#AM_CFLAGS = $(demac_DEPS_CFLAGS) -std=c99 diff --git a/plugins/demac/demac.c b/plugins/demac/demac.c new file mode 100644 index 00000000..a757ebfb --- /dev/null +++ b/plugins/demac/demac.c @@ -0,0 +1,57 @@ +/* + DeaDBeeF - ultimate music player for GNU/Linux systems with X11 + Copyright (C) 2009 Alexey Yakovenko + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+*/ +#include <stdio.h> +#include <string.h> +#include <time.h> +#include "../../deadbeef.h" + +#define min(x,y) ((x)<(y)?(x):(y)) +#define max(x,y) ((x)>(y)?(x):(y)) + +static DB_decoder_t plugin; +static DB_functions_t *deadbeef; + +static const char *exts[] = { "ape", NULL }; +static const char *filetypes[] = { "APE", NULL }; + +// define plugin interface +static DB_decoder_t plugin = { + .plugin.version_major = 0, + .plugin.version_minor = 1, + .plugin.type = DB_PLUGIN_DECODER, + .plugin.name = "Monkey's Audio decoder", + .plugin.descr = "Based on demac", + .plugin.author = "Alexey Yakovenko", + .plugin.email = "waker@users.sourceforge.net", + .plugin.website = "http://deadbeef.sf.net", +/// .init = demac_init, +/// .free = demac_free, +/// .read_int16 = demac_read, +/// .seek = demac_seek, +/// .insert = demac_insert, + .exts = exts, + .id = "stddemac", + .filetypes = filetypes +}; + +DB_plugin_t * +demac_load (DB_functions_t *api) { + deadbeef = api; + return DB_PLUGIN (&plugin); +} diff --git a/plugins/demac/libdemac/SOURCES b/plugins/demac/libdemac/SOURCES new file mode 100644 index 00000000..5a448237 --- /dev/null +++ b/plugins/demac/libdemac/SOURCES @@ -0,0 +1,15 @@ +crc.c +predictor.c +#ifdef CPU_ARM +predictor-arm.S +#elif defined CPU_COLDFIRE +predictor-cf.S +#endif +entropy.c +decoder.c +parser.c +filter_1280_15.c +filter_16_11.c +filter_256_13.c +filter_32_10.c +filter_64_11.c diff --git a/plugins/demac/libdemac/crc.c b/plugins/demac/libdemac/crc.c new file mode 100644 index 00000000..30a49c8d --- /dev/null +++ b/plugins/demac/libdemac/crc.c @@ -0,0 +1,120 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: crc.c 19643 2009-01-02 21:43:52Z bertrik $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include <inttypes.h> +#include "demac.h" + +static const uint32_t crctab32[] = +{ + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 
0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; + +uint32_t ape_initcrc(void) +{ + return 0xffffffff; +} + +/* Update the CRC from a block of WAV-format audio data */ +uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc) +{ + while (count--) + crc = (crc >> 8) ^ crctab32[(crc & 0xff) ^ 
*block++]; + + return crc; +} + +uint32_t ape_finishcrc(uint32_t crc) +{ + crc ^= 0xffffffff; + crc >>= 1; + + return crc; +} + diff --git a/plugins/demac/libdemac/decoder.c b/plugins/demac/libdemac/decoder.c new file mode 100644 index 00000000..d6327de2 --- /dev/null +++ b/plugins/demac/libdemac/decoder.c @@ -0,0 +1,193 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: decoder.c 19552 2008-12-21 23:49:02Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include <inttypes.h> +#include <string.h> + +#include "demac.h" +#include "predictor.h" +#include "entropy.h" +#include "filter.h" +#include "demac_config.h" + +/* Statically allocate the filter buffers */ + +static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] + IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */ +static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] + IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */ + +/* This is only needed for "insane" files, and no current Rockbox targets + can hope to decode them in realtime, although the Gigabeat S comes close. 
*/ +static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] + IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16))); + /* 17408 or 34816 bytes */ + +void init_frame_decoder(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed) +{ + init_entropy_decoder(ape_ctx, inbuffer, firstbyte, bytesconsumed); + //printf("CRC=0x%08x\n",ape_ctx->CRC); + //printf("Flags=0x%08x\n",ape_ctx->frameflags); + + init_predictor_decoder(&ape_ctx->predictor); + + switch (ape_ctx->compressiontype) + { + case 2000: + init_filter_16_11(filterbuf32); + break; + + case 3000: + init_filter_64_11(filterbuf256); + break; + + case 4000: + init_filter_256_13(filterbuf256); + init_filter_32_10(filterbuf32); + break; + + case 5000: + init_filter_1280_15(filterbuf1280); + init_filter_256_13(filterbuf256); + init_filter_16_11(filterbuf32); + } +} + +int ICODE_ATTR_DEMAC decode_chunk(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed, + int32_t* decoded0, int32_t* decoded1, + int count) +{ + int32_t left, right; +#ifdef ROCKBOX + int scale = (APE_OUTPUT_DEPTH - ape_ctx->bps); + #define SCALE(x) ((x) << scale) +#else + #define SCALE(x) (x) +#endif + + if ((ape_ctx->channels==1) || ((ape_ctx->frameflags + & (APE_FRAMECODE_PSEUDO_STEREO|APE_FRAMECODE_STEREO_SILENCE)) + == APE_FRAMECODE_PSEUDO_STEREO)) { + + entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed, + decoded0, NULL, count); + + if (ape_ctx->frameflags & APE_FRAMECODE_MONO_SILENCE) { + /* We are pure silence, so we're done. 
*/ + return 0; + } + + switch (ape_ctx->compressiontype) + { + case 2000: + apply_filter_16_11(ape_ctx->fileversion,decoded0,NULL,count); + break; + + case 3000: + apply_filter_64_11(ape_ctx->fileversion,decoded0,NULL,count); + break; + + case 4000: + apply_filter_32_10(ape_ctx->fileversion,decoded0,NULL,count); + apply_filter_256_13(ape_ctx->fileversion,decoded0,NULL,count); + break; + + case 5000: + apply_filter_16_11(ape_ctx->fileversion,decoded0,NULL,count); + apply_filter_256_13(ape_ctx->fileversion,decoded0,NULL,count); + apply_filter_1280_15(ape_ctx->fileversion,decoded0,NULL,count); + } + + /* Now apply the predictor decoding */ + predictor_decode_mono(&ape_ctx->predictor,decoded0,count); + + if (ape_ctx->channels==2) { + /* Pseudo-stereo - copy left channel to right channel */ + while (count--) + { + left = *decoded0; + *(decoded1++) = *(decoded0++) = SCALE(left); + } + } +#ifdef ROCKBOX + else { + /* Scale to output depth */ + while (count--) + { + left = *decoded0; + *(decoded0++) = SCALE(left); + } + } +#endif + } else { /* Stereo */ + entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed, + decoded0, decoded1, count); + + if ((ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) + == APE_FRAMECODE_STEREO_SILENCE) { + /* We are pure silence, so we're done. 
*/ + return 0; + } + + /* Apply filters - compression type 1000 doesn't have any */ + switch (ape_ctx->compressiontype) + { + case 2000: + apply_filter_16_11(ape_ctx->fileversion,decoded0,decoded1,count); + break; + + case 3000: + apply_filter_64_11(ape_ctx->fileversion,decoded0,decoded1,count); + break; + + case 4000: + apply_filter_32_10(ape_ctx->fileversion,decoded0,decoded1,count); + apply_filter_256_13(ape_ctx->fileversion,decoded0,decoded1,count); + break; + + case 5000: + apply_filter_16_11(ape_ctx->fileversion,decoded0,decoded1,count); + apply_filter_256_13(ape_ctx->fileversion,decoded0,decoded1,count); + apply_filter_1280_15(ape_ctx->fileversion,decoded0,decoded1,count); + } + + /* Now apply the predictor decoding */ + predictor_decode_stereo(&ape_ctx->predictor,decoded0,decoded1,count); + + /* Decorrelate and scale to output depth */ + while (count--) + { + left = *decoded1 - (*decoded0 / 2); + right = left + *decoded0; + + *(decoded0++) = SCALE(left); + *(decoded1++) = SCALE(right); + } + } + return 0; +} diff --git a/plugins/demac/libdemac/decoder.h b/plugins/demac/libdemac/decoder.h new file mode 100644 index 00000000..e8810652 --- /dev/null +++ b/plugins/demac/libdemac/decoder.h @@ -0,0 +1,40 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: decoder.h 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#ifndef _APE_DECODER_H +#define _APE_DECODER_H + +#include <inttypes.h> +#include "parser.h" + +void init_frame_decoder(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed); + +int decode_chunk(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed, + int32_t* decoded0, int32_t* decoded1, + int count); +#endif diff --git a/plugins/demac/libdemac/demac.h b/plugins/demac/libdemac/demac.h new file mode 100644 index 00000000..9a699a6e --- /dev/null +++ b/plugins/demac/libdemac/demac.h @@ -0,0 +1,45 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: demac.h 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +/* The guard is distinct from decoder.h's _APE_DECODER_H so that including + both headers in one translation unit does not silently drop the CRC + prototypes declared below. */ +#ifndef _APE_DEMAC_H +#define _APE_DEMAC_H + +#include <inttypes.h> +#include "parser.h" + +void init_frame_decoder(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed); + +int decode_chunk(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed, + int32_t* decoded0, int32_t* decoded1, + int count); + +uint32_t ape_initcrc(void); +uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc); +uint32_t ape_finishcrc(uint32_t crc); + +#endif /* _APE_DEMAC_H */ diff --git a/plugins/demac/libdemac/demac_config.h b/plugins/demac/libdemac/demac_config.h new file mode 100644 index 00000000..8f3ad1e0 --- /dev/null +++ b/plugins/demac/libdemac/demac_config.h @@ -0,0 +1,112 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: demac_config.h 19199 2008-11-24 18:40:49Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#ifndef _DEMAC_CONFIG_H +#define _DEMAC_CONFIG_H + +/* Build-time choices for libdemac. + * Note that this file is included by both .c and .S files. 
*/ + +#ifdef ROCKBOX + +#include "config.h" + +#ifndef __ASSEMBLER__ +#include "codeclib.h" +#include <codecs.h> +#endif + +#define APE_OUTPUT_DEPTH 29 + +/* On ARMv4, using 32 bit ints for the filters is faster. */ +#if defined(CPU_ARM) && (ARM_ARCH == 4) +#define FILTER_BITS 32 +#endif + +#if CONFIG_CPU == PP5002 +/* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */ +#define ICODE_SECTION_DEMAC_ARM .icode +#define ICODE_ATTR_DEMAC ICODE_ATTR +#define IBSS_ATTR_DEMAC_INSANEBUF +#elif CONFIG_CPU == PP5020 +/* Not enough IRAM for the insane filter buffer. */ +#define ICODE_SECTION_DEMAC_ARM .text +#define ICODE_ATTR_DEMAC +#define IBSS_ATTR_DEMAC_INSANEBUF +#else +#define ICODE_SECTION_DEMAC_ARM .text +#define ICODE_ATTR_DEMAC +#define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR +#endif + +#else /* !ROCKBOX */ + +#define APE_OUTPUT_DEPTH (ape_ctx->bps) + +#define IBSS_ATTR +#define IBSS_ATTR_DEMAC_INSANEBUF +#define ICONST_ATTR +#define ICODE_ATTR +#define ICODE_ATTR_DEMAC + +/* Use to give gcc hints on which branch is most likely taken */ +#if defined(__GNUC__) && __GNUC__ >= 3 +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +#endif /* !ROCKBOX */ + +/* Defaults */ + +#ifndef UDIV32 +#define UDIV32(a, b) (a / b) +#endif + +#ifndef FILTER_HISTORY_SIZE +#define FILTER_HISTORY_SIZE 512 +#endif + +#ifndef PREDICTOR_HISTORY_SIZE +#define PREDICTOR_HISTORY_SIZE 512 +#endif + +#ifndef FILTER_BITS +#define FILTER_BITS 16 +#endif + + +#ifndef __ASSEMBLER__ +#include <inttypes.h> +#if FILTER_BITS == 32 +typedef int32_t filter_int; +#elif FILTER_BITS == 16 +typedef int16_t filter_int; +#endif +#endif + +#endif /* _DEMAC_CONFIG_H */ diff --git a/plugins/demac/libdemac/entropy.c b/plugins/demac/libdemac/entropy.c new file mode 100644 index 00000000..df6bb7fd --- /dev/null +++ b/plugins/demac/libdemac/entropy.c @@ -0,0 +1,464 @@ +/* + +libdemac - A 
Monkey's Audio decoder + +$Id: entropy.c 19552 2008-12-21 23:49:02Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include <inttypes.h> +#include <string.h> + +#include "parser.h" +#include "entropy.h" +#include "demac_config.h" + +#define MODEL_ELEMENTS 64 + +/* + The following counts arrays for use with the range decoder are + hard-coded in the Monkey's Audio decoder. 
+*/ + +static const int counts_3970[65] ICONST_ATTR = +{ + 0,14824,28224,39348,47855,53994,58171,60926, + 62682,63786,64463,64878,65126,65276,65365,65419, + 65450,65469,65480,65487,65491,65493,65494,65495, + 65496,65497,65498,65499,65500,65501,65502,65503, + 65504,65505,65506,65507,65508,65509,65510,65511, + 65512,65513,65514,65515,65516,65517,65518,65519, + 65520,65521,65522,65523,65524,65525,65526,65527, + 65528,65529,65530,65531,65532,65533,65534,65535, + 65536 +}; + +/* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */ +static const int counts_diff_3970[64] ICONST_ATTR = +{ + 14824,13400,11124,8507,6139,4177,2755,1756, + 1104,677,415,248,150,89,54,31, + 19,11,7,4,2,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1 +}; + +static const int counts_3980[65] ICONST_ATTR = +{ + 0,19578,36160,48417,56323,60899,63265,64435, + 64971,65232,65351,65416,65447,65466,65476,65482, + 65485,65488,65490,65491,65492,65493,65494,65495, + 65496,65497,65498,65499,65500,65501,65502,65503, + 65504,65505,65506,65507,65508,65509,65510,65511, + 65512,65513,65514,65515,65516,65517,65518,65519, + 65520,65521,65522,65523,65524,65525,65526,65527, + 65528,65529,65530,65531,65532,65533,65534,65535, + 65536 +}; + +/* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */ + +static const int counts_diff_3980[64] ICONST_ATTR = +{ + 19578,16582,12257,7906,4576,2366,1170,536, + 261,119,65,31,19,10,6,3, + 3,2,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1 +}; + +/* + +Range decoder adapted from rangecod.c included in: + + http://www.compressconsult.com/rangecoder/rngcod13.zip + + rangecod.c range encoding + + (c) Michael Schindler + 1997, 1998, 1999, 2000 + http://www.compressconsult.com/ + michael@compressconsult.com + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free 
Software Foundation; either version 2 of the License, or + (at your option) any later version. + + +The encoding functions were removed, and functions turned into "static +inline" functions. Some minor cosmetic changes were made (e.g. turning +pre-processor symbols into upper-case, removing the rc parameter from +each function (and the RNGC macro)). + +*/ + +/* BITSTREAM READING FUNCTIONS */ + +/* We deal with the input data one byte at a time - to ensure + functionality on CPUs of any endianness regardless of any requirements + for aligned reads. +*/ + +static unsigned char* bytebuffer IBSS_ATTR; +static int bytebufferoffset IBSS_ATTR; + +static inline void skip_byte(void) +{ + bytebufferoffset--; + bytebuffer += bytebufferoffset & 4; + bytebufferoffset &= 3; +} + +static inline int read_byte(void) +{ + int ch = bytebuffer[bytebufferoffset]; + + skip_byte(); + + return ch; +} + +/* RANGE DECODING FUNCTIONS */ + +/* SIZE OF RANGE ENCODING CODE VALUES. */ + +#define CODE_BITS 32 +#define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1)) +#define SHIFT_BITS (CODE_BITS - 9) +#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1) +#define BOTTOM_VALUE (TOP_VALUE >> 8) + +struct rangecoder_t +{ + uint32_t low; /* low end of interval */ + uint32_t range; /* length of interval */ + uint32_t help; /* bytes_to_follow resp. intermediate value */ + unsigned int buffer; /* buffer for input/output */ +}; + +static struct rangecoder_t rc IBSS_ATTR; + +/* Start the decoder */ +static inline void range_start_decoding(void) +{ + rc.buffer = read_byte(); + rc.low = rc.buffer >> (8 - EXTRA_BITS); + rc.range = (uint32_t) 1 << EXTRA_BITS; +} + +static inline void range_dec_normalize(void) +{ + while (rc.range <= BOTTOM_VALUE) + { + rc.buffer = (rc.buffer << 8) | read_byte(); + rc.low = (rc.low << 8) | ((rc.buffer >> 1) & 0xff); + rc.range <<= 8; + } +} + +/* Calculate culmulative frequency for next symbol. 
Does NO update!*/ +/* tot_f is the total frequency */ +/* or: totf is (code_value)1<<shift */ +/* returns the cumulative frequency */ +static inline int range_decode_culfreq(int tot_f) +{ + range_dec_normalize(); + rc.help = UDIV32(rc.range, tot_f); + return UDIV32(rc.low, rc.help); +} + +static inline int range_decode_culshift(int shift) +{ + range_dec_normalize(); + rc.help = rc.range >> shift; + return UDIV32(rc.low, rc.help); +} + + +/* Update decoding state */ +/* sy_f is the interval length (frequency of the symbol) */ +/* lt_f is the lower end (frequency sum of < symbols) */ +static inline void range_decode_update(int sy_f, int lt_f) +{ + rc.low -= rc.help * lt_f; + rc.range = rc.help * sy_f; +} + + +/* Decode a byte/short without modelling */ +static inline unsigned char decode_byte(void) +{ int tmp = range_decode_culshift(8); + range_decode_update( 1,tmp); + return tmp; +} + +/* Returns the 16 decoded bits as an unsigned value. Callers OR the result + into wider ints, so a signed short return type would sign-extend values + >= 0x8000 and corrupt the upper bits. */ +static inline unsigned short range_decode_short(void) +{ int tmp = range_decode_culshift(16); + range_decode_update( 1,tmp); + return tmp; +} + +/* Decode n bits (n <= 16) without modelling - based on range_decode_short */ +static inline int range_decode_bits(int n) +{ int tmp = range_decode_culshift(n); + range_decode_update( 1,tmp); + return tmp; +} + + +/* Finish decoding */ +static inline void range_done_decoding(void) +{ range_dec_normalize(); /* normalize to use up all bytes */ +} + +/* + range_get_symbol_* functions based on main decoding loop in simple_d.c from + http://www.compressconsult.com/rangecoder/rngcod13.zip + (c) Michael Schindler +*/ + +static inline int range_get_symbol_3980(void) +{ + int symbol, cf; + + cf = range_decode_culshift(16); + + /* figure out the symbol inefficiently; a binary search would be much better. + range_decode_culshift does not clamp its result, so the index bound stops + the scan at the last model element if a corrupt stream yields + cf >= 65536 instead of reading past the end of the table. */ + for (symbol = 0; + symbol < MODEL_ELEMENTS - 1 && counts_3980[symbol+1] <= cf; + symbol++); + + range_decode_update(counts_diff_3980[symbol],counts_3980[symbol]); + + return symbol; +} + +static inline int range_get_symbol_3970(void) +{ + int symbol, cf; + + cf = 
range_decode_culshift(16); + + /* figure out the symbol inefficiently; a binary search would be much better. + range_decode_culshift does not clamp its result, so the index bound stops + the scan at the last model element if a corrupt stream yields + cf >= 65536 instead of reading past the end of the table. */ + for (symbol = 0; + symbol < MODEL_ELEMENTS - 1 && counts_3970[symbol+1] <= cf; + symbol++); + + range_decode_update(counts_diff_3970[symbol],counts_3970[symbol]); + + return symbol; +} + +/* MAIN DECODING FUNCTIONS */ + +struct rice_t +{ + uint32_t k; + uint32_t ksum; +}; + +static struct rice_t riceX IBSS_ATTR; +static struct rice_t riceY IBSS_ATTR; + +static inline void update_rice(struct rice_t* rice, int x) +{ + rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); + + if (UNLIKELY(rice->k == 0)) { + rice->k = 1; + } else { + uint32_t lim = 1 << (rice->k + 4); + if (UNLIKELY(rice->ksum < lim)) { + rice->k--; + } else if (UNLIKELY(rice->ksum >= 2 * lim)) { + rice->k++; + } + } +} + +static inline int entropy_decode3980(struct rice_t* rice) +{ + int base, x, pivot, overflow; + + pivot = rice->ksum >> 5; + if (UNLIKELY(pivot == 0)) + pivot=1; + + overflow = range_get_symbol_3980(); + + if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) { + overflow = range_decode_short() << 16; + overflow |= range_decode_short(); + } + + if (pivot >= 0x10000) { + /* Codepath for 24-bit streams */ + int nbits, lo_bits, base_hi, base_lo; + + /* Count the number of bits in pivot */ + nbits = 17; /* We know there must be at least 17 bits */ + while ((pivot >> nbits) > 0) { nbits++; } + + /* base_lo is the low (nbits-16) bits of base + base_hi is the high 16 bits of base + */ + lo_bits = (nbits - 16); + + base_hi = range_decode_culfreq((pivot >> lo_bits) + 1); + range_decode_update(1, base_hi); + + base_lo = range_decode_culshift(lo_bits); + range_decode_update(1, base_lo); + + base = (base_hi << lo_bits) + base_lo; + } else { + /* Codepath for 16-bit streams */ + base = range_decode_culfreq(pivot); + range_decode_update(1, base); + } + + x = base + (overflow * pivot); + update_rice(rice, x); + + /* Convert to signed */ + if (x & 1) + return (x >> 1) + 1; + else + return -(x >> 1); +} + + +static inline int 
entropy_decode3970(struct rice_t* rice) +{ + int x, tmpk; + + int overflow = range_get_symbol_3970(); + + if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) { + tmpk = range_decode_bits(5); + overflow = 0; + } else { + tmpk = (rice->k < 1) ? 0 : rice->k - 1; + } + + if (tmpk <= 16) { + x = range_decode_bits(tmpk); + } else { + x = range_decode_short(); + x |= (range_decode_bits(tmpk - 16) << 16); + } + x += (overflow << tmpk); + + update_rice(rice, x); + + /* Convert to signed */ + if (x & 1) + return (x >> 1) + 1; + else + return -(x >> 1); +} + +void init_entropy_decoder(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed) +{ + bytebuffer = inbuffer; + bytebufferoffset = *firstbyte; + + /* Read the CRC */ + ape_ctx->CRC = read_byte(); + ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte(); + ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte(); + ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte(); + + /* Read the frame flags if they exist */ + ape_ctx->frameflags = 0; + if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) { + ape_ctx->CRC &= ~0x80000000; + + ape_ctx->frameflags = read_byte(); + ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte(); + ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte(); + ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte(); + } + /* Keep a count of the blocks decoded in this frame */ + ape_ctx->blocksdecoded = 0; + + /* Initialise the rice structs */ + riceX.k = 10; + riceX.ksum = (1 << riceX.k) * 16; + riceY.k = 10; + riceY.ksum = (1 << riceY.k) * 16; + + /* The first 8 bits of input are ignored. 
*/ + skip_byte(); + + range_start_decoding(); + + /* Return the new state of the buffer */ + *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer; + *firstbyte = bytebufferoffset; +} + +void ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed, + int32_t* decoded0, int32_t* decoded1, + int blockstodecode) +{ + bytebuffer = inbuffer; + bytebufferoffset = *firstbyte; + + ape_ctx->blocksdecoded += blockstodecode; + + if ((ape_ctx->frameflags & APE_FRAMECODE_LEFT_SILENCE) + && ((ape_ctx->frameflags & APE_FRAMECODE_RIGHT_SILENCE) + || (decoded1 == NULL))) { + /* We are pure silence, just memset the output buffer. */ + memset(decoded0, 0, blockstodecode * sizeof(int32_t)); + if (decoded1 != NULL) + memset(decoded1, 0, blockstodecode * sizeof(int32_t)); + } else { + if (ape_ctx->fileversion > 3970) { + while (LIKELY(blockstodecode--)) { + *(decoded0++) = entropy_decode3980(&riceY); + if (decoded1 != NULL) + *(decoded1++) = entropy_decode3980(&riceX); + } + } else { + while (LIKELY(blockstodecode--)) { + *(decoded0++) = entropy_decode3970(&riceY); + if (decoded1 != NULL) + *(decoded1++) = entropy_decode3970(&riceX); + } + } + } + + if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks) + { + range_done_decoding(); + } + + /* Return the new state of the buffer */ + *bytesconsumed = bytebuffer - inbuffer; + *firstbyte = bytebufferoffset; +} diff --git a/plugins/demac/libdemac/entropy.h b/plugins/demac/libdemac/entropy.h new file mode 100644 index 00000000..ffef6211 --- /dev/null +++ b/plugins/demac/libdemac/entropy.h @@ -0,0 +1,40 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: entropy.h 19236 2008-11-26 18:01:18Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later 
version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#ifndef _APE_ENTROPY_H +#define _APE_ENTROPY_H + +#include <inttypes.h> + +void init_entropy_decoder(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed); + +void entropy_decode(struct ape_ctx_t* ape_ctx, + unsigned char* inbuffer, int* firstbyte, + int* bytesconsumed, + int32_t* decoded0, int32_t* decoded1, + int blockstodecode); + +#endif diff --git a/plugins/demac/libdemac/filter.c b/plugins/demac/libdemac/filter.c new file mode 100644 index 00000000..9f1abfb8 --- /dev/null +++ b/plugins/demac/libdemac/filter.c @@ -0,0 +1,252 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter.c 19556 2008-12-22 08:33:49Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include <string.h> +#include <inttypes.h> + +#include "demac.h" +#include "filter.h" +#include "demac_config.h" + +#if FILTER_BITS == 32 + +#if defined(CPU_ARM) && (ARM_ARCH == 4) +#include "vector_math32_armv4.h" +#else +#include "vector_math_generic.h" +#endif + +#else /* FILTER_BITS == 16 */ + +#ifdef CPU_COLDFIRE +#include "vector_math16_cf.h" +#elif defined(CPU_ARM) && (ARM_ARCH >= 6) +#include "vector_math16_armv6.h" +#elif defined(CPU_ARM) && (ARM_ARCH >= 5) +/* Assume all our ARMv5 targets are ARMv5te(j) */ +#include "vector_math16_armv5te.h" +#else +#include "vector_math_generic.h" +#endif + +#endif /* FILTER_BITS */ + +struct filter_t { + filter_int* coeffs; /* ORDER entries */ + + /* We store all the filter delays in a single buffer */ + filter_int* history_end; + + filter_int* delay; + filter_int* adaptcoeffs; + + int avg; +}; + +/* We name the functions according to the ORDER and FRACBITS + pre-processor symbols and build multiple .o files from this .c file + - this increases code-size but gives the compiler more scope for + optimising the individual functions, as well as replacing a lot of + variables with constants. 
+*/ + +#if FRACBITS == 11 + #if ORDER == 16 + #define INIT_FILTER init_filter_16_11 + #define APPLY_FILTER apply_filter_16_11 + #elif ORDER == 64 + #define INIT_FILTER init_filter_64_11 + #define APPLY_FILTER apply_filter_64_11 + #endif +#elif FRACBITS == 13 + #define INIT_FILTER init_filter_256_13 + #define APPLY_FILTER apply_filter_256_13 +#elif FRACBITS == 10 + #define INIT_FILTER init_filter_32_10 + #define APPLY_FILTER apply_filter_32_10 +#elif FRACBITS == 15 + #define INIT_FILTER init_filter_1280_15 + #define APPLY_FILTER apply_filter_1280_15 +#endif + +/* Some macros to handle the fixed-point stuff */ + +/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an + integer (rounding to nearest). */ +#define FP_HALF (1 << (FRACBITS - 1)) /* 0.5 in fixed-point format. */ +#define FP_TO_INT(x) ((x + FP_HALF) >> FRACBITS) /* round(x) */ + +#if defined(CPU_ARM) && (ARM_ARCH >= 6) +#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; }) +#else +#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF) +#endif + +/* Apply the filter with state f to count entries in data[] */ + +static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f, + int32_t* data, int count) +{ + int res; + int absres; + +#ifdef PREPARE_SCALARPRODUCT + PREPARE_SCALARPRODUCT +#endif + + while(LIKELY(count--)) + { + res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); + + if (LIKELY(*data != 0)) { + if (*data < 0) + vector_add(f->coeffs, f->adaptcoeffs - ORDER); + else + vector_sub(f->coeffs, f->adaptcoeffs - ORDER); + } + + res += *data; + + *data++ = res; + + /* Update the output history */ + *f->delay++ = SATURATE(res); + + /* Version 3.98 and later files */ + + /* Update the adaption coefficients */ + absres = (res < 0 ? 
-res : res); + + if (UNLIKELY(absres > 3 * f->avg)) + *f->adaptcoeffs = ((res >> 25) & 64) - 32; + else if (3 * absres > 4 * f->avg) + *f->adaptcoeffs = ((res >> 26) & 32) - 16; + else if (LIKELY(absres > 0)) + *f->adaptcoeffs = ((res >> 27) & 16) - 8; + else + *f->adaptcoeffs = 0; + + f->avg += (absres - f->avg) / 16; + + f->adaptcoeffs[-1] >>= 1; + f->adaptcoeffs[-2] >>= 1; + f->adaptcoeffs[-8] >>= 1; + + f->adaptcoeffs++; + + /* Have we filled the history buffer? */ + if (UNLIKELY(f->delay == f->history_end)) { + memmove(f->coeffs + ORDER, f->delay - (ORDER*2), + (ORDER*2) * sizeof(filter_int)); + f->adaptcoeffs = f->coeffs + ORDER*2; + f->delay = f->coeffs + ORDER*3; + } + } +} + +static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, + int32_t* data, int count) +{ + int res; + +#ifdef PREPARE_SCALARPRODUCT + PREPARE_SCALARPRODUCT +#endif + + while(LIKELY(count--)) + { + res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); + + if (LIKELY(*data != 0)) { + if (*data < 0) + vector_add(f->coeffs, f->adaptcoeffs - ORDER); + else + vector_sub(f->coeffs, f->adaptcoeffs - ORDER); + } + + /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an + integer (rounding to nearest) and add the input value to + it */ + res += *data; + + *data++ = res; + + /* Update the output history */ + *f->delay++ = SATURATE(res); + + /* Version ??? to < 3.98 files (untested) */ + f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4; + f->adaptcoeffs[-4] >>= 1; + f->adaptcoeffs[-8] >>= 1; + + f->adaptcoeffs++; + + /* Have we filled the history buffer? 
*/ + if (UNLIKELY(f->delay == f->history_end)) { + memmove(f->coeffs + ORDER, f->delay - (ORDER*2), + (ORDER*2) * sizeof(filter_int)); + f->adaptcoeffs = f->coeffs + ORDER*2; + f->delay = f->coeffs + ORDER*3; + } + } +} + +static struct filter_t filter0 IBSS_ATTR; +static struct filter_t filter1 IBSS_ATTR; + +static void do_init_filter(struct filter_t* f, filter_int* buf) +{ + f->coeffs = buf; + f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE; + + /* Init pointers */ + f->adaptcoeffs = f->coeffs + ORDER*2; + f->delay = f->coeffs + ORDER*3; + + /* Zero coefficients and history buffer */ + memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int)); + + /* Zero the running average */ + f->avg = 0; +} + +void INIT_FILTER(filter_int* buf) +{ + do_init_filter(&filter0, buf); + do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE); +} + +void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int32_t* data0, + int32_t* data1, int count) +{ + if (fileversion >= 3980) { + do_apply_filter_3980(&filter0, data0, count); + if (data1 != NULL) + do_apply_filter_3980(&filter1, data1, count); + } else { + do_apply_filter_3970(&filter0, data0, count); + if (data1 != NULL) + do_apply_filter_3970(&filter1, data1, count); + } +} diff --git a/plugins/demac/libdemac/filter.h b/plugins/demac/libdemac/filter.h new file mode 100644 index 00000000..946c76c5 --- /dev/null +++ b/plugins/demac/libdemac/filter.h @@ -0,0 +1,50 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter.h 19236 2008-11-26 18:01:18Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#ifndef _APE_FILTER_H +#define _APE_FILTER_H + +#include "demac_config.h" + +void init_filter_16_11(filter_int* buf); +void apply_filter_16_11(int fileversion, int32_t* decoded0, + int32_t* decoded1, int count); + +void init_filter_64_11(filter_int* buf); +void apply_filter_64_11(int fileversion, int32_t* decoded0, + int32_t* decoded1, int count); + +void init_filter_32_10(filter_int* buf); +void apply_filter_32_10(int fileversion, int32_t* decoded0, + int32_t* decoded1, int count); + +void init_filter_256_13(filter_int* buf); +void apply_filter_256_13(int fileversion, int32_t* decoded0, + int32_t* decoded1, int count); + +void init_filter_1280_15(filter_int* buf); +void apply_filter_1280_15(int fileversion, int32_t* decoded0, + int32_t* decoded1, int count); + +#endif diff --git a/plugins/demac/libdemac/filter_1280_15.c b/plugins/demac/libdemac/filter_1280_15.c new file mode 100644 index 00000000..d7916385 --- /dev/null +++ b/plugins/demac/libdemac/filter_1280_15.c @@ -0,0 +1,27 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter_1280_15.c 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#define ORDER 1280 +#define FRACBITS 15 +#include "filter.c" diff --git a/plugins/demac/libdemac/filter_16_11.c b/plugins/demac/libdemac/filter_16_11.c new file mode 100644 index 00000000..b33e09eb --- /dev/null +++ b/plugins/demac/libdemac/filter_16_11.c @@ -0,0 +1,27 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter_16_11.c 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#define ORDER 16 +#define FRACBITS 11 +#include "filter.c" diff --git a/plugins/demac/libdemac/filter_256_13.c b/plugins/demac/libdemac/filter_256_13.c new file mode 100644 index 00000000..2dd9d06d --- /dev/null +++ b/plugins/demac/libdemac/filter_256_13.c @@ -0,0 +1,27 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter_256_13.c 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#define ORDER 256 +#define FRACBITS 13 +#include "filter.c" diff --git a/plugins/demac/libdemac/filter_32_10.c b/plugins/demac/libdemac/filter_32_10.c new file mode 100644 index 00000000..bdafb876 --- /dev/null +++ b/plugins/demac/libdemac/filter_32_10.c @@ -0,0 +1,27 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter_32_10.c 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#define ORDER 32 +#define FRACBITS 10 +#include "filter.c" diff --git a/plugins/demac/libdemac/filter_64_11.c b/plugins/demac/libdemac/filter_64_11.c new file mode 100644 index 00000000..26c85f19 --- /dev/null +++ b/plugins/demac/libdemac/filter_64_11.c @@ -0,0 +1,27 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: filter_64_11.c 19743 2009-01-10 21:10:56Z zagor $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#define ORDER 64 +#define FRACBITS 11 +#include "filter.c" diff --git a/plugins/demac/libdemac/parser.c b/plugins/demac/libdemac/parser.c new file mode 100644 index 00000000..948640d7 --- /dev/null +++ b/plugins/demac/libdemac/parser.c @@ -0,0 +1,402 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: parser.c 13601 2007-06-09 00:58:15Z dave $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include <inttypes.h> +#include <string.h> +#ifndef ROCKBOX +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#endif + +#include "parser.h" + +#ifdef APE_MAX +#undef APE_MAX +#endif +#define APE_MAX(a,b) ((a)>(b)?(a):(b)) + + +static inline int16_t get_int16(unsigned char* buf) +{ + return(buf[0] | (buf[1] << 8)); +} + +static inline uint16_t get_uint16(unsigned char* buf) +{ + return(buf[0] | (buf[1] << 8)); +} + +static inline uint32_t get_uint32(unsigned char* buf) +{ + return(buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24)); +} + + +int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx) +{ + unsigned char* header; + + memset(ape_ctx,0,sizeof(struct ape_ctx_t)); + /* TODO: Skip any leading junk such as id3v2 tags */ + ape_ctx->junklength = 0; + + memcpy(ape_ctx->magic, buf, 4); + if (memcmp(ape_ctx->magic,"MAC ",4)!=0) + { + return -1; + } + + ape_ctx->fileversion = get_int16(buf + 4); + + if (ape_ctx->fileversion >= 3980) + { + ape_ctx->padding1 = get_int16(buf + 6); + ape_ctx->descriptorlength = get_uint32(buf + 8); + ape_ctx->headerlength = get_uint32(buf + 12); + ape_ctx->seektablelength = get_uint32(buf + 16); + ape_ctx->wavheaderlength = get_uint32(buf + 20); + ape_ctx->audiodatalength = get_uint32(buf + 24); + ape_ctx->audiodatalength_high = get_uint32(buf + 28); + ape_ctx->wavtaillength = get_uint32(buf + 32); + memcpy(ape_ctx->md5, buf + 36, 16); + + header = buf + ape_ctx->descriptorlength; + + /* Read header 
data */ + ape_ctx->compressiontype = get_uint16(header + 0); + ape_ctx->formatflags = get_uint16(header + 2); + ape_ctx->blocksperframe = get_uint32(header + 4); + ape_ctx->finalframeblocks = get_uint32(header + 8); + ape_ctx->totalframes = get_uint32(header + 12); + ape_ctx->bps = get_uint16(header + 16); + ape_ctx->channels = get_uint16(header + 18); + ape_ctx->samplerate = get_uint32(header + 20); + + ape_ctx->seektablefilepos = ape_ctx->junklength + + ape_ctx->descriptorlength + + ape_ctx->headerlength; + + ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength + + ape_ctx->headerlength + ape_ctx->seektablelength + + ape_ctx->wavheaderlength; + } else { + ape_ctx->headerlength = 32; + ape_ctx->compressiontype = get_uint16(buf + 6); + ape_ctx->formatflags = get_uint16(buf + 8); + ape_ctx->channels = get_uint16(buf + 10); + ape_ctx->samplerate = get_uint32(buf + 12); + ape_ctx->wavheaderlength = get_uint32(buf + 16); + ape_ctx->totalframes = get_uint32(buf + 24); + ape_ctx->finalframeblocks = get_uint32(buf + 28); + + if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL) + { + ape_ctx->headerlength += 4; + } + + if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS) + { + ape_ctx->seektablelength = get_uint32(buf + ape_ctx->headerlength); + ape_ctx->seektablelength *= sizeof(int32_t); + ape_ctx->headerlength += 4; + } else { + ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t); + } + + if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT) + ape_ctx->bps = 8; + else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT) + ape_ctx->bps = 24; + else + ape_ctx->bps = 16; + + if (ape_ctx->fileversion >= 3950) + ape_ctx->blocksperframe = 73728 * 4; + else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000)) + ape_ctx->blocksperframe = 73728; + else + ape_ctx->blocksperframe = 9216; + + ape_ctx->seektablefilepos = ape_ctx->junklength + ape_ctx->headerlength + + 
ape_ctx->wavheaderlength; + + ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->headerlength + + ape_ctx->wavheaderlength + ape_ctx->seektablelength; + } + + ape_ctx->totalsamples = ape_ctx->finalframeblocks; + if (ape_ctx->totalframes > 1) + ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1); + + ape_ctx->numseekpoints = APE_MAX(ape_ctx->maxseekpoints, + ape_ctx->seektablelength / sizeof(int32_t)); + + return 0; +} + + +#ifndef ROCKBOX +/* Helper functions */ + +static int read_uint16(int fd, uint16_t* x) +{ + unsigned char tmp[2]; + int n; + + n = read(fd,tmp,2); + + if (n != 2) + return -1; + + *x = tmp[0] | (tmp[1] << 8); + + return 0; +} + +static int read_int16(int fd, int16_t* x) +{ + return read_uint16(fd, (uint16_t*)x); +} + +static int read_uint32(int fd, uint32_t* x) +{ + unsigned char tmp[4]; + int n; + + n = read(fd,tmp,4); + + if (n != 4) + return -1; + + *x = tmp[0] | (tmp[1] << 8) | (tmp[2] << 16) | (tmp[3] << 24); + + return 0; +} + +int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx) +{ + int i,n; + + /* TODO: Skip any leading junk such as id3v2 tags */ + ape_ctx->junklength = 0; + + lseek(fd,ape_ctx->junklength,SEEK_SET); + + n = read(fd,&ape_ctx->magic,4); + if (n != 4) return -1; + + if (memcmp(ape_ctx->magic,"MAC ",4)!=0) + { + return -1; + } + + if (read_int16(fd,&ape_ctx->fileversion) < 0) + return -1; + + if (ape_ctx->fileversion >= 3980) + { + if (read_int16(fd,&ape_ctx->padding1) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->descriptorlength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->headerlength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->seektablelength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->audiodatalength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->audiodatalength_high) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->wavtaillength) < 0) + return -1; + if (read(fd,&ape_ctx->md5,16) != 16) + return -1; + + 
/* Skip any unknown bytes at the end of the descriptor. This is for future + compatibility */ + if (ape_ctx->descriptorlength > 52) + lseek(fd,ape_ctx->descriptorlength - 52, SEEK_CUR); + + /* Read header data */ + if (read_uint16(fd,&ape_ctx->compressiontype) < 0) + return -1; + if (read_uint16(fd,&ape_ctx->formatflags) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->blocksperframe) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->totalframes) < 0) + return -1; + if (read_uint16(fd,&ape_ctx->bps) < 0) + return -1; + if (read_uint16(fd,&ape_ctx->channels) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->samplerate) < 0) + return -1; + } else { + ape_ctx->descriptorlength = 0; + ape_ctx->headerlength = 32; + + if (read_uint16(fd,&ape_ctx->compressiontype) < 0) + return -1; + if (read_uint16(fd,&ape_ctx->formatflags) < 0) + return -1; + if (read_uint16(fd,&ape_ctx->channels) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->samplerate) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->wavtaillength) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->totalframes) < 0) + return -1; + if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0) + return -1; + + if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL) + { + lseek(fd, 4, SEEK_CUR); /* Skip the peak level */ + ape_ctx->headerlength += 4; + } + + if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS) + { + if (read_uint32(fd,&ape_ctx->seektablelength) < 0) + return -1; + ape_ctx->headerlength += 4; + ape_ctx->seektablelength *= sizeof(int32_t); + } else { + ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t); + } + + if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT) + ape_ctx->bps = 8; + else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT) + ape_ctx->bps = 24; + else + ape_ctx->bps = 16; + + if (ape_ctx->fileversion >= 3950) + ape_ctx->blocksperframe = 73728 * 4; 
+ else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000)) + ape_ctx->blocksperframe = 73728; + else + ape_ctx->blocksperframe = 9216; + + /* Skip any stored wav header */ + if (!(ape_ctx->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER)) + { + lseek(fd, ape_ctx->wavheaderlength, SEEK_CUR); + } + } + + ape_ctx->totalsamples = ape_ctx->finalframeblocks; + if (ape_ctx->totalframes > 1) + ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1); + + if (ape_ctx->seektablelength > 0) + { + ape_ctx->seektable = malloc(ape_ctx->seektablelength); + if (ape_ctx->seektable == NULL) + return -1; + for (i=0; i < ape_ctx->seektablelength / sizeof(uint32_t); i++) + { + if (read_uint32(fd,&ape_ctx->seektable[i]) < 0) + { + free(ape_ctx->seektable); + return -1; + } + } + } + + ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength + + ape_ctx->headerlength + ape_ctx->seektablelength + + ape_ctx->wavheaderlength; + + return 0; +} + +void ape_dumpinfo(struct ape_ctx_t* ape_ctx) +{ + int i; + + printf("Descriptor Block:\n\n"); + printf("magic = \"%c%c%c%c\"\n", + ape_ctx->magic[0],ape_ctx->magic[1], + ape_ctx->magic[2],ape_ctx->magic[3]); + printf("fileversion = %d\n",ape_ctx->fileversion); + printf("descriptorlength = %d\n",ape_ctx->descriptorlength); + printf("headerlength = %d\n",ape_ctx->headerlength); + printf("seektablelength = %d\n",ape_ctx->seektablelength); + printf("wavheaderlength = %d\n",ape_ctx->wavheaderlength); + printf("audiodatalength = %d\n",ape_ctx->audiodatalength); + printf("audiodatalength_high = %d\n",ape_ctx->audiodatalength_high); + printf("wavtaillength = %d\n",ape_ctx->wavtaillength); + printf("md5 = "); + for (i = 0; i < 16; i++) + printf("%02x",ape_ctx->md5[i]); + printf("\n"); + + printf("\nHeader Block:\n\n"); + + printf("compressiontype = %d\n",ape_ctx->compressiontype); + printf("formatflags = %d\n",ape_ctx->formatflags); + printf("blocksperframe = 
%d\n",ape_ctx->blocksperframe); + printf("finalframeblocks = %d\n",ape_ctx->finalframeblocks); + printf("totalframes = %d\n",ape_ctx->totalframes); + printf("bps = %d\n",ape_ctx->bps); + printf("channels = %d\n",ape_ctx->channels); + printf("samplerate = %d\n",ape_ctx->samplerate); + + printf("\nSeektable\n\n"); + if ((ape_ctx->seektablelength / sizeof(uint32_t)) != ape_ctx->totalframes) + { + printf("No seektable\n"); + } + else + { + for ( i = 0; i < ape_ctx->seektablelength / sizeof(uint32_t) ; i++) + { + if (i < ape_ctx->totalframes-1) { + printf("%8d %d (%d bytes)\n",i,ape_ctx->seektable[i],ape_ctx->seektable[i+1]-ape_ctx->seektable[i]); + } else { + printf("%8d %d\n",i,ape_ctx->seektable[i]); + } + } + } + printf("\nCalculated information:\n\n"); + printf("junklength = %d\n",ape_ctx->junklength); + printf("firstframe = %d\n",ape_ctx->firstframe); + printf("totalsamples = %d\n",ape_ctx->totalsamples); +} + +#endif /* !ROCKBOX */ diff --git a/plugins/demac/libdemac/parser.h b/plugins/demac/libdemac/parser.h new file mode 100644 index 00000000..0cfa665d --- /dev/null +++ b/plugins/demac/libdemac/parser.h @@ -0,0 +1,137 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: parser.h 19552 2008-12-21 23:49:02Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#ifndef _APE_PARSER_H +#define _APE_PARSER_H + +#include <inttypes.h> +#include "demac_config.h" + +/* The earliest and latest file formats supported by this library */ +#define APE_MIN_VERSION 3970 +#define APE_MAX_VERSION 3990 + +#define MAC_FORMAT_FLAG_8_BIT 1 // is 8-bit [OBSOLETE] +#define MAC_FORMAT_FLAG_CRC 2 // uses the new CRC32 error detection [OBSOLETE] +#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL 4 // uint32 nPeakLevel after the header [OBSOLETE] +#define MAC_FORMAT_FLAG_24_BIT 8 // is 24-bit [OBSOLETE] +#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS 16 // has the number of seek elements after the peak level +#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER 32 // create the wave header on decompression (not stored) + + +/* Special frame codes: + + MONO_SILENCE - All PCM samples in frame are zero (mono streams only) + LEFT_SILENCE - All PCM samples for left channel in frame are zero (stereo streams) + RIGHT_SILENCE - All PCM samples for right channel in frame are zero (stereo streams) + PSEUDO_STEREO - Left and Right channels are identical + +*/ + +#define APE_FRAMECODE_MONO_SILENCE 1 +#define APE_FRAMECODE_LEFT_SILENCE 1 /* same as mono */ +#define APE_FRAMECODE_RIGHT_SILENCE 2 +#define APE_FRAMECODE_STEREO_SILENCE 3 /* combined */ +#define APE_FRAMECODE_PSEUDO_STEREO 4 + +#define PREDICTOR_ORDER 8 +/* Total size of all predictor histories - 50 * sizeof(int32_t) */ +#define PREDICTOR_SIZE 50 + + +/* NOTE: This struct is used in predictor-arm.S - any updates need to + be reflected there. 
*/ + +struct predictor_t +{ + /* Filter histories */ + int32_t* buf; + + int32_t YlastA; + int32_t XlastA; + + /* NOTE: The order of the next four fields is important for + predictor-arm.S */ + int32_t YfilterB; + int32_t XfilterA; + int32_t XfilterB; + int32_t YfilterA; + + /* Adaption co-efficients */ + int32_t YcoeffsA[4]; + int32_t XcoeffsA[4]; + int32_t YcoeffsB[5]; + int32_t XcoeffsB[5]; + int32_t historybuffer[PREDICTOR_HISTORY_SIZE + PREDICTOR_SIZE]; +}; + +struct ape_ctx_t +{ + /* Derived fields */ + uint32_t junklength; + uint32_t firstframe; + uint32_t totalsamples; + + /* Info from Descriptor Block */ + char magic[4]; + int16_t fileversion; + int16_t padding1; + uint32_t descriptorlength; + uint32_t headerlength; + uint32_t seektablelength; + uint32_t wavheaderlength; + uint32_t audiodatalength; + uint32_t audiodatalength_high; + uint32_t wavtaillength; + uint8_t md5[16]; + + /* Info from Header Block */ + uint16_t compressiontype; + uint16_t formatflags; + uint32_t blocksperframe; + uint32_t finalframeblocks; + uint32_t totalframes; + uint16_t bps; + uint16_t channels; + uint32_t samplerate; + + /* Seektable */ + uint32_t* seektable; /* Seektable buffer */ + uint32_t maxseekpoints; /* Max seekpoints we can store (size of seektable buffer) */ + uint32_t numseekpoints; /* Number of seekpoints */ + int seektablefilepos; /* Location in .ape file of seektable */ + + /* Decoder state */ + uint32_t CRC; + int frameflags; + int currentframeblocks; + int blocksdecoded; + struct predictor_t predictor; +}; + +int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx); +int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx); +void ape_dumpinfo(struct ape_ctx_t* ape_ctx); + +#endif diff --git a/plugins/demac/libdemac/predictor-arm.S b/plugins/demac/libdemac/predictor-arm.S new file mode 100644 index 00000000..bfb96738 --- /dev/null +++ b/plugins/demac/libdemac/predictor-arm.S @@ -0,0 +1,694 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: 
predictor-arm.S 21916 2009-07-17 09:17:54Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ +#include "demac_config.h" + + .section ICODE_SECTION_DEMAC_ARM,"ax",%progbits + + .align 2 + +/* NOTE: The following need to be kept in sync with parser.h */ + +#define YDELAYA 200 +#define YDELAYB 168 +#define XDELAYA 136 +#define XDELAYB 104 +#define YADAPTCOEFFSA 72 +#define XADAPTCOEFFSA 56 +#define YADAPTCOEFFSB 40 +#define XADAPTCOEFFSB 20 + +/* struct predictor_t members: */ +#define buf 0 /* int32_t* buf */ + +#define YlastA 4 /* int32_t YlastA; */ +#define XlastA 8 /* int32_t XlastA; */ + +#define YfilterB 12 /* int32_t YfilterB; */ +#define XfilterA 16 /* int32_t XfilterA; */ + +#define XfilterB 20 /* int32_t XfilterB; */ +#define YfilterA 24 /* int32_t YfilterA; */ + +#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */ +#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */ +#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */ +#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */ + +#define historybuffer 100 /* int32_t historybuffer[] */ + +@ Macro for loading 2 registers, for various ARM versions. +@ Registers must start with an even register, and must be consecutive. 
+ +.macro LDR2OFS reg1, reg2, base, offset +#if ARM_ARCH >= 6 + ldrd \reg1, [\base, \offset] +#else /* ARM_ARCH < 6 */ +#ifdef CPU_ARM7TDMI + add \reg1, \base, \offset + ldmia \reg1, {\reg1, \reg2} +#else /* ARM9 (v4 and v5) is faster this way */ + ldr \reg1, [\base, \offset] + ldr \reg2, [\base, \offset+4] +#endif +#endif /* ARM_ARCH */ +.endm + +@ Macro for storing 2 registers, for various ARM versions. +@ Registers must start with an even register, and must be consecutive. + +.macro STR2OFS reg1, reg2, base, offset +#if ARM_ARCH >= 6 + strd \reg1, [\base, \offset] +#else + str \reg1, [\base, \offset] + str \reg2, [\base, \offset+4] +#endif +.endm + + .global predictor_decode_stereo + .type predictor_decode_stereo,%function + +@ Register usage: +@ +@ r0-r11 - scratch +@ r12 - struct predictor_t* p +@ r14 - int32_t* p->buf + +@ void predictor_decode_stereo(struct predictor_t* p, +@ int32_t* decoded0, +@ int32_t* decoded1, +@ int count) + +predictor_decode_stereo: + stmdb sp!, {r1-r11, lr} + + @ r1 (decoded0) is [sp] + @ r2 (decoded1) is [sp, #4] + @ r3 (count) is [sp, #8] + + mov r12, r0 @ r12 := p + ldr r14, [r0] @ r14 := p->buf + +loop: + +@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y + +@ Predictor Y, Filter A + + ldr r11, [r12, #YlastA] @ r11 := p->YlastA + + add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] + ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] + @ r3 := p->buf[YDELAYA-2] + @ r10 := p->buf[YDELAYA-1] + + add r6, r12, #YcoeffsA + ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0] + @ r7 := p->YcoeffsA[1] + @ r8 := p->YcoeffsA[2] + @ r9 := p->YcoeffsA[3] + + subs r10, r11, r10 @ r10 := r11 - r10 + + STR2OFS r10, r11, r14, #YDELAYA-4 + @ p->buf[YDELAYA-1] = r10 + @ p->buf[YDELAYA] = r11 + + mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] + mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] + mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] + mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] + + @ flags were set 
above, in the subs instruction + mvngt r10, #0 + movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) + + cmp r11, #0 + mvngt r11, #0 + movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) + + STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4 + @ p->buf[YADAPTCOEFFSA-1] := r10 + @ p->buf[YADAPTCOEFFSA] := r11 + + @ NOTE: r0 now contains predictionA - don't overwrite. + +@ Predictor Y, Filter B + + LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB + @ r7 := p->XfilterA + + add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4] + ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4] + @ r3 := p->buf[YDELAYB-3] + @ r4 := p->buf[YDELAYB-2] + @ r10 := p->buf[YDELAYB-1] + + rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) + sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5) + + str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA) + + add r5, r12, #YcoeffsB + ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0] + @ r6 := p->YcoeffsB[1] + @ r7 := p->YcoeffsB[2] + @ r8 := p->YcoeffsB[3] + @ r9 := p->YcoeffsB[4] + + subs r10, r11, r10 @ r10 := r11 - r10 + + STR2OFS r10, r11, r14, #YDELAYB-4 + @ p->buf[YDELAYB-1] = r10 + @ p->buf[YDELAYB] = r11 + + mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0] + mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] + mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] + mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] + mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] + + @ flags were set above, in the subs instruction + mvngt r10, #0 + movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) + + cmp r11, #0 + mvngt r11, #0 + movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) + + STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4 + @ p->buf[YADAPTCOEFFSB-1] := r10 + @ p->buf[YADAPTCOEFFSB] := r11 + + @ r0 still contains predictionA + @ r1 contains predictionB + + @ Finish Predictor Y + + ldr r2, [sp] @ r2 := decoded0 + add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1) + ldr r4, [r12, 
#YfilterA] @ r4 := p->YfilterA + ldr r3, [r2] @ r3 := *decoded0 + rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) + add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) + str r1, [r12, #YlastA] @ p->YlastA := r1 + add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) + str r1, [r12, #YfilterA] @ p->YfilterA := r1 + + @ r1 contains p->YfilterA + @ r2 contains decoded0 + @ r3 contains *decoded0 + + @ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4] + @ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] + + str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA) + str r2, [sp] @ save decoded0 + cmp r3, #0 + beq 3f + + add r2, r14, #YADAPTCOEFFSB-16 + ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4] + @ r3 := p->buf[YADAPTCOEFFSB-3] + @ r4 := p->buf[YADAPTCOEFFSB-2] + blt 1f + + @ *decoded0 > 0 + + sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] + sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] + sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] + sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] + sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] + + add r0, r12, #YcoeffsB + stmia r0, {r5 - r9} @ Save p->YcoeffsB[] + + add r1, r12, #YcoeffsA + ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] + @ r3 := p->YcoeffsA[1] + @ r4 := p->YcoeffsA[2] + @ r5 := p->YcoeffsA[3] + + add r6, r14, #YADAPTCOEFFSA-12 + ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] + @ r7 := p->buf[YADAPTCOEFFSA-2] + @ r8 := p->buf[YADAPTCOEFFSA-1] + @ r9 := p->buf[YADAPTCOEFFSA] + + sub r5, r5, r6 @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] + sub r4, r4, r7 @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] + sub r3, r3, r8 @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] + sub r2, r2, r9 @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] + + b 2f + + +1: @ *decoded0 < 0 + + add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] + add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] + add r9, r9, r2 @ r9 
:= p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] + add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] + add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] + + add r0, r12, #YcoeffsB + stmia r0, {r5 - r9} @ Save p->YcoeffsB[] + + add r1, r12, #YcoeffsA + ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] + @ r3 := p->YcoeffsA[1] + @ r4 := p->YcoeffsA[2] + @ r5 := p->YcoeffsA[3] + + add r6, r14, #YADAPTCOEFFSA-12 + ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] + @ r7 := p->buf[YADAPTCOEFFSA-2] + @ r8 := p->buf[YADAPTCOEFFSA-1] + @ r9 := p->buf[YADAPTCOEFFSA] + + add r5, r5, r6 @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] + add r4, r4, r7 @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] + add r3, r3, r8 @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] + add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] + +2: + stmia r1, {r2 - r5} @ Save p->YcoeffsA + +3: + +@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X + +@ Predictor X, Filter A + + ldr r11, [r12, #XlastA] @ r11 := p->XlastA + + add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3] + ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3] + @ r3 := p->buf[XDELAYA-2] + @ r10 := p->buf[XDELAYA-1] + + add r6, r12, #XcoeffsA + ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0] + @ r7 := p->XcoeffsA[1] + @ r8 := p->XcoeffsA[2] + @ r9 := p->XcoeffsA[3] + + subs r10, r11, r10 @ r10 := r11 - r10 + + STR2OFS r10, r11, r14, #XDELAYA-4 + @ p->buf[XDELAYA-1] = r10 + @ p->buf[XDELAYA] = r11 + + mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0] + mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] + mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] + mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] + + @ flags were set above, in the subs instruction + mvngt r10, #0 + movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) + + cmp r11, #0 + mvngt r11, #0 + movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) + + STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4 + @
p->buf[XADAPTCOEFFSA-1] := r10 + @ p->buf[XADAPTCOEFFSA] := r11 + + @ NOTE: r0 now contains predictionA - don't overwrite. + +@ Predictor X, Filter B + + LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB + @ r7 := p->YfilterA + + add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4] + ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4] + @ r3 := p->buf[XDELAYB-3] + @ r4 := p->buf[XDELAYB-2] + @ r10 := p->buf[XDELAYB-1] + + rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) + sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5) + + str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA) + + add r5, r12, #XcoeffsB + ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0] + @ r6 := p->XcoeffsB[1] + @ r7 := p->XcoeffsB[2] + @ r8 := p->XcoeffsB[3] + @ r9 := p->XcoeffsB[4] + + subs r10, r11, r10 @ r10 := r11 - r10 + + STR2OFS r10, r11, r14, #XDELAYB-4 + @ p->buf[XDELAYB-1] = r10 + @ p->buf[XDELAYB] = r11 + + mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0] + mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] + mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] + mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] + mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] + + @ flags were set above, in the subs instruction + mvngt r10, #0 + movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) + + cmp r11, #0 + mvngt r11, #0 + movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) + + STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4 + @ p->buf[XADAPTCOEFFSB-1] := r10 + @ p->buf[XADAPTCOEFFSB] := r11 + + @ r0 still contains predictionA + @ r1 contains predictionB + + @ Finish Predictor X + + ldr r2, [sp, #4] @ r2 := decoded1 + add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1) + ldr r4, [r12, #XfilterA] @ r4 := p->XfilterA + ldr r3, [r2] @ r3 := *decoded1 + rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) + add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) + str r1, [r12, #XlastA] @ p->XlastA := r1 + add r1, r1, r4, asr #5 @
r1 := r1 + (r4 >> 5) + str r1, [r12, #XfilterA] @ p->XfilterA := r1 + + @ r1 contains p->XfilterA + @ r2 contains decoded1 + @ r3 contains *decoded1 + + @ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4] + @ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] + + str r1, [r2], #4 @ *(decoded1++) := r1 (p->XfilterA) + str r2, [sp, #4] @ save decoded1 + cmp r3, #0 + beq 3f + + add r2, r14, #XADAPTCOEFFSB-16 + ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4] + @ r3 := p->buf[XADAPTCOEFFSB-3] + @ r4 := p->buf[XADAPTCOEFFSB-2] + blt 1f + + @ *decoded1 > 0 + + sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] + sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] + sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] + sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] + sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] + + add r0, r12, #XcoeffsB + stmia r0, {r5 - r9} @ Save p->XcoeffsB[] + + add r1, r12, #XcoeffsA + ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] + @ r3 := p->XcoeffsA[1] + @ r4 := p->XcoeffsA[2] + @ r5 := p->XcoeffsA[3] + + add r6, r14, #XADAPTCOEFFSA-12 + ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] + @ r7 := p->buf[XADAPTCOEFFSA-2] + @ r8 := p->buf[XADAPTCOEFFSA-1] + @ r9 := p->buf[XADAPTCOEFFSA] + + sub r5, r5, r6 @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] + sub r4, r4, r7 @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] + sub r3, r3, r8 @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] + sub r2, r2, r9 @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] + + b 2f + + +1: @ *decoded1 < 0 + + add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] + add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] + add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] + add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] + add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] + + add r0, r12, #XcoeffsB + stmia r0, {r5 - r9} @ Save 
p->XcoeffsB[] + + add r1, r12, #XcoeffsA + ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] + @ r3 := p->XcoeffsA[1] + @ r4 := p->XcoeffsA[2] + @ r5 := p->XcoeffsA[3] + + add r6, r14, #XADAPTCOEFFSA-12 + ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] + @ r7 := p->buf[XADAPTCOEFFSA-2] + @ r8 := p->buf[XADAPTCOEFFSA-1] + @ r9 := p->buf[XADAPTCOEFFSA] + + add r5, r5, r6 @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] + add r4, r4, r7 @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] + add r3, r3, r8 @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] + add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] + +2: + stmia r1, {r2 - r5} @ Save p->XcoeffsA + +3: + +@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON + + add r14, r14, #4 @ p->buf++ + + add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] + + sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 + @ r10 := p->buf - PREDICTOR_HISTORY_SIZE + + ldr r0, [sp, #8] + cmp r10, r11 + beq move_hist @ The history buffer is full, we need to do a memmove + + @ Check loop count + subs r0, r0, #1 + strne r0, [sp, #8] + bne loop + +done: + str r14, [r12] @ Save value of p->buf + add sp, sp, #12 @ Don't bother restoring r1-r3 + ldmia sp!, {r4 - r11, pc} + +move_hist: + @ dest = r11 (p->historybuffer) + @ src = r14 (p->buf) + @ n = 200 + + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + + ldr r0, [sp, #8] + add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] + + @ Check loop count + subs r0, r0, #1 + strne r0, [sp, #8] + bne loop + + b done + .size predictor_decode_stereo, .-predictor_decode_stereo + + .global predictor_decode_mono + .type predictor_decode_mono,%function + +@ Register usage: +@ +@ r0-r11 - scratch +@ r12 - struct predictor_t* p +@ r14 - int32_t* p->buf + +@ void predictor_decode_mono(struct 
predictor_t* p, +@ int32_t* decoded0, +@ int count) + +predictor_decode_mono: + stmdb sp!, {r1, r2, r4-r11, lr} + + @ r1 (decoded0) is [sp] + @ r2 (count) is [sp, #4] + + mov r12, r0 @ r12 := p + ldr r14, [r0] @ r14 := p->buf + +loopm: + +@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR + + ldr r11, [r12, #YlastA] @ r11 := p->YlastA + + add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] + ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] + @ r3 := p->buf[YDELAYA-2] + @ r10 := p->buf[YDELAYA-1] + + add r5, r12, #YcoeffsA @ r5 := &p->YcoeffsA[0] + ldmia r5, {r6 - r9} @ r6 := p->YcoeffsA[0] + @ r7 := p->YcoeffsA[1] + @ r8 := p->YcoeffsA[2] + @ r9 := p->YcoeffsA[3] + + subs r10, r11, r10 @ r10 := r11 - r10 + + STR2OFS r10, r11, r14, #YDELAYA-4 + @ p->buf[YDELAYA-1] = r10 + @ p->buf[YDELAYA] = r11 + + mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] + mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] + mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] + mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] + + @ flags were set above, in the subs instruction + mvngt r10, #0 + movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) + + cmp r11, #0 + mvngt r11, #0 + movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) + + STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4 + @ p->buf[YADAPTCOEFFSA-1] := r10 + @ p->buf[YADAPTCOEFFSA] := r11 + + ldr r2, [sp] @ r2 := decoded0 + ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA + ldr r3, [r2] @ r3 := *decoded0 + rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) + add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) + str r1, [r12, #YlastA] @ p->YlastA := r1 + add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) + str r1, [r12, #YfilterA] @ p->YfilterA := r1 + + @ r1 contains p->YfilterA + @ r2 contains decoded0 + @ r3 contains *decoded0 + + @ r6, r7, r8, r9 contain p->YcoeffsA[0..3] + @ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA] + + str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA) + str 
r2, [sp] @ save decoded0 + cmp r3, #0 + beq 3f + + LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12 + @ r2 := p->buf[YADAPTCOEFFSA-3] + @ r3 := p->buf[YADAPTCOEFFSA-2] + blt 1f + + @ *decoded0 > 0 + + sub r6, r6, r11 @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] + sub r7, r7, r10 @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] + sub r9, r9, r2 @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] + sub r8, r8, r3 @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] + + b 2f + +1: @ *decoded0 < 0 + + add r6, r6, r11 @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] + add r7, r7, r10 @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] + add r9, r9, r2 @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] + add r8, r8, r3 @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] + +2: + stmia r5, {r6 - r9} @ Save p->YcoeffsA + +3: + +@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON + + add r14, r14, #4 @ p->buf++ + + add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] + + sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 + @ r10 := p->buf - PREDICTOR_HISTORY_SIZE + + ldr r0, [sp, #4] + cmp r10, r11 + beq move_histm @ The history buffer is full, we need to do a memmove + + @ Check loop count + subs r0, r0, #1 + strne r0, [sp, #4] + bne loopm + +donem: + str r14, [r12] @ Save value of p->buf + add sp, sp, #8 @ Don't bother restoring r1, r2 + ldmia sp!, {r4 - r11, pc} + +move_histm: + @ dest = r11 (p->historybuffer) + @ src = r14 (p->buf) + @ n = 200 + + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + ldmia r14!, {r0-r9} @ 40 bytes + stmia r11!, {r0-r9} + + ldr r0, [sp, #4] + add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] + + @ Check loop count + subs r0, r0, #1 + strne r0, [sp, #4] + bne loopm + + b donem + .size predictor_decode_mono, .-predictor_decode_mono diff --git a/plugins/demac/libdemac/predictor-cf.S 
b/plugins/demac/libdemac/predictor-cf.S new file mode 100644 index 00000000..341e57f1 --- /dev/null +++ b/plugins/demac/libdemac/predictor-cf.S @@ -0,0 +1,659 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: predictor-cf.S 19296 2008-12-02 02:26:04Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +Coldfire predictor copyright (C) 2007 Jens Arnold + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ +#include "demac_config.h" + +/* NOTE: The following need to be kept in sync with parser.h */ + +#define YDELAYA 200 +#define YDELAYB 168 +#define XDELAYA 136 +#define XDELAYB 104 +#define YADAPTCOEFFSA 72 +#define XADAPTCOEFFSA 56 +#define YADAPTCOEFFSB 40 +#define XADAPTCOEFFSB 20 + +/* struct predictor_t members: */ +#define buf 0 /* int32_t* buf */ + +#define YlastA 4 /* int32_t YlastA; */ +#define XlastA 8 /* int32_t XlastA; */ + +#define YfilterB 12 /* int32_t YfilterB; */ +#define XfilterA 16 /* int32_t XfilterA; */ + +#define XfilterB 20 /* int32_t XfilterB; */ +#define YfilterA 24 /* int32_t YfilterA; */ + +#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */ +#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */ +#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */ +#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */ + +#define historybuffer 100 /* int32_t historybuffer[] */ + + + .text + + .align 2 + + .global predictor_decode_stereo + .type 
predictor_decode_stereo,@function + +| void predictor_decode_stereo(struct predictor_t* p, +| int32_t* decoded0, +| int32_t* decoded1, +| int count) + +predictor_decode_stereo: + lea.l (-12*4,%sp), %sp + movem.l %d2-%d7/%a2-%a6, (4,%sp) + + movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0 + | %a4 = decoded1 + move.l %a5, (%sp) | (%sp) = count + + move.l #0, %macsr | signed integer mode + move.l (12*4+4,%sp), %a6 | %a6 = p + move.l (%a6), %a5 | %a5 = p->buf + +.loop: + + | ***** PREDICTOR Y ***** + + | Predictor Y, Filter A + + move.l (YlastA,%a6), %d3 | %d3 = p->YlastA + + movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] + | %d1 = p->buf[YDELAYA-2] + | %d2 = p->buf[YDELAYA-1] + + move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 + + sub.l %d3, %d2 + neg.l %d2 | %d2 = %d3 - %d2 + + move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 + + movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] + | %d5 = p->YcoeffsA[1] + | %d6 = p->YcoeffsA[2] + | %d7 = p->YcoeffsA[3] + + mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] + mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] + mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] + mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] + + tst.l %d2 + beq.s 1f + spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 + extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 + or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 +1: | %d2 = SIGN(%d2) + move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 + + tst.l %d3 + beq.s 1f + spl.b %d3 + extb.l %d3 + or.l #1, %d3 +1: | %d3 = SIGN(%d3) + move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 + + | Predictor Y, Filter B + + movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB + | %d3 = p->XfilterA + move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3 + + move.l %d2, %d1 | %d1 = %d2 + lsl.l #5, %d2 | %d2 = %d2 * 32 + sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) + asr.l #5, %d2 | %d2 >>= 5 + sub.l %d2, %d3 | %d3 -= 
%d2 + + movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4] + | %d5 = p->buf[YDELAYB-3] + | %d6 = p->buf[YDELAYB-2] + | %d7 = p->buf[YDELAYB-1] + sub.l %d3, %d7 + neg.l %d7 | %d7 = %d3 - %d7 + + move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7 + + movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0] + | %d2 = p->YcoeffsB[1] + | %a0 = p->YcoeffsB[2] + | %a1 = p->YcoeffsB[3] + | %a2 = p->YcoeffsB[4] + + mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0] + mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] + mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] + mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] + mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] + + move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 + + tst.l %d7 + beq.s 1f + spl.b %d7 + extb.l %d7 + or.l #1, %d7 +1: | %d7 = SIGN(%d7) + move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7 + tst.l %d3 + beq.s 1f + spl.b %d3 + extb.l %d3 + or.l #1, %d3 +1: | %d3 = SIGN(%d3) + move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 + + | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4] + | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] + + move.l (%a3), %d0 | %d0 = *decoded0 + beq.s 3f + + movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4] + | %d5 = p->buf[YADAPTCOEFFSB-3] + | %d6 = p->buf[YADAPTCOEFFSB-2] + + bmi.s 1f | flags still valid here + + | *decoded0 > 0 + + sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] + sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] + sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] + sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] + sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] + + movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] + + movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] + | %d5 = p->YcoeffsA[1] + | %d6 
= p->YcoeffsA[2] + | %d7 = p->YcoeffsA[3] + + movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 + | %d2 = p->buf[YADAPTCOEFFSA-3] + | %a0 = p->buf[YADAPTCOEFFSA-2] + | %a1 = p->buf[YADAPTCOEFFSA-1] + | %a2 = p->buf[YADAPTCOEFFSA] + + sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] + sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] + sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] + sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] + + bra.s 2f + +1: | *decoded0 < 0 + + add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] + add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] + add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] + add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] + add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] + + movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] + + movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] + | %d5 = p->YcoeffsA[1] + | %d6 = p->YcoeffsA[2] + | %d7 = p->YcoeffsA[3] + + movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 + | %d2 = p->buf[YADAPTCOEFFSA-3] + | %a0 = p->buf[YADAPTCOEFFSA-2] + | %a1 = p->buf[YADAPTCOEFFSA-1] + | %a2 = p->buf[YADAPTCOEFFSA] + + add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] + add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] + add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] + add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] + +2: + movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] + +3: + | Finish Predictor Y + + movclr.l %acc0, %d1 | %d1 = predictionA + movclr.l %acc1, %d2 | %d2 = predictionB + asr.l #1, %d2 + add.l %d2, %d1 | %d1 += (%d2 >> 1) + asr.l #8, %d1 + asr.l #2, %d1 | %d1 >>= 10 + add.l %d0, %d1 | %d1 += %d0 + move.l %d1, (YlastA,%a6) | p->YlastA = %d1 + + move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA + move.l %d2, %d0 + lsl.l #5, %d2 + sub.l %d0, %d2 | %d2 = 31 * %d2 + asr.l #5, %d2 | %d2 >>= 5 + 
add.l %d1, %d2 + move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 + + | *decoded0 stored 2 instructions down, avoiding pipeline stall + + | ***** PREDICTOR X ***** + + | Predictor X, Filter A + + move.l (XlastA,%a6), %d3 | %d3 = p->XlastA + + move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA) + + movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3] + | %d1 = p->buf[XDELAYA-2] + | %d2 = p->buf[XDELAYA-1] + + move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 + + sub.l %d3, %d2 + neg.l %d2 | %d2 = %d3 -%d2 + + move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 + + movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] + | %d5 = p->XcoeffsA[1] + | %d6 = p->XcoeffsA[2] + | %d7 = p->XcoeffsA[3] + + mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0] + mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] + mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] + mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] + + tst.l %d2 + beq.s 1f + spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 + extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 + or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 +1: | %d2 = SIGN(%d2) + move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2 + + tst.l %d3 + beq.s 1f + spl.b %d3 + extb.l %d3 + or.l #1, %d3 +1: | %d3 = SIGN(%d3) + move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 + + | Predictor X, Filter B + + movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB + | %d3 = p->YfilterA + move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3 + + move.l %d2, %d1 | %d1 = %d2 + lsl.l #5, %d2 | %d2 = %d2 * 32 + sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) + asr.l #5, %d2 | %d2 >>= 5 + sub.l %d2, %d3 | %d3 -= %d2 + + movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4] + | %d5 = p->buf[XDELAYB-3] + | %d6 = p->buf[XDELAYB-2] + | %d7 = p->buf[XDELAYB-1] + sub.l %d3, %d7 + neg.l %d7 | %d7 = %d3 - %d7 + + move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7 + + movem.l 
(XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0] + | %d2 = p->XcoeffsB[1] + | %a0 = p->XcoeffsB[2] + | %a1 = p->XcoeffsB[3] + | %a2 = p->XcoeffsB[4] + + mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0] + mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] + mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] + mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] + mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] + + move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 + + tst.l %d7 + beq.s 1f + spl.b %d7 + extb.l %d7 + or.l #1, %d7 +1: | %d7 = SIGN(%d7) + move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7 + + tst.l %d3 + beq.s 1f + spl.b %d3 + extb.l %d3 + or.l #1, %d3 +1: | %d3 = SIGN(%d3) + move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 + + | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4] + | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] + + move.l (%a4), %d0 | %d0 = *decoded1 + beq.s 3f + + movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4] + | %d5 = p->buf[XADAPTCOEFFSB-3] + | %d6 = p->buf[XADAPTCOEFFSB-2] + + bmi.s 1f | flags still valid here + + | *decoded1 > 0 + + sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] + sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] + sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] + sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] + sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] + + movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] + + movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] + | %d5 = p->XcoeffsA[1] + | %d6 = p->XcoeffsA[2] + | %d7 = p->XcoeffsA[3] + + movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 + | %d2 = p->buf[XADAPTCOEFFSA-3] + | %a0 = p->buf[XADAPTCOEFFSA-2] + | %a1 = p->buf[XADAPTCOEFFSA-1] + | %a2 = p->buf[XADAPTCOEFFSA] + + sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] 
- p->buf[XADAPTCOEFFSA] + sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] + sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] + sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] + + bra.s 2f + +1: | *decoded1 < 0 + + add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] + add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] + add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] + add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] + add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] + + movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] + + movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] + | %d5 = p->XcoeffsA[1] + | %d6 = p->XcoeffsA[2] + | %d7 = p->XcoeffsA[3] + + movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 + | %d2 = p->buf[XADAPTCOEFFSA-3] + | %a0 = p->buf[XADAPTCOEFFSA-2] + | %a1 = p->buf[XADAPTCOEFFSA-1] + | %a2 = p->buf[XADAPTCOEFFSA] + + add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] + add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] + add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] + add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] + +2: + movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] + +3: + | Finish Predictor X + + movclr.l %acc0, %d1 | %d1 = predictionA + movclr.l %acc1, %d2 | %d2 = predictionB + asr.l #1, %d2 + add.l %d2, %d1 | %d1 += (%d2 >> 1) + asr.l #8, %d1 + asr.l #2, %d1 | %d1 >>= 10 + add.l %d0, %d1 | %d1 += %d0 + move.l %d1, (XlastA,%a6) | p->XlastA = %d1 + + move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA + move.l %d2, %d0 + lsl.l #5, %d2 + sub.l %d0, %d2 | %d2 = 31 * %d2 + asr.l #5, %d2 | %d2 >>= 5 + add.l %d1, %d2 + move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2 + + | *decoded1 stored 3 instructions down, avoiding pipeline stall + + | ***** COMMON ***** + + addq.l #4, %a5 | p->buf++ + lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2 + | %a2 =
&p->historybuffer[PREDICTOR_HISTORY_SIZE] + + move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA) + + cmp.l %a2, %a5 + beq.s .move_hist | History buffer is full, we need to do a memmove + + subq.l #1, (%sp) | decrease loop count + bne.w .loop + +.done: + move.l %a5, (%a6) | Save value of p->buf + movem.l (4,%sp), %d2-%d7/%a2-%a6 + lea.l (12*4,%sp), %sp + rts + +.move_hist: + lea.l (historybuffer,%a6), %a2 + + | dest = %a2 (p->historybuffer) + | src = %a5 (p->buf) + | n = 200 + + movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (%a2) + movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (40,%a2) + movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (80,%a2) + movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (120,%a2) + movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (160,%a2) + + move.l %a2, %a5 | p->buf = &p->historybuffer[0] + + subq.l #1, (%sp) | decrease loop count + bne.w .loop + + bra.s .done + .size predictor_decode_stereo, .-predictor_decode_stereo + + + .global predictor_decode_mono + .type predictor_decode_mono,@function + +| void predictor_decode_mono(struct predictor_t* p, +| int32_t* decoded0, +| int count) + +predictor_decode_mono: + lea.l (-11*4,%sp), %sp + movem.l %d2-%d7/%a2-%a6, (%sp) + + move.l #0, %macsr | signed integer mode + + move.l (11*4+4,%sp), %a6 | %a6 = p + move.l (11*4+8,%sp), %a4 | %a4 = decoded0 + move.l (11*4+12,%sp), %d7 | %d7 = count + move.l (%a6), %a5 | %a5 = p->buf + + move.l (YlastA,%a6), %d3 | %d3 = p->YlastA + +.loopm: + + | ***** PREDICTOR ***** + + movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] + | %d1 = p->buf[YDELAYA-2] + | %d2 = p->buf[YDELAYA-1] + + move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 + + sub.l %d3, %d2 + neg.l %d2 | %d2 = %d3 - %d2 + + move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 + + movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] + | %a1 = p->YcoeffsA[1] + | %a2 = 
p->YcoeffsA[2] + | %a3 = p->YcoeffsA[3] + + mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] + mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] + mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] + mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] + + tst.l %d2 + beq.s 1f + spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 + extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 + or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 +1: | %d2 = SIGN(%d2) + move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 + + tst.l %d3 + beq.s 1f + spl.b %d3 + extb.l %d3 + or.l #1, %d3 +1: | %d3 = SIGN(%d3) + move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 + + move.l (%a4), %d0 | %d0 = *decoded0 + beq.s 3f + + movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3] + | %d5 = p->buf[YADAPTCOEFFSA-2] + + bmi.s 1f | flags still valid here + + | *decoded0 > 0 + + sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] + sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] + sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] + sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] + + bra.s 2f + +1: | *decoded0 < 0 + + add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] + add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] + add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] + add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] + +2: + movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[] + +3: + | Finish Predictor + + movclr.l %acc0, %d3 | %d3 = predictionA + asr.l #8, %d3 + asr.l #2, %d3 | %d3 >>= 10 + add.l %d0, %d3 | %d3 += %d0 + + move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA + move.l %d2, %d0 + lsl.l #5, %d2 + sub.l %d0, %d2 | %d2 = 31 * %d2 + asr.l #5, %d2 | %d2 >>= 5 + add.l %d3, %d2 + move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 + + | *decoded0 stored 3 instructions down, avoiding pipeline stall 
+ + | ***** COMMON ***** + + addq.l #4, %a5 | p->buf++ + lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3 + | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] + + move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA) + + cmp.l %a3, %a5 + beq.s .move_histm | History buffer is full, we need to do a memmove + + subq.l #1, %d7 | decrease loop count + bne.w .loopm + + move.l %d3, (YlastA,%a6) | p->YlastA = %d3 + +.donem: + move.l %a5, (%a6) | Save value of p->buf + movem.l (%sp), %d2-%d7/%a2-%a6 + lea.l (11*4,%sp), %sp + rts + +.move_histm: + move.l %d3, (YlastA,%a6) | p->YlastA = %d3 (saved: the movem copy below overwrites %d3) + + lea.l (historybuffer,%a6), %a3 + + | dest = %a3 (p->historybuffer) + | src = %a5 (p->buf) + | n = 200 + + movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes + movem.l %d0-%d6/%a0-%a2, (%a3) + movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes + movem.l %d0-%d6/%a0-%a2, (40,%a3) + movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes + movem.l %d0-%d6/%a0-%a2, (80,%a3) + movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes + movem.l %d0-%d6/%a0-%a2, (120,%a3) + movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes + movem.l %d0-%d6/%a0-%a2, (160,%a3) + + move.l %a3, %a5 | p->buf = &p->historybuffer[0] + + move.l (YlastA,%a6), %d3 | %d3 = p->YlastA + + subq.l #1, %d7 | decrease loop count + bne.w .loopm + + bra.s .donem + .size predictor_decode_mono, .-predictor_decode_mono diff --git a/plugins/demac/libdemac/predictor.c b/plugins/demac/libdemac/predictor.c new file mode 100644 index 00000000..7d914b5f --- /dev/null +++ b/plugins/demac/libdemac/predictor.c @@ -0,0 +1,271 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: predictor.c 19375 2008-12-09 23:20:59Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include <inttypes.h> +#include <string.h> + +#include "parser.h" +#include "predictor.h" +#include "demac_config.h" + +/* Return 0 if x is zero, -1 if x is positive, 1 if x is negative. + * The whole expansion is parenthesised so it stays one operand inside a + * larger expression; note that x is evaluated twice. */ +#define SIGN(x) ((x) ? (((x) > 0) ? -1 : 1) : 0) + +static const int32_t initial_coeffs[4] = { + 360, 317, -109, 98 +}; + +#define YDELAYA (18 + PREDICTOR_ORDER*4) +#define YDELAYB (18 + PREDICTOR_ORDER*3) +#define XDELAYA (18 + PREDICTOR_ORDER*2) +#define XDELAYB (18 + PREDICTOR_ORDER) + +#define YADAPTCOEFFSA (18) +#define XADAPTCOEFFSA (14) +#define YADAPTCOEFFSB (10) +#define XADAPTCOEFFSB (5) + +void init_predictor_decoder(struct predictor_t* p) +{ + /* Zero the history buffers */ + memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t)); + p->buf = p->historybuffer; + + /* Initialise and zero the co-efficients */ + memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs)); + memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs)); + memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB)); + memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB)); + + p->YfilterA = 0; + p->YfilterB = 0; + p->YlastA = 0; + + p->XfilterA = 0; + p->XfilterB = 0; + p->XlastA = 0; +} + +#if !defined(CPU_ARM) && !defined(CPU_COLDFIRE) +void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p, + int32_t* decoded0, + int32_t* decoded1, + int count) +{ + int32_t predictionA, predictionB; + + while (LIKELY(count--)) + { + /* Predictor Y */ + p->buf[YDELAYA] = p->YlastA; + p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); + + p->buf[YDELAYA-1] = p->buf[YDELAYA] - 
p->buf[YDELAYA-1]; + p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); + + predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + + (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + + (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + + (p->buf[YDELAYA-3] * p->YcoeffsA[3]); + + /* Apply a scaled first-order filter compression */ + p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5); + p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]); + p->YfilterB = p->XfilterA; + + p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1]; + p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]); + + predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) + + (p->buf[YDELAYB-1] * p->YcoeffsB[1]) + + (p->buf[YDELAYB-2] * p->YcoeffsB[2]) + + (p->buf[YDELAYB-3] * p->YcoeffsB[3]) + + (p->buf[YDELAYB-4] * p->YcoeffsB[4]); + + p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10); + p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5); + + /* Predictor X */ + + p->buf[XDELAYA] = p->XlastA; + p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]); + p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1]; + p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]); + + predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) + + (p->buf[XDELAYA-1] * p->XcoeffsA[1]) + + (p->buf[XDELAYA-2] * p->XcoeffsA[2]) + + (p->buf[XDELAYA-3] * p->XcoeffsA[3]); + + /* Apply a scaled first-order filter compression */ + p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5); + p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]); + p->XfilterB = p->YfilterA; + p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1]; + p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]); + + predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) + + (p->buf[XDELAYB-1] * p->XcoeffsB[1]) + + (p->buf[XDELAYB-2] * p->XcoeffsB[2]) + + (p->buf[XDELAYB-3] * p->XcoeffsB[3]) + + (p->buf[XDELAYB-4] * p->XcoeffsB[4]); + + p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); + p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5); + + if (LIKELY(*decoded0 != 0)) + { + if (*decoded0 
> 0) + { + p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; + p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; + p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; + p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; + + p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB]; + p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1]; + p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2]; + p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3]; + p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4]; + } + else + { + p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; + p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; + p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; + p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; + + p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB]; + p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1]; + p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2]; + p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3]; + p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4]; + } + } + + *(decoded0++) = p->YfilterA; + + if (LIKELY(*decoded1 != 0)) + { + if (*decoded1 > 0) + { + p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA]; + p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1]; + p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2]; + p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3]; + + p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB]; + p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1]; + p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2]; + p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3]; + p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4]; + } + else + { + p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA]; + p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1]; + p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2]; + p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3]; + + p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB]; + p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1]; + p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2]; + p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3]; + p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4]; + } + } + + *(decoded1++) = p->XfilterA; + + /* Combined */ + p->buf++; + + /* Have we filled the history buffer? 
*/ + if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) { + memmove(p->historybuffer, p->buf, + PREDICTOR_SIZE * sizeof(int32_t)); + p->buf = p->historybuffer; + } + } +} + +void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, + int32_t* decoded0, + int count) +{ + int32_t predictionA, currentA, A; + + currentA = p->YlastA; + + while (LIKELY(count--)) + { + A = *decoded0; + + p->buf[YDELAYA] = currentA; + p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1]; + + predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + + (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + + (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + + (p->buf[YDELAYA-3] * p->YcoeffsA[3]); + + currentA = A + (predictionA >> 10); + + p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); + p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); + + if (LIKELY(A != 0)) + { + if (A > 0) + { + p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; + p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; + p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; + p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; + } + else + { + p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; + p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; + p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; + p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; + } + } + + p->buf++; + + /* Have we filled the history buffer? 
*/ + if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) { + memmove(p->historybuffer, p->buf, + PREDICTOR_SIZE * sizeof(int32_t)); + p->buf = p->historybuffer; + } + + p->YfilterA = currentA + ((p->YfilterA * 31) >> 5); + *(decoded0++) = p->YfilterA; + } + + p->YlastA = currentA; +} +#endif diff --git a/plugins/demac/libdemac/predictor.h b/plugins/demac/libdemac/predictor.h new file mode 100644 index 00000000..ccf2b39b --- /dev/null +++ b/plugins/demac/libdemac/predictor.h @@ -0,0 +1,38 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: predictor.h 19236 2008-11-26 18:01:18Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#ifndef _APE_PREDICTOR_H +#define _APE_PREDICTOR_H + +#include <inttypes.h> +#include "parser.h" +#include "filter.h" + +void init_predictor_decoder(struct predictor_t* p); +void predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, + int32_t* decoded1, int count); +void predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, + int count); + +#endif diff --git a/plugins/demac/libdemac/vector_math16_armv5te.h b/plugins/demac/libdemac/vector_math16_armv5te.h new file mode 100644 index 00000000..81a5cb6e --- /dev/null +++ b/plugins/demac/libdemac/vector_math16_armv5te.h @@ -0,0 +1,312 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: vector_math16_armv5te.h 19260 2008-11-28 23:50:22Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +ARMv5te vector math copyright (C) 2008 Jens Arnold + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +/* This version fetches data as 32 bit words, and *requires* v1 to be + * 32 bit aligned, otherwise it will result either in a data abort, or + * incorrect results (if ARM aligncheck is disabled). 
*/ +static inline void vector_add(int16_t* v1, int16_t* v2) +{ +#if ORDER > 16 + int cnt = ORDER>>4; +#endif + +#define ADDHALFREGS(sum, s1) /* Adds register */ \ + "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight. */ \ + "add r8 , " #s1 ", " #sum ", lsl #16 \n" /* Clobbers 's1' */ \ + "add " #sum ", " #s1 ", " #sum ", lsr #16 \n" /* and r8. */ \ + "mov " #sum ", " #sum ", lsl #16 \n" \ + "orr " #sum ", " #sum ", r8 , lsr #16 \n" + +#define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \ + "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \ + "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \ + "mov " #sum ", " #sum ", lsl #16 \n" \ + "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n" + + asm volatile ( + "tst %[v2], #2 \n" + "beq 20f \n" + + "10: \n" + "ldrh r4, [%[v2]], #2 \n" + "mov r4, r4, lsl #16 \n" + "1: \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r5-r8} \n" + ADDHALFXREGS(r0, r4, r5) + ADDHALFXREGS(r1, r5, r6) + ADDHALFXREGS(r2, r6, r7) + ADDHALFXREGS(r3, r7, r8) + "stmia %[v1]!, {r0-r3} \n" + "mov r4, r8 \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r5-r8} \n" + ADDHALFXREGS(r0, r4, r5) + ADDHALFXREGS(r1, r5, r6) + ADDHALFXREGS(r2, r6, r7) + ADDHALFXREGS(r3, r7, r8) + "stmia %[v1]!, {r0-r3} \n" +#if ORDER > 16 + "mov r4, r8 \n" + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + "b 99f \n" + + "20: \n" + "1: \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + ADDHALFREGS(r0, r4) + ADDHALFREGS(r1, r5) + ADDHALFREGS(r2, r6) + ADDHALFREGS(r3, r7) + "stmia %[v1]!, {r0-r3} \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + ADDHALFREGS(r0, r4) + ADDHALFREGS(r1, r5) + ADDHALFREGS(r2, r6) + ADDHALFREGS(r3, r7) + "stmia %[v1]!, {r0-r3} \n" +#if ORDER > 16 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 16 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2) + : /* inputs */ + : /* clobbers */ + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", 
"r7", "r8", "memory" + ); +} + +/* This version fetches data as 32 bit words, and *requires* v1 to be + * 32 bit aligned, otherwise it will result either in a data abort, or + * incorrect results (if ARM aligncheck is disabled). */ +static inline void vector_sub(int16_t* v1, int16_t* v2) +{ +#if ORDER > 16 + int cnt = ORDER>>4; +#endif + +#define SUBHALFREGS(dif, s1) /* Subtracts register */ \ + "sub r8 , " #dif ", " #s1 "\n" /* halves straight. */ \ + "and r8 , r8 , r9 \n" /* Needs r9 = 0x0000ffff, */ \ + "mov " #dif ", " #dif ", lsr #16 \n" /* clobbers r8. */ \ + "sub " #dif ", " #dif ", " #s1 ", lsr #16 \n" \ + "orr " #dif ", r8 , " #dif ", lsl #16 \n" + +#define SUBHALFXREGS(dif, s1, s2) /* Subtracts register */ \ + "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \ + "and " #s1 ", " #s1 ", r9 \n" /* Needs r9 = 0x0000ffff, */ \ + "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* clobbers 's1'. */ \ + "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n" + + asm volatile ( + "mov r9, #0xff \n" + "orr r9, r9, #0xff00 \n" + "tst %[v2], #2 \n" + "beq 20f \n" + + "10: \n" + "ldrh r4, [%[v2]], #2 \n" + "mov r4, r4, lsl #16 \n" + "1: \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r5-r8} \n" + SUBHALFXREGS(r0, r4, r5) + SUBHALFXREGS(r1, r5, r6) + SUBHALFXREGS(r2, r6, r7) + SUBHALFXREGS(r3, r7, r8) + "stmia %[v1]!, {r0-r3} \n" + "mov r4, r8 \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r5-r8} \n" + SUBHALFXREGS(r0, r4, r5) + SUBHALFXREGS(r1, r5, r6) + SUBHALFXREGS(r2, r6, r7) + SUBHALFXREGS(r3, r7, r8) + "stmia %[v1]!, {r0-r3} \n" +#if ORDER > 16 + "mov r4, r8 \n" + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + "b 99f \n" + + "20: \n" + "1: \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + SUBHALFREGS(r0, r4) + SUBHALFREGS(r1, r5) + SUBHALFREGS(r2, r6) + SUBHALFREGS(r3, r7) + "stmia %[v1]!, {r0-r3} \n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + SUBHALFREGS(r0, r4) + SUBHALFREGS(r1, r5) + SUBHALFREGS(r2, r6) + 
SUBHALFREGS(r3, r7) + "stmia %[v1]!, {r0-r3} \n" +#if ORDER > 16 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 16 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2) + : /* inputs */ + : /* clobbers; the flags are listed because tst/subs modify them */ + "r0", "r1", "r2", "r3", "r4", "r5", + "r6", "r7", "r8", "r9", "cc", "memory" + ); +} + +/* This version fetches data as 32 bit words, and *requires* v1 to be + * 32 bit aligned, otherwise it will result either in a data abort, or + * incorrect results (if ARM aligncheck is disabled). */ +static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) +{ + int res; +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + +#if ORDER > 16 +#define MLA_BLOCKS "3" +#else +#define MLA_BLOCKS "1" +#endif + + asm volatile ( +#if ORDER > 32 + "mov %[res], #0 \n" +#endif + "tst %[v2], #2 \n" + "beq 20f \n" + + "10: \n" + "ldrh r7, [%[v2]], #2 \n" +#if ORDER > 32 + "mov r7, r7, lsl #16 \n" + "1: \n" + "ldmia %[v1]!, {r0-r3} \n" + "smlabt %[res], r0, r7, %[res] \n" +#else + "ldmia %[v1]!, {r0-r3} \n" + "smulbb %[res], r0, r7 \n" +#endif + "ldmia %[v2]!, {r4-r7} \n" + "smlatb %[res], r0, r4, %[res] \n" + "smlabt %[res], r1, r4, %[res] \n" + "smlatb %[res], r1, r5, %[res] \n" + "smlabt %[res], r2, r5, %[res] \n" + "smlatb %[res], r2, r6, %[res] \n" + "smlabt %[res], r3, r6, %[res] \n" + "smlatb %[res], r3, r7, %[res] \n" + + ".rept " MLA_BLOCKS "\n" + "ldmia %[v1]!, {r0-r3} \n" + "smlabt %[res], r0, r7, %[res] \n" + "ldmia %[v2]!, {r4-r7} \n" + "smlatb %[res], r0, r4, %[res] \n" + "smlabt %[res], r1, r4, %[res] \n" + "smlatb %[res], r1, r5, %[res] \n" + "smlabt %[res], r2, r5, %[res] \n" + "smlatb %[res], r2, r6, %[res] \n" + "smlabt %[res], r3, r6, %[res] \n" + "smlatb %[res], r3, r7, %[res] \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + "b 99f \n" + + "20: \n" + "1: \n" + "ldmia %[v1]!, {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" +#if ORDER > 32 + "smlabb %[res], r0, r4, %[res] \n" +#else + "smulbb 
%[res], r0, r4 \n" +#endif + "smlatt %[res], r0, r4, %[res] \n" + "smlabb %[res], r1, r5, %[res] \n" + "smlatt %[res], r1, r5, %[res] \n" + "smlabb %[res], r2, r6, %[res] \n" + "smlatt %[res], r2, r6, %[res] \n" + "smlabb %[res], r3, r7, %[res] \n" + "smlatt %[res], r3, r7, %[res] \n" + + ".rept " MLA_BLOCKS "\n" + "ldmia %[v1]!, {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + "smlabb %[res], r0, r4, %[res] \n" + "smlatt %[res], r0, r4, %[res] \n" + "smlabb %[res], r1, r5, %[res] \n" + "smlatt %[res], r1, r5, %[res] \n" + "smlabb %[res], r2, r6, %[res] \n" + "smlatt %[res], r2, r6, %[res] \n" + "smlabb %[res], r3, r7, %[res] \n" + "smlatt %[res], r3, r7, %[res] \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2), + [res]"=r"(res) + : /* inputs */ + : /* clobbers; the flags are listed because tst/subs modify them */ + "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", "cc" + ); + return res; +} diff --git a/plugins/demac/libdemac/vector_math16_armv6.h b/plugins/demac/libdemac/vector_math16_armv6.h new file mode 100644 index 00000000..f6505f42 --- /dev/null +++ b/plugins/demac/libdemac/vector_math16_armv6.h @@ -0,0 +1,289 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: vector_math16_armv6.h 19198 2008-11-24 18:40:43Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +ARMv6 vector math copyright (C) 2008 Jens Arnold + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +/* This version fetches data as 32 bit words, and *requires* v1 to be + * 32 bit aligned, otherwise it will result either in a data abort, or + * incorrect results (if ARM aligncheck is disabled). */ +static inline void vector_add(int16_t* v1, int16_t* v2) +{ +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + +#if ORDER > 16 +#define ADD_SUB_BLOCKS "4" +#else +#define ADD_SUB_BLOCKS "2" +#endif + + asm volatile ( + "tst %[v2], #2 \n" + "beq 20f \n" + + "10: \n" + "bic %[v2], %[v2], #2 \n" + "ldmia %[v2]!, {r4-r5} \n" + "1: \n" + ".rept " ADD_SUB_BLOCKS "\n" + "ldmia %[v2]!, {r6-r7} \n" + "ldmia %[v1], {r0-r3} \n" + "mov r5, r5, ror #16 \n" + "pkhtb r4, r5, r4, asr #16 \n" + "sadd16 r0, r0, r4 \n" + "pkhbt r5, r5, r6, lsl #16 \n" + "sadd16 r1, r1, r5 \n" + "ldmia %[v2]!, {r4-r5} \n" + "mov r7, r7, ror #16 \n" + "pkhtb r6, r7, r6, asr #16 \n" + "sadd16 r2, r2, r6 \n" + "pkhbt r7, r7, r4, lsl #16 \n" + "sadd16 r3, r3, r7 \n" + "stmia %[v1]!, {r0-r3} \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + "b 99f \n" + + "20: \n" + "1: \n" + ".rept " ADD_SUB_BLOCKS "\n" + "ldmia %[v2]!, {r4-r7} \n" + "ldmia %[v1], {r0-r3} \n" + "sadd16 r0, r0, r4 \n" + "sadd16 r1, r1, r5 \n" + "sadd16 r2, r2, r6 \n" + "sadd16 r3, r3, r7 \n" + "stmia %[v1]!, {r0-r3} \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2) + : /* inputs */ + : /* clobbers; the flags are listed because tst/subs modify them */ + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "cc", "memory" + ); +} + +/* This version fetches data as 32 bit words, and *requires* v1 to be + * 32 bit aligned, otherwise it will result either in a data abort, or + * incorrect results (if ARM aligncheck is disabled). 
*/ +static inline void vector_sub(int16_t* v1, int16_t* v2) +{ +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + + asm volatile ( + "tst %[v2], #2 \n" + "beq 20f \n" + + "10: \n" + "bic %[v2], %[v2], #2 \n" + "ldmia %[v2]!, {r4-r5} \n" + "1: \n" + ".rept " ADD_SUB_BLOCKS "\n" + "ldmia %[v2]!, {r6-r7} \n" + "ldmia %[v1], {r0-r3} \n" + "mov r5, r5, ror #16 \n" + "pkhtb r4, r5, r4, asr #16 \n" + "ssub16 r0, r0, r4 \n" + "pkhbt r5, r5, r6, lsl #16 \n" + "ssub16 r1, r1, r5 \n" + "ldmia %[v2]!, {r4-r5} \n" + "mov r7, r7, ror #16 \n" + "pkhtb r6, r7, r6, asr #16 \n" + "ssub16 r2, r2, r6 \n" + "pkhbt r7, r7, r4, lsl #16 \n" + "ssub16 r3, r3, r7 \n" + "stmia %[v1]!, {r0-r3} \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + "b 99f \n" + + "20: \n" + "1: \n" + ".rept " ADD_SUB_BLOCKS "\n" + "ldmia %[v2]!, {r4-r7} \n" + "ldmia %[v1], {r0-r3} \n" + "ssub16 r0, r0, r4 \n" + "ssub16 r1, r1, r5 \n" + "ssub16 r2, r2, r6 \n" + "ssub16 r3, r3, r7 \n" + "stmia %[v1]!, {r0-r3} \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2) + : /* inputs */ + : /* clobbers */ + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "memory" + ); +} + +/* This version fetches data as 32 bit words, and *requires* v1 to be + * 32 bit aligned, otherwise it will result either in a data abort, or + * incorrect results (if ARM aligncheck is disabled). 
*/ +static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) +{ + int res; +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + +#if ORDER > 16 +#define MLA_BLOCKS "3" +#else +#define MLA_BLOCKS "1" +#endif + + asm volatile ( +#if ORDER > 32 + "mov %[res], #0 \n" +#endif + "tst %[v2], #2 \n" + "beq 20f \n" + + "10: \n" + "bic %[v2], %[v2], #2 \n" + "ldmia %[v2]!, {r5-r7} \n" + "ldmia %[v1]!, {r0-r1} \n" + "1: \n" + "pkhbt r8, r6, r5 \n" + "ldmia %[v2]!, {r4-r5} \n" +#if ORDER > 32 + "smladx %[res], r0, r8, %[res] \n" +#else + "smuadx %[res], r0, r8 \n" +#endif + ".rept " MLA_BLOCKS "\n" + "pkhbt r8, r7, r6 \n" + "ldmia %[v1]!, {r2-r3} \n" + "smladx %[res], r1, r8, %[res] \n" + "pkhbt r8, r4, r7 \n" + "ldmia %[v2]!, {r6-r7} \n" + "smladx %[res], r2, r8, %[res] \n" + "pkhbt r8, r5, r4 \n" + "ldmia %[v1]!, {r0-r1} \n" + "smladx %[res], r3, r8, %[res] \n" + "pkhbt r8, r6, r5 \n" + "ldmia %[v2]!, {r4-r5} \n" + "smladx %[res], r0, r8, %[res] \n" + ".endr \n" + + "pkhbt r8, r7, r6 \n" + "ldmia %[v1]!, {r2-r3} \n" + "smladx %[res], r1, r8, %[res] \n" + "pkhbt r8, r4, r7 \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "ldmneia %[v2]!, {r6-r7} \n" + "smladx %[res], r2, r8, %[res] \n" + "pkhbt r8, r5, r4 \n" + "ldmneia %[v1]!, {r0-r1} \n" + "smladx %[res], r3, r8, %[res] \n" + "bne 1b \n" +#else + "pkhbt r5, r5, r4 \n" + "smladx %[res], r2, r8, %[res] \n" + "smladx %[res], r3, r5, %[res] \n" +#endif + "b 99f \n" + + "20: \n" + "ldmia %[v1]!, {r0-r1} \n" + "ldmia %[v2]!, {r5-r7} \n" + "1: \n" + "ldmia %[v1]!, {r2-r3} \n" +#if ORDER > 32 + "smlad %[res], r0, r5, %[res] \n" +#else + "smuad %[res], r0, r5 \n" +#endif + ".rept " MLA_BLOCKS "\n" + "ldmia %[v2]!, {r4-r5} \n" + "smlad %[res], r1, r6, %[res] \n" + "ldmia %[v1]!, {r0-r1} \n" + "smlad %[res], r2, r7, %[res] \n" + "ldmia %[v2]!, {r6-r7} \n" + "smlad %[res], r3, r4, %[res] \n" + "ldmia %[v1]!, {r2-r3} \n" + "smlad %[res], r0, r5, %[res] \n" + ".endr \n" + + "ldmia %[v2]!, {r4-r5} \n" + "smlad %[res], r1, r6, %[res] 
\n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "ldmneia %[v1]!, {r0-r1} \n" + "smlad %[res], r2, r7, %[res] \n" + "ldmneia %[v2]!, {r6-r7} \n" + "smlad %[res], r3, r4, %[res] \n" + "bne 1b \n" +#else + "smlad %[res], r2, r7, %[res] \n" + "smlad %[res], r3, r4, %[res] \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2), + [res]"=r"(res) + : /* inputs */ + : /* clobbers */ + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "r8" + ); + return res; +} diff --git a/plugins/demac/libdemac/vector_math16_cf.h b/plugins/demac/libdemac/vector_math16_cf.h new file mode 100644 index 00000000..e51767b8 --- /dev/null +++ b/plugins/demac/libdemac/vector_math16_cf.h @@ -0,0 +1,326 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: vector_math16_cf.h 19144 2008-11-19 21:31:33Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +Coldfire vector math copyright (C) 2007 Jens Arnold + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +/* This version fetches data as 32 bit words, and *recommends* v1 to be + * 32 bit aligned, otherwise performance will suffer. */ +static inline void vector_add(int16_t* v1, int16_t* v2) +{ +#if ORDER > 16 + int cnt = ORDER>>4; +#endif + +#define ADDHALFREGS(s1, sum) /* Add register halves straight. 
*/ \ + "move.l " #s1 ", %%d4 \n" /* 's1' can be an A or D reg. */ \ + "add.l " #sum ", " #s1 "\n" /* 'sum' must be a D reg. */ \ + "clr.w %%d4 \n" /* 's1' and %%d4 are clobbered! */ \ + "add.l %%d4 , " #sum "\n" \ + "move.w " #s1 ", " #sum "\n" + +#define ADDHALFXREGS(s1, s2, sum) /* Add register halves across. */ \ + "clr.w " #sum " \n" /* Needs 'sum' pre-swapped, swaps */ \ + "add.l " #s1 ", " #sum "\n" /* 's2', and clobbers 's1'. */ \ + "swap " #s2 " \n" /* 's1' can be an A or D reg. */ \ + "add.l " #s2 ", " #s1 "\n" /* 'sum' and 's2' must be D regs. */ \ + "move.w " #s1 ", " #sum "\n" + + asm volatile ( + "move.l %[v2], %%d0 \n" + "and.l #2, %%d0 \n" + "jeq 20f \n" + + "10: \n" + "move.w (%[v2])+, %%d0 \n" + "swap %%d0 \n" + "1: \n" + "movem.l (%[v1]), %%a0-%%a3 \n" + "movem.l (%[v2]), %%d1-%%d4 \n" + ADDHALFXREGS(%%a0, %%d1, %%d0) + "move.l %%d0, (%[v1])+ \n" + ADDHALFXREGS(%%a1, %%d2, %%d1) + "move.l %%d1, (%[v1])+ \n" + ADDHALFXREGS(%%a2, %%d3, %%d2) + "move.l %%d2, (%[v1])+ \n" + ADDHALFXREGS(%%a3, %%d4, %%d3) + "move.l %%d3, (%[v1])+ \n" + "lea.l (16, %[v2]), %[v2] \n" + "move.l %%d4, %%d0 \n" + + "movem.l (%[v1]), %%a0-%%a3 \n" + "movem.l (%[v2]), %%d1-%%d4 \n" + ADDHALFXREGS(%%a0, %%d1, %%d0) + "move.l %%d0, (%[v1])+ \n" + ADDHALFXREGS(%%a1, %%d2, %%d1) + "move.l %%d1, (%[v1])+ \n" + ADDHALFXREGS(%%a2, %%d3, %%d2) + "move.l %%d2, (%[v1])+ \n" + ADDHALFXREGS(%%a3, %%d4, %%d3) + "move.l %%d3, (%[v1])+ \n" +#if ORDER > 16 + "lea.l (16, %[v2]), %[v2] \n" + "move.l %%d4, %%d0 \n" + + "subq.l #1, %[cnt] \n" + "jne 1b \n" +#endif + "jra 99f \n" + + "20: \n" + "1: \n" + "movem.l (%[v2]), %%a0-%%a3 \n" + "movem.l (%[v1]), %%d0-%%d3 \n" + ADDHALFREGS(%%a0, %%d0) + "move.l %%d0, (%[v1])+ \n" + ADDHALFREGS(%%a1, %%d1) + "move.l %%d1, (%[v1])+ \n" + ADDHALFREGS(%%a2, %%d2) + "move.l %%d2, (%[v1])+ \n" + ADDHALFREGS(%%a3, %%d3) + "move.l %%d3, (%[v1])+ \n" + "lea.l (16, %[v2]), %[v2] \n" + + "movem.l (%[v2]), %%a0-%%a3 \n" + "movem.l (%[v1]), %%d0-%%d3 \n" + 
ADDHALFREGS(%%a0, %%d0) + "move.l %%d0, (%[v1])+ \n" + ADDHALFREGS(%%a1, %%d1) + "move.l %%d1, (%[v1])+ \n" + ADDHALFREGS(%%a2, %%d2) + "move.l %%d2, (%[v1])+ \n" + ADDHALFREGS(%%a3, %%d3) + "move.l %%d3, (%[v1])+ \n" +#if ORDER > 16 + "lea.l (16, %[v2]), %[v2] \n" + + "subq.l #1, %[cnt] \n" + "jne 1b \n" +#endif + "99: \n" + : /* outputs */ +#if ORDER > 16 + [cnt]"+d"(cnt), +#endif + [v1] "+a"(v1), + [v2] "+a"(v2) + : /* inputs */ + : /* clobbers */ + "d0", "d1", "d2", "d3", "d4", + "a0", "a1", "a2", "a3", "memory" + ); +} + +/* This version fetches data as 32 bit words, and *recommends* v1 to be + * 32 bit aligned, otherwise performance will suffer. + * + * Subtracts v2 from v1 element by element and stores the differences back + * through v1. One pass of the asm body covers 16 elements; for ORDER > 16 + * the pass is repeated ORDER/16 times. The code first tests bit 1 of the + * v2 address and runs the across variant (label 10) when it is set, or the + * straight variant (label 20) when it is clear. */ +static inline void vector_sub(int16_t* v1, int16_t* v2) +{ +#if ORDER > 16 + int cnt = ORDER>>4; /* number of 16-element passes */ +#endif + +#define SUBHALFREGS(min, sub, dif) /* Subtract register halves straight. */ \ + "move.l " #min ", " #dif "\n" /* 'min' can be an A or D reg */ \ + "sub.l " #sub ", " #min "\n" /* 'sub' and 'dif' must be D regs */ \ + "clr.w " #sub "\n" /* 'min' and 'sub' are clobbered! */ \ + "sub.l " #sub ", " #dif "\n" \ + "move.w " #min ", " #dif "\n" + +#define SUBHALFXREGS(min, s2, s1d) /* Subtract register halves across. */ \ + "clr.w " #s1d "\n" /* Needs 's1d' pre-swapped, swaps */ \ + "sub.l " #s1d ", " #min "\n" /* 's2' and clobbers 'min'. */ \ + "move.l " #min ", " #s1d "\n" /* 'min' can be an A or D reg, */ \ + "swap " #s2 "\n" /* 's2' and 's1d' must be D regs. 
*/ \ + "sub.l " #s2 ", " #min "\n" \ + "move.w " #min ", " #s1d "\n" + + asm volatile ( + "move.l %[v2], %%d0 \n" + "and.l #2, %%d0 \n" + "jeq 20f \n" /* bit 1 of the v2 address is clear: use the straight variant */ + + "10: \n" /* across variant: the v2 address has bit 1 set */ + "move.w (%[v2])+, %%d0 \n" + "swap %%d0 \n" /* pre-swap, as SUBHALFXREGS requires */ + "1: \n" + "movem.l (%[v2]), %%d1-%%d4 \n" + "movem.l (%[v1]), %%a0-%%a3 \n" + SUBHALFXREGS(%%a0, %%d1, %%d0) + "move.l %%d0, (%[v1])+ \n" + SUBHALFXREGS(%%a1, %%d2, %%d1) + "move.l %%d1, (%[v1])+ \n" + SUBHALFXREGS(%%a2, %%d3, %%d2) + "move.l %%d2, (%[v1])+ \n" + SUBHALFXREGS(%%a3, %%d4, %%d3) + "move.l %%d3, (%[v1])+ \n" + "lea.l (16, %[v2]), %[v2] \n" + "move.l %%d4, %%d0 \n" + + "movem.l (%[v2]), %%d1-%%d4 \n" + "movem.l (%[v1]), %%a0-%%a3 \n" + SUBHALFXREGS(%%a0, %%d1, %%d0) + "move.l %%d0, (%[v1])+ \n" + SUBHALFXREGS(%%a1, %%d2, %%d1) + "move.l %%d1, (%[v1])+ \n" + SUBHALFXREGS(%%a2, %%d3, %%d2) + "move.l %%d2, (%[v1])+ \n" + SUBHALFXREGS(%%a3, %%d4, %%d3) + "move.l %%d3, (%[v1])+ \n" +#if ORDER > 16 + "lea.l (16, %[v2]), %[v2] \n" + "move.l %%d4, %%d0 \n" + + "subq.l #1, %[cnt] \n" + "bne.w 1b \n" +#endif + "jra 99f \n" /* done; skip the straight variant */ + + "20: \n" /* straight variant: the v2 address has bit 1 clear */ + "1: \n" + "movem.l (%[v2]), %%d1-%%d4 \n" + "movem.l (%[v1]), %%a0-%%a3 \n" + SUBHALFREGS(%%a0, %%d1, %%d0) + "move.l %%d0, (%[v1])+ \n" + SUBHALFREGS(%%a1, %%d2, %%d1) + "move.l %%d1, (%[v1])+ \n" + SUBHALFREGS(%%a2, %%d3, %%d2) + "move.l %%d2, (%[v1])+ \n" + SUBHALFREGS(%%a3, %%d4, %%d3) + "move.l %%d3, (%[v1])+ \n" + "lea.l (16, %[v2]), %[v2] \n" + + "movem.l (%[v2]), %%d1-%%d4 \n" + "movem.l (%[v1]), %%a0-%%a3 \n" + SUBHALFREGS(%%a0, %%d1, %%d0) + "move.l %%d0, (%[v1])+ \n" + SUBHALFREGS(%%a1, %%d2, %%d1) + "move.l %%d1, (%[v1])+ \n" + SUBHALFREGS(%%a2, %%d3, %%d2) + "move.l %%d2, (%[v1])+ \n" + SUBHALFREGS(%%a3, %%d4, %%d3) + "move.l %%d3, (%[v1])+ \n" +#if ORDER > 16 + "lea.l (16, %[v2]), %[v2] \n" + + "subq.l #1, %[cnt] \n" + "bne.w 1b \n" +#endif + + "99: \n" + : /* outputs */ +#if ORDER > 16 + [cnt]"+d"(cnt), +#endif + [v1] "+a"(v1), + [v2] "+a"(v2) + : /* inputs */ + : /* clobbers */ + "d0", "d1", "d2", "d3", 
"d4", + "a0", "a1", "a2", "a3", "memory" + ); +} + +#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */ + +/* This version fetches data as 32 bit words, and *recommends* v1 to be + * 32 bit aligned, otherwise performance will suffer. It also needs EMAC + * in signed integer mode - call above macro before use. + * + * Accumulates the products of corresponding 16 bit halves of v1 and v2 in + * acc0 and returns the value moved out of acc0 by the final movclr.l. One + * pass issues 16 mac.w instructions (MAC_BLOCKS 3) or 32 (MAC_BLOCKS 7); + * for ORDER > 32 the pass is repeated ORDER/32 times. As in vector_sub, + * bit 1 of the v2 address selects between the variant at label 10 (set) + * and the variant at label 20 (clear). + * NOTE(review): acc0 is not cleared on entry; presumably it is still zero + * from the movclr.l at the end of the previous call - confirm. + * NOTE(review): the asm reads memory through v1/v2 but, unlike vector_sub, + * lists no memory clobber - confirm the compiler cannot move stores to + * those buffers across it. */ +static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) +{ + int res; +#if ORDER > 32 + int cnt = ORDER>>5; /* number of 32-element passes */ +#endif + +#if ORDER > 16 +#define MAC_BLOCKS "7" +#else +#define MAC_BLOCKS "3" +#endif + + asm volatile ( + "move.l %[v2], %%d0 \n" + "and.l #2, %%d0 \n" + "jeq 20f \n" /* bit 1 of the v2 address is clear: use the variant at 20 */ + + "10: \n" + "move.l (%[v1])+, %%d0 \n" + "move.w (%[v2])+, %%d1 \n" + "1: \n" + ".rept " MAC_BLOCKS "\n" + "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" + "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" + "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" + "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" + ".endr \n" + + "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" + "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" + "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" +#if ORDER > 32 + "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" + "subq.l #1, %[res] \n" /* the res register carries the pass count here, see the [cnt] input operand */ + "bne.w 1b \n" +#else + "mac.w %%d0l, %%d1u, %%acc0 \n" +#endif + "jra 99f \n" + + "20: \n" + "move.l (%[v1])+, %%d0 \n" + "move.l (%[v2])+, %%d1 \n" + "1: \n" + ".rept " MAC_BLOCKS "\n" + "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" + "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" + "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" + "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" + ".endr \n" + + "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" + "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" +#if ORDER > 32 + "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" + "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" + "subq.l #1, %[res] \n" + "bne.w 1b \n" +#else + "mac.w %%d2u, %%d1u, %%acc0 \n" + "mac.w %%d2l, %%d1l, %%acc0 \n" +#endif + + "99: \n" + "movclr.l %%acc0, %[res] \n" + : /* outputs */ + [v1]"+a"(v1), + 
[v2]"+a"(v2), + [res]"=d"(res) + : /* inputs */ +#if ORDER > 32 + [cnt]"[res]"(cnt) /* tied to the res operand, so cnt starts out in the register that later receives the result */ +#endif + : /* clobbers */ + "d0", "d1", "d2" + ); + return res; +} diff --git a/plugins/demac/libdemac/vector_math32_armv4.h b/plugins/demac/libdemac/vector_math32_armv4.h new file mode 100644 index 00000000..47bc5e94 --- /dev/null +++ b/plugins/demac/libdemac/vector_math32_armv4.h @@ -0,0 +1,205 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: vector_math32_armv4.h 19144 2008-11-19 21:31:33Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +ARMv4 vector math copyright (C) 2008 Jens Arnold + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +static inline void vector_add(int32_t* v1, int32_t* v2) +{ +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + +#if ORDER > 16 +#define ADD_SUB_BLOCKS "8" +#else +#define ADD_SUB_BLOCKS "4" +#endif + + asm volatile ( + "1: \n" + ".rept " ADD_SUB_BLOCKS "\n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + "add r0, r0, r4 \n" + "add r1, r1, r5 \n" + "add r2, r2, r6 \n" + "add r3, r3, r7 \n" + "stmia %[v1]!, {r0-r3} \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2) + : /* inputs */ + : /* clobbers */ + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "memory" + ); +} + +static inline void vector_sub(int32_t* v1, int32_t* v2) +{ +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + + asm volatile ( + "1: \n" + ".rept " ADD_SUB_BLOCKS "\n" + "ldmia %[v1], {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + "sub r0, r0, r4 \n" + "sub r1, r1, r5 \n" + "sub r2, r2, r6 \n" + "sub r3, r3, r7 \n" + "stmia %[v1]!, {r0-r3} \n" + ".endr \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + [v2] "+r"(v2) + : /* inputs */ + : /* clobbers */ + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "memory" + ); +} + +static inline int32_t scalarproduct(int32_t* v1, int32_t* v2) +{ + int res; +#if ORDER > 32 + int cnt = ORDER>>5; +#endif + + asm volatile ( +#if ORDER > 16 +#if ORDER > 32 + "mov %[res], #0 \n" +#endif + "ldmia %[v2]!, {r6-r7} \n" + "1: \n" + "ldmia %[v1]!, {r0,r1,r3-r5} \n" +#if ORDER > 32 + "mla %[res], r6, r0, %[res] \n" +#else + "mul %[res], r6, r0 \n" +#endif + "mla %[res], r7, r1, %[res] \n" + "ldmia %[v2]!, {r0-r2,r6-r8} \n" + "mla %[res], r0, 
r3, %[res] \n" + "mla %[res], r1, r4, %[res] \n" + "mla %[res], r2, r5, %[res] \n" + "ldmia %[v1]!, {r0-r4} \n" + "mla %[res], r6, r0, %[res] \n" + "mla %[res], r7, r1, %[res] \n" + "mla %[res], r8, r2, %[res] \n" + "ldmia %[v2]!, {r0,r1,r6-r8} \n" + "mla %[res], r0, r3, %[res] \n" + "mla %[res], r1, r4, %[res] \n" + "ldmia %[v1]!, {r0-r5} \n" + "mla %[res], r6, r0, %[res] \n" + "mla %[res], r7, r1, %[res] \n" + "mla %[res], r8, r2, %[res] \n" + "ldmia %[v2]!, {r0-r2,r6,r7} \n" + "mla %[res], r0, r3, %[res] \n" + "mla %[res], r1, r4, %[res] \n" + "mla %[res], r2, r5, %[res] \n" + "ldmia %[v1]!, {r0,r1,r3-r5} \n" + "mla %[res], r6, r0, %[res] \n" + "mla %[res], r7, r1, %[res] \n" + "ldmia %[v2]!, {r0-r2,r6-r8} \n" + "mla %[res], r0, r3, %[res] \n" + "mla %[res], r1, r4, %[res] \n" + "mla %[res], r2, r5, %[res] \n" + "ldmia %[v1]!, {r0-r4} \n" + "mla %[res], r6, r0, %[res] \n" + "mla %[res], r7, r1, %[res] \n" + "mla %[res], r8, r2, %[res] \n" + "ldmia %[v2]!, {r0,r1,r6-r8} \n" + "mla %[res], r0, r3, %[res] \n" + "mla %[res], r1, r4, %[res] \n" + "ldmia %[v1]!, {r0-r5} \n" + "mla %[res], r6, r0, %[res] \n" + "mla %[res], r7, r1, %[res] \n" + "mla %[res], r8, r2, %[res] \n" +#if ORDER > 32 + "ldmia %[v2]!, {r0-r2,r6,r7} \n" +#else + "ldmia %[v2]!, {r0-r2} \n" +#endif + "mla %[res], r0, r3, %[res] \n" + "mla %[res], r1, r4, %[res] \n" + "mla %[res], r2, r5, %[res] \n" +#if ORDER > 32 + "subs %[cnt], %[cnt], #1 \n" + "bne 1b \n" +#endif + +#else /* ORDER <= 16 */ + "ldmia %[v1]!, {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + "mul %[res], r4, r0 \n" + "mla %[res], r5, r1, %[res] \n" + "mla %[res], r6, r2, %[res] \n" + "mla %[res], r7, r3, %[res] \n" + + ".rept 3 \n" + "ldmia %[v1]!, {r0-r3} \n" + "ldmia %[v2]!, {r4-r7} \n" + "mla %[res], r4, r0, %[res] \n" + "mla %[res], r5, r1, %[res] \n" + "mla %[res], r6, r2, %[res] \n" + "mla %[res], r7, r3, %[res] \n" + ".endr \n" +#endif /* ORDER <= 16 */ + : /* outputs */ +#if ORDER > 32 + [cnt]"+r"(cnt), +#endif + [v1] "+r"(v1), + 
[v2] "+r"(v2), + [res]"=r"(res) + : /* inputs */ + : /* clobbers */ + "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7" +#if ORDER > 16 + ,"r8" +#endif + ); + return res; +} diff --git a/plugins/demac/libdemac/vector_math_generic.h b/plugins/demac/libdemac/vector_math_generic.h new file mode 100644 index 00000000..b2dab1e2 --- /dev/null +++ b/plugins/demac/libdemac/vector_math_generic.h @@ -0,0 +1,160 @@ +/* + +libdemac - A Monkey's Audio decoder + +$Id: vector_math_generic.h 19144 2008-11-19 21:31:33Z amiconn $ + +Copyright (C) Dave Chapman 2007 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA + +*/ + +#include "demac_config.h" + +static inline void vector_add(filter_int* v1, filter_int* v2) +{ +#if ORDER > 32 + int order = (ORDER >> 5); + while (order--) +#endif + { + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; +#if ORDER > 16 + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; + *v1++ += *v2++; +#endif + } +} + +static inline void vector_sub(filter_int* v1, filter_int* v2) +{ +#if ORDER > 32 + int order = (ORDER >> 5); + while (order--) +#endif + { + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; +#if ORDER > 16 + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; + *v1++ -= *v2++; +#endif + } +} + +static inline int32_t scalarproduct(filter_int* v1, filter_int* v2) +{ + int res = 0; + +#if ORDER > 32 + int order = (ORDER >> 5); + while (order--) +#endif + { + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res 
+= *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; +#if ORDER > 16 + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; + res += *v1++ * *v2++; +#endif + } + return res; +} |