Doing it the fast way. O(N log(N)) operations.
The derivation of the algorithm in doFFT() comes from the following two iterations of the code:
/** (c)2008 Max Vilimpoc, All Rights Reserved */
/**
* FFT Module, works for N up to 256, after which
* the bitshift has to be modified.
*/
#ifndef __UC_PRODUCTION__
#include <stdio.h>
#endif
#include <stdint.h>
#include <math.h>
#include <string.h>
enum TE_CONSTS
{
N = 64,
M = N/2,
};
typedef struct
{
float re;
float im;
} CPLX;
static uint8_t LOG2N;
static CPLX Xf[N];
static CPLX Tw[M];
/** Complex math functions, because C sucks. */
static void radAngleToCplx(CPLX * const out, float radAngle)
{
out->re = cos(radAngle);
out->im = -sin(radAngle);
}
static void cplxAddInPlace(CPLX * inoutA, CPLX * inB)
{
inoutA->re += inB->re;
inoutA->im += inB->im;
}
static CPLX cplxSub(CPLX * inA, CPLX * inB)
{
CPLX out;
out.re = inA->re - inB->re;
out.im = inA->im - inB->im;
return out;
}
static CPLX cplxMult(const CPLX * const inA, const CPLX * const inB)
{
CPLX out;
out.re = (inA->re * inB->re) - (inA->im * inB->im);
out.im = (inA->re * inB->im) + (inA->im * inB->re);
return out;
}
static void cplxPrintf(const CPLX * const in)
{
printf("%+3.3f + %+3.3f\n", (float) in->re, (float) in->im);
}
static void dumpArray(const CPLX * inA, uint32_t length)
{
uint32_t n8;
for (n8 = 0; n8 < length; ++n8)
{
/* Output sample array normal order. */
cplxPrintf(inA++);
}
}
static void bitReverseSamples(void)
{
uint8_t n8, r8;
CPLX temp;
dumpArray(Xf, N);
for (n8 = 0; n8 < M; ++n8)
{
r8 = 0;
r8 |= (n8 & 0x80) >> 7;
r8 |= (n8 & 0x40) >> 5;
r8 |= (n8 & 0x20) >> 3;
r8 |= (n8 & 0x10) >> 1;
r8 |= (n8 & 0x08) << 1;
r8 |= (n8 & 0x04) << 3;
r8 |= (n8 & 0x02) << 5;
r8 |= (n8 & 0x01) << 7;
r8 = r8 >> (8 - LOG2N);
temp = Xf[r8];
Xf[r8] = Xf[n8];
Xf[n8] = temp;
/* this could be done w/rotate left/right plus carry in 16 cycles. */
printf("n8: %d, r8: %d\n", n8, r8);
}
dumpArray(Xf, N);
}
static void butterfly(CPLX * const XfA, CPLX * const XfB, CPLX * const tw)
{
CPLX XfB2 = cplxMult(XfB, tw);
// XfB = XfA - XfB * tw;
*XfB = cplxSub(XfA, &XfB2);
// XfA = XfA + XfB * tw;
cplxAddInPlace(XfA, &XfB2);
}
static void initTwiddles()
{
LOG2N = log10(N) / log10(2);
printf("LOG2N: %u\n", LOG2N);
uint8_t k;
double angle;
for (k = 0; k < M; ++k)
{
angle = 2 * M_PI * k / N;
printf("angle = %d / %d * 2PI = %f ", k, N, angle);
radAngleToCplx(&Tw[k], angle);
cplxPrintf(&Tw[k]);
}
}
static void initSamples()
{
Xf[0].re = 1;
Xf[1].re = 1;
}
void doFFT(uint8_t numPts)
{
uint8_t LOG2N = log10(numPts) / log10(2);
uint8_t maxAffineA = LOG2N - 1;
uint8_t pass, i, down;
uint8_t limitA, limitB, limitC;
for (pass = 0; pass < LOG2N; ++pass)
{
limitA = 1 << (maxAffineA - pass);
limitB = 1 << pass;
limitC = limitB << 1;
for (i = 0; i < limitA; ++i)
{
for (down = 0; down < limitB; ++down)
{
butterfly(&Xf[limitC * i + down], &Xf[limitC * i + down + limitB], &Tw[down]);
}
}
}
}
int main(int argc, char **argv)
{
initTwiddles();
initSamples();
bitReverseSamples();
printf("sizeof(Xf): %lu\n", sizeof(Xf));
printf("Xf:\n");
dumpArray(Xf, N);
printf("Tw:\n");
dumpArray(Tw, M);
// FFT full-length reconstruction.
/*
// 1pt -> 2pt DFTs.
for (i = 0; i < (1 << 3); ++i)
{
for (down = 0; down < (1 << 0); ++down)
{
butterfly(&Xf[(1 << 1) * i + down], &Xf[(1 << 1) * i + down + (1 << 0)], &Tw[down]);
}
}
printf("Xf (2pts done):\n");
dumpArray(Xf, N);
// 2pt -> 4pt DFTs.
for (i = 0; i < (1 << 2); ++i)
{
for (down = 0; down < (1 << 1); ++down)
{
butterfly(&Xf[(1 << 2) * i + down], &Xf[(1 << 2) * i + down + (1 << 1)], &Tw[down]);
}
}
printf("Xf (4pts done):\n");
dumpArray(Xf, N);
// 4pt -> 8pt DFTs.
for (i = 0; i < (1 << 1); ++i)
{
for (down = 0; down < (1 << 2); ++down)
{
butterfly(&Xf[(1 << 3) * i + down], &Xf[(1 << 3) * i + down + (1 << 2)], &Tw[down]);
}
}
printf("Xf (8pts done):\n");
dumpArray(Xf, N);
// 8 pt -> 16pt DFTs.
for (i = 0; i < (1 << 0); ++i)
{
for (down = 0; down < (1 << 3); ++down)
{
butterfly(&Xf[(1 << 4) * i + down], &Xf[(1 << 4) * i + down + (1 << 3)], &Tw[down]);
}
}
printf("Xf (16pts done):\n");
dumpArray(Xf, N);
*/
doFFT(N);
printf("Xf (%d pts done):\n", N);
dumpArray(Xf, N);
return 0;
}