/* * Copyright 2005 by the Massachusetts Institute of Technology. * * Permission to use, copy, modify, and distribute this * software and its documentation for any purpose and without * fee is hereby granted, provided that the above copyright * notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting * documentation, and that the name of M.I.T. not be used in * advertising or publicity pertaining to distribution of the * software without specific, written prior permission. * M.I.T. makes no representations about the suitability of * this software for any purpose. It is provided "as is" * without express or implied warranty. */ /** * @description * This file contains functions that implement Discrete Cosine Transforms and * their inverses. When reference is made to the IEEE DCT specification, it * is refering to the IEEE DCT specification used by both MPEG and JPEG. * A definition of what makes an 8x8 DCT conform to the IEEE specification, as well * as a pseudocode implementation, can be found in Appendix A of the MPEG-2 specification * (ISO/IEC 13818-2) on P. 125. * * @author Matthew Drake * @author Rodric Rabbah * @file DCT.str * @version 1.0 */ /** * Transforms an 8x8 signal from the frequency domain to the signal domain * using an inverse Discrete Cosine Transform in accordance with * the IEEE specification for a 2-dimensional 8x8 iDCT. * @input 64 values representing an 8x8 array of values in the * frequency domain, ordered * by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output 64 values representing an 8x8 array of values in the * signal domain, ordered by row and then column. * @param mode indicates algorithm to use; * mode == 0: reference, coarse implementation * mode == 1: reference, fine (parallel) implementation * mode == 2: fast, coarse implementation * mode == 3: fast, fine (parallel) implementation */ int->int pipeline iDCT8x8_ieee(int mode) { // modes: // 0: reference, coarse // 1: reference, fine (parallel) // 2: fast, coarse // 3: fast, fine (parallel) if (mode == 0) add iDCT_2D_reference_coarse(8); else if (mode == 1) add iDCT_2D_reference_fine(8); else if (mode == 2) add iDCT8x8_2D_fast_coarse(); else add iDCT8x8_2D_fast_fine(); } /** * Transforms an 8x8 signal from the signal domain to the frequency domain * using a Discrete Cosine Transform in accordance with the IEEE * specification for a 2-dimensional 8x8 DCT. * @input 64 values representing an 8x8 array of values in the signal * domain, ordered by row and then column. * @output 64 values representing an 8x8 array of values in the * frequency domain, ordered by row and then column. Vertical frequency * increases along each row and horizontal frequency along each column. * @param mode indicates algorithm to use; * mode == 0: reference, coarse implementation, * mode == 1: reference, fine (parallel) implementation */ int->int pipeline DCT8x8_ieee(int mode) { // modes: // 0: reference, coarse // 1: reference, fine (parallel) if (mode == 0) add DCT_2D_reference_coarse(8); else add DCT_2D_reference_fine(8); } /** * Transforms a 2D signal from the frequency domain to the signal domain * using an inverse Discrete Cosine Transform. * @param size The number of elements in each dimension of the signal. * @input size x size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size x size values representing an array of values in the signal * domain, ordered by row and then column. */ int->int pipeline iDCT_2D_reference_fine(int size) { add int->float filter { work pop 1 push 1 { push((float) pop()); } } add iDCT_1D_Y_reference_fine(size); add iDCT_1D_X_reference_fine(size); add float->int filter { work pop 1 push 1 { push((int) floor(pop() + 0.5)); } } } /** * Transforms a 2D signal from the frequency domain to the signal domain * using a FAST inverse Discrete Cosine Transform. * @input size x size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size x size values representing an array of values in the signal * domain, ordered by row and then column. */ int->int pipeline iDCT8x8_2D_fast_coarse() { add iDCT8x8_1D_row_fast(); add iDCT8x8_1D_col_fast(); } /** * Transforms a 2D signal from the frequency domain to the signal domain * using a FAST inverse Discrete Cosine Transform. * @input size x size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size x size values representing an array of values in the signal * domain, ordered by row and then column. */ int->int pipeline iDCT8x8_2D_fast_fine() { add iDCT8x8_1D_X_fast_fine(); add iDCT8x8_1D_Y_fast_fine(); } /** * Transforms a 2D signal from the signal domain to the frequency domain * using a Discrete Cosine Transform. * @param size The number of elements in each dimension of the signal. * @input size x size values, representing an array of values in the signal * domain, ordered by row and then column. * @output size x size values representing an array of values in the * frequency domain, ordered by row and then column. Vertical frequency * increases along each row and horizontal frequency along each column. */ int->int pipeline DCT_2D_reference_fine(int size) { add int->float filter { work pop 1 push 1 { int v = pop(); push((float) v); } } add DCT_1D_X_reference_fine(size); add DCT_1D_Y_reference_fine(size); add float->int filter { work pop 1 push 1 { float v = floor(pop() + 0.5); push((int) v); } } } /** * @internal */ float->float splitjoin iDCT_1D_X_reference_fine(int size) { split roundrobin(size); for (int i = 0; i < size; i++) { add iDCT_1D_reference_fine(size); } join roundrobin(size); } /** * @internal */ float->float splitjoin iDCT_1D_Y_reference_fine(int size) { split roundrobin(1); for (int i = 0; i < size; i++) { add iDCT_1D_reference_fine(size); } join roundrobin(1); } /** * @internal */ int->int splitjoin iDCT8x8_1D_X_fast_fine() { split roundrobin(8); for (int i = 0; i < 8; i++) { add iDCT8x8_1D_row_fast(); } join roundrobin(8); } /** * @internal */ int->int splitjoin iDCT8x8_1D_Y_fast_fine() { split roundrobin(1); for (int i = 0; i < 8; i++) { add iDCT8x8_1D_col_fast_fine(); } join roundrobin(1); } /** * @internal */ float->float splitjoin DCT_1D_X_reference_fine(int size) { split roundrobin(size); for (int i = 0; i < size; i++) { add DCT_1D_reference_fine(size); } join roundrobin(size); } /** * @internal */ float->float splitjoin DCT_1D_Y_reference_fine(int size) { split roundrobin(1); for (int i = 0; i < size; i++) { add DCT_1D_reference_fine(size); } join roundrobin(1); } /** * @internal * Based on the implementation given in the C MPEG-2 reference implementation */ int->int filter iDCT_2D_reference_coarse(int size) { float[size][size] coeff; init { for (int freq = 0; freq < size; freq++) { float scale = (freq == 0) ? sqrt(0.125) : 0.5; for (int time = 0; time < size; time++) coeff[freq][time] = scale * cos((pi/(float)size) * freq * (time + 0.5)); } } work pop size*size push size*size { float[size][size] block_x; int i, j, k; for (i = 0; i < size; i++) for (j = 0; j < size; j++) { block_x[i][j] = 0; for (k = 0; k < size; k++) { block_x[i][j] += coeff[k][j] * peek(size*i + k /* that is buffer[i][k] */); } } for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { float block_y = 0.0; for (k = 0; k < size; k++) { block_y += coeff[k][i] * block_x[k][j]; } block_y = floor(block_y + 0.5); push((int) block_y); } } for (i = 0; i < size*size; i++) pop(); } } /** * Transforms a 2D signal from the signal domain to the frequency domain * using a Discrete Cosine Transform. * @param size The number of elements in each dimension of the signal. * @input size values, representing an array of values in the signal * domain, ordered by row and then column. * @output size values representing an array of values in the * frequency domain, ordered by row and then column. Vertical frequency * increases along each row and horizontal frequency along each column. */ int->int filter DCT_2D_reference_coarse(int size) { float[size][size] coeff; init { for (int i = 0; i < size; i++) { float s = (i == 0) ? sqrt(0.125) : 0.5; for (int j = 0; j < size; j++) coeff[i][j] = s * cos((pi / size) * i * (j + 0.5)); } } work pop size*size push size*size { float[size][size] block_x; int i, j, k; for (i = 0; i < size; i++) for (j = 0; j < size; j++) { block_x[i][j] = 0.0; for (k = 0; k < size; k++) { block_x[i][j] += coeff[j][k] * peek(size*i + k); } } for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { float block_y = 0.0; for (k = 0; k < size; k++) { block_y += coeff[i][k] * block_x[k][j]; } block_y = floor(block_y + 0.5); push((int) block_y); } } for (i = 0; i < size*size; i++) pop(); } } /** * Transforms a 1D signal from the frequency domain to the signal domain * using an inverse Discrete Cosine Transform. * @param size The number of elements in each dimension of the signal. * @input size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size values representing an array of values in the signal * domain, ordered by row and then column. */ float->float filter iDCT_1D_reference_fine(int size) { float[size][size] coeff; init { for (int x = 0; x < size; x++) { for (int u = 0; u < size; u++) { float Cu = 1; if (u == 0) Cu = 1/sqrt(2); coeff[x][u] = 0.5 * Cu * cos(u * pi * (2.0 * x+1) / (2.0 * size)); } } } work peek size pop size push size { for (int x = 0; x < size; x++) { float tempsum = 0; for (int u = 0; u < size; u++) { tempsum += coeff[x][u] * peek(u); } push(tempsum); } for (int u = 0; u < size; u++) { pop(); } } } /** * Transforms a 1D horizontal signal from the frequency domain to the signal * domain using a FAST inverse Discrete Cosine Transform. * @input size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size values representing an array of values in the signal * domain, ordered by row and then column. */ int->int filter iDCT8x8_1D_row_fast() { int size = 8; int W1 = 2841; /* 2048*sqrt(2)*cos(1*pi/16) */ int W2 = 2676; /* 2048*sqrt(2)*cos(2*pi/16) */ int W3 = 2408; /* 2048*sqrt(2)*cos(3*pi/16) */ int W5 = 1609; /* 2048*sqrt(2)*cos(5*pi/16) */ int W6 = 1108; /* 2048*sqrt(2)*cos(6*pi/16) */ int W7 = 565; /* 2048*sqrt(2)*cos(7*pi/16) */ work pop size push size { int x0 = peek(0); int x1 = peek(4) << 11; int x2 = peek(6); int x3 = peek(2); int x4 = peek(1); int x5 = peek(7); int x6 = peek(5); int x7 = peek(3); int x8; /* shortcut */ if ((x1 == 0) && (x2 == 0) && (x3 == 0) && (x4 == 0) && (x5 == 0) && (x6 == 0) && (x7 == 0)) { x0 = x0 << 3; for (int i = 0; i < size; i++) { push(x0); } } else { /* for proper rounding in the fourth stage */ x0 = (x0 << 11) + 128; /* first stage */ x8 = W7 * (x4 + x5); x4 = x8 + (W1 - W7) * x4; x5 = x8 - (W1 + W7) * x5; x8 = W3 * (x6 + x7); x6 = x8 - (W3 - W5) * x6; x7 = x8 - (W3 + W5) * x7; /* second stage */ x8 = x0 + x1; x0 = x0 - x1; x1 = W6 * (x3 + x2); x2 = x1 - (W2 + W6) * x2; x3 = x1 + (W2 - W6) * x3; x1 = x4 + x6; x4 = x4 - x6; x6 = x5 + x7; x5 = x5 - x7; /* third stage */ x7 = x8 + x3; x8 = x8 - x3; x3 = x0 + x2; x0 = x0 - x2; x2 = (181 * (x4 + x5) + 128) >> 8; x4 = (181 * (x4 - x5) + 128) >> 8; /* fourth stage */ push((x7 + x1) >> 8); push((x3 + x2) >> 8); push((x0 + x4) >> 8); push((x8 + x6) >> 8); push((x8 - x6) >> 8); push((x0 - x4) >> 8); push((x3 - x2) >> 8); push((x7 - x1) >> 8); } for (int i = 0; i < size; i++) pop(); } } /** * Transforms a 1D vertical signal from the frequency domain to the signal * domain using a FAST inverse Discrete Cosine Transform. * @input size*size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size values representing an array of values in the signal * domain, ordered by row and then column. */ int->int filter iDCT8x8_1D_col_fast() { int size = 8; int[8*8] buffer; int W1 = 2841; /* 2048*sqrt(2)*cos(1*pi/16) */ int W2 = 2676; /* 2048*sqrt(2)*cos(2*pi/16) */ int W3 = 2408; /* 2048*sqrt(2)*cos(3*pi/16) */ int W5 = 1609; /* 2048*sqrt(2)*cos(5*pi/16) */ int W6 = 1108; /* 2048*sqrt(2)*cos(6*pi/16) */ int W7 = 565; /* 2048*sqrt(2)*cos(7*pi/16) */ work pop size*size push size*size { for (int c = 0; c < size; c++) { int x0 = peek(c + size * 0); int x1 = peek(c + size * 4) << 8; int x2 = peek(c + size * 6); int x3 = peek(c + size * 2); int x4 = peek(c + size * 1); int x5 = peek(c + size * 7); int x6 = peek(c + size * 5); int x7 = peek(c + size * 3); int x8; /* shortcut */ if ((x1 == 0) && (x2 == 0) && (x3 == 0) && (x4 == 0) && (x5 == 0) && (x6 == 0) && (x7 == 0)) { x0 = (x0 + 32) >> 6; for (int i = 0; i < size; i++) { buffer[c + size * i] = x0; } } else { /* for proper rounding in the fourth stage */ x0 = (x0 << 8) + 8192; /* first stage */ x8 = W7 * (x4 + x5) + 4; x4 = (x8 + (W1 - W7) * x4) >> 3; x5 = (x8 - (W1 + W7) * x5) >> 3; x8 = W3 * (x6 + x7) + 4; x6 = (x8 - (W3 - W5) * x6) >> 3; x7 = (x8 - (W3 + W5) * x7) >> 3; /* second stage */ x8 = x0 + x1; x0 = x0 - x1; x1 = W6 * (x3 + x2) + 4; x2 = (x1 - (W2 + W6) * x2) >> 3; x3 = (x1 + (W2 - W6) * x3) >> 3; x1 = x4 + x6; x4 = x4 - x6; x6 = x5 + x7; x5 = x5 - x7; /* third stage */ x7 = x8 + x3; x8 = x8 - x3; x3 = x0 + x2; x0 = x0 - x2; x2 = (181 * (x4 + x5) + 128) >> 8; x4 = (181 * (x4 - x5) + 128) >> 8; /* fourth stage */ buffer[c + size * 0] = ((x7 + x1) >> 14); buffer[c + size * 1] = ((x3 + x2) >> 14); buffer[c + size * 2] = ((x0 + x4) >> 14); buffer[c + size * 3] = ((x8 + x6) >> 14); buffer[c + size * 4] = ((x8 - x6) >> 14); buffer[c + size * 5] = ((x0 - x4) >> 14); buffer[c + size * 6] = ((x3 - x2) >> 14); buffer[c + size * 7] = ((x7 - x1) >> 14); } } for (int i = 0; i < size*size; i++) { pop(); push(buffer[i]); } } } /** * Transforms a 1D vertical signal from the frequency domain to the signal * domain using a FAST inverse Discrete Cosine Transform. * @param size The number of elements in each dimension of the signal. * @input size values, representing an array of values in the frequency * domain, ordered by row and then column. Vertical frequency increases * along each row and horizontal frequency along each column. * @output size values representing an array of values in the signal * domain, ordered by row and then column. */ int->int filter iDCT8x8_1D_col_fast_fine() { int size = 8; int W1 = 2841; /* 2048*sqrt(2)*cos(1*pi/16) */ int W2 = 2676; /* 2048*sqrt(2)*cos(2*pi/16) */ int W3 = 2408; /* 2048*sqrt(2)*cos(3*pi/16) */ int W5 = 1609; /* 2048*sqrt(2)*cos(5*pi/16) */ int W6 = 1108; /* 2048*sqrt(2)*cos(6*pi/16) */ int W7 = 565; /* 2048*sqrt(2)*cos(7*pi/16) */ work pop size push size { int x0 = peek(0); int x1 = peek(4) << 8; int x2 = peek(6); int x3 = peek(2); int x4 = peek(1); int x5 = peek(7); int x6 = peek(5); int x7 = peek(3); int x8; /* shortcut */ if ((x1 == 0) && (x2 == 0) && (x3 == 0) && (x4 == 0) && (x5 == 0) && (x6 == 0) && (x7 == 0)) { x0 = (x0 + 32) >> 6; for (int i = 0; i < size; i++) { push(x0); } } else { /* for proper rounding in the fourth stage */ x0 = (x0 << 8) + 8192; /* first stage */ x8 = W7 * (x4 + x5) + 4; x4 = (x8 + (W1 - W7) * x4) >> 3; x5 = (x8 - (W1 + W7) * x5) >> 3; x8 = W3 * (x6 + x7) + 4; x6 = (x8 - (W3 - W5) * x6) >> 3; x7 = (x8 - (W3 + W5) * x7) >> 3; /* second stage */ x8 = x0 + x1; x0 = x0 - x1; x1 = W6 * (x3 + x2) + 4; x2 = (x1 - (W2 + W6) * x2) >> 3; x3 = (x1 + (W2 - W6) * x3) >> 3; x1 = x4 + x6; x4 = x4 - x6; x6 = x5 + x7; x5 = x5 - x7; /* third stage */ x7 = x8 + x3; x8 = x8 - x3; x3 = x0 + x2; x0 = x0 - x2; x2 = (181 * (x4 + x5) + 128) >> 8; x4 = (181 * (x4 - x5) + 128) >> 8; /* fourth stage */ push((x7 + x1) >> 14); push((x3 + x2) >> 14); push((x0 + x4) >> 14); push((x8 + x6) >> 14); push((x8 - x6) >> 14); push((x0 - x4) >> 14); push((x3 - x2) >> 14); push((x7 - x1) >> 14); } for (int i = 0; i < size; i++) pop(); } } /** * Transforms a 1D signal from the signal domain to the frequency domain * using a Discrete Cosine Transform. * @param size The number of elements in each dimension of the signal. * @input size values, representing an array of values in the signal * domain, ordered by row and then column. * @output size values representing an array of values in the * frequency domain, ordered by row and then column. Vertical frequency * increases along each row and horizontal frequency along each column. */ float->float filter DCT_1D_reference_fine(int size) { float[size][size] coeff; init { for (int u = 0; u < size; u++) { float Cu = 1; if (u == 0) Cu = 1/sqrt(2); for (int x = 0; x < size; x++) { coeff[u][x] = 0.5 * Cu * cos(u * pi * (2.0 * x+1) / (2.0 * size)); } } } work pop size push size { for (int u = 0; u < size; u++) { float tempsum = 0; for (int x = 0; x < size; x++) { tempsum += peek(x) * coeff[u][x]; } push(tempsum); } } }