Add wasm tacle-bench targets

This commit is contained in:
2026-06-12 20:06:22 +02:00
parent 30daa8a00c
commit 08c2e9c13d
1122 changed files with 520422 additions and 0 deletions

View File

@ -0,0 +1,25 @@
# ~~~
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2026, Friedrich-Alexander-Universität Erlangen-Nürnberg (FAU)
# ~~~
cmake_minimum_required(VERSION 3.20)
project(jfdctint)

# Directories located relative to this list file: three levels up and four
# levels up, respectively. Both are set before the shared script is included.
set(TACLEBENCH_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/../../..")
set(REPOSITORY_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../..")

# Name the application after *this* project. PROJECT_NAME is used instead of
# CMAKE_PROJECT_NAME because the latter is the name of the top-level project;
# it would no longer be "jfdctint" if this directory were added to a larger
# build via add_subdirectory(). For a stand-alone configure both are equal.
set(APP_TARGET_NAME "${PROJECT_NAME}")

# Select the benchmark source. Configuring with -DTACLEBENCH_VARIANT=inline
# picks the "inline" variant; any other value (or none) picks "default".
if(DEFINED TACLEBENCH_VARIANT AND "${TACLEBENCH_VARIANT}" STREQUAL "inline")
  set(APP_SOURCE_FILE_PATH
      "generated/modified_sources/inline/jfdctint.c")
else()
  set(APP_SOURCE_FILE_PATH
      "generated/modified_sources/default/jfdctint.c")
endif()

# Hand over to the repository's shared script.
# NOTE(review): presumably it consumes the variables set above to define the
# actual build target -- confirm in cmake/taclebench_wasm.cmake.
# The path is quoted so a checkout location containing spaces or semicolons
# is still passed as a single argument.
include("${REPOSITORY_ROOT_PATH}/cmake/taclebench_wasm.cmake")

View File

@ -0,0 +1,70 @@
File: jfdctint.c
Original provenience: SNU-RT Benchmark Suite for Worst Case Timing Analysis
2016-02-01:
- Added generic TACLeBench header.
- Removed old file header (keep some information in TACLeBench header).
- Renamed global variable data to jfdctint_data.
- Renamed main to jfdctint_main.
- Moved initialisation code to jfdctint_init
- Implemented new main function according to TACLeBench guidelines.
- Implemented new function jfdctint_return, calculates checksum over
all data.
- Applied code formatting according to the following rules
- Lines shall not be wider than 80 characters; whenever possible, appropriate
line breaks shall be inserted to keep lines below 80 characters
- Indentation is done using whitespaces only, no tabs. Code is indented by
two whitespaces
- Two empty lines are put between any two functions
- In non-empty lists or index expressions, opening '(' and '[' are followed by
one whitespace, closing ')' and ']' are preceded by one whitespace
- In comma- or colon-separated argument lists, one whitespace is put after
each comma/colon
- Names of functions and global variables all start with a benchmark-specific
prefix (here: bs_) followed by lowercase letter (e.g., bs_square)
- For pointer types, one whitespace is put before the '*'
- Operators within expressions shall be preceded and followed by one
whitespace
- Code of then- and else-parts of if-then-else statements shall be put in
separate lines, not in the same lines as the if-condition or the keyword
"else"
- Opening braces '{' denoting the beginning of code for some if-else or loop
body shall be put at the end of the same line where the keywords "if",
"else", "for", "while" etc. occur
- In non-empty lists or index expressions, opening '(' and '[' are followed by
one whitespace, closing ')' and ']' are preceded by one whitespace
- Operators within expressions shall be preceded and followed by one
whitespace
2016-02-03:
- Removed all PROFILINGs.
- Macro types replaced by actual types:
- Replaced INT32 with int.
- Replaced DCTELEM with int.
- Removed macros:
- GLOBAL (useless)
- Unused "FIX_... FIX(..)" definitions (unused)
- BITS_IN_JSAMPLE (used in #ifdef...#else..., keep only #if part)
- SHIFT_TEMPS (empty)
- JPEG_INTERNALS (unused)
- MULTIPLY (simply multiply *)
- ONE (used only once)
- RIGHT_SHIFT (used only once)
2016-04-05:
- Return '0' on success
2016-04-06:
- Fixed generation of return value
2016-04-21:
- Fixed checksum value
- Fixed license
2016-06-01:
- Changed all prefixes to lower-case
- Changed return type of jfdctint_main
2016-06-08:
- Prefix
- removed return from jfdctint_main

View File

@ -0,0 +1,383 @@
The Independent JPEG Group's JPEG software
==========================================
README for release 6a of 7-Feb-96
=================================
This distribution contains the sixth public release of the Independent JPEG
Group's free JPEG software. You are welcome to redistribute this software and
to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
Serious users of this software (particularly those incorporating it into
larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to
our electronic mailing list. Mailing list members are notified of updates
and have a chance to participate in technical discussions, etc.
This software is the work of Tom Lane, Philip Gladstone, Luis Ortiz, Jim
Boucher, Lee Crocker, Julian Minguillon, George Phillips, Davide Rossi,
Ge' Weijers, and other members of the Independent JPEG Group.
IJG is not affiliated with the official ISO JPEG standards committee.
DOCUMENTATION ROADMAP
=====================
This file contains the following sections:
OVERVIEW General description of JPEG and the IJG software.
LEGAL ISSUES Copyright, lack of warranty, terms of distribution.
REFERENCES Where to learn more about JPEG.
ARCHIVE LOCATIONS Where to find newer versions of this software.
RELATED SOFTWARE Other stuff you should get.
FILE FORMAT WARS Software *not* to get.
TO DO Plans for future IJG releases.
Other documentation files in the distribution are:
User documentation:
install.doc How to configure and install the IJG software.
usage.doc Usage instructions for cjpeg, djpeg, jpegtran,
rdjpgcom, and wrjpgcom.
*.1 Unix-style man pages for programs (same info as usage.doc).
wizard.doc Advanced usage instructions for JPEG wizards only.
change.log Version-to-version change highlights.
Programmer and internal documentation:
libjpeg.doc How to use the JPEG library in your own programs.
example.c Sample code for calling the JPEG library.
structure.doc Overview of the JPEG library's internal structure.
filelist.doc Road map of IJG files.
coderules.doc Coding style rules --- please read if you contribute code.
Please read at least the files install.doc and usage.doc. Useful information
can also be found in the JPEG FAQ (Frequently Asked Questions) article. See
ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
If you want to understand how the JPEG code works, we suggest reading one or
more of the REFERENCES, then looking at the documentation files (in roughly
the order listed) before diving into the code.
OVERVIEW
========
This package contains C software to implement JPEG image compression and
decompression. JPEG (pronounced "jay-peg") is a standardized compression
method for full-color and gray-scale images. JPEG is intended for compressing
"real-world" scenes; line drawings, cartoons and other non-realistic images
are not its strong suit. JPEG is lossy, meaning that the output image is not
exactly identical to the input image. Hence you must not use JPEG if you
have to have identical output bits. However, on typical photographic images,
very good compression levels can be obtained with no visible change, and
remarkably high compression levels are possible if you can tolerate a
low-quality image. For more details, see the references, or just experiment
with various compression settings.
This software implements JPEG baseline, extended-sequential, and progressive
compression processes. Provision is made for supporting all variants of these
processes, although some uncommon parameter settings aren't implemented yet.
For legal reasons, we are not distributing code for the arithmetic-coding
variants of JPEG; see LEGAL ISSUES. We have made no provision for supporting
the hierarchical or lossless processes defined in the standard.
We provide a set of library routines for reading and writing JPEG image files,
plus two sample applications "cjpeg" and "djpeg", which use the library to
perform conversion between JPEG and some other popular image file formats.
The library is intended to be reused in other applications.
In order to support file conversion and viewing software, we have included
considerable functionality beyond the bare JPEG coding/decoding capability;
for example, the color quantization modules are not strictly part of JPEG
decoding, but they are essential for output to colormapped file formats or
colormapped displays. These extra functions can be compiled out of the
library if not required for a particular application. We have also included
"jpegtran", a utility for lossless transcoding between different JPEG
processes, and "rdjpgcom" and "wrjpgcom", two simple applications for
inserting and extracting textual comments in JFIF files.
The emphasis in designing this software has been on achieving portability and
flexibility, while also making it fast enough to be useful. In particular,
the software is not intended to be read as a tutorial on JPEG. (See the
REFERENCES section for introductory material.) Rather, it is intended to
be reliable, portable, industrial-strength code. We do not claim to have
achieved that goal in every aspect of the software, but we strive for it.
We welcome the use of this software as a component of commercial products.
No royalty is required, but we do ask for an acknowledgement in product
documentation, as described under LEGAL ISSUES.
LEGAL ISSUES
============
In plain English:
1. We don't promise that this software works. (But if you find any bugs,
please let us know!)
2. You can use this software for whatever you want. You don't have to pay us.
3. You may not pretend that you wrote this software. If you use it in a
program, you must acknowledge somewhere in your documentation that
you've used the IJG code.
In legalese:
The authors make NO WARRANTY or representation, either express or implied,
with respect to this software, its quality, accuracy, merchantability, or
fitness for a particular purpose. This software is provided "AS IS", and you,
its user, assume the entire risk as to its quality and accuracy.
This software is copyright (C) 1991-1996, Thomas G. Lane.
All Rights Reserved except as specified below.
Permission is hereby granted to use, copy, modify, and distribute this
software (or portions thereof) for any purpose, without fee, subject to these
conditions:
(1) If any part of the source code for this software is distributed, then this
README file must be included, with this copyright and no-warranty notice
unaltered; and any additions, deletions, or changes to the original files
must be clearly indicated in accompanying documentation.
(2) If only executable code is distributed, then the accompanying
documentation must state that "this software is based in part on the work of
the Independent JPEG Group".
(3) Permission for use of this software is granted only if the user accepts
full responsibility for any undesirable consequences; the authors accept
NO LIABILITY for damages of any kind.
These conditions apply to any software derived from or based on the IJG code,
not just to the unmodified library. If you use our work, you ought to
acknowledge us.
Permission is NOT granted for the use of any IJG author's name or company name
in advertising or publicity relating to this software or products derived from
it. This software may be referred to only as "the Independent JPEG Group's
software".
We specifically permit and encourage the use of this software as the basis of
commercial products, provided that all warranty or liability claims are
assumed by the product vendor.
ansi2knr.c is included in this distribution by permission of L. Peter Deutsch,
sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA.
ansi2knr.c is NOT covered by the above copyright and conditions, but instead
by the usual distribution terms of the Free Software Foundation; principally,
that you must include source code if you redistribute it. (See the file
ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part
of any program generated from the IJG code, this does not limit you more than
the foregoing paragraphs do.
The configuration script "configure" was produced with GNU Autoconf. It
is copyright by the Free Software Foundation but is freely distributable.
It appears that the arithmetic coding option of the JPEG spec is covered by
patents owned by IBM, AT&T, and Mitsubishi. Hence arithmetic coding cannot
legally be used without obtaining one or more licenses. For this reason,
support for arithmetic coding has been removed from the free JPEG software.
(Since arithmetic coding provides only a marginal gain over the unpatented
Huffman mode, it is unlikely that very many implementations will support it.)
So far as we are aware, there are no patent restrictions on the remaining
code.
WARNING: Unisys has begun to enforce their patent on LZW compression against
GIF encoders and decoders. You will need a license from Unisys to use the
included rdgif.c or wrgif.c files in a commercial or shareware application.
At this time, Unisys is not enforcing their patent against freeware, so
distribution of this package remains legal. However, we intend to remove
GIF support from the IJG package as soon as a suitable replacement format
becomes reasonably popular.
We are required to state that
"The Graphics Interchange Format(c) is the Copyright property of
CompuServe Incorporated. GIF(sm) is a Service Mark property of
CompuServe Incorporated."
REFERENCES
==========
We highly recommend reading one or more of these references before trying to
understand the innards of the JPEG software.
The best short technical introduction to the JPEG compression algorithm is
Wallace, Gregory K. "The JPEG Still Picture Compression Standard",
Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
(Adjacent articles in that issue discuss MPEG motion picture compression,
applications of JPEG, and related topics.) If you don't have the CACM issue
handy, a PostScript file containing a revised version of Wallace's article
is available at ftp.uu.net, graphics/jpeg/wallace.ps.gz. The file (actually
a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
omits the sample images that appeared in CACM, but it includes corrections
and some added material. Note: the Wallace article is copyright ACM and
IEEE, and it may not be used for commercial purposes.
A somewhat less technical, more leisurely introduction to JPEG can be found in
"The Data Compression Book" by Mark Nelson, published by M&T Books (Redwood
City, CA), 1991, ISBN 1-55851-216-0. This book provides good explanations and
example C code for a multitude of compression methods including JPEG. It is
an excellent source if you are comfortable reading C code but don't know much
about data compression in general. The book's JPEG sample code is far from
industrial-strength, but when you are ready to look at a full implementation,
you've got one here...
The best full description of JPEG is the textbook "JPEG Still Image Data
Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published
by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. Price US$59.95, 638 pp.
The book includes the complete text of the ISO JPEG standards (DIS 10918-1
and draft DIS 10918-2). This is by far the most complete exposition of JPEG
in existence, and we highly recommend it.
The JPEG standard itself is not available electronically; you must order a
paper copy through ISO or ITU. (Unless you feel a need to own a certified
official copy, we recommend buying the Pennebaker and Mitchell book instead;
it's much cheaper and includes a great deal of useful explanatory material.)
In the USA, copies of the standard may be ordered from ANSI Sales at (212)
642-4900, or from Global Engineering Documents at (800) 854-7179. (ANSI
doesn't take credit card orders, but Global does.) It's not cheap: as of
1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7%
shipping/handling. The standard is divided into two parts, Part 1 being the
actual specification, while Part 2 covers compliance testing methods. Part 1
is titled "Digital Compression and Coding of Continuous-tone Still Images,
Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of
Continuous-tone Still Images, Part 2: Compliance testing" and has document
numbers ISO/IEC IS 10918-2, ITU-T T.83.
Extensions to the original JPEG standard are defined in JPEG Part 3, a new ISO
document. Part 3 is undergoing ISO balloting and is expected to be approved
by the end of 1995; it will have document numbers ISO/IEC IS 10918-3, ITU-T
T.84. IJG currently does not support any Part 3 extensions.
The JPEG standard does not specify all details of an interchangeable file
format. For the omitted details we follow the "JFIF" conventions, revision
1.02. A copy of the JFIF spec is available from:
Literature Department
C-Cube Microsystems, Inc.
1778 McCarthy Blvd.
Milpitas, CA 95035
phone (408) 944-6300, fax (408) 944-6314
A PostScript version of this document is available at ftp.uu.net, file
graphics/jpeg/jfif.ps.gz. It can also be obtained by e-mail from the C-Cube
mail server, netlib@c3.pla.ca.us. Send the message "send jfif_ps from jpeg"
to the server to obtain the JFIF document; send the message "help" if you have
trouble.
The TIFF 6.0 file format specification can be obtained by FTP from sgi.com
(192.48.153.1), file graphics/tiff/TIFF6.ps.Z; or you can order a printed
copy from Aldus Corp. at (206) 628-6593. The JPEG incorporation scheme
found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
(Compression tag 7). Copies of this Note can be obtained from sgi.com or
from ftp.uu.net:/graphics/jpeg/. It is expected that the next revision of
the TIFF spec will replace the 6.0 JPEG design with the Note's design.
Although IJG's own code does not support TIFF/JPEG, the free libtiff library
uses our library to implement TIFF/JPEG per the Note. libtiff is available
from sgi.com:/graphics/tiff/.
ARCHIVE LOCATIONS
=================
The "official" archive site for this software is ftp.uu.net (Internet
address 192.48.96.9). The most recent released version can always be found
there in directory graphics/jpeg. This particular version will be archived
as graphics/jpeg/jpegsrc.v6a.tar.gz. If you are on the Internet, you
can retrieve files from ftp.uu.net by standard anonymous FTP. If you don't
have FTP access, UUNET's archives are also available via UUCP; contact
help@uunet.uu.net for information on retrieving files that way.
Numerous Internet sites maintain copies of the UUNET files. However, only
ftp.uu.net is guaranteed to have the latest official version.
You can also obtain this software in DOS-compatible "zip" archive format from
the SimTel archives (ftp.coast.net:/SimTel/msdos/graphics/), or on CompuServe
in the Graphics Support forum (GO CIS:GRAPHSUP), library 12 "JPEG Tools".
Again, these versions may sometimes lag behind the ftp.uu.net release.
The JPEG FAQ (Frequently Asked Questions) article is a useful source of
general information about JPEG. It is updated constantly and therefore is
not included in this distribution. The FAQ is posted every two weeks to
Usenet newsgroups comp.graphics.misc, news.answers, and other groups.
You can always obtain the latest version from the news.answers archive at
rtfm.mit.edu. By FTP, fetch /pub/usenet/news.answers/jpeg-faq/part1 and
.../part2. If you don't have FTP, send e-mail to mail-server@rtfm.mit.edu
with body
send usenet/news.answers/jpeg-faq/part1
send usenet/news.answers/jpeg-faq/part2
RELATED SOFTWARE
================
Numerous viewing and image manipulation programs now support JPEG. (Quite a
few of them use this library to do so.) The JPEG FAQ described above lists
some of the more popular free and shareware viewers, and tells where to
obtain them on Internet.
If you are on a Unix machine, we highly recommend Jef Poskanzer's free
PBMPLUS image software, which provides many useful operations on PPM-format
image files. In particular, it can convert PPM images to and from a wide
range of other formats. You can obtain this package by FTP from ftp.x.org
(contrib/pbmplus*.tar.Z) or ftp.ee.lbl.gov (pbmplus*.tar.Z). There is also
a newer update of this package called NETPBM, available from
wuarchive.wustl.edu under directory /graphics/graphics/packages/NetPBM/.
Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software
is; you are likely to have difficulty making it work on any non-Unix machine.
A different free JPEG implementation, written by the PVRG group at Stanford,
is available from havefun.stanford.edu in directory pub/jpeg. This program
is designed for research and experimentation rather than production use;
it is slower, harder to use, and less portable than the IJG code, but it
is easier to read and modify. Also, the PVRG code supports lossless JPEG,
which we do not.
FILE FORMAT WARS
================
Some JPEG programs produce files that are not compatible with our library.
The root of the problem is that the ISO JPEG committee failed to specify a
concrete file format. Some vendors "filled in the blanks" on their own,
creating proprietary formats that no one else could read. (For example, none
of the early commercial JPEG implementations for the Macintosh were able to
exchange compressed files.)
The file format we have adopted is called JFIF (see REFERENCES). This format
has been agreed to by a number of major commercial JPEG vendors, and it has
become the de facto standard. JFIF is a minimal or "low end" representation.
We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF
Technical Note #2) for "high end" applications that need to record a lot of
additional data about an image. TIFF/JPEG is fairly new and not yet widely
supported, unfortunately.
The upcoming JPEG Part 3 standard defines a file format called SPIFF.
SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should
be able to read the most common variant of SPIFF. SPIFF has some technical
advantages over JFIF, but its major claim to fame is simply that it is an
official standard rather than an informal one. At this point it is unclear
whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto
standard. IJG intends to support SPIFF once the standard is frozen, but we
have not decided whether it should become our default output format or not.
(In any case, our decoder will remain capable of reading JFIF indefinitely.)
Various proprietary file formats incorporating JPEG compression also exist.
We have little or no sympathy for the existence of these formats. Indeed,
one of the original reasons for developing this free software was to help
force convergence on common, open format standards for JPEG files. Don't
use a proprietary file format!
TO DO
=====
In future versions, we are considering supporting some of the upcoming JPEG
Part 3 extensions --- principally, variable quantization and the SPIFF file
format.
Tuning the software for better behavior at low quality/high compression
settings is also of interest. The current method for scaling the
quantization tables is known not to be very good at low Q values.
As always, speeding things up is high on our priority list.
Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net.

View File

@ -0,0 +1,679 @@
(module $jfdctint.wasm
;; Function signatures referenced by index from the functions in this module.
;; type 0: two i32 parameters, no result (signature of the imported hook).
(type (;0;) (func (param i32 i32)))
;; type 1: no parameters, no result.
(type (;1;) (func))
;; type 2: no parameters, one i32 result.
(type (;2;) (func (result i32)))
;; Host-provided function imported from module "__pragma", field "loopbound".
;; NOTE(review): the two i32 arguments look like minimum/maximum iteration
;; counts for the loop that follows each call site -- confirm with the
;; toolchain that supplies this import.
(import "__pragma" "loopbound" (func $__pragma_loopbound (type 0)))
;; Function with an empty body (type 1); calling it does nothing. It is
;; exported under the same name at the end of the module.
(func $__wasm_apply_data_relocs (type 1))
(func $jfdctint_return (type 2) (result i32)
i32.const 64
i32.const 64
call $__pragma_loopbound
i32.const -1
i32.const 0
i32.const 0
i32.load offset=1276
i32.const 0
i32.load offset=1272
i32.const 0
i32.load offset=1268
i32.const 0
i32.load offset=1264
i32.const 0
i32.load offset=1260
i32.const 0
i32.load offset=1256
i32.const 0
i32.load offset=1252
i32.const 0
i32.load offset=1248
i32.const 0
i32.load offset=1244
i32.const 0
i32.load offset=1240
i32.const 0
i32.load offset=1236
i32.const 0
i32.load offset=1232
i32.const 0
i32.load offset=1228
i32.const 0
i32.load offset=1224
i32.const 0
i32.load offset=1220
i32.const 0
i32.load offset=1216
i32.const 0
i32.load offset=1212
i32.const 0
i32.load offset=1208
i32.const 0
i32.load offset=1204
i32.const 0
i32.load offset=1200
i32.const 0
i32.load offset=1196
i32.const 0
i32.load offset=1192
i32.const 0
i32.load offset=1188
i32.const 0
i32.load offset=1184
i32.const 0
i32.load offset=1180
i32.const 0
i32.load offset=1176
i32.const 0
i32.load offset=1172
i32.const 0
i32.load offset=1168
i32.const 0
i32.load offset=1164
i32.const 0
i32.load offset=1160
i32.const 0
i32.load offset=1156
i32.const 0
i32.load offset=1152
i32.const 0
i32.load offset=1148
i32.const 0
i32.load offset=1144
i32.const 0
i32.load offset=1140
i32.const 0
i32.load offset=1136
i32.const 0
i32.load offset=1132
i32.const 0
i32.load offset=1128
i32.const 0
i32.load offset=1124
i32.const 0
i32.load offset=1120
i32.const 0
i32.load offset=1116
i32.const 0
i32.load offset=1112
i32.const 0
i32.load offset=1108
i32.const 0
i32.load offset=1104
i32.const 0
i32.load offset=1100
i32.const 0
i32.load offset=1096
i32.const 0
i32.load offset=1092
i32.const 0
i32.load offset=1088
i32.const 0
i32.load offset=1084
i32.const 0
i32.load offset=1080
i32.const 0
i32.load offset=1076
i32.const 0
i32.load offset=1072
i32.const 0
i32.load offset=1068
i32.const 0
i32.load offset=1064
i32.const 0
i32.load offset=1060
i32.const 0
i32.load offset=1056
i32.const 0
i32.load offset=1052
i32.const 0
i32.load offset=1048
i32.const 0
i32.load offset=1044
i32.const 0
i32.load offset=1040
i32.const 0
i32.load offset=1036
i32.const 0
i32.load offset=1032
i32.const 0
i32.load offset=1028
i32.const 0
i32.load offset=1024
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.add
i32.const 1668124
i32.ne
select)
;; In-place integer transform of the 64 32-bit words at byte addresses
;; 1024..1279, viewed as 8 rows of 8 words (32 bytes per row).
;; Two loops of 8 iterations each: the first processes one row per iteration,
;; the second one column per iteration. Both use the same arithmetic shape
;; and multiplier constants; they differ in addressing, rounding and shifts.
;; NOTE(review): by name this is presumably the compiled form of the
;; forward-DCT routine from jfdctint.c -- confirm against the C source.
(func $jfdctint_jpeg_fdct_islow (type 1)
;; 22 i32 locals. Local 0 is the loop counter (a negative byte offset that
;; counts up to 0); locals 1-21 hold element addresses and intermediate
;; values and are reused for different purposes within an iteration.
(local i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
;; Loop-bound hook for the first loop: 8 iterations.
i32.const 8
i32.const 8
call $__pragma_loopbound
;; ---- First loop: rows ----
;; Local 0 runs from -256 to 0 in steps of 32 bytes (one row), so
;; local0 + 1280 is the address of element 0 of the current row and
;; local0 + 1280 + 4*k is the address of element k.
i32.const -256
local.set 0
loop ;; label = @1
;; Address of element 4 is left on the stack as the target of the first
;; store below; local 1 first holds that address, then the element's value.
local.get 0
i32.const 1296
i32.add
local.tee 1
local.get 1
i32.load
local.tee 1
;; local 2 = address of element 3, local 3 = its value;
;; local 4 = element 4 + element 3.
local.get 0
i32.const 1292
i32.add
local.tee 2
i32.load
local.tee 3
i32.add
local.tee 4
;; local 5 = address of element 7, local 6 = its value.
local.get 0
i32.const 1308
i32.add
local.tee 5
i32.load
local.tee 6
;; local 7 = address of element 0, local 8 = its value;
;; local 9 = element 7 + element 0; local 10 = local 4 + local 9.
local.get 0
i32.const 1280
i32.add
local.tee 7
i32.load
local.tee 8
i32.add
local.tee 9
i32.add
local.tee 10
;; local 11 = address of element 5, local 12 = its value.
local.get 0
i32.const 1300
i32.add
local.tee 11
i32.load
local.tee 12
;; local 13 = address of element 2, local 14 = its value;
;; local 15 = element 5 + element 2.
local.get 0
i32.const 1288
i32.add
local.tee 13
i32.load
local.tee 14
i32.add
local.tee 15
;; local 16 = address of element 6, local 17 = its value.
local.get 0
i32.const 1304
i32.add
local.tee 16
i32.load
local.tee 17
;; local 18 = address of element 1, local 19 = its value;
;; local 20 = element 6 + element 1; local 21 = local 15 + local 20.
local.get 0
i32.const 1284
i32.add
local.tee 18
i32.load
local.tee 19
i32.add
local.tee 20
i32.add
local.tee 21
;; element 4 = (local 10 - local 21) << 2
i32.sub
i32.const 2
i32.shl
i32.store
;; element 0 = (local 10 + local 21) << 2
local.get 7
local.get 10
local.get 21
i32.add
i32.const 2
i32.shl
i32.store
;; element 7: built from the pairwise differences (3-4, 0-7, 1-6, 2-5).
;; Each product sum gets +1024 (half of 1 << 11) added before the arithmetic
;; right shift by 11, i.e. the result is rounded rather than truncated.
local.get 5
local.get 3
local.get 1
i32.sub
local.tee 1
local.get 8
local.get 6
i32.sub
local.tee 3
i32.add
i32.const -7373
i32.mul
i32.const 1024
i32.add
local.tee 7
local.get 1
i32.const 2446
i32.mul
i32.add
local.get 1
local.get 19
local.get 17
i32.sub
local.tee 6
i32.add
local.tee 8
local.get 14
local.get 12
i32.sub
local.tee 1
local.get 3
i32.add
local.tee 10
i32.add
i32.const 9633
i32.mul
local.tee 12
local.get 8
i32.const -16069
i32.mul
i32.add
local.tee 8
i32.add
i32.const 11
i32.shr_s
i32.store
;; element 6: from the differences of the pair sums (local 9 - local 4 and
;; local 20 - local 15); local 9 is reused for the shared rounded term.
local.get 16
local.get 9
local.get 4
i32.sub
local.tee 4
local.get 20
local.get 15
i32.sub
local.tee 5
i32.add
i32.const 4433
i32.mul
i32.const 1024
i32.add
local.tee 9
local.get 5
i32.const -15137
i32.mul
i32.add
i32.const 11
i32.shr_s
i32.store
;; element 2: shared term (local 9) + local 4 * 6270, shifted right by 11.
local.get 13
local.get 9
local.get 4
i32.const 6270
i32.mul
i32.add
i32.const 11
i32.shr_s
i32.store
;; element 5
local.get 11
local.get 1
local.get 6
i32.add
i32.const -20995
i32.mul
i32.const 1024
i32.add
local.tee 4
local.get 1
i32.const 16819
i32.mul
i32.add
local.get 12
local.get 10
i32.const -3196
i32.mul
i32.add
local.tee 1
i32.add
i32.const 11
i32.shr_s
i32.store
;; element 3
local.get 2
local.get 4
local.get 6
i32.const 25172
i32.mul
i32.add
local.get 8
i32.add
i32.const 11
i32.shr_s
i32.store
;; element 1
local.get 18
local.get 7
local.get 3
i32.const 12299
i32.mul
i32.add
local.get 1
i32.add
i32.const 11
i32.shr_s
i32.store
;; Advance to the next row; repeat while the offset is still non-zero.
local.get 0
i32.const 32
i32.add
local.tee 0
br_if 0 (;@1;)
end
;; Loop-bound hook for the second loop: 8 iterations.
i32.const 8
i32.const 8
call $__pragma_loopbound
;; ---- Second loop: columns ----
;; Local 0 runs from -32 to 0 in steps of 4 bytes (one word), so
;; local0 + 1056 is the address of the current column's word in row 0 and
;; local0 + 1056 + 32*k is the address of its word in row k.
i32.const -32
local.set 0
loop ;; label = @1
;; Address of the row-4 word is left on the stack for the first store;
;; local 1 first holds that address, then the loaded value.
local.get 0
i32.const 1184
i32.add
local.tee 1
local.get 1
i32.load
local.tee 1
;; local 2 = address of row 3, local 3 = its value;
;; local 4 = row 4 + row 3.
local.get 0
i32.const 1152
i32.add
local.tee 2
i32.load
local.tee 3
i32.add
local.tee 4
;; local 5 = address of row 7, local 6 = its value.
local.get 0
i32.const 1280
i32.add
local.tee 5
i32.load
local.tee 6
;; local 7 = address of row 0, local 8 = its value;
;; local 9 = row 7 + row 0; local 10 = local 4 + local 9.
local.get 0
i32.const 1056
i32.add
local.tee 7
i32.load
local.tee 8
i32.add
local.tee 9
i32.add
local.tee 10
;; local 11 = address of row 5, local 12 = its value.
local.get 0
i32.const 1216
i32.add
local.tee 11
i32.load
local.tee 12
;; local 13 = address of row 2, local 14 = its value;
;; local 15 = row 5 + row 2.
local.get 0
i32.const 1120
i32.add
local.tee 13
i32.load
local.tee 14
i32.add
local.tee 15
;; local 16 = address of row 6, local 17 = its value.
local.get 0
i32.const 1248
i32.add
local.tee 16
i32.load
local.tee 17
;; local 18 = address of row 1, local 19 = its value;
;; local 20 = row 6 + row 1; local 21 = local 15 + local 20.
local.get 0
i32.const 1088
i32.add
local.tee 18
i32.load
local.tee 19
i32.add
local.tee 20
i32.add
local.tee 21
;; row 4 = (local 10 - local 21 + 2) >> 2  (rounded arithmetic shift)
i32.sub
i32.const 2
i32.add
i32.const 2
i32.shr_s
i32.store
;; row 0 = (local 21 + local 10 + 2) >> 2
local.get 7
local.get 21
local.get 10
i32.add
i32.const 2
i32.add
i32.const 2
i32.shr_s
i32.store
;; row 7: same multiplier constants as in the first loop, but the rounding
;; term is 16384 (half of 1 << 15) and the arithmetic right shift is by 15.
local.get 5
local.get 3
local.get 1
i32.sub
local.tee 1
local.get 8
local.get 6
i32.sub
local.tee 3
i32.add
i32.const -7373
i32.mul
i32.const 16384
i32.add
local.tee 7
local.get 1
i32.const 2446
i32.mul
i32.add
local.get 1
local.get 19
local.get 17
i32.sub
local.tee 6
i32.add
local.tee 8
local.get 14
local.get 12
i32.sub
local.tee 1
local.get 3
i32.add
local.tee 10
i32.add
i32.const 9633
i32.mul
local.tee 12
local.get 8
i32.const -16069
i32.mul
i32.add
local.tee 8
i32.add
i32.const 15
i32.shr_s
i32.store
;; row 6
local.get 16
local.get 9
local.get 4
i32.sub
local.tee 4
local.get 20
local.get 15
i32.sub
local.tee 5
i32.add
i32.const 4433
i32.mul
i32.const 16384
i32.add
local.tee 9
local.get 5
i32.const -15137
i32.mul
i32.add
i32.const 15
i32.shr_s
i32.store
;; row 2
local.get 13
local.get 9
local.get 4
i32.const 6270
i32.mul
i32.add
i32.const 15
i32.shr_s
i32.store
;; row 5
local.get 11
local.get 1
local.get 6
i32.add
i32.const -20995
i32.mul
i32.const 16384
i32.add
local.tee 4
local.get 1
i32.const 16819
i32.mul
i32.add
local.get 12
local.get 10
i32.const -3196
i32.mul
i32.add
local.tee 1
i32.add
i32.const 15
i32.shr_s
i32.store
;; row 3
local.get 2
local.get 4
local.get 6
i32.const 25172
i32.mul
i32.add
local.get 8
i32.add
i32.const 15
i32.shr_s
i32.store
;; row 1
local.get 18
local.get 7
local.get 3
i32.const 12299
i32.mul
i32.add
local.get 1
i32.add
i32.const 15
i32.shr_s
i32.store
;; Advance to the next column; repeat while the offset is still non-zero.
local.get 0
i32.const 4
i32.add
local.tee 0
br_if 0 (;@1;)
end)
;; Thin wrapper: takes no arguments, returns nothing, and only calls
;; $jfdctint_jpeg_fdct_islow. Exported as "entrypoint" at the end of the
;; module.
(func $jfdctint_main (type 1)
call $jfdctint_jpeg_fdct_islow)
;; Program entry (exported as "main"): fills the 64 words at byte addresses
;; 1024..1279 with a pseudo-random sequence, runs $jfdctint_main on them and
;; returns the result of $jfdctint_return (0 or -1).
(func $__original_main (type 2) (result i32)
;; local 0: current value of the sequence; local 1: negative byte offset
;; used as the loop counter.
(local i32 i32)
;; Loop-bound hook call with (64, 64).
;; NOTE(review): the loop below as written runs 32 times (offset -256 to 0
;; in steps of 8, two stores per iteration), presumably a 64-iteration source
;; loop unrolled by two -- confirm the bound is meant for the source loop.
i32.const 64
i32.const 64
call $__pragma_loopbound
;; Sequence starts at 1.
i32.const 1
local.set 0
i32.const -256
local.set 1
loop ;; label = @1
;; First word of the pair, at address local1 + 1280:
;; value = (value * 133 + 81) rem_s 65535  (signed remainder)
local.get 1
i32.const 1280
i32.add
local.get 0
i32.const 133
i32.mul
i32.const 81
i32.add
i32.const 65535
i32.rem_s
local.tee 0
i32.store
;; Second word of the pair, at address local1 + 1284, using the same update.
local.get 1
i32.const 1284
i32.add
local.get 0
i32.const 133
i32.mul
i32.const 81
i32.add
i32.const 65535
i32.rem_s
local.tee 0
i32.store
;; Advance by two words; repeat while the offset is still non-zero.
local.get 1
i32.const 8
i32.add
local.tee 1
br_if 0 (;@1;)
end
;; Run the transform, then leave the checksum verdict on the stack as this
;; function's return value.
call $jfdctint_main
call $jfdctint_return)
;; Function table with exactly one slot (minimum 1, maximum 1).
(table (;0;) 1 1 funcref)
;; Linear memory with an initial size of 1 page and no declared maximum.
(memory (;0;) 1)
;; Mutable i32 global initialised to 5376. None of the functions in this
;; module read or write it.
;; NOTE(review): by name presumably the linker-provided stack pointer.
(global $__stack_pointer (mut i32) (i32.const 5376))
;; Immutable global 1 = 1280, exported as "__data_end". The functions in this
;; module access words at byte addresses 1024..1279, i.e. directly below it.
(global (;1;) i32 (i32.const 1280))
;; Immutable global 2 = 5376, exported as "__heap_base" (same value as the
;; initial value of $__stack_pointer).
(global (;2;) i32 (i32.const 5376))
;; Exports: the memory, the empty relocation function, the benchmark kernel
;; wrapper ("entrypoint" -> $jfdctint_main, no initialisation or checksum),
;; the full program ("main" -> $__original_main), and the two address globals.
(export "memory" (memory 0))
(export "__wasm_apply_data_relocs" (func $__wasm_apply_data_relocs))
(export "entrypoint" (func $jfdctint_main))
(export "main" (func $__original_main))
(export "__data_end" (global 1))
(export "__heap_base" (global 2)))

View File

@ -0,0 +1,314 @@
/*
This program is part of the TACLeBench benchmark suite.
Version V 1.x
Name: jfdctint
Author: Thomas G. Lane, Public domain JPEG source code.
Modified by Steven Li at Princeton University.
Function: JPEG slow-but-accurate integer implementation of the
forward DCT (Discrete Cosine Transform) on a 8x8
pixel block [from original file documentations]
Copyright (C) 1991-1994, Thomas G. Lane.
This file is part of the Independent JPEG Group's software.
For conditions of distribution and use, see the accompanying README file.
This file contains a slow-but-accurate integer implementation of the
forward DCT (Discrete Cosine Transform).
A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
on each column. Direct algorithms are also available, but they are
much more complex and seem not to be any faster when reduced to code.
This implementation is based on an algorithm described in
C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
The primary algorithm described there uses 11 multiplies and 29 adds.
We use their alternate method with 12 multiplies and 32 adds.
The advantage of this method is that no data path contains more than one
multiplication; this allows a very simple and accurate implementation in
scaled fixed-point arithmetic, with a minimal number of shifts.
Source: SNU-RT Benchmark Suite for Worst Case Timing Analysis
Collected and Modified by S.-S. Lim
Real-Time Research Group
Seoul National University
Changes: Moved initialisation code from jfdctint_main() to jfdctint_init(),
added checksum calculation in jfdctint_return()
License: see README
*/
/* COMMENTS: Long calculation sequences (i.e., long basic blocks), */
/* single-nested loops. */
/**********************************************************************
Functions to be timed
***********************************************************************/
/* These definitions were added by Steven Li so as to bypass the header
files.
*/
// Wasm loop bounds
// __pragma_loopbound is not defined in this file: the attributes declare it
// as a WebAssembly import (module "__pragma", field "loopbound"). Every loop
// in this file is preceded by a call that passes the loop's minimum and
// maximum iteration count.
__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
/* Edge length of the square sample block. Used as the row stride and, via
   DCTSIZE - 1, as the start value of the counter in both DCT passes. */
#define DCTSIZE 8
/* Add 2**(n-1) to x and shift the sum right by n bits, i.e. scale x down by
   2**n with rounding instead of truncation. n must be at least 1 because of
   the (n) - 1 shift count; x is expanded once, n twice.
   NOTE(review): for negative x this relies on >> behaving as an arithmetic
   shift, which is implementation-defined in C - confirm for new targets. */
#define DESCALE(x, n) (((x) + (((int) 1) << ((n) - 1))) >> (n))
/*
The poop on this scaling stuff is as follows:
Each 1-D DCT step produces outputs which are a factor of sqrt(N)
larger than the true DCT outputs. The final outputs are therefore
a factor of N larger than desired; since N=8 this can be cured by
a simple right shift at the end of the algorithm. The advantage of
this arrangement is that we save two multiplications per 1-D DCT,
because the y0 and y4 outputs need not be divided by sqrt(N).
In the IJG code, this factor of 8 is removed by the quantization step
(in jcdctmgr.c), NOT in this module.
We have to do addition and subtraction of the integer inputs, which
is no problem, and multiplication by fractional constants, which is
a problem to do in integer arithmetic. We multiply all the constants
by CONST_SCALE and convert them to integer constants (thus retaining
CONST_BITS (13) bits of precision in the constants). After doing a
multiplication we have to divide the product by CONST_SCALE, with proper
rounding, to produce the correct output. This division can be done
cheaply as a right shift of CONST_BITS (13) bits. We postpone shifting
as long as possible so that partial sums can be added together with
full fractional precision.
The outputs of the first pass are scaled up by PASS1_BITS (2) bits so that
they are represented to better-than-integral precision. These outputs
require BITS_IN_JSAMPLE (8) + PASS1_BITS (2) + 3 bits; this fits in a
16-bit word with the recommended scaling. (For 12-bit sample data, the
intermediate array is int anyway.)
To avoid overflow of the 32-bit intermediate results in pass 2, we must
have BITS_IN_JSAMPLE (8) + CONST_BITS (13) + PASS1_BITS (2) <= 26.
Error analysis shows that the values given below are the most effective.
*/
/*
Forward declaration of functions
*/
/* All four functions take no arguments. They are declared with real (void)
   prototypes so that a call with arguments is diagnosed, and so that the
   declaration of jfdctint_main agrees with its (void) definition below. */
void jfdctint_init(void);
int jfdctint_return(void);
/* The kernel wrapper and the driver are kept out of line and are exported
   from the wasm module under the names "entrypoint" and "main". */
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void);
__attribute__((noinline)) __attribute__((export_name("main"))) int main(void);
#define CONST_BITS 13
#define PASS1_BITS 2
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
causing a lot of useless floating-point operations at run time.
To get around this we use the following pre-calculated constants.
If you change CONST_BITS you may want to add appropriate values.
(With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#define FIX_0_298631336 ((int) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int) 25172) /* FIX(3.072711026) */
/* Multiply an int variable by an int constant to yield an int result.
For 8-bit samples with the recommended scaling, all the variable
and constant values involved are no more than 16 bits wide, so a
16x16->32 bit multiply can be used instead of a full 32x32 multiply.
For 12-bit samples, a full 32-bit multiplication will be needed.
*/
/* Work buffer of 64 ints. jfdctint_init() fills it, the DCT kernel
   transforms it in place, and jfdctint_return() sums it up. */
int jfdctint_data[64];
/* Value that jfdctint_return() expects the sum of jfdctint_data to equal
   after the kernel has run. */
const int jfdctint_CHECKSUM = 1668124;
/*
  Fill jfdctint_data with a reproducible pseudo-random sequence.

  Each element is derived from its predecessor as (prev * 133 + 81) % 65535,
  starting from the value 1 (which itself is not stored).
*/
void
jfdctint_init() {
  int idx;
  int state = 1;

  /* The loop below always runs exactly 64 times. */
  __pragma_loopbound(64, 64);
  for (idx = 0; idx != 64; ++idx) {
    state = (133 * state + 81) % 65535;
    jfdctint_data[idx] = state;
  }
}
/*
  Self-check of the benchmark result.

  Sums all 64 entries of jfdctint_data and compares the total with
  jfdctint_CHECKSUM. Returns 0 on a match and -1 otherwise.
*/
int
jfdctint_return() {
  int idx;
  int total = 0;

  /* The loop below always runs exactly 64 times. */
  __pragma_loopbound(64, 64);
  for (idx = 0; idx < 64; idx++) {
    total += jfdctint_data[idx];
  }

  if (total != jfdctint_CHECKSUM) {
    return -1;
  }
  return 0;
}
/*
Perform the forward DCT on one block of samples.

The block is the global jfdctint_data, addressed as DCTSIZE rows of DCTSIZE
ints. The function takes no arguments and returns nothing; the transformed
values overwrite the input. Pass 1 walks the rows, pass 2 walks the columns.
*/
void
jfdctint_jpeg_fdct_islow(void) {
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
int *dataptr;
int ctr;
/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = jfdctint_data;
/* ctr counts from DCTSIZE - 1 down to 0, i.e. exactly 8 iterations. */
__pragma_loopbound(8, 8);
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
/* Sums and differences of the mirrored samples (k and 7 - k) of this row. */
tmp0 = dataptr[0] + dataptr[7];
tmp7 = dataptr[0] - dataptr[7];
tmp1 = dataptr[1] + dataptr[6];
tmp6 = dataptr[1] - dataptr[6];
tmp2 = dataptr[2] + dataptr[5];
tmp5 = dataptr[2] - dataptr[5];
tmp3 = dataptr[3] + dataptr[4];
tmp4 = dataptr[3] - dataptr[4];
/* Outputs 0, 2, 4 and 6 are computed from the sums tmp0..tmp3 only. */
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
/* Outputs 0 and 4 involve no multiplication; they are only scaled up by
   PASS1_BITS. */
dataptr[0] = (int) ((tmp10 + tmp11) << PASS1_BITS);
dataptr[4] = (int) ((tmp10 - tmp11) << PASS1_BITS);
/* z1 is the product shared by outputs 2 and 6. */
z1 = (tmp12 + tmp13) * FIX_0_541196100;
dataptr[2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
CONST_BITS - PASS1_BITS);
dataptr[6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
CONST_BITS - PASS1_BITS);
/* Outputs 1, 3, 5 and 7 are computed from the differences tmp4..tmp7 only.
   The sums z1..z4 must be formed before tmp4..tmp7 and z1..z4 are
   overwritten with their scaled products below, so the order matters. */
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
/* z5 is common to both z3 and z4. */
z3 += z5;
z4 += z5;
dataptr[7] = (int) DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS);
dataptr[5] = (int) DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS);
dataptr[3] = (int) DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS);
dataptr[1] = (int) DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
}
/* Pass 2: process columns. Same arithmetic as pass 1, but the elements of
   one column are DCTSIZE ints apart, and the PASS1_BITS scaling introduced
   by pass 1 is removed again: outputs 0 and 4 are descaled by PASS1_BITS and
   all other outputs by CONST_BITS + PASS1_BITS. */
dataptr = jfdctint_data;
/* As above: ctr runs from DCTSIZE - 1 down to 0, exactly 8 iterations. */
__pragma_loopbound(8, 8);
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
/* Sums and differences of the mirrored samples of this column. */
tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
/* Outputs 0, 2, 4 and 6 of the column, from the sums only. */
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[DCTSIZE * 0] = (int) DESCALE(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE * 4] = (int) DESCALE(tmp10 - tmp11, PASS1_BITS);
z1 = (tmp12 + tmp13) * FIX_0_541196100;
dataptr[DCTSIZE * 2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
CONST_BITS + PASS1_BITS);
/* Outputs 1, 3, 5 and 7 of the column, from the differences only; the same
   ordering constraint as in pass 1 applies. */
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[DCTSIZE * 7] =
(int) DESCALE(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 5] =
(int) DESCALE(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 3] =
(int) DESCALE(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 1] =
(int) DESCALE(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS);
dataptr++; /* advance pointer to next column */
}
}
/* Main function (the part of the benchmark that is timed).
Runs the forward DCT once on jfdctint_data. The function is kept out of line
(noinline) and exported from the wasm module under the name "entrypoint".

Note kept from the original benchmark: the function execution time was
measured using a logic analyzer, which measures the OFF time of a LED on
board. The switching latency, including the function call/return time,
was measured to be equal to 1.1us (22 clock cycles).
*/
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void) {
jfdctint_jpeg_fdct_islow();
}
/* Benchmark driver: initialise the input block, run the kernel once and
   hand back the self-check result of jfdctint_return() (0 when the checksum
   matches, -1 otherwise). Exported from the wasm module as "main". */
__attribute__((noinline)) __attribute__((export_name("main"))) int
main(void) {
  int status;

  jfdctint_init();
  jfdctint_main();
  status = jfdctint_return();

  return status;
}

View File

@ -0,0 +1,322 @@
/*
This program is part of the TACLeBench benchmark suite.
Version V 1.x
Name: jfdctint
Author: Thomas G. Lane, Public domain JPEG source code.
Modified by Steven Li at Princeton University.
Function: JPEG slow-but-accurate integer implementation of the
forward DCT (Discrete Cosine Transform) on a 8x8
pixel block [from original file documentations]
Copyright (C) 1991-1994, Thomas G. Lane.
This file is part of the Independent JPEG Group's software.
For conditions of distribution and use, see the accompanying README file.
This file contains a slow-but-accurate integer implementation of the
forward DCT (Discrete Cosine Transform).
A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
on each column. Direct algorithms are also available, but they are
much more complex and seem not to be any faster when reduced to code.
This implementation is based on an algorithm described in
C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
The primary algorithm described there uses 11 multiplies and 29 adds.
We use their alternate method with 12 multiplies and 32 adds.
The advantage of this method is that no data path contains more than one
multiplication; this allows a very simple and accurate implementation in
scaled fixed-point arithmetic, with a minimal number of shifts.
Source: SNU-RT Benchmark Suite for Worst Case Timing Analysis
Collected and Modified by S.-S. Lim
Real-Time Research Group
Seoul National University
Changes: Moved initialisation code from jfdctint_main() to jfdctint_init(),
added checksum calculation in jfdctint_return()
License: see README
*/
/* COMMENTS: Long calculation sequences (i.e., long basic blocks), */
/* single-nested loops. */
/**********************************************************************
Functions to be timed
***********************************************************************/
/* These definitions were added by Steven Li so as to bypass the header
files.
*/
// Wasm loop bounds
// __pragma_loopbound is not defined in this file: the attributes declare it
// as a WebAssembly import (module "__pragma", field "loopbound"). Every loop
// in this file is preceded by a call that passes the loop's minimum and
// maximum iteration count.
__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
/* Edge length of the square sample block. Used as the row stride and, via
   DCTSIZE - 1, as the start value of the counter in both DCT passes. */
#define DCTSIZE 8
/* Add 2**(n-1) to x and shift the sum right by n bits, i.e. scale x down by
   2**n with rounding instead of truncation. n must be at least 1 because of
   the (n) - 1 shift count; x is expanded once, n twice.
   NOTE(review): for negative x this relies on >> behaving as an arithmetic
   shift, which is implementation-defined in C - confirm for new targets. */
#define DESCALE(x, n) (((x) + (((int) 1) << ((n) - 1))) >> (n))
/*
The poop on this scaling stuff is as follows:
Each 1-D DCT step produces outputs which are a factor of sqrt(N)
larger than the true DCT outputs. The final outputs are therefore
a factor of N larger than desired; since N=8 this can be cured by
a simple right shift at the end of the algorithm. The advantage of
this arrangement is that we save two multiplications per 1-D DCT,
because the y0 and y4 outputs need not be divided by sqrt(N).
In the IJG code, this factor of 8 is removed by the quantization step
(in jcdctmgr.c), NOT in this module.
We have to do addition and subtraction of the integer inputs, which
is no problem, and multiplication by fractional constants, which is
a problem to do in integer arithmetic. We multiply all the constants
by CONST_SCALE and convert them to integer constants (thus retaining
CONST_BITS (13) bits of precision in the constants). After doing a
multiplication we have to divide the product by CONST_SCALE, with proper
rounding, to produce the correct output. This division can be done
cheaply as a right shift of CONST_BITS (13) bits. We postpone shifting
as long as possible so that partial sums can be added together with
full fractional precision.
The outputs of the first pass are scaled up by PASS1_BITS (2) bits so that
they are represented to better-than-integral precision. These outputs
require BITS_IN_JSAMPLE (8) + PASS1_BITS (2) + 3 bits; this fits in a
16-bit word with the recommended scaling. (For 12-bit sample data, the
intermediate array is int anyway.)
To avoid overflow of the 32-bit intermediate results in pass 2, we must
have BITS_IN_JSAMPLE (8) + CONST_BITS (13) + PASS1_BITS (2) <= 26.
Error analysis shows that the values given below are the most effective.
*/
/*
Forward declaration of functions
*/
/* The helpers are file-local and forced inline into their callers. They are
   declared with real (void) prototypes so that a call with arguments is
   diagnosed. */
__attribute__((always_inline)) static inline void jfdctint_init(void);
__attribute__((always_inline)) static inline int jfdctint_return(void);
/* The kernel wrapper and the driver are kept out of line and are exported
   from the wasm module under the names "entrypoint" and "main". Each
   attribute is spelled exactly once; the specifier line used to be emitted
   twice per declaration. */
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void);
__attribute__((noinline)) __attribute__((export_name("main"))) int
main(void);
#define CONST_BITS 13
#define PASS1_BITS 2
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
causing a lot of useless floating-point operations at run time.
To get around this we use the following pre-calculated constants.
If you change CONST_BITS you may want to add appropriate values.
(With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#define FIX_0_298631336 ((int) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int) 25172) /* FIX(3.072711026) */
/* Multiply an int variable by an int constant to yield an int result.
For 8-bit samples with the recommended scaling, all the variable
and constant values involved are no more than 16 bits wide, so a
16x16->32 bit multiply can be used instead of a full 32x32 multiply.
For 12-bit samples, a full 32-bit multiplication will be needed.
*/
/* Work buffer of 64 ints. jfdctint_init() fills it, the DCT kernel
   transforms it in place, and jfdctint_return() sums it up. */
int jfdctint_data[64];
/* Value that jfdctint_return() expects the sum of jfdctint_data to equal
   after the kernel has run. */
const int jfdctint_CHECKSUM = 1668124;
/*
  Fill jfdctint_data with a reproducible pseudo-random sequence.

  Each element is derived from its predecessor as (prev * 133 + 81) % 65535,
  starting from the value 1 (which itself is not stored).
*/
__attribute__((always_inline)) static inline void
jfdctint_init() {
  int idx;
  int state = 1;

  /* The loop below always runs exactly 64 times. */
  __pragma_loopbound(64, 64);
  for (idx = 0; idx != 64; ++idx) {
    state = (133 * state + 81) % 65535;
    jfdctint_data[idx] = state;
  }
}
/*
  Self-check of the benchmark result.

  Sums all 64 entries of jfdctint_data and compares the total with
  jfdctint_CHECKSUM. Returns 0 on a match and -1 otherwise.
*/
__attribute__((always_inline)) static inline int
jfdctint_return() {
  int idx;
  int total = 0;

  /* The loop below always runs exactly 64 times. */
  __pragma_loopbound(64, 64);
  for (idx = 0; idx < 64; idx++) {
    total += jfdctint_data[idx];
  }

  if (total != jfdctint_CHECKSUM) {
    return -1;
  }
  return 0;
}
/*
Perform the forward DCT on one block of samples.

The block is the global jfdctint_data, addressed as DCTSIZE rows of DCTSIZE
ints. The function takes no arguments and returns nothing; the transformed
values overwrite the input. Pass 1 walks the rows, pass 2 walks the columns.
In this variant the function is file-local and marked always_inline.
*/
__attribute__((always_inline)) static inline void
jfdctint_jpeg_fdct_islow(void) {
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
int *dataptr;
int ctr;
/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = jfdctint_data;
/* ctr counts from DCTSIZE - 1 down to 0, i.e. exactly 8 iterations. */
__pragma_loopbound(8, 8);
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
/* Sums and differences of the mirrored samples (k and 7 - k) of this row. */
tmp0 = dataptr[0] + dataptr[7];
tmp7 = dataptr[0] - dataptr[7];
tmp1 = dataptr[1] + dataptr[6];
tmp6 = dataptr[1] - dataptr[6];
tmp2 = dataptr[2] + dataptr[5];
tmp5 = dataptr[2] - dataptr[5];
tmp3 = dataptr[3] + dataptr[4];
tmp4 = dataptr[3] - dataptr[4];
/* Outputs 0, 2, 4 and 6 are computed from the sums tmp0..tmp3 only. */
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
/* Outputs 0 and 4 involve no multiplication; they are only scaled up by
   PASS1_BITS. */
dataptr[0] = (int) ((tmp10 + tmp11) << PASS1_BITS);
dataptr[4] = (int) ((tmp10 - tmp11) << PASS1_BITS);
/* z1 is the product shared by outputs 2 and 6. */
z1 = (tmp12 + tmp13) * FIX_0_541196100;
dataptr[2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
CONST_BITS - PASS1_BITS);
dataptr[6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
CONST_BITS - PASS1_BITS);
/* Outputs 1, 3, 5 and 7 are computed from the differences tmp4..tmp7 only.
   The sums z1..z4 must be formed before tmp4..tmp7 and z1..z4 are
   overwritten with their scaled products below, so the order matters. */
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
/* z5 is common to both z3 and z4. */
z3 += z5;
z4 += z5;
dataptr[7] = (int) DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS);
dataptr[5] = (int) DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS);
dataptr[3] = (int) DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS);
dataptr[1] = (int) DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
}
/* Pass 2: process columns. Same arithmetic as pass 1, but the elements of
   one column are DCTSIZE ints apart, and the PASS1_BITS scaling introduced
   by pass 1 is removed again: outputs 0 and 4 are descaled by PASS1_BITS and
   all other outputs by CONST_BITS + PASS1_BITS. */
dataptr = jfdctint_data;
/* As above: ctr runs from DCTSIZE - 1 down to 0, exactly 8 iterations. */
__pragma_loopbound(8, 8);
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
/* Sums and differences of the mirrored samples of this column. */
tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
/* Outputs 0, 2, 4 and 6 of the column, from the sums only. */
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[DCTSIZE * 0] = (int) DESCALE(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE * 4] = (int) DESCALE(tmp10 - tmp11, PASS1_BITS);
z1 = (tmp12 + tmp13) * FIX_0_541196100;
dataptr[DCTSIZE * 2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
CONST_BITS + PASS1_BITS);
/* Outputs 1, 3, 5 and 7 of the column, from the differences only; the same
   ordering constraint as in pass 1 applies. */
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[DCTSIZE * 7] =
(int) DESCALE(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 5] =
(int) DESCALE(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 3] =
(int) DESCALE(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS);
dataptr[DCTSIZE * 1] =
(int) DESCALE(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS);
dataptr++; /* advance pointer to next column */
}
}
/* Main function (the part of the benchmark that is timed).
   Runs the forward DCT once on jfdctint_data. The function is kept out of
   line (noinline) and exported from the wasm module under the name
   "entrypoint"; each attribute is spelled exactly once.

   Note kept from the original benchmark: the function execution time was
   measured using a logic analyzer, which measures the OFF time of a LED on
   board. The switching latency, including the function call/return time,
   was measured to be equal to 1.1us (22 clock cycles).
*/
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void) {
  jfdctint_jpeg_fdct_islow();
}
/* Benchmark driver: initialise the input block, run the kernel once and
   hand back the self-check result of jfdctint_return() (0 when the checksum
   matches, -1 otherwise). Kept out of line and exported from the wasm module
   as "main"; each attribute is spelled exactly once. */
__attribute__((noinline)) __attribute__((export_name("main"))) int
main(void) {
  jfdctint_init();
  jfdctint_main();
  return (jfdctint_return());
}

View File

@ -0,0 +1,319 @@
/*
This program is part of the TACLeBench benchmark suite.
Version V 1.x
Name: jfdctint
Author: Thomas G. Lane, Public domain JPEG source code.
Modified by Steven Li at Princeton University.
Function: JPEG slow-but-accurate integer implementation of the
forward DCT (Discrete Cosine Transform) on a 8x8
pixel block [from original file documentations]
Copyright (C) 1991-1994, Thomas G. Lane.
This file is part of the Independent JPEG Group's software.
For conditions of distribution and use, see the accompanying README file.
This file contains a slow-but-accurate integer implementation of the
forward DCT (Discrete Cosine Transform).
A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
on each column. Direct algorithms are also available, but they are
much more complex and seem not to be any faster when reduced to code.
This implementation is based on an algorithm described in
C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
The primary algorithm described there uses 11 multiplies and 29 adds.
We use their alternate method with 12 multiplies and 32 adds.
The advantage of this method is that no data path contains more than one
multiplication; this allows a very simple and accurate implementation in
scaled fixed-point arithmetic, with a minimal number of shifts.
Source: SNU-RT Benchmark Suite for Worst Case Timing Analysis
Collected and Modified by S.-S. Lim
Real-Time Research Group
Seoul National University
Changes: Moved initialisation code from jfdctint_main() to jfdctint_init(),
added checksum calculation in jfdctint_return()
License: see README
*/
/* COMMENTS: Long calculation sequences (i.e., long basic blocks), */
/* single-nested loops. */
/**********************************************************************
Functions to be timed
***********************************************************************/
/* These definitions were added by Steven Li so as to bypass the header
files.
*/
/* Edge length of the square sample block. Used as the row stride and, via
   DCTSIZE - 1, as the start value of the counter in both DCT passes. */
#define DCTSIZE 8
/* Add 2**(n-1) to x and shift the sum right by n bits, i.e. scale x down by
   2**n with rounding instead of truncation. n must be at least 1 because of
   the (n)-1 shift count; x is expanded once, n twice.
   NOTE(review): for negative x this relies on >> behaving as an arithmetic
   shift, which is implementation-defined in C - confirm for new targets. */
#define DESCALE(x,n) (((x) + (((int)1) << ((n)-1))) >> (n))
/*
The poop on this scaling stuff is as follows:
Each 1-D DCT step produces outputs which are a factor of sqrt(N)
larger than the true DCT outputs. The final outputs are therefore
a factor of N larger than desired; since N=8 this can be cured by
a simple right shift at the end of the algorithm. The advantage of
this arrangement is that we save two multiplications per 1-D DCT,
because the y0 and y4 outputs need not be divided by sqrt(N).
In the IJG code, this factor of 8 is removed by the quantization step
(in jcdctmgr.c), NOT in this module.
We have to do addition and subtraction of the integer inputs, which
is no problem, and multiplication by fractional constants, which is
a problem to do in integer arithmetic. We multiply all the constants
by CONST_SCALE and convert them to integer constants (thus retaining
CONST_BITS (13) bits of precision in the constants). After doing a
multiplication we have to divide the product by CONST_SCALE, with proper
rounding, to produce the correct output. This division can be done
cheaply as a right shift of CONST_BITS (13) bits. We postpone shifting
as long as possible so that partial sums can be added together with
full fractional precision.
The outputs of the first pass are scaled up by PASS1_BITS (2) bits so that
they are represented to better-than-integral precision. These outputs
require BITS_IN_JSAMPLE (8) + PASS1_BITS (2) + 3 bits; this fits in a
16-bit word with the recommended scaling. (For 12-bit sample data, the
intermediate array is int anyway.)
To avoid overflow of the 32-bit intermediate results in pass 2, we must
have BITS_IN_JSAMPLE (8) + CONST_BITS (13) + PASS1_BITS (2) <= 26.
Error analysis shows that the values given below are the most effective.
*/
/*
Forward declaration of functions
*/
/* All four functions take no arguments. They are declared with real
   ( void ) prototypes so that a call with arguments is diagnosed, and so
   that the declaration of jfdctint_main agrees with its definition below. */
void jfdctint_init( void );
int jfdctint_return( void );
void jfdctint_main( void );
int main( void );
#define CONST_BITS 13
#define PASS1_BITS 2
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
causing a lot of useless floating-point operations at run time.
To get around this we use the following pre-calculated constants.
If you change CONST_BITS you may want to add appropriate values.
(With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#define FIX_0_298631336 ((int) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int) 25172) /* FIX(3.072711026) */
/* Multiply an int variable by an int constant to yield an int result.
For 8-bit samples with the recommended scaling, all the variable
and constant values involved are no more than 16 bits wide, so a
16x16->32 bit multiply can be used instead of a full 32x32 multiply.
For 12-bit samples, a full 32-bit multiplication will be needed.
*/
/* Work buffer of 64 ints. jfdctint_init() fills it, the DCT kernel
   transforms it in place, and jfdctint_return() sums it up. */
int jfdctint_data[ 64 ];
/* Value that jfdctint_return() expects the sum of jfdctint_data to equal
   after the kernel has run. */
const int jfdctint_CHECKSUM = 1668124;
/*
  Fill jfdctint_data with a reproducible pseudo-random sequence.

  Each element is derived from its predecessor as (prev * 133 + 81) % 65535,
  starting from the value 1 (which itself is not stored).
*/
void jfdctint_init()
{
  int idx;
  int state = 1;

  /* The loop below always runs exactly 64 times. */
  _Pragma( "loopbound min 64 max 64" )
  for ( idx = 0; idx != 64; ++idx ) {
    state = ( 133 * state + 81 ) % 65535;
    jfdctint_data[ idx ] = state;
  }
}
/*
  Self-check of the benchmark result.

  Sums all 64 entries of jfdctint_data and compares the total with
  jfdctint_CHECKSUM. Returns 0 on a match and -1 otherwise.
*/
int jfdctint_return()
{
  int idx;
  int total = 0;

  /* The loop below always runs exactly 64 times. */
  _Pragma( "loopbound min 64 max 64" )
  for ( idx = 0; idx < 64; idx++ ) {
    total += jfdctint_data[ idx ];
  }

  if ( total != jfdctint_CHECKSUM ) {
    return -1;
  }
  return 0;
}
/*
Perform the forward DCT on one block of samples.

The block is the global jfdctint_data, addressed as DCTSIZE rows of DCTSIZE
ints. The function takes no arguments and returns nothing; the transformed
values overwrite the input. Pass 1 walks the rows, pass 2 walks the columns.
*/
void jfdctint_jpeg_fdct_islow( void )
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
int *dataptr;
int ctr;
/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = jfdctint_data;
/* ctr counts from DCTSIZE - 1 down to 0, i.e. exactly 8 iterations. */
_Pragma( "loopbound min 8 max 8" )
for ( ctr = DCTSIZE - 1; ctr >= 0; ctr-- ) {
/* Sums and differences of the mirrored samples (k and 7 - k) of this row. */
tmp0 = dataptr[ 0 ] + dataptr[ 7 ];
tmp7 = dataptr[ 0 ] - dataptr[ 7 ];
tmp1 = dataptr[ 1 ] + dataptr[ 6 ];
tmp6 = dataptr[ 1 ] - dataptr[ 6 ];
tmp2 = dataptr[ 2 ] + dataptr[ 5 ];
tmp5 = dataptr[ 2 ] - dataptr[ 5 ];
tmp3 = dataptr[ 3 ] + dataptr[ 4 ];
tmp4 = dataptr[ 3 ] - dataptr[ 4 ];
/* Outputs 0, 2, 4 and 6 are computed from the sums tmp0..tmp3 only. */
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
/* Outputs 0 and 4 involve no multiplication; they are only scaled up by
   PASS1_BITS. */
dataptr[ 0 ] = ( int ) ( ( tmp10 + tmp11 ) << PASS1_BITS );
dataptr[ 4 ] = ( int ) ( ( tmp10 - tmp11 ) << PASS1_BITS );
/* z1 is the product shared by outputs 2 and 6. */
z1 = ( tmp12 + tmp13 ) * FIX_0_541196100;
dataptr[ 2 ] = ( int ) DESCALE( z1 + tmp13 * FIX_0_765366865,
CONST_BITS - PASS1_BITS );
dataptr[ 6 ] = ( int ) DESCALE( z1 + tmp12 * ( - FIX_1_847759065 ),
CONST_BITS - PASS1_BITS );
/* Outputs 1, 3, 5 and 7 are computed from the differences tmp4..tmp7 only.
   The sums z1..z4 must be formed before tmp4..tmp7 and z1..z4 are
   overwritten with their scaled products below, so the order matters. */
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = ( z3 + z4 ) * FIX_1_175875602; /* sqrt(2) * c3 */
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = z1 * ( - FIX_0_899976223 ); /* sqrt(2) * (c7-c3) */
z2 = z2 * ( - FIX_2_562915447 ); /* sqrt(2) * (-c1-c3) */
z3 = z3 * ( - FIX_1_961570560 ); /* sqrt(2) * (-c3-c5) */
z4 = z4 * ( - FIX_0_390180644 ); /* sqrt(2) * (c5-c3) */
/* z5 is common to both z3 and z4. */
z3 += z5;
z4 += z5;
dataptr[ 7 ] = ( int ) DESCALE( tmp4 + z1 + z3, CONST_BITS - PASS1_BITS );
dataptr[ 5 ] = ( int ) DESCALE( tmp5 + z2 + z4, CONST_BITS - PASS1_BITS );
dataptr[ 3 ] = ( int ) DESCALE( tmp6 + z2 + z3, CONST_BITS - PASS1_BITS );
dataptr[ 1 ] = ( int ) DESCALE( tmp7 + z1 + z4, CONST_BITS - PASS1_BITS );
dataptr += DCTSIZE; /* advance pointer to next row */
}
/* Pass 2: process columns. Same arithmetic as pass 1, but the elements of
   one column are DCTSIZE ints apart, and the PASS1_BITS scaling introduced
   by pass 1 is removed again: outputs 0 and 4 are descaled by PASS1_BITS and
   all other outputs by CONST_BITS + PASS1_BITS. */
dataptr = jfdctint_data;
/* As above: ctr runs from DCTSIZE - 1 down to 0, exactly 8 iterations. */
_Pragma( "loopbound min 8 max 8" )
for ( ctr = DCTSIZE - 1; ctr >= 0; ctr-- ) {
/* Sums and differences of the mirrored samples of this column. */
tmp0 = dataptr[ DCTSIZE * 0 ] + dataptr[ DCTSIZE * 7 ];
tmp7 = dataptr[ DCTSIZE * 0 ] - dataptr[ DCTSIZE * 7 ];
tmp1 = dataptr[ DCTSIZE * 1 ] + dataptr[ DCTSIZE * 6 ];
tmp6 = dataptr[ DCTSIZE * 1 ] - dataptr[ DCTSIZE * 6 ];
tmp2 = dataptr[ DCTSIZE * 2 ] + dataptr[ DCTSIZE * 5 ];
tmp5 = dataptr[ DCTSIZE * 2 ] - dataptr[ DCTSIZE * 5 ];
tmp3 = dataptr[ DCTSIZE * 3 ] + dataptr[ DCTSIZE * 4 ];
tmp4 = dataptr[ DCTSIZE * 3 ] - dataptr[ DCTSIZE * 4 ];
/* Outputs 0, 2, 4 and 6 of the column, from the sums only. */
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[ DCTSIZE * 0 ] = ( int ) DESCALE( tmp10 + tmp11, PASS1_BITS );
dataptr[ DCTSIZE * 4 ] = ( int ) DESCALE( tmp10 - tmp11, PASS1_BITS );
z1 = ( tmp12 + tmp13 ) * FIX_0_541196100;
dataptr[ DCTSIZE * 2 ] = ( int ) DESCALE( z1 + tmp13 * FIX_0_765366865,
CONST_BITS + PASS1_BITS );
dataptr[ DCTSIZE * 6 ] = ( int ) DESCALE( z1
+ tmp12 * ( - FIX_1_847759065 ),
CONST_BITS + PASS1_BITS );
/* Outputs 1, 3, 5 and 7 of the column, from the differences only; the same
   ordering constraint as in pass 1 applies. */
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = ( z3 + z4 ) * FIX_1_175875602; /* sqrt(2) * c3 */
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = z1 * ( - FIX_0_899976223 ); /* sqrt(2) * (c7-c3) */
z2 = z2 * ( - FIX_2_562915447 ); /* sqrt(2) * (-c1-c3) */
z3 = z3 * ( - FIX_1_961570560 ); /* sqrt(2) * (-c3-c5) */
z4 = z4 * ( - FIX_0_390180644 ); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[ DCTSIZE * 7 ] = ( int ) DESCALE( tmp4 + z1 + z3,
CONST_BITS + PASS1_BITS );
dataptr[ DCTSIZE * 5 ] = ( int ) DESCALE( tmp5 + z2 + z4,
CONST_BITS + PASS1_BITS );
dataptr[ DCTSIZE * 3 ] = ( int ) DESCALE( tmp6 + z2 + z3,
CONST_BITS + PASS1_BITS );
dataptr[ DCTSIZE * 1 ] = ( int ) DESCALE( tmp7 + z1 + z4,
CONST_BITS + PASS1_BITS );
dataptr++; /* advance pointer to next column */
}
}
/* Main function (the part of the benchmark that is timed).
Runs the forward DCT once on jfdctint_data. The _Pragma in the definition
below tags the function with the "entrypoint" pragma.

Note kept from the original benchmark: the function execution time was
measured using a logic analyzer, which measures the OFF time of a LED on
board. The switching latency, including the function call/return time,
was measured to be equal to 1.1us (22 clock cycles).
*/
void _Pragma ( "entrypoint" ) jfdctint_main( void )
{
jfdctint_jpeg_fdct_islow();
}
/* Benchmark driver: initialise the input block, run the kernel once and
   hand back the self-check result of jfdctint_return() (0 when the checksum
   matches, -1 otherwise). */
int main( void )
{
  int status;

  jfdctint_init();
  jfdctint_main();
  status = jfdctint_return();

  return status;
}