Add wasm tacle-bench targets
This commit is contained in:
25
targets/wasm-tacle/kernel/jfdctint/CMakeLists.txt
Normal file
25
targets/wasm-tacle/kernel/jfdctint/CMakeLists.txt
Normal file
@ -0,0 +1,25 @@
|
||||
# ~~~
|
||||
# SPDX-License-Identifier: MIT
|
||||
# SPDX-FileCopyrightText: 2026, Friedrich-Alexander-Universität Erlangen-Nürnberg (FAU)
|
||||
# ~~~
|
||||
|
||||
cmake_minimum_required(VERSION 3.20)

project(jfdctint)

# Directory anchors, resolved relative to this file so the benchmark can be
# configured from any build directory.
# NOTE(review): three levels above targets/wasm-tacle/kernel/jfdctint is
# targets/, not targets/wasm-tacle/ -- confirm this is the intended root.
set(TACLEBENCH_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/../../..")
set(REPOSITORY_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../..")

# Name the application after *this* project. PROJECT_NAME is used instead of
# CMAKE_PROJECT_NAME because the latter is the top-level project's name and
# would be wrong (and identical for every benchmark) if this directory were
# added as a subproject.
set(APP_TARGET_NAME "${PROJECT_NAME}")

# Select the pre-generated C source: the "inline" variant when explicitly
# requested through TACLEBENCH_VARIANT, otherwise the default variant.
if(DEFINED TACLEBENCH_VARIANT AND "${TACLEBENCH_VARIANT}" STREQUAL "inline")
  set(APP_SOURCE_FILE_PATH
      "generated/modified_sources/inline/jfdctint.c")
else()
  set(APP_SOURCE_FILE_PATH
      "generated/modified_sources/default/jfdctint.c")
endif()

# Shared build logic; it reads the variables set above. The path is quoted so
# that a checkout location containing spaces or semicolons is not split into
# several arguments.
include("${REPOSITORY_ROOT_PATH}/cmake/taclebench_wasm.cmake")
|
||||
|
||||
|
||||
70
targets/wasm-tacle/kernel/jfdctint/ChangeLog.txt
Executable file
70
targets/wasm-tacle/kernel/jfdctint/ChangeLog.txt
Executable file
@ -0,0 +1,70 @@
|
||||
File: jfdctint.c
|
||||
Original provenience: SNU-RT Benchmark Suite for Worst Case Timing Analysis
|
||||
|
||||
2016-02-01:
|
||||
- Added generic TACLeBench header.
|
||||
- Removed old file header (keep some information in TACLeBench header).
|
||||
- Renamed global variable data to jfdctint_data.
|
||||
- Renamed main to jfdctint_main.
|
||||
- Moved initialisation code to jfdctint_init
|
||||
- Implemented new main function according to TACLeBench guidelines.
|
||||
- Implemented new function jfdctint_return, calculates checksum over
|
||||
all data.
|
||||
- Applied code formatting according to the following rules
|
||||
- Lines shall not be wider than 80 characters; whenever possible, appropriate
|
||||
line breaks shall be inserted to keep lines below 80 characters
|
||||
- Indentation is done using whitespaces only, no tabs. Code is indented by
|
||||
two whitespaces
|
||||
- Two empty lines are put between any two functions
|
||||
- In non-empty lists or index expressions, opening '(' and '[' are followed by
|
||||
one whitespace, closing ')' and ']' are preceded by one whitespace
|
||||
- In comma- or colon-separated argument lists, one whitespace is put after
|
||||
each comma/colon
|
||||
- Names of functions and global variables all start with a benchmark-specific
|
||||
prefix (here: bs_) followed by lowercase letter (e.g., bs_square)
|
||||
- For pointer types, one whitespace is put before the '*'
|
||||
- Operators within expressions shall be preceded and followed by one
|
||||
whitespace
|
||||
- Code of then- and else-parts of if-then-else statements shall be put in
|
||||
separate lines, not in the same lines as the if-condition or the keyword
|
||||
"else"
|
||||
- Opening braces '{' denoting the beginning of code for some if-else or loop
|
||||
body shall be put at the end of the same line where the keywords "if",
|
||||
"else", "for", "while" etc. occur
|
||||
- In non-empty lists or index expressions, opening '(' and '[' are followed by
|
||||
one whitespace, closing ')' and ']' are preceded by one whitespace
|
||||
- Operators within expressions shall be preceded and followed by one
|
||||
whitespace
|
||||
|
||||
2016-02-03:
|
||||
- Removed all PROFILINGs.
|
||||
- Macro types replaced by actual types:
|
||||
- Replaced INT32 with int.
|
||||
- Replaced DCTELEM with int.
|
||||
- Removed macros:
|
||||
- GLOBAL (useless)
|
||||
- Unused "FIX_... FIX(..)" definitions (unused)
|
||||
- BITS_IN_JSAMPLE (used in #ifdef...#else..., keep only #if part)
|
||||
- SHIFT_TEMPS (empty)
|
||||
- JPEG_INTERNALS (unused)
|
||||
- MULTIPLY (simply multiply *)
|
||||
- ONE (used only once)
|
||||
- RIGHT_SHIFT (used only once)
|
||||
|
||||
2016-04-05:
|
||||
- Return '0' on success
|
||||
|
||||
2016-04-06:
|
||||
- Fixed generation of return value
|
||||
|
||||
2016-04-21:
|
||||
- Fixed checksum value
|
||||
- Fixed license
|
||||
|
||||
2016-06-01:
|
||||
- Changed all prefixes to lower-case
|
||||
- Changed return type of jfdctint_main
|
||||
|
||||
2016-06-08:
|
||||
- Prefix
|
||||
- removed return from jfdctint_main
|
||||
383
targets/wasm-tacle/kernel/jfdctint/README
Executable file
383
targets/wasm-tacle/kernel/jfdctint/README
Executable file
@ -0,0 +1,383 @@
|
||||
The Independent JPEG Group's JPEG software
|
||||
==========================================
|
||||
|
||||
README for release 6a of 7-Feb-96
|
||||
=================================
|
||||
|
||||
This distribution contains the sixth public release of the Independent JPEG
|
||||
Group's free JPEG software. You are welcome to redistribute this software and
|
||||
to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
|
||||
|
||||
Serious users of this software (particularly those incorporating it into
|
||||
larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to
|
||||
our electronic mailing list. Mailing list members are notified of updates
|
||||
and have a chance to participate in technical discussions, etc.
|
||||
|
||||
This software is the work of Tom Lane, Philip Gladstone, Luis Ortiz, Jim
|
||||
Boucher, Lee Crocker, Julian Minguillon, George Phillips, Davide Rossi,
|
||||
Ge' Weijers, and other members of the Independent JPEG Group.
|
||||
|
||||
IJG is not affiliated with the official ISO JPEG standards committee.
|
||||
|
||||
|
||||
DOCUMENTATION ROADMAP
|
||||
=====================
|
||||
|
||||
This file contains the following sections:
|
||||
|
||||
OVERVIEW General description of JPEG and the IJG software.
|
||||
LEGAL ISSUES Copyright, lack of warranty, terms of distribution.
|
||||
REFERENCES Where to learn more about JPEG.
|
||||
ARCHIVE LOCATIONS Where to find newer versions of this software.
|
||||
RELATED SOFTWARE Other stuff you should get.
|
||||
FILE FORMAT WARS Software *not* to get.
|
||||
TO DO Plans for future IJG releases.
|
||||
|
||||
Other documentation files in the distribution are:
|
||||
|
||||
User documentation:
|
||||
install.doc How to configure and install the IJG software.
|
||||
usage.doc Usage instructions for cjpeg, djpeg, jpegtran,
|
||||
rdjpgcom, and wrjpgcom.
|
||||
*.1 Unix-style man pages for programs (same info as usage.doc).
|
||||
wizard.doc Advanced usage instructions for JPEG wizards only.
|
||||
change.log Version-to-version change highlights.
|
||||
Programmer and internal documentation:
|
||||
libjpeg.doc How to use the JPEG library in your own programs.
|
||||
example.c Sample code for calling the JPEG library.
|
||||
structure.doc Overview of the JPEG library's internal structure.
|
||||
filelist.doc Road map of IJG files.
|
||||
coderules.doc Coding style rules --- please read if you contribute code.
|
||||
|
||||
Please read at least the files install.doc and usage.doc. Useful information
|
||||
can also be found in the JPEG FAQ (Frequently Asked Questions) article. See
|
||||
ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
|
||||
|
||||
If you want to understand how the JPEG code works, we suggest reading one or
|
||||
more of the REFERENCES, then looking at the documentation files (in roughly
|
||||
the order listed) before diving into the code.
|
||||
|
||||
|
||||
OVERVIEW
|
||||
========
|
||||
|
||||
This package contains C software to implement JPEG image compression and
|
||||
decompression. JPEG (pronounced "jay-peg") is a standardized compression
|
||||
method for full-color and gray-scale images. JPEG is intended for compressing
|
||||
"real-world" scenes; line drawings, cartoons and other non-realistic images
|
||||
are not its strong suit. JPEG is lossy, meaning that the output image is not
|
||||
exactly identical to the input image. Hence you must not use JPEG if you
|
||||
have to have identical output bits. However, on typical photographic images,
|
||||
very good compression levels can be obtained with no visible change, and
|
||||
remarkably high compression levels are possible if you can tolerate a
|
||||
low-quality image. For more details, see the references, or just experiment
|
||||
with various compression settings.
|
||||
|
||||
This software implements JPEG baseline, extended-sequential, and progressive
|
||||
compression processes. Provision is made for supporting all variants of these
|
||||
processes, although some uncommon parameter settings aren't implemented yet.
|
||||
For legal reasons, we are not distributing code for the arithmetic-coding
|
||||
variants of JPEG; see LEGAL ISSUES. We have made no provision for supporting
|
||||
the hierarchical or lossless processes defined in the standard.
|
||||
|
||||
We provide a set of library routines for reading and writing JPEG image files,
|
||||
plus two sample applications "cjpeg" and "djpeg", which use the library to
|
||||
perform conversion between JPEG and some other popular image file formats.
|
||||
The library is intended to be reused in other applications.
|
||||
|
||||
In order to support file conversion and viewing software, we have included
|
||||
considerable functionality beyond the bare JPEG coding/decoding capability;
|
||||
for example, the color quantization modules are not strictly part of JPEG
|
||||
decoding, but they are essential for output to colormapped file formats or
|
||||
colormapped displays. These extra functions can be compiled out of the
|
||||
library if not required for a particular application. We have also included
|
||||
"jpegtran", a utility for lossless transcoding between different JPEG
|
||||
processes, and "rdjpgcom" and "wrjpgcom", two simple applications for
|
||||
inserting and extracting textual comments in JFIF files.
|
||||
|
||||
The emphasis in designing this software has been on achieving portability and
|
||||
flexibility, while also making it fast enough to be useful. In particular,
|
||||
the software is not intended to be read as a tutorial on JPEG. (See the
|
||||
REFERENCES section for introductory material.) Rather, it is intended to
|
||||
be reliable, portable, industrial-strength code. We do not claim to have
|
||||
achieved that goal in every aspect of the software, but we strive for it.
|
||||
|
||||
We welcome the use of this software as a component of commercial products.
|
||||
No royalty is required, but we do ask for an acknowledgement in product
|
||||
documentation, as described under LEGAL ISSUES.
|
||||
|
||||
|
||||
LEGAL ISSUES
|
||||
============
|
||||
|
||||
In plain English:
|
||||
|
||||
1. We don't promise that this software works. (But if you find any bugs,
|
||||
please let us know!)
|
||||
2. You can use this software for whatever you want. You don't have to pay us.
|
||||
3. You may not pretend that you wrote this software. If you use it in a
|
||||
program, you must acknowledge somewhere in your documentation that
|
||||
you've used the IJG code.
|
||||
|
||||
In legalese:
|
||||
|
||||
The authors make NO WARRANTY or representation, either express or implied,
|
||||
with respect to this software, its quality, accuracy, merchantability, or
|
||||
fitness for a particular purpose. This software is provided "AS IS", and you,
|
||||
its user, assume the entire risk as to its quality and accuracy.
|
||||
|
||||
This software is copyright (C) 1991-1996, Thomas G. Lane.
|
||||
All Rights Reserved except as specified below.
|
||||
|
||||
Permission is hereby granted to use, copy, modify, and distribute this
|
||||
software (or portions thereof) for any purpose, without fee, subject to these
|
||||
conditions:
|
||||
(1) If any part of the source code for this software is distributed, then this
|
||||
README file must be included, with this copyright and no-warranty notice
|
||||
unaltered; and any additions, deletions, or changes to the original files
|
||||
must be clearly indicated in accompanying documentation.
|
||||
(2) If only executable code is distributed, then the accompanying
|
||||
documentation must state that "this software is based in part on the work of
|
||||
the Independent JPEG Group".
|
||||
(3) Permission for use of this software is granted only if the user accepts
|
||||
full responsibility for any undesirable consequences; the authors accept
|
||||
NO LIABILITY for damages of any kind.
|
||||
|
||||
These conditions apply to any software derived from or based on the IJG code,
|
||||
not just to the unmodified library. If you use our work, you ought to
|
||||
acknowledge us.
|
||||
|
||||
Permission is NOT granted for the use of any IJG author's name or company name
|
||||
in advertising or publicity relating to this software or products derived from
|
||||
it. This software may be referred to only as "the Independent JPEG Group's
|
||||
software".
|
||||
|
||||
We specifically permit and encourage the use of this software as the basis of
|
||||
commercial products, provided that all warranty or liability claims are
|
||||
assumed by the product vendor.
|
||||
|
||||
|
||||
ansi2knr.c is included in this distribution by permission of L. Peter Deutsch,
|
||||
sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA.
|
||||
ansi2knr.c is NOT covered by the above copyright and conditions, but instead
|
||||
by the usual distribution terms of the Free Software Foundation; principally,
|
||||
that you must include source code if you redistribute it. (See the file
|
||||
ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part
|
||||
of any program generated from the IJG code, this does not limit you more than
|
||||
the foregoing paragraphs do.
|
||||
|
||||
The configuration script "configure" was produced with GNU Autoconf. It
|
||||
is copyright by the Free Software Foundation but is freely distributable.
|
||||
|
||||
It appears that the arithmetic coding option of the JPEG spec is covered by
|
||||
patents owned by IBM, AT&T, and Mitsubishi. Hence arithmetic coding cannot
|
||||
legally be used without obtaining one or more licenses. For this reason,
|
||||
support for arithmetic coding has been removed from the free JPEG software.
|
||||
(Since arithmetic coding provides only a marginal gain over the unpatented
|
||||
Huffman mode, it is unlikely that very many implementations will support it.)
|
||||
So far as we are aware, there are no patent restrictions on the remaining
|
||||
code.
|
||||
|
||||
WARNING: Unisys has begun to enforce their patent on LZW compression against
|
||||
GIF encoders and decoders. You will need a license from Unisys to use the
|
||||
included rdgif.c or wrgif.c files in a commercial or shareware application.
|
||||
At this time, Unisys is not enforcing their patent against freeware, so
|
||||
distribution of this package remains legal. However, we intend to remove
|
||||
GIF support from the IJG package as soon as a suitable replacement format
|
||||
becomes reasonably popular.
|
||||
|
||||
We are required to state that
|
||||
"The Graphics Interchange Format(c) is the Copyright property of
|
||||
CompuServe Incorporated. GIF(sm) is a Service Mark property of
|
||||
CompuServe Incorporated."
|
||||
|
||||
|
||||
REFERENCES
|
||||
==========
|
||||
|
||||
We highly recommend reading one or more of these references before trying to
|
||||
understand the innards of the JPEG software.
|
||||
|
||||
The best short technical introduction to the JPEG compression algorithm is
|
||||
Wallace, Gregory K. "The JPEG Still Picture Compression Standard",
|
||||
Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
|
||||
(Adjacent articles in that issue discuss MPEG motion picture compression,
|
||||
applications of JPEG, and related topics.) If you don't have the CACM issue
|
||||
handy, a PostScript file containing a revised version of Wallace's article
|
||||
is available at ftp.uu.net, graphics/jpeg/wallace.ps.gz. The file (actually
|
||||
a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
|
||||
omits the sample images that appeared in CACM, but it includes corrections
|
||||
and some added material. Note: the Wallace article is copyright ACM and
|
||||
IEEE, and it may not be used for commercial purposes.
|
||||
|
||||
A somewhat less technical, more leisurely introduction to JPEG can be found in
|
||||
"The Data Compression Book" by Mark Nelson, published by M&T Books (Redwood
|
||||
City, CA), 1991, ISBN 1-55851-216-0. This book provides good explanations and
|
||||
example C code for a multitude of compression methods including JPEG. It is
|
||||
an excellent source if you are comfortable reading C code but don't know much
|
||||
about data compression in general. The book's JPEG sample code is far from
|
||||
industrial-strength, but when you are ready to look at a full implementation,
|
||||
you've got one here...
|
||||
|
||||
The best full description of JPEG is the textbook "JPEG Still Image Data
|
||||
Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published
|
||||
by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. Price US$59.95, 638 pp.
|
||||
The book includes the complete text of the ISO JPEG standards (DIS 10918-1
|
||||
and draft DIS 10918-2). This is by far the most complete exposition of JPEG
|
||||
in existence, and we highly recommend it.
|
||||
|
||||
The JPEG standard itself is not available electronically; you must order a
|
||||
paper copy through ISO or ITU. (Unless you feel a need to own a certified
|
||||
official copy, we recommend buying the Pennebaker and Mitchell book instead;
|
||||
it's much cheaper and includes a great deal of useful explanatory material.)
|
||||
In the USA, copies of the standard may be ordered from ANSI Sales at (212)
|
||||
642-4900, or from Global Engineering Documents at (800) 854-7179. (ANSI
|
||||
doesn't take credit card orders, but Global does.) It's not cheap: as of
|
||||
1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7%
|
||||
shipping/handling. The standard is divided into two parts, Part 1 being the
|
||||
actual specification, while Part 2 covers compliance testing methods. Part 1
|
||||
is titled "Digital Compression and Coding of Continuous-tone Still Images,
|
||||
Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
|
||||
10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of
|
||||
Continuous-tone Still Images, Part 2: Compliance testing" and has document
|
||||
numbers ISO/IEC IS 10918-2, ITU-T T.83.
|
||||
|
||||
Extensions to the original JPEG standard are defined in JPEG Part 3, a new ISO
|
||||
document. Part 3 is undergoing ISO balloting and is expected to be approved
|
||||
by the end of 1995; it will have document numbers ISO/IEC IS 10918-3, ITU-T
|
||||
T.84. IJG currently does not support any Part 3 extensions.
|
||||
|
||||
The JPEG standard does not specify all details of an interchangeable file
|
||||
format. For the omitted details we follow the "JFIF" conventions, revision
|
||||
1.02. A copy of the JFIF spec is available from:
|
||||
Literature Department
|
||||
C-Cube Microsystems, Inc.
|
||||
1778 McCarthy Blvd.
|
||||
Milpitas, CA 95035
|
||||
phone (408) 944-6300, fax (408) 944-6314
|
||||
A PostScript version of this document is available at ftp.uu.net, file
|
||||
graphics/jpeg/jfif.ps.gz. It can also be obtained by e-mail from the C-Cube
|
||||
mail server, netlib@c3.pla.ca.us. Send the message "send jfif_ps from jpeg"
|
||||
to the server to obtain the JFIF document; send the message "help" if you have
|
||||
trouble.
|
||||
|
||||
The TIFF 6.0 file format specification can be obtained by FTP from sgi.com
|
||||
(192.48.153.1), file graphics/tiff/TIFF6.ps.Z; or you can order a printed
|
||||
copy from Aldus Corp. at (206) 628-6593. The JPEG incorporation scheme
|
||||
found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
|
||||
IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
|
||||
Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
|
||||
(Compression tag 7). Copies of this Note can be obtained from sgi.com or
|
||||
from ftp.uu.net:/graphics/jpeg/. It is expected that the next revision of
|
||||
the TIFF spec will replace the 6.0 JPEG design with the Note's design.
|
||||
Although IJG's own code does not support TIFF/JPEG, the free libtiff library
|
||||
uses our library to implement TIFF/JPEG per the Note. libtiff is available
|
||||
from sgi.com:/graphics/tiff/.
|
||||
|
||||
|
||||
ARCHIVE LOCATIONS
|
||||
=================
|
||||
|
||||
The "official" archive site for this software is ftp.uu.net (Internet
|
||||
address 192.48.96.9). The most recent released version can always be found
|
||||
there in directory graphics/jpeg. This particular version will be archived
|
||||
as graphics/jpeg/jpegsrc.v6a.tar.gz. If you are on the Internet, you
|
||||
can retrieve files from ftp.uu.net by standard anonymous FTP. If you don't
|
||||
have FTP access, UUNET's archives are also available via UUCP; contact
|
||||
help@uunet.uu.net for information on retrieving files that way.
|
||||
|
||||
Numerous Internet sites maintain copies of the UUNET files. However, only
|
||||
ftp.uu.net is guaranteed to have the latest official version.
|
||||
|
||||
You can also obtain this software in DOS-compatible "zip" archive format from
|
||||
the SimTel archives (ftp.coast.net:/SimTel/msdos/graphics/), or on CompuServe
|
||||
in the Graphics Support forum (GO CIS:GRAPHSUP), library 12 "JPEG Tools".
|
||||
Again, these versions may sometimes lag behind the ftp.uu.net release.
|
||||
|
||||
The JPEG FAQ (Frequently Asked Questions) article is a useful source of
|
||||
general information about JPEG. It is updated constantly and therefore is
|
||||
not included in this distribution. The FAQ is posted every two weeks to
|
||||
Usenet newsgroups comp.graphics.misc, news.answers, and other groups.
|
||||
You can always obtain the latest version from the news.answers archive at
|
||||
rtfm.mit.edu. By FTP, fetch /pub/usenet/news.answers/jpeg-faq/part1 and
|
||||
.../part2. If you don't have FTP, send e-mail to mail-server@rtfm.mit.edu
|
||||
with body
|
||||
send usenet/news.answers/jpeg-faq/part1
|
||||
send usenet/news.answers/jpeg-faq/part2
|
||||
|
||||
|
||||
RELATED SOFTWARE
|
||||
================
|
||||
|
||||
Numerous viewing and image manipulation programs now support JPEG. (Quite a
|
||||
few of them use this library to do so.) The JPEG FAQ described above lists
|
||||
some of the more popular free and shareware viewers, and tells where to
|
||||
obtain them on Internet.
|
||||
|
||||
If you are on a Unix machine, we highly recommend Jef Poskanzer's free
|
||||
PBMPLUS image software, which provides many useful operations on PPM-format
|
||||
image files. In particular, it can convert PPM images to and from a wide
|
||||
range of other formats. You can obtain this package by FTP from ftp.x.org
|
||||
(contrib/pbmplus*.tar.Z) or ftp.ee.lbl.gov (pbmplus*.tar.Z). There is also
|
||||
a newer update of this package called NETPBM, available from
|
||||
wuarchive.wustl.edu under directory /graphics/graphics/packages/NetPBM/.
|
||||
Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software
|
||||
is; you are likely to have difficulty making it work on any non-Unix machine.
|
||||
|
||||
A different free JPEG implementation, written by the PVRG group at Stanford,
|
||||
is available from havefun.stanford.edu in directory pub/jpeg. This program
|
||||
is designed for research and experimentation rather than production use;
|
||||
it is slower, harder to use, and less portable than the IJG code, but it
|
||||
is easier to read and modify. Also, the PVRG code supports lossless JPEG,
|
||||
which we do not.
|
||||
|
||||
|
||||
FILE FORMAT WARS
|
||||
================
|
||||
|
||||
Some JPEG programs produce files that are not compatible with our library.
|
||||
The root of the problem is that the ISO JPEG committee failed to specify a
|
||||
concrete file format. Some vendors "filled in the blanks" on their own,
|
||||
creating proprietary formats that no one else could read. (For example, none
|
||||
of the early commercial JPEG implementations for the Macintosh were able to
|
||||
exchange compressed files.)
|
||||
|
||||
The file format we have adopted is called JFIF (see REFERENCES). This format
|
||||
has been agreed to by a number of major commercial JPEG vendors, and it has
|
||||
become the de facto standard. JFIF is a minimal or "low end" representation.
|
||||
We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF
|
||||
Technical Note #2) for "high end" applications that need to record a lot of
|
||||
additional data about an image. TIFF/JPEG is fairly new and not yet widely
|
||||
supported, unfortunately.
|
||||
|
||||
The upcoming JPEG Part 3 standard defines a file format called SPIFF.
|
||||
SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should
|
||||
be able to read the most common variant of SPIFF. SPIFF has some technical
|
||||
advantages over JFIF, but its major claim to fame is simply that it is an
|
||||
official standard rather than an informal one. At this point it is unclear
|
||||
whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto
|
||||
standard. IJG intends to support SPIFF once the standard is frozen, but we
|
||||
have not decided whether it should become our default output format or not.
|
||||
(In any case, our decoder will remain capable of reading JFIF indefinitely.)
|
||||
|
||||
Various proprietary file formats incorporating JPEG compression also exist.
|
||||
We have little or no sympathy for the existence of these formats. Indeed,
|
||||
one of the original reasons for developing this free software was to help
|
||||
force convergence on common, open format standards for JPEG files. Don't
|
||||
use a proprietary file format!
|
||||
|
||||
|
||||
TO DO
|
||||
=====
|
||||
|
||||
In future versions, we are considering supporting some of the upcoming JPEG
|
||||
Part 3 extensions --- principally, variable quantization and the SPIFF file
|
||||
format.
|
||||
|
||||
Tuning the software for better behavior at low quality/high compression
|
||||
settings is also of interest. The current method for scaling the
|
||||
quantization tables is known not to be very good at low Q values.
|
||||
|
||||
As always, speeding things up is high on our priority list.
|
||||
|
||||
Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net.
|
||||
BIN
targets/wasm-tacle/kernel/jfdctint/generated/default/jfdctint.wasm
Executable file
BIN
targets/wasm-tacle/kernel/jfdctint/generated/default/jfdctint.wasm
Executable file
Binary file not shown.
@ -0,0 +1,679 @@
|
||||
(module $jfdctint.wasm
|
||||
(type (;0;) (func (param i32 i32)))
|
||||
(type (;1;) (func))
|
||||
(type (;2;) (func (result i32)))
|
||||
(import "__pragma" "loopbound" (func $__pragma_loopbound (type 0)))
|
||||
;; Function of type 1 (no parameters, no results) with an empty body.
;; NOTE(review): the name suggests a toolchain-emitted data-relocation hook;
;; confirm against the linker that produced this module.
(func $__wasm_apply_data_relocs (type 1))
|
||||
(func $jfdctint_return (type 2) (result i32)
  ;; Checksum routine, written in folded form. Operand order matches the
  ;; flat instruction sequence exactly, so the encoded function is unchanged.
  ;;
  ;; 1. Calls the imported $__pragma_loopbound with the arguments (64, 64).
  ;; 2. Adds up the 64 i32 words stored at linear-memory offsets
  ;;    1276, 1272, ..., 1028, 1024 (base address 0, step 4).
  ;; 3. Returns 0 when that sum equals 1668124, otherwise -1.
  (call $__pragma_loopbound (i32.const 64) (i32.const 64))
  (select
    (i32.const -1) ;; result when the sum differs from the expected value
    (i32.const 0)  ;; result when the sum matches
    (i32.ne
      (i32.add (i32.load offset=1276 (i32.const 0))
      (i32.add (i32.load offset=1272 (i32.const 0))
      (i32.add (i32.load offset=1268 (i32.const 0))
      (i32.add (i32.load offset=1264 (i32.const 0))
      (i32.add (i32.load offset=1260 (i32.const 0))
      (i32.add (i32.load offset=1256 (i32.const 0))
      (i32.add (i32.load offset=1252 (i32.const 0))
      (i32.add (i32.load offset=1248 (i32.const 0))
      (i32.add (i32.load offset=1244 (i32.const 0))
      (i32.add (i32.load offset=1240 (i32.const 0))
      (i32.add (i32.load offset=1236 (i32.const 0))
      (i32.add (i32.load offset=1232 (i32.const 0))
      (i32.add (i32.load offset=1228 (i32.const 0))
      (i32.add (i32.load offset=1224 (i32.const 0))
      (i32.add (i32.load offset=1220 (i32.const 0))
      (i32.add (i32.load offset=1216 (i32.const 0))
      (i32.add (i32.load offset=1212 (i32.const 0))
      (i32.add (i32.load offset=1208 (i32.const 0))
      (i32.add (i32.load offset=1204 (i32.const 0))
      (i32.add (i32.load offset=1200 (i32.const 0))
      (i32.add (i32.load offset=1196 (i32.const 0))
      (i32.add (i32.load offset=1192 (i32.const 0))
      (i32.add (i32.load offset=1188 (i32.const 0))
      (i32.add (i32.load offset=1184 (i32.const 0))
      (i32.add (i32.load offset=1180 (i32.const 0))
      (i32.add (i32.load offset=1176 (i32.const 0))
      (i32.add (i32.load offset=1172 (i32.const 0))
      (i32.add (i32.load offset=1168 (i32.const 0))
      (i32.add (i32.load offset=1164 (i32.const 0))
      (i32.add (i32.load offset=1160 (i32.const 0))
      (i32.add (i32.load offset=1156 (i32.const 0))
      (i32.add (i32.load offset=1152 (i32.const 0))
      (i32.add (i32.load offset=1148 (i32.const 0))
      (i32.add (i32.load offset=1144 (i32.const 0))
      (i32.add (i32.load offset=1140 (i32.const 0))
      (i32.add (i32.load offset=1136 (i32.const 0))
      (i32.add (i32.load offset=1132 (i32.const 0))
      (i32.add (i32.load offset=1128 (i32.const 0))
      (i32.add (i32.load offset=1124 (i32.const 0))
      (i32.add (i32.load offset=1120 (i32.const 0))
      (i32.add (i32.load offset=1116 (i32.const 0))
      (i32.add (i32.load offset=1112 (i32.const 0))
      (i32.add (i32.load offset=1108 (i32.const 0))
      (i32.add (i32.load offset=1104 (i32.const 0))
      (i32.add (i32.load offset=1100 (i32.const 0))
      (i32.add (i32.load offset=1096 (i32.const 0))
      (i32.add (i32.load offset=1092 (i32.const 0))
      (i32.add (i32.load offset=1088 (i32.const 0))
      (i32.add (i32.load offset=1084 (i32.const 0))
      (i32.add (i32.load offset=1080 (i32.const 0))
      (i32.add (i32.load offset=1076 (i32.const 0))
      (i32.add (i32.load offset=1072 (i32.const 0))
      (i32.add (i32.load offset=1068 (i32.const 0))
      (i32.add (i32.load offset=1064 (i32.const 0))
      (i32.add (i32.load offset=1060 (i32.const 0))
      (i32.add (i32.load offset=1056 (i32.const 0))
      (i32.add (i32.load offset=1052 (i32.const 0))
      (i32.add (i32.load offset=1048 (i32.const 0))
      (i32.add (i32.load offset=1044 (i32.const 0))
      (i32.add (i32.load offset=1040 (i32.const 0))
      (i32.add (i32.load offset=1036 (i32.const 0))
      (i32.add (i32.load offset=1032 (i32.const 0))
      (i32.add (i32.load offset=1028 (i32.const 0))
               (i32.load offset=1024 (i32.const 0))
      ;; close the 63 nested i32.add forms (7 rows of 9)
      )))))))))
      )))))))))
      )))))))))
      )))))))))
      )))))))))
      )))))))))
      )))))))))
      (i32.const 1668124))))
|
||||
;; In-place transform of the 64 i32 words at linear-memory addresses
;; 1024..1279. The body is two loops, each preceded by a call to
;; $__pragma_loopbound with the arguments (8, 8). The arithmetic constants
;; (2446, 4433, 9633, ...) match the FIX_* constants of
;; jfdctint_jpeg_fdct_islow in the C source, so the loops appear to be its
;; pass 1 (rows) and pass 2 (columns).
(func $jfdctint_jpeg_fdct_islow (type 1)
|
||||
(local i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
|
||||
i32.const 8
|
||||
i32.const 8
|
||||
call $__pragma_loopbound
|
||||
;; First loop: local 0 is a negative byte offset that starts at -256 and
;; grows by 32 (eight words) per iteration until it reaches 0, i.e. 8
;; iterations. 1280 + local 0 addresses word 0 of the current group of
;; eight, so the constants 1280, 1284, ..., 1308 select words 0..7.
i32.const -256
|
||||
local.set 0
|
||||
loop ;; label = @1
|
||||
local.get 0
|
||||
i32.const 1296
|
||||
i32.add
|
||||
local.tee 1
|
||||
local.get 1
|
||||
i32.load
|
||||
local.tee 1
|
||||
local.get 0
|
||||
i32.const 1292
|
||||
i32.add
|
||||
local.tee 2
|
||||
i32.load
|
||||
local.tee 3
|
||||
i32.add
|
||||
local.tee 4
|
||||
local.get 0
|
||||
i32.const 1308
|
||||
i32.add
|
||||
local.tee 5
|
||||
i32.load
|
||||
local.tee 6
|
||||
local.get 0
|
||||
i32.const 1280
|
||||
i32.add
|
||||
local.tee 7
|
||||
i32.load
|
||||
local.tee 8
|
||||
i32.add
|
||||
local.tee 9
|
||||
i32.add
|
||||
local.tee 10
|
||||
local.get 0
|
||||
i32.const 1300
|
||||
i32.add
|
||||
local.tee 11
|
||||
i32.load
|
||||
local.tee 12
|
||||
local.get 0
|
||||
i32.const 1288
|
||||
i32.add
|
||||
local.tee 13
|
||||
i32.load
|
||||
local.tee 14
|
||||
i32.add
|
||||
local.tee 15
|
||||
local.get 0
|
||||
i32.const 1304
|
||||
i32.add
|
||||
local.tee 16
|
||||
i32.load
|
||||
local.tee 17
|
||||
local.get 0
|
||||
i32.const 1284
|
||||
i32.add
|
||||
local.tee 18
|
||||
i32.load
|
||||
local.tee 19
|
||||
i32.add
|
||||
local.tee 20
|
||||
i32.add
|
||||
local.tee 21
|
||||
i32.sub
|
||||
i32.const 2
|
||||
i32.shl
|
||||
;; Store to word 4 (address 1296 + local 0, pushed at the top of the loop).
i32.store
|
||||
local.get 7
|
||||
local.get 10
|
||||
local.get 21
|
||||
i32.add
|
||||
i32.const 2
|
||||
i32.shl
|
||||
;; Store to word 0 (address in local 7).
i32.store
|
||||
local.get 5
|
||||
local.get 3
|
||||
local.get 1
|
||||
i32.sub
|
||||
local.tee 1
|
||||
local.get 8
|
||||
local.get 6
|
||||
i32.sub
|
||||
local.tee 3
|
||||
i32.add
|
||||
i32.const -7373
|
||||
i32.mul
|
||||
;; 1024 = 1 << 10: the rounding term for the shift right by 11 below.
i32.const 1024
|
||||
i32.add
|
||||
local.tee 7
|
||||
local.get 1
|
||||
i32.const 2446
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 1
|
||||
local.get 19
|
||||
local.get 17
|
||||
i32.sub
|
||||
local.tee 6
|
||||
i32.add
|
||||
local.tee 8
|
||||
local.get 14
|
||||
local.get 12
|
||||
i32.sub
|
||||
local.tee 1
|
||||
local.get 3
|
||||
i32.add
|
||||
local.tee 10
|
||||
i32.add
|
||||
i32.const 9633
|
||||
i32.mul
|
||||
local.tee 12
|
||||
local.get 8
|
||||
i32.const -16069
|
||||
i32.mul
|
||||
i32.add
|
||||
local.tee 8
|
||||
i32.add
|
||||
i32.const 11
|
||||
i32.shr_s
|
||||
;; Store to word 7 (address pushed from local 5 above).
i32.store
|
||||
local.get 16
|
||||
local.get 9
|
||||
local.get 4
|
||||
i32.sub
|
||||
local.tee 4
|
||||
local.get 20
|
||||
local.get 15
|
||||
i32.sub
|
||||
local.tee 5
|
||||
i32.add
|
||||
i32.const 4433
|
||||
i32.mul
|
||||
i32.const 1024
|
||||
i32.add
|
||||
local.tee 9
|
||||
local.get 5
|
||||
i32.const -15137
|
||||
i32.mul
|
||||
i32.add
|
||||
i32.const 11
|
||||
i32.shr_s
|
||||
;; Store to word 6 (address in local 16).
i32.store
|
||||
local.get 13
|
||||
local.get 9
|
||||
local.get 4
|
||||
i32.const 6270
|
||||
i32.mul
|
||||
i32.add
|
||||
i32.const 11
|
||||
i32.shr_s
|
||||
;; Store to word 2 (address in local 13).
i32.store
|
||||
local.get 11
|
||||
local.get 1
|
||||
local.get 6
|
||||
i32.add
|
||||
i32.const -20995
|
||||
i32.mul
|
||||
i32.const 1024
|
||||
i32.add
|
||||
local.tee 4
|
||||
local.get 1
|
||||
i32.const 16819
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 12
|
||||
local.get 10
|
||||
i32.const -3196
|
||||
i32.mul
|
||||
i32.add
|
||||
local.tee 1
|
||||
i32.add
|
||||
i32.const 11
|
||||
i32.shr_s
|
||||
;; Store to word 5 (address in local 11).
i32.store
|
||||
local.get 2
|
||||
local.get 4
|
||||
local.get 6
|
||||
i32.const 25172
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 8
|
||||
i32.add
|
||||
i32.const 11
|
||||
i32.shr_s
|
||||
;; Store to word 3 (address in local 2).
i32.store
|
||||
local.get 18
|
||||
local.get 7
|
||||
local.get 3
|
||||
i32.const 12299
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 1
|
||||
i32.add
|
||||
i32.const 11
|
||||
i32.shr_s
|
||||
;; Store to word 1 (address in local 18).
i32.store
|
||||
;; Advance by 32 bytes; the loop repeats while the offset is non-zero.
local.get 0
|
||||
i32.const 32
|
||||
i32.add
|
||||
local.tee 0
|
||||
br_if 0 (;@1;)
|
||||
end
|
||||
i32.const 8
|
||||
i32.const 8
|
||||
call $__pragma_loopbound
|
||||
;; Second loop: local 0 starts at -32 and grows by 4 (one word) per
;; iteration until it reaches 0, i.e. 8 iterations. The constants 1056,
;; 1088, ..., 1280 are 32 bytes apart, so with local 0 added they select the
;; same word position in each of the eight 8-word groups (groups 0..7).
i32.const -32
|
||||
local.set 0
|
||||
loop ;; label = @1
|
||||
local.get 0
|
||||
i32.const 1184
|
||||
i32.add
|
||||
local.tee 1
|
||||
local.get 1
|
||||
i32.load
|
||||
local.tee 1
|
||||
local.get 0
|
||||
i32.const 1152
|
||||
i32.add
|
||||
local.tee 2
|
||||
i32.load
|
||||
local.tee 3
|
||||
i32.add
|
||||
local.tee 4
|
||||
local.get 0
|
||||
i32.const 1280
|
||||
i32.add
|
||||
local.tee 5
|
||||
i32.load
|
||||
local.tee 6
|
||||
local.get 0
|
||||
i32.const 1056
|
||||
i32.add
|
||||
local.tee 7
|
||||
i32.load
|
||||
local.tee 8
|
||||
i32.add
|
||||
local.tee 9
|
||||
i32.add
|
||||
local.tee 10
|
||||
local.get 0
|
||||
i32.const 1216
|
||||
i32.add
|
||||
local.tee 11
|
||||
i32.load
|
||||
local.tee 12
|
||||
local.get 0
|
||||
i32.const 1120
|
||||
i32.add
|
||||
local.tee 13
|
||||
i32.load
|
||||
local.tee 14
|
||||
i32.add
|
||||
local.tee 15
|
||||
local.get 0
|
||||
i32.const 1248
|
||||
i32.add
|
||||
local.tee 16
|
||||
i32.load
|
||||
local.tee 17
|
||||
local.get 0
|
||||
i32.const 1088
|
||||
i32.add
|
||||
local.tee 18
|
||||
i32.load
|
||||
local.tee 19
|
||||
i32.add
|
||||
local.tee 20
|
||||
i32.add
|
||||
local.tee 21
|
||||
i32.sub
|
||||
;; Add 2 (= 1 << 1) before the arithmetic shift right by 2: rounded divide by 4.
i32.const 2
|
||||
i32.add
|
||||
i32.const 2
|
||||
i32.shr_s
|
||||
;; Store to group 4 (address 1184 + local 0, pushed at the top of the loop).
i32.store
|
||||
local.get 7
|
||||
local.get 21
|
||||
local.get 10
|
||||
i32.add
|
||||
i32.const 2
|
||||
i32.add
|
||||
i32.const 2
|
||||
i32.shr_s
|
||||
;; Store to group 0 (address in local 7).
i32.store
|
||||
local.get 5
|
||||
local.get 3
|
||||
local.get 1
|
||||
i32.sub
|
||||
local.tee 1
|
||||
local.get 8
|
||||
local.get 6
|
||||
i32.sub
|
||||
local.tee 3
|
||||
i32.add
|
||||
i32.const -7373
|
||||
i32.mul
|
||||
;; 16384 = 1 << 14: the rounding term for the shift right by 15 below.
i32.const 16384
|
||||
i32.add
|
||||
local.tee 7
|
||||
local.get 1
|
||||
i32.const 2446
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 1
|
||||
local.get 19
|
||||
local.get 17
|
||||
i32.sub
|
||||
local.tee 6
|
||||
i32.add
|
||||
local.tee 8
|
||||
local.get 14
|
||||
local.get 12
|
||||
i32.sub
|
||||
local.tee 1
|
||||
local.get 3
|
||||
i32.add
|
||||
local.tee 10
|
||||
i32.add
|
||||
i32.const 9633
|
||||
i32.mul
|
||||
local.tee 12
|
||||
local.get 8
|
||||
i32.const -16069
|
||||
i32.mul
|
||||
i32.add
|
||||
local.tee 8
|
||||
i32.add
|
||||
i32.const 15
|
||||
i32.shr_s
|
||||
;; Store to group 7 (address pushed from local 5 above).
i32.store
|
||||
local.get 16
|
||||
local.get 9
|
||||
local.get 4
|
||||
i32.sub
|
||||
local.tee 4
|
||||
local.get 20
|
||||
local.get 15
|
||||
i32.sub
|
||||
local.tee 5
|
||||
i32.add
|
||||
i32.const 4433
|
||||
i32.mul
|
||||
i32.const 16384
|
||||
i32.add
|
||||
local.tee 9
|
||||
local.get 5
|
||||
i32.const -15137
|
||||
i32.mul
|
||||
i32.add
|
||||
i32.const 15
|
||||
i32.shr_s
|
||||
;; Store to group 6 (address in local 16).
i32.store
|
||||
local.get 13
|
||||
local.get 9
|
||||
local.get 4
|
||||
i32.const 6270
|
||||
i32.mul
|
||||
i32.add
|
||||
i32.const 15
|
||||
i32.shr_s
|
||||
;; Store to group 2 (address in local 13).
i32.store
|
||||
local.get 11
|
||||
local.get 1
|
||||
local.get 6
|
||||
i32.add
|
||||
i32.const -20995
|
||||
i32.mul
|
||||
i32.const 16384
|
||||
i32.add
|
||||
local.tee 4
|
||||
local.get 1
|
||||
i32.const 16819
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 12
|
||||
local.get 10
|
||||
i32.const -3196
|
||||
i32.mul
|
||||
i32.add
|
||||
local.tee 1
|
||||
i32.add
|
||||
i32.const 15
|
||||
i32.shr_s
|
||||
;; Store to group 5 (address in local 11).
i32.store
|
||||
local.get 2
|
||||
local.get 4
|
||||
local.get 6
|
||||
i32.const 25172
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 8
|
||||
i32.add
|
||||
i32.const 15
|
||||
i32.shr_s
|
||||
;; Store to group 3 (address in local 2).
i32.store
|
||||
local.get 18
|
||||
local.get 7
|
||||
local.get 3
|
||||
i32.const 12299
|
||||
i32.mul
|
||||
i32.add
|
||||
local.get 1
|
||||
i32.add
|
||||
i32.const 15
|
||||
i32.shr_s
|
||||
;; Store to group 1 (address in local 18).
i32.store
|
||||
;; Advance by 4 bytes; the loop repeats while the offset is non-zero.
local.get 0
|
||||
i32.const 4
|
||||
i32.add
|
||||
local.tee 0
|
||||
br_if 0 (;@1;)
|
||||
end)
|
||||
;; Exported as "entrypoint". Takes no operands and only forwards to
;; $jfdctint_jpeg_fdct_islow.
(func $jfdctint_main (type 1)
|
||||
call $jfdctint_jpeg_fdct_islow)
|
||||
;; Exported as "main". Fills the 64 i32 words at addresses 1024..1279 with
;; the sequence x -> (x * 133 + 81) rem 65535 starting from 1, then calls
;; $jfdctint_main followed by $jfdctint_return; the latter call is the last
;; instruction, so its value is what this function leaves as its i32 result.
(func $__original_main (type 2) (result i32)
|
||||
(local i32 i32)
|
||||
i32.const 64
|
||||
i32.const 64
|
||||
call $__pragma_loopbound
|
||||
;; local 0 holds the running sequence value (seeded with 1).
i32.const 1
|
||||
local.set 0
|
||||
;; local 1 is a negative byte offset; 1280 + local 1 starts at address 1024.
i32.const -256
|
||||
local.set 1
|
||||
;; NOTE(review): each iteration below performs two stores (at 1280 + local 1
;; and 1284 + local 1) and advances local 1 by 8 bytes, so the loop body runs
;; 32 times, while the loop-bound call above passes (64, 64). Confirm how the
;; analysis matches that annotation against a loop the compiler has unrolled.
loop ;; label = @1
|
||||
local.get 1
|
||||
i32.const 1280
|
||||
i32.add
|
||||
local.get 0
|
||||
i32.const 133
|
||||
i32.mul
|
||||
i32.const 81
|
||||
i32.add
|
||||
i32.const 65535
|
||||
i32.rem_s
|
||||
local.tee 0
|
||||
i32.store
|
||||
local.get 1
|
||||
i32.const 1284
|
||||
i32.add
|
||||
local.get 0
|
||||
i32.const 133
|
||||
i32.mul
|
||||
i32.const 81
|
||||
i32.add
|
||||
i32.const 65535
|
||||
i32.rem_s
|
||||
local.tee 0
|
||||
i32.store
|
||||
local.get 1
|
||||
i32.const 8
|
||||
i32.add
|
||||
local.tee 1
|
||||
br_if 0 (;@1;)
|
||||
end
|
||||
call $jfdctint_main
|
||||
call $jfdctint_return)
|
||||
;; Module-level fields: one function table, one linear memory with an
;; initial size of 1 page, three globals and the export list.
(table (;0;) 1 1 funcref)
|
||||
(memory (;0;) 1)
|
||||
;; Mutable stack pointer, initialised to 5376 (the same value as global 2).
(global $__stack_pointer (mut i32) (i32.const 5376))
|
||||
;; Global 1 (exported as "__data_end") is 1280, which is the first address
;; past the 64-word array at 1024..1279 used by the functions above.
(global (;1;) i32 (i32.const 1280))
|
||||
;; Global 2 is exported as "__heap_base".
(global (;2;) i32 (i32.const 5376))
|
||||
(export "memory" (memory 0))
|
||||
(export "__wasm_apply_data_relocs" (func $__wasm_apply_data_relocs))
|
||||
;; "entrypoint" is the bare kernel; "main" additionally initialises the
;; data before it and evaluates $jfdctint_return after it.
(export "entrypoint" (func $jfdctint_main))
|
||||
(export "main" (func $__original_main))
|
||||
(export "__data_end" (global 1))
|
||||
(export "__heap_base" (global 2)))
|
||||
@ -0,0 +1,314 @@
|
||||
/*
|
||||
|
||||
This program is part of the TACLeBench benchmark suite.
|
||||
Version V 1.x
|
||||
|
||||
Name: jfdctint
|
||||
|
||||
Author: Thomas G. Lane, Public domain JPEG source code.
|
||||
Modified by Steven Li at Princeton University.
|
||||
|
||||
Function: JPEG slow-but-accurate integer implementation of the
|
||||
forward DCT (Discrete Cosine Transform) on a 8x8
|
||||
pixel block [from original file documentations]
|
||||
|
||||
Copyright (C) 1991-1994, Thomas G. Lane.
|
||||
This file is part of the Independent JPEG Group's software.
|
||||
For conditions of distribution and use, see the accompanying README file.
|
||||
|
||||
This file contains a slow-but-accurate integer implementation of the
|
||||
forward DCT (Discrete Cosine Transform).
|
||||
|
||||
A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
|
||||
on each column. Direct algorithms are also available, but they are
|
||||
much more complex and seem not to be any faster when reduced to code.
|
||||
|
||||
This implementation is based on an algorithm described in
|
||||
C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
|
||||
Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
|
||||
Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
|
||||
The primary algorithm described there uses 11 multiplies and 29 adds.
|
||||
We use their alternate method with 12 multiplies and 32 adds.
|
||||
The advantage of this method is that no data path contains more than one
|
||||
multiplication; this allows a very simple and accurate implementation in
|
||||
scaled fixed-point arithmetic, with a minimal number of shifts.
|
||||
|
||||
Source: SNU-RT Benchmark Suite for Worst Case Timing Analysis
|
||||
Collected and Modified by S.-S. Lim
|
||||
Real-Time Research Group
|
||||
Seoul National University
|
||||
|
||||
Changes: Moved initialisation code from jfdctint_main() to jfdctint_init(),
|
||||
added checksum calculation in jfdctint_return()
|
||||
|
||||
License: see README
|
||||
|
||||
*/
|
||||
|
||||
/* COMMENTS: Long calculation sequences (i.e., long basic blocks), */
|
||||
/* single-nested loops. */
|
||||
|
||||
/**********************************************************************
|
||||
Functions to be timed
|
||||
***********************************************************************/
|
||||
|
||||
/* These definitions are added by Steven Li so as to bypass the header
|
||||
files.
|
||||
*/
|
||||
|
||||
// Wasm loop bounds
|
||||
|
||||
__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
|
||||
__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
|
||||
|
||||
/* Edge length of the square block: the transform loops run DCTSIZE times
   over rows and columns of jfdctint_data. */
#define DCTSIZE 8
|
||||
/* Rounded right shift: adds 1 << (n - 1), i.e. half of 2**n, to x and then
   shifts the sum right by n bits. n has to be at least 1 for the rounding
   term to be well formed. */
#define DESCALE(x, n) (((x) + (((int) 1) << ((n) - 1))) >> (n))
|
||||
|
||||
/*
|
||||
The poop on this scaling stuff is as follows:
|
||||
|
||||
Each 1-D DCT step produces outputs which are a factor of sqrt(N)
|
||||
larger than the true DCT outputs. The final outputs are therefore
|
||||
a factor of N larger than desired; since N=8 this can be cured by
|
||||
a simple right shift at the end of the algorithm. The advantage of
|
||||
this arrangement is that we save two multiplications per 1-D DCT,
|
||||
because the y0 and y4 outputs need not be divided by sqrt(N).
|
||||
In the IJG code, this factor of 8 is removed by the quantization step
|
||||
(in jcdctmgr.c), NOT in this module.
|
||||
|
||||
We have to do addition and subtraction of the integer inputs, which
|
||||
is no problem, and multiplication by fractional constants, which is
|
||||
a problem to do in integer arithmetic. We multiply all the constants
|
||||
by CONST_SCALE and convert them to integer constants (thus retaining
|
||||
CONST_BITS (13) bits of precision in the constants). After doing a
|
||||
multiplication we have to divide the product by CONST_SCALE, with proper
|
||||
rounding, to produce the correct output. This division can be done
|
||||
cheaply as a right shift of CONST_BITS (13) bits. We postpone shifting
|
||||
as long as possible so that partial sums can be added together with
|
||||
full fractional precision.
|
||||
|
||||
The outputs of the first pass are scaled up by PASS1_BITS (2) bits so that
|
||||
they are represented to better-than-integral precision. These outputs
|
||||
require BITS_IN_JSAMPLE (8) + PASS1_BITS (2) + 3 bits; this fits in a
|
||||
16-bit word with the recommended scaling. (For 12-bit sample data, the
|
||||
intermediate array is int anyway.)
|
||||
|
||||
To avoid overflow of the 32-bit intermediate results in pass 2, we must
|
||||
have BITS_IN_JSAMPLE (8) + CONST_BITS (13) + PASS1_BITS (2) <= 26.
|
||||
Error analysis shows that the values given below are the most effective.
|
||||
*/
|
||||
|
||||
/*
|
||||
Forward declaration of functions
|
||||
*/
|
||||
|
||||
/* Every function below takes no arguments. Declaring that as (void) gives
   the compiler a real prototype to check calls against; an empty () only
   says that the parameters are unspecified. The attributes on
   jfdctint_main and main keep both as separate, non-inlined functions and
   export them from the Wasm module as "entrypoint" and "main". */
void jfdctint_init(void);
int jfdctint_return(void);
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void);
__attribute__((noinline)) __attribute__((export_name("main"))) int main(void);
|
||||
|
||||
#define CONST_BITS 13
|
||||
#define PASS1_BITS 2
|
||||
|
||||
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
|
||||
causing a lot of useless floating-point operations at run time.
|
||||
To get around this we use the following pre-calculated constants.
|
||||
If you change CONST_BITS you may want to add appropriate values.
|
||||
(With a reasonable C compiler, you can just rely on the FIX() macro...)
|
||||
*/
|
||||
|
||||
#define FIX_0_298631336 ((int) 2446) /* FIX(0.298631336) */
|
||||
#define FIX_0_390180644 ((int) 3196) /* FIX(0.390180644) */
|
||||
#define FIX_0_541196100 ((int) 4433) /* FIX(0.541196100) */
|
||||
#define FIX_0_765366865 ((int) 6270) /* FIX(0.765366865) */
|
||||
#define FIX_0_899976223 ((int) 7373) /* FIX(0.899976223) */
|
||||
#define FIX_1_175875602 ((int) 9633) /* FIX(1.175875602) */
|
||||
#define FIX_1_501321110 ((int) 12299) /* FIX(1.501321110) */
|
||||
#define FIX_1_847759065 ((int) 15137) /* FIX(1.847759065) */
|
||||
#define FIX_1_961570560 ((int) 16069) /* FIX(1.961570560) */
|
||||
#define FIX_2_053119869 ((int) 16819) /* FIX(2.053119869) */
|
||||
#define FIX_2_562915447 ((int) 20995) /* FIX(2.562915447) */
|
||||
#define FIX_3_072711026 ((int) 25172) /* FIX(3.072711026) */
|
||||
|
||||
/* Multiply an int variable by an int constant to yield an int result.
|
||||
For 8-bit samples with the recommended scaling, all the variable
|
||||
and constant values involved are no more than 16 bits wide, so a
|
||||
16x16->32 bit multiply can be used instead of a full 32x32 multiply.
|
||||
For 12-bit samples, a full 32-bit multiplication will be needed.
|
||||
*/
|
||||
|
||||
/* The 64 samples the benchmark works on: written by jfdctint_init(),
   transformed in place by jfdctint_jpeg_fdct_islow() (8 rows of 8 values)
   and summed by jfdctint_return(). */
int jfdctint_data[64];
|
||||
|
||||
/* Sum over jfdctint_data that jfdctint_return() accepts as the correct
   result of one init + transform run. */
const int jfdctint_CHECKSUM = 1668124;
|
||||
|
||||
void
|
||||
jfdctint_init() {
|
||||
int i, seed;
|
||||
|
||||
/* Worst case settings */
|
||||
/* Set array to random values */
|
||||
seed = 1;
|
||||
|
||||
__pragma_loopbound(64, 64);
|
||||
for (i = 0; i < 64; i++) {
|
||||
seed = ((seed * 133) + 81) % 65535;
|
||||
jfdctint_data[i] = seed;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
jfdctint_return() {
|
||||
int checksum = 0;
|
||||
int i;
|
||||
__pragma_loopbound(64, 64);
|
||||
for (i = 0; i < 64; ++i)
|
||||
checksum += jfdctint_data[i];
|
||||
return ((checksum == jfdctint_CHECKSUM) ? 0 : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
Perform the forward DCT on one block of samples.
|
||||
*/
|
||||
|
||||
void
|
||||
jfdctint_jpeg_fdct_islow(void) {
|
||||
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int tmp10, tmp11, tmp12, tmp13;
|
||||
int z1, z2, z3, z4, z5;
|
||||
int *dataptr;
|
||||
int ctr;
|
||||
|
||||
/* Pass 1: process rows. */
|
||||
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
|
||||
/* furthermore, we scale the results by 2**PASS1_BITS. */
|
||||
|
||||
dataptr = jfdctint_data;
|
||||
__pragma_loopbound(8, 8);
|
||||
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
|
||||
|
||||
tmp0 = dataptr[0] + dataptr[7];
|
||||
tmp7 = dataptr[0] - dataptr[7];
|
||||
tmp1 = dataptr[1] + dataptr[6];
|
||||
tmp6 = dataptr[1] - dataptr[6];
|
||||
tmp2 = dataptr[2] + dataptr[5];
|
||||
tmp5 = dataptr[2] - dataptr[5];
|
||||
tmp3 = dataptr[3] + dataptr[4];
|
||||
tmp4 = dataptr[3] - dataptr[4];
|
||||
|
||||
tmp10 = tmp0 + tmp3;
|
||||
tmp13 = tmp0 - tmp3;
|
||||
tmp11 = tmp1 + tmp2;
|
||||
tmp12 = tmp1 - tmp2;
|
||||
|
||||
dataptr[0] = (int) ((tmp10 + tmp11) << PASS1_BITS);
|
||||
dataptr[4] = (int) ((tmp10 - tmp11) << PASS1_BITS);
|
||||
|
||||
z1 = (tmp12 + tmp13) * FIX_0_541196100;
|
||||
dataptr[2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
|
||||
CONST_BITS - PASS1_BITS);
|
||||
dataptr[6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
|
||||
CONST_BITS - PASS1_BITS);
|
||||
|
||||
z1 = tmp4 + tmp7;
|
||||
z2 = tmp5 + tmp6;
|
||||
z3 = tmp4 + tmp6;
|
||||
z4 = tmp5 + tmp7;
|
||||
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
|
||||
|
||||
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
|
||||
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
|
||||
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
|
||||
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
|
||||
|
||||
z3 += z5;
|
||||
z4 += z5;
|
||||
|
||||
dataptr[7] = (int) DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS);
|
||||
dataptr[5] = (int) DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS);
|
||||
dataptr[3] = (int) DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS);
|
||||
dataptr[1] = (int) DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS);
|
||||
|
||||
dataptr += DCTSIZE; /* advance pointer to next row */
|
||||
}
|
||||
|
||||
dataptr = jfdctint_data;
|
||||
__pragma_loopbound(8, 8);
|
||||
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
|
||||
tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
|
||||
tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
|
||||
tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
|
||||
tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
|
||||
tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
|
||||
tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
|
||||
tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
|
||||
tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
|
||||
|
||||
tmp10 = tmp0 + tmp3;
|
||||
tmp13 = tmp0 - tmp3;
|
||||
tmp11 = tmp1 + tmp2;
|
||||
tmp12 = tmp1 - tmp2;
|
||||
|
||||
dataptr[DCTSIZE * 0] = (int) DESCALE(tmp10 + tmp11, PASS1_BITS);
|
||||
dataptr[DCTSIZE * 4] = (int) DESCALE(tmp10 - tmp11, PASS1_BITS);
|
||||
|
||||
z1 = (tmp12 + tmp13) * FIX_0_541196100;
|
||||
dataptr[DCTSIZE * 2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
|
||||
CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
|
||||
CONST_BITS + PASS1_BITS);
|
||||
|
||||
z1 = tmp4 + tmp7;
|
||||
z2 = tmp5 + tmp6;
|
||||
z3 = tmp4 + tmp6;
|
||||
z4 = tmp5 + tmp7;
|
||||
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
|
||||
|
||||
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
|
||||
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
|
||||
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
|
||||
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
|
||||
|
||||
z3 += z5;
|
||||
z4 += z5;
|
||||
|
||||
dataptr[DCTSIZE * 7] =
|
||||
(int) DESCALE(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 5] =
|
||||
(int) DESCALE(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 3] =
|
||||
(int) DESCALE(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 1] =
|
||||
(int) DESCALE(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS);
|
||||
|
||||
dataptr++; /* advance pointer to next column */
|
||||
}
|
||||
}
|
||||
|
||||
/* Main function
|
||||
Time to function execution time using logic analyzer,
|
||||
which measures the OFF time of a LED on board.
|
||||
|
||||
The switching latency, including the function call/return time,
|
||||
is measured to be equal to 1.1us (22 clock cycles).
|
||||
*/
|
||||
/* Kernel entry point, exported from the Wasm module as "entrypoint" and
   kept out of line. Its whole job is to run the transform once on
   jfdctint_data. */
__attribute__((noinline, export_name("entrypoint"))) void
jfdctint_main(void) {
  jfdctint_jpeg_fdct_islow();
}
|
||||
|
||||
/* Full benchmark run, exported from the Wasm module as "main": initialise
   the input block, run the kernel, then hand back the verdict of
   jfdctint_return() (0 when the checksum matches, -1 otherwise). */
__attribute__((noinline, export_name("main"))) int
main(void) {
  int status;

  jfdctint_init();
  jfdctint_main();
  status = jfdctint_return();

  return status;
}
|
||||
@ -0,0 +1,322 @@
|
||||
/*
|
||||
|
||||
This program is part of the TACLeBench benchmark suite.
|
||||
Version V 1.x
|
||||
|
||||
Name: jfdctint
|
||||
|
||||
Author: Thomas G. Lane, Public domain JPEG source code.
|
||||
Modified by Steven Li at Princeton University.
|
||||
|
||||
Function: JPEG slow-but-accurate integer implementation of the
|
||||
forward DCT (Discrete Cosine Transform) on a 8x8
|
||||
pixel block [from original file documentations]
|
||||
|
||||
Copyright (C) 1991-1994, Thomas G. Lane.
|
||||
This file is part of the Independent JPEG Group's software.
|
||||
For conditions of distribution and use, see the accompanying README file.
|
||||
|
||||
This file contains a slow-but-accurate integer implementation of the
|
||||
forward DCT (Discrete Cosine Transform).
|
||||
|
||||
A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
|
||||
on each column. Direct algorithms are also available, but they are
|
||||
much more complex and seem not to be any faster when reduced to code.
|
||||
|
||||
This implementation is based on an algorithm described in
|
||||
C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
|
||||
Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
|
||||
Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
|
||||
The primary algorithm described there uses 11 multiplies and 29 adds.
|
||||
We use their alternate method with 12 multiplies and 32 adds.
|
||||
The advantage of this method is that no data path contains more than one
|
||||
multiplication; this allows a very simple and accurate implementation in
|
||||
scaled fixed-point arithmetic, with a minimal number of shifts.
|
||||
|
||||
Source: SNU-RT Benchmark Suite for Worst Case Timing Analysis
|
||||
Collected and Modified by S.-S. Lim
|
||||
Real-Time Research Group
|
||||
Seoul National University
|
||||
|
||||
Changes: Moved initialisation code from jfdctint_main() to jfdctint_init(),
|
||||
added checksum calculation in jfdctint_return()
|
||||
|
||||
License: see README
|
||||
|
||||
*/
|
||||
|
||||
/* COMMENTS: Long calculation sequences (i.e., long basic blocks), */
|
||||
/* single-nested loops. */
|
||||
|
||||
/**********************************************************************
|
||||
Functions to be timed
|
||||
***********************************************************************/
|
||||
|
||||
/* These definitions are added by Steven Li so as to bypass the header
|
||||
files.
|
||||
*/
|
||||
|
||||
// Wasm loop bounds
|
||||
|
||||
|
||||
|
||||
|
||||
__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
|
||||
__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
|
||||
|
||||
/* Edge length of the square block: the transform loops run DCTSIZE times
   over rows and columns of jfdctint_data. */
#define DCTSIZE 8
|
||||
/* Rounded right shift: adds 1 << (n - 1), i.e. half of 2**n, to x and then
   shifts the sum right by n bits. n has to be at least 1 for the rounding
   term to be well formed. */
#define DESCALE(x, n) (((x) + (((int) 1) << ((n) - 1))) >> (n))
|
||||
|
||||
/*
|
||||
The poop on this scaling stuff is as follows:
|
||||
|
||||
Each 1-D DCT step produces outputs which are a factor of sqrt(N)
|
||||
larger than the true DCT outputs. The final outputs are therefore
|
||||
a factor of N larger than desired; since N=8 this can be cured by
|
||||
a simple right shift at the end of the algorithm. The advantage of
|
||||
this arrangement is that we save two multiplications per 1-D DCT,
|
||||
because the y0 and y4 outputs need not be divided by sqrt(N).
|
||||
In the IJG code, this factor of 8 is removed by the quantization step
|
||||
(in jcdctmgr.c), NOT in this module.
|
||||
|
||||
We have to do addition and subtraction of the integer inputs, which
|
||||
is no problem, and multiplication by fractional constants, which is
|
||||
a problem to do in integer arithmetic. We multiply all the constants
|
||||
by CONST_SCALE and convert them to integer constants (thus retaining
|
||||
CONST_BITS (13) bits of precision in the constants). After doing a
|
||||
multiplication we have to divide the product by CONST_SCALE, with proper
|
||||
rounding, to produce the correct output. This division can be done
|
||||
cheaply as a right shift of CONST_BITS (13) bits. We postpone shifting
|
||||
as long as possible so that partial sums can be added together with
|
||||
full fractional precision.
|
||||
|
||||
The outputs of the first pass are scaled up by PASS1_BITS (2) bits so that
|
||||
they are represented to better-than-integral precision. These outputs
|
||||
require BITS_IN_JSAMPLE (8) + PASS1_BITS (2) + 3 bits; this fits in a
|
||||
16-bit word with the recommended scaling. (For 12-bit sample data, the
|
||||
intermediate array is int anyway.)
|
||||
|
||||
To avoid overflow of the 32-bit intermediate results in pass 2, we must
|
||||
have BITS_IN_JSAMPLE (8) + CONST_BITS (13) + PASS1_BITS (2) <= 26.
|
||||
Error analysis shows that the values given below are the most effective.
|
||||
*/
|
||||
|
||||
/*
|
||||
Forward declaration of functions
|
||||
*/
|
||||
|
||||
/* Every function below takes no arguments, declared as (void) so that the
   compiler has a real prototype to check calls against. The two helpers are
   static and always inlined; jfdctint_main and main stay out of line and
   are exported from the Wasm module as "entrypoint" and "main". Each
   attribute list is written once. */
__attribute__((always_inline)) static inline void jfdctint_init(void);
__attribute__((always_inline)) static inline int jfdctint_return(void);
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void);
__attribute__((noinline)) __attribute__((export_name("main"))) int
main(void);
|
||||
|
||||
#define CONST_BITS 13
|
||||
#define PASS1_BITS 2
|
||||
|
||||
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
|
||||
causing a lot of useless floating-point operations at run time.
|
||||
To get around this we use the following pre-calculated constants.
|
||||
If you change CONST_BITS you may want to add appropriate values.
|
||||
(With a reasonable C compiler, you can just rely on the FIX() macro...)
|
||||
*/
|
||||
|
||||
#define FIX_0_298631336 ((int) 2446) /* FIX(0.298631336) */
|
||||
#define FIX_0_390180644 ((int) 3196) /* FIX(0.390180644) */
|
||||
#define FIX_0_541196100 ((int) 4433) /* FIX(0.541196100) */
|
||||
#define FIX_0_765366865 ((int) 6270) /* FIX(0.765366865) */
|
||||
#define FIX_0_899976223 ((int) 7373) /* FIX(0.899976223) */
|
||||
#define FIX_1_175875602 ((int) 9633) /* FIX(1.175875602) */
|
||||
#define FIX_1_501321110 ((int) 12299) /* FIX(1.501321110) */
|
||||
#define FIX_1_847759065 ((int) 15137) /* FIX(1.847759065) */
|
||||
#define FIX_1_961570560 ((int) 16069) /* FIX(1.961570560) */
|
||||
#define FIX_2_053119869 ((int) 16819) /* FIX(2.053119869) */
|
||||
#define FIX_2_562915447 ((int) 20995) /* FIX(2.562915447) */
|
||||
#define FIX_3_072711026 ((int) 25172) /* FIX(3.072711026) */
|
||||
|
||||
/* Multiply an int variable by an int constant to yield an int result.
|
||||
For 8-bit samples with the recommended scaling, all the variable
|
||||
and constant values involved are no more than 16 bits wide, so a
|
||||
16x16->32 bit multiply can be used instead of a full 32x32 multiply.
|
||||
For 12-bit samples, a full 32-bit multiplication will be needed.
|
||||
*/
|
||||
|
||||
/* The 64 samples the benchmark works on: written by jfdctint_init(),
   transformed in place by jfdctint_jpeg_fdct_islow() (8 rows of 8 values)
   and summed by jfdctint_return(). */
int jfdctint_data[64];
|
||||
|
||||
/* Sum over jfdctint_data that jfdctint_return() accepts as the correct
   result of one init + transform run. */
const int jfdctint_CHECKSUM = 1668124;
|
||||
|
||||
__attribute__((always_inline)) static inline void
|
||||
jfdctint_init() {
|
||||
int i, seed;
|
||||
|
||||
/* Worst case settings */
|
||||
/* Set array to random values */
|
||||
seed = 1;
|
||||
|
||||
__pragma_loopbound(64, 64);
|
||||
for (i = 0; i < 64; i++) {
|
||||
seed = ((seed * 133) + 81) % 65535;
|
||||
jfdctint_data[i] = seed;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((always_inline)) static inline int
|
||||
jfdctint_return() {
|
||||
int checksum = 0;
|
||||
int i;
|
||||
__pragma_loopbound(64, 64);
|
||||
for (i = 0; i < 64; ++i)
|
||||
checksum += jfdctint_data[i];
|
||||
return ((checksum == jfdctint_CHECKSUM) ? 0 : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
Perform the forward DCT on one block of samples.
|
||||
*/
|
||||
|
||||
__attribute__((always_inline)) static inline void
|
||||
jfdctint_jpeg_fdct_islow(void) {
|
||||
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int tmp10, tmp11, tmp12, tmp13;
|
||||
int z1, z2, z3, z4, z5;
|
||||
int *dataptr;
|
||||
int ctr;
|
||||
|
||||
/* Pass 1: process rows. */
|
||||
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
|
||||
/* furthermore, we scale the results by 2**PASS1_BITS. */
|
||||
|
||||
dataptr = jfdctint_data;
|
||||
__pragma_loopbound(8, 8);
|
||||
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
|
||||
|
||||
tmp0 = dataptr[0] + dataptr[7];
|
||||
tmp7 = dataptr[0] - dataptr[7];
|
||||
tmp1 = dataptr[1] + dataptr[6];
|
||||
tmp6 = dataptr[1] - dataptr[6];
|
||||
tmp2 = dataptr[2] + dataptr[5];
|
||||
tmp5 = dataptr[2] - dataptr[5];
|
||||
tmp3 = dataptr[3] + dataptr[4];
|
||||
tmp4 = dataptr[3] - dataptr[4];
|
||||
|
||||
tmp10 = tmp0 + tmp3;
|
||||
tmp13 = tmp0 - tmp3;
|
||||
tmp11 = tmp1 + tmp2;
|
||||
tmp12 = tmp1 - tmp2;
|
||||
|
||||
dataptr[0] = (int) ((tmp10 + tmp11) << PASS1_BITS);
|
||||
dataptr[4] = (int) ((tmp10 - tmp11) << PASS1_BITS);
|
||||
|
||||
z1 = (tmp12 + tmp13) * FIX_0_541196100;
|
||||
dataptr[2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
|
||||
CONST_BITS - PASS1_BITS);
|
||||
dataptr[6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
|
||||
CONST_BITS - PASS1_BITS);
|
||||
|
||||
z1 = tmp4 + tmp7;
|
||||
z2 = tmp5 + tmp6;
|
||||
z3 = tmp4 + tmp6;
|
||||
z4 = tmp5 + tmp7;
|
||||
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
|
||||
|
||||
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
|
||||
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
|
||||
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
|
||||
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
|
||||
|
||||
z3 += z5;
|
||||
z4 += z5;
|
||||
|
||||
dataptr[7] = (int) DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS);
|
||||
dataptr[5] = (int) DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS);
|
||||
dataptr[3] = (int) DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS);
|
||||
dataptr[1] = (int) DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS);
|
||||
|
||||
dataptr += DCTSIZE; /* advance pointer to next row */
|
||||
}
|
||||
|
||||
dataptr = jfdctint_data;
|
||||
__pragma_loopbound(8, 8);
|
||||
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
|
||||
tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
|
||||
tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
|
||||
tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
|
||||
tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
|
||||
tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
|
||||
tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
|
||||
tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
|
||||
tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
|
||||
|
||||
tmp10 = tmp0 + tmp3;
|
||||
tmp13 = tmp0 - tmp3;
|
||||
tmp11 = tmp1 + tmp2;
|
||||
tmp12 = tmp1 - tmp2;
|
||||
|
||||
dataptr[DCTSIZE * 0] = (int) DESCALE(tmp10 + tmp11, PASS1_BITS);
|
||||
dataptr[DCTSIZE * 4] = (int) DESCALE(tmp10 - tmp11, PASS1_BITS);
|
||||
|
||||
z1 = (tmp12 + tmp13) * FIX_0_541196100;
|
||||
dataptr[DCTSIZE * 2] = (int) DESCALE(z1 + tmp13 * FIX_0_765366865,
|
||||
CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 6] = (int) DESCALE(z1 + tmp12 * (-FIX_1_847759065),
|
||||
CONST_BITS + PASS1_BITS);
|
||||
|
||||
z1 = tmp4 + tmp7;
|
||||
z2 = tmp5 + tmp6;
|
||||
z3 = tmp4 + tmp6;
|
||||
z4 = tmp5 + tmp7;
|
||||
z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
|
||||
|
||||
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||
z1 = z1 * (-FIX_0_899976223); /* sqrt(2) * (c7-c3) */
|
||||
z2 = z2 * (-FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
|
||||
z3 = z3 * (-FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
|
||||
z4 = z4 * (-FIX_0_390180644); /* sqrt(2) * (c5-c3) */
|
||||
|
||||
z3 += z5;
|
||||
z4 += z5;
|
||||
|
||||
dataptr[DCTSIZE * 7] =
|
||||
(int) DESCALE(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 5] =
|
||||
(int) DESCALE(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 3] =
|
||||
(int) DESCALE(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS);
|
||||
dataptr[DCTSIZE * 1] =
|
||||
(int) DESCALE(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS);
|
||||
|
||||
dataptr++; /* advance pointer to next column */
|
||||
}
|
||||
}
|
||||
|
||||
/* Main function
|
||||
Time the function's execution using a logic analyzer,
|
||||
which measures the OFF time of a LED on board.
|
||||
|
||||
The switching latency, including the function call/return time,
|
||||
is measured to be equal to 1.1us (22 clock cycles).
|
||||
*/
|
||||
/*
  Benchmark entry point: runs the forward DCT kernel once.  The function
  is kept out of line and exported under the name "entrypoint"; each
  attribute is listed once only.
*/
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
jfdctint_main(void) {
  jfdctint_jpeg_fdct_islow();
}
|
||||
|
||||
/*
  Program driver: initialises the input block, runs the benchmark entry
  point and returns the checksum verdict from jfdctint_return() (0 on a
  match, -1 otherwise).  Exported under the name "main"; each attribute
  is listed once only.
*/
__attribute__((noinline)) __attribute__((export_name("main"))) int
main(void) {
  jfdctint_init();
  jfdctint_main();

  return (jfdctint_return());
}
|
||||
319
targets/wasm-tacle/kernel/jfdctint/jfdctint.c
Executable file
319
targets/wasm-tacle/kernel/jfdctint/jfdctint.c
Executable file
@ -0,0 +1,319 @@
|
||||
/*
|
||||
|
||||
This program is part of the TACLeBench benchmark suite.
|
||||
Version V 1.x
|
||||
|
||||
Name: jfdctint
|
||||
|
||||
Author: Thomas G. Lane, Public domain JPEG source code.
|
||||
Modified by Steven Li at Princeton University.
|
||||
|
||||
Function: JPEG slow-but-accurate integer implementation of the
|
||||
forward DCT (Discrete Cosine Transform) on a 8x8
|
||||
pixel block [from original file documentations]
|
||||
|
||||
Copyright (C) 1991-1994, Thomas G. Lane.
|
||||
This file is part of the Independent JPEG Group's software.
|
||||
For conditions of distribution and use, see the accompanying README file.
|
||||
|
||||
This file contains a slow-but-accurate integer implementation of the
|
||||
forward DCT (Discrete Cosine Transform).
|
||||
|
||||
A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
|
||||
on each column. Direct algorithms are also available, but they are
|
||||
much more complex and seem not to be any faster when reduced to code.
|
||||
|
||||
This implementation is based on an algorithm described in
|
||||
C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
|
||||
Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
|
||||
Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
|
||||
The primary algorithm described there uses 11 multiplies and 29 adds.
|
||||
We use their alternate method with 12 multiplies and 32 adds.
|
||||
The advantage of this method is that no data path contains more than one
|
||||
multiplication; this allows a very simple and accurate implementation in
|
||||
scaled fixed-point arithmetic, with a minimal number of shifts.
|
||||
|
||||
Source: SNU-RT Benchmark Suite for Worst Case Timing Analysis
|
||||
Collected and Modified by S.-S. Lim
|
||||
Real-Time Research Group
|
||||
Seoul National University
|
||||
|
||||
Changes: Moved initialisation code from jfdctint_main() to jfdctint_init(),
|
||||
added checksum calculation in jfdctint_return()
|
||||
|
||||
License: see README
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/* COMMENTS: Long calculation sequences (i.e., long basic blocks), */
|
||||
/* single-nested loops. */
|
||||
|
||||
/**********************************************************************
|
||||
Functions to be timed
|
||||
***********************************************************************/
|
||||
|
||||
/* These definitions were added by Steven Li so as to bypass the header
|
||||
files.
|
||||
*/
|
||||
|
||||
/* Edge length of the DCT block: the transform works on a
   DCTSIZE x DCTSIZE (8x8) array of values. */
#define DCTSIZE 8
|
||||
/* Divide x by 2**n with rounding: add half the divisor (1 << (n-1)),
   then shift right by n.  NOTE(review): for negative x this relies on
   the compiler implementing signed right shift arithmetically - confirm
   for the target toolchain. */
#define DESCALE(x,n) (((x) + (((int)1) << ((n)-1))) >> (n))
|
||||
|
||||
/*
|
||||
The poop on this scaling stuff is as follows:
|
||||
|
||||
Each 1-D DCT step produces outputs which are a factor of sqrt(N)
|
||||
larger than the true DCT outputs. The final outputs are therefore
|
||||
a factor of N larger than desired; since N=8 this can be cured by
|
||||
a simple right shift at the end of the algorithm. The advantage of
|
||||
this arrangement is that we save two multiplications per 1-D DCT,
|
||||
because the y0 and y4 outputs need not be divided by sqrt(N).
|
||||
In the IJG code, this factor of 8 is removed by the quantization step
|
||||
(in jcdctmgr.c), NOT in this module.
|
||||
|
||||
We have to do addition and subtraction of the integer inputs, which
|
||||
is no problem, and multiplication by fractional constants, which is
|
||||
a problem to do in integer arithmetic. We multiply all the constants
|
||||
by CONST_SCALE and convert them to integer constants (thus retaining
|
||||
CONST_BITS (13) bits of precision in the constants). After doing a
|
||||
multiplication we have to divide the product by CONST_SCALE, with proper
|
||||
rounding, to produce the correct output. This division can be done
|
||||
cheaply as a right shift of CONST_BITS (13) bits. We postpone shifting
|
||||
as long as possible so that partial sums can be added together with
|
||||
full fractional precision.
|
||||
|
||||
The outputs of the first pass are scaled up by PASS1_BITS (2) bits so that
|
||||
they are represented to better-than-integral precision. These outputs
|
||||
require BITS_IN_JSAMPLE (8) + PASS1_BITS (2) + 3 bits; this fits in a
|
||||
16-bit word with the recommended scaling. (For 12-bit sample data, the
|
||||
intermediate array is int anyway.)
|
||||
|
||||
To avoid overflow of the 32-bit intermediate results in pass 2, we must
|
||||
have BITS_IN_JSAMPLE (8) + CONST_BITS (13) + PASS1_BITS (2) <= 26.
|
||||
Error analysis shows that the values given below are the most effective.
|
||||
*/
|
||||
|
||||
/*
|
||||
Forward declaration of functions
|
||||
*/
|
||||
|
||||
/* Full prototypes: an empty parameter list "()" declares a function with
   unspecified arguments, so "( void )" is used to let the compiler reject
   calls that pass arguments. */
void jfdctint_init( void );
int jfdctint_return( void );
void jfdctint_jpeg_fdct_islow( void );
void jfdctint_main( void );
int main( void );
|
||||
|
||||
|
||||
#define CONST_BITS 13
|
||||
#define PASS1_BITS 2
|
||||
|
||||
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
|
||||
causing a lot of useless floating-point operations at run time.
|
||||
To get around this we use the following pre-calculated constants.
|
||||
If you change CONST_BITS you may want to add appropriate values.
|
||||
(With a reasonable C compiler, you can just rely on the FIX() macro...)
|
||||
*/
|
||||
|
||||
#define FIX_0_298631336 ((int) 2446) /* FIX(0.298631336) */
|
||||
#define FIX_0_390180644 ((int) 3196) /* FIX(0.390180644) */
|
||||
#define FIX_0_541196100 ((int) 4433) /* FIX(0.541196100) */
|
||||
#define FIX_0_765366865 ((int) 6270) /* FIX(0.765366865) */
|
||||
#define FIX_0_899976223 ((int) 7373) /* FIX(0.899976223) */
|
||||
#define FIX_1_175875602 ((int) 9633) /* FIX(1.175875602) */
|
||||
#define FIX_1_501321110 ((int) 12299) /* FIX(1.501321110) */
|
||||
#define FIX_1_847759065 ((int) 15137) /* FIX(1.847759065) */
|
||||
#define FIX_1_961570560 ((int) 16069) /* FIX(1.961570560) */
|
||||
#define FIX_2_053119869 ((int) 16819) /* FIX(2.053119869) */
|
||||
#define FIX_2_562915447 ((int) 20995) /* FIX(2.562915447) */
|
||||
#define FIX_3_072711026 ((int) 25172) /* FIX(3.072711026) */
|
||||
|
||||
|
||||
/* Multiply an int variable by an int constant to yield an int result.
|
||||
For 8-bit samples with the recommended scaling, all the variable
|
||||
and constant values involved are no more than 16 bits wide, so a
|
||||
16x16->32 bit multiply can be used instead of a full 32x32 multiply.
|
||||
For 12-bit samples, a full 32-bit multiplication will be needed.
|
||||
*/
|
||||
|
||||
|
||||
int jfdctint_data[ 64 ];
|
||||
|
||||
|
||||
const int jfdctint_CHECKSUM = 1668124;
|
||||
|
||||
void jfdctint_init()
|
||||
{
|
||||
int i, seed;
|
||||
|
||||
/* Worst case settings */
|
||||
/* Set array to random values */
|
||||
seed = 1;
|
||||
|
||||
_Pragma( "loopbound min 64 max 64" )
|
||||
for ( i = 0; i < 64; i++ ) {
|
||||
seed = ( ( seed * 133 ) + 81 ) % 65535;
|
||||
jfdctint_data[ i ] = seed;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
int jfdctint_return()
|
||||
{
|
||||
int checksum = 0;
|
||||
int i;
|
||||
_Pragma( "loopbound min 64 max 64" )
|
||||
for ( i = 0; i < 64; ++i )
|
||||
checksum += jfdctint_data[ i ];
|
||||
return ( ( checksum == jfdctint_CHECKSUM ) ? 0 : -1 );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Perform the forward DCT on one block of samples.
|
||||
*/
|
||||
|
||||
void jfdctint_jpeg_fdct_islow( void )
|
||||
{
|
||||
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int tmp10, tmp11, tmp12, tmp13;
|
||||
int z1, z2, z3, z4, z5;
|
||||
int *dataptr;
|
||||
int ctr;
|
||||
|
||||
/* Pass 1: process rows. */
|
||||
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
|
||||
/* furthermore, we scale the results by 2**PASS1_BITS. */
|
||||
|
||||
dataptr = jfdctint_data;
|
||||
_Pragma( "loopbound min 8 max 8" )
|
||||
for ( ctr = DCTSIZE - 1; ctr >= 0; ctr-- ) {
|
||||
|
||||
tmp0 = dataptr[ 0 ] + dataptr[ 7 ];
|
||||
tmp7 = dataptr[ 0 ] - dataptr[ 7 ];
|
||||
tmp1 = dataptr[ 1 ] + dataptr[ 6 ];
|
||||
tmp6 = dataptr[ 1 ] - dataptr[ 6 ];
|
||||
tmp2 = dataptr[ 2 ] + dataptr[ 5 ];
|
||||
tmp5 = dataptr[ 2 ] - dataptr[ 5 ];
|
||||
tmp3 = dataptr[ 3 ] + dataptr[ 4 ];
|
||||
tmp4 = dataptr[ 3 ] - dataptr[ 4 ];
|
||||
|
||||
tmp10 = tmp0 + tmp3;
|
||||
tmp13 = tmp0 - tmp3;
|
||||
tmp11 = tmp1 + tmp2;
|
||||
tmp12 = tmp1 - tmp2;
|
||||
|
||||
dataptr[ 0 ] = ( int ) ( ( tmp10 + tmp11 ) << PASS1_BITS );
|
||||
dataptr[ 4 ] = ( int ) ( ( tmp10 - tmp11 ) << PASS1_BITS );
|
||||
|
||||
z1 = ( tmp12 + tmp13 ) * FIX_0_541196100;
|
||||
dataptr[ 2 ] = ( int ) DESCALE( z1 + tmp13 * FIX_0_765366865,
|
||||
CONST_BITS - PASS1_BITS );
|
||||
dataptr[ 6 ] = ( int ) DESCALE( z1 + tmp12 * ( - FIX_1_847759065 ),
|
||||
CONST_BITS - PASS1_BITS );
|
||||
|
||||
z1 = tmp4 + tmp7;
|
||||
z2 = tmp5 + tmp6;
|
||||
z3 = tmp4 + tmp6;
|
||||
z4 = tmp5 + tmp7;
|
||||
z5 = ( z3 + z4 ) * FIX_1_175875602; /* sqrt(2) * c3 */
|
||||
|
||||
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||
z1 = z1 * ( - FIX_0_899976223 ); /* sqrt(2) * (c7-c3) */
|
||||
z2 = z2 * ( - FIX_2_562915447 ); /* sqrt(2) * (-c1-c3) */
|
||||
z3 = z3 * ( - FIX_1_961570560 ); /* sqrt(2) * (-c3-c5) */
|
||||
z4 = z4 * ( - FIX_0_390180644 ); /* sqrt(2) * (c5-c3) */
|
||||
|
||||
z3 += z5;
|
||||
z4 += z5;
|
||||
|
||||
dataptr[ 7 ] = ( int ) DESCALE( tmp4 + z1 + z3, CONST_BITS - PASS1_BITS );
|
||||
dataptr[ 5 ] = ( int ) DESCALE( tmp5 + z2 + z4, CONST_BITS - PASS1_BITS );
|
||||
dataptr[ 3 ] = ( int ) DESCALE( tmp6 + z2 + z3, CONST_BITS - PASS1_BITS );
|
||||
dataptr[ 1 ] = ( int ) DESCALE( tmp7 + z1 + z4, CONST_BITS - PASS1_BITS );
|
||||
|
||||
dataptr += DCTSIZE; /* advance pointer to next row */
|
||||
}
|
||||
|
||||
dataptr = jfdctint_data;
|
||||
_Pragma( "loopbound min 8 max 8" )
|
||||
for ( ctr = DCTSIZE - 1; ctr >= 0; ctr-- ) {
|
||||
tmp0 = dataptr[ DCTSIZE * 0 ] + dataptr[ DCTSIZE * 7 ];
|
||||
tmp7 = dataptr[ DCTSIZE * 0 ] - dataptr[ DCTSIZE * 7 ];
|
||||
tmp1 = dataptr[ DCTSIZE * 1 ] + dataptr[ DCTSIZE * 6 ];
|
||||
tmp6 = dataptr[ DCTSIZE * 1 ] - dataptr[ DCTSIZE * 6 ];
|
||||
tmp2 = dataptr[ DCTSIZE * 2 ] + dataptr[ DCTSIZE * 5 ];
|
||||
tmp5 = dataptr[ DCTSIZE * 2 ] - dataptr[ DCTSIZE * 5 ];
|
||||
tmp3 = dataptr[ DCTSIZE * 3 ] + dataptr[ DCTSIZE * 4 ];
|
||||
tmp4 = dataptr[ DCTSIZE * 3 ] - dataptr[ DCTSIZE * 4 ];
|
||||
|
||||
tmp10 = tmp0 + tmp3;
|
||||
tmp13 = tmp0 - tmp3;
|
||||
tmp11 = tmp1 + tmp2;
|
||||
tmp12 = tmp1 - tmp2;
|
||||
|
||||
dataptr[ DCTSIZE * 0 ] = ( int ) DESCALE( tmp10 + tmp11, PASS1_BITS );
|
||||
dataptr[ DCTSIZE * 4 ] = ( int ) DESCALE( tmp10 - tmp11, PASS1_BITS );
|
||||
|
||||
z1 = ( tmp12 + tmp13 ) * FIX_0_541196100;
|
||||
dataptr[ DCTSIZE * 2 ] = ( int ) DESCALE( z1 + tmp13 * FIX_0_765366865,
|
||||
CONST_BITS + PASS1_BITS );
|
||||
dataptr[ DCTSIZE * 6 ] = ( int ) DESCALE( z1
|
||||
+ tmp12 * ( - FIX_1_847759065 ),
|
||||
CONST_BITS + PASS1_BITS );
|
||||
|
||||
z1 = tmp4 + tmp7;
|
||||
z2 = tmp5 + tmp6;
|
||||
z3 = tmp4 + tmp6;
|
||||
z4 = tmp5 + tmp7;
|
||||
z5 = ( z3 + z4 ) * FIX_1_175875602; /* sqrt(2) * c3 */
|
||||
|
||||
tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||
tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||
tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||
tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||
z1 = z1 * ( - FIX_0_899976223 ); /* sqrt(2) * (c7-c3) */
|
||||
z2 = z2 * ( - FIX_2_562915447 ); /* sqrt(2) * (-c1-c3) */
|
||||
z3 = z3 * ( - FIX_1_961570560 ); /* sqrt(2) * (-c3-c5) */
|
||||
z4 = z4 * ( - FIX_0_390180644 ); /* sqrt(2) * (c5-c3) */
|
||||
|
||||
z3 += z5;
|
||||
z4 += z5;
|
||||
|
||||
dataptr[ DCTSIZE * 7 ] = ( int ) DESCALE( tmp4 + z1 + z3,
|
||||
CONST_BITS + PASS1_BITS );
|
||||
dataptr[ DCTSIZE * 5 ] = ( int ) DESCALE( tmp5 + z2 + z4,
|
||||
CONST_BITS + PASS1_BITS );
|
||||
dataptr[ DCTSIZE * 3 ] = ( int ) DESCALE( tmp6 + z2 + z3,
|
||||
CONST_BITS + PASS1_BITS );
|
||||
dataptr[ DCTSIZE * 1 ] = ( int ) DESCALE( tmp7 + z1 + z4,
|
||||
CONST_BITS + PASS1_BITS );
|
||||
|
||||
dataptr++; /* advance pointer to next column */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Main function
|
||||
Time the function's execution using a logic analyzer,
|
||||
which measures the OFF time of a LED on board.
|
||||
|
||||
The switching latency, including the function call/return time,
|
||||
is measured to be equal to 1.1us (22 clock cycles).
|
||||
*/
|
||||
/* Benchmark entry point, marked with the "entrypoint" pragma: performs one
   forward DCT by calling jfdctint_jpeg_fdct_islow(), which transforms
   jfdctint_data in place. */
void _Pragma ( "entrypoint" ) jfdctint_main( void )
|
||||
{
|
||||
jfdctint_jpeg_fdct_islow();
|
||||
}
|
||||
|
||||
|
||||
/*
  Program driver: initialises the input block, runs the benchmark entry
  point and hands back the checksum verdict of jfdctint_return()
  (0 on a match, -1 otherwise) as the exit status.
*/
int main( void )
{
  int status;

  jfdctint_init();
  jfdctint_main();
  status = jfdctint_return();

  return ( status );
}
|
||||
Reference in New Issue
Block a user