/* intel_init.c - SSE2 optimized filter functions
 *
 * Copyright (c) 2018 Cosmin Truta
 * Copyright (c) 2016-2017 Glenn Randers-Pehrson
 * Written by Mike Klein and Matt Sarett, Google, Inc.
 * Derived from arm/arm_init.c
 *
 * This code is released under the libpng license.
 * For conditions of distribution and use, see the disclaimer
 * and license in png.h
 */
13
14#include "../pngpriv.h"
15
16#ifdef PNG_READ_SUPPORTED
17#if PNG_INTEL_SSE_IMPLEMENTATION > 0
18
19void
20png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
21{
22 /* The techniques used to implement each of these filters in SSE operate on
23 * one pixel at a time.
24 * So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
25 * They can scale up to 6 and 8 bpp images and down to 2 bpp images,
26 * but they'd not likely have any benefit for 1bpp images.
27 * Most of these can be implemented using only MMX and 64-bit registers,
28 * but they end up a bit slower than using the equally-ubiquitous SSE2.
29 */
30 png_debug(1, "in png_init_filter_functions_sse2");
31 if (bpp == 3)
32 {
33 pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
34 pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
35 pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
36 png_read_filter_row_paeth3_sse2;
37 }
38 else if (bpp == 4)
39 {
40 pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
41 pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
42 pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
43 png_read_filter_row_paeth4_sse2;
44 }
45
46 /* No need optimize PNG_FILTER_VALUE_UP. The compiler should
47 * autovectorize.
48 */
49}
50
51#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
52#endif /* PNG_READ_SUPPORTED */