Leon Scroggins III | 3cc83ac | 2017-10-06 11:02:56 -0400 | [diff] [blame] | 1 | |
| 2 | /* intel_init.c - SSE2 optimized filter functions |
| 3 | * |
xNombre | d07bb0d | 2020-03-10 20:17:12 +0100 | [diff] [blame] | 4 | * Copyright (c) 2018 Cosmin Truta |
Leon Scroggins III | 3cc83ac | 2017-10-06 11:02:56 -0400 | [diff] [blame] | 5 | * Copyright (c) 2016-2017 Glenn Randers-Pehrson |
| 6 | * Written by Mike Klein and Matt Sarett, Google, Inc. |
| 7 | * Derived from arm/arm_init.c |
| 8 | * |
Leon Scroggins III | 3cc83ac | 2017-10-06 11:02:56 -0400 | [diff] [blame] | 9 | * This code is released under the libpng license. |
| 10 | * For conditions of distribution and use, see the disclaimer |
| 11 | * and license in png.h |
| 12 | */ |
| 13 | |
| 14 | #include "../pngpriv.h" |
| 15 | |
| 16 | #ifdef PNG_READ_SUPPORTED |
| 17 | #if PNG_INTEL_SSE_IMPLEMENTATION > 0 |
| 18 | |
| 19 | void |
| 20 | png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) |
| 21 | { |
| 22 | /* The techniques used to implement each of these filters in SSE operate on |
| 23 | * one pixel at a time. |
| 24 | * So they generally speed up 3bpp images about 3x, 4bpp images about 4x. |
| 25 | * They can scale up to 6 and 8 bpp images and down to 2 bpp images, |
| 26 | * but they'd not likely have any benefit for 1bpp images. |
| 27 | * Most of these can be implemented using only MMX and 64-bit registers, |
| 28 | * but they end up a bit slower than using the equally-ubiquitous SSE2. |
| 29 | */ |
| 30 | png_debug(1, "in png_init_filter_functions_sse2"); |
| 31 | if (bpp == 3) |
| 32 | { |
| 33 | pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; |
| 34 | pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2; |
| 35 | pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
| 36 | png_read_filter_row_paeth3_sse2; |
| 37 | } |
| 38 | else if (bpp == 4) |
| 39 | { |
| 40 | pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2; |
| 41 | pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2; |
| 42 | pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
| 43 | png_read_filter_row_paeth4_sse2; |
| 44 | } |
| 45 | |
| 46 | /* No need optimize PNG_FILTER_VALUE_UP. The compiler should |
| 47 | * autovectorize. |
| 48 | */ |
| 49 | } |
| 50 | |
| 51 | #endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ |
| 52 | #endif /* PNG_READ_SUPPORTED */ |