summaryrefslogtreecommitdiff
path: root/externals/stb
diff options
context:
space:
mode:
Diffstat (limited to 'externals/stb')
-rw-r--r--externals/stb/stb_image.cpp7529
-rw-r--r--externals/stb/stb_image.h7221
-rw-r--r--externals/stb/stb_image_resize.cpp2282
-rw-r--r--externals/stb/stb_image_resize.h2214
-rw-r--r--externals/stb/stb_image_write.cpp1677
-rw-r--r--externals/stb/stb_image_write.h1435
6 files changed, 10860 insertions, 11498 deletions
diff --git a/externals/stb/stb_image.cpp b/externals/stb/stb_image.cpp
deleted file mode 100644
index dbf26f7c5..000000000
--- a/externals/stb/stb_image.cpp
+++ /dev/null
@@ -1,7529 +0,0 @@
1// SPDX-FileCopyrightText: stb http://nothings.org/stb
2// SPDX-License-Identifier: MIT
3
4/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb
5 no warranty implied; use at your own risk
6
7LICENSE
8
9 See end of file for license information.
10
11RECENT REVISION HISTORY:
12
13 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff
14 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
15 2.26 (2020-07-13) many minor fixes
16 2.25 (2020-02-02) fix warnings
17 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
18 2.23 (2019-08-11) fix clang static analysis warning
19 2.22 (2019-03-04) gif fixes, fix warnings
20 2.21 (2019-02-25) fix typo in comment
21 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
22 2.19 (2018-02-11) fix warning
23 2.18 (2018-01-30) fix warnings
24 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
25 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
26 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
27 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
28 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
29 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
30 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
31 RGB-format JPEG; remove white matting in PSD;
32 allocate large structures on the stack;
33 correct channel count for PNG & BMP
34 2.10 (2016-01-22) avoid warning introduced in 2.09
35 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
36
37 See end of file for full revision history.
38
39
40 ============================ Contributors =========================
41
42 Image formats Extensions, features
43 Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
44 Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
45 Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
46 Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
47 Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
48 Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
49 Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
50 github:urraka (animated gif) Junggon Kim (PNM comments)
51 Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
52 socks-the-fox (16-bit PNG)
53 Jeremy Sawicki (handle all ImageNet JPGs)
54 Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
55 Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
56 Arseny Kapoulkine Simon Breuss (16-bit PNM)
57 John-Mark Allen
58 Carmelo J Fdez-Aguera
59
60 Bug & warning fixes
61 Marc LeBlanc David Woo Guillaume George Martins Mozeiko
62 Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski
63 Phil Jordan Dave Moore Roy Eltham
64 Hayaki Saito Nathan Reed Won Chun
65 Luke Graham Johan Duparc Nick Verigakis the Horde3D community
66 Thomas Ruf Ronny Chevalier github:rlyeh
67 Janez Zemva John Bartholomew Michal Cichon github:romigrou
68 Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
69 Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
70 Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
71 Cass Everitt Ryamond Barbiero github:grim210
72 Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
73 Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus
74 Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo
75 Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
76 Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
77 Brad Weinberger Matvey Cherevko github:mosra
78 Luca Sas Alexander Veselov Zack Middleton [reserved]
79 Ryan C. Gordon [reserved] [reserved]
80 DO NOT ADD YOUR NAME HERE
81
82 Jacko Dirks
83
84 To add your name to the credits, pick a random blank space in the middle and fill it.
85 80% of merge conflicts on stb PRs are due to people adding their name at the end
86 of the credits.
87*/
88
89#include <stb_image.h>
90
// Format selection: if ANY STBI_ONLY_xxx macro is defined, every format
// whose own STBI_ONLY_xxx macro is absent gets its STBI_NO_xxx macro defined.
// STBI_ONLY_ZLIB takes part in the test but has no matching STBI_NO_ line
// in this list.
#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
  || defined(STBI_ONLY_ZLIB)
   #ifndef STBI_ONLY_JPEG
   #define STBI_NO_JPEG
   #endif
   #ifndef STBI_ONLY_PNG
   #define STBI_NO_PNG
   #endif
   #ifndef STBI_ONLY_BMP
   #define STBI_NO_BMP
   #endif
   #ifndef STBI_ONLY_PSD
   #define STBI_NO_PSD
   #endif
   #ifndef STBI_ONLY_TGA
   #define STBI_NO_TGA
   #endif
   #ifndef STBI_ONLY_GIF
   #define STBI_NO_GIF
   #endif
   #ifndef STBI_ONLY_HDR
   #define STBI_NO_HDR
   #endif
   #ifndef STBI_ONLY_PIC
   #define STBI_NO_PIC
   #endif
   #ifndef STBI_ONLY_PNM
   #define STBI_NO_PNM
   #endif
#endif

// With PNG disabled, zlib is disabled too unless the user asked to keep it
// (STBI_SUPPORT_ZLIB) or has already defined STBI_NO_ZLIB.
#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
#define STBI_NO_ZLIB
#endif
127
128
129#include <stdarg.h>
130#include <stddef.h> // ptrdiff_t on osx
131#include <stdlib.h>
132#include <string.h>
133#include <limits.h>
134
135#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
136#include <math.h> // ldexp, pow
137#endif
138
139#ifndef STBI_NO_STDIO
140#include <stdio.h>
141#endif
142
143#ifndef STBI_ASSERT
144#include <assert.h>
145#define STBI_ASSERT(x) assert(x)
146#endif
147
// STBI_EXTERN: extern declaration with C linkage when compiled as C++.
#ifdef __cplusplus
#define STBI_EXTERN extern "C"
#else
#define STBI_EXTERN extern
#endif


// stbi_inline: __forceinline on MSVC, `inline` for other C++ compilers,
// and nothing at all for other C compilers.
#ifndef _MSC_VER
   #ifdef __cplusplus
   #define stbi_inline inline
   #else
   #define stbi_inline
   #endif
#else
   #define stbi_inline __forceinline
#endif

// STBI_THREAD_LOCAL: the thread-local storage specifier for this toolchain.
// It stays undefined when STBI_NO_THREAD_LOCALS is set or when none of the
// cases below match; code further down tests it with #ifdef.
#ifndef STBI_NO_THREAD_LOCALS
   #if defined(__cplusplus) && __cplusplus >= 201103L
      #define STBI_THREAD_LOCAL thread_local
   #elif defined(__GNUC__) && __GNUC__ < 5
      #define STBI_THREAD_LOCAL __thread
   #elif defined(_MSC_VER)
      #define STBI_THREAD_LOCAL __declspec(thread)
   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
      #define STBI_THREAD_LOCAL _Thread_local
   #endif

   // last resort: any GNU-compatible compiler not matched above
   #ifndef STBI_THREAD_LOCAL
      #if defined(__GNUC__)
         #define STBI_THREAD_LOCAL __thread
      #endif
   #endif
#endif
182
// Fixed-width integer aliases used throughout the decoders. MSVC and Symbian
// builds use the built-in short/int types; everything else takes the exact
// width types from <stdint.h>.
#if defined(_MSC_VER) || defined(__SYMBIAN32__)
typedef unsigned short stbi__uint16;
typedef   signed short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef   signed int   stbi__int32;
#else
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#endif

// should produce compiler error if size is wrong
// (the array length becomes -1 unless stbi__uint32 is exactly 4 bytes)
typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
198
// STBI_NOTUSED(v): marks v as deliberately unused. The non-MSVC form wraps
// v in sizeof, so v is never evaluated.
#ifdef _MSC_VER
#define STBI_NOTUSED(v)  (void)(v)
#else
#define STBI_NOTUSED(v)  (void)sizeof(v)
#endif

#ifdef _MSC_VER
#define STBI_HAS_LROTL
#endif

// stbi_lrot(x,y): rotate x left by y bits. Uses the _lrotl intrinsic when
// available; the fallback masks the right-shift count with 31.
// NOTE(review): the fallback evaluates both arguments twice.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
#endif

// Allocator hooks: the user must override all of them or none of them.
#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
// ok
#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
// ok
#else
#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
#endif

// default to the C library allocator
#ifndef STBI_MALLOC
#define STBI_MALLOC(sz)           malloc(sz)
#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
#define STBI_FREE(p)              free(p)
#endif

// without a size-aware realloc hook, forward to STBI_REALLOC and drop oldsz
#ifndef STBI_REALLOC_SIZED
#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
#endif
232
// x86/x64 detection
// Defines exactly one of STBI__X64_TARGET / STBI__X86_TARGET on those
// architectures and neither elsewhere.
#if defined(__x86_64__) || defined(_M_X64)
#define STBI__X64_TARGET
#elif defined(__i386) || defined(_M_IX86)
#define STBI__X86_TARGET
#endif

#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
// but previous attempts to provide the SSE2 functions with runtime
// detection caused numerous issues. The way architecture extensions are
// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
// New behavior: if compiled with -msse2, we use SSE2 without any
// detection; if not, we don't use it at all.
#define STBI_NO_SIMD
#endif

#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
//
// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
// simultaneously enabling "-mstackrealign".
//
// See https://github.com/nothings/stb/issues/81 for more information.
//
// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
#define STBI_NO_SIMD
#endif
265
// SSE2 support: enabled on x86/x64 unless SIMD was turned off above.
// Provides STBI_SIMD_ALIGN (16-byte aligned declaration) and, when the JPEG
// decoder is compiled in, stbi__sse2_available().
#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
#define STBI_SSE2
#include <emmintrin.h>

#ifdef _MSC_VER

#if _MSC_VER >= 1400  // not VC6
#include <intrin.h> // __cpuid
// Returns the fourth word (EDX) reported by CPUID function 1.
static int stbi__cpuid3(void)
{
   int info[4];
   __cpuid(info,1);
   return info[3];
}
#else
// Same result as above, obtained with inline assembly on older compilers.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
#endif

#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name

#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// Runtime check: returns 1 when bit 26 of the CPUID word (the SSE2 feature
// flag) is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   int info3 = stbi__cpuid3();
   return ((info3 >> 26) & 1) != 0;
}
#endif

#else // assume GCC-style if not VC++
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// No runtime check on this path: always reports SSE2 as available.
static int stbi__sse2_available(void)
{
   // If we're even attempting to compile this on GCC/Clang, that means
   // -msse2 is on, which means the compiler is allowed to use SSE2
   // instructions at will, and so are we.
   return 1;
}
#endif

#endif
#endif
318
// ARM NEON
// STBI_NO_SIMD wins over a user-supplied STBI_NEON.
#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
#undef STBI_NEON
#endif

#ifdef STBI_NEON
#include <arm_neon.h>
#ifdef _MSC_VER
#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
#else
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif
#endif

// no SIMD path active: a plain declaration with no alignment request
#ifndef STBI_SIMD_ALIGN
#define STBI_SIMD_ALIGN(type, name) type name
#endif

// user-overridable limit; defaults to 1 << 24 (16777216)
#ifndef STBI_MAX_DIMENSIONS
#define STBI_MAX_DIMENSIONS (1 << 24)
#endif
340
341///////////////////////////////////////////////
342//
343// stbi__context struct and start_xxx functions
344
// stbi__context structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
   // NOTE(review): presumably image width/height and channel counts filled in
   // by the format decoders; nothing in this part of the file writes them.
   stbi__uint32 img_x, img_y;
   int img_n, img_out_n;

   stbi_io_callbacks io;         // copy of the caller's callbacks (stbi__start_callbacks); io.read is NULL for memory contexts
   void *io_user_data;           // opaque pointer supplied together with the callbacks

   int read_from_callbacks;      // 1 for callback-driven contexts, 0 for memory contexts
   int buflen;                   // set to sizeof(buffer_start) by stbi__start_callbacks
   stbi_uc buffer_start[128];    // buffer that img_buffer points into for callback-driven contexts
   int callback_already_read;    // reset to 0 by both start functions; NOTE(review): maintained by code not shown here

   stbi_uc *img_buffer, *img_buffer_end;                    // current read window
   stbi_uc *img_buffer_original, *img_buffer_original_end;  // window saved at start; stbi__rewind restores it
} stbi__context;
363
364
365static void stbi__refill_buffer(stbi__context *s);
366
367// initialize a memory-decode context
368static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
369{
370 s->io.read = NULL;
371 s->read_from_callbacks = 0;
372 s->callback_already_read = 0;
373 s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
374 s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
375}
376
377// initialize a callback-based context
378static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
379{
380 s->io = *c;
381 s->io_user_data = user;
382 s->buflen = sizeof(s->buffer_start);
383 s->read_from_callbacks = 1;
384 s->callback_already_read = 0;
385 s->img_buffer = s->img_buffer_original = s->buffer_start;
386 stbi__refill_buffer(s);
387 s->img_buffer_original_end = s->img_buffer_end;
388}
389
390#ifndef STBI_NO_STDIO
391
// stdio "read" callback: `user` is the FILE* to read from. Reads up to
// `size` bytes into `data` and returns the number of bytes actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
396
// stdio "skip" callback: moves the FILE* in `user` by n bytes relative to the
// current position.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   int next;

   fseek(stream, n, SEEK_CUR);

   // A byte has to be read to bring feof()'s flag up to date; if one was
   // actually available it is pushed straight back onto the stream.
   next = fgetc(stream);
   if (next == EOF)
      return;
   ungetc(next, stream);
}
406
// stdio "eof" callback: returns 1 once the FILE* in `user` has hit
// end-of-file or has its error indicator set, 0 otherwise.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   if (feof(stream))
      return 1;
   return ferror(stream) ? 1 : 0;
}
411
// Callback table that adapts a stdio FILE* to the stbi_io_callbacks
// interface; stbi__start_file passes the FILE* itself as the user pointer.
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};
418
419static void stbi__start_file(stbi__context *s, FILE *f)
420{
421 stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
422}
423
424//static void stop_file(stbi__context *s) { }
425
426#endif // !STBI_NO_STDIO
427
428static void stbi__rewind(stbi__context *s)
429{
430 // conceptually rewind SHOULD rewind to the beginning of the stream,
431 // but we just rewind to the beginning of the initial buffer, because
432 // we only use it after doing 'test', which only ever looks at at most 92 bytes
433 s->img_buffer = s->img_buffer_original;
434 s->img_buffer_end = s->img_buffer_original_end;
435}
436
// Channel orderings that can be recorded in stbi__result_info.channel_order.
enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};

// Per-image details reported alongside the decoded pixel data.
typedef struct
{
   int bits_per_channel; // stbi__load_main defaults this to 8; the postprocess helpers assert it is 8 or 16
   int num_channels;     // stbi__load_main initializes this to 0
   int channel_order;    // one of the STBI_ORDER_* values; stbi__load_main sets STBI_ORDER_RGB
} stbi__result_info;
449
450#ifndef STBI_NO_JPEG
451static int stbi__jpeg_test(stbi__context *s);
452static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
453static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
454#endif
455
456#ifndef STBI_NO_PNG
457static int stbi__png_test(stbi__context *s);
458static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
459static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
460static int stbi__png_is16(stbi__context *s);
461#endif
462
463#ifndef STBI_NO_BMP
464static int stbi__bmp_test(stbi__context *s);
465static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
466static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
467#endif
468
469#ifndef STBI_NO_TGA
470static int stbi__tga_test(stbi__context *s);
471static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
472static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
473#endif
474
475#ifndef STBI_NO_PSD
476static int stbi__psd_test(stbi__context *s);
477static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
478static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
479static int stbi__psd_is16(stbi__context *s);
480#endif
481
482#ifndef STBI_NO_HDR
483static int stbi__hdr_test(stbi__context *s);
484static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
485static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
486#endif
487
488#ifndef STBI_NO_PIC
489static int stbi__pic_test(stbi__context *s);
490static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
491static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
492#endif
493
494#ifndef STBI_NO_GIF
495static int stbi__gif_test(stbi__context *s);
496static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
497static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
498static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
499#endif
500
501#ifndef STBI_NO_PNM
502static int stbi__pnm_test(stbi__context *s);
503static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
504static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
505static int stbi__pnm_is16(stbi__context *s);
506#endif
507
// Most recent failure message recorded by stbi__err. The variable is
// thread-local whenever STBI_THREAD_LOCAL is available, otherwise it is a
// single static shared by all threads.
static
#ifdef STBI_THREAD_LOCAL
STBI_THREAD_LOCAL
#endif
const char *stbi__g_failure_reason;

// Returns the stored failure message pointer (whatever stbi__err last
// stored; the variable is zero-initialized, so NULL before any failure).
STBIDEF const char *stbi_failure_reason(void)
{
   return stbi__g_failure_reason;
}
518
#ifndef STBI_NO_FAILURE_STRINGS
// Records `str` as the current failure reason. Always returns 0 so callers
// can write `return stbi__err(...)` from functions that report failure as 0.
// The pointer is stored as is; the string is not copied.
static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}
#endif
526
// Single entry point for allocations: forwards to the configurable
// STBI_MALLOC hook and returns whatever it returns.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
531
532// stb_image uses ints pervasively, including for offset calculations.
533// therefore the largest decoded image size we can support with the
534// current code, even on 64-bit targets, is INT_MAX. this is not a
535// significant limitation for the intended use case.
536//
537// we do, however, need to make sure our size calculations don't
538// overflow. hence a few helper functions for size calculations that
539// multiply integers together, making sure that they're non-negative
540// and no overflow occurs.
541
// Size-sum check: returns 1 if a + b does not exceed INT_MAX, 0 if it would
// overflow or if b is negative.
static int stbi__addsizes_valid(int a, int b)
{
   int headroom;

   if (b < 0) return 0;

   // b lies in [0, INT_MAX], so INT_MAX - b is itself in [0, INT_MAX] and
   // cannot overflow. Comparing a against it answers "a + b <= INT_MAX"
   // without ever performing the addition.
   headroom = INT_MAX - b;
   return (a <= headroom) ? 1 : 0;
}
553
// Size-product check: returns 1 if a * b fits in an int, 0 if it would
// overflow or if either factor is negative.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   if (b == 0) return 1;   // anything times zero is fine (and avoids dividing by 0)

   // division-based test, so the multiplication itself is never performed
   return (a <= INT_MAX / b) ? 1 : 0;
}
563
#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// Checks the size expression a*b + add: returns 1 if it contains no negative
// factor or term and cannot overflow an int, 0 otherwise.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   // a*b is only evaluated once it is known not to overflow
   return stbi__addsizes_valid(a*b, add) != 0;
}
#endif
571
// Checks the size expression a*b*c + add: returns 1 if it contains no
// negative factor or term and cannot overflow an int, 0 otherwise.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is only evaluated after the previous step passed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add) != 0;
}
578
// Checks the size expression a*b*c*d + add: returns 1 if it contains no
// negative factor or term and cannot overflow an int, 0 otherwise.
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is only evaluated after the previous step passed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add) != 0;
}
#endif
587
#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// Overflow-checked allocation of a*b + add bytes. Returns NULL, without
// attempting the allocation, when the size expression is invalid.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
#endif
596
// Overflow-checked allocation of a*b*c + add bytes. Returns NULL, without
// attempting the allocation, when the size expression is invalid.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
602
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
// Overflow-checked allocation of a*b*c*d + add bytes. Returns NULL, without
// attempting the allocation, when the size expression is invalid.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
#endif
610
// Signed-sum check: returns 1 if a + b is representable as an int
// (INT_MIN..INT_MAX inclusive), 0 if the addition would overflow.
static int stbi__addints_valid(int a, int b)
{
   int a_nonneg = (a >= 0);
   int b_nonneg = (b >= 0);

   // operands of opposite sign can never overflow
   if (a_nonneg != b_nonneg) return 1;

   // both non-negative: a + b <= INT_MAX, rewritten so nothing overflows
   if (a_nonneg) return a <= INT_MAX - b;

   // both negative: a + b >= INT_MIN; INT_MIN - b is safe because b < 0
   return a >= INT_MIN - b;
}
618
// Returns 1 if the product a*b is representable as a signed short
// (SHRT_MIN..SHRT_MAX inclusive), 0 if it would overflow.
//
// The check is done with divisions so the product is never formed. Integer
// division truncates toward zero, which is the rounding each bound needs.
static int stbi__mul2shorts_valid(short a, short b)
{
   if (a == 0 || b == 0) return 1;        // the product is 0
   if (b == -1) return a != SHRT_MIN;     // -a only overflows for SHRT_MIN; also keeps SHRT_MIN / -1 out of the divisions below

   if (b > 0) {
      if (a > 0) return a <= SHRT_MAX / b;   // positive product: a*b <= SHRT_MAX
      return a >= SHRT_MIN / b;              // negative product: a*b >= SHRT_MIN
   }

   // b < -1: dividing by a negative number reverses the inequality
   if (a > 0) return a <= SHRT_MIN / b;      // negative product: a*b >= SHRT_MIN
   return a >= SHRT_MAX / b;                 // positive product: a*b <= SHRT_MAX
}
627
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err is a two-argument macro: x is the short message and
// y the long one. Depending on configuration it expands to the constant 0
// (no failure strings), to the stbi__err function called with the long
// message (STBI_FAILURE_USERMSG), or to the function called with the short
// message.

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// Both arms of the conditional are NULL: the expression exists only to
// evaluate stbi__err and still yield a null pointer of the requested type.
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
642
// Releases a pixel buffer returned by one of the stbi_load* functions by
// handing it to the configured STBI_FREE hook.
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}
647
648#ifndef STBI_NO_LINEAR
649static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
650#endif
651
652#ifndef STBI_NO_HDR
653static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
654#endif
655
// Process-wide "flip images vertically after loading" flag; off by default.
static int stbi__vertically_flip_on_load_global = 0;

// Sets the process-wide flip flag. A per-thread setting, where available
// and once set, takes precedence over this value.
// NOTE(review): plain non-atomic store to a shared static.
STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
{
   stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
}
662
// stbi__vertically_flip_on_load is the expression the loaders test.
// Without thread-local support it is simply the global flag.
#ifndef STBI_THREAD_LOCAL
#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global
#else
// Per-thread override value, plus a marker recording whether this thread has
// ever set it.
static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;

// Sets the flip flag for the calling thread only; from then on this thread
// ignores the global flag.
STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
{
   stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
   stbi__vertically_flip_on_load_set = 1;
}

// Per-thread value if this thread has set one, otherwise the global flag.
#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \
                                         ? stbi__vertically_flip_on_load_local  \
                                         : stbi__vertically_flip_on_load_global)
#endif // STBI_THREAD_LOCAL
678
679static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
680{
681 memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
682 ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
683 ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
684 ri->num_channels = 0;
685
686 // test the formats with a very explicit header first (at least a FOURCC
687 // or distinctive magic number first)
688 #ifndef STBI_NO_PNG
689 if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
690 #endif
691 #ifndef STBI_NO_BMP
692 if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
693 #endif
694 #ifndef STBI_NO_GIF
695 if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
696 #endif
697 #ifndef STBI_NO_PSD
698 if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
699 #else
700 STBI_NOTUSED(bpc);
701 #endif
702 #ifndef STBI_NO_PIC
703 if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
704 #endif
705
706 // then the formats that can end up attempting to load with just 1 or 2
707 // bytes matching expectations; these are prone to false positives, so
708 // try them later
709 #ifndef STBI_NO_JPEG
710 if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
711 #endif
712 #ifndef STBI_NO_PNM
713 if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
714 #endif
715
716 #ifndef STBI_NO_HDR
717 if (stbi__hdr_test(s)) {
718 float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
719 return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
720 }
721 #endif
722
723 #ifndef STBI_NO_TGA
724 // test tga last because it's a crappy test!
725 if (stbi__tga_test(s))
726 return stbi__tga_load(s,x,y,comp,req_comp, ri);
727 #endif
728
729 return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
730}
731
732static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
733{
734 int i;
735 int img_len = w * h * channels;
736 stbi_uc *reduced;
737
738 reduced = (stbi_uc *) stbi__malloc(img_len);
739 if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
740
741 for (i = 0; i < img_len; ++i)
742 reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
743
744 STBI_FREE(orig);
745 return reduced;
746}
747
748static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
749{
750 int i;
751 int img_len = w * h * channels;
752 stbi__uint16 *enlarged;
753
754 enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
755 if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
756
757 for (i = 0; i < img_len; ++i)
758 enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
759
760 STBI_FREE(orig);
761 return enlarged;
762}
763
764static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
765{
766 int row;
767 size_t bytes_per_row = (size_t)w * bytes_per_pixel;
768 stbi_uc temp[2048];
769 stbi_uc *bytes = (stbi_uc *)image;
770
771 for (row = 0; row < (h>>1); row++) {
772 stbi_uc *row0 = bytes + row*bytes_per_row;
773 stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
774 // swap row0 with row1
775 size_t bytes_left = bytes_per_row;
776 while (bytes_left) {
777 size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
778 memcpy(temp, row0, bytes_copy);
779 memcpy(row0, row1, bytes_copy);
780 memcpy(row1, temp, bytes_copy);
781 row0 += bytes_copy;
782 row1 += bytes_copy;
783 bytes_left -= bytes_copy;
784 }
785 }
786}
787
788#ifndef STBI_NO_GIF
789static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
790{
791 int slice;
792 int slice_size = w * h * bytes_per_pixel;
793
794 stbi_uc *bytes = (stbi_uc *)image;
795 for (slice = 0; slice < z; ++slice) {
796 stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
797 bytes += slice_size;
798 }
799}
800#endif
801
802static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
803{
804 stbi__result_info ri;
805 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
806
807 if (result == NULL)
808 return NULL;
809
810 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
811 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
812
813 if (ri.bits_per_channel != 8) {
814 result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
815 ri.bits_per_channel = 8;
816 }
817
818 // @TODO: move stbi__convert_format to here
819
820 if (stbi__vertically_flip_on_load) {
821 int channels = req_comp ? req_comp : *comp;
822 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
823 }
824
825 return (unsigned char *) result;
826}
827
828static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
829{
830 stbi__result_info ri;
831 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
832
833 if (result == NULL)
834 return NULL;
835
836 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
837 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
838
839 if (ri.bits_per_channel != 16) {
840 result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
841 ri.bits_per_channel = 16;
842 }
843
844 // @TODO: move stbi__convert_format16 to here
845 // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
846
847 if (stbi__vertically_flip_on_load) {
848 int channels = req_comp ? req_comp : *comp;
849 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
850 }
851
852 return (stbi__uint16 *) result;
853}
854
855#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
856static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
857{
858 if (stbi__vertically_flip_on_load && result != NULL) {
859 int channels = req_comp ? req_comp : *comp;
860 stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
861 }
862}
863#endif
864
865#ifndef STBI_NO_STDIO
866
// Win32 conversion routines are declared by hand here rather than by
// including a Windows header.
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
#endif

#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
// Converts the NUL-terminated wide string `input` to UTF-8 in `buffer`
// (capacity `bufferlen` bytes) and returns WideCharToMultiByte's result
// unchanged. Passing -1 as the input length asks the API to process up to
// and including the terminator.
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
   return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
}
#endif
878
// Opens `filename` with the given stdio `mode` and returns the stream, or a
// null pointer on failure.
//
// Three build variants:
//  - Windows with STBI_WINDOWS_UTF8: both strings are treated as UTF-8,
//    converted to wide characters (at most 1024 for the name and 64 for the
//    mode, terminator included) and opened with the wide-character API;
//  - MSVC 2005 and later (_MSC_VER >= 1400): fopen_s;
//  - everything else: plain fopen.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
   wchar_t wMode[64];
   wchar_t wFilename[1024];
   // a return of 0 means the conversion failed (for example the name does
   // not fit in wFilename)
   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
      return 0;

   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
      return 0;

#if defined(_MSC_VER) && _MSC_VER >= 1400
   // the _s variants report errors through the return value; normalize to a null stream
   if (0 != _wfopen_s(&f, wFilename, wMode))
      f = 0;
#else
   f = _wfopen(wFilename, wMode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}
906
907
908STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
909{
910 FILE *f = stbi__fopen(filename, "rb");
911 unsigned char *result;
912 if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
913 result = stbi_load_from_file(f,x,y,comp,req_comp);
914 fclose(f);
915 return result;
916}
917
918STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
919{
920 unsigned char *result;
921 stbi__context s;
922 stbi__start_file(&s,f);
923 result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
924 if (result) {
925 // need to 'unget' all the characters in the IO buffer
926 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
927 }
928 return result;
929}
930
931STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
932{
933 stbi__uint16 *result;
934 stbi__context s;
935 stbi__start_file(&s,f);
936 result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
937 if (result) {
938 // need to 'unget' all the characters in the IO buffer
939 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
940 }
941 return result;
942}
943
944STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
945{
946 FILE *f = stbi__fopen(filename, "rb");
947 stbi__uint16 *result;
948 if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
949 result = stbi_load_from_file_16(f,x,y,comp,req_comp);
950 fclose(f);
951 return result;
952}
953
954
955#endif //!STBI_NO_STDIO
956
957STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
958{
959 stbi__context s;
960 stbi__start_mem(&s,buffer,len);
961 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
962}
963
964STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
965{
966 stbi__context s;
967 stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
968 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
969}
970
971STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
972{
973 stbi__context s;
974 stbi__start_mem(&s,buffer,len);
975 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
976}
977
978STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
979{
980 stbi__context s;
981 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
982 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
983}
984
985#ifndef STBI_NO_GIF
986STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
987{
988 unsigned char *result;
989 stbi__context s;
990 stbi__start_mem(&s,buffer,len);
991
992 result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
993 if (stbi__vertically_flip_on_load) {
994 stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
995 }
996
997 return result;
998}
999#endif
1000
1001#ifndef STBI_NO_LINEAR
1002static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1003{
1004 unsigned char *data;
1005 #ifndef STBI_NO_HDR
1006 if (stbi__hdr_test(s)) {
1007 stbi__result_info ri;
1008 float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
1009 if (hdr_data)
1010 stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1011 return hdr_data;
1012 }
1013 #endif
1014 data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1015 if (data)
1016 return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1017 return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1018}
1019
1020STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1021{
1022 stbi__context s;
1023 stbi__start_mem(&s,buffer,len);
1024 return stbi__loadf_main(&s,x,y,comp,req_comp);
1025}
1026
1027STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1028{
1029 stbi__context s;
1030 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1031 return stbi__loadf_main(&s,x,y,comp,req_comp);
1032}
1033
1034#ifndef STBI_NO_STDIO
1035STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1036{
1037 float *result;
1038 FILE *f = stbi__fopen(filename, "rb");
1039 if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1040 result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1041 fclose(f);
1042 return result;
1043}
1044
1045STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1046{
1047 stbi__context s;
1048 stbi__start_file(&s,f);
1049 return stbi__loadf_main(&s,x,y,comp,req_comp);
1050}
1051#endif // !STBI_NO_STDIO
1052
1053#endif // !STBI_NO_LINEAR
1054
// these is-hdr-or-not functions are defined regardless of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
1058
1059STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1060{
1061 #ifndef STBI_NO_HDR
1062 stbi__context s;
1063 stbi__start_mem(&s,buffer,len);
1064 return stbi__hdr_test(&s);
1065 #else
1066 STBI_NOTUSED(buffer);
1067 STBI_NOTUSED(len);
1068 return 0;
1069 #endif
1070}
1071
1072#ifndef STBI_NO_STDIO
1073STBIDEF int stbi_is_hdr (char const *filename)
1074{
1075 FILE *f = stbi__fopen(filename, "rb");
1076 int result=0;
1077 if (f) {
1078 result = stbi_is_hdr_from_file(f);
1079 fclose(f);
1080 }
1081 return result;
1082}
1083
1084STBIDEF int stbi_is_hdr_from_file(FILE *f)
1085{
1086 #ifndef STBI_NO_HDR
1087 long pos = ftell(f);
1088 int res;
1089 stbi__context s;
1090 stbi__start_file(&s,f);
1091 res = stbi__hdr_test(&s);
1092 fseek(f, pos, SEEK_SET);
1093 return res;
1094 #else
1095 STBI_NOTUSED(f);
1096 return 0;
1097 #endif
1098}
1099#endif // !STBI_NO_STDIO
1100
1101STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1102{
1103 #ifndef STBI_NO_HDR
1104 stbi__context s;
1105 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1106 return stbi__hdr_test(&s);
1107 #else
1108 STBI_NOTUSED(clbk);
1109 STBI_NOTUSED(user);
1110 return 0;
1111 #endif
1112}
1113
1114#ifndef STBI_NO_LINEAR
1115static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1116
1117STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1118STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1119#endif
1120
1121static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1122
1123STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1124STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1125
1126
1127//////////////////////////////////////////////////////////////////////////////
1128//
1129// Common code used by all image loaders
1130//
1131
// Scan-mode constants with values 0, 1 and 2; their consumers are outside
// this part of the file.
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1138
1139static void stbi__refill_buffer(stbi__context *s)
1140{
1141 int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1142 s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
1143 if (n == 0) {
1144 // at end of file, treat same as if from memory, but need to handle case
1145 // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1146 s->read_from_callbacks = 0;
1147 s->img_buffer = s->buffer_start;
1148 s->img_buffer_end = s->buffer_start+1;
1149 *s->img_buffer = 0;
1150 } else {
1151 s->img_buffer = s->buffer_start;
1152 s->img_buffer_end = s->buffer_start + n;
1153 }
1154}
1155
1156stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1157{
1158 if (s->img_buffer < s->img_buffer_end)
1159 return *s->img_buffer++;
1160 if (s->read_from_callbacks) {
1161 stbi__refill_buffer(s);
1162 return *s->img_buffer++;
1163 }
1164 return 0;
1165}
1166
1167#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1168// nothing
1169#else
1170stbi_inline static int stbi__at_eof(stbi__context *s)
1171{
1172 if (s->io.read) {
1173 if (!(s->io.eof)(s->io_user_data)) return 0;
1174 // if feof() is true, check if buffer = end
1175 // special case: we've only got the special 0 character at the end
1176 if (s->read_from_callbacks == 0) return 1;
1177 }
1178
1179 return s->img_buffer >= s->img_buffer_end;
1180}
1181#endif
1182
1183#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
1184// nothing
1185#else
1186static void stbi__skip(stbi__context *s, int n)
1187{
1188 if (n == 0) return; // already there!
1189 if (n < 0) {
1190 s->img_buffer = s->img_buffer_end;
1191 return;
1192 }
1193 if (s->io.read) {
1194 int blen = (int) (s->img_buffer_end - s->img_buffer);
1195 if (blen < n) {
1196 s->img_buffer = s->img_buffer_end;
1197 (s->io.skip)(s->io_user_data, n - blen);
1198 return;
1199 }
1200 }
1201 s->img_buffer += n;
1202}
1203#endif
1204
1205#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
1206// nothing
1207#else
1208static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1209{
1210 if (s->io.read) {
1211 int blen = (int) (s->img_buffer_end - s->img_buffer);
1212 if (blen < n) {
1213 int res, count;
1214
1215 memcpy(buffer, s->img_buffer, blen);
1216
1217 count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1218 res = (count == (n-blen));
1219 s->img_buffer = s->img_buffer_end;
1220 return res;
1221 }
1222 }
1223
1224 if (s->img_buffer+n <= s->img_buffer_end) {
1225 memcpy(buffer, s->img_buffer, n);
1226 s->img_buffer += n;
1227 return 1;
1228 } else
1229 return 0;
1230}
1231#endif
1232
1233#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1234// nothing
1235#else
1236static int stbi__get16be(stbi__context *s)
1237{
1238 int z = stbi__get8(s);
1239 return (z << 8) + stbi__get8(s);
1240}
1241#endif
1242
1243#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1244// nothing
1245#else
1246static stbi__uint32 stbi__get32be(stbi__context *s)
1247{
1248 stbi__uint32 z = stbi__get16be(s);
1249 return (z << 16) + stbi__get16be(s);
1250}
1251#endif
1252
1253#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1254// nothing
1255#else
1256static int stbi__get16le(stbi__context *s)
1257{
1258 int z = stbi__get8(s);
1259 return z + (stbi__get8(s) << 8);
1260}
1261#endif
1262
1263#ifndef STBI_NO_BMP
1264static stbi__uint32 stbi__get32le(stbi__context *s)
1265{
1266 stbi__uint32 z = stbi__get16le(s);
1267 z += (stbi__uint32)stbi__get16le(s) << 16;
1268 return z;
1269}
1270#endif
1271
// Truncates an int to its low 8 bits and casts it to stbi_uc, so the
// narrowing does not trigger compiler warnings.
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))
1273
1274#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1275// nothing
1276#else
1277//////////////////////////////////////////////////////////////////////////////
1278//
1279// generic converter from built-in img_n to req_comp
1280// individual types do this automatically as much as possible (e.g. jpeg
1281// does all cases internally since it needs to colorspace convert anyway,
1282// and it never has alpha, so very few cases ). png can automatically
1283// interleave an alpha=255 channel, but falls back to this for other cases
1284//
1285// assume data buffer is malloced, so malloc a new one and free that one
1286// only failure mode is malloc failing
1287
1288static stbi_uc stbi__compute_y(int r, int g, int b)
1289{
1290 return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1291}
1292#endif
1293
1294#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1295// nothing
1296#else
// Converts an 8-bit image of `x` by `y` pixels from `img_n` to `req_comp`
// channels per pixel.
// If the counts already match, `data` is returned unchanged. Otherwise a new
// buffer is allocated, filled and returned, and `data` is freed (also on the
// failure paths, which return through stbi__errpuc).
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int col, row;
   unsigned char *converted;

   if (img_n == req_comp) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   converted = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (!converted) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (row=0; row < (int) y; ++row) {
      unsigned char *src  = data      + row * x * img_n   ;
      unsigned char *dest = converted + row * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(col=x-1; col >= 0; --col, src += a, dest += b)
      // one switch per scanline (not per pixel); each case is a loop over the row
      switch (STBI__COMBO(img_n, req_comp)) {
         // grey source
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
         // grey + alpha source
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
         // RGB source
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
         // RGBA source
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(converted); return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return converted;
}
1340#endif
1341
1342#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1343// nothing
1344#else
1345static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1346{
1347 return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
1348}
1349#endif
1350
1351#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1352// nothing
1353#else
1354static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1355{
1356 int i,j;
1357 stbi__uint16 *good;
1358
1359 if (req_comp == img_n) return data;
1360 STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1361
1362 good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1363 if (good == NULL) {
1364 STBI_FREE(data);
1365 return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1366 }
1367
1368 for (j=0; j < (int) y; ++j) {
1369 stbi__uint16 *src = data + j * x * img_n ;
1370 stbi__uint16 *dest = good + j * x * req_comp;
1371
1372 #define STBI__COMBO(a,b) ((a)*8+(b))
1373 #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1374 // convert source image with img_n components to one with req_comp components;
1375 // avoid switch per pixel, so use switch per scanline and massive macros
1376 switch (STBI__COMBO(img_n, req_comp)) {
1377 STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break;
1378 STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1379 STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break;
1380 STBI__CASE(2,1) { dest[0]=src[0]; } break;
1381 STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1382 STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
1383 STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break;
1384 STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1385 STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
1386 STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1387 STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1388 STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
1389 default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
1390 }
1391 #undef STBI__CASE
1392 }
1393
1394 STBI_FREE(data);
1395 return good;
1396}
1397#endif
1398
1399#ifndef STBI_NO_LINEAR
1400static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1401{
1402 int i,k,n;
1403 float *output;
1404 if (!data) return NULL;
1405 output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1406 if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1407 // compute number of non-alpha components
1408 if (comp & 1) n = comp; else n = comp-1;
1409 for (i=0; i < x*y; ++i) {
1410 for (k=0; k < n; ++k) {
1411 output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1412 }
1413 }
1414 if (n < comp) {
1415 for (i=0; i < x*y; ++i) {
1416 output[i*comp + n] = data[i*comp + n]/255.0f;
1417 }
1418 }
1419 STBI_FREE(data);
1420 return output;
1421}
1422#endif
1423
1424#ifndef STBI_NO_HDR
1425#define stbi__float2int(x) ((int) (x))
1426static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1427{
1428 int i,k,n;
1429 stbi_uc *output;
1430 if (!data) return NULL;
1431 output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1432 if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1433 // compute number of non-alpha components
1434 if (comp & 1) n = comp; else n = comp-1;
1435 for (i=0; i < x*y; ++i) {
1436 for (k=0; k < n; ++k) {
1437 float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1438 if (z < 0) z = 0;
1439 if (z > 255) z = 255;
1440 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1441 }
1442 if (k < comp) {
1443 float z = data[i*comp+k] * 255 + 0.5f;
1444 if (z < 0) z = 0;
1445 if (z > 255) z = 255;
1446 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1447 }
1448 }
1449 STBI_FREE(data);
1450 return output;
1451}
1452#endif
1453
1454//////////////////////////////////////////////////////////////////////////////
1455//
1456// "baseline" JPEG/JFIF decoder
1457//
1458// simple implementation
1459// - doesn't support delayed output of y-dimension
1460// - simple interface (only one output format: 8-bit interleaved RGB)
1461// - doesn't try to recover corrupt jpegs
1462// - doesn't allow partial loading, loading multiple at once
1463// - still fast on x86 (copying globals into locals doesn't help x86)
1464// - allocates lots of intermediate memory (full size of all components)
1465// - non-interleaved case requires this anyway
1466// - allows good upsampling (see next)
1467// high-quality
1468// - upsampled channels are bilinearly interpolated, even across blocks
1469// - quality integer IDCT derived from IJG's 'slow'
1470// performance
1471// - fast huffman; reasonable integer IDCT
1472// - some SIMD kernels for common paths on targets with SSE2/NEON
1473// - uses a lot of intermediate memory, could cache poorly
1474
1475#ifndef STBI_NO_JPEG
1476
1477// huffman decoding acceleration
1478#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1479
1480typedef struct
1481{
1482 stbi_uc fast[1 << FAST_BITS];
1483 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1484 stbi__uint16 code[256];
1485 stbi_uc values[256];
1486 stbi_uc size[257];
1487 unsigned int maxcode[18];
1488 int delta[17]; // old 'firstsymbol' - old 'firstcode'
1489} stbi__huffman;
1490
1491typedef struct
1492{
1493 stbi__context *s;
1494 stbi__huffman huff_dc[4];
1495 stbi__huffman huff_ac[4];
1496 stbi__uint16 dequant[4][64];
1497 stbi__int16 fast_ac[4][1 << FAST_BITS];
1498
1499// sizes for components, interleaved MCUs
1500 int img_h_max, img_v_max;
1501 int img_mcu_x, img_mcu_y;
1502 int img_mcu_w, img_mcu_h;
1503
1504// definition of jpeg image component
1505 struct
1506 {
1507 int id;
1508 int h,v;
1509 int tq;
1510 int hd,ha;
1511 int dc_pred;
1512
1513 int x,y,w2,h2;
1514 stbi_uc *data;
1515 void *raw_data, *raw_coeff;
1516 stbi_uc *linebuf;
1517 short *coeff; // progressive only
1518 int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1519 } img_comp[4];
1520
1521 stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1522 int code_bits; // number of valid bits
1523 unsigned char marker; // marker seen while filling entropy buffer
1524 int nomore; // flag if we saw a marker so must stop
1525
1526 int progressive;
1527 int spec_start;
1528 int spec_end;
1529 int succ_high;
1530 int succ_low;
1531 int eob_run;
1532 int jfif;
1533 int app14_color_transform; // Adobe APP14 tag
1534 int rgb;
1535
1536 int scan_n, order[4];
1537 int restart_interval, todo;
1538
1539// kernels
1540 void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1541 void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1542 stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1543} stbi__jpeg;
1544
1545static int stbi__build_huffman(stbi__huffman *h, int *count)
1546{
1547 int i,j,k=0;
1548 unsigned int code;
1549 // build size list for each symbol (from JPEG spec)
1550 for (i=0; i < 16; ++i) {
1551 for (j=0; j < count[i]; ++j) {
1552 h->size[k++] = (stbi_uc) (i+1);
1553 if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
1554 }
1555 }
1556 h->size[k] = 0;
1557
1558 // compute actual symbols (from jpeg spec)
1559 code = 0;
1560 k = 0;
1561 for(j=1; j <= 16; ++j) {
1562 // compute delta to add to code to compute symbol id
1563 h->delta[j] = k - code;
1564 if (h->size[k] == j) {
1565 while (h->size[k] == j)
1566 h->code[k++] = (stbi__uint16) (code++);
1567 if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
1568 }
1569 // compute largest code + 1 for this size, preshifted as needed later
1570 h->maxcode[j] = code << (16-j);
1571 code <<= 1;
1572 }
1573 h->maxcode[j] = 0xffffffff;
1574
1575 // build non-spec acceleration table; 255 is flag for not-accelerated
1576 memset(h->fast, 255, 1 << FAST_BITS);
1577 for (i=0; i < k; ++i) {
1578 int s = h->size[i];
1579 if (s <= FAST_BITS) {
1580 int c = h->code[i] << (FAST_BITS-s);
1581 int m = 1 << (FAST_BITS-s);
1582 for (j=0; j < m; ++j) {
1583 h->fast[c+j] = (stbi_uc) i;
1584 }
1585 }
1586 }
1587 return 1;
1588}
1589
1590// build a table that decodes both magnitude and value of small ACs in
1591// one go.
1592static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1593{
1594 int i;
1595 for (i=0; i < (1 << FAST_BITS); ++i) {
1596 stbi_uc fast = h->fast[i];
1597 fast_ac[i] = 0;
1598 if (fast < 255) {
1599 int rs = h->values[fast];
1600 int run = (rs >> 4) & 15;
1601 int magbits = rs & 15;
1602 int len = h->size[fast];
1603
1604 if (magbits && len + magbits <= FAST_BITS) {
1605 // magnitude code followed by receive_extend code
1606 int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1607 int m = 1 << (magbits - 1);
1608 if (k < m) k += (~0U << magbits) + 1;
1609 // if the result is small enough, we can fit it in fast_ac table
1610 if (k >= -128 && k <= 127)
1611 fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1612 }
1613 }
1614 }
1615}
1616
1617static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1618{
1619 do {
1620 unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
1621 if (b == 0xff) {
1622 int c = stbi__get8(j->s);
1623 while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
1624 if (c != 0) {
1625 j->marker = (unsigned char) c;
1626 j->nomore = 1;
1627 return;
1628 }
1629 }
1630 j->code_buffer |= b << (24 - j->code_bits);
1631 j->code_bits += 8;
1632 } while (j->code_bits <= 24);
1633}
1634
1635// (1 << n) - 1
1636static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1637
1638// decode a jpeg huffman value from the bitstream
1639stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1640{
1641 unsigned int temp;
1642 int c,k;
1643
1644 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1645
1646 // look at the top FAST_BITS and determine what symbol ID it is,
1647 // if the code is <= FAST_BITS
1648 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1649 k = h->fast[c];
1650 if (k < 255) {
1651 int s = h->size[k];
1652 if (s > j->code_bits)
1653 return -1;
1654 j->code_buffer <<= s;
1655 j->code_bits -= s;
1656 return h->values[k];
1657 }
1658
1659 // naive test is to shift the code_buffer down so k bits are
1660 // valid, then test against maxcode. To speed this up, we've
1661 // preshifted maxcode left so that it has (16-k) 0s at the
1662 // end; in other words, regardless of the number of bits, it
1663 // wants to be compared against something shifted to have 16;
1664 // that way we don't need to shift inside the loop.
1665 temp = j->code_buffer >> 16;
1666 for (k=FAST_BITS+1 ; ; ++k)
1667 if (temp < h->maxcode[k])
1668 break;
1669 if (k == 17) {
1670 // error! code not found
1671 j->code_bits -= 16;
1672 return -1;
1673 }
1674
1675 if (k > j->code_bits)
1676 return -1;
1677
1678 // convert the huffman code to the symbol id
1679 c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1680 if(c < 0 || c >= 256) // symbol id out of bounds!
1681 return -1;
1682 STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1683
1684 // convert the id to a symbol
1685 j->code_bits -= k;
1686 j->code_buffer <<= k;
1687 return h->values[c];
1688}
1689
// stbi__jbias[n] == -(2^n) + 1 for n = 0..15
static const int stbi__jbias[16] = {
        0,     -1,     -3,     -7,
      -15,    -31,    -63,   -127,
     -255,   -511,  -1023,  -2047,
    -4095,  -8191, -16383, -32767
};
1692
1693// combined JPEG 'receive' and JPEG 'extend', since baseline
1694// always extends everything it receives.
1695stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1696{
1697 unsigned int k;
1698 int sgn;
1699 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1700 if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
1701
1702 sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
1703 k = stbi_lrot(j->code_buffer, n);
1704 j->code_buffer = k & ~stbi__bmask[n];
1705 k &= stbi__bmask[n];
1706 j->code_bits -= n;
1707 return k + (stbi__jbias[n] & (sgn - 1));
1708}
1709
1710// get some unsigned bits
1711stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1712{
1713 unsigned int k;
1714 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1715 if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
1716 k = stbi_lrot(j->code_buffer, n);
1717 j->code_buffer = k & ~stbi__bmask[n];
1718 k &= stbi__bmask[n];
1719 j->code_bits -= n;
1720 return k;
1721}
1722
1723stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1724{
1725 unsigned int k;
1726 if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1727 if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing
1728 k = j->code_buffer;
1729 j->code_buffer <<= 1;
1730 --j->code_bits;
1731 return k & 0x80000000;
1732}
1733
1734// given a value that's at position X in the zigzag stream,
1735// where does it appear in the 8x8 matrix coded as row-major?
1736static const stbi_uc stbi__jpeg_dezigzag[64+15] =
1737{
1738 0, 1, 8, 16, 9, 2, 3, 10,
1739 17, 24, 32, 25, 18, 11, 4, 5,
1740 12, 19, 26, 33, 40, 48, 41, 34,
1741 27, 20, 13, 6, 7, 14, 21, 28,
1742 35, 42, 49, 56, 57, 50, 43, 36,
1743 29, 22, 15, 23, 30, 37, 44, 51,
1744 58, 59, 52, 45, 38, 31, 39, 46,
1745 53, 60, 61, 54, 47, 55, 62, 63,
1746 // let corrupt input sample past end
1747 63, 63, 63, 63, 63, 63, 63, 63,
1748 63, 63, 63, 63, 63, 63, 63
1749};
1750
1751// decode one 64-entry block--
1752static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1753{
1754 int diff,dc,k;
1755 int t;
1756
1757 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1758 t = stbi__jpeg_huff_decode(j, hdc);
1759 if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
1760
1761 // 0 all the ac values now so we can do it 32-bits at a time
1762 memset(data,0,64*sizeof(data[0]));
1763
1764 diff = t ? stbi__extend_receive(j, t) : 0;
1765 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
1766 dc = j->img_comp[b].dc_pred + diff;
1767 j->img_comp[b].dc_pred = dc;
1768 if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1769 data[0] = (short) (dc * dequant[0]);
1770
1771 // decode AC components, see JPEG spec
1772 k = 1;
1773 do {
1774 unsigned int zig;
1775 int c,r,s;
1776 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1777 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1778 r = fac[c];
1779 if (r) { // fast-AC path
1780 k += (r >> 4) & 15; // run
1781 s = r & 15; // combined length
1782 if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
1783 j->code_buffer <<= s;
1784 j->code_bits -= s;
1785 // decode into unzigzag'd location
1786 zig = stbi__jpeg_dezigzag[k++];
1787 data[zig] = (short) ((r >> 8) * dequant[zig]);
1788 } else {
1789 int rs = stbi__jpeg_huff_decode(j, hac);
1790 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1791 s = rs & 15;
1792 r = rs >> 4;
1793 if (s == 0) {
1794 if (rs != 0xf0) break; // end block
1795 k += 16;
1796 } else {
1797 k += r;
1798 // decode into unzigzag'd location
1799 zig = stbi__jpeg_dezigzag[k++];
1800 data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1801 }
1802 }
1803 } while (k < 64);
1804 return 1;
1805}
1806
1807static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
1808{
1809 int diff,dc;
1810 int t;
1811 if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1812
1813 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1814
1815 if (j->succ_high == 0) {
1816 // first scan for DC coefficient, must be first
1817 memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
1818 t = stbi__jpeg_huff_decode(j, hdc);
1819 if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1820 diff = t ? stbi__extend_receive(j, t) : 0;
1821
1822 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
1823 dc = j->img_comp[b].dc_pred + diff;
1824 j->img_comp[b].dc_pred = dc;
1825 if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1826 data[0] = (short) (dc * (1 << j->succ_low));
1827 } else {
1828 // refinement scan for DC coefficient
1829 if (stbi__jpeg_get_bit(j))
1830 data[0] += (short) (1 << j->succ_low);
1831 }
1832 return 1;
1833}
1834
1835// @OPTIMIZE: store non-zigzagged during the decode passes,
1836// and only de-zigzag when dequantizing
1837static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1838{
1839 int k;
1840 if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1841
1842 if (j->succ_high == 0) {
1843 int shift = j->succ_low;
1844
1845 if (j->eob_run) {
1846 --j->eob_run;
1847 return 1;
1848 }
1849
1850 k = j->spec_start;
1851 do {
1852 unsigned int zig;
1853 int c,r,s;
1854 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1855 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1856 r = fac[c];
1857 if (r) { // fast-AC path
1858 k += (r >> 4) & 15; // run
1859 s = r & 15; // combined length
1860 if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
1861 j->code_buffer <<= s;
1862 j->code_bits -= s;
1863 zig = stbi__jpeg_dezigzag[k++];
1864 data[zig] = (short) ((r >> 8) * (1 << shift));
1865 } else {
1866 int rs = stbi__jpeg_huff_decode(j, hac);
1867 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1868 s = rs & 15;
1869 r = rs >> 4;
1870 if (s == 0) {
1871 if (r < 15) {
1872 j->eob_run = (1 << r);
1873 if (r)
1874 j->eob_run += stbi__jpeg_get_bits(j, r);
1875 --j->eob_run;
1876 break;
1877 }
1878 k += 16;
1879 } else {
1880 k += r;
1881 zig = stbi__jpeg_dezigzag[k++];
1882 data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
1883 }
1884 }
1885 } while (k <= j->spec_end);
1886 } else {
1887 // refinement scan for these AC coefficients
1888
1889 short bit = (short) (1 << j->succ_low);
1890
1891 if (j->eob_run) {
1892 --j->eob_run;
1893 for (k = j->spec_start; k <= j->spec_end; ++k) {
1894 short *p = &data[stbi__jpeg_dezigzag[k]];
1895 if (*p != 0)
1896 if (stbi__jpeg_get_bit(j))
1897 if ((*p & bit)==0) {
1898 if (*p > 0)
1899 *p += bit;
1900 else
1901 *p -= bit;
1902 }
1903 }
1904 } else {
1905 k = j->spec_start;
1906 do {
1907 int r,s;
1908 int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
1909 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1910 s = rs & 15;
1911 r = rs >> 4;
1912 if (s == 0) {
1913 if (r < 15) {
1914 j->eob_run = (1 << r) - 1;
1915 if (r)
1916 j->eob_run += stbi__jpeg_get_bits(j, r);
1917 r = 64; // force end of block
1918 } else {
1919 // r=15 s=0 should write 16 0s, so we just do
1920 // a run of 15 0s and then write s (which is 0),
1921 // so we don't have to do anything special here
1922 }
1923 } else {
1924 if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
1925 // sign bit
1926 if (stbi__jpeg_get_bit(j))
1927 s = bit;
1928 else
1929 s = -bit;
1930 }
1931
1932 // advance by r
1933 while (k <= j->spec_end) {
1934 short *p = &data[stbi__jpeg_dezigzag[k++]];
1935 if (*p != 0) {
1936 if (stbi__jpeg_get_bit(j))
1937 if ((*p & bit)==0) {
1938 if (*p > 0)
1939 *p += bit;
1940 else
1941 *p -= bit;
1942 }
1943 } else {
1944 if (r == 0) {
1945 *p = (short) s;
1946 break;
1947 }
1948 --r;
1949 }
1950 }
1951 } while (k <= j->spec_end);
1952 }
1953 }
1954 return 1;
1955}
1956
1957// take a -128..127 value and stbi__clamp it and convert to 0..255
1958stbi_inline static stbi_uc stbi__clamp(int x)
1959{
1960 // trick to use a single test to catch both cases
1961 if ((unsigned int) x > 255) {
1962 if (x < 0) return 0;
1963 if (x > 255) return 255;
1964 }
1965 return (stbi_uc) x;
1966}
1967
// stbi__f2f: turn a floating-point constant into a fixed-point integer scaled
// by 4096 (12 fractional bits); 0.5 is added before the truncating int cast.
1968#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
// stbi__fsh: scale an integer expression by the same factor of 4096 so it can
// be combined with products of stbi__f2f constants.
1969#define stbi__fsh(x) ((x) * 4096)
1970
1971// derived from jidctint -- DCT_ISLOW
// STBI__IDCT_1D: one 8-point 1D inverse DCT over the inputs s0..s7.
// The expansion begins with a declaration of the ints t0..t3, p1..p5 and
// x0..x3, followed by statements, so it has to be placed at the start of a
// block and at most once per block.
// It leaves its results in those locals rather than writing outputs:
//   x0..x3 - even-part terms (from s0,s2,s4,s6)
//   t0..t3 - odd-part terms  (from s1,s3,s5,s7)
// all scaled up by 4096. The user forms the eight outputs as x0+t3, x1+t2,
// x2+t1, x3+t0 and x3-t0, x2-t1, x1-t2, x0-t3 and shifts the scale back down.
1972#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
1973 int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
1974 p2 = s2; \
1975 p3 = s6; \
1976 p1 = (p2+p3) * stbi__f2f(0.5411961f); \
1977 t2 = p1 + p3*stbi__f2f(-1.847759065f); \
1978 t3 = p1 + p2*stbi__f2f( 0.765366865f); \
1979 p2 = s0; \
1980 p3 = s4; \
1981 t0 = stbi__fsh(p2+p3); \
1982 t1 = stbi__fsh(p2-p3); \
1983 x0 = t0+t3; \
1984 x3 = t0-t3; \
1985 x1 = t1+t2; \
1986 x2 = t1-t2; \
1987 t0 = s7; \
1988 t1 = s5; \
1989 t2 = s3; \
1990 t3 = s1; \
1991 p3 = t0+t2; \
1992 p4 = t1+t3; \
1993 p1 = t0+t3; \
1994 p2 = t1+t2; \
1995 p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
1996 t0 = t0*stbi__f2f( 0.298631336f); \
1997 t1 = t1*stbi__f2f( 2.053119869f); \
1998 t2 = t2*stbi__f2f( 3.072711026f); \
1999 t3 = t3*stbi__f2f( 1.501321110f); \
2000 p1 = p5 + p1*stbi__f2f(-0.899976223f); \
2001 p2 = p5 + p2*stbi__f2f(-2.562915447f); \
2002 p3 = p3*stbi__f2f(-1.961570560f); \
2003 p4 = p4*stbi__f2f(-0.390180644f); \
2004 t3 += p1+p4; \
2005 t2 += p2+p3; \
2006 t1 += p2+p4; \
2007 t0 += p1+p3;
2008
2009static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2010{
2011 int i,val[64],*v=val;
2012 stbi_uc *o;
2013 short *d = data;
2014
2015 // columns
2016 for (i=0; i < 8; ++i,++d, ++v) {
2017 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2018 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2019 && d[40]==0 && d[48]==0 && d[56]==0) {
2020 // no shortcut 0 seconds
2021 // (1|2|3|4|5|6|7)==0 0 seconds
2022 // all separate -0.047 seconds
2023 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
2024 int dcterm = d[0]*4;
2025 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2026 } else {
2027 STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2028 // constants scaled things up by 1<<12; let's bring them back
2029 // down, but keep 2 extra bits of precision
2030 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2031 v[ 0] = (x0+t3) >> 10;
2032 v[56] = (x0-t3) >> 10;
2033 v[ 8] = (x1+t2) >> 10;
2034 v[48] = (x1-t2) >> 10;
2035 v[16] = (x2+t1) >> 10;
2036 v[40] = (x2-t1) >> 10;
2037 v[24] = (x3+t0) >> 10;
2038 v[32] = (x3-t0) >> 10;
2039 }
2040 }
2041
2042 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
2043 // no fast case since the first 1D IDCT spread components out
2044 STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2045 // constants scaled things up by 1<<12, plus we had 1<<2 from first
2046 // loop, plus horizontal and vertical each scale by sqrt(8) so together
2047 // we've got an extra 1<<3, so 1<<17 total we need to remove.
2048 // so we want to round that, which means adding 0.5 * 1<<17,
2049 // aka 65536. Also, we'll end up with -128 to 127 that we want
2050 // to encode as 0..255 by adding 128, so we'll add that before the shift
2051 x0 += 65536 + (128<<17);
2052 x1 += 65536 + (128<<17);
2053 x2 += 65536 + (128<<17);
2054 x3 += 65536 + (128<<17);
2055 // tried computing the shifts into temps, or'ing the temps to see
2056 // if any were out of range, but that was slower
2057 o[0] = stbi__clamp((x0+t3) >> 17);
2058 o[7] = stbi__clamp((x0-t3) >> 17);
2059 o[1] = stbi__clamp((x1+t2) >> 17);
2060 o[6] = stbi__clamp((x1-t2) >> 17);
2061 o[2] = stbi__clamp((x2+t1) >> 17);
2062 o[5] = stbi__clamp((x2-t1) >> 17);
2063 o[3] = stbi__clamp((x3+t0) >> 17);
2064 o[4] = stbi__clamp((x3-t0) >> 17);
2065 }
2066}
2067
2068#ifdef STBI_SSE2
2069// sse2 integer IDCT. not the fastest possible implementation but it
2070// produces bit-identical results to the generic C version so it's
2071// fully "transparent".
// Same signature as stbi__idct_block:
//   out        - destination of the block's first row; 8 bytes are stored per row
//   out_stride - distance in bytes from one output row to the next
//   data       - the 64 input coefficients; they are read with _mm_load_si128,
//                so the array has to be 16-byte aligned
2072static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2073{
2074 // This is constructed to match our regular (generic) integer IDCT exactly.
2075 __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2076 __m128i tmp;
2077
2078 // dot product constant: even elems=x, odd elems=y
2079 #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2080
2081 // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
2082 // out(1) = c1[even]*x + c1[odd]*y
 // declares <c0>lo/<c0>hi plus <out0>_l/_h and <out1>_l/_h, so each constant
 // name may be used as c0 only once per scope
2083 #define dct_rot(out0,out1, x,y,c0,c1) \
2084 __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2085 __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2086 __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2087 __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2088 __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2089 __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2090
2091 // out = in << 12 (in 16-bit, out 32-bit)
 // done by placing 'in' in the upper 16 bits of each 32-bit lane (unpack with
 // zero below it) and arithmetic-shifting right by 4
2092 #define dct_widen(out, in) \
2093 __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2094 __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2095
2096 // wide add
2097 #define dct_wadd(out, a, b) \
2098 __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2099 __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2100
2101 // wide sub
2102 #define dct_wsub(out, a, b) \
2103 __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2104 __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2105
2106 // butterfly a/b, add bias, then shift by "s" and pack
2107 #define dct_bfly32o(out0, out1, a,b,bias,s) \
2108 { \
2109 __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2110 __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2111 dct_wadd(sum, abiased, b); \
2112 dct_wsub(dif, abiased, b); \
2113 out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2114 out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2115 }
2116
2117 // 8-bit interleave step (for transposes)
2118 #define dct_interleave8(a, b) \
2119 tmp = a; \
2120 a = _mm_unpacklo_epi8(a, b); \
2121 b = _mm_unpackhi_epi8(tmp, b)
2122
2123 // 16-bit interleave step (for transposes)
2124 #define dct_interleave16(a, b) \
2125 tmp = a; \
2126 a = _mm_unpacklo_epi16(a, b); \
2127 b = _mm_unpackhi_epi16(tmp, b)
2128
 // one complete 1D IDCT pass over row0..row7, in place: the even part uses
 // rows 0,2,4,6 and the odd part rows 1,3,5,7, then four butterflies add
 // 'bias', shift right by 'shift' and pack back to 16 bits. uses the rotN_M
 // constants declared below, which must be in scope where it is expanded.
2129 #define dct_pass(bias,shift) \
2130 { \
2131 /* even part */ \
2132 dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2133 __m128i sum04 = _mm_add_epi16(row0, row4); \
2134 __m128i dif04 = _mm_sub_epi16(row0, row4); \
2135 dct_widen(t0e, sum04); \
2136 dct_widen(t1e, dif04); \
2137 dct_wadd(x0, t0e, t3e); \
2138 dct_wsub(x3, t0e, t3e); \
2139 dct_wadd(x1, t1e, t2e); \
2140 dct_wsub(x2, t1e, t2e); \
2141 /* odd part */ \
2142 dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2143 dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2144 __m128i sum17 = _mm_add_epi16(row1, row7); \
2145 __m128i sum35 = _mm_add_epi16(row3, row5); \
2146 dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2147 dct_wadd(x4, y0o, y4o); \
2148 dct_wadd(x5, y1o, y5o); \
2149 dct_wadd(x6, y2o, y5o); \
2150 dct_wadd(x7, y3o, y4o); \
2151 dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2152 dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2153 dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2154 dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2155 }
2156
 // constant pairs for dct_rot, built from the same stbi__f2f values that
 // STBI__IDCT_1D multiplies by
2157 __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2158 __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2159 __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2160 __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2161 __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2162 __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2163 __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2164 __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2165
2166 // rounding biases in column/row passes, see stbi__idct_block for explanation.
2167 __m128i bias_0 = _mm_set1_epi32(512);
2168 __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2169
2170 // load
2171 row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2172 row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2173 row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2174 row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2175 row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2176 row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2177 row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2178 row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2179
2180 // column pass
2181 dct_pass(bias_0, 10);
2182
2183 {
2184 // 16bit 8x8 transpose pass 1
2185 dct_interleave16(row0, row4);
2186 dct_interleave16(row1, row5);
2187 dct_interleave16(row2, row6);
2188 dct_interleave16(row3, row7);
2189
2190 // transpose pass 2
2191 dct_interleave16(row0, row2);
2192 dct_interleave16(row1, row3);
2193 dct_interleave16(row4, row6);
2194 dct_interleave16(row5, row7);
2195
2196 // transpose pass 3
2197 dct_interleave16(row0, row1);
2198 dct_interleave16(row2, row3);
2199 dct_interleave16(row4, row5);
2200 dct_interleave16(row6, row7);
2201 }
2202
2203 // row pass
2204 dct_pass(bias_1, 17);
2205
2206 {
2207 // pack
 // _mm_packus_epi16 saturates each 16-bit value to 0..255
2208 __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2209 __m128i p1 = _mm_packus_epi16(row2, row3);
2210 __m128i p2 = _mm_packus_epi16(row4, row5);
2211 __m128i p3 = _mm_packus_epi16(row6, row7);
2212
2213 // 8bit 8x8 transpose pass 1
2214 dct_interleave8(p0, p2); // a0e0a1e1...
2215 dct_interleave8(p1, p3); // c0g0c1g1...
2216
2217 // transpose pass 2
2218 dct_interleave8(p0, p1); // a0c0e0g0...
2219 dct_interleave8(p2, p3); // b0d0f0h0...
2220
2221 // transpose pass 3
2222 dct_interleave8(p0, p2); // a0b0c0d0...
2223 dct_interleave8(p1, p3); // a4b4c4d4...
2224
2225 // store
 // each register now holds two output rows: the low 8 bytes are stored
 // directly, and the 0x4e shuffle swaps the 64-bit halves so the high
 // 8 bytes can be stored as the following row
2226 _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2227 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2228 _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2229 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2230 _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2231 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2232 _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2233 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2234 }
2235
2236#undef dct_const
2237#undef dct_rot
2238#undef dct_widen
2239#undef dct_wadd
2240#undef dct_wsub
2241#undef dct_bfly32o
2242#undef dct_interleave8
2243#undef dct_interleave16
2244#undef dct_pass
2245}
2246
2247#endif // STBI_SSE2
2248
2249#ifdef STBI_NEON
2250
2251// NEON integer IDCT. should produce bit-identical
2252// results to the generic C version.
// Same signature as stbi__idct_block:
//   out        - destination of the block's first row; 8 bytes are stored per row
//   out_stride - distance in bytes from one output row to the next
//   data       - the 64 input coefficients
2253static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2254{
2255 int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2256
 // the same stbi__f2f constants STBI__IDCT_1D multiplies by, one per vector
2257 int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2258 int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2259 int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2260 int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2261 int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2262 int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2263 int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2264 int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2265 int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2266 int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2267 int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2268 int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2269
// out = inq * coeff, widened to 32 bits (declares out_l / out_h)
2270#define dct_long_mul(out, inq, coeff) \
2271 int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2272 int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2273
// out = acc + inq * coeff, widened to 32 bits (declares out_l / out_h)
2274#define dct_long_mac(out, acc, inq, coeff) \
2275 int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2276 int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2277
// out = inq << 12, widened to 32 bits (declares out_l / out_h)
2278#define dct_widen(out, inq) \
2279 int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2280 int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2281
2282// wide add
2283#define dct_wadd(out, a, b) \
2284 int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2285 int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2286
2287// wide sub
2288#define dct_wsub(out, a, b) \
2289 int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2290 int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2291
2292// butterfly a/b, then shift using "shiftop" by "s" and pack
2293#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2294 { \
2295 dct_wadd(sum, a, b); \
2296 dct_wsub(dif, a, b); \
2297 out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2298 out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2299 }
2300
// one complete 1D IDCT pass over row0..row7, in place: the even part uses
// rows 0,2,4,6 and the odd part rows 1,3,5,7, then four butterflies narrow
// back to 16 bits with "shiftop" by "shift"
2301#define dct_pass(shiftop, shift) \
2302 { \
2303 /* even part */ \
2304 int16x8_t sum26 = vaddq_s16(row2, row6); \
2305 dct_long_mul(p1e, sum26, rot0_0); \
2306 dct_long_mac(t2e, p1e, row6, rot0_1); \
2307 dct_long_mac(t3e, p1e, row2, rot0_2); \
2308 int16x8_t sum04 = vaddq_s16(row0, row4); \
2309 int16x8_t dif04 = vsubq_s16(row0, row4); \
2310 dct_widen(t0e, sum04); \
2311 dct_widen(t1e, dif04); \
2312 dct_wadd(x0, t0e, t3e); \
2313 dct_wsub(x3, t0e, t3e); \
2314 dct_wadd(x1, t1e, t2e); \
2315 dct_wsub(x2, t1e, t2e); \
2316 /* odd part */ \
2317 int16x8_t sum15 = vaddq_s16(row1, row5); \
2318 int16x8_t sum17 = vaddq_s16(row1, row7); \
2319 int16x8_t sum35 = vaddq_s16(row3, row5); \
2320 int16x8_t sum37 = vaddq_s16(row3, row7); \
2321 int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2322 dct_long_mul(p5o, sumodd, rot1_0); \
2323 dct_long_mac(p1o, p5o, sum17, rot1_1); \
2324 dct_long_mac(p2o, p5o, sum35, rot1_2); \
2325 dct_long_mul(p3o, sum37, rot2_0); \
2326 dct_long_mul(p4o, sum15, rot2_1); \
2327 dct_wadd(sump13o, p1o, p3o); \
2328 dct_wadd(sump24o, p2o, p4o); \
2329 dct_wadd(sump23o, p2o, p3o); \
2330 dct_wadd(sump14o, p1o, p4o); \
2331 dct_long_mac(x4, sump13o, row7, rot3_0); \
2332 dct_long_mac(x5, sump24o, row5, rot3_1); \
2333 dct_long_mac(x6, sump23o, row3, rot3_2); \
2334 dct_long_mac(x7, sump14o, row1, rot3_3); \
2335 dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2336 dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2337 dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2338 dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2339 }
2340
2341 // load
2342 row0 = vld1q_s16(data + 0*8);
2343 row1 = vld1q_s16(data + 1*8);
2344 row2 = vld1q_s16(data + 2*8);
2345 row3 = vld1q_s16(data + 3*8);
2346 row4 = vld1q_s16(data + 4*8);
2347 row5 = vld1q_s16(data + 5*8);
2348 row6 = vld1q_s16(data + 6*8);
2349 row7 = vld1q_s16(data + 7*8);
2350
2351 // add DC bias
 // 1024 goes into lane 0 of row0 only, i.e. onto the DC coefficient. this
 // takes the place of the 128<<17 term stbi__idct_block adds in its second
 // pass (presumably 1024 on the DC term equals +128 per sample - TODO confirm)
2352 row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2353
2354 // column pass
2355 dct_pass(vrshrn_n_s32, 10);
2356
2357 // 16bit 8x8 transpose
2358 {
2359// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2360// whether compilers actually get this is another story, sadly.
2361#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2362#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2363#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2364
2365 // pass 1
2366 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2367 dct_trn16(row2, row3);
2368 dct_trn16(row4, row5);
2369 dct_trn16(row6, row7);
2370
2371 // pass 2
2372 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2373 dct_trn32(row1, row3);
2374 dct_trn32(row4, row6);
2375 dct_trn32(row5, row7);
2376
2377 // pass 3
2378 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2379 dct_trn64(row1, row5);
2380 dct_trn64(row2, row6);
2381 dct_trn64(row3, row7);
2382
2383#undef dct_trn16
2384#undef dct_trn32
2385#undef dct_trn64
2386 }
2387
2388 // row pass
2389 // vrshrn_n_s32 only supports shifts up to 16, we need
2390 // 17. so do a non-rounding shift of 16 first then follow
2391 // up with a rounding shift by 1.
2392 dct_pass(vshrn_n_s32, 16);
2393
2394 {
2395 // pack and round
 // vqrshrun_n_s16(x, 1) is the remaining rounding shift by 1; it also
 // saturates to the unsigned 8-bit range, so no separate clamp is needed
2396 uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2397 uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2398 uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2399 uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2400 uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2401 uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2402 uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2403 uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2404
2405 // again, these can translate into one instruction, but often don't.
2406#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2407#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2408#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2409
2410 // sadly can't use interleaved stores here since we only write
2411 // 8 bytes to each scan line!
2412
2413 // 8x8 8-bit transpose pass 1
2414 dct_trn8_8(p0, p1);
2415 dct_trn8_8(p2, p3);
2416 dct_trn8_8(p4, p5);
2417 dct_trn8_8(p6, p7);
2418
2419 // pass 2
2420 dct_trn8_16(p0, p2);
2421 dct_trn8_16(p1, p3);
2422 dct_trn8_16(p4, p6);
2423 dct_trn8_16(p5, p7);
2424
2425 // pass 3
2426 dct_trn8_32(p0, p4);
2427 dct_trn8_32(p1, p5);
2428 dct_trn8_32(p2, p6);
2429 dct_trn8_32(p3, p7);
2430
2431 // store
 // one 8-byte row per register, rows out_stride bytes apart
2432 vst1_u8(out, p0); out += out_stride;
2433 vst1_u8(out, p1); out += out_stride;
2434 vst1_u8(out, p2); out += out_stride;
2435 vst1_u8(out, p3); out += out_stride;
2436 vst1_u8(out, p4); out += out_stride;
2437 vst1_u8(out, p5); out += out_stride;
2438 vst1_u8(out, p6); out += out_stride;
2439 vst1_u8(out, p7);
2440
2441#undef dct_trn8_8
2442#undef dct_trn8_16
2443#undef dct_trn8_32
2444 }
2445
2446#undef dct_long_mul
2447#undef dct_long_mac
2448#undef dct_widen
2449#undef dct_wadd
2450#undef dct_wsub
2451#undef dct_bfly32o
2452#undef dct_pass
2453}
2454
2455#endif // STBI_NEON
2456
2457#define STBI__MARKER_none 0xff
2458// if there's a pending marker from the entropy stream, return that
2459// otherwise, fetch from the stream and get a marker. if there's no
2460// marker, return 0xff, which is never a valid marker value
2461static stbi_uc stbi__get_marker(stbi__jpeg *j)
2462{
2463 stbi_uc x;
2464 if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2465 x = stbi__get8(j->s);
2466 if (x != 0xff) return STBI__MARKER_none;
2467 while (x == 0xff)
2468 x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2469 return x;
2470}
2471
2472// in each scan, we'll have scan_n components, and the order
2473// of the components is specified by order[]
2474#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2475
2476// after a restart interval, stbi__jpeg_reset the entropy decoder and
2477// the dc prediction
2478static void stbi__jpeg_reset(stbi__jpeg *j)
2479{
2480 j->code_bits = 0;
2481 j->code_buffer = 0;
2482 j->nomore = 0;
2483 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2484 j->marker = STBI__MARKER_none;
2485 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2486 j->eob_run = 0;
2487 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2488 // since we don't even allow 1<<30 pixels
2489}
2490
2491static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2492{
2493 stbi__jpeg_reset(z);
2494 if (!z->progressive) {
2495 if (z->scan_n == 1) {
2496 int i,j;
2497 STBI_SIMD_ALIGN(short, data[64]);
2498 int n = z->order[0];
2499 // non-interleaved data, we just need to process one block at a time,
2500 // in trivial scanline order
2501 // number of blocks to do just depends on how many actual "pixels" this
2502 // component has, independent of interleaved MCU blocking and such
2503 int w = (z->img_comp[n].x+7) >> 3;
2504 int h = (z->img_comp[n].y+7) >> 3;
2505 for (j=0; j < h; ++j) {
2506 for (i=0; i < w; ++i) {
2507 int ha = z->img_comp[n].ha;
2508 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2509 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2510 // every data block is an MCU, so countdown the restart interval
2511 if (--z->todo <= 0) {
2512 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2513 // if it's NOT a restart, then just bail, so we get corrupt data
2514 // rather than no data
2515 if (!STBI__RESTART(z->marker)) return 1;
2516 stbi__jpeg_reset(z);
2517 }
2518 }
2519 }
2520 return 1;
2521 } else { // interleaved
2522 int i,j,k,x,y;
2523 STBI_SIMD_ALIGN(short, data[64]);
2524 for (j=0; j < z->img_mcu_y; ++j) {
2525 for (i=0; i < z->img_mcu_x; ++i) {
2526 // scan an interleaved mcu... process scan_n components in order
2527 for (k=0; k < z->scan_n; ++k) {
2528 int n = z->order[k];
2529 // scan out an mcu's worth of this component; that's just determined
2530 // by the basic H and V specified for the component
2531 for (y=0; y < z->img_comp[n].v; ++y) {
2532 for (x=0; x < z->img_comp[n].h; ++x) {
2533 int x2 = (i*z->img_comp[n].h + x)*8;
2534 int y2 = (j*z->img_comp[n].v + y)*8;
2535 int ha = z->img_comp[n].ha;
2536 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2537 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2538 }
2539 }
2540 }
2541 // after all interleaved components, that's an interleaved MCU,
2542 // so now count down the restart interval
2543 if (--z->todo <= 0) {
2544 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2545 if (!STBI__RESTART(z->marker)) return 1;
2546 stbi__jpeg_reset(z);
2547 }
2548 }
2549 }
2550 return 1;
2551 }
2552 } else {
2553 if (z->scan_n == 1) {
2554 int i,j;
2555 int n = z->order[0];
2556 // non-interleaved data, we just need to process one block at a time,
2557 // in trivial scanline order
2558 // number of blocks to do just depends on how many actual "pixels" this
2559 // component has, independent of interleaved MCU blocking and such
2560 int w = (z->img_comp[n].x+7) >> 3;
2561 int h = (z->img_comp[n].y+7) >> 3;
2562 for (j=0; j < h; ++j) {
2563 for (i=0; i < w; ++i) {
2564 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2565 if (z->spec_start == 0) {
2566 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2567 return 0;
2568 } else {
2569 int ha = z->img_comp[n].ha;
2570 if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2571 return 0;
2572 }
2573 // every data block is an MCU, so countdown the restart interval
2574 if (--z->todo <= 0) {
2575 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2576 if (!STBI__RESTART(z->marker)) return 1;
2577 stbi__jpeg_reset(z);
2578 }
2579 }
2580 }
2581 return 1;
2582 } else { // interleaved
2583 int i,j,k,x,y;
2584 for (j=0; j < z->img_mcu_y; ++j) {
2585 for (i=0; i < z->img_mcu_x; ++i) {
2586 // scan an interleaved mcu... process scan_n components in order
2587 for (k=0; k < z->scan_n; ++k) {
2588 int n = z->order[k];
2589 // scan out an mcu's worth of this component; that's just determined
2590 // by the basic H and V specified for the component
2591 for (y=0; y < z->img_comp[n].v; ++y) {
2592 for (x=0; x < z->img_comp[n].h; ++x) {
2593 int x2 = (i*z->img_comp[n].h + x);
2594 int y2 = (j*z->img_comp[n].v + y);
2595 short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2596 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2597 return 0;
2598 }
2599 }
2600 }
2601 // after all interleaved components, that's an interleaved MCU,
2602 // so now count down the restart interval
2603 if (--z->todo <= 0) {
2604 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2605 if (!STBI__RESTART(z->marker)) return 1;
2606 stbi__jpeg_reset(z);
2607 }
2608 }
2609 }
2610 return 1;
2611 }
2612 }
2613}
2614
2615static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
2616{
2617 int i;
2618 for (i=0; i < 64; ++i)
2619 data[i] *= dequant[i];
2620}
2621
2622static void stbi__jpeg_finish(stbi__jpeg *z)
2623{
2624 if (z->progressive) {
2625 // dequantize and idct the data
2626 int i,j,n;
2627 for (n=0; n < z->s->img_n; ++n) {
2628 int w = (z->img_comp[n].x+7) >> 3;
2629 int h = (z->img_comp[n].y+7) >> 3;
2630 for (j=0; j < h; ++j) {
2631 for (i=0; i < w; ++i) {
2632 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2633 stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2634 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2635 }
2636 }
2637 }
2638 }
2639}
2640
2641static int stbi__process_marker(stbi__jpeg *z, int m)
2642{
2643 int L;
2644 switch (m) {
2645 case STBI__MARKER_none: // no marker found
2646 return stbi__err("expected marker","Corrupt JPEG");
2647
2648 case 0xDD: // DRI - specify restart interval
2649 if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
2650 z->restart_interval = stbi__get16be(z->s);
2651 return 1;
2652
2653 case 0xDB: // DQT - define quantization table
2654 L = stbi__get16be(z->s)-2;
2655 while (L > 0) {
2656 int q = stbi__get8(z->s);
2657 int p = q >> 4, sixteen = (p != 0);
2658 int t = q & 15,i;
2659 if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
2660 if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
2661
2662 for (i=0; i < 64; ++i)
2663 z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
2664 L -= (sixteen ? 129 : 65);
2665 }
2666 return L==0;
2667
2668 case 0xC4: // DHT - define huffman table
2669 L = stbi__get16be(z->s)-2;
2670 while (L > 0) {
2671 stbi_uc *v;
2672 int sizes[16],i,n=0;
2673 int q = stbi__get8(z->s);
2674 int tc = q >> 4;
2675 int th = q & 15;
2676 if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
2677 for (i=0; i < 16; ++i) {
2678 sizes[i] = stbi__get8(z->s);
2679 n += sizes[i];
2680 }
2681 if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
2682 L -= 17;
2683 if (tc == 0) {
2684 if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
2685 v = z->huff_dc[th].values;
2686 } else {
2687 if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
2688 v = z->huff_ac[th].values;
2689 }
2690 for (i=0; i < n; ++i)
2691 v[i] = stbi__get8(z->s);
2692 if (tc != 0)
2693 stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2694 L -= n;
2695 }
2696 return L==0;
2697 }
2698
2699 // check for comment block or APP blocks
2700 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2701 L = stbi__get16be(z->s);
2702 if (L < 2) {
2703 if (m == 0xFE)
2704 return stbi__err("bad COM len","Corrupt JPEG");
2705 else
2706 return stbi__err("bad APP len","Corrupt JPEG");
2707 }
2708 L -= 2;
2709
2710 if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
2711 static const unsigned char tag[5] = {'J','F','I','F','\0'};
2712 int ok = 1;
2713 int i;
2714 for (i=0; i < 5; ++i)
2715 if (stbi__get8(z->s) != tag[i])
2716 ok = 0;
2717 L -= 5;
2718 if (ok)
2719 z->jfif = 1;
2720 } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
2721 static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
2722 int ok = 1;
2723 int i;
2724 for (i=0; i < 6; ++i)
2725 if (stbi__get8(z->s) != tag[i])
2726 ok = 0;
2727 L -= 6;
2728 if (ok) {
2729 stbi__get8(z->s); // version
2730 stbi__get16be(z->s); // flags0
2731 stbi__get16be(z->s); // flags1
2732 z->app14_color_transform = stbi__get8(z->s); // color transform
2733 L -= 6;
2734 }
2735 }
2736
2737 stbi__skip(z->s, L);
2738 return 1;
2739 }
2740
2741 return stbi__err("unknown marker","Corrupt JPEG");
2742}
2743
2744// after we see SOS
2745static int stbi__process_scan_header(stbi__jpeg *z)
2746{
2747 int i;
2748 int Ls = stbi__get16be(z->s);
2749 z->scan_n = stbi__get8(z->s);
2750 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2751 if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2752 for (i=0; i < z->scan_n; ++i) {
2753 int id = stbi__get8(z->s), which;
2754 int q = stbi__get8(z->s);
2755 for (which = 0; which < z->s->img_n; ++which)
2756 if (z->img_comp[which].id == id)
2757 break;
2758 if (which == z->s->img_n) return 0; // no match
2759 z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2760 z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2761 z->order[i] = which;
2762 }
2763
2764 {
2765 int aa;
2766 z->spec_start = stbi__get8(z->s);
2767 z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
2768 aa = stbi__get8(z->s);
2769 z->succ_high = (aa >> 4);
2770 z->succ_low = (aa & 15);
2771 if (z->progressive) {
2772 if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2773 return stbi__err("bad SOS", "Corrupt JPEG");
2774 } else {
2775 if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2776 if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2777 z->spec_end = 63;
2778 }
2779 }
2780
2781 return 1;
2782}
2783
2784static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
2785{
2786 int i;
2787 for (i=0; i < ncomp; ++i) {
2788 if (z->img_comp[i].raw_data) {
2789 STBI_FREE(z->img_comp[i].raw_data);
2790 z->img_comp[i].raw_data = NULL;
2791 z->img_comp[i].data = NULL;
2792 }
2793 if (z->img_comp[i].raw_coeff) {
2794 STBI_FREE(z->img_comp[i].raw_coeff);
2795 z->img_comp[i].raw_coeff = 0;
2796 z->img_comp[i].coeff = 0;
2797 }
2798 if (z->img_comp[i].linebuf) {
2799 STBI_FREE(z->img_comp[i].linebuf);
2800 z->img_comp[i].linebuf = NULL;
2801 }
2802 }
2803 return why;
2804}
2805
2806static int stbi__process_frame_header(stbi__jpeg *z, int scan)
2807{
2808 stbi__context *s = z->s;
2809 int Lf,p,i,q, h_max=1,v_max=1,c;
2810 Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
2811 p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
2812 s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
2813 s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
2814 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
2815 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
2816 c = stbi__get8(s);
2817 if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
2818 s->img_n = c;
2819 for (i=0; i < c; ++i) {
2820 z->img_comp[i].data = NULL;
2821 z->img_comp[i].linebuf = NULL;
2822 }
2823
2824 if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
2825
2826 z->rgb = 0;
2827 for (i=0; i < s->img_n; ++i) {
2828 static const unsigned char rgb[3] = { 'R', 'G', 'B' };
2829 z->img_comp[i].id = stbi__get8(s);
2830 if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
2831 ++z->rgb;
2832 q = stbi__get8(s);
2833 z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
2834 z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
2835 z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
2836 }
2837
2838 if (scan != STBI__SCAN_load) return 1;
2839
2840 if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
2841
2842 for (i=0; i < s->img_n; ++i) {
2843 if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
2844 if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
2845 }
2846
2847 // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
2848 // and I've never seen a non-corrupted JPEG file actually use them
2849 for (i=0; i < s->img_n; ++i) {
2850 if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
2851 if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
2852 }
2853
2854 // compute interleaved mcu info
2855 z->img_h_max = h_max;
2856 z->img_v_max = v_max;
2857 z->img_mcu_w = h_max * 8;
2858 z->img_mcu_h = v_max * 8;
2859 // these sizes can't be more than 17 bits
2860 z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
2861 z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
2862
2863 for (i=0; i < s->img_n; ++i) {
2864 // number of effective pixels (e.g. for non-interleaved MCU)
2865 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
2866 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
2867 // to simplify generation, we'll allocate enough memory to decode
2868 // the bogus oversized data from using interleaved MCUs and their
2869 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
2870 // discard the extra data until colorspace conversion
2871 //
2872 // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
2873 // so these muls can't overflow with 32-bit ints (which we require)
2874 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
2875 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
2876 z->img_comp[i].coeff = 0;
2877 z->img_comp[i].raw_coeff = 0;
2878 z->img_comp[i].linebuf = NULL;
2879 z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
2880 if (z->img_comp[i].raw_data == NULL)
2881 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
2882 // align blocks for idct using mmx/sse
2883 z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
2884 if (z->progressive) {
2885 // w2, h2 are multiples of 8 (see above)
2886 z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
2887 z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
2888 z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
2889 if (z->img_comp[i].raw_coeff == NULL)
2890 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
2891 z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
2892 }
2893 }
2894
2895 return 1;
2896}
2897
// Marker predicates. They are comparisons rather than plain constants because
// a predicate may accept more than one marker value (e.g. SOF).
#define stbi__SOI(x)         ((x) == 0xd8)
#define stbi__EOI(x)         ((x) == 0xd9)
#define stbi__SOS(x)         ((x) == 0xda)
#define stbi__DNL(x)         ((x) == 0xdc)
// accepts the three frame markers 0xc0, 0xc1 and 0xc2
#define stbi__SOF(x)         ((x) >= 0xc0 && (x) <= 0xc2)

// the one frame marker that selects progressive decoding
#define stbi__SOF_progressive(x)   ((x) == 0xc2)
2906
2907static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
2908{
2909 int m;
2910 z->jfif = 0;
2911 z->app14_color_transform = -1; // valid values are 0,1,2
2912 z->marker = STBI__MARKER_none; // initialize cached marker to empty
2913 m = stbi__get_marker(z);
2914 if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
2915 if (scan == STBI__SCAN_type) return 1;
2916 m = stbi__get_marker(z);
2917 while (!stbi__SOF(m)) {
2918 if (!stbi__process_marker(z,m)) return 0;
2919 m = stbi__get_marker(z);
2920 while (m == STBI__MARKER_none) {
2921 // some files have extra padding after their blocks, so ok, we'll scan
2922 if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
2923 m = stbi__get_marker(z);
2924 }
2925 }
2926 z->progressive = stbi__SOF_progressive(m);
2927 if (!stbi__process_frame_header(z, scan)) return 0;
2928 return 1;
2929}
2930
2931static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
2932{
2933 // some JPEGs have junk at end, skip over it but if we find what looks
2934 // like a valid marker, resume there
2935 while (!stbi__at_eof(j->s)) {
2936 int x = stbi__get8(j->s);
2937 while (x == 255) { // might be a marker
2938 if (stbi__at_eof(j->s)) return STBI__MARKER_none;
2939 x = stbi__get8(j->s);
2940 if (x != 0x00 && x != 0xff) {
2941 // not a stuffed zero or lead-in to another marker, looks
2942 // like an actual marker, return it
2943 return x;
2944 }
2945 // stuffed zero has x=0 now which ends the loop, meaning we go
2946 // back to regular scan loop.
2947 // repeated 0xff keeps trying to read the next byte of the marker.
2948 }
2949 }
2950 return STBI__MARKER_none;
2951}
2952
2953// decode image to YCbCr format
2954static int stbi__decode_jpeg_image(stbi__jpeg *j)
2955{
2956 int m;
2957 for (m = 0; m < 4; m++) {
2958 j->img_comp[m].raw_data = NULL;
2959 j->img_comp[m].raw_coeff = NULL;
2960 }
2961 j->restart_interval = 0;
2962 if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
2963 m = stbi__get_marker(j);
2964 while (!stbi__EOI(m)) {
2965 if (stbi__SOS(m)) {
2966 if (!stbi__process_scan_header(j)) return 0;
2967 if (!stbi__parse_entropy_coded_data(j)) return 0;
2968 if (j->marker == STBI__MARKER_none ) {
2969 j->marker = stbi__skip_jpeg_junk_at_end(j);
2970 // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
2971 }
2972 m = stbi__get_marker(j);
2973 if (STBI__RESTART(m))
2974 m = stbi__get_marker(j);
2975 } else if (stbi__DNL(m)) {
2976 int Ld = stbi__get16be(j->s);
2977 stbi__uint32 NL = stbi__get16be(j->s);
2978 if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
2979 if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
2980 m = stbi__get_marker(j);
2981 } else {
2982 if (!stbi__process_marker(j, m)) return 1;
2983 m = stbi__get_marker(j);
2984 }
2985 }
2986 if (j->progressive)
2987 stbi__jpeg_finish(j);
2988 return 1;
2989}
2990
2991// static jfif-centered resampling (across block boundaries)
2992
2993typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
2994 int w, int hs);
2995
2996#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
2997
2998static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2999{
3000 STBI_NOTUSED(out);
3001 STBI_NOTUSED(in_far);
3002 STBI_NOTUSED(w);
3003 STBI_NOTUSED(hs);
3004 return in_near;
3005}
3006
3007static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3008{
3009 // need to generate two samples vertically for every one in input
3010 int i;
3011 STBI_NOTUSED(hs);
3012 for (i=0; i < w; ++i)
3013 out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
3014 return out;
3015}
3016
3017static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3018{
3019 // need to generate two samples horizontally for every one in input
3020 int i;
3021 stbi_uc *input = in_near;
3022
3023 if (w == 1) {
3024 // if only one sample, can't do any interpolation
3025 out[0] = out[1] = input[0];
3026 return out;
3027 }
3028
3029 out[0] = input[0];
3030 out[1] = stbi__div4(input[0]*3 + input[1] + 2);
3031 for (i=1; i < w-1; ++i) {
3032 int n = 3*input[i]+2;
3033 out[i*2+0] = stbi__div4(n+input[i-1]);
3034 out[i*2+1] = stbi__div4(n+input[i+1]);
3035 }
3036 out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
3037 out[i*2+1] = input[w-1];
3038
3039 STBI_NOTUSED(in_far);
3040 STBI_NOTUSED(hs);
3041
3042 return out;
3043}
3044
3045#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3046
3047static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3048{
3049 // need to generate 2x2 samples for every one in input
3050 int i,t0,t1;
3051 if (w == 1) {
3052 out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3053 return out;
3054 }
3055
3056 t1 = 3*in_near[0] + in_far[0];
3057 out[0] = stbi__div4(t1+2);
3058 for (i=1; i < w; ++i) {
3059 t0 = t1;
3060 t1 = 3*in_near[i]+in_far[i];
3061 out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3062 out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3063 }
3064 out[w*2-1] = stbi__div4(t1+2);
3065
3066 STBI_NOTUSED(hs);
3067
3068 return out;
3069}
3070
#if defined(STBI_SSE2) || defined(STBI_NEON)
// 2x2 resampler with an SSE2 / NEON fast path. Groups of 8 input pixels are
// processed with vector code; the remaining pixels (always including the last
// one of the row) are handled by the scalar loop at the end.
// Writes 2*w samples to `out` and returns `out`.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0;
   int left,cur;   // vertically blended values of the previous / current column

   STBI_NOTUSED(hs);

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   cur = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from cur).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, cur, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from cur).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(cur, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      cur = 3*in_near[i+7] + in_far[i+7];
   }

   // first scalar pixel: only its even output is produced here, the odd
   // output before it was already written (by the vector loop, or does not
   // exist when i == 0)
   left = cur;
   cur = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*cur + left + 8);

   // remaining pixels, same arithmetic as the scalar resampler
   for (++i; i < w; ++i) {
      left = cur;
      cur = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*left + cur + 8);
      out[i*2  ] = stbi__div16(3*cur + left + 8);
   }
   out[w*2-1] = stbi__div4(cur+2);

   return out;
}
#endif
3187
3188static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3189{
3190 // resample with nearest-neighbor
3191 int i,j;
3192 STBI_NOTUSED(in_far);
3193 for (i=0; i < w; ++i)
3194 for (j=0; j < hs; ++j)
3195 out[i*hs+j] = in_near[i];
3196 return out;
3197}
3198
3199// this is a reduced-precision calculation of YCbCr-to-RGB introduced
3200// to make sure the code produces the same results in both SIMD and scalar
3201#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
3202static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3203{
3204 int i;
3205 for (i=0; i < count; ++i) {
3206 int y_fixed = (y[i] << 20) + (1<<19); // rounding
3207 int r,g,b;
3208 int cr = pcr[i] - 128;
3209 int cb = pcb[i] - 128;
3210 r = y_fixed + cr* stbi__float2fixed(1.40200f);
3211 g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3212 b = y_fixed + cb* stbi__float2fixed(1.77200f);
3213 r >>= 20;
3214 g >>= 20;
3215 b >>= 20;
3216 if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3217 if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3218 if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3219 out[0] = (stbi_uc)r;
3220 out[1] = (stbi_uc)g;
3221 out[2] = (stbi_uc)b;
3222 out[3] = 255;
3223 out += step;
3224 }
3225}
3226
#if defined(STBI_SSE2) || defined(STBI_NEON)
// YCbCr-to-RGB row conversion with an SSE2 / NEON fast path.
// When step == 4 the vector code converts 8 pixels per iteration; whatever is
// left (and every pixel when step != 4) goes through the scalar loop at the
// end, which writes (R, G, B, 255) and advances `out` by `step` per pixel.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar tail: pixels the vector loops did not cover
   for (; i < count; ++i, out += step) {
      int luma = (y[i] << 20) + (1<<19); // rounding
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      int r = luma + cr* stbi__float2fixed(1.40200f);
      int g = luma + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      int b = luma + cb* stbi__float2fixed(1.77200f);

      // drop the 20 fractional bits
      r >>= 20;
      g >>= 20;
      b >>= 20;

      // clamp to 0..255
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
   }
}
#endif
3361
3362// set up the kernels
3363static void stbi__setup_jpeg(stbi__jpeg *j)
3364{
3365 j->idct_block_kernel = stbi__idct_block;
3366 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3367 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3368
3369#ifdef STBI_SSE2
3370 if (stbi__sse2_available()) {
3371 j->idct_block_kernel = stbi__idct_simd;
3372 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3373 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3374 }
3375#endif
3376
3377#ifdef STBI_NEON
3378 j->idct_block_kernel = stbi__idct_simd;
3379 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3380 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3381#endif
3382}
3383
3384// clean up the temporary component buffers
3385static void stbi__cleanup_jpeg(stbi__jpeg *j)
3386{
3387 stbi__free_jpeg_components(j, j->s->img_n, 0);
3388}
3389
3390typedef struct
3391{
3392 resample_row_func resample;
3393 stbi_uc *line0,*line1;
3394 int hs,vs; // expansion factor in each axis
3395 int w_lores; // horizontal pixels pre-expansion
3396 int ystep; // how far through vertical expansion we are
3397 int ypos; // which pre-expansion row we're on
3398} stbi__resample;
3399
3400// fast 0..255 * 0..255 => 0..255 rounded multiplication
3401static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3402{
3403 unsigned int t = x*y + 128;
3404 return (stbi_uc) ((t + (t >>8)) >> 8);
3405}
3406
3407static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3408{
3409 int n, decode_n, is_rgb;
3410 z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3411
3412 // validate req_comp
3413 if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3414
3415 // load a jpeg image from whichever source, but leave in YCbCr format
3416 if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3417
3418 // determine actual number of components to generate
3419 n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3420
3421 is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3422
3423 if (z->s->img_n == 3 && n < 3 && !is_rgb)
3424 decode_n = 1;
3425 else
3426 decode_n = z->s->img_n;
3427
3428 // nothing to do if no components requested; check this now to avoid
3429 // accessing uninitialized coutput[0] later
3430 if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
3431
3432 // resample and color-convert
3433 {
3434 int k;
3435 unsigned int i,j;
3436 stbi_uc *output;
3437 stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
3438
3439 stbi__resample res_comp[4];
3440
3441 for (k=0; k < decode_n; ++k) {
3442 stbi__resample *r = &res_comp[k];
3443
3444 // allocate line buffer big enough for upsampling off the edges
3445 // with upsample factor of 4
3446 z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3447 if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3448
3449 r->hs = z->img_h_max / z->img_comp[k].h;
3450 r->vs = z->img_v_max / z->img_comp[k].v;
3451 r->ystep = r->vs >> 1;
3452 r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3453 r->ypos = 0;
3454 r->line0 = r->line1 = z->img_comp[k].data;
3455
3456 if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3457 else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3458 else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3459 else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3460 else r->resample = stbi__resample_row_generic;
3461 }
3462
3463 // can't error after this so, this is safe
3464 output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3465 if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3466
3467 // now go ahead and resample
3468 for (j=0; j < z->s->img_y; ++j) {
3469 stbi_uc *out = output + n * z->s->img_x * j;
3470 for (k=0; k < decode_n; ++k) {
3471 stbi__resample *r = &res_comp[k];
3472 int y_bot = r->ystep >= (r->vs >> 1);
3473 coutput[k] = r->resample(z->img_comp[k].linebuf,
3474 y_bot ? r->line1 : r->line0,
3475 y_bot ? r->line0 : r->line1,
3476 r->w_lores, r->hs);
3477 if (++r->ystep >= r->vs) {
3478 r->ystep = 0;
3479 r->line0 = r->line1;
3480 if (++r->ypos < z->img_comp[k].y)
3481 r->line1 += z->img_comp[k].w2;
3482 }
3483 }
3484 if (n >= 3) {
3485 stbi_uc *y = coutput[0];
3486 if (z->s->img_n == 3) {
3487 if (is_rgb) {
3488 for (i=0; i < z->s->img_x; ++i) {
3489 out[0] = y[i];
3490 out[1] = coutput[1][i];
3491 out[2] = coutput[2][i];
3492 out[3] = 255;
3493 out += n;
3494 }
3495 } else {
3496 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3497 }
3498 } else if (z->s->img_n == 4) {
3499 if (z->app14_color_transform == 0) { // CMYK
3500 for (i=0; i < z->s->img_x; ++i) {
3501 stbi_uc m = coutput[3][i];
3502 out[0] = stbi__blinn_8x8(coutput[0][i], m);
3503 out[1] = stbi__blinn_8x8(coutput[1][i], m);
3504 out[2] = stbi__blinn_8x8(coutput[2][i], m);
3505 out[3] = 255;
3506 out += n;
3507 }
3508 } else if (z->app14_color_transform == 2) { // YCCK
3509 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3510 for (i=0; i < z->s->img_x; ++i) {
3511 stbi_uc m = coutput[3][i];
3512 out[0] = stbi__blinn_8x8(255 - out[0], m);
3513 out[1] = stbi__blinn_8x8(255 - out[1], m);
3514 out[2] = stbi__blinn_8x8(255 - out[2], m);
3515 out += n;
3516 }
3517 } else { // YCbCr + alpha? Ignore the fourth channel for now
3518 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3519 }
3520 } else
3521 for (i=0; i < z->s->img_x; ++i) {
3522 out[0] = out[1] = out[2] = y[i];
3523 out[3] = 255; // not used if n==3
3524 out += n;
3525 }
3526 } else {
3527 if (is_rgb) {
3528 if (n == 1)
3529 for (i=0; i < z->s->img_x; ++i)
3530 *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3531 else {
3532 for (i=0; i < z->s->img_x; ++i, out += 2) {
3533 out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3534 out[1] = 255;
3535 }
3536 }
3537 } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3538 for (i=0; i < z->s->img_x; ++i) {
3539 stbi_uc m = coutput[3][i];
3540 stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3541 stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3542 stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3543 out[0] = stbi__compute_y(r, g, b);
3544 out[1] = 255;
3545 out += n;
3546 }
3547 } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3548 for (i=0; i < z->s->img_x; ++i) {
3549 out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3550 out[1] = 255;
3551 out += n;
3552 }
3553 } else {
3554 stbi_uc *y = coutput[0];
3555 if (n == 1)
3556 for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3557 else
3558 for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
3559 }
3560 }
3561 }
3562 stbi__cleanup_jpeg(z);
3563 *out_x = z->s->img_x;
3564 *out_y = z->s->img_y;
3565 if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3566 return output;
3567 }
3568}
3569
3570static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
3571{
3572 unsigned char* result;
3573 stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
3574 if (!j) return stbi__errpuc("outofmem", "Out of memory");
3575 memset(j, 0, sizeof(stbi__jpeg));
3576 STBI_NOTUSED(ri);
3577 j->s = s;
3578 stbi__setup_jpeg(j);
3579 result = load_jpeg_image(j, x,y,comp,req_comp);
3580 STBI_FREE(j);
3581 return result;
3582}
3583
3584static int stbi__jpeg_test(stbi__context *s)
3585{
3586 int r;
3587 stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
3588 if (!j) return stbi__err("outofmem", "Out of memory");
3589 memset(j, 0, sizeof(stbi__jpeg));
3590 j->s = s;
3591 stbi__setup_jpeg(j);
3592 r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
3593 stbi__rewind(s);
3594 STBI_FREE(j);
3595 return r;
3596}
3597
3598static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3599{
3600 if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3601 stbi__rewind( j->s );
3602 return 0;
3603 }
3604 if (x) *x = j->s->img_x;
3605 if (y) *y = j->s->img_y;
3606 if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
3607 return 1;
3608}
3609
3610static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3611{
3612 int result;
3613 stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
3614 if (!j) return stbi__err("outofmem", "Out of memory");
3615 memset(j, 0, sizeof(stbi__jpeg));
3616 j->s = s;
3617 result = stbi__jpeg_info_raw(j, x, y, comp);
3618 STBI_FREE(j);
3619 return result;
3620}
3621#endif
3622
3623// public domain zlib decode v0.2 Sean Barrett 2006-11-18
3624// simple implementation
3625// - all input must be provided in an upfront buffer
3626// - all output is written to a single output buffer (can malloc/realloc)
3627// performance
3628// - fast huffman
3629
3630#ifndef STBI_NO_ZLIB
3631
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
// mask selecting the low STBI__ZFAST_BITS bits
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
3636
3637// zlib-style huffman encoding
3638// (jpegs packs from left, zlib from right, so can't share code)
// Decoding tables for one Huffman code, filled in by stbi__zbuild_huffman.
typedef struct
{
   // indexed by the low STBI__ZFAST_BITS bits of the code buffer; a non-zero
   // entry is (code length << 9) | symbol, zero means "use the slow path"
   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
   // firstcode[n]: first code value assigned to length n
   stbi__uint16 firstcode[16];
   // maxcode[n]: one past the last code of length n, shifted left to 16 bits;
   // maxcode[16] is a sentinel (0x10000) that stops the slow-path search
   int maxcode[17];
   // firstsymbol[n]: index into size[]/value[] of the first symbol of length n
   stbi__uint16 firstsymbol[16];
   // size[c] / value[c]: code length and symbol number of the c-th code,
   // counted in order of (length, symbol index)
   stbi_uc  size[STBI__ZNSYMS];
   stbi__uint16 value[STBI__ZNSYMS];
} stbi__zhuffman;
3648
3649stbi_inline static int stbi__bitreverse16(int n)
3650{
3651 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3652 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3653 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3654 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3655 return n;
3656}
3657
3658stbi_inline static int stbi__bit_reverse(int v, int bits)
3659{
3660 STBI_ASSERT(bits <= 16);
3661 // to bit reverse n bits, reverse 16 and shift
3662 // e.g. 11 bits, bit reverse and shift away 5
3663 return stbi__bitreverse16(v) >> (16-bits);
3664}
3665
3666static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
3667{
3668 int i,k=0;
3669 int code, next_code[16], sizes[17];
3670
3671 // DEFLATE spec for generating codes
3672 memset(sizes, 0, sizeof(sizes));
3673 memset(z->fast, 0, sizeof(z->fast));
3674 for (i=0; i < num; ++i)
3675 ++sizes[sizelist[i]];
3676 sizes[0] = 0;
3677 for (i=1; i < 16; ++i)
3678 if (sizes[i] > (1 << i))
3679 return stbi__err("bad sizes", "Corrupt PNG");
3680 code = 0;
3681 for (i=1; i < 16; ++i) {
3682 next_code[i] = code;
3683 z->firstcode[i] = (stbi__uint16) code;
3684 z->firstsymbol[i] = (stbi__uint16) k;
3685 code = (code + sizes[i]);
3686 if (sizes[i])
3687 if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
3688 z->maxcode[i] = code << (16-i); // preshift for inner loop
3689 code <<= 1;
3690 k += sizes[i];
3691 }
3692 z->maxcode[16] = 0x10000; // sentinel
3693 for (i=0; i < num; ++i) {
3694 int s = sizelist[i];
3695 if (s) {
3696 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3697 stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
3698 z->size [c] = (stbi_uc ) s;
3699 z->value[c] = (stbi__uint16) i;
3700 if (s <= STBI__ZFAST_BITS) {
3701 int j = stbi__bit_reverse(next_code[s],s);
3702 while (j < (1 << STBI__ZFAST_BITS)) {
3703 z->fast[j] = fastv;
3704 j += (1 << s);
3705 }
3706 }
3707 ++next_code[s];
3708 }
3709 }
3710 return 1;
3711}
3712
3713// zlib-from-memory implementation for PNG reading
3714// because PNG allows splitting the zlib stream arbitrarily,
3715// and it's annoying structurally to have PNG call ZLIB call PNG,
3716// we require PNG read all the IDATs and combine them into a single
3717// memory buffer
3718
3719typedef struct
3720{
3721 stbi_uc *zbuffer, *zbuffer_end;
3722 int num_bits;
3723 stbi__uint32 code_buffer;
3724
3725 char *zout;
3726 char *zout_start;
3727 char *zout_end;
3728 int z_expandable;
3729
3730 stbi__zhuffman z_length, z_distance;
3731} stbi__zbuf;
3732
3733stbi_inline static int stbi__zeof(stbi__zbuf *z)
3734{
3735 return (z->zbuffer >= z->zbuffer_end);
3736}
3737
3738stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3739{
3740 return stbi__zeof(z) ? 0 : *z->zbuffer++;
3741}
3742
3743static void stbi__fill_bits(stbi__zbuf *z)
3744{
3745 do {
3746 if (z->code_buffer >= (1U << z->num_bits)) {
3747 z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
3748 return;
3749 }
3750 z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
3751 z->num_bits += 8;
3752 } while (z->num_bits <= 24);
3753}
3754
3755stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3756{
3757 unsigned int k;
3758 if (z->num_bits < n) stbi__fill_bits(z);
3759 k = z->code_buffer & ((1 << n) - 1);
3760 z->code_buffer >>= n;
3761 z->num_bits -= n;
3762 return k;
3763}
3764
3765static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3766{
3767 int b,s,k;
3768 // not resolved by fast table, so compute it the slow way
3769 // use jpeg approach, which requires MSbits at top
3770 k = stbi__bit_reverse(a->code_buffer, 16);
3771 for (s=STBI__ZFAST_BITS+1; ; ++s)
3772 if (k < z->maxcode[s])
3773 break;
3774 if (s >= 16) return -1; // invalid code!
3775 // code size is s, so:
3776 b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
3777 if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
3778 if (z->size[b] != s) return -1; // was originally an assert, but report failure instead.
3779 a->code_buffer >>= s;
3780 a->num_bits -= s;
3781 return z->value[b];
3782}
3783
3784stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3785{
3786 int b,s;
3787 if (a->num_bits < 16) {
3788 if (stbi__zeof(a)) {
3789 return -1; /* report error for unexpected end of data. */
3790 }
3791 stbi__fill_bits(a);
3792 }
3793 b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3794 if (b) {
3795 s = b >> 9;
3796 a->code_buffer >>= s;
3797 a->num_bits -= s;
3798 return b & 511;
3799 }
3800 return stbi__zhuffman_decode_slowpath(a, z);
3801}
3802
3803static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
3804{
3805 char *q;
3806 unsigned int cur, limit, old_limit;
3807 z->zout = zout;
3808 if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
3809 cur = (unsigned int) (z->zout - z->zout_start);
3810 limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
3811 if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
3812 while (cur + n > limit) {
3813 if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
3814 limit *= 2;
3815 }
3816 q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
3817 STBI_NOTUSED(old_limit);
3818 if (q == NULL) return stbi__err("outofmem", "Out of memory");
3819 z->zout_start = q;
3820 z->zout = q + cur;
3821 z->zout_end = q + limit;
3822 return 1;
3823}
3824
// DEFLATE length codes: entry i belongs to literal/length symbol 257+i.
// The base length is extended by that many extra bits read from the stream.
static const int stbi__zlength_base[31] = {
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0
};

static const int stbi__zlength_extra[31] = {
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0
};

// DEFLATE distance codes: base distance and number of extra bits per symbol.
static const int stbi__zdist_base[32] = {
       1,     2,     3,     4,     5,     7,     9,    13,
      17,    25,    33,    49,    65,    97,   129,   193,
     257,   385,   513,   769,  1025,  1537,  2049,  3073,
    4097,  6145,  8193, 12289, 16385, 24577,     0,     0
};

static const int stbi__zdist_extra[32] = {
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,  0,  0
};
3838
3839static int stbi__parse_huffman_block(stbi__zbuf *a)
3840{
3841 char *zout = a->zout;
3842 for(;;) {
3843 int z = stbi__zhuffman_decode(a, &a->z_length);
3844 if (z < 256) {
3845 if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
3846 if (zout >= a->zout_end) {
3847 if (!stbi__zexpand(a, zout, 1)) return 0;
3848 zout = a->zout;
3849 }
3850 *zout++ = (char) z;
3851 } else {
3852 stbi_uc *p;
3853 int len,dist;
3854 if (z == 256) {
3855 a->zout = zout;
3856 return 1;
3857 }
3858 if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
3859 z -= 257;
3860 len = stbi__zlength_base[z];
3861 if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
3862 z = stbi__zhuffman_decode(a, &a->z_distance);
3863 if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
3864 dist = stbi__zdist_base[z];
3865 if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
3866 if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
3867 if (zout + len > a->zout_end) {
3868 if (!stbi__zexpand(a, zout, len)) return 0;
3869 zout = a->zout;
3870 }
3871 p = (stbi_uc *) (zout - dist);
3872 if (dist == 1) { // run of one byte; common in images.
3873 stbi_uc v = *p;
3874 if (len) { do *zout++ = v; while (--len); }
3875 } else {
3876 if (len) { do *zout++ = *p++; while (--len); }
3877 }
3878 }
3879 }
3880}
3881
3882static int stbi__compute_huffman_codes(stbi__zbuf *a)
3883{
3884 static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
3885 stbi__zhuffman z_codelength;
3886 stbi_uc lencodes[286+32+137];//padding for maximum single op
3887 stbi_uc codelength_sizes[19];
3888 int i,n;
3889
3890 int hlit = stbi__zreceive(a,5) + 257;
3891 int hdist = stbi__zreceive(a,5) + 1;
3892 int hclen = stbi__zreceive(a,4) + 4;
3893 int ntot = hlit + hdist;
3894
3895 memset(codelength_sizes, 0, sizeof(codelength_sizes));
3896 for (i=0; i < hclen; ++i) {
3897 int s = stbi__zreceive(a,3);
3898 codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
3899 }
3900 if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
3901
3902 n = 0;
3903 while (n < ntot) {
3904 int c = stbi__zhuffman_decode(a, &z_codelength);
3905 if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
3906 if (c < 16)
3907 lencodes[n++] = (stbi_uc) c;
3908 else {
3909 stbi_uc fill = 0;
3910 if (c == 16) {
3911 c = stbi__zreceive(a,2)+3;
3912 if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
3913 fill = lencodes[n-1];
3914 } else if (c == 17) {
3915 c = stbi__zreceive(a,3)+3;
3916 } else if (c == 18) {
3917 c = stbi__zreceive(a,7)+11;
3918 } else {
3919 return stbi__err("bad codelengths", "Corrupt PNG");
3920 }
3921 if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
3922 memset(lencodes+n, fill, c);
3923 n += c;
3924 }
3925 }
3926 if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
3927 if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
3928 if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
3929 return 1;
3930}
3931
3932static int stbi__parse_uncompressed_block(stbi__zbuf *a)
3933{
3934 stbi_uc header[4];
3935 int len,nlen,k;
3936 if (a->num_bits & 7)
3937 stbi__zreceive(a, a->num_bits & 7); // discard
3938 // drain the bit-packed data into header
3939 k = 0;
3940 while (a->num_bits > 0) {
3941 header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
3942 a->code_buffer >>= 8;
3943 a->num_bits -= 8;
3944 }
3945 if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
3946 // now fill header the normal way
3947 while (k < 4)
3948 header[k++] = stbi__zget8(a);
3949 len = header[1] * 256 + header[0];
3950 nlen = header[3] * 256 + header[2];
3951 if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
3952 if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
3953 if (a->zout + len > a->zout_end)
3954 if (!stbi__zexpand(a, a->zout, len)) return 0;
3955 memcpy(a->zout, a->zbuffer, len);
3956 a->zbuffer += len;
3957 a->zout += len;
3958 return 1;
3959}
3960
3961static int stbi__parse_zlib_header(stbi__zbuf *a)
3962{
3963 int cmf = stbi__zget8(a);
3964 int cm = cmf & 15;
3965 /* int cinfo = cmf >> 4; */
3966 int flg = stbi__zget8(a);
3967 if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
3968 if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
3969 if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
3970 if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
3971 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
3972 return 1;
3973}
3974
// Code lengths of the fixed literal/length Huffman code used by DEFLATE
// block type 1: symbols 0..143 use 8 bits, 144..255 use 9, 256..279 use 7
// and 280..287 use 8.
static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
// Code lengths of the fixed distance code: all 32 symbols use 5 bits.
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
3991/*
3992Init algorithm:
3993{
3994 int i; // use <= to match clearly with spec
3995 for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
3996 for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
3997 for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
3998 for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
3999
4000 for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
4001}
4002*/
4003
4004static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4005{
4006 int final, type;
4007 if (parse_header)
4008 if (!stbi__parse_zlib_header(a)) return 0;
4009 a->num_bits = 0;
4010 a->code_buffer = 0;
4011 do {
4012 final = stbi__zreceive(a,1);
4013 type = stbi__zreceive(a,2);
4014 if (type == 0) {
4015 if (!stbi__parse_uncompressed_block(a)) return 0;
4016 } else if (type == 3) {
4017 return 0;
4018 } else {
4019 if (type == 1) {
4020 // use fixed code lengths
4021 if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0;
4022 if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
4023 } else {
4024 if (!stbi__compute_huffman_codes(a)) return 0;
4025 }
4026 if (!stbi__parse_huffman_block(a)) return 0;
4027 }
4028 } while (!final);
4029 return 1;
4030}
4031
4032static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
4033{
4034 a->zout_start = obuf;
4035 a->zout = obuf;
4036 a->zout_end = obuf + olen;
4037 a->z_expandable = exp;
4038
4039 return stbi__parse_zlib(a, parse_header);
4040}
4041
4042STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4043{
4044 stbi__zbuf a;
4045 char *p = (char *) stbi__malloc(initial_size);
4046 if (p == NULL) return NULL;
4047 a.zbuffer = (stbi_uc *) buffer;
4048 a.zbuffer_end = (stbi_uc *) buffer + len;
4049 if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4050 if (outlen) *outlen = (int) (a.zout - a.zout_start);
4051 return a.zout_start;
4052 } else {
4053 STBI_FREE(a.zout_start);
4054 return NULL;
4055 }
4056}
4057
4058STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4059{
4060 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4061}
4062
4063STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4064{
4065 stbi__zbuf a;
4066 char *p = (char *) stbi__malloc(initial_size);
4067 if (p == NULL) return NULL;
4068 a.zbuffer = (stbi_uc *) buffer;
4069 a.zbuffer_end = (stbi_uc *) buffer + len;
4070 if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4071 if (outlen) *outlen = (int) (a.zout - a.zout_start);
4072 return a.zout_start;
4073 } else {
4074 STBI_FREE(a.zout_start);
4075 return NULL;
4076 }
4077}
4078
4079STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4080{
4081 stbi__zbuf a;
4082 a.zbuffer = (stbi_uc *) ibuffer;
4083 a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4084 if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4085 return (int) (a.zout - a.zout_start);
4086 else
4087 return -1;
4088}
4089
4090STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4091{
4092 stbi__zbuf a;
4093 char *p = (char *) stbi__malloc(16384);
4094 if (p == NULL) return NULL;
4095 a.zbuffer = (stbi_uc *) buffer;
4096 a.zbuffer_end = (stbi_uc *) buffer+len;
4097 if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4098 if (outlen) *outlen = (int) (a.zout - a.zout_start);
4099 return a.zout_start;
4100 } else {
4101 STBI_FREE(a.zout_start);
4102 return NULL;
4103 }
4104}
4105
4106STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4107{
4108 stbi__zbuf a;
4109 a.zbuffer = (stbi_uc *) ibuffer;
4110 a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4111 if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4112 return (int) (a.zout - a.zout_start);
4113 else
4114 return -1;
4115}
4116#endif
4117
4118// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
4119// simple implementation
4120// - only 8-bit samples
4121// - no CRC checking
4122// - allocates lots of intermediate memory
4123// - avoids problem of streaming data between subsystems
4124// - avoids explicit window management
4125// performance
4126// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4127
4128#ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header: two big-endian
// 32-bit values taken from the stream in this order.
typedef struct
{
   stbi__uint32 length; // first 32-bit value of the chunk header
   stbi__uint32 type;   // second 32-bit value; compared against STBI__PNG_TYPE codes
} stbi__pngchunk;
4134
4135static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4136{
4137 stbi__pngchunk c;
4138 c.length = stbi__get32be(s);
4139 c.type = stbi__get32be(s);
4140 return c;
4141}
4142
4143static int stbi__check_png_header(stbi__context *s)
4144{
4145 static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
4146 int i;
4147 for (i=0; i < 8; ++i)
4148 if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
4149 return 1;
4150}
4151
4152typedef struct
4153{
4154 stbi__context *s;
4155 stbi_uc *idata, *expanded, *out;
4156 int depth;
4157} stbi__png;
4158
4159
// Scanline filter types. Values 0..4 are the filter bytes that appear in the
// data; the last two are internal variants selected for the first scanline.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4170
4171static stbi_uc first_row_filter[5] =
4172{
4173 STBI__F_none,
4174 STBI__F_sub,
4175 STBI__F_none,
4176 STBI__F_avg_first,
4177 STBI__F_paeth_first
4178};
4179
// Paeth predictor: returns whichever of a, b, c is closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   const int estimate = a + b - c;
   const int dist_a = abs(estimate - a);
   const int dist_b = abs(estimate - b);
   const int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4190
// Indexed by bit depth: multiplier that scales a grayscale sample of that
// depth to the 0..255 range (1-bit: 0xff, 2-bit: 0x55, 4-bit: 0x11, 8-bit: 1).
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4192
4193// create the png data from post-deflated data
4194static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4195{
4196 int bytes = (depth == 16? 2 : 1);
4197 stbi__context *s = a->s;
4198 stbi__uint32 i,j,stride = x*out_n*bytes;
4199 stbi__uint32 img_len, img_width_bytes;
4200 int k;
4201 int img_n = s->img_n; // copy it into a local for later
4202
4203 int output_bytes = out_n*bytes;
4204 int filter_bytes = img_n*bytes;
4205 int width = x;
4206
4207 STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4208 a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4209 if (!a->out) return stbi__err("outofmem", "Out of memory");
4210
4211 if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
4212 img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4213 img_len = (img_width_bytes + 1) * y;
4214
4215 // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4216 // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4217 // so just check for raw_len < img_len always.
4218 if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4219
4220 for (j=0; j < y; ++j) {
4221 stbi_uc *cur = a->out + stride*j;
4222 stbi_uc *prior;
4223 int filter = *raw++;
4224
4225 if (filter > 4)
4226 return stbi__err("invalid filter","Corrupt PNG");
4227
4228 if (depth < 8) {
4229 if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG");
4230 cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4231 filter_bytes = 1;
4232 width = img_width_bytes;
4233 }
4234 prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4235
4236 // if first row, use special filter that doesn't sample previous row
4237 if (j == 0) filter = first_row_filter[filter];
4238
4239 // handle first byte explicitly
4240 for (k=0; k < filter_bytes; ++k) {
4241 switch (filter) {
4242 case STBI__F_none : cur[k] = raw[k]; break;
4243 case STBI__F_sub : cur[k] = raw[k]; break;
4244 case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4245 case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4246 case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4247 case STBI__F_avg_first : cur[k] = raw[k]; break;
4248 case STBI__F_paeth_first: cur[k] = raw[k]; break;
4249 }
4250 }
4251
4252 if (depth == 8) {
4253 if (img_n != out_n)
4254 cur[img_n] = 255; // first pixel
4255 raw += img_n;
4256 cur += out_n;
4257 prior += out_n;
4258 } else if (depth == 16) {
4259 if (img_n != out_n) {
4260 cur[filter_bytes] = 255; // first pixel top byte
4261 cur[filter_bytes+1] = 255; // first pixel bottom byte
4262 }
4263 raw += filter_bytes;
4264 cur += output_bytes;
4265 prior += output_bytes;
4266 } else {
4267 raw += 1;
4268 cur += 1;
4269 prior += 1;
4270 }
4271
4272 // this is a little gross, so that we don't switch per-pixel or per-component
4273 if (depth < 8 || img_n == out_n) {
4274 int nk = (width - 1)*filter_bytes;
4275 #define STBI__CASE(f) \
4276 case f: \
4277 for (k=0; k < nk; ++k)
4278 switch (filter) {
4279 // "none" filter turns into a memcpy here; make that explicit.
4280 case STBI__F_none: memcpy(cur, raw, nk); break;
4281 STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4282 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4283 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4284 STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4285 STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4286 STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4287 }
4288 #undef STBI__CASE
4289 raw += nk;
4290 } else {
4291 STBI_ASSERT(img_n+1 == out_n);
4292 #define STBI__CASE(f) \
4293 case f: \
4294 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4295 for (k=0; k < filter_bytes; ++k)
4296 switch (filter) {
4297 STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
4298 STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4299 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4300 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4301 STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4302 STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4303 STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4304 }
4305 #undef STBI__CASE
4306
4307 // the loop above sets the high byte of the pixels' alpha, but for
4308 // 16 bit png files we also need the low byte set. we'll do that here.
4309 if (depth == 16) {
4310 cur = a->out + stride*j; // start at the beginning of the row again
4311 for (i=0; i < x; ++i,cur+=output_bytes) {
4312 cur[filter_bytes+1] = 255;
4313 }
4314 }
4315 }
4316 }
4317
4318 // we make a separate pass to expand bits to pixels; for performance,
4319 // this could run two scanlines behind the above code, so it won't
4320 // intefere with filtering but will still be in the cache.
4321 if (depth < 8) {
4322 for (j=0; j < y; ++j) {
4323 stbi_uc *cur = a->out + stride*j;
4324 stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4325 // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4326 // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4327 stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4328
4329 // note that the final byte might overshoot and write more data than desired.
4330 // we can allocate enough data that this never writes out of memory, but it
4331 // could also overwrite the next scanline. can it overwrite non-empty data
4332 // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4333 // so we need to explicitly clamp the final ones
4334
4335 if (depth == 4) {
4336 for (k=x*img_n; k >= 2; k-=2, ++in) {
4337 *cur++ = scale * ((*in >> 4) );
4338 *cur++ = scale * ((*in ) & 0x0f);
4339 }
4340 if (k > 0) *cur++ = scale * ((*in >> 4) );
4341 } else if (depth == 2) {
4342 for (k=x*img_n; k >= 4; k-=4, ++in) {
4343 *cur++ = scale * ((*in >> 6) );
4344 *cur++ = scale * ((*in >> 4) & 0x03);
4345 *cur++ = scale * ((*in >> 2) & 0x03);
4346 *cur++ = scale * ((*in ) & 0x03);
4347 }
4348 if (k > 0) *cur++ = scale * ((*in >> 6) );
4349 if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4350 if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4351 } else if (depth == 1) {
4352 for (k=x*img_n; k >= 8; k-=8, ++in) {
4353 *cur++ = scale * ((*in >> 7) );
4354 *cur++ = scale * ((*in >> 6) & 0x01);
4355 *cur++ = scale * ((*in >> 5) & 0x01);
4356 *cur++ = scale * ((*in >> 4) & 0x01);
4357 *cur++ = scale * ((*in >> 3) & 0x01);
4358 *cur++ = scale * ((*in >> 2) & 0x01);
4359 *cur++ = scale * ((*in >> 1) & 0x01);
4360 *cur++ = scale * ((*in ) & 0x01);
4361 }
4362 if (k > 0) *cur++ = scale * ((*in >> 7) );
4363 if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4364 if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4365 if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4366 if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4367 if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4368 if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4369 }
4370 if (img_n != out_n) {
4371 int q;
4372 // insert alpha = 255
4373 cur = a->out + stride*j;
4374 if (img_n == 1) {
4375 for (q=x-1; q >= 0; --q) {
4376 cur[q*2+1] = 255;
4377 cur[q*2+0] = cur[q];
4378 }
4379 } else {
4380 STBI_ASSERT(img_n == 3);
4381 for (q=x-1; q >= 0; --q) {
4382 cur[q*4+3] = 255;
4383 cur[q*4+2] = cur[q*3+2];
4384 cur[q*4+1] = cur[q*3+1];
4385 cur[q*4+0] = cur[q*3+0];
4386 }
4387 }
4388 }
4389 }
4390 } else if (depth == 16) {
4391 // force the image data from big-endian to platform-native.
4392 // this is done in a separate pass due to the decoding relying
4393 // on the data being untouched, but could probably be done
4394 // per-line during decode if care is taken.
4395 stbi_uc *cur = a->out;
4396 stbi__uint16 *cur16 = (stbi__uint16*)cur;
4397
4398 for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4399 *cur16 = (cur[0] << 8) | cur[1];
4400 }
4401 }
4402
4403 return 1;
4404}
4405
4406static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4407{
4408 int bytes = (depth == 16 ? 2 : 1);
4409 int out_bytes = out_n * bytes;
4410 stbi_uc *final;
4411 int p;
4412 if (!interlaced)
4413 return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4414
4415 // de-interlacing
4416 final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4417 if (!final) return stbi__err("outofmem", "Out of memory");
4418 for (p=0; p < 7; ++p) {
4419 int xorig[] = { 0,4,0,2,0,1,0 };
4420 int yorig[] = { 0,0,4,0,2,0,1 };
4421 int xspc[] = { 8,8,4,4,2,2,1 };
4422 int yspc[] = { 8,8,8,4,4,2,2 };
4423 int i,j,x,y;
4424 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4425 x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4426 y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4427 if (x && y) {
4428 stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4429 if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4430 STBI_FREE(final);
4431 return 0;
4432 }
4433 for (j=0; j < y; ++j) {
4434 for (i=0; i < x; ++i) {
4435 int out_y = j*yspc[p]+yorig[p];
4436 int out_x = i*xspc[p]+xorig[p];
4437 memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4438 a->out + (j*x+i)*out_bytes, out_bytes);
4439 }
4440 }
4441 STBI_FREE(a->out);
4442 image_data += img_len;
4443 image_data_len -= img_len;
4444 }
4445 }
4446 a->out = final;
4447
4448 return 1;
4449}
4450
4451static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4452{
4453 stbi__context *s = z->s;
4454 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4455 stbi_uc *p = z->out;
4456
4457 // compute color-based transparency, assuming we've
4458 // already got 255 as the alpha value in the output
4459 STBI_ASSERT(out_n == 2 || out_n == 4);
4460
4461 if (out_n == 2) {
4462 for (i=0; i < pixel_count; ++i) {
4463 p[1] = (p[0] == tc[0] ? 0 : 255);
4464 p += 2;
4465 }
4466 } else {
4467 for (i=0; i < pixel_count; ++i) {
4468 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4469 p[3] = 0;
4470 p += 4;
4471 }
4472 }
4473 return 1;
4474}
4475
4476static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4477{
4478 stbi__context *s = z->s;
4479 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4480 stbi__uint16 *p = (stbi__uint16*) z->out;
4481
4482 // compute color-based transparency, assuming we've
4483 // already got 65535 as the alpha value in the output
4484 STBI_ASSERT(out_n == 2 || out_n == 4);
4485
4486 if (out_n == 2) {
4487 for (i = 0; i < pixel_count; ++i) {
4488 p[1] = (p[0] == tc[0] ? 0 : 65535);
4489 p += 2;
4490 }
4491 } else {
4492 for (i = 0; i < pixel_count; ++i) {
4493 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4494 p[3] = 0;
4495 p += 4;
4496 }
4497 }
4498 return 1;
4499}
4500
4501static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4502{
4503 stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4504 stbi_uc *p, *temp_out, *orig = a->out;
4505
4506 p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4507 if (p == NULL) return stbi__err("outofmem", "Out of memory");
4508
4509 // between here and free(out) below, exitting would leak
4510 temp_out = p;
4511
4512 if (pal_img_n == 3) {
4513 for (i=0; i < pixel_count; ++i) {
4514 int n = orig[i]*4;
4515 p[0] = palette[n ];
4516 p[1] = palette[n+1];
4517 p[2] = palette[n+2];
4518 p += 3;
4519 }
4520 } else {
4521 for (i=0; i < pixel_count; ++i) {
4522 int n = orig[i]*4;
4523 p[0] = palette[n ];
4524 p[1] = palette[n+1];
4525 p[2] = palette[n+2];
4526 p[3] = palette[n+3];
4527 p += 4;
4528 }
4529 }
4530 STBI_FREE(a->out);
4531 a->out = temp_out;
4532
4533 STBI_NOTUSED(len);
4534
4535 return 1;
4536}
4537
// Process-wide settings consulted by the PNG code through the
// stbi__unpremultiply_on_load and stbi__de_iphone_flag macros defined below.
static int stbi__unpremultiply_on_load_global = 0;
static int stbi__de_iphone_flag_global = 0;

// Stores the process-wide "unpremultiply on load" setting.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
}

// Stores the process-wide "convert iPhone PNG to RGB" setting.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_global = flag_true_if_should_convert;
}

#ifndef STBI_THREAD_LOCAL
// no thread-local storage available: the settings are simply the globals
#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
#else
// per-thread overrides; each *_set flag records whether the matching *_local
// value has been assigned on this thread
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;

// Stores a per-thread "unpremultiply on load" setting and marks it as set.
STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
   stbi__unpremultiply_on_load_set = 1;
}

// Stores a per-thread "convert iPhone PNG to RGB" setting and marks it as set.
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_local = flag_true_if_should_convert;
   stbi__de_iphone_flag_set = 1;
}

// effective setting: the per-thread value once it has been set on this
// thread, otherwise the process-wide value
#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
                                       ? stbi__unpremultiply_on_load_local      \
                                       : stbi__unpremultiply_on_load_global)
#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
                                ? stbi__de_iphone_flag_local                    \
                                : stbi__de_iphone_flag_global)
#endif // STBI_THREAD_LOCAL
4577
4578static void stbi__de_iphone(stbi__png *z)
4579{
4580 stbi__context *s = z->s;
4581 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4582 stbi_uc *p = z->out;
4583
4584 if (s->img_out_n == 3) { // convert bgr to rgb
4585 for (i=0; i < pixel_count; ++i) {
4586 stbi_uc t = p[0];
4587 p[0] = p[2];
4588 p[2] = t;
4589 p += 3;
4590 }
4591 } else {
4592 STBI_ASSERT(s->img_out_n == 4);
4593 if (stbi__unpremultiply_on_load) {
4594 // convert bgr to rgb and unpremultiply
4595 for (i=0; i < pixel_count; ++i) {
4596 stbi_uc a = p[3];
4597 stbi_uc t = p[0];
4598 if (a) {
4599 stbi_uc half = a / 2;
4600 p[0] = (p[2] * 255 + half) / a;
4601 p[1] = (p[1] * 255 + half) / a;
4602 p[2] = ( t * 255 + half) / a;
4603 } else {
4604 p[0] = p[2];
4605 p[2] = t;
4606 }
4607 p += 4;
4608 }
4609 } else {
4610 // convert bgr to rgb
4611 for (i=0; i < pixel_count; ++i) {
4612 stbi_uc t = p[0];
4613 p[0] = p[2];
4614 p[2] = t;
4615 p += 4;
4616 }
4617 }
4618 }
4619}
4620
// Pack four chunk-name characters into one unsigned value, first character
// in the most significant byte.
#define STBI__PNG_TYPE(a,b,c,d)                        \
   (  ((unsigned) (a) << 24)                           \
    + ((unsigned) (b) << 16)                           \
    + ((unsigned) (c) <<  8)                           \
    +  (unsigned) (d)        )
4622
4623static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4624{
4625 stbi_uc palette[1024], pal_img_n=0;
4626 stbi_uc has_trans=0, tc[3]={0};
4627 stbi__uint16 tc16[3];
4628 stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
4629 int first=1,k,interlace=0, color=0, is_iphone=0;
4630 stbi__context *s = z->s;
4631
4632 z->expanded = NULL;
4633 z->idata = NULL;
4634 z->out = NULL;
4635
4636 if (!stbi__check_png_header(s)) return 0;
4637
4638 if (scan == STBI__SCAN_type) return 1;
4639
4640 for (;;) {
4641 stbi__pngchunk c = stbi__get_chunk_header(s);
4642 switch (c.type) {
4643 case STBI__PNG_TYPE('C','g','B','I'):
4644 is_iphone = 1;
4645 stbi__skip(s, c.length);
4646 break;
4647 case STBI__PNG_TYPE('I','H','D','R'): {
4648 int comp,filter;
4649 if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
4650 first = 0;
4651 if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
4652 s->img_x = stbi__get32be(s);
4653 s->img_y = stbi__get32be(s);
4654 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
4655 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
4656 z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
4657 color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
4658 if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
4659 if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
4660 comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
4661 filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
4662 interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
4663 if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
4664 if (!pal_img_n) {
4665 s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4666 if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4667 } else {
4668 // if paletted, then pal_n is our final components, and
4669 // img_n is # components to decompress/filter.
4670 s->img_n = 1;
4671 if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
4672 }
4673 // even with SCAN_header, have to scan to see if we have a tRNS
4674 break;
4675 }
4676
4677 case STBI__PNG_TYPE('P','L','T','E'): {
4678 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4679 if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
4680 pal_len = c.length / 3;
4681 if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
4682 for (i=0; i < pal_len; ++i) {
4683 palette[i*4+0] = stbi__get8(s);
4684 palette[i*4+1] = stbi__get8(s);
4685 palette[i*4+2] = stbi__get8(s);
4686 palette[i*4+3] = 255;
4687 }
4688 break;
4689 }
4690
4691 case STBI__PNG_TYPE('t','R','N','S'): {
4692 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4693 if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
4694 if (pal_img_n) {
4695 if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4696 if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
4697 if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
4698 pal_img_n = 4;
4699 for (i=0; i < c.length; ++i)
4700 palette[i*4+3] = stbi__get8(s);
4701 } else {
4702 if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
4703 if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
4704 has_trans = 1;
4705 // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
4706 if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
4707 if (z->depth == 16) {
4708 for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
4709 } else {
4710 for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
4711 }
4712 }
4713 break;
4714 }
4715
4716 case STBI__PNG_TYPE('I','D','A','T'): {
4717 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4718 if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
4719 if (scan == STBI__SCAN_header) {
4720 // header scan definitely stops at first IDAT
4721 if (pal_img_n)
4722 s->img_n = pal_img_n;
4723 return 1;
4724 }
4725 if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
4726 if ((int)(ioff + c.length) < (int)ioff) return 0;
4727 if (ioff + c.length > idata_limit) {
4728 stbi__uint32 idata_limit_old = idata_limit;
4729 stbi_uc *p;
4730 if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4731 while (ioff + c.length > idata_limit)
4732 idata_limit *= 2;
4733 STBI_NOTUSED(idata_limit_old);
4734 p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4735 z->idata = p;
4736 }
4737 if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
4738 ioff += c.length;
4739 break;
4740 }
4741
4742 case STBI__PNG_TYPE('I','E','N','D'): {
4743 stbi__uint32 raw_len, bpl;
4744 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4745 if (scan != STBI__SCAN_load) return 1;
4746 if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
4747 // initial guess for decoded data size to avoid unnecessary reallocs
4748 bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
4749 raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4750 z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
4751 if (z->expanded == NULL) return 0; // zlib should set error
4752 STBI_FREE(z->idata); z->idata = NULL;
4753 if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
4754 s->img_out_n = s->img_n+1;
4755 else
4756 s->img_out_n = s->img_n;
4757 if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
4758 if (has_trans) {
4759 if (z->depth == 16) {
4760 if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
4761 } else {
4762 if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4763 }
4764 }
4765 if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4766 stbi__de_iphone(z);
4767 if (pal_img_n) {
4768 // pal_img_n == 3 or 4
4769 s->img_n = pal_img_n; // record the actual colors we had
4770 s->img_out_n = pal_img_n;
4771 if (req_comp >= 3) s->img_out_n = req_comp;
4772 if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4773 return 0;
4774 } else if (has_trans) {
4775 // non-paletted image with tRNS -> source image has (constant) alpha
4776 ++s->img_n;
4777 }
4778 STBI_FREE(z->expanded); z->expanded = NULL;
4779 // end of PNG chunk, read and skip CRC
4780 stbi__get32be(s);
4781 return 1;
4782 }
4783
4784 default:
4785 // if critical, fail
4786 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4787 if ((c.type & (1 << 29)) == 0) {
4788 #ifndef STBI_NO_FAILURE_STRINGS
4789 // not threadsafe
4790 static char invalid_chunk[] = "XXXX PNG chunk not known";
4791 invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4792 invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4793 invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4794 invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4795 #endif
4796 return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4797 }
4798 stbi__skip(s, c.length);
4799 break;
4800 }
4801 // end of PNG chunk, read and skip CRC
4802 stbi__get32be(s);
4803 }
4804}
4805
4806static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
4807{
4808 void *result=NULL;
4809 if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4810 if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4811 if (p->depth <= 8)
4812 ri->bits_per_channel = 8;
4813 else if (p->depth == 16)
4814 ri->bits_per_channel = 16;
4815 else
4816 return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
4817 result = p->out;
4818 p->out = NULL;
4819 if (req_comp && req_comp != p->s->img_out_n) {
4820 if (ri->bits_per_channel == 8)
4821 result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4822 else
4823 result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4824 p->s->img_out_n = req_comp;
4825 if (result == NULL) return result;
4826 }
4827 *x = p->s->img_x;
4828 *y = p->s->img_y;
4829 if (n) *n = p->s->img_n;
4830 }
4831 STBI_FREE(p->out); p->out = NULL;
4832 STBI_FREE(p->expanded); p->expanded = NULL;
4833 STBI_FREE(p->idata); p->idata = NULL;
4834
4835 return result;
4836}
4837
4838static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4839{
4840 stbi__png p;
4841 p.s = s;
4842 return stbi__do_png(&p, x,y,comp,req_comp, ri);
4843}
4844
4845static int stbi__png_test(stbi__context *s)
4846{
4847 int r;
4848 r = stbi__check_png_header(s);
4849 stbi__rewind(s);
4850 return r;
4851}
4852
4853static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
4854{
4855 if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
4856 stbi__rewind( p->s );
4857 return 0;
4858 }
4859 if (x) *x = p->s->img_x;
4860 if (y) *y = p->s->img_y;
4861 if (comp) *comp = p->s->img_n;
4862 return 1;
4863}
4864
4865static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
4866{
4867 stbi__png p;
4868 p.s = s;
4869 return stbi__png_info_raw(&p, x, y, comp);
4870}
4871
4872static int stbi__png_is16(stbi__context *s)
4873{
4874 stbi__png p;
4875 p.s = s;
4876 if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
4877 return 0;
4878 if (p.depth != 16) {
4879 stbi__rewind(p.s);
4880 return 0;
4881 }
4882 return 1;
4883}
4884#endif
4885
4886// Microsoft/Windows BMP image
4887
4888#ifndef STBI_NO_BMP
4889static int stbi__bmp_test_raw(stbi__context *s)
4890{
4891 int r;
4892 int sz;
4893 if (stbi__get8(s) != 'B') return 0;
4894 if (stbi__get8(s) != 'M') return 0;
4895 stbi__get32le(s); // discard filesize
4896 stbi__get16le(s); // discard reserved
4897 stbi__get16le(s); // discard reserved
4898 stbi__get32le(s); // discard data offset
4899 sz = stbi__get32le(s);
4900 r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
4901 return r;
4902}
4903
4904static int stbi__bmp_test(stbi__context *s)
4905{
4906 int r = stbi__bmp_test_raw(s);
4907 stbi__rewind(s);
4908 return r;
4909}
4910
4911
// returns the index (0..31) of the highest set bit, or -1 when z is 0
static int stbi__high_bit(unsigned int z)
{
   int step, pos = 0;

   if (z == 0) return -1;

   // binary search: test the upper half of the remaining bits each round
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         pos += step;
         z >>= step;
      }
   }
   return pos;
}
4924
// Count the set bits in a by summing neighbouring bit fields of doubling width.
static int stbi__bitcount(unsigned int a)
{
   unsigned int pairs   = (a & 0x55555555) + ((a >> 1) & 0x55555555);         // max 2
   unsigned int nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333); // max 4
   unsigned int bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;            // max 8 per 4, now 8 bits
   unsigned int total   = bytes + (bytes >> 8);                               // max 16 per 8 bits
   total += total >> 16;                                                      // max 32 per 8 bits
   return total & 0xff;
}
4934
// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to the full range.
//
//   v      the already-masked field
//   shift  right shift that moves the field's top bit to bit 7
//          (negative means shift left)
//   bits   width of the field, 0..8
static int stbi__shiftsigned(unsigned int v, int shift, int bits)
{
   // per-width multiplier and post-shift that replicate the N-bit value
   // across 8 bits
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   // validate bits before it is used as a shift amount and table index
   STBI_ASSERT(bits >= 0 && bits <= 8);
   if (shift < 0)
      v <<= -shift;
   else
      v >>= shift;
   STBI_ASSERT(v < 256);
   v >>= (8-bits);
   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
}
4957
// Values gathered from a BMP header by stbi__bmp_parse_header.
typedef struct
{
   int bpp;             // bits per pixel
   int offset;          // data offset field of the file header
   int hsz;             // info header size (12, 40, 56, 108 or 124)
   unsigned int mr;     // red channel mask
   unsigned int mg;     // green channel mask
   unsigned int mb;     // blue channel mask
   unsigned int ma;     // alpha channel mask
   unsigned int all_a;  // if 0 at end of load, the alpha channel read was all 0
   int extra_read;      // starts at 14; grows by 12 when three masks follow the info header
} stbi__bmp_data;
4964
4965static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
4966{
4967 // BI_BITFIELDS specifies masks explicitly, don't override
4968 if (compress == 3)
4969 return 1;
4970
4971 if (compress == 0) {
4972 if (info->bpp == 16) {
4973 info->mr = 31u << 10;
4974 info->mg = 31u << 5;
4975 info->mb = 31u << 0;
4976 } else if (info->bpp == 32) {
4977 info->mr = 0xffu << 16;
4978 info->mg = 0xffu << 8;
4979 info->mb = 0xffu << 0;
4980 info->ma = 0xffu << 24;
4981 info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
4982 } else {
4983 // otherwise, use defaults, which is all-0
4984 info->mr = info->mg = info->mb = info->ma = 0;
4985 }
4986 return 1;
4987 }
4988 return 0; // error
4989}
4990
4991static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
4992{
4993 int hsz;
4994 if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
4995 stbi__get32le(s); // discard filesize
4996 stbi__get16le(s); // discard reserved
4997 stbi__get16le(s); // discard reserved
4998 info->offset = stbi__get32le(s);
4999 info->hsz = hsz = stbi__get32le(s);
5000 info->mr = info->mg = info->mb = info->ma = 0;
5001 info->extra_read = 14;
5002
5003 if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
5004
5005 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
5006 if (hsz == 12) {
5007 s->img_x = stbi__get16le(s);
5008 s->img_y = stbi__get16le(s);
5009 } else {
5010 s->img_x = stbi__get32le(s);
5011 s->img_y = stbi__get32le(s);
5012 }
5013 if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
5014 info->bpp = stbi__get16le(s);
5015 if (hsz != 12) {
5016 int compress = stbi__get32le(s);
5017 if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5018 if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
5019 if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
5020 stbi__get32le(s); // discard sizeof
5021 stbi__get32le(s); // discard hres
5022 stbi__get32le(s); // discard vres
5023 stbi__get32le(s); // discard colorsused
5024 stbi__get32le(s); // discard max important
5025 if (hsz == 40 || hsz == 56) {
5026 if (hsz == 56) {
5027 stbi__get32le(s);
5028 stbi__get32le(s);
5029 stbi__get32le(s);
5030 stbi__get32le(s);
5031 }
5032 if (info->bpp == 16 || info->bpp == 32) {
5033 if (compress == 0) {
5034 stbi__bmp_set_mask_defaults(info, compress);
5035 } else if (compress == 3) {
5036 info->mr = stbi__get32le(s);
5037 info->mg = stbi__get32le(s);
5038 info->mb = stbi__get32le(s);
5039 info->extra_read += 12;
5040 // not documented, but generated by photoshop and handled by mspaint
5041 if (info->mr == info->mg && info->mg == info->mb) {
5042 // ?!?!?
5043 return stbi__errpuc("bad BMP", "bad BMP");
5044 }
5045 } else
5046 return stbi__errpuc("bad BMP", "bad BMP");
5047 }
5048 } else {
5049 // V4/V5 header
5050 int i;
5051 if (hsz != 108 && hsz != 124)
5052 return stbi__errpuc("bad BMP", "bad BMP");
5053 info->mr = stbi__get32le(s);
5054 info->mg = stbi__get32le(s);
5055 info->mb = stbi__get32le(s);
5056 info->ma = stbi__get32le(s);
5057 if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
5058 stbi__bmp_set_mask_defaults(info, compress);
5059 stbi__get32le(s); // discard color space
5060 for (i=0; i < 12; ++i)
5061 stbi__get32le(s); // discard color space parameters
5062 if (hsz == 124) {
5063 stbi__get32le(s); // discard rendering intent
5064 stbi__get32le(s); // discard offset of profile data
5065 stbi__get32le(s); // discard size of profile data
5066 stbi__get32le(s); // discard reserved
5067 }
5068 }
5069 }
5070 return (void *) 1;
5071}
5072
5073
5074static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5075{
5076 stbi_uc *out;
5077 unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5078 stbi_uc pal[256][4];
5079 int psize=0,i,j,width;
5080 int flip_vertically, pad, target;
5081 stbi__bmp_data info;
5082 STBI_NOTUSED(ri);
5083
5084 info.all_a = 255;
5085 if (stbi__bmp_parse_header(s, &info) == NULL)
5086 return NULL; // error code already set
5087
5088 flip_vertically = ((int) s->img_y) > 0;
5089 s->img_y = abs((int) s->img_y);
5090
5091 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5092 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5093
5094 mr = info.mr;
5095 mg = info.mg;
5096 mb = info.mb;
5097 ma = info.ma;
5098 all_a = info.all_a;
5099
5100 if (info.hsz == 12) {
5101 if (info.bpp < 24)
5102 psize = (info.offset - info.extra_read - 24) / 3;
5103 } else {
5104 if (info.bpp < 16)
5105 psize = (info.offset - info.extra_read - info.hsz) >> 2;
5106 }
5107 if (psize == 0) {
5108 // accept some number of extra bytes after the header, but if the offset points either to before
5109 // the header ends or implies a large amount of extra data, reject the file as malformed
5110 int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
5111 int header_limit = 1024; // max we actually read is below 256 bytes currently.
5112 int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
5113 if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
5114 return stbi__errpuc("bad header", "Corrupt BMP");
5115 }
5116 // we established that bytes_read_so_far is positive and sensible.
5117 // the first half of this test rejects offsets that are either too small positives, or
5118 // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
5119 // ensures the number computed in the second half of the test can't overflow.
5120 if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
5121 return stbi__errpuc("bad offset", "Corrupt BMP");
5122 } else {
5123 stbi__skip(s, info.offset - bytes_read_so_far);
5124 }
5125 }
5126
5127 if (info.bpp == 24 && ma == 0xff000000)
5128 s->img_n = 3;
5129 else
5130 s->img_n = ma ? 4 : 3;
5131 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5132 target = req_comp;
5133 else
5134 target = s->img_n; // if they want monochrome, we'll post-convert
5135
5136 // sanity-check size
5137 if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5138 return stbi__errpuc("too large", "Corrupt BMP");
5139
5140 out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5141 if (!out) return stbi__errpuc("outofmem", "Out of memory");
5142 if (info.bpp < 16) {
5143 int z=0;
5144 if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5145 for (i=0; i < psize; ++i) {
5146 pal[i][2] = stbi__get8(s);
5147 pal[i][1] = stbi__get8(s);
5148 pal[i][0] = stbi__get8(s);
5149 if (info.hsz != 12) stbi__get8(s);
5150 pal[i][3] = 255;
5151 }
5152 stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5153 if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5154 else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5155 else if (info.bpp == 8) width = s->img_x;
5156 else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5157 pad = (-width)&3;
5158 if (info.bpp == 1) {
5159 for (j=0; j < (int) s->img_y; ++j) {
5160 int bit_offset = 7, v = stbi__get8(s);
5161 for (i=0; i < (int) s->img_x; ++i) {
5162 int color = (v>>bit_offset)&0x1;
5163 out[z++] = pal[color][0];
5164 out[z++] = pal[color][1];
5165 out[z++] = pal[color][2];
5166 if (target == 4) out[z++] = 255;
5167 if (i+1 == (int) s->img_x) break;
5168 if((--bit_offset) < 0) {
5169 bit_offset = 7;
5170 v = stbi__get8(s);
5171 }
5172 }
5173 stbi__skip(s, pad);
5174 }
5175 } else {
5176 for (j=0; j < (int) s->img_y; ++j) {
5177 for (i=0; i < (int) s->img_x; i += 2) {
5178 int v=stbi__get8(s),v2=0;
5179 if (info.bpp == 4) {
5180 v2 = v & 15;
5181 v >>= 4;
5182 }
5183 out[z++] = pal[v][0];
5184 out[z++] = pal[v][1];
5185 out[z++] = pal[v][2];
5186 if (target == 4) out[z++] = 255;
5187 if (i+1 == (int) s->img_x) break;
5188 v = (info.bpp == 8) ? stbi__get8(s) : v2;
5189 out[z++] = pal[v][0];
5190 out[z++] = pal[v][1];
5191 out[z++] = pal[v][2];
5192 if (target == 4) out[z++] = 255;
5193 }
5194 stbi__skip(s, pad);
5195 }
5196 }
5197 } else {
5198 int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5199 int z = 0;
5200 int easy=0;
5201 stbi__skip(s, info.offset - info.extra_read - info.hsz);
5202 if (info.bpp == 24) width = 3 * s->img_x;
5203 else if (info.bpp == 16) width = 2*s->img_x;
5204 else /* bpp = 32 and pad = 0 */ width=0;
5205 pad = (-width) & 3;
5206 if (info.bpp == 24) {
5207 easy = 1;
5208 } else if (info.bpp == 32) {
5209 if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5210 easy = 2;
5211 }
5212 if (!easy) {
5213 if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5214 // right shift amt to put high bit in position #7
5215 rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5216 gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5217 bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5218 ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5219 if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5220 }
5221 for (j=0; j < (int) s->img_y; ++j) {
5222 if (easy) {
5223 for (i=0; i < (int) s->img_x; ++i) {
5224 unsigned char a;
5225 out[z+2] = stbi__get8(s);
5226 out[z+1] = stbi__get8(s);
5227 out[z+0] = stbi__get8(s);
5228 z += 3;
5229 a = (easy == 2 ? stbi__get8(s) : 255);
5230 all_a |= a;
5231 if (target == 4) out[z++] = a;
5232 }
5233 } else {
5234 int bpp = info.bpp;
5235 for (i=0; i < (int) s->img_x; ++i) {
5236 stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5237 unsigned int a;
5238 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5239 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5240 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5241 a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5242 all_a |= a;
5243 if (target == 4) out[z++] = STBI__BYTECAST(a);
5244 }
5245 }
5246 stbi__skip(s, pad);
5247 }
5248 }
5249
5250 // if alpha channel is all 0s, replace with all 255s
5251 if (target == 4 && all_a == 0)
5252 for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5253 out[i] = 255;
5254
5255 if (flip_vertically) {
5256 stbi_uc t;
5257 for (j=0; j < (int) s->img_y>>1; ++j) {
5258 stbi_uc *p1 = out + j *s->img_x*target;
5259 stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5260 for (i=0; i < (int) s->img_x*target; ++i) {
5261 t = p1[i]; p1[i] = p2[i]; p2[i] = t;
5262 }
5263 }
5264 }
5265
5266 if (req_comp && req_comp != target) {
5267 out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5268 if (out == NULL) return out; // stbi__convert_format frees input on failure
5269 }
5270
5271 *x = s->img_x;
5272 *y = s->img_y;
5273 if (comp) *comp = s->img_n;
5274 return out;
5275}
5276#endif
5277
5278// Targa Truevision - TGA
5279// by Jonathan Dummer
5280#ifndef STBI_NO_TGA
5281// returns STBI_rgb or whatever, 0 on error
5282static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5283{
5284 // only RGB or RGBA (incl. 16bit) or grey allowed
5285 if (is_rgb16) *is_rgb16 = 0;
5286 switch(bits_per_pixel) {
5287 case 8: return STBI_grey;
5288 case 16: if(is_grey) return STBI_grey_alpha;
5289 // fallthrough
5290 case 15: if(is_rgb16) *is_rgb16 = 1;
5291 return STBI_rgb;
5292 case 24: // fallthrough
5293 case 32: return bits_per_pixel/8;
5294 default: return 0;
5295 }
5296}
5297
5298static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
5299{
5300 int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
5301 int sz, tga_colormap_type;
5302 stbi__get8(s); // discard Offset
5303 tga_colormap_type = stbi__get8(s); // colormap type
5304 if( tga_colormap_type > 1 ) {
5305 stbi__rewind(s);
5306 return 0; // only RGB or indexed allowed
5307 }
5308 tga_image_type = stbi__get8(s); // image type
5309 if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
5310 if (tga_image_type != 1 && tga_image_type != 9) {
5311 stbi__rewind(s);
5312 return 0;
5313 }
5314 stbi__skip(s,4); // skip index of first colormap entry and number of entries
5315 sz = stbi__get8(s); // check bits per palette color entry
5316 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
5317 stbi__rewind(s);
5318 return 0;
5319 }
5320 stbi__skip(s,4); // skip image x and y origin
5321 tga_colormap_bpp = sz;
5322 } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
5323 if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
5324 stbi__rewind(s);
5325 return 0; // only RGB or grey allowed, +/- RLE
5326 }
5327 stbi__skip(s,9); // skip colormap specification and image x/y origin
5328 tga_colormap_bpp = 0;
5329 }
5330 tga_w = stbi__get16le(s);
5331 if( tga_w < 1 ) {
5332 stbi__rewind(s);
5333 return 0; // test width
5334 }
5335 tga_h = stbi__get16le(s);
5336 if( tga_h < 1 ) {
5337 stbi__rewind(s);
5338 return 0; // test height
5339 }
5340 tga_bits_per_pixel = stbi__get8(s); // bits per pixel
5341 stbi__get8(s); // ignore alpha bits
5342 if (tga_colormap_bpp != 0) {
5343 if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
5344 // when using a colormap, tga_bits_per_pixel is the size of the indexes
5345 // I don't think anything but 8 or 16bit indexes makes sense
5346 stbi__rewind(s);
5347 return 0;
5348 }
5349 tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
5350 } else {
5351 tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
5352 }
5353 if(!tga_comp) {
5354 stbi__rewind(s);
5355 return 0;
5356 }
5357 if (x) *x = tga_w;
5358 if (y) *y = tga_h;
5359 if (comp) *comp = tga_comp;
5360 return 1; // seems to have passed everything
5361}
5362
5363static int stbi__tga_test(stbi__context *s)
5364{
5365 int res = 0;
5366 int sz, tga_color_type;
5367 stbi__get8(s); // discard Offset
5368 tga_color_type = stbi__get8(s); // color type
5369 if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
5370 sz = stbi__get8(s); // image type
5371 if ( tga_color_type == 1 ) { // colormapped (paletted) image
5372 if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
5373 stbi__skip(s,4); // skip index of first colormap entry and number of entries
5374 sz = stbi__get8(s); // check bits per palette color entry
5375 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5376 stbi__skip(s,4); // skip image x and y origin
5377 } else { // "normal" image w/o colormap
5378 if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
5379 stbi__skip(s,9); // skip colormap specification and image x/y origin
5380 }
5381 if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
5382 if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
5383 sz = stbi__get8(s); // bits per pixel
5384 if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
5385 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5386
5387 res = 1; // if we got this far, everything's good and we can return 1 instead of 0
5388
5389errorEnd:
5390 stbi__rewind(s);
5391 return res;
5392}
5393
5394// read 16bit value and convert to 24bit RGB
5395static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5396{
5397 stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5398 stbi__uint16 fiveBitMask = 31;
5399 // we have 3 channels with 5bits each
5400 int r = (px >> 10) & fiveBitMask;
5401 int g = (px >> 5) & fiveBitMask;
5402 int b = px & fiveBitMask;
5403 // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5404 out[0] = (stbi_uc)((r * 255)/31);
5405 out[1] = (stbi_uc)((g * 255)/31);
5406 out[2] = (stbi_uc)((b * 255)/31);
5407
5408 // some people claim that the most significant bit might be used for alpha
5409 // (possibly if an alpha-bit is set in the "image descriptor byte")
5410 // but that only made 16bit test images completely translucent..
5411 // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5412}
5413
// Decode a TGA image from `s`.
//
// Reads the fixed header field by field, allocates a tga_width * tga_height *
// tga_comp byte buffer, fills it (raw, palettised, 16-bit and/or RLE), flips it
// vertically when required, swaps the first and third channel for non-16-bit
// data, and finally converts to `req_comp` channels when that is non-zero and
// differs from the source channel count.
//
//   x, y     - receive the image width and height.
//   comp     - if non-NULL, receives the channel count of the source data.
//   req_comp - requested channel count, 0 to keep the source channel count.
//   ri       - unused by this loader.
//
// Returns the pixel buffer, or the result of stbi__errpuc() on failure.
5414static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5415{
5416 // read in the TGA header stuff
5417 int tga_offset = stbi__get8(s);
5418 int tga_indexed = stbi__get8(s);
5419 int tga_image_type = stbi__get8(s);
5420 int tga_is_RLE = 0;
5421 int tga_palette_start = stbi__get16le(s);
5422 int tga_palette_len = stbi__get16le(s);
5423 int tga_palette_bits = stbi__get8(s);
5424 int tga_x_origin = stbi__get16le(s);
5425 int tga_y_origin = stbi__get16le(s);
5426 int tga_width = stbi__get16le(s);
5427 int tga_height = stbi__get16le(s);
5428 int tga_bits_per_pixel = stbi__get8(s);
5429 int tga_comp, tga_rgb16=0;
5430 int tga_inverted = stbi__get8(s);
5431 // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
5432 // image data
5433 unsigned char *tga_data;
5434 unsigned char *tga_palette = NULL;
5435 int i, j;
5436 unsigned char raw_data[4] = {0};
5437 int RLE_count = 0;
5438 int RLE_repeating = 0;
5439 int read_next_pixel = 1;
5440 STBI_NOTUSED(ri);
5441 STBI_NOTUSED(tga_x_origin); // @TODO
5442 STBI_NOTUSED(tga_y_origin); // @TODO
5443
5444 if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5445 if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5446
5447 // do a tiny bit of preprocessing:
// image types >= 8 are treated as the RLE-compressed form of (type - 8)
5448 if ( tga_image_type >= 8 )
5449 {
5450 tga_image_type -= 8;
5451 tga_is_RLE = 1;
5452 }
// bit 5 of the descriptor byte selects the row order; after this line
// tga_inverted == 1 means the rows must be flipped vertically
5453 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
5454
5455 // If I'm paletted, then I'll use the number of bits from the palette
5456 if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
5457 else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
5458
5459 if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
5460 return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
5461
5462 // tga info
5463 *x = tga_width;
5464 *y = tga_height;
5465 if (comp) *comp = tga_comp;
5466
// reject sizes whose byte count would overflow before allocating
5467 if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
5468 return stbi__errpuc("too large", "Corrupt TGA");
5469
5470 tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
5471 if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
5472
5473 // skip to the data's starting position (offset usually = 0)
5474 stbi__skip(s, tga_offset );
5475
// fast path: uncompressed, non-paletted, non-16-bit data is read one whole
// row at a time, directly into its final (possibly flipped) row position
5476 if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
5477 for (i=0; i < tga_height; ++i) {
5478 int row = tga_inverted ? tga_height -i - 1 : i;
5479 stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
// NOTE(review): the result of stbi__getn is ignored here, unlike the
// palette read further down - confirm a short read is acceptable
5480 stbi__getn(s, tga_row, tga_width * tga_comp);
5481 }
5482 } else {
5483 // do I need to load a palette?
5484 if ( tga_indexed)
5485 {
5486 if (tga_palette_len == 0) { /* you have to have at least one entry! */
5487 STBI_FREE(tga_data);
5488 return stbi__errpuc("bad palette", "Corrupt TGA");
5489 }
5490
5491 // any data to skip? (offset usually = 0)
5492 stbi__skip(s, tga_palette_start );
5493 // load the palette
5494 tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
5495 if (!tga_palette) {
5496 STBI_FREE(tga_data);
5497 return stbi__errpuc("outofmem", "Out of memory");
5498 }
5499 if (tga_rgb16) {
// 16-bit palette entries are expanded to 3 bytes each while reading
5500 stbi_uc *pal_entry = tga_palette;
5501 STBI_ASSERT(tga_comp == STBI_rgb);
5502 for (i=0; i < tga_palette_len; ++i) {
5503 stbi__tga_read_rgb16(s, pal_entry);
5504 pal_entry += tga_comp;
5505 }
5506 } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
5507 STBI_FREE(tga_data);
5508 STBI_FREE(tga_palette);
5509 return stbi__errpuc("bad palette", "Corrupt TGA");
5510 }
5511 }
5512 // load the data
5513 for (i=0; i < tga_width * tga_height; ++i)
5514 {
5515 // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
5516 if ( tga_is_RLE )
5517 {
5518 if ( RLE_count == 0 )
5519 {
5520 // yep, get the next byte as a RLE command:
// low 7 bits + 1 = number of pixels covered, high bit = repeat flag
5521 int RLE_cmd = stbi__get8(s);
5522 RLE_count = 1 + (RLE_cmd & 127);
5523 RLE_repeating = RLE_cmd >> 7;
5524 read_next_pixel = 1;
5525 } else if ( !RLE_repeating )
5526 {
// literal run: every pixel of the run is read from the stream
5527 read_next_pixel = 1;
5528 }
5529 } else
5530 {
5531 read_next_pixel = 1;
5532 }
5533 // OK, if I need to read a pixel, do it now
5534 if ( read_next_pixel )
5535 {
5536 // load however much data we did have
5537 if ( tga_indexed )
5538 {
5539 // read in index, then perform the lookup
5540 int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5541 if ( pal_idx >= tga_palette_len ) {
5542 // invalid index
5543 pal_idx = 0;
5544 }
5545 pal_idx *= tga_comp;
5546 for (j = 0; j < tga_comp; ++j) {
5547 raw_data[j] = tga_palette[pal_idx+j];
5548 }
5549 } else if(tga_rgb16) {
5550 STBI_ASSERT(tga_comp == STBI_rgb);
5551 stbi__tga_read_rgb16(s, raw_data);
5552 } else {
5553 // read in the data raw
5554 for (j = 0; j < tga_comp; ++j) {
5555 raw_data[j] = stbi__get8(s);
5556 }
5557 }
5558 // clear the reading flag for the next pixel
5559 read_next_pixel = 0;
5560 } // end of reading a pixel
5561
5562 // copy data (raw_data still holds the previous pixel during a repeat run)
5563 for (j = 0; j < tga_comp; ++j)
5564 tga_data[i*tga_comp+j] = raw_data[j];
5565
5566 // in case we're in RLE mode, keep counting down
// (also executed in non-RLE mode, where RLE_count is never read)
5567 --RLE_count;
5568 }
5569 // do I need to invert the image?
5570 if ( tga_inverted )
5571 {
// swap row j with row (tga_height - 1 - j), byte by byte
5572 for (j = 0; j*2 < tga_height; ++j)
5573 {
5574 int index1 = j * tga_width * tga_comp;
5575 int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
5576 for (i = tga_width * tga_comp; i > 0; --i)
5577 {
5578 unsigned char temp = tga_data[index1];
5579 tga_data[index1] = tga_data[index2];
5580 tga_data[index2] = temp;
5581 ++index1;
5582 ++index2;
5583 }
5584 }
5585 }
5586 // clear my palette, if I had one
5587 if ( tga_palette != NULL )
5588 {
5589 STBI_FREE( tga_palette );
5590 }
5591 }
5592
5593 // swap RGB - if the source data was RGB16, it already is in the right order
5594 if (tga_comp >= 3 && !tga_rgb16)
5595 {
5596 unsigned char* tga_pixel = tga_data;
5597 for (i=0; i < tga_width * tga_height; ++i)
5598 {
5599 unsigned char temp = tga_pixel[0];
5600 tga_pixel[0] = tga_pixel[2];
5601 tga_pixel[2] = temp;
5602 tga_pixel += tga_comp;
5603 }
5604 }
5605
5606 // convert to target component count
5607 if (req_comp && req_comp != tga_comp)
5608 tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
5609
5610 // the things I do to get rid of an error message, and yet keep
5611 // Microsoft's C compilers happy... [8^(
5612 tga_palette_start = tga_palette_len = tga_palette_bits =
5613 tga_x_origin = tga_y_origin = 0;
5614 STBI_NOTUSED(tga_palette_start);
5615 // OK, done
5616 return tga_data;
5617}
5618#endif
5619
5620// *************************************************************************************************
5621// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5622
5623#ifndef STBI_NO_PSD
5624static int stbi__psd_test(stbi__context *s)
5625{
5626 int r = (stbi__get32be(s) == 0x38425053);
5627 stbi__rewind(s);
5628 return r;
5629}
5630
5631static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
5632{
5633 int count, nleft, len;
5634
5635 count = 0;
5636 while ((nleft = pixelCount - count) > 0) {
5637 len = stbi__get8(s);
5638 if (len == 128) {
5639 // No-op.
5640 } else if (len < 128) {
5641 // Copy next len+1 bytes literally.
5642 len++;
5643 if (len > nleft) return 0; // corrupt data
5644 count += len;
5645 while (len) {
5646 *p = stbi__get8(s);
5647 p += 4;
5648 len--;
5649 }
5650 } else if (len > 128) {
5651 stbi_uc val;
5652 // Next -len+1 bytes in the dest are replicated from next source byte.
5653 // (Interpret len as a negative 8-bit int.)
5654 len = 257 - len;
5655 if (len > nleft) return 0; // corrupt data
5656 val = stbi__get8(s);
5657 count += len;
5658 while (len) {
5659 *p = val;
5660 p += 4;
5661 len--;
5662 }
5663 }
5664 }
5665
5666 return 1;
5667}
5668
// Decode the composite image of a PSD file from `s`.
//
// Only version 1, RGB colour mode, 8 or 16 bits per channel, and
// compression 0 (raw) or 1 (RLE) are accepted. The image is always decoded
// into a 4-channel interleaved buffer; channels the file does not provide are
// filled with 0 (or full value for the 4th channel).
//
//   x, y     - receive the image width and height.
//   comp     - if non-NULL, receives 4.
//   req_comp - requested channel count, 0 or 4 to keep the 4-channel buffer.
//   ri       - ri->bits_per_channel is set to 16 when 16-bit output is
//              produced, and is read to select the 8/16-bit code paths.
//   bpc      - bits per channel the caller wants (16 enables 16-bit output
//              for uncompressed 16-bit files).
//
// Returns the pixel buffer, or the result of stbi__errpuc() on failure.
5669static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
5670{
5671 int pixelCount;
5672 int channelCount, compression;
5673 int channel, i;
5674 int bitdepth;
5675 int w,h;
5676 stbi_uc *out;
// NOTE(review): ri is in fact read and written below
5677 STBI_NOTUSED(ri);
5678
5679 // Check identifier
5680 if (stbi__get32be(s) != 0x38425053) // "8BPS"
5681 return stbi__errpuc("not PSD", "Corrupt PSD image");
5682
5683 // Check file type version.
5684 if (stbi__get16be(s) != 1)
5685 return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5686
5687 // Skip 6 reserved bytes.
5688 stbi__skip(s, 6 );
5689
5690 // Read the number of channels (R, G, B, A, etc).
5691 channelCount = stbi__get16be(s);
5692 if (channelCount < 0 || channelCount > 16)
5693 return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5694
5695 // Read the rows and columns of the image.
5696 h = stbi__get32be(s);
5697 w = stbi__get32be(s);
5698
5699 if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5700 if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5701
5702 // Make sure the depth is 8 or 16 bits.
5703 bitdepth = stbi__get16be(s);
5704 if (bitdepth != 8 && bitdepth != 16)
5705 return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
5706
5707 // Make sure the color mode is RGB.
5708 // Valid options are:
5709 // 0: Bitmap
5710 // 1: Grayscale
5711 // 2: Indexed color
5712 // 3: RGB color
5713 // 4: CMYK color
5714 // 7: Multichannel
5715 // 8: Duotone
5716 // 9: Lab color
5717 if (stbi__get16be(s) != 3)
5718 return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5719
5720 // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
5721 stbi__skip(s,stbi__get32be(s) );
5722
5723 // Skip the image resources. (resolution, pen tool paths, etc)
5724 stbi__skip(s, stbi__get32be(s) );
5725
5726 // Skip the reserved data.
5727 stbi__skip(s, stbi__get32be(s) );
5728
5729 // Find out if the data is compressed.
5730 // Known values:
5731 // 0: no compression
5732 // 1: RLE compressed
5733 compression = stbi__get16be(s);
5734 if (compression > 1)
5735 return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5736
5737 // Check size
5738 if (!stbi__mad3sizes_valid(4, w, h, 0))
5739 return stbi__errpuc("too large", "Corrupt PSD");
5740
5741 // Create the destination image.
5742
// 16-bit output (8 bytes per pixel) is produced only for uncompressed 16-bit
// files when the caller asked for 16 bits; everything else is 4 bytes per pixel
5743 if (!compression && bitdepth == 16 && bpc == 16) {
5744 out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
5745 ri->bits_per_channel = 16;
5746 } else
5747 out = (stbi_uc *) stbi__malloc(4 * w*h);
5748
5749 if (!out) return stbi__errpuc("outofmem", "Out of memory");
5750 pixelCount = w*h;
5751
5752 // Initialize the data to zero.
5753 //memset( out, 0, pixelCount * 4 );
5754
5755 // Finally, the image data.
5756 if (compression) {
5757 // RLE as used by .PSD and .TIFF
5758 // Loop until you get the number of unpacked bytes you are expecting:
5759 // Read the next source byte into n.
5760 // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5761 // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5762 // Else if n is 128, noop.
5763 // Endloop
// NOTE(review): this path decodes one byte per sample regardless of
// bitdepth - confirm the intended handling of RLE-compressed 16-bit files
5764
5765 // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
5766 // which we're going to just skip.
5767 stbi__skip(s, h * channelCount * 2 );
5768
5769 // Read the RLE data by channel.
5770 for (channel = 0; channel < 4; channel++) {
5771 stbi_uc *p;
5772
5773 p = out+channel;
5774 if (channel >= channelCount) {
5775 // Fill this channel with default data.
5776 for (i = 0; i < pixelCount; i++, p += 4)
5777 *p = (channel == 3 ? 255 : 0);
5778 } else {
5779 // Read the RLE data.
5780 if (!stbi__psd_decode_rle(s, p, pixelCount)) {
5781 STBI_FREE(out);
5782 return stbi__errpuc("corrupt", "bad RLE data");
5783 }
5784 }
5785 }
5786
5787 } else {
5788 // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
5789 // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
5790
5791 // Read the data by channel.
5792 for (channel = 0; channel < 4; channel++) {
5793 if (channel >= channelCount) {
5794 // Fill this channel with default data.
5795 if (bitdepth == 16 && bpc == 16) {
5796 stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5797 stbi__uint16 val = channel == 3 ? 65535 : 0;
5798 for (i = 0; i < pixelCount; i++, q += 4)
5799 *q = val;
5800 } else {
5801 stbi_uc *p = out+channel;
5802 stbi_uc val = channel == 3 ? 255 : 0;
5803 for (i = 0; i < pixelCount; i++, p += 4)
5804 *p = val;
5805 }
5806 } else {
5807 if (ri->bits_per_channel == 16) { // output bpc
5808 stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5809 for (i = 0; i < pixelCount; i++, q += 4)
5810 *q = (stbi__uint16) stbi__get16be(s);
5811 } else {
5812 stbi_uc *p = out+channel;
5813 if (bitdepth == 16) { // input bpc
// 16-bit input, 8-bit output: keep only the high byte of each sample
5814 for (i = 0; i < pixelCount; i++, p += 4)
5815 *p = (stbi_uc) (stbi__get16be(s) >> 8);
5816 } else {
5817 for (i = 0; i < pixelCount; i++, p += 4)
5818 *p = stbi__get8(s);
5819 }
5820 }
5821 }
5822 }
5823 }
5824
5825 // remove weird white matte from PSD
// (applied only to partially transparent pixels: alpha neither 0 nor max)
5826 if (channelCount >= 4) {
5827 if (ri->bits_per_channel == 16) {
5828 for (i=0; i < w*h; ++i) {
5829 stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
5830 if (pixel[3] != 0 && pixel[3] != 65535) {
5831 float a = pixel[3] / 65535.0f;
5832 float ra = 1.0f / a;
5833 float inv_a = 65535.0f * (1 - ra);
5834 pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
5835 pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
5836 pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
5837 }
5838 }
5839 } else {
5840 for (i=0; i < w*h; ++i) {
5841 unsigned char *pixel = out + 4*i;
5842 if (pixel[3] != 0 && pixel[3] != 255) {
5843 float a = pixel[3] / 255.0f;
5844 float ra = 1.0f / a;
5845 float inv_a = 255.0f * (1 - ra);
5846 pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
5847 pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
5848 pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
5849 }
5850 }
5851 }
5852 }
5853
5854 // convert to desired output format
5855 if (req_comp && req_comp != 4) {
5856 if (ri->bits_per_channel == 16)
5857 out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
5858 else
5859 out = stbi__convert_format(out, 4, req_comp, w, h);
5860 if (out == NULL) return out; // stbi__convert_format frees input on failure
5861 }
5862
5863 if (comp) *comp = 4;
5864 *y = h;
5865 *x = w;
5866
5867 return out;
5868}
5869#endif
5870
5871// *************************************************************************************************
5872// Softimage PIC loader
5873// by Tom Seddon
5874//
5875// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5876// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5877
5878#ifndef STBI_NO_PIC
5879static int stbi__pic_is4(stbi__context *s,const char *str)
5880{
5881 int i;
5882 for (i=0; i<4; ++i)
5883 if (stbi__get8(s) != (stbi_uc)str[i])
5884 return 0;
5885
5886 return 1;
5887}
5888
5889static int stbi__pic_test_core(stbi__context *s)
5890{
5891 int i;
5892
5893 if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
5894 return 0;
5895
5896 for(i=0;i<84;++i)
5897 stbi__get8(s);
5898
5899 if (!stbi__pic_is4(s,"PICT"))
5900 return 0;
5901
5902 return 1;
5903}
5904
5905typedef struct
5906{
5907 stbi_uc size,type,channel;
5908} stbi__pic_packet;
5909
5910static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5911{
5912 int mask=0x80, i;
5913
5914 for (i=0; i<4; ++i, mask>>=1) {
5915 if (channel & mask) {
5916 if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
5917 dest[i]=stbi__get8(s);
5918 }
5919 }
5920
5921 return dest;
5922}
5923
5924static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
5925{
5926 int mask=0x80,i;
5927
5928 for (i=0;i<4; ++i, mask>>=1)
5929 if (channel&mask)
5930 dest[i]=src[i];
5931}
5932
// Decode the packet table and pixel data of a PIC image into `result`.
//
//   width, height - image dimensions in pixels.
//   comp          - receives 4 if any packet carries the 0x10 channel bit
//                   (alpha), otherwise 3.
//   result        - caller-provided buffer of width*height pixels, 4 bytes
//                   each; only the channels named by the packets are written.
//
// Returns `result` on success. On failure returns 0 or the result of
// stbi__errpuc(); `result` is not freed here.
5933static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
5934{
5935 int act_comp=0,num_packets=0,y,chained;
5936 stbi__pic_packet packets[10];
5937
5938 // this will (should...) cater for even some bizarre stuff like having data
5939 // for the same channel in multiple packets.
5940 do {
5941 stbi__pic_packet *packet;
5942
// refuse to read more descriptors than the fixed-size table can hold
5943 if (num_packets==sizeof(packets)/sizeof(packets[0]))
5944 return stbi__errpuc("bad format","too many packets");
5945
5946 packet = &packets[num_packets++];
5947
// each descriptor is 4 bytes: chained flag, size, type, channel mask
5948 chained = stbi__get8(s);
5949 packet->size = stbi__get8(s);
5950 packet->type = stbi__get8(s);
5951 packet->channel = stbi__get8(s);
5952
// accumulate the union of all channels present in the file
5953 act_comp |= packet->channel;
5954
5955 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
5956 if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
5957 } while (chained);
5958
5959 *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
5960
// every scanline stores the data of all packets, one packet after another
5961 for(y=0; y<height; ++y) {
5962 int packet_idx;
5963
5964 for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
5965 stbi__pic_packet *packet = &packets[packet_idx];
// each packet starts again at the first pixel of the scanline
5966 stbi_uc *dest = result+y*width*4;
5967
5968 switch (packet->type) {
5969 default:
5970 return stbi__errpuc("bad format","packet has bad compression type");
5971
5972 case 0: {//uncompressed
5973 int x;
5974
5975 for(x=0;x<width;++x, dest+=4)
5976 if (!stbi__readval(s,packet->channel,dest))
5977 return 0;
5978 break;
5979 }
5980
5981 case 1://Pure RLE
5982 {
5983 int left=width, i;
5984
5985 while (left>0) {
5986 stbi_uc count,value[4];
5987
5988 count=stbi__get8(s);
5989 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
5990
// a run longer than the rest of the scanline is clamped, not rejected
5991 if (count > left)
5992 count = (stbi_uc) left;
5993
5994 if (!stbi__readval(s,packet->channel,value)) return 0;
5995
5996 for(i=0; i<count; ++i,dest+=4)
5997 stbi__copyval(packet->channel,dest,value);
5998 left -= count;
5999 }
6000 }
6001 break;
6002
6003 case 2: {//Mixed RLE
6004 int left=width;
6005 while (left>0) {
6006 int count = stbi__get8(s), i;
6007 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
6008
6009 if (count >= 128) { // Repeated
6010 stbi_uc value[4];
6011
// 128 means the real count follows as a 16-bit big-endian value;
// 129..255 encode counts 2..128
6012 if (count==128)
6013 count = stbi__get16be(s);
6014 else
6015 count -= 127;
6016 if (count > left)
6017 return stbi__errpuc("bad file","scanline overrun");
6018
6019 if (!stbi__readval(s,packet->channel,value))
6020 return 0;
6021
6022 for(i=0;i<count;++i, dest += 4)
6023 stbi__copyval(packet->channel,dest,value);
6024 } else { // Raw
// 0..127 encode 1..128 literal pixels
6025 ++count;
6026 if (count>left) return stbi__errpuc("bad file","scanline overrun");
6027
6028 for(i=0;i<count;++i, dest+=4)
6029 if (!stbi__readval(s,packet->channel,dest))
6030 return 0;
6031 }
6032 left-=count;
6033 }
6034 break;
6035 }
6036 }
6037 }
6038 }
6039
6040 return result;
6041}
6042
6043static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
6044{
6045 stbi_uc *result;
6046 int i, x,y, internal_comp;
6047 STBI_NOTUSED(ri);
6048
6049 if (!comp) comp = &internal_comp;
6050
6051 for (i=0; i<92; ++i)
6052 stbi__get8(s);
6053
6054 x = stbi__get16be(s);
6055 y = stbi__get16be(s);
6056
6057 if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6058 if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6059
6060 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
6061 if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
6062
6063 stbi__get32be(s); //skip `ratio'
6064 stbi__get16be(s); //skip `fields'
6065 stbi__get16be(s); //skip `pad'
6066
6067 // intermediate buffer is RGBA
6068 result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
6069 if (!result) return stbi__errpuc("outofmem", "Out of memory");
6070 memset(result, 0xff, x*y*4);
6071
6072 if (!stbi__pic_load_core(s,x,y,comp, result)) {
6073 STBI_FREE(result);
6074 result=0;
6075 }
6076 *px = x;
6077 *py = y;
6078 if (req_comp == 0) req_comp = *comp;
6079 result=stbi__convert_format(result,4,req_comp,x,y);
6080
6081 return result;
6082}
6083
6084static int stbi__pic_test(stbi__context *s)
6085{
6086 int r = stbi__pic_test_core(s);
6087 stbi__rewind(s);
6088 return r;
6089}
6090#endif
6091
6092// *************************************************************************************************
6093// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6094
6095#ifndef STBI_NO_GIF
// One entry of the GIF LZW code table. An entry represents a string of
// palette indices: the string of `prefix` followed by `suffix`.
6096typedef struct
6097{
6098 stbi__int16 prefix; // code of the preceding string, or -1 for a root entry
6099 stbi_uc first; // first palette index of the whole string
6100 stbi_uc suffix; // last palette index of the string (the one this entry emits)
6101} stbi__gif_lzw;
6102
// Decoder state for a GIF stream, kept across frames.
6103typedef struct
6104{
6105 int w,h; // logical screen size read from the header
6106 stbi_uc *out; // output buffer (always 4 components)
6107 stbi_uc *background; // The current "background" as far as a gif is concerned
6108 stbi_uc *history; // one byte per pixel: non-zero if the pixel was written this frame
6109 int flags, bgindex, ratio, transparent, eflags; // header fields; transparent index (-1 = none); graphic control flags
6110 stbi_uc pal[256][4]; // global colour table
6111 stbi_uc lpal[256][4]; // local colour table of the current frame
6112 stbi__gif_lzw codes[8192]; // LZW code table
6113 stbi_uc *color_table; // points at pal or lpal, whichever the current frame uses
6114 int parse, step; // interlace state: remaining passes and byte distance between rows
6115 int lflags; // flags byte of the current image descriptor
6116 int start_x, start_y; // frame rectangle origin, as byte offsets into out
6117 int max_x, max_y; // frame rectangle end, as byte offsets into out
6118 int cur_x, cur_y; // current write position, as byte offsets into out
6119 int line_size; // bytes per output row (w * 4)
6120 int delay; // frame delay, stored in 1/1000ths of a second
6121} stbi__gif;
6122
6123static int stbi__gif_test_raw(stbi__context *s)
6124{
6125 int sz;
6126 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
6127 sz = stbi__get8(s);
6128 if (sz != '9' && sz != '7') return 0;
6129 if (stbi__get8(s) != 'a') return 0;
6130 return 1;
6131}
6132
6133static int stbi__gif_test(stbi__context *s)
6134{
6135 int r = stbi__gif_test_raw(s);
6136 stbi__rewind(s);
6137 return r;
6138}
6139
6140static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
6141{
6142 int i;
6143 for (i=0; i < num_entries; ++i) {
6144 pal[i][2] = stbi__get8(s);
6145 pal[i][1] = stbi__get8(s);
6146 pal[i][0] = stbi__get8(s);
6147 pal[i][3] = transp == i ? 0 : 255;
6148 }
6149}
6150
6151static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
6152{
6153 stbi_uc version;
6154 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
6155 return stbi__err("not GIF", "Corrupt GIF");
6156
6157 version = stbi__get8(s);
6158 if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
6159 if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
6160
6161 stbi__g_failure_reason = "";
6162 g->w = stbi__get16le(s);
6163 g->h = stbi__get16le(s);
6164 g->flags = stbi__get8(s);
6165 g->bgindex = stbi__get8(s);
6166 g->ratio = stbi__get8(s);
6167 g->transparent = -1;
6168
6169 if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
6170 if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
6171
6172 if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
6173
6174 if (is_info) return 1;
6175
6176 if (g->flags & 0x80)
6177 stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
6178
6179 return 1;
6180}
6181
6182static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
6183{
6184 stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
6185 if (!g) return stbi__err("outofmem", "Out of memory");
6186 if (!stbi__gif_header(s, g, comp, 1)) {
6187 STBI_FREE(g);
6188 stbi__rewind( s );
6189 return 0;
6190 }
6191 if (x) *x = g->w;
6192 if (y) *y = g->h;
6193 STBI_FREE(g);
6194 return 1;
6195}
6196
6197static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
6198{
6199 stbi_uc *p, *c;
6200 int idx;
6201
6202 // recurse to decode the prefixes, since the linked-list is backwards,
6203 // and working backwards through an interleaved image would be nasty
6204 if (g->codes[code].prefix >= 0)
6205 stbi__out_gif_code(g, g->codes[code].prefix);
6206
6207 if (g->cur_y >= g->max_y) return;
6208
6209 idx = g->cur_x + g->cur_y;
6210 p = &g->out[idx];
6211 g->history[idx / 4] = 1;
6212
6213 c = &g->color_table[g->codes[code].suffix * 4];
6214 if (c[3] > 128) { // don't render transparent pixels;
6215 p[0] = c[2];
6216 p[1] = c[1];
6217 p[2] = c[0];
6218 p[3] = c[3];
6219 }
6220 g->cur_x += 4;
6221
6222 if (g->cur_x >= g->max_x) {
6223 g->cur_x = g->start_x;
6224 g->cur_y += g->step;
6225
6226 while (g->cur_y >= g->max_y && g->parse > 0) {
6227 g->step = (1 << g->parse) * g->line_size;
6228 g->cur_y = g->start_y + (g->step >> 1);
6229 --g->parse;
6230 }
6231 }
6232}
6233
// Decode the LZW-compressed raster data of one GIF frame into g->out.
//
// The data is a sequence of sub-blocks, each preceded by a length byte; a
// length of 0 ends the data. Codes are read least-significant-bit first.
//
// Returns g->out on success, NULL if the initial code size is larger than 12,
// or the result of stbi__errpuc() on corrupt data.
6234static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
6235{
6236 stbi_uc lzw_cs;
6237 stbi__int32 len, init_code;
6238 stbi__uint32 first;
6239 stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
6240 stbi__gif_lzw *p;
6241
6242 lzw_cs = stbi__get8(s);
6243 if (lzw_cs > 12) return NULL;
// the clear code is the first value after the root codes; clear+1 ends the stream
6244 clear = 1 << lzw_cs;
6245 first = 1;
6246 codesize = lzw_cs + 1;
6247 codemask = (1 << codesize) - 1;
6248 bits = 0;
6249 valid_bits = 0;
// root entries: each code below `clear` stands for the single index equal to itself
6250 for (init_code = 0; init_code < clear; init_code++) {
6251 g->codes[init_code].prefix = -1;
6252 g->codes[init_code].first = (stbi_uc) init_code;
6253 g->codes[init_code].suffix = (stbi_uc) init_code;
6254 }
6255
6256 // support no starting clear code
6257 avail = clear+2;
6258 oldcode = -1;
6259
6260 len = 0;
6261 for(;;) {
6262 if (valid_bits < codesize) {
// not enough buffered bits for a code: pull in one more byte,
// starting a new sub-block when the current one is used up
6263 if (len == 0) {
6264 len = stbi__get8(s); // start new block
6265 if (len == 0)
6266 return g->out;
6267 }
6268 --len;
6269 bits |= (stbi__int32) stbi__get8(s) << valid_bits;
6270 valid_bits += 8;
6271 } else {
6272 stbi__int32 code = bits & codemask;
6273 bits >>= codesize;
6274 valid_bits -= codesize;
6275 // @OPTIMIZE: is there some way we can accelerate the non-clear path?
6276 if (code == clear) { // clear code
// reset the table to its initial state
6277 codesize = lzw_cs + 1;
6278 codemask = (1 << codesize) - 1;
6279 avail = clear + 2;
6280 oldcode = -1;
6281 first = 0;
6282 } else if (code == clear + 1) { // end of stream code
// skip the rest of this sub-block and all following sub-blocks
6283 stbi__skip(s, len);
6284 while ((len = stbi__get8(s)) > 0)
6285 stbi__skip(s,len);
6286 return g->out;
6287 } else if (code <= avail) {
6288 if (first) {
6289 return stbi__errpuc("no clear code", "Corrupt GIF");
6290 }
6291
6292 if (oldcode >= 0) {
// add a new entry: the previous string plus the first index of the
// current one (or of itself when the code is the entry being created)
6293 p = &g->codes[avail++];
6294 if (avail > 8192) {
6295 return stbi__errpuc("too many codes", "Corrupt GIF");
6296 }
6297
6298 p->prefix = (stbi__int16) oldcode;
6299 p->first = g->codes[oldcode].first;
6300 p->suffix = (code == avail) ? p->first : g->codes[code].first;
6301 } else if (code == avail)
6302 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6303
6304 stbi__out_gif_code(g, (stbi__uint16) code);
6305
// grow the code width once the table fills the current width (at most 12 bits)
6306 if ((avail & codemask) == 0 && avail <= 0x0FFF) {
6307 codesize++;
6308 codemask = (1 << codesize) - 1;
6309 }
6310
6311 oldcode = code;
6312 } else {
6313 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6314 }
6315 }
6316 }
6317}
6318
6319// this function is designed to support animated gifs, although stb_image doesn't support it
6320// two back is the image from two frames ago, used for a very specific disposal format
// Decode the next frame of a GIF stream into g->out.
//
// On the first call (g->out == 0) the header is parsed and the out,
// background and history buffers are allocated. On later calls the previous
// frame is disposed of according to the disposal bits of g->eflags first.
//
//   comp     - passed through to stbi__gif_header on the first call.
//   req_comp - unused.
//   two_back - image from two frames ago, used for disposal method 3; may be 0.
//
// Returns g->out when a frame was decoded, (stbi_uc *) s when the stream
// terminator was reached, and 0 / NULL / the result of stbi__errpuc() on error.
6321static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
6322{
6323 int dispose;
6324 int first_frame;
6325 int pi;
6326 int pcount;
6327 STBI_NOTUSED(req_comp);
6328
6329 // on first frame, any non-written pixels get the background colour (non-transparent)
6330 first_frame = 0;
6331 if (g->out == 0) {
6332 if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
6333 if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
6334 return stbi__errpuc("too large", "GIF image is too large");
6335 pcount = g->w * g->h;
6336 g->out = (stbi_uc *) stbi__malloc(4 * pcount);
6337 g->background = (stbi_uc *) stbi__malloc(4 * pcount);
6338 g->history = (stbi_uc *) stbi__malloc(pcount);
// NOTE(review): buffers that were allocated successfully are not freed on
// this return; presumably the caller releases them - confirm
6339 if (!g->out || !g->background || !g->history)
6340 return stbi__errpuc("outofmem", "Out of memory");
6341
6342 // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
6343 // background colour is only used for pixels that are not rendered first frame, after that "background"
6344 // color refers to the color that was there the previous frame.
6345 memset(g->out, 0x00, 4 * pcount);
6346 memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
6347 memset(g->history, 0x00, pcount); // pixels that were affected previous frame
6348 first_frame = 1;
6349 } else {
6350 // second frame - how do we dispose of the previous one?
6351 dispose = (g->eflags & 0x1C) >> 2;
6352 pcount = g->w * g->h;
6353
6354 if ((dispose == 3) && (two_back == 0)) {
6355 dispose = 2; // if I don't have an image to revert back to, default to the old background
6356 }
6357
6358 if (dispose == 3) { // use previous graphic
6359 for (pi = 0; pi < pcount; ++pi) {
6360 if (g->history[pi]) {
6361 memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
6362 }
6363 }
6364 } else if (dispose == 2) {
6365 // restore what was changed last frame to background before that frame;
6366 for (pi = 0; pi < pcount; ++pi) {
6367 if (g->history[pi]) {
6368 memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
6369 }
6370 }
6371 } else {
6372 // This is a non-disposal case either way, so just
6373 // leave the pixels as is, and they will become the new background
6374 // 1: do not dispose
6375 // 0: not specified.
6376 }
6377
6378 // background is what out is after the undoing of the previous frame;
6379 memcpy( g->background, g->out, 4 * g->w * g->h );
6380 }
6381
6382 // clear my history;
6383 memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame
6384
// process blocks until an image has been decoded or the stream ends
6385 for (;;) {
6386 int tag = stbi__get8(s);
6387 switch (tag) {
6388 case 0x2C: /* Image Descriptor */
6389 {
6390 stbi__int32 x, y, w, h;
6391 stbi_uc *o;
6392
6393 x = stbi__get16le(s);
6394 y = stbi__get16le(s);
6395 w = stbi__get16le(s);
6396 h = stbi__get16le(s);
// the frame rectangle must lie inside the logical screen
6397 if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6398 return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6399
// all positions below are byte offsets into the 4-byte-per-pixel output
6400 g->line_size = g->w * 4;
6401 g->start_x = x * 4;
6402 g->start_y = y * g->line_size;
6403 g->max_x = g->start_x + w * 4;
6404 g->max_y = g->start_y + h * g->line_size;
6405 g->cur_x = g->start_x;
6406 g->cur_y = g->start_y;
6407
6408 // if the width of the specified rectangle is 0, that means
6409 // we may not see *any* pixels or the image is malformed;
6410 // to make sure this is caught, move the current y down to
6411 // max_y (which is what out_gif_code checks).
6412 if (w == 0)
6413 g->cur_y = g->max_y;
6414
6415 g->lflags = stbi__get8(s);
6416
// bit 0x40 of the local flags marks an interlaced image
6417 if (g->lflags & 0x40) {
6418 g->step = 8 * g->line_size; // first interlaced spacing
6419 g->parse = 3;
6420 } else {
6421 g->step = g->line_size;
6422 g->parse = 0;
6423 }
6424
// a local colour table (bit 0x80 of lflags) takes precedence over the global one
6425 if (g->lflags & 0x80) {
6426 stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6427 g->color_table = (stbi_uc *) g->lpal;
6428 } else if (g->flags & 0x80) {
6429 g->color_table = (stbi_uc *) g->pal;
6430 } else
6431 return stbi__errpuc("missing color table", "Corrupt GIF");
6432
6433 o = stbi__process_gif_raster(s, g);
6434 if (!o) return NULL;
6435
6436 // if this was the first frame,
6437 pcount = g->w * g->h;
6438 if (first_frame && (g->bgindex > 0)) {
6439 // if first frame, any pixel not drawn to gets the background color
6440 for (pi = 0; pi < pcount; ++pi) {
6441 if (g->history[pi] == 0) {
6442 g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
6443 memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
6444 }
6445 }
6446 }
6447
6448 return o;
6449 }
6450
6451 case 0x21: // Comment Extension.
6452 {
6453 int len;
6454 int ext = stbi__get8(s);
6455 if (ext == 0xF9) { // Graphic Control Extension.
6456 len = stbi__get8(s);
6457 if (len == 4) {
6458 g->eflags = stbi__get8(s);
6459 g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
6460
6461 // unset old transparent
6462 if (g->transparent >= 0) {
6463 g->pal[g->transparent][3] = 255;
6464 }
6465 if (g->eflags & 0x01) {
6466 g->transparent = stbi__get8(s);
6467 if (g->transparent >= 0) {
6468 g->pal[g->transparent][3] = 0;
6469 }
6470 } else {
6471 // don't need transparent
6472 stbi__skip(s, 1);
6473 g->transparent = -1;
6474 }
6475 } else {
// unexpected block size: skip the block and go on to the next tag
6476 stbi__skip(s, len);
6477 break;
6478 }
6479 }
// skip the remaining sub-blocks of the extension up to the 0 terminator
6480 while ((len = stbi__get8(s)) != 0) {
6481 stbi__skip(s, len);
6482 }
6483 break;
6484 }
6485
6486 case 0x3B: // gif stream termination code
6487 return (stbi_uc *) s; // using '1' causes warning on some compilers
6488
6489 default:
6490 return stbi__errpuc("unknown code", "Corrupt GIF");
6491 }
6492 }
6493}
6494
6495static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
6496{
6497 STBI_FREE(g->out);
6498 STBI_FREE(g->history);
6499 STBI_FREE(g->background);
6500
6501 if (out) STBI_FREE(out);
6502 if (delays && *delays) STBI_FREE(*delays);
6503 return stbi__errpuc("outofmem", "Out of memory");
6504}
6505
6506static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
6507{
6508 if (stbi__gif_test(s)) {
6509 int layers = 0;
6510 stbi_uc *u = 0;
6511 stbi_uc *out = 0;
6512 stbi_uc *two_back = 0;
6513 stbi__gif g;
6514 int stride;
6515 int out_size = 0;
6516 int delays_size = 0;
6517
6518 STBI_NOTUSED(out_size);
6519 STBI_NOTUSED(delays_size);
6520
6521 memset(&g, 0, sizeof(g));
6522 if (delays) {
6523 *delays = 0;
6524 }
6525
6526 do {
6527 u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
6528 if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
6529
6530 if (u) {
6531 *x = g.w;
6532 *y = g.h;
6533 ++layers;
6534 stride = g.w * g.h * 4;
6535
6536 if (out) {
6537 void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
6538 if (!tmp)
6539 return stbi__load_gif_main_outofmem(&g, out, delays);
6540 else {
6541 out = (stbi_uc*) tmp;
6542 out_size = layers * stride;
6543 }
6544
6545 if (delays) {
6546 int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
6547 if (!new_delays)
6548 return stbi__load_gif_main_outofmem(&g, out, delays);
6549 *delays = new_delays;
6550 delays_size = layers * sizeof(int);
6551 }
6552 } else {
6553 out = (stbi_uc*)stbi__malloc( layers * stride );
6554 if (!out)
6555 return stbi__load_gif_main_outofmem(&g, out, delays);
6556 out_size = layers * stride;
6557 if (delays) {
6558 *delays = (int*) stbi__malloc( layers * sizeof(int) );
6559 if (!*delays)
6560 return stbi__load_gif_main_outofmem(&g, out, delays);
6561 delays_size = layers * sizeof(int);
6562 }
6563 }
6564 memcpy( out + ((layers - 1) * stride), u, stride );
6565 if (layers >= 2) {
6566 two_back = out - 2 * stride;
6567 }
6568
6569 if (delays) {
6570 (*delays)[layers - 1U] = g.delay;
6571 }
6572 }
6573 } while (u != 0);
6574
6575 // free temp buffer;
6576 STBI_FREE(g.out);
6577 STBI_FREE(g.history);
6578 STBI_FREE(g.background);
6579
6580 // do the final conversion after loading everything;
6581 if (req_comp && req_comp != 4)
6582 out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
6583
6584 *z = layers;
6585 return out;
6586 } else {
6587 return stbi__errpuc("not GIF", "Image was not as a gif type.");
6588 }
6589}
6590
6591static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6592{
6593 stbi_uc *u = 0;
6594 stbi__gif g;
6595 memset(&g, 0, sizeof(g));
6596 STBI_NOTUSED(ri);
6597
6598 u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
6599 if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
6600 if (u) {
6601 *x = g.w;
6602 *y = g.h;
6603
6604 // moved conversion to after successful load so that the same
6605 // can be done for multiple frames.
6606 if (req_comp && req_comp != 4)
6607 u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
6608 } else if (g.out) {
6609 // if there was an error and we allocated an image buffer, free it!
6610 STBI_FREE(g.out);
6611 }
6612
6613 // free buffers needed for multiple frame loading;
6614 STBI_FREE(g.history);
6615 STBI_FREE(g.background);
6616
6617 return u;
6618}
6619
6620static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
6621{
6622 return stbi__gif_info_raw(s,x,y,comp);
6623}
6624#endif
6625
6626// *************************************************************************************************
6627// Radiance RGBE HDR loader
6628// originally by Nicolas Schulz
6629#ifndef STBI_NO_HDR
6630static int stbi__hdr_test_core(stbi__context *s, const char *signature)
6631{
6632 int i;
6633 for (i=0; signature[i]; ++i)
6634 if (stbi__get8(s) != signature[i])
6635 return 0;
6636 stbi__rewind(s);
6637 return 1;
6638}
6639
6640static int stbi__hdr_test(stbi__context* s)
6641{
6642 int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
6643 stbi__rewind(s);
6644 if(!r) {
6645 r = stbi__hdr_test_core(s, "#?RGBE\n");
6646 stbi__rewind(s);
6647 }
6648 return r;
6649}
6650
6651#define STBI__HDR_BUFLEN 1024
6652static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
6653{
6654 int len=0;
6655 char c = '\0';
6656
6657 c = (char) stbi__get8(z);
6658
6659 while (!stbi__at_eof(z) && c != '\n') {
6660 buffer[len++] = c;
6661 if (len == STBI__HDR_BUFLEN-1) {
6662 // flush to end of line
6663 while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
6664 ;
6665 break;
6666 }
6667 c = (char) stbi__get8(z);
6668 }
6669
6670 buffer[len] = 0;
6671 return buffer;
6672}
6673
6674static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
6675{
6676 if ( input[3] != 0 ) {
6677 float f1;
6678 // Exponent
6679 f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
6680 if (req_comp <= 2)
6681 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
6682 else {
6683 output[0] = input[0] * f1;
6684 output[1] = input[1] * f1;
6685 output[2] = input[2] * f1;
6686 }
6687 if (req_comp == 2) output[1] = 1;
6688 if (req_comp == 4) output[3] = 1;
6689 } else {
6690 switch (req_comp) {
6691 case 4: output[3] = 1; /* fallthrough */
6692 case 3: output[0] = output[1] = output[2] = 0;
6693 break;
6694 case 2: output[1] = 1; /* fallthrough */
6695 case 1: output[0] = 0;
6696 break;
6697 }
6698 }
6699}
6700
// Decodes a Radiance HDR image from 's' into a newly allocated float buffer
// of width * height * req_comp values.
//
//   x, y     - receive the parsed width / height
//   comp     - optional; set to 3 when non-NULL
//   req_comp - requested channels per pixel; 0 is treated as 3
//   ri       - unused
//
// The header must start with "#?RADIANCE" or "#?RGBE", contain a
// "FORMAT=32-bit_rle_rgbe" line before the first empty line, and be followed
// by a resolution line of the form "-Y <height> +X <width>".
//
// Returns the pixel buffer, or the result of stbi__errpf on any failure.
6701static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6702{
6703 char buffer[STBI__HDR_BUFLEN];
6704 char *token;
6705 int valid = 0;
6706 int width, height;
6707 stbi_uc *scanline;
6708 float *hdr_data;
6709 int len;
6710 unsigned char count, value;
6711 int i, j, k, c1,c2, z;
6712 const char *headerToken;
6713 STBI_NOTUSED(ri);
6714
6715 // Check identifier
6716 headerToken = stbi__hdr_gettoken(s,buffer);
6717 if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
6718 return stbi__errpf("not HDR", "Corrupt HDR image");
6719
6720 // Parse header
 // Header lines are read until the first empty token; only the FORMAT line
 // is inspected, everything else is ignored.
6721 for(;;) {
6722 token = stbi__hdr_gettoken(s,buffer);
6723 if (token[0] == 0) break;
6724 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6725 }
6726
6727 if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
6728
6729 // Parse width and height
6730 // can't use sscanf() if we're not using stdio!
6731 token = stbi__hdr_gettoken(s,buffer);
6732 if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6733 token += 3;
 // strtol advances 'token' past the digits it consumed
6734 height = (int) strtol(token, &token, 10);
6735 while (*token == ' ') ++token;
6736 if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6737 token += 3;
6738 width = (int) strtol(token, NULL, 10);
6739
6740 if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
6741 if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
6742
6743 *x = width;
6744 *y = height;
6745
6746 if (comp) *comp = 3;
6747 if (req_comp == 0) req_comp = 3;
6748
 // validate the size product before allocating
6749 if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
6750 return stbi__errpf("too large", "HDR image is too large");
6751
6752 // Read data
6753 hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
6754 if (!hdr_data)
6755 return stbi__errpf("outofmem", "Out of memory");
6756
6757 // Load image data
6758 // image data is stored as some number of scanlines
 // Widths below 8 or at/above 32768 are always read as flat RGBE pixels;
 // other widths are first tried as run-length encoded scanlines.
6759 if ( width < 8 || width >= 32768) {
6760 // Read flat data
6761 for (j=0; j < height; ++j) {
6762 for (i=0; i < width; ++i) {
6763 stbi_uc rgbe[4];
 // Also the target of the goto in the RLE branch below, which enters
 // this loop body with i and j already set.
 // NOTE(review): the result of stbi__getn is not checked here, so a
 // short read is not reported - confirm whether that is intended.
6764 main_decode_loop:
6765 stbi__getn(s, rgbe, 4);
6766 stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
6767 }
6768 }
6769 } else {
6770 // Read RLE-encoded data
 // scanline buffer is allocated lazily once the first valid RLE row
 // header has been seen
6771 scanline = NULL;
6772
6773 for (j = 0; j < height; ++j) {
 // each RLE row starts with the bytes 2, 2 followed by a 16-bit length
 // whose high byte has the top bit clear
6774 c1 = stbi__get8(s);
6775 c2 = stbi__get8(s);
6776 len = stbi__get8(s);
6777 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
6778 // not run-length encoded, so we have to actually use THIS data as a decoded
6779 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
6780 stbi_uc rgbe[4];
6781 rgbe[0] = (stbi_uc) c1;
6782 rgbe[1] = (stbi_uc) c2;
6783 rgbe[2] = (stbi_uc) len;
6784 rgbe[3] = (stbi_uc) stbi__get8(s);
 // the four bytes just read become pixel 0 of the output; the flat
 // decoder is then resumed at column 1 of row 0. j is reset to 0 even
 // when this happens on a later row.
6785 stbi__hdr_convert(hdr_data, rgbe, req_comp);
6786 i = 1;
6787 j = 0;
6788 STBI_FREE(scanline);
6789 goto main_decode_loop; // yes, this makes no sense
6790 }
 // assemble the 16-bit row length: high byte already in len, low byte next
6791 len <<= 8;
6792 len |= stbi__get8(s);
6793 if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
6794 if (scanline == NULL) {
6795 scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
6796 if (!scanline) {
6797 STBI_FREE(hdr_data);
6798 return stbi__errpf("outofmem", "Out of memory");
6799 }
6800 }
6801
 // the row is stored as four separate byte planes (k = 0..3), each
 // run-length encoded; they are interleaved into scanline[i*4 + k]
6802 for (k = 0; k < 4; ++k) {
6803 int nleft;
6804 i = 0;
6805 while ((nleft = width - i) > 0) {
6806 count = stbi__get8(s);
6807 if (count > 128) {
6808 // Run
6809 value = stbi__get8(s);
6810 count -= 128;
 // reject runs that would write past the end of the row
6811 if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6812 for (z = 0; z < count; ++z)
6813 scanline[i++ * 4 + k] = value;
6814 } else {
6815 // Dump
 // 'count' literal bytes follow; same bounds check as for runs
6816 if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6817 for (z = 0; z < count; ++z)
6818 scanline[i++ * 4 + k] = stbi__get8(s);
6819 }
6820 }
6821 }
 // convert the assembled RGBE row into floats for output row j
6822 for (i=0; i < width; ++i)
6823 stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
6824 }
6825 if (scanline)
6826 STBI_FREE(scanline);
6827 }
6828
6829 return hdr_data;
6830}
6831
6832static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
6833{
6834 char buffer[STBI__HDR_BUFLEN];
6835 char *token;
6836 int valid = 0;
6837 int dummy;
6838
6839 if (!x) x = &dummy;
6840 if (!y) y = &dummy;
6841 if (!comp) comp = &dummy;
6842
6843 if (stbi__hdr_test(s) == 0) {
6844 stbi__rewind( s );
6845 return 0;
6846 }
6847
6848 for(;;) {
6849 token = stbi__hdr_gettoken(s,buffer);
6850 if (token[0] == 0) break;
6851 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6852 }
6853
6854 if (!valid) {
6855 stbi__rewind( s );
6856 return 0;
6857 }
6858 token = stbi__hdr_gettoken(s,buffer);
6859 if (strncmp(token, "-Y ", 3)) {
6860 stbi__rewind( s );
6861 return 0;
6862 }
6863 token += 3;
6864 *y = (int) strtol(token, &token, 10);
6865 while (*token == ' ') ++token;
6866 if (strncmp(token, "+X ", 3)) {
6867 stbi__rewind( s );
6868 return 0;
6869 }
6870 token += 3;
6871 *x = (int) strtol(token, NULL, 10);
6872 *comp = 3;
6873 return 1;
6874}
6875#endif // STBI_NO_HDR
6876
6877#ifndef STBI_NO_BMP
6878static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
6879{
6880 void *p;
6881 stbi__bmp_data info;
6882
6883 info.all_a = 255;
6884 p = stbi__bmp_parse_header(s, &info);
6885 if (p == NULL) {
6886 stbi__rewind( s );
6887 return 0;
6888 }
6889 if (x) *x = s->img_x;
6890 if (y) *y = s->img_y;
6891 if (comp) {
6892 if (info.bpp == 24 && info.ma == 0xff000000)
6893 *comp = 3;
6894 else
6895 *comp = info.ma ? 4 : 3;
6896 }
6897 return 1;
6898}
6899#endif
6900
6901#ifndef STBI_NO_PSD
6902static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
6903{
6904 int channelCount, dummy, depth;
6905 if (!x) x = &dummy;
6906 if (!y) y = &dummy;
6907 if (!comp) comp = &dummy;
6908 if (stbi__get32be(s) != 0x38425053) {
6909 stbi__rewind( s );
6910 return 0;
6911 }
6912 if (stbi__get16be(s) != 1) {
6913 stbi__rewind( s );
6914 return 0;
6915 }
6916 stbi__skip(s, 6);
6917 channelCount = stbi__get16be(s);
6918 if (channelCount < 0 || channelCount > 16) {
6919 stbi__rewind( s );
6920 return 0;
6921 }
6922 *y = stbi__get32be(s);
6923 *x = stbi__get32be(s);
6924 depth = stbi__get16be(s);
6925 if (depth != 8 && depth != 16) {
6926 stbi__rewind( s );
6927 return 0;
6928 }
6929 if (stbi__get16be(s) != 3) {
6930 stbi__rewind( s );
6931 return 0;
6932 }
6933 *comp = 4;
6934 return 1;
6935}
6936
6937static int stbi__psd_is16(stbi__context *s)
6938{
6939 int channelCount, depth;
6940 if (stbi__get32be(s) != 0x38425053) {
6941 stbi__rewind( s );
6942 return 0;
6943 }
6944 if (stbi__get16be(s) != 1) {
6945 stbi__rewind( s );
6946 return 0;
6947 }
6948 stbi__skip(s, 6);
6949 channelCount = stbi__get16be(s);
6950 if (channelCount < 0 || channelCount > 16) {
6951 stbi__rewind( s );
6952 return 0;
6953 }
6954 STBI_NOTUSED(stbi__get32be(s));
6955 STBI_NOTUSED(stbi__get32be(s));
6956 depth = stbi__get16be(s);
6957 if (depth != 16) {
6958 stbi__rewind( s );
6959 return 0;
6960 }
6961 return 1;
6962}
6963#endif
6964
6965#ifndef STBI_NO_PIC
6966static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
6967{
6968 int act_comp=0,num_packets=0,chained,dummy;
6969 stbi__pic_packet packets[10];
6970
6971 if (!x) x = &dummy;
6972 if (!y) y = &dummy;
6973 if (!comp) comp = &dummy;
6974
6975 if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
6976 stbi__rewind(s);
6977 return 0;
6978 }
6979
6980 stbi__skip(s, 88);
6981
6982 *x = stbi__get16be(s);
6983 *y = stbi__get16be(s);
6984 if (stbi__at_eof(s)) {
6985 stbi__rewind( s);
6986 return 0;
6987 }
6988 if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
6989 stbi__rewind( s );
6990 return 0;
6991 }
6992
6993 stbi__skip(s, 8);
6994
6995 do {
6996 stbi__pic_packet *packet;
6997
6998 if (num_packets==sizeof(packets)/sizeof(packets[0]))
6999 return 0;
7000
7001 packet = &packets[num_packets++];
7002 chained = stbi__get8(s);
7003 packet->size = stbi__get8(s);
7004 packet->type = stbi__get8(s);
7005 packet->channel = stbi__get8(s);
7006 act_comp |= packet->channel;
7007
7008 if (stbi__at_eof(s)) {
7009 stbi__rewind( s );
7010 return 0;
7011 }
7012 if (packet->size != 8) {
7013 stbi__rewind( s );
7014 return 0;
7015 }
7016 } while (chained);
7017
7018 *comp = (act_comp & 0x10 ? 4 : 3);
7019
7020 return 1;
7021}
7022#endif
7023
7024// *************************************************************************************************
7025// Portable Gray Map and Portable Pixel Map loader
7026// by Ken Miller
7027//
7028// PGM: http://netpbm.sourceforge.net/doc/pgm.html
7029// PPM: http://netpbm.sourceforge.net/doc/ppm.html
7030//
7031// Known limitations:
7032// Header comments ('#' to end of line) are accepted and skipped (no longer a limitation since 2.09)
7033// Does not support ASCII image data (formats P2 and P3)
7034
7035#ifndef STBI_NO_PNM
7036
7037static int stbi__pnm_test(stbi__context *s)
7038{
7039 char p, t;
7040 p = (char) stbi__get8(s);
7041 t = (char) stbi__get8(s);
7042 if (p != 'P' || (t != '5' && t != '6')) {
7043 stbi__rewind( s );
7044 return 0;
7045 }
7046 return 1;
7047}
7048
7049static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7050{
7051 stbi_uc *out;
7052 STBI_NOTUSED(ri);
7053
7054 ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
7055 if (ri->bits_per_channel == 0)
7056 return 0;
7057
7058 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
7059 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
7060
7061 *x = s->img_x;
7062 *y = s->img_y;
7063 if (comp) *comp = s->img_n;
7064
7065 if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
7066 return stbi__errpuc("too large", "PNM too large");
7067
7068 out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
7069 if (!out) return stbi__errpuc("outofmem", "Out of memory");
7070 if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
7071 STBI_FREE(out);
7072 return stbi__errpuc("bad PNM", "PNM file truncated");
7073 }
7074
7075 if (req_comp && req_comp != s->img_n) {
7076 if (ri->bits_per_channel == 16) {
7077 out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
7078 } else {
7079 out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
7080 }
7081 if (out == NULL) return out; // stbi__convert_format frees input on failure
7082 }
7083 return out;
7084}
7085
// Returns 1 for space, tab, newline, vertical tab, form feed and carriage
// return; 0 for every other character.
static int stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7090
7091static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
7092{
7093 for (;;) {
7094 while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
7095 *c = (char) stbi__get8(s);
7096
7097 if (stbi__at_eof(s) || *c != '#')
7098 break;
7099
7100 while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
7101 *c = (char) stbi__get8(s);
7102 }
7103}
7104
// Returns 1 when 'c' is one of '0'..'9', otherwise 0.
static int stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   if (c > '9')
      return 0;
   return 1;
}
7109
7110static int stbi__pnm_getinteger(stbi__context *s, char *c)
7111{
7112 int value = 0;
7113
7114 while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
7115 value = value*10 + (*c - '0');
7116 *c = (char) stbi__get8(s);
7117 if((value > 214748364) || (value == 214748364 && *c > '7'))
7118 return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
7119 }
7120
7121 return value;
7122}
7123
7124static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
7125{
7126 int maxv, dummy;
7127 char c, p, t;
7128
7129 if (!x) x = &dummy;
7130 if (!y) y = &dummy;
7131 if (!comp) comp = &dummy;
7132
7133 stbi__rewind(s);
7134
7135 // Get identifier
7136 p = (char) stbi__get8(s);
7137 t = (char) stbi__get8(s);
7138 if (p != 'P' || (t != '5' && t != '6')) {
7139 stbi__rewind(s);
7140 return 0;
7141 }
7142
7143 *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
7144
7145 c = (char) stbi__get8(s);
7146 stbi__pnm_skip_whitespace(s, &c);
7147
7148 *x = stbi__pnm_getinteger(s, &c); // read width
7149 if(*x == 0)
7150 return stbi__err("invalid width", "PPM image header had zero or overflowing width");
7151 stbi__pnm_skip_whitespace(s, &c);
7152
7153 *y = stbi__pnm_getinteger(s, &c); // read height
7154 if (*y == 0)
7155 return stbi__err("invalid width", "PPM image header had zero or overflowing width");
7156 stbi__pnm_skip_whitespace(s, &c);
7157
7158 maxv = stbi__pnm_getinteger(s, &c); // read max value
7159 if (maxv > 65535)
7160 return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
7161 else if (maxv > 255)
7162 return 16;
7163 else
7164 return 8;
7165}
7166
7167static int stbi__pnm_is16(stbi__context *s)
7168{
7169 if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
7170 return 1;
7171 return 0;
7172}
7173#endif
7174
7175static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
7176{
7177 #ifndef STBI_NO_JPEG
7178 if (stbi__jpeg_info(s, x, y, comp)) return 1;
7179 #endif
7180
7181 #ifndef STBI_NO_PNG
7182 if (stbi__png_info(s, x, y, comp)) return 1;
7183 #endif
7184
7185 #ifndef STBI_NO_GIF
7186 if (stbi__gif_info(s, x, y, comp)) return 1;
7187 #endif
7188
7189 #ifndef STBI_NO_BMP
7190 if (stbi__bmp_info(s, x, y, comp)) return 1;
7191 #endif
7192
7193 #ifndef STBI_NO_PSD
7194 if (stbi__psd_info(s, x, y, comp)) return 1;
7195 #endif
7196
7197 #ifndef STBI_NO_PIC
7198 if (stbi__pic_info(s, x, y, comp)) return 1;
7199 #endif
7200
7201 #ifndef STBI_NO_PNM
7202 if (stbi__pnm_info(s, x, y, comp)) return 1;
7203 #endif
7204
7205 #ifndef STBI_NO_HDR
7206 if (stbi__hdr_info(s, x, y, comp)) return 1;
7207 #endif
7208
7209 // test tga last because it's a crappy test!
7210 #ifndef STBI_NO_TGA
7211 if (stbi__tga_info(s, x, y, comp))
7212 return 1;
7213 #endif
7214 return stbi__err("unknown image type", "Image not of any known type, or corrupt");
7215}
7216
7217static int stbi__is_16_main(stbi__context *s)
7218{
7219 #ifndef STBI_NO_PNG
7220 if (stbi__png_is16(s)) return 1;
7221 #endif
7222
7223 #ifndef STBI_NO_PSD
7224 if (stbi__psd_is16(s)) return 1;
7225 #endif
7226
7227 #ifndef STBI_NO_PNM
7228 if (stbi__pnm_is16(s)) return 1;
7229 #endif
7230 return 0;
7231}
7232
7233#ifndef STBI_NO_STDIO
7234STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7235{
7236 FILE *f = stbi__fopen(filename, "rb");
7237 int result;
7238 if (!f) return stbi__err("can't fopen", "Unable to open file");
7239 result = stbi_info_from_file(f, x, y, comp);
7240 fclose(f);
7241 return result;
7242}
7243
7244STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7245{
7246 int r;
7247 stbi__context s;
7248 long pos = ftell(f);
7249 stbi__start_file(&s, f);
7250 r = stbi__info_main(&s,x,y,comp);
7251 fseek(f,pos,SEEK_SET);
7252 return r;
7253}
7254
7255STBIDEF int stbi_is_16_bit(char const *filename)
7256{
7257 FILE *f = stbi__fopen(filename, "rb");
7258 int result;
7259 if (!f) return stbi__err("can't fopen", "Unable to open file");
7260 result = stbi_is_16_bit_from_file(f);
7261 fclose(f);
7262 return result;
7263}
7264
7265STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7266{
7267 int r;
7268 stbi__context s;
7269 long pos = ftell(f);
7270 stbi__start_file(&s, f);
7271 r = stbi__is_16_main(&s);
7272 fseek(f,pos,SEEK_SET);
7273 return r;
7274}
7275#endif // !STBI_NO_STDIO
7276
7277STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7278{
7279 stbi__context s;
7280 stbi__start_mem(&s,buffer,len);
7281 return stbi__info_main(&s,x,y,comp);
7282}
7283
7284STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7285{
7286 stbi__context s;
7287 stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7288 return stbi__info_main(&s,x,y,comp);
7289}
7290
7291STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7292{
7293 stbi__context s;
7294 stbi__start_mem(&s,buffer,len);
7295 return stbi__is_16_main(&s);
7296}
7297
7298STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7299{
7300 stbi__context s;
7301 stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7302 return stbi__is_16_main(&s);
7303}
7304
7305/*
7306 revision history:
7307 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
7308 2.19 (2018-02-11) fix warning
7309 2.18 (2018-01-30) fix warnings
7310 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
7311 1-bit BMP
7312 *_is_16_bit api
7313 avoid warnings
7314 2.16 (2017-07-23) all functions have 16-bit variants;
7315 STBI_NO_STDIO works again;
7316 compilation fixes;
7317 fix rounding in unpremultiply;
7318 optimize vertical flip;
7319 disable raw_len validation;
7320 documentation fixes
7321 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7322 warning fixes; disable run-time SSE detection on gcc;
7323 uniform handling of optional "return" values;
7324 thread-safe initialization of zlib tables
7325 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7326 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now
7327 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7328 2.11 (2016-04-02) allocate large structures on the stack
7329 remove white matting for transparent PSD
7330 fix reported channel count for PNG & BMP
7331 re-enable SSE2 in non-gcc 64-bit
7332 support RGB-formatted JPEG
7333 read 16-bit PNGs (only as 8-bit)
7334 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7335 2.09 (2016-01-16) allow comments in PNM files
7336 16-bit-per-pixel TGA (not bit-per-component)
7337 info() for TGA could break due to .hdr handling
7338 info() for BMP to shares code instead of sloppy parse
7339 can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7340 code cleanup
7341 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7342 2.07 (2015-09-13) fix compiler warnings
7343 partial animated GIF support
7344 limited 16-bpc PSD support
7345 #ifdef unused functions
7346 bug with < 92 byte PIC,PNM,HDR,TGA
7347 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
7348 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
7349 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7350 2.03 (2015-04-12) extra corruption checking (mmozeiko)
7351 stbi_set_flip_vertically_on_load (nguillemot)
7352 fix NEON support; fix mingw support
7353 2.02 (2015-01-19) fix incorrect assert, fix warning
7354 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7355 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7356 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7357 progressive JPEG (stb)
7358 PGM/PPM support (Ken Miller)
7359 STBI_MALLOC,STBI_REALLOC,STBI_FREE
7360 GIF bugfix -- seemingly never worked
7361 STBI_NO_*, STBI_ONLY_*
7362 1.48 (2014-12-14) fix incorrectly-named assert()
7363 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7364 optimize PNG (ryg)
7365 fix bug in interlaced PNG with user-specified channel count (stb)
7366 1.46 (2014-08-26)
7367 fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7368 1.45 (2014-08-16)
7369 fix MSVC-ARM internal compiler error by wrapping malloc
7370 1.44 (2014-08-07)
7371 various warning fixes from Ronny Chevalier
7372 1.43 (2014-07-15)
7373 fix MSVC-only compiler problem in code changed in 1.42
7374 1.42 (2014-07-09)
7375 don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7376 fixes to stbi__cleanup_jpeg path
7377 added STBI_ASSERT to avoid requiring assert.h
7378 1.41 (2014-06-25)
7379 fix search&replace from 1.36 that messed up comments/error messages
7380 1.40 (2014-06-22)
7381 fix gcc struct-initialization warning
7382 1.39 (2014-06-15)
7383 fix to TGA optimization when req_comp != number of components in TGA;
7384 fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7385 add support for BMP version 5 (more ignored fields)
7386 1.38 (2014-06-06)
7387 suppress MSVC warnings on integer casts truncating values
7388 fix accidental rename of 'skip' field of I/O
7389 1.37 (2014-06-04)
7390 remove duplicate typedef
7391 1.36 (2014-06-03)
7392 convert to header file single-file library
7393 if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7394 1.35 (2014-05-27)
7395 various warnings
7396 fix broken STBI_SIMD path
7397 fix bug where stbi_load_from_file no longer left file pointer in correct place
7398 fix broken non-easy path for 32-bit BMP (possibly never used)
7399 TGA optimization by Arseny Kapoulkine
7400 1.34 (unknown)
7401 use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7402 1.33 (2011-07-14)
7403 make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7404 1.32 (2011-07-13)
7405 support for "info" function for all supported filetypes (SpartanJ)
7406 1.31 (2011-06-20)
7407 a few more leak fixes, bug in PNG handling (SpartanJ)
7408 1.30 (2011-06-11)
7409 added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7410 removed deprecated format-specific test/load functions
7411 removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7412 error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7413 fix inefficiency in decoding 32-bit BMP (David Woo)
7414 1.29 (2010-08-16)
7415 various warning fixes from Aurelien Pocheville
7416 1.28 (2010-08-01)
7417 fix bug in GIF palette transparency (SpartanJ)
7418 1.27 (2010-08-01)
7419 cast-to-stbi_uc to fix warnings
7420 1.26 (2010-07-24)
7421 fix bug in file buffering for PNG reported by SpartanJ
7422 1.25 (2010-07-17)
7423 refix trans_data warning (Won Chun)
7424 1.24 (2010-07-12)
7425 perf improvements reading from files on platforms with lock-heavy fgetc()
7426 minor perf improvements for jpeg
7427 deprecated type-specific functions so we'll get feedback if they're needed
7428 attempt to fix trans_data warning (Won Chun)
7429 1.23 fixed bug in iPhone support
7430 1.22 (2010-07-10)
7431 removed image *writing* support
7432 stbi_info support from Jetro Lauha
7433 GIF support from Jean-Marc Lienher
7434 iPhone PNG-extensions from James Brown
7435 warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
7436 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
7437 1.20 added support for Softimage PIC, by Tom Seddon
7438 1.19 bug in interlaced PNG corruption check (found by ryg)
7439 1.18 (2008-08-02)
7440 fix a threading bug (local mutable static)
7441 1.17 support interlaced PNG
7442 1.16 major bugfix - stbi__convert_format converted one too many pixels
7443 1.15 initialize some fields for thread safety
7444 1.14 fix threadsafe conversion bug
7445 header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7446 1.13 threadsafe
7447 1.12 const qualifiers in the API
7448 1.11 Support installable IDCT, colorspace conversion routines
7449 1.10 Fixes for 64-bit (don't use "unsigned long")
7450 optimized upsampling by Fabian "ryg" Giesen
7451 1.09 Fix format-conversion for PSD code (bad global variables!)
7452 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7453 1.07 attempt to fix C++ warning/errors again
7454 1.06 attempt to fix C++ warning/errors again
7455 1.05 fix TGA loading to return correct *comp and use good luminance calc
7456 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
7457 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7458 1.02 support for (subset of) HDR files, float interface for preferred access to them
7459 1.01 fix bug: possible bug in handling right-side up bmps... not sure
7460 fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7461 1.00 interface to zlib that skips zlib header
7462 0.99 correct handling of alpha in palette
7463 0.98 TGA loader by lonesock; dynamically add loaders (untested)
7464 0.97 jpeg errors on too large a file; also catch another malloc failure
7465 0.96 fix detection of invalid v value - particleman@mollyrocket forum
7466 0.95 during header scan, seek to markers in case of padding
7467 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7468 0.93 handle jpegtran output; verbose errors
7469 0.92 read 4,8,16,24,32-bit BMP files of several formats
7470 0.91 output 24-bit Windows 3.0 BMP files
7471 0.90 fix a few more warnings; bump version number to approach 1.0
7472 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
7473 0.60 fix compiling as c++
7474 0.59 fix warnings: merge Dave Moore's -Wall fixes
7475 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
7476 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7477 0.56 fix bug: zlib uncompressed mode len vs. nlen
7478 0.55 fix bug: restart_interval not initialized to 0
7479 0.54 allow NULL for 'int *comp'
7480 0.53 fix bug in png 3->4; speedup png decoding
7481 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7482 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
7483 on 'test' only check type, not whether we support this variant
7484 0.50 (2006-11-19)
7485 first released version
7486*/
7487
7488
7489/*
7490------------------------------------------------------------------------------
7491This software is available under 2 licenses -- choose whichever you prefer.
7492------------------------------------------------------------------------------
7493ALTERNATIVE A - MIT License
7494Copyright (c) 2017 Sean Barrett
7495Permission is hereby granted, free of charge, to any person obtaining a copy of
7496this software and associated documentation files (the "Software"), to deal in
7497the Software without restriction, including without limitation the rights to
7498use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7499of the Software, and to permit persons to whom the Software is furnished to do
7500so, subject to the following conditions:
7501The above copyright notice and this permission notice shall be included in all
7502copies or substantial portions of the Software.
7503THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7504IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7505FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7506AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7507LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7508OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7509SOFTWARE.
7510------------------------------------------------------------------------------
7511ALTERNATIVE B - Public Domain (www.unlicense.org)
7512This is free and unencumbered software released into the public domain.
7513Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7514software, either in source code form or as a compiled binary, for any purpose,
7515commercial or non-commercial, and by any means.
7516In jurisdictions that recognize copyright laws, the author or authors of this
7517software dedicate any and all copyright interest in the software to the public
7518domain. We make this dedication for the benefit of the public at large and to
7519the detriment of our heirs and successors. We intend this dedication to be an
7520overt act of relinquishment in perpetuity of all present and future rights to
7521this software under copyright law.
7522THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7523IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7524FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7525AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7526ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7527WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7528------------------------------------------------------------------------------
7529*/
diff --git a/externals/stb/stb_image.h b/externals/stb/stb_image.h
index f0dfad1c6..5e807a0a6 100644
--- a/externals/stb/stb_image.h
+++ b/externals/stb/stb_image.h
@@ -1,6 +1,3 @@
1// SPDX-FileCopyrightText: stb http://nothings.org/stb
2// SPDX-License-Identifier: MIT
3
4/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb 1/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb
5 no warranty implied; use at your own risk 2 no warranty implied; use at your own risk
6 3
@@ -545,6 +542,7224 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
545//// end header file ///////////////////////////////////////////////////// 542//// end header file /////////////////////////////////////////////////////
546#endif // STBI_INCLUDE_STB_IMAGE_H 543#endif // STBI_INCLUDE_STB_IMAGE_H
547 544
545#ifdef STB_IMAGE_IMPLEMENTATION
546
547#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
548 || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
549 || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
550 || defined(STBI_ONLY_ZLIB)
551 #ifndef STBI_ONLY_JPEG
552 #define STBI_NO_JPEG
553 #endif
554 #ifndef STBI_ONLY_PNG
555 #define STBI_NO_PNG
556 #endif
557 #ifndef STBI_ONLY_BMP
558 #define STBI_NO_BMP
559 #endif
560 #ifndef STBI_ONLY_PSD
561 #define STBI_NO_PSD
562 #endif
563 #ifndef STBI_ONLY_TGA
564 #define STBI_NO_TGA
565 #endif
566 #ifndef STBI_ONLY_GIF
567 #define STBI_NO_GIF
568 #endif
569 #ifndef STBI_ONLY_HDR
570 #define STBI_NO_HDR
571 #endif
572 #ifndef STBI_ONLY_PIC
573 #define STBI_NO_PIC
574 #endif
575 #ifndef STBI_ONLY_PNM
576 #define STBI_NO_PNM
577 #endif
578#endif
579
580#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
581#define STBI_NO_ZLIB
582#endif
583
584
585#include <stdarg.h>
586#include <stddef.h> // ptrdiff_t on osx
587#include <stdlib.h>
588#include <string.h>
589#include <limits.h>
590
591#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
592#include <math.h> // ldexp, pow
593#endif
594
595#ifndef STBI_NO_STDIO
596#include <stdio.h>
597#endif
598
599#ifndef STBI_ASSERT
600#include <assert.h>
601#define STBI_ASSERT(x) assert(x)
602#endif
603
604#ifdef __cplusplus
605#define STBI_EXTERN extern "C"
606#else
607#define STBI_EXTERN extern
608#endif
609
610
611#ifndef _MSC_VER
612 #ifdef __cplusplus
613 #define stbi_inline inline
614 #else
615 #define stbi_inline
616 #endif
617#else
618 #define stbi_inline __forceinline
619#endif
620
621#ifndef STBI_NO_THREAD_LOCALS
622 #if defined(__cplusplus) && __cplusplus >= 201103L
623 #define STBI_THREAD_LOCAL thread_local
624 #elif defined(__GNUC__) && __GNUC__ < 5
625 #define STBI_THREAD_LOCAL __thread
626 #elif defined(_MSC_VER)
627 #define STBI_THREAD_LOCAL __declspec(thread)
628 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
629 #define STBI_THREAD_LOCAL _Thread_local
630 #endif
631
632 #ifndef STBI_THREAD_LOCAL
633 #if defined(__GNUC__)
634 #define STBI_THREAD_LOCAL __thread
635 #endif
636 #endif
637#endif
638
639#if defined(_MSC_VER) || defined(__SYMBIAN32__)
640typedef unsigned short stbi__uint16;
641typedef signed short stbi__int16;
642typedef unsigned int stbi__uint32;
643typedef signed int stbi__int32;
644#else
645#include <stdint.h>
646typedef uint16_t stbi__uint16;
647typedef int16_t stbi__int16;
648typedef uint32_t stbi__uint32;
649typedef int32_t stbi__int32;
650#endif
651
652// should produce compiler error if size is wrong
653typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
654
655#ifdef _MSC_VER
656#define STBI_NOTUSED(v) (void)(v)
657#else
658#define STBI_NOTUSED(v) (void)sizeof(v)
659#endif
660
661#ifdef _MSC_VER
662#define STBI_HAS_LROTL
663#endif
664
665#ifdef STBI_HAS_LROTL
666 #define stbi_lrot(x,y) _lrotl(x,y)
667#else
668 #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31)))
669#endif
670
671#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
672// ok
673#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
674// ok
675#else
676#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
677#endif
678
679#ifndef STBI_MALLOC
680#define STBI_MALLOC(sz) malloc(sz)
681#define STBI_REALLOC(p,newsz) realloc(p,newsz)
682#define STBI_FREE(p) free(p)
683#endif
684
685#ifndef STBI_REALLOC_SIZED
686#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
687#endif
688
689// x86/x64 detection
690#if defined(__x86_64__) || defined(_M_X64)
691#define STBI__X64_TARGET
692#elif defined(__i386) || defined(_M_IX86)
693#define STBI__X86_TARGET
694#endif
695
696#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
697// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
698// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
699// but previous attempts to provide the SSE2 functions with runtime
700// detection caused numerous issues. The way architecture extensions are
701// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
702// New behavior: if compiled with -msse2, we use SSE2 without any
703// detection; if not, we don't use it at all.
704#define STBI_NO_SIMD
705#endif
706
707#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
708// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
709//
710// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
711// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
712// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
713// simultaneously enabling "-mstackrealign".
714//
715// See https://github.com/nothings/stb/issues/81 for more information.
716//
717// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
718// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
719#define STBI_NO_SIMD
720#endif
721
722#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
723#define STBI_SSE2
724#include <emmintrin.h>
725
726#ifdef _MSC_VER
727
728#if _MSC_VER >= 1400 // not VC6
729#include <intrin.h> // __cpuid
// run CPUID with leaf 1 through the compiler intrinsic and return the
// fourth register word it reports
static int stbi__cpuid3(void)
{
   int regs[4];
   __cpuid(regs, 1);
   return regs[3];
}
736#else
737static int stbi__cpuid3(void)
738{
739 int res;
740 __asm {
741 mov eax,1
742 cpuid
743 mov res,edx
744 }
745 return res;
746}
747#endif
748
749#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
750
751#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// returns 1 when bit 26 of the word reported by stbi__cpuid3() is set, else 0
static int stbi__sse2_available(void)
{
   return (stbi__cpuid3() & (1 << 26)) != 0;
}
757#endif
758
759#else // assume GCC-style if not VC++
760#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
761
762#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
static int stbi__sse2_available(void)
{
   // Reaching this code on GCC/Clang means the build was done with -msse2,
   // so the compiler may already emit SSE2 instructions wherever it likes;
   // there is nothing left to detect at run time.
   return 1;
}
770#endif
771
772#endif
773#endif
774
775// ARM NEON
776#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
777#undef STBI_NEON
778#endif
779
780#ifdef STBI_NEON
781#include <arm_neon.h>
782#ifdef _MSC_VER
783#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
784#else
785#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
786#endif
787#endif
788
789#ifndef STBI_SIMD_ALIGN
790#define STBI_SIMD_ALIGN(type, name) type name
791#endif
792
793#ifndef STBI_MAX_DIMENSIONS
794#define STBI_MAX_DIMENSIONS (1 << 24)
795#endif
796
797///////////////////////////////////////////////
798//
799// stbi__context struct and start_xxx functions
800
801// stbi__context structure is our basic context used by all images, so it
802// contains all the IO context, plus some basic image information
803typedef struct
804{
805 stbi__uint32 img_x, img_y;
806 int img_n, img_out_n;
807
808 stbi_io_callbacks io;
809 void *io_user_data;
810
811 int read_from_callbacks;
812 int buflen;
813 stbi_uc buffer_start[128];
814 int callback_already_read;
815
816 stbi_uc *img_buffer, *img_buffer_end;
817 stbi_uc *img_buffer_original, *img_buffer_original_end;
818} stbi__context;
819
820
821static void stbi__refill_buffer(stbi__context *s);
822
823// initialize a memory-decode context
824static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
825{
826 s->io.read = NULL;
827 s->read_from_callbacks = 0;
828 s->callback_already_read = 0;
829 s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
830 s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
831}
832
833// initialize a callback-based context
834static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
835{
836 s->io = *c;
837 s->io_user_data = user;
838 s->buflen = sizeof(s->buffer_start);
839 s->read_from_callbacks = 1;
840 s->callback_already_read = 0;
841 s->img_buffer = s->img_buffer_original = s->buffer_start;
842 stbi__refill_buffer(s);
843 s->img_buffer_original_end = s->img_buffer_end;
844}
845
846#ifndef STBI_NO_STDIO
847
// read callback for FILE-backed contexts: reads up to 'size' bytes into
// 'data' and returns the number of bytes actually read
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
852
// skip callback for FILE-backed contexts: moves the file position by 'n'
// bytes relative to the current position
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   int peeked;

   fseek(stream, n, SEEK_CUR);

   /* a byte has to be read so that feof() reflects the new position */
   peeked = fgetc(stream);
   if (peeked == EOF)
      return;

   /* the byte was valid, so hand it back to the stream */
   ungetc(peeked, stream);
}
862
// eof callback for FILE-backed contexts: non-zero once the stream has hit
// end-of-file or is in an error state
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   if (feof(stream))
      return 1;
   return ferror(stream) != 0;
}
867
868static stbi_io_callbacks stbi__stdio_callbacks =
869{
870 stbi__stdio_read,
871 stbi__stdio_skip,
872 stbi__stdio_eof,
873};
874
875static void stbi__start_file(stbi__context *s, FILE *f)
876{
877 stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
878}
879
880//static void stop_file(stbi__context *s) { }
881
882#endif // !STBI_NO_STDIO
883
884static void stbi__rewind(stbi__context *s)
885{
886 // conceptually rewind SHOULD rewind to the beginning of the stream,
887 // but we just rewind to the beginning of the initial buffer, because
888 // we only use it after doing 'test', which only ever looks at at most 92 bytes
889 s->img_buffer = s->img_buffer_original;
890 s->img_buffer_end = s->img_buffer_original_end;
891}
892
// channel orderings a loader can report in stbi__result_info.channel_order
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
898
// per-load description of the pixel data a loader produced
typedef struct
{
   int bits_per_channel;   // stbi__load_main defaults this to 8
   int num_channels;
   int channel_order;      // one of the STBI_ORDER_* values
} stbi__result_info;
905
906#ifndef STBI_NO_JPEG
907static int stbi__jpeg_test(stbi__context *s);
908static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
909static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
910#endif
911
912#ifndef STBI_NO_PNG
913static int stbi__png_test(stbi__context *s);
914static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
915static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
916static int stbi__png_is16(stbi__context *s);
917#endif
918
919#ifndef STBI_NO_BMP
920static int stbi__bmp_test(stbi__context *s);
921static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
922static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
923#endif
924
925#ifndef STBI_NO_TGA
926static int stbi__tga_test(stbi__context *s);
927static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
928static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
929#endif
930
931#ifndef STBI_NO_PSD
932static int stbi__psd_test(stbi__context *s);
933static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
934static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
935static int stbi__psd_is16(stbi__context *s);
936#endif
937
938#ifndef STBI_NO_HDR
939static int stbi__hdr_test(stbi__context *s);
940static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
941static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
942#endif
943
944#ifndef STBI_NO_PIC
945static int stbi__pic_test(stbi__context *s);
946static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
947static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
948#endif
949
950#ifndef STBI_NO_GIF
951static int stbi__gif_test(stbi__context *s);
952static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
953static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
954static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
955#endif
956
957#ifndef STBI_NO_PNM
958static int stbi__pnm_test(stbi__context *s);
959static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
960static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
961static int stbi__pnm_is16(stbi__context *s);
962#endif
963
964static
965#ifdef STBI_THREAD_LOCAL
966STBI_THREAD_LOCAL
967#endif
968const char *stbi__g_failure_reason;
969
970STBIDEF const char *stbi_failure_reason(void)
971{
972 return stbi__g_failure_reason;
973}
974
975#ifndef STBI_NO_FAILURE_STRINGS
976static int stbi__err(const char *str)
977{
978 stbi__g_failure_reason = str;
979 return 0;
980}
981#endif
982
// single allocation entry point; forwards to the configurable STBI_MALLOC
static void *stbi__malloc(size_t size)
{
   void *mem = STBI_MALLOC(size);
   return mem;
}
987
988// stb_image uses ints pervasively, including for offset calculations.
989// therefore the largest decoded image size we can support with the
990// current code, even on 64-bit targets, is INT_MAX. this is not a
991// significant limitation for the intended use case.
992//
993// we do, however, need to make sure our size calculations don't
994// overflow. hence a few helper functions for size calculations that
995// multiply integers together, making sure that they're non-negative
996// and no overflow occurs.
997
// return 1 if the sum a + b is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   // Both terms are sizes, so reject a negative value in either position.
   // (Previously only b was checked, which let a negative a through.)
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
1009
// returns 1 when a*b can be computed as an int without overflow, 0 otherwise.
// a negative factor always makes the product invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   // anything times zero is fine (and this avoids dividing by zero below)
   if (b == 0) return 1;
   // overflow-free way of asking "is a*b <= INT_MAX?"
   return (a > INT_MAX/b) ? 0 : 1;
}
1019
1020#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// returns 1 when "a*b + add" involves no negative term/factor and cannot overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // the product is only formed once it is known to be safe
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
1026#endif
1027
// returns 1 when "a*b*c + add" involves no negative term/factor and cannot overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
1034
1035// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
1036#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // validate every partial product before forming it, then the final sum
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
1042#endif
1043
1044#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// allocate a*b + add bytes, returning NULL when that size is invalid or overflows
static void *stbi__malloc_mad2(int a, int b, int add)
{
   return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a*b + add) : NULL;
}
1051#endif
1052
// allocate a*b*c + add bytes, returning NULL when that size is invalid or overflows
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a*b*c + add) : NULL;
}
1058
1059#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
// allocate a*b*c*d + add bytes, returning NULL when that size is invalid or overflows
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a*b*c*d + add) : NULL;
}
1065#endif
1066
// returns 1 when a + b fits in an int (INT_MIN..INT_MAX inclusive), 0 when it would overflow.
static int stbi__addints_valid(int a, int b)
{
   int a_negative = a < 0;
   int b_negative = b < 0;

   // operands of opposite sign can never overflow
   if (a_negative != b_negative) return 1;
   // both negative: "a + b >= INT_MIN" rewritten so nothing overflows
   // (INT_MIN - b is safe because b < 0)
   if (a_negative) return a >= INT_MIN - b;
   // both non-negative: "a + b <= INT_MAX" rewritten the same way
   return a <= INT_MAX - b;
}
1074
// returns 1 if the product of two signed shorts fits in a short
// (SHRT_MIN..SHRT_MAX inclusive), 0 on overflow.
static int stbi__mul2shorts_valid(short a, short b)
{
   // The previous division-based test used the wrong comparison direction when
   // both factors were negative (it accepted -200 * -200 and rejected -1 * -2)
   // and accepted SHRT_MIN * -1. Forming the product in a wider type and
   // range-checking it covers every sign combination.
   // NOTE: relies on long being wide enough for the product of two shorts,
   // which holds for 16-bit short with the >= 32-bit long the C standard requires.
   long product = (long) a * (long) b;
   return product >= SHRT_MIN && product <= SHRT_MAX;
}
1083
1084// stbi__err - error
1085// stbi__errpf - error returning pointer to float
1086// stbi__errpuc - error returning pointer to unsigned char
1087
1088#ifdef STBI_NO_FAILURE_STRINGS
1089 #define stbi__err(x,y) 0
1090#elif defined(STBI_FAILURE_USERMSG)
1091 #define stbi__err(x,y) stbi__err(y)
1092#else
1093 #define stbi__err(x,y) stbi__err(x)
1094#endif
1095
1096#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
1097#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
1098
1099STBIDEF void stbi_image_free(void *retval_from_stbi_load)
1100{
1101 STBI_FREE(retval_from_stbi_load);
1102}
1103
1104#ifndef STBI_NO_LINEAR
1105static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
1106#endif
1107
1108#ifndef STBI_NO_HDR
1109static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
1110#endif
1111
1112static int stbi__vertically_flip_on_load_global = 0;
1113
1114STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1115{
1116 stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
1117}
1118
1119#ifndef STBI_THREAD_LOCAL
1120#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global
1121#else
1122static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
1123
1124STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
1125{
1126 stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
1127 stbi__vertically_flip_on_load_set = 1;
1128}
1129
1130#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \
1131 ? stbi__vertically_flip_on_load_local \
1132 : stbi__vertically_flip_on_load_global)
1133#endif // STBI_THREAD_LOCAL
1134
1135static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
1136{
1137 memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
1138 ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
1139 ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
1140 ri->num_channels = 0;
1141
1142 // test the formats with a very explicit header first (at least a FOURCC
1143 // or distinctive magic number first)
1144 #ifndef STBI_NO_PNG
1145 if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
1146 #endif
1147 #ifndef STBI_NO_BMP
1148 if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
1149 #endif
1150 #ifndef STBI_NO_GIF
1151 if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
1152 #endif
1153 #ifndef STBI_NO_PSD
1154 if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
1155 #else
1156 STBI_NOTUSED(bpc);
1157 #endif
1158 #ifndef STBI_NO_PIC
1159 if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
1160 #endif
1161
1162 // then the formats that can end up attempting to load with just 1 or 2
1163 // bytes matching expectations; these are prone to false positives, so
1164 // try them later
1165 #ifndef STBI_NO_JPEG
1166 if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
1167 #endif
1168 #ifndef STBI_NO_PNM
1169 if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
1170 #endif
1171
1172 #ifndef STBI_NO_HDR
1173 if (stbi__hdr_test(s)) {
1174 float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
1175 return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1176 }
1177 #endif
1178
1179 #ifndef STBI_NO_TGA
1180 // test tga last because it's a crappy test!
1181 if (stbi__tga_test(s))
1182 return stbi__tga_load(s,x,y,comp,req_comp, ri);
1183 #endif
1184
1185 return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
1186}
1187
1188static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1189{
1190 int i;
1191 int img_len = w * h * channels;
1192 stbi_uc *reduced;
1193
1194 reduced = (stbi_uc *) stbi__malloc(img_len);
1195 if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1196
1197 for (i = 0; i < img_len; ++i)
1198 reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1199
1200 STBI_FREE(orig);
1201 return reduced;
1202}
1203
1204static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1205{
1206 int i;
1207 int img_len = w * h * channels;
1208 stbi__uint16 *enlarged;
1209
1210 enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1211 if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1212
1213 for (i = 0; i < img_len; ++i)
1214 enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1215
1216 STBI_FREE(orig);
1217 return enlarged;
1218}
1219
// Flip an image upside down in place. 'image' holds h rows of
// w*bytes_per_pixel bytes each; rows are exchanged pairwise (first with last,
// and so on) through a fixed-size scratch buffer, one chunk at a time.
static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
{
   unsigned char scratch[2048];
   unsigned char *base = (unsigned char *)image;
   size_t stride = (size_t)w * bytes_per_pixel;
   int half = h >> 1;
   int top;

   for (top = 0; top < half; ++top) {
      unsigned char *upper = base + top*stride;
      unsigned char *lower = base + (h - top - 1)*stride;
      size_t remaining;

      // exchange the two rows in pieces no larger than the scratch buffer
      for (remaining = stride; remaining != 0; ) {
         size_t chunk = sizeof(scratch);
         if (remaining < chunk)
            chunk = remaining;
         memcpy(scratch, upper, chunk);
         memcpy(upper, lower, chunk);
         memcpy(lower, scratch, chunk);
         upper += chunk;
         lower += chunk;
         remaining -= chunk;
      }
   }
}
1243
1244#ifndef STBI_NO_GIF
1245static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1246{
1247 int slice;
1248 int slice_size = w * h * bytes_per_pixel;
1249
1250 stbi_uc *bytes = (stbi_uc *)image;
1251 for (slice = 0; slice < z; ++slice) {
1252 stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1253 bytes += slice_size;
1254 }
1255}
1256#endif
1257
1258static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1259{
1260 stbi__result_info ri;
1261 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1262
1263 if (result == NULL)
1264 return NULL;
1265
1266 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1267 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1268
1269 if (ri.bits_per_channel != 8) {
1270 result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1271 ri.bits_per_channel = 8;
1272 }
1273
1274 // @TODO: move stbi__convert_format to here
1275
1276 if (stbi__vertically_flip_on_load) {
1277 int channels = req_comp ? req_comp : *comp;
1278 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1279 }
1280
1281 return (unsigned char *) result;
1282}
1283
1284static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1285{
1286 stbi__result_info ri;
1287 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1288
1289 if (result == NULL)
1290 return NULL;
1291
1292 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1293 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1294
1295 if (ri.bits_per_channel != 16) {
1296 result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1297 ri.bits_per_channel = 16;
1298 }
1299
1300 // @TODO: move stbi__convert_format16 to here
1301 // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1302
1303 if (stbi__vertically_flip_on_load) {
1304 int channels = req_comp ? req_comp : *comp;
1305 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1306 }
1307
1308 return (stbi__uint16 *) result;
1309}
1310
1311#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
1312static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1313{
1314 if (stbi__vertically_flip_on_load && result != NULL) {
1315 int channels = req_comp ? req_comp : *comp;
1316 stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1317 }
1318}
1319#endif
1320
1321#ifndef STBI_NO_STDIO
1322
1323#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1324STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
1325STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
1326#endif
1327
1328#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1329STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
1330{
1331 return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
1332}
1333#endif
1334
// Open 'filename' with the given fopen-style mode and return the stream, or
// NULL (0) on failure. With STBI_WINDOWS_UTF8 on Windows both strings are
// converted from UTF-8 to wide characters first; MSVC 2005+ uses the _s variants.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
   FILE *fp;
   wchar_t wide_mode[64];
   wchar_t wide_name[1024];

   if (MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wide_name, sizeof(wide_name)/sizeof(*wide_name)) == 0)
      return 0;
   if (MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wide_mode, sizeof(wide_mode)/sizeof(*wide_mode)) == 0)
      return 0;

#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (_wfopen_s(&fp, wide_name, wide_mode) != 0)
      fp = 0;
#else
   fp = _wfopen(wide_name, wide_mode);
#endif
   return fp;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *fp;
   if (fopen_s(&fp, filename, mode) != 0)
      fp = 0;
   return fp;
#else
   return fopen(filename, mode);
#endif
}
1362
1363
1364STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1365{
1366 FILE *f = stbi__fopen(filename, "rb");
1367 unsigned char *result;
1368 if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1369 result = stbi_load_from_file(f,x,y,comp,req_comp);
1370 fclose(f);
1371 return result;
1372}
1373
1374STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1375{
1376 unsigned char *result;
1377 stbi__context s;
1378 stbi__start_file(&s,f);
1379 result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1380 if (result) {
1381 // need to 'unget' all the characters in the IO buffer
1382 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1383 }
1384 return result;
1385}
1386
1387STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1388{
1389 stbi__uint16 *result;
1390 stbi__context s;
1391 stbi__start_file(&s,f);
1392 result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1393 if (result) {
1394 // need to 'unget' all the characters in the IO buffer
1395 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1396 }
1397 return result;
1398}
1399
1400STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1401{
1402 FILE *f = stbi__fopen(filename, "rb");
1403 stbi__uint16 *result;
1404 if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1405 result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1406 fclose(f);
1407 return result;
1408}
1409
1410
1411#endif //!STBI_NO_STDIO
1412
1413STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1414{
1415 stbi__context s;
1416 stbi__start_mem(&s,buffer,len);
1417 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1418}
1419
1420STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1421{
1422 stbi__context s;
1423 stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1424 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1425}
1426
1427STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1428{
1429 stbi__context s;
1430 stbi__start_mem(&s,buffer,len);
1431 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1432}
1433
1434STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1435{
1436 stbi__context s;
1437 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1438 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1439}
1440
1441#ifndef STBI_NO_GIF
1442STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1443{
1444 unsigned char *result;
1445 stbi__context s;
1446 stbi__start_mem(&s,buffer,len);
1447
1448 result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1449 if (stbi__vertically_flip_on_load) {
1450 stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1451 }
1452
1453 return result;
1454}
1455#endif
1456
1457#ifndef STBI_NO_LINEAR
1458static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1459{
1460 unsigned char *data;
1461 #ifndef STBI_NO_HDR
1462 if (stbi__hdr_test(s)) {
1463 stbi__result_info ri;
1464 float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
1465 if (hdr_data)
1466 stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1467 return hdr_data;
1468 }
1469 #endif
1470 data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1471 if (data)
1472 return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1473 return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1474}
1475
1476STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1477{
1478 stbi__context s;
1479 stbi__start_mem(&s,buffer,len);
1480 return stbi__loadf_main(&s,x,y,comp,req_comp);
1481}
1482
1483STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1484{
1485 stbi__context s;
1486 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1487 return stbi__loadf_main(&s,x,y,comp,req_comp);
1488}
1489
1490#ifndef STBI_NO_STDIO
1491STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1492{
1493 float *result;
1494 FILE *f = stbi__fopen(filename, "rb");
1495 if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1496 result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1497 fclose(f);
1498 return result;
1499}
1500
1501STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1502{
1503 stbi__context s;
1504 stbi__start_file(&s,f);
1505 return stbi__loadf_main(&s,x,y,comp,req_comp);
1506}
1507#endif // !STBI_NO_STDIO
1508
1509#endif // !STBI_NO_LINEAR
1510
// these is-hdr-or-not queries are defined regardless of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is
// defined, they always report false!
1514
1515STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1516{
1517 #ifndef STBI_NO_HDR
1518 stbi__context s;
1519 stbi__start_mem(&s,buffer,len);
1520 return stbi__hdr_test(&s);
1521 #else
1522 STBI_NOTUSED(buffer);
1523 STBI_NOTUSED(len);
1524 return 0;
1525 #endif
1526}
1527
1528#ifndef STBI_NO_STDIO
1529STBIDEF int stbi_is_hdr (char const *filename)
1530{
1531 FILE *f = stbi__fopen(filename, "rb");
1532 int result=0;
1533 if (f) {
1534 result = stbi_is_hdr_from_file(f);
1535 fclose(f);
1536 }
1537 return result;
1538}
1539
1540STBIDEF int stbi_is_hdr_from_file(FILE *f)
1541{
1542 #ifndef STBI_NO_HDR
1543 long pos = ftell(f);
1544 int res;
1545 stbi__context s;
1546 stbi__start_file(&s,f);
1547 res = stbi__hdr_test(&s);
1548 fseek(f, pos, SEEK_SET);
1549 return res;
1550 #else
1551 STBI_NOTUSED(f);
1552 return 0;
1553 #endif
1554}
1555#endif // !STBI_NO_STDIO
1556
1557STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1558{
1559 #ifndef STBI_NO_HDR
1560 stbi__context s;
1561 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1562 return stbi__hdr_test(&s);
1563 #else
1564 STBI_NOTUSED(clbk);
1565 STBI_NOTUSED(user);
1566 return 0;
1567 #endif
1568}
1569
1570#ifndef STBI_NO_LINEAR
1571static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1572
1573STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1574STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1575#endif
1576
1577static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1578
1579STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1580STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1581
1582
1583//////////////////////////////////////////////////////////////////////////////
1584//
1585// Common code used by all image loaders
1586//
1587
// scan modes; values are spelled out but identical to the implicit numbering
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1594
1595static void stbi__refill_buffer(stbi__context *s)
1596{
1597 int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1598 s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
1599 if (n == 0) {
1600 // at end of file, treat same as if from memory, but need to handle case
1601 // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1602 s->read_from_callbacks = 0;
1603 s->img_buffer = s->buffer_start;
1604 s->img_buffer_end = s->buffer_start+1;
1605 *s->img_buffer = 0;
1606 } else {
1607 s->img_buffer = s->buffer_start;
1608 s->img_buffer_end = s->buffer_start + n;
1609 }
1610}
1611
1612stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1613{
1614 if (s->img_buffer < s->img_buffer_end)
1615 return *s->img_buffer++;
1616 if (s->read_from_callbacks) {
1617 stbi__refill_buffer(s);
1618 return *s->img_buffer++;
1619 }
1620 return 0;
1621}
1622
1623#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1624// nothing
1625#else
1626stbi_inline static int stbi__at_eof(stbi__context *s)
1627{
1628 if (s->io.read) {
1629 if (!(s->io.eof)(s->io_user_data)) return 0;
1630 // if feof() is true, check if buffer = end
1631 // special case: we've only got the special 0 character at the end
1632 if (s->read_from_callbacks == 0) return 1;
1633 }
1634
1635 return s->img_buffer >= s->img_buffer_end;
1636}
1637#endif
1638
1639#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
1640// nothing
1641#else
1642static void stbi__skip(stbi__context *s, int n)
1643{
1644 if (n == 0) return; // already there!
1645 if (n < 0) {
1646 s->img_buffer = s->img_buffer_end;
1647 return;
1648 }
1649 if (s->io.read) {
1650 int blen = (int) (s->img_buffer_end - s->img_buffer);
1651 if (blen < n) {
1652 s->img_buffer = s->img_buffer_end;
1653 (s->io.skip)(s->io_user_data, n - blen);
1654 return;
1655 }
1656 }
1657 s->img_buffer += n;
1658}
1659#endif
1660
1661#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
1662// nothing
1663#else
1664static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1665{
1666 if (s->io.read) {
1667 int blen = (int) (s->img_buffer_end - s->img_buffer);
1668 if (blen < n) {
1669 int res, count;
1670
1671 memcpy(buffer, s->img_buffer, blen);
1672
1673 count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1674 res = (count == (n-blen));
1675 s->img_buffer = s->img_buffer_end;
1676 return res;
1677 }
1678 }
1679
1680 if (s->img_buffer+n <= s->img_buffer_end) {
1681 memcpy(buffer, s->img_buffer, n);
1682 s->img_buffer += n;
1683 return 1;
1684 } else
1685 return 0;
1686}
1687#endif
1688
1689#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1690// nothing
1691#else
1692static int stbi__get16be(stbi__context *s)
1693{
1694 int z = stbi__get8(s);
1695 return (z << 8) + stbi__get8(s);
1696}
1697#endif
1698
1699#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1700// nothing
1701#else
1702static stbi__uint32 stbi__get32be(stbi__context *s)
1703{
1704 stbi__uint32 z = stbi__get16be(s);
1705 return (z << 16) + stbi__get16be(s);
1706}
1707#endif
1708
1709#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1710// nothing
1711#else
1712static int stbi__get16le(stbi__context *s)
1713{
1714 int z = stbi__get8(s);
1715 return z + (stbi__get8(s) << 8);
1716}
1717#endif
1718
1719#ifndef STBI_NO_BMP
1720static stbi__uint32 stbi__get32le(stbi__context *s)
1721{
1722 stbi__uint32 z = stbi__get16le(s);
1723 z += (stbi__uint32)stbi__get16le(s) << 16;
1724 return z;
1725}
1726#endif
1727
// truncate int to byte without warnings: keep the low 8 bits, then cast
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xff))
1729
1730#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1731// nothing
1732#else
1733//////////////////////////////////////////////////////////////////////////////
1734//
1735// generic converter from built-in img_n to req_comp
1736// individual types do this automatically as much as possible (e.g. jpeg
1737// does all cases internally since it needs to colorspace convert anyway,
1738// and it never has alpha, so very few cases ). png can automatically
1739// interleave an alpha=255 channel, but falls back to this for other cases
1740//
1741// assume data buffer is malloced, so malloc a new one and free that one
1742// only failure mode is malloc failing
1743
1744static stbi_uc stbi__compute_y(int r, int g, int b)
1745{
1746 return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1747}
1748#endif
1749
1750#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1751// nothing
1752#else
// Convert an 8-bit image of img_n channels into one of req_comp channels.
// Returns `data` untouched when no conversion is needed. Otherwise a new
// buffer is allocated, `data` is freed, and the new buffer is returned.
// On allocation failure or an unsupported channel pair, `data` is freed and
// the error value from stbi__errpuc is returned.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int px, row;
   unsigned char *converted;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   converted = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (converted == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (row=0; row < (int) y; ++row) {
      unsigned char *src  = data      + row * x * img_n   ;
      unsigned char *dest = converted + row * x * req_comp;

      // one switch per scanline rather than per pixel; each case expands to
      // a loop over the row that steps src by a and dest by b channels
      #define STBI__COMBO(a,b) ((a)*8+(b))
      #define STBI__CASE(a,b)  case STBI__COMBO(a,b): for (px=x-1; px >= 0; --px, src += a, dest += b)
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(converted); return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return converted;
}
1796#endif
1797
1798#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1799// nothing
1800#else
1801static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1802{
1803 return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
1804}
1805#endif
1806
1807#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1808// nothing
1809#else
1810static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1811{
1812 int i,j;
1813 stbi__uint16 *good;
1814
1815 if (req_comp == img_n) return data;
1816 STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1817
1818 good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1819 if (good == NULL) {
1820 STBI_FREE(data);
1821 return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1822 }
1823
1824 for (j=0; j < (int) y; ++j) {
1825 stbi__uint16 *src = data + j * x * img_n ;
1826 stbi__uint16 *dest = good + j * x * req_comp;
1827
1828 #define STBI__COMBO(a,b) ((a)*8+(b))
1829 #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1830 // convert source image with img_n components to one with req_comp components;
1831 // avoid switch per pixel, so use switch per scanline and massive macros
1832 switch (STBI__COMBO(img_n, req_comp)) {
1833 STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break;
1834 STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1835 STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break;
1836 STBI__CASE(2,1) { dest[0]=src[0]; } break;
1837 STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1838 STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
1839 STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break;
1840 STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1841 STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
1842 STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1843 STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1844 STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
1845 default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
1846 }
1847 #undef STBI__CASE
1848 }
1849
1850 STBI_FREE(data);
1851 return good;
1852}
1853#endif
1854
1855#ifndef STBI_NO_LINEAR
1856static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1857{
1858 int i,k,n;
1859 float *output;
1860 if (!data) return NULL;
1861 output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1862 if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1863 // compute number of non-alpha components
1864 if (comp & 1) n = comp; else n = comp-1;
1865 for (i=0; i < x*y; ++i) {
1866 for (k=0; k < n; ++k) {
1867 output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1868 }
1869 }
1870 if (n < comp) {
1871 for (i=0; i < x*y; ++i) {
1872 output[i*comp + n] = data[i*comp + n]/255.0f;
1873 }
1874 }
1875 STBI_FREE(data);
1876 return output;
1877}
1878#endif
1879
1880#ifndef STBI_NO_HDR
1881#define stbi__float2int(x) ((int) (x))
1882static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1883{
1884 int i,k,n;
1885 stbi_uc *output;
1886 if (!data) return NULL;
1887 output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1888 if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1889 // compute number of non-alpha components
1890 if (comp & 1) n = comp; else n = comp-1;
1891 for (i=0; i < x*y; ++i) {
1892 for (k=0; k < n; ++k) {
1893 float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1894 if (z < 0) z = 0;
1895 if (z > 255) z = 255;
1896 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1897 }
1898 if (k < comp) {
1899 float z = data[i*comp+k] * 255 + 0.5f;
1900 if (z < 0) z = 0;
1901 if (z > 255) z = 255;
1902 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1903 }
1904 }
1905 STBI_FREE(data);
1906 return output;
1907}
1908#endif
1909
1910//////////////////////////////////////////////////////////////////////////////
1911//
1912// "baseline" JPEG/JFIF decoder
1913//
1914// simple implementation
1915// - doesn't support delayed output of y-dimension
1916// - simple interface (only one output format: 8-bit interleaved RGB)
1917// - doesn't try to recover corrupt jpegs
1918// - doesn't allow partial loading, loading multiple at once
1919// - still fast on x86 (copying globals into locals doesn't help x86)
1920// - allocates lots of intermediate memory (full size of all components)
1921// - non-interleaved case requires this anyway
1922// - allows good upsampling (see next)
1923// high-quality
1924// - upsampled channels are bilinearly interpolated, even across blocks
1925// - quality integer IDCT derived from IJG's 'slow'
1926// performance
1927// - fast huffman; reasonable integer IDCT
1928// - some SIMD kernels for common paths on targets with SSE2/NEON
1929// - uses a lot of intermediate memory, could cache poorly
1930
1931#ifndef STBI_NO_JPEG
1932
1933// huffman decoding acceleration
1934#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1935
1936typedef struct
1937{
1938 stbi_uc fast[1 << FAST_BITS];
1939 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1940 stbi__uint16 code[256];
1941 stbi_uc values[256];
1942 stbi_uc size[257];
1943 unsigned int maxcode[18];
1944 int delta[17]; // old 'firstsymbol' - old 'firstcode'
1945} stbi__huffman;
1946
1947typedef struct
1948{
1949 stbi__context *s;
1950 stbi__huffman huff_dc[4];
1951 stbi__huffman huff_ac[4];
1952 stbi__uint16 dequant[4][64];
1953 stbi__int16 fast_ac[4][1 << FAST_BITS];
1954
1955// sizes for components, interleaved MCUs
1956 int img_h_max, img_v_max;
1957 int img_mcu_x, img_mcu_y;
1958 int img_mcu_w, img_mcu_h;
1959
1960// definition of jpeg image component
1961 struct
1962 {
1963 int id;
1964 int h,v;
1965 int tq;
1966 int hd,ha;
1967 int dc_pred;
1968
1969 int x,y,w2,h2;
1970 stbi_uc *data;
1971 void *raw_data, *raw_coeff;
1972 stbi_uc *linebuf;
1973 short *coeff; // progressive only
1974 int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1975 } img_comp[4];
1976
1977 stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1978 int code_bits; // number of valid bits
1979 unsigned char marker; // marker seen while filling entropy buffer
1980 int nomore; // flag if we saw a marker so must stop
1981
1982 int progressive;
1983 int spec_start;
1984 int spec_end;
1985 int succ_high;
1986 int succ_low;
1987 int eob_run;
1988 int jfif;
1989 int app14_color_transform; // Adobe APP14 tag
1990 int rgb;
1991
1992 int scan_n, order[4];
1993 int restart_interval, todo;
1994
1995// kernels
1996 void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1997 void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1998 stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1999} stbi__jpeg;
2000
2001static int stbi__build_huffman(stbi__huffman *h, int *count)
2002{
2003 int i,j,k=0;
2004 unsigned int code;
2005 // build size list for each symbol (from JPEG spec)
2006 for (i=0; i < 16; ++i) {
2007 for (j=0; j < count[i]; ++j) {
2008 h->size[k++] = (stbi_uc) (i+1);
2009 if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
2010 }
2011 }
2012 h->size[k] = 0;
2013
2014 // compute actual symbols (from jpeg spec)
2015 code = 0;
2016 k = 0;
2017 for(j=1; j <= 16; ++j) {
2018 // compute delta to add to code to compute symbol id
2019 h->delta[j] = k - code;
2020 if (h->size[k] == j) {
2021 while (h->size[k] == j)
2022 h->code[k++] = (stbi__uint16) (code++);
2023 if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
2024 }
2025 // compute largest code + 1 for this size, preshifted as needed later
2026 h->maxcode[j] = code << (16-j);
2027 code <<= 1;
2028 }
2029 h->maxcode[j] = 0xffffffff;
2030
2031 // build non-spec acceleration table; 255 is flag for not-accelerated
2032 memset(h->fast, 255, 1 << FAST_BITS);
2033 for (i=0; i < k; ++i) {
2034 int s = h->size[i];
2035 if (s <= FAST_BITS) {
2036 int c = h->code[i] << (FAST_BITS-s);
2037 int m = 1 << (FAST_BITS-s);
2038 for (j=0; j < m; ++j) {
2039 h->fast[c+j] = (stbi_uc) i;
2040 }
2041 }
2042 }
2043 return 1;
2044}
2045
2046// build a table that decodes both magnitude and value of small ACs in
2047// one go.
2048static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
2049{
2050 int i;
2051 for (i=0; i < (1 << FAST_BITS); ++i) {
2052 stbi_uc fast = h->fast[i];
2053 fast_ac[i] = 0;
2054 if (fast < 255) {
2055 int rs = h->values[fast];
2056 int run = (rs >> 4) & 15;
2057 int magbits = rs & 15;
2058 int len = h->size[fast];
2059
2060 if (magbits && len + magbits <= FAST_BITS) {
2061 // magnitude code followed by receive_extend code
2062 int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
2063 int m = 1 << (magbits - 1);
2064 if (k < m) k += (~0U << magbits) + 1;
2065 // if the result is small enough, we can fit it in fast_ac table
2066 if (k >= -128 && k <= 127)
2067 fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
2068 }
2069 }
2070 }
2071}
2072
2073static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
2074{
2075 do {
2076 unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
2077 if (b == 0xff) {
2078 int c = stbi__get8(j->s);
2079 while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
2080 if (c != 0) {
2081 j->marker = (unsigned char) c;
2082 j->nomore = 1;
2083 return;
2084 }
2085 }
2086 j->code_buffer |= b << (24 - j->code_bits);
2087 j->code_bits += 8;
2088 } while (j->code_bits <= 24);
2089}
2090
2091// (1 << n) - 1
2092static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
2093
2094// decode a jpeg huffman value from the bitstream
2095stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
2096{
2097 unsigned int temp;
2098 int c,k;
2099
2100 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2101
2102 // look at the top FAST_BITS and determine what symbol ID it is,
2103 // if the code is <= FAST_BITS
2104 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2105 k = h->fast[c];
2106 if (k < 255) {
2107 int s = h->size[k];
2108 if (s > j->code_bits)
2109 return -1;
2110 j->code_buffer <<= s;
2111 j->code_bits -= s;
2112 return h->values[k];
2113 }
2114
2115 // naive test is to shift the code_buffer down so k bits are
2116 // valid, then test against maxcode. To speed this up, we've
2117 // preshifted maxcode left so that it has (16-k) 0s at the
2118 // end; in other words, regardless of the number of bits, it
2119 // wants to be compared against something shifted to have 16;
2120 // that way we don't need to shift inside the loop.
2121 temp = j->code_buffer >> 16;
2122 for (k=FAST_BITS+1 ; ; ++k)
2123 if (temp < h->maxcode[k])
2124 break;
2125 if (k == 17) {
2126 // error! code not found
2127 j->code_bits -= 16;
2128 return -1;
2129 }
2130
2131 if (k > j->code_bits)
2132 return -1;
2133
2134 // convert the huffman code to the symbol id
2135 c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
2136 if(c < 0 || c >= 256) // symbol id out of bounds!
2137 return -1;
2138 STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
2139
2140 // convert the id to a symbol
2141 j->code_bits -= k;
2142 j->code_buffer <<= k;
2143 return h->values[c];
2144}
2145
// bias[n] = (-1<<n) + 1, for n = 0..15
static const int stbi__jbias[16] = {
       0,    -1,    -3,     -7,    -15,    -31,     -63,    -127,
    -255,  -511, -1023,  -2047,  -4095,  -8191,  -16383,  -32767
};
2148
2149// combined JPEG 'receive' and JPEG 'extend', since baseline
2150// always extends everything it receives.
2151stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
2152{
2153 unsigned int k;
2154 int sgn;
2155 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
2156 if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
2157
2158 sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
2159 k = stbi_lrot(j->code_buffer, n);
2160 j->code_buffer = k & ~stbi__bmask[n];
2161 k &= stbi__bmask[n];
2162 j->code_bits -= n;
2163 return k + (stbi__jbias[n] & (sgn - 1));
2164}
2165
2166// get some unsigned bits
2167stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
2168{
2169 unsigned int k;
2170 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
2171 if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
2172 k = stbi_lrot(j->code_buffer, n);
2173 j->code_buffer = k & ~stbi__bmask[n];
2174 k &= stbi__bmask[n];
2175 j->code_bits -= n;
2176 return k;
2177}
2178
2179stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
2180{
2181 unsigned int k;
2182 if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
2183 if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing
2184 k = j->code_buffer;
2185 j->code_buffer <<= 1;
2186 --j->code_bits;
2187 return k & 0x80000000;
2188}
2189
2190// given a value that's at position X in the zigzag stream,
2191// where does it appear in the 8x8 matrix coded as row-major?
2192static const stbi_uc stbi__jpeg_dezigzag[64+15] =
2193{
2194 0, 1, 8, 16, 9, 2, 3, 10,
2195 17, 24, 32, 25, 18, 11, 4, 5,
2196 12, 19, 26, 33, 40, 48, 41, 34,
2197 27, 20, 13, 6, 7, 14, 21, 28,
2198 35, 42, 49, 56, 57, 50, 43, 36,
2199 29, 22, 15, 23, 30, 37, 44, 51,
2200 58, 59, 52, 45, 38, 31, 39, 46,
2201 53, 60, 61, 54, 47, 55, 62, 63,
2202 // let corrupt input sample past end
2203 63, 63, 63, 63, 63, 63, 63, 63,
2204 63, 63, 63, 63, 63, 63, 63
2205};
2206
2207// decode one 64-entry block--
2208static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
2209{
2210 int diff,dc,k;
2211 int t;
2212
2213 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2214 t = stbi__jpeg_huff_decode(j, hdc);
2215 if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
2216
2217 // 0 all the ac values now so we can do it 32-bits at a time
2218 memset(data,0,64*sizeof(data[0]));
2219
2220 diff = t ? stbi__extend_receive(j, t) : 0;
2221 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
2222 dc = j->img_comp[b].dc_pred + diff;
2223 j->img_comp[b].dc_pred = dc;
2224 if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2225 data[0] = (short) (dc * dequant[0]);
2226
2227 // decode AC components, see JPEG spec
2228 k = 1;
2229 do {
2230 unsigned int zig;
2231 int c,r,s;
2232 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2233 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2234 r = fac[c];
2235 if (r) { // fast-AC path
2236 k += (r >> 4) & 15; // run
2237 s = r & 15; // combined length
2238 if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
2239 j->code_buffer <<= s;
2240 j->code_bits -= s;
2241 // decode into unzigzag'd location
2242 zig = stbi__jpeg_dezigzag[k++];
2243 data[zig] = (short) ((r >> 8) * dequant[zig]);
2244 } else {
2245 int rs = stbi__jpeg_huff_decode(j, hac);
2246 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2247 s = rs & 15;
2248 r = rs >> 4;
2249 if (s == 0) {
2250 if (rs != 0xf0) break; // end block
2251 k += 16;
2252 } else {
2253 k += r;
2254 // decode into unzigzag'd location
2255 zig = stbi__jpeg_dezigzag[k++];
2256 data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2257 }
2258 }
2259 } while (k < 64);
2260 return 1;
2261}
2262
2263static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
2264{
2265 int diff,dc;
2266 int t;
2267 if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2268
2269 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2270
2271 if (j->succ_high == 0) {
2272 // first scan for DC coefficient, must be first
2273 memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
2274 t = stbi__jpeg_huff_decode(j, hdc);
2275 if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2276 diff = t ? stbi__extend_receive(j, t) : 0;
2277
2278 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
2279 dc = j->img_comp[b].dc_pred + diff;
2280 j->img_comp[b].dc_pred = dc;
2281 if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2282 data[0] = (short) (dc * (1 << j->succ_low));
2283 } else {
2284 // refinement scan for DC coefficient
2285 if (stbi__jpeg_get_bit(j))
2286 data[0] += (short) (1 << j->succ_low);
2287 }
2288 return 1;
2289}
2290
2291// @OPTIMIZE: store non-zigzagged during the decode passes,
2292// and only de-zigzag when dequantizing
2293static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2294{
2295 int k;
2296 if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2297
2298 if (j->succ_high == 0) {
2299 int shift = j->succ_low;
2300
2301 if (j->eob_run) {
2302 --j->eob_run;
2303 return 1;
2304 }
2305
2306 k = j->spec_start;
2307 do {
2308 unsigned int zig;
2309 int c,r,s;
2310 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2311 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2312 r = fac[c];
2313 if (r) { // fast-AC path
2314 k += (r >> 4) & 15; // run
2315 s = r & 15; // combined length
2316 if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
2317 j->code_buffer <<= s;
2318 j->code_bits -= s;
2319 zig = stbi__jpeg_dezigzag[k++];
2320 data[zig] = (short) ((r >> 8) * (1 << shift));
2321 } else {
2322 int rs = stbi__jpeg_huff_decode(j, hac);
2323 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2324 s = rs & 15;
2325 r = rs >> 4;
2326 if (s == 0) {
2327 if (r < 15) {
2328 j->eob_run = (1 << r);
2329 if (r)
2330 j->eob_run += stbi__jpeg_get_bits(j, r);
2331 --j->eob_run;
2332 break;
2333 }
2334 k += 16;
2335 } else {
2336 k += r;
2337 zig = stbi__jpeg_dezigzag[k++];
2338 data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
2339 }
2340 }
2341 } while (k <= j->spec_end);
2342 } else {
2343 // refinement scan for these AC coefficients
2344
2345 short bit = (short) (1 << j->succ_low);
2346
2347 if (j->eob_run) {
2348 --j->eob_run;
2349 for (k = j->spec_start; k <= j->spec_end; ++k) {
2350 short *p = &data[stbi__jpeg_dezigzag[k]];
2351 if (*p != 0)
2352 if (stbi__jpeg_get_bit(j))
2353 if ((*p & bit)==0) {
2354 if (*p > 0)
2355 *p += bit;
2356 else
2357 *p -= bit;
2358 }
2359 }
2360 } else {
2361 k = j->spec_start;
2362 do {
2363 int r,s;
2364 int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2365 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2366 s = rs & 15;
2367 r = rs >> 4;
2368 if (s == 0) {
2369 if (r < 15) {
2370 j->eob_run = (1 << r) - 1;
2371 if (r)
2372 j->eob_run += stbi__jpeg_get_bits(j, r);
2373 r = 64; // force end of block
2374 } else {
2375 // r=15 s=0 should write 16 0s, so we just do
2376 // a run of 15 0s and then write s (which is 0),
2377 // so we don't have to do anything special here
2378 }
2379 } else {
2380 if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2381 // sign bit
2382 if (stbi__jpeg_get_bit(j))
2383 s = bit;
2384 else
2385 s = -bit;
2386 }
2387
2388 // advance by r
2389 while (k <= j->spec_end) {
2390 short *p = &data[stbi__jpeg_dezigzag[k++]];
2391 if (*p != 0) {
2392 if (stbi__jpeg_get_bit(j))
2393 if ((*p & bit)==0) {
2394 if (*p > 0)
2395 *p += bit;
2396 else
2397 *p -= bit;
2398 }
2399 } else {
2400 if (r == 0) {
2401 *p = (short) s;
2402 break;
2403 }
2404 --r;
2405 }
2406 }
2407 } while (k <= j->spec_end);
2408 }
2409 }
2410 return 1;
2411}
2412
2413// take a -128..127 value and stbi__clamp it and convert to 0..255
2414stbi_inline static stbi_uc stbi__clamp(int x)
2415{
2416 // trick to use a single test to catch both cases
2417 if ((unsigned int) x > 255) {
2418 if (x < 0) return 0;
2419 if (x > 255) return 255;
2420 }
2421 return (stbi_uc) x;
2422}
2423
// fixed-point helpers with 12 fractional bits (scale factor 4096):
// stbi__f2f converts a floating constant, adding 0.5 before truncation;
// stbi__fsh scales an integer expression by the same factor
#define stbi__f2f(x)  ((int) ((x) * 4096 + 0.5))
#define stbi__fsh(x)  ((x) * 4096)
2426
2427// derived from jidctint -- DCT_ISLOW
// One 1-D, 8-input IDCT pass, shared by the column and row loops of
// stbi__idct_block.
//   - expands to a declaration of t0..t3, p1..p5 and x0..x3 followed by
//     statements, so it introduces those names into the enclosing scope
//   - x0..x3 are computed from the even inputs (s0,s2,s4,s6) and t0..t3
//     from the odd inputs (s1,s3,s5,s7); the caller forms the eight outputs
//     as x0+-t3, x1+-t2, x2+-t1 and x3+-t0
//   - constants go through stbi__f2f / stbi__fsh, so every result carries a
//     factor of 4096 that the caller has to shift back out
// Arguments are substituted textually, each exactly once.
2428#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
2429 int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
2430 p2 = s2; \
2431 p3 = s6; \
2432 p1 = (p2+p3) * stbi__f2f(0.5411961f); \
2433 t2 = p1 + p3*stbi__f2f(-1.847759065f); \
2434 t3 = p1 + p2*stbi__f2f( 0.765366865f); \
2435 p2 = s0; \
2436 p3 = s4; \
2437 t0 = stbi__fsh(p2+p3); \
2438 t1 = stbi__fsh(p2-p3); \
2439 x0 = t0+t3; \
2440 x3 = t0-t3; \
2441 x1 = t1+t2; \
2442 x2 = t1-t2; \
2443 t0 = s7; \
2444 t1 = s5; \
2445 t2 = s3; \
2446 t3 = s1; \
2447 p3 = t0+t2; \
2448 p4 = t1+t3; \
2449 p1 = t0+t3; \
2450 p2 = t1+t2; \
2451 p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
2452 t0 = t0*stbi__f2f( 0.298631336f); \
2453 t1 = t1*stbi__f2f( 2.053119869f); \
2454 t2 = t2*stbi__f2f( 3.072711026f); \
2455 t3 = t3*stbi__f2f( 1.501321110f); \
2456 p1 = p5 + p1*stbi__f2f(-0.899976223f); \
2457 p2 = p5 + p2*stbi__f2f(-2.562915447f); \
2458 p3 = p3*stbi__f2f(-1.961570560f); \
2459 p4 = p4*stbi__f2f(-0.390180644f); \
2460 t3 += p1+p4; \
2461 t2 += p2+p3; \
2462 t1 += p2+p4; \
2463 t0 += p1+p3;
2464
2465static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2466{
2467 int i,val[64],*v=val;
2468 stbi_uc *o;
2469 short *d = data;
2470
2471 // columns
2472 for (i=0; i < 8; ++i,++d, ++v) {
2473 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2474 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2475 && d[40]==0 && d[48]==0 && d[56]==0) {
2476 // no shortcut 0 seconds
2477 // (1|2|3|4|5|6|7)==0 0 seconds
2478 // all separate -0.047 seconds
2479 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
2480 int dcterm = d[0]*4;
2481 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2482 } else {
2483 STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2484 // constants scaled things up by 1<<12; let's bring them back
2485 // down, but keep 2 extra bits of precision
2486 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2487 v[ 0] = (x0+t3) >> 10;
2488 v[56] = (x0-t3) >> 10;
2489 v[ 8] = (x1+t2) >> 10;
2490 v[48] = (x1-t2) >> 10;
2491 v[16] = (x2+t1) >> 10;
2492 v[40] = (x2-t1) >> 10;
2493 v[24] = (x3+t0) >> 10;
2494 v[32] = (x3-t0) >> 10;
2495 }
2496 }
2497
2498 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
2499 // no fast case since the first 1D IDCT spread components out
2500 STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2501 // constants scaled things up by 1<<12, plus we had 1<<2 from first
2502 // loop, plus horizontal and vertical each scale by sqrt(8) so together
2503 // we've got an extra 1<<3, so 1<<17 total we need to remove.
2504 // so we want to round that, which means adding 0.5 * 1<<17,
2505 // aka 65536. Also, we'll end up with -128 to 127 that we want
2506 // to encode as 0..255 by adding 128, so we'll add that before the shift
2507 x0 += 65536 + (128<<17);
2508 x1 += 65536 + (128<<17);
2509 x2 += 65536 + (128<<17);
2510 x3 += 65536 + (128<<17);
2511 // tried computing the shifts into temps, or'ing the temps to see
2512 // if any were out of range, but that was slower
2513 o[0] = stbi__clamp((x0+t3) >> 17);
2514 o[7] = stbi__clamp((x0-t3) >> 17);
2515 o[1] = stbi__clamp((x1+t2) >> 17);
2516 o[6] = stbi__clamp((x1-t2) >> 17);
2517 o[2] = stbi__clamp((x2+t1) >> 17);
2518 o[5] = stbi__clamp((x2-t1) >> 17);
2519 o[3] = stbi__clamp((x3+t0) >> 17);
2520 o[4] = stbi__clamp((x3-t0) >> 17);
2521 }
2522}
2523
2524 #ifdef STBI_SSE2
2525 // sse2 integer IDCT. not the fastest possible implementation but it
2526 // produces bit-identical results to the generic C version so it's
2527 // fully "transparent".
//
// Parameters:
//   out        - destination; 8 bytes are stored per row with
//                _mm_storel_epi64, and out advances by out_stride between rows
//   out_stride - distance in bytes between consecutive output rows
//   data       - 64 coefficients, read as eight rows of eight 16-bit values
//                with _mm_load_si128 (an aligned load, so data must be
//                16-byte aligned)
//
// Structure: load -> column pass (bias 512, shift 10) -> 16-bit 8x8
// transpose -> row pass (bias 65536 + (128<<17), shift 17) -> pack to bytes
// with unsigned saturation -> 8-bit 8x8 transpose -> store.
// All helper macros are local to this function and #undef'd at the end.
2528 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2529 {
2530 // This is constructed to match our regular (generic) integer IDCT exactly.
2531 __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2532 __m128i tmp;
2533
2534 // dot product constant: even elems=x, odd elems=y
2535 #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2536
2537 // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
2538 // out(1) = c1[even]*x + c1[odd]*y
// (x and y are interleaved so that each _mm_madd_epi16 lane computes one
// x*c[even] + y*c[odd] pair; declares out0_l/out0_h and out1_l/out1_h)
2539 #define dct_rot(out0,out1, x,y,c0,c1) \
2540 __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2541 __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2542 __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2543 __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2544 __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2545 __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2546
2547 // out = in << 12 (in 16-bit, out 32-bit)
// (unpacking with zero in the low half puts each input in the upper 16 bits
// of a 32-bit lane, i.e. in << 16; the arithmetic shift right by 4 then
// leaves in << 12 with the sign preserved)
2548 #define dct_widen(out, in) \
2549 __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2550 __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2551
2552 // wide add
2553 #define dct_wadd(out, a, b) \
2554 __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2555 __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2556
2557 // wide sub
2558 #define dct_wsub(out, a, b) \
2559 __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2560 __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2561
2562 // butterfly a/b, add bias, then shift by "s" and pack
// (out0 = (a + bias + b) >> s, out1 = (a + bias - b) >> s, narrowed from
// 32-bit to 16-bit lanes with _mm_packs_epi32, which saturates)
2563 #define dct_bfly32o(out0, out1, a,b,bias,s) \
2564 { \
2565 __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2566 __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2567 dct_wadd(sum, abiased, b); \
2568 dct_wsub(dif, abiased, b); \
2569 out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2570 out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2571 }
2572
2573 // 8-bit interleave step (for transposes)
2574 #define dct_interleave8(a, b) \
2575 tmp = a; \
2576 a = _mm_unpacklo_epi8(a, b); \
2577 b = _mm_unpackhi_epi8(tmp, b)
2578
2579 // 16-bit interleave step (for transposes)
2580 #define dct_interleave16(a, b) \
2581 tmp = a; \
2582 a = _mm_unpacklo_epi16(a, b); \
2583 b = _mm_unpackhi_epi16(tmp, b)
2584
// one full 1D pass over row0..row7 (overwrites them in place); reads the
// rotN_M constants defined below, so it may only be expanded after them
2585 #define dct_pass(bias,shift) \
2586 { \
2587 /* even part */ \
2588 dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2589 __m128i sum04 = _mm_add_epi16(row0, row4); \
2590 __m128i dif04 = _mm_sub_epi16(row0, row4); \
2591 dct_widen(t0e, sum04); \
2592 dct_widen(t1e, dif04); \
2593 dct_wadd(x0, t0e, t3e); \
2594 dct_wsub(x3, t0e, t3e); \
2595 dct_wadd(x1, t1e, t2e); \
2596 dct_wsub(x2, t1e, t2e); \
2597 /* odd part */ \
2598 dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2599 dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2600 __m128i sum17 = _mm_add_epi16(row1, row7); \
2601 __m128i sum35 = _mm_add_epi16(row3, row5); \
2602 dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2603 dct_wadd(x4, y0o, y4o); \
2604 dct_wadd(x5, y1o, y5o); \
2605 dct_wadd(x6, y2o, y5o); \
2606 dct_wadd(x7, y3o, y4o); \
2607 dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2608 dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2609 dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2610 dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2611 }
2612
// rotation constants: each pairs two of the stbi__f2f constants that
// STBI__IDCT_1D uses, pre-summed so one dct_rot produces two outputs
2613 __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2614 __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2615 __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2616 __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2617 __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2618 __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2619 __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2620 __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2621
2622 // rounding biases in column/row passes, see stbi__idct_block for explanation.
2623 __m128i bias_0 = _mm_set1_epi32(512);
2624 __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2625
2626 // load
2627 row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2628 row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2629 row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2630 row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2631 row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2632 row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2633 row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2634 row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2635
2636 // column pass
2637 dct_pass(bias_0, 10);
2638
2639 {
2640 // 16bit 8x8 transpose pass 1
2641 dct_interleave16(row0, row4);
2642 dct_interleave16(row1, row5);
2643 dct_interleave16(row2, row6);
2644 dct_interleave16(row3, row7);
2645
2646 // transpose pass 2
2647 dct_interleave16(row0, row2);
2648 dct_interleave16(row1, row3);
2649 dct_interleave16(row4, row6);
2650 dct_interleave16(row5, row7);
2651
2652 // transpose pass 3
2653 dct_interleave16(row0, row1);
2654 dct_interleave16(row2, row3);
2655 dct_interleave16(row4, row5);
2656 dct_interleave16(row6, row7);
2657 }
2658
2659 // row pass
2660 dct_pass(bias_1, 17);
2661
2662 {
2663 // pack
// (_mm_packus_epi16 narrows to bytes with unsigned saturation, which takes
// the place of the stbi__clamp calls in the generic version)
2664 __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2665 __m128i p1 = _mm_packus_epi16(row2, row3);
2666 __m128i p2 = _mm_packus_epi16(row4, row5);
2667 __m128i p3 = _mm_packus_epi16(row6, row7);
2668
2669 // 8bit 8x8 transpose pass 1
2670 dct_interleave8(p0, p2); // a0e0a1e1...
2671 dct_interleave8(p1, p3); // c0g0c1g1...
2672
2673 // transpose pass 2
2674 dct_interleave8(p0, p1); // a0c0e0g0...
2675 dct_interleave8(p2, p3); // b0d0f0h0...
2676
2677 // transpose pass 3
2678 dct_interleave8(p0, p2); // a0b0c0d0...
2679 dct_interleave8(p1, p3); // a4b4c4d4...
2680
2681 // store
// (each register holds two output rows; _mm_storel_epi64 writes the low
// 8 bytes, and the 0x4e shuffle swaps the two 64-bit halves so the second
// row of the pair can be written the same way)
2682 _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2683 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2684 _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2685 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2686 _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2687 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2688 _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2689 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2690 }
2691
2692 #undef dct_const
2693 #undef dct_rot
2694 #undef dct_widen
2695 #undef dct_wadd
2696 #undef dct_wsub
2697 #undef dct_bfly32o
2698 #undef dct_interleave8
2699 #undef dct_interleave16
2700 #undef dct_pass
2701 }
2702
2703 #endif // STBI_SSE2
2704
2705 #ifdef STBI_NEON
2706
2707 // NEON integer IDCT. should produce bit-identical
2708 // results to the generic C version.
//
// Parameters:
//   out        - destination; 8 bytes are stored per row with vst1_u8, and
//                out advances by out_stride between rows
//   out_stride - distance in bytes between consecutive output rows
//   data       - 64 coefficients, loaded as eight rows of eight 16-bit values
//                with vld1q_s16
//
// Structure: load -> add DC bias -> column pass (rounding shift by 10) ->
// 16-bit 8x8 transpose -> row pass (plain shift by 16) -> rounding,
// saturating narrow to bytes (shift by 1) -> 8-bit 8x8 transpose -> store.
// All helper macros are local to this function and #undef'd at the end.
2709 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2710 {
2711 int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2712
// the same twelve stbi__f2f constants that STBI__IDCT_1D uses, each
// broadcast across a 4-lane 16-bit vector
2713 int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2714 int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2715 int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2716 int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2717 int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2718 int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2719 int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2720 int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2721 int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2722 int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2723 int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2724 int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2725
// out = inq * coeff, widened to 32 bits (declares out_l / out_h)
2726 #define dct_long_mul(out, inq, coeff) \
2727 int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2728 int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2729
// out = acc + inq * coeff, widened to 32 bits (declares out_l / out_h)
2730 #define dct_long_mac(out, acc, inq, coeff) \
2731 int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2732 int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2733
// out = inq << 12, widened to 32 bits (declares out_l / out_h)
2734 #define dct_widen(out, inq) \
2735 int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2736 int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2737
2738 // wide add
2739 #define dct_wadd(out, a, b) \
2740 int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2741 int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2742
2743 // wide sub
2744 #define dct_wsub(out, a, b) \
2745 int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2746 int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2747
2748 // butterfly a/b, then shift using "shiftop" by "s" and pack
2749 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2750 { \
2751 dct_wadd(sum, a, b); \
2752 dct_wsub(dif, a, b); \
2753 out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2754 out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2755 }
2756
// one full 1D pass over row0..row7 (overwrites them in place); shiftop is
// the narrowing-shift intrinsic to apply and shift its shift amount
2757 #define dct_pass(shiftop, shift) \
2758 { \
2759 /* even part */ \
2760 int16x8_t sum26 = vaddq_s16(row2, row6); \
2761 dct_long_mul(p1e, sum26, rot0_0); \
2762 dct_long_mac(t2e, p1e, row6, rot0_1); \
2763 dct_long_mac(t3e, p1e, row2, rot0_2); \
2764 int16x8_t sum04 = vaddq_s16(row0, row4); \
2765 int16x8_t dif04 = vsubq_s16(row0, row4); \
2766 dct_widen(t0e, sum04); \
2767 dct_widen(t1e, dif04); \
2768 dct_wadd(x0, t0e, t3e); \
2769 dct_wsub(x3, t0e, t3e); \
2770 dct_wadd(x1, t1e, t2e); \
2771 dct_wsub(x2, t1e, t2e); \
2772 /* odd part */ \
2773 int16x8_t sum15 = vaddq_s16(row1, row5); \
2774 int16x8_t sum17 = vaddq_s16(row1, row7); \
2775 int16x8_t sum35 = vaddq_s16(row3, row5); \
2776 int16x8_t sum37 = vaddq_s16(row3, row7); \
2777 int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2778 dct_long_mul(p5o, sumodd, rot1_0); \
2779 dct_long_mac(p1o, p5o, sum17, rot1_1); \
2780 dct_long_mac(p2o, p5o, sum35, rot1_2); \
2781 dct_long_mul(p3o, sum37, rot2_0); \
2782 dct_long_mul(p4o, sum15, rot2_1); \
2783 dct_wadd(sump13o, p1o, p3o); \
2784 dct_wadd(sump24o, p2o, p4o); \
2785 dct_wadd(sump23o, p2o, p3o); \
2786 dct_wadd(sump14o, p1o, p4o); \
2787 dct_long_mac(x4, sump13o, row7, rot3_0); \
2788 dct_long_mac(x5, sump24o, row5, rot3_1); \
2789 dct_long_mac(x6, sump23o, row3, rot3_2); \
2790 dct_long_mac(x7, sump14o, row1, rot3_3); \
2791 dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2792 dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2793 dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2794 dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2795 }
2796
2797 // load
2798 row0 = vld1q_s16(data + 0*8);
2799 row1 = vld1q_s16(data + 1*8);
2800 row2 = vld1q_s16(data + 2*8);
2801 row3 = vld1q_s16(data + 3*8);
2802 row4 = vld1q_s16(data + 4*8);
2803 row5 = vld1q_s16(data + 5*8);
2804 row6 = vld1q_s16(data + 6*8);
2805 row7 = vld1q_s16(data + 7*8);
2806
2807 // add DC bias
// (adds 1024 to lane 0 of row0 only, i.e. to data[0].
// NOTE(review): presumably this is the +128 output offset of the generic
// version folded into the DC term, 1024 = 128*8 -- confirm)
2808 row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2809
2810 // column pass
2811 dct_pass(vrshrn_n_s32, 10);
2812
2813 // 16bit 8x8 transpose
2814 {
2815 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2816 // whether compilers actually get this is another story, sadly.
2817 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2818 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2819 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2820
2821 // pass 1
2822 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2823 dct_trn16(row2, row3);
2824 dct_trn16(row4, row5);
2825 dct_trn16(row6, row7);
2826
2827 // pass 2
2828 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2829 dct_trn32(row1, row3);
2830 dct_trn32(row4, row6);
2831 dct_trn32(row5, row7);
2832
2833 // pass 3
2834 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2835 dct_trn64(row1, row5);
2836 dct_trn64(row2, row6);
2837 dct_trn64(row3, row7);
2838
2839 #undef dct_trn16
2840 #undef dct_trn32
2841 #undef dct_trn64
2842 }
2843
2844 // row pass
2845 // vrshrn_n_s32 only supports shifts up to 16, we need
2846 // 17. so do a non-rounding shift of 16 first then follow
2847 // up with a rounding shift by 1.
2848 dct_pass(vshrn_n_s32, 16);
2849
2850 {
2851 // pack and round
// (vqrshrun_n_s16 performs the remaining rounding shift by 1 and narrows to
// unsigned bytes with saturation, taking the place of stbi__clamp)
2852 uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2853 uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2854 uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2855 uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2856 uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2857 uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2858 uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2859 uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2860
2861 // again, these can translate into one instruction, but often don't.
2862 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2863 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2864 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2865
2866 // sadly can't use interleaved stores here since we only write
2867 // 8 bytes to each scan line!
2868
2869 // 8x8 8-bit transpose pass 1
2870 dct_trn8_8(p0, p1);
2871 dct_trn8_8(p2, p3);
2872 dct_trn8_8(p4, p5);
2873 dct_trn8_8(p6, p7);
2874
2875 // pass 2
2876 dct_trn8_16(p0, p2);
2877 dct_trn8_16(p1, p3);
2878 dct_trn8_16(p4, p6);
2879 dct_trn8_16(p5, p7);
2880
2881 // pass 3
2882 dct_trn8_32(p0, p4);
2883 dct_trn8_32(p1, p5);
2884 dct_trn8_32(p2, p6);
2885 dct_trn8_32(p3, p7);
2886
2887 // store
2888 vst1_u8(out, p0); out += out_stride;
2889 vst1_u8(out, p1); out += out_stride;
2890 vst1_u8(out, p2); out += out_stride;
2891 vst1_u8(out, p3); out += out_stride;
2892 vst1_u8(out, p4); out += out_stride;
2893 vst1_u8(out, p5); out += out_stride;
2894 vst1_u8(out, p6); out += out_stride;
2895 vst1_u8(out, p7);
2896
2897 #undef dct_trn8_8
2898 #undef dct_trn8_16
2899 #undef dct_trn8_32
2900 }
2901
2902 #undef dct_long_mul
2903 #undef dct_long_mac
2904 #undef dct_widen
2905 #undef dct_wadd
2906 #undef dct_wsub
2907 #undef dct_bfly32o
2908 #undef dct_pass
2909 }
2910
2911 #endif // STBI_NEON
2912
2913#define STBI__MARKER_none 0xff
2914// if there's a pending marker from the entropy stream, return that
2915// otherwise, fetch from the stream and get a marker. if there's no
2916// marker, return 0xff, which is never a valid marker value
2917static stbi_uc stbi__get_marker(stbi__jpeg *j)
2918{
2919 stbi_uc x;
2920 if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2921 x = stbi__get8(j->s);
2922 if (x != 0xff) return STBI__MARKER_none;
2923 while (x == 0xff)
2924 x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2925 return x;
2926}
2927
2928// in each scan, we'll have scan_n components, and the order
2929// of the components is specified by order[]
2930#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2931
2932// after a restart interval, stbi__jpeg_reset the entropy decoder and
2933// the dc prediction
2934static void stbi__jpeg_reset(stbi__jpeg *j)
2935{
2936 j->code_bits = 0;
2937 j->code_buffer = 0;
2938 j->nomore = 0;
2939 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2940 j->marker = STBI__MARKER_none;
2941 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2942 j->eob_run = 0;
2943 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2944 // since we don't even allow 1<<30 pixels
2945}
2946
2947static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2948{
2949 stbi__jpeg_reset(z);
2950 if (!z->progressive) {
2951 if (z->scan_n == 1) {
2952 int i,j;
2953 STBI_SIMD_ALIGN(short, data[64]);
2954 int n = z->order[0];
2955 // non-interleaved data, we just need to process one block at a time,
2956 // in trivial scanline order
2957 // number of blocks to do just depends on how many actual "pixels" this
2958 // component has, independent of interleaved MCU blocking and such
2959 int w = (z->img_comp[n].x+7) >> 3;
2960 int h = (z->img_comp[n].y+7) >> 3;
2961 for (j=0; j < h; ++j) {
2962 for (i=0; i < w; ++i) {
2963 int ha = z->img_comp[n].ha;
2964 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2965 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2966 // every data block is an MCU, so countdown the restart interval
2967 if (--z->todo <= 0) {
2968 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2969 // if it's NOT a restart, then just bail, so we get corrupt data
2970 // rather than no data
2971 if (!STBI__RESTART(z->marker)) return 1;
2972 stbi__jpeg_reset(z);
2973 }
2974 }
2975 }
2976 return 1;
2977 } else { // interleaved
2978 int i,j,k,x,y;
2979 STBI_SIMD_ALIGN(short, data[64]);
2980 for (j=0; j < z->img_mcu_y; ++j) {
2981 for (i=0; i < z->img_mcu_x; ++i) {
2982 // scan an interleaved mcu... process scan_n components in order
2983 for (k=0; k < z->scan_n; ++k) {
2984 int n = z->order[k];
2985 // scan out an mcu's worth of this component; that's just determined
2986 // by the basic H and V specified for the component
2987 for (y=0; y < z->img_comp[n].v; ++y) {
2988 for (x=0; x < z->img_comp[n].h; ++x) {
2989 int x2 = (i*z->img_comp[n].h + x)*8;
2990 int y2 = (j*z->img_comp[n].v + y)*8;
2991 int ha = z->img_comp[n].ha;
2992 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2993 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2994 }
2995 }
2996 }
2997 // after all interleaved components, that's an interleaved MCU,
2998 // so now count down the restart interval
2999 if (--z->todo <= 0) {
3000 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3001 if (!STBI__RESTART(z->marker)) return 1;
3002 stbi__jpeg_reset(z);
3003 }
3004 }
3005 }
3006 return 1;
3007 }
3008 } else {
3009 if (z->scan_n == 1) {
3010 int i,j;
3011 int n = z->order[0];
3012 // non-interleaved data, we just need to process one block at a time,
3013 // in trivial scanline order
3014 // number of blocks to do just depends on how many actual "pixels" this
3015 // component has, independent of interleaved MCU blocking and such
3016 int w = (z->img_comp[n].x+7) >> 3;
3017 int h = (z->img_comp[n].y+7) >> 3;
3018 for (j=0; j < h; ++j) {
3019 for (i=0; i < w; ++i) {
3020 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
3021 if (z->spec_start == 0) {
3022 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
3023 return 0;
3024 } else {
3025 int ha = z->img_comp[n].ha;
3026 if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
3027 return 0;
3028 }
3029 // every data block is an MCU, so countdown the restart interval
3030 if (--z->todo <= 0) {
3031 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3032 if (!STBI__RESTART(z->marker)) return 1;
3033 stbi__jpeg_reset(z);
3034 }
3035 }
3036 }
3037 return 1;
3038 } else { // interleaved
3039 int i,j,k,x,y;
3040 for (j=0; j < z->img_mcu_y; ++j) {
3041 for (i=0; i < z->img_mcu_x; ++i) {
3042 // scan an interleaved mcu... process scan_n components in order
3043 for (k=0; k < z->scan_n; ++k) {
3044 int n = z->order[k];
3045 // scan out an mcu's worth of this component; that's just determined
3046 // by the basic H and V specified for the component
3047 for (y=0; y < z->img_comp[n].v; ++y) {
3048 for (x=0; x < z->img_comp[n].h; ++x) {
3049 int x2 = (i*z->img_comp[n].h + x);
3050 int y2 = (j*z->img_comp[n].v + y);
3051 short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
3052 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
3053 return 0;
3054 }
3055 }
3056 }
3057 // after all interleaved components, that's an interleaved MCU,
3058 // so now count down the restart interval
3059 if (--z->todo <= 0) {
3060 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3061 if (!STBI__RESTART(z->marker)) return 1;
3062 stbi__jpeg_reset(z);
3063 }
3064 }
3065 }
3066 return 1;
3067 }
3068 }
3069}
3070
3071static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
3072{
3073 int i;
3074 for (i=0; i < 64; ++i)
3075 data[i] *= dequant[i];
3076}
3077
3078static void stbi__jpeg_finish(stbi__jpeg *z)
3079{
3080 if (z->progressive) {
3081 // dequantize and idct the data
3082 int i,j,n;
3083 for (n=0; n < z->s->img_n; ++n) {
3084 int w = (z->img_comp[n].x+7) >> 3;
3085 int h = (z->img_comp[n].y+7) >> 3;
3086 for (j=0; j < h; ++j) {
3087 for (i=0; i < w; ++i) {
3088 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
3089 stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
3090 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
3091 }
3092 }
3093 }
3094 }
3095}
3096
3097static int stbi__process_marker(stbi__jpeg *z, int m)
3098{
3099 int L;
3100 switch (m) {
3101 case STBI__MARKER_none: // no marker found
3102 return stbi__err("expected marker","Corrupt JPEG");
3103
3104 case 0xDD: // DRI - specify restart interval
3105 if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
3106 z->restart_interval = stbi__get16be(z->s);
3107 return 1;
3108
3109 case 0xDB: // DQT - define quantization table
3110 L = stbi__get16be(z->s)-2;
3111 while (L > 0) {
3112 int q = stbi__get8(z->s);
3113 int p = q >> 4, sixteen = (p != 0);
3114 int t = q & 15,i;
3115 if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
3116 if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
3117
3118 for (i=0; i < 64; ++i)
3119 z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
3120 L -= (sixteen ? 129 : 65);
3121 }
3122 return L==0;
3123
3124 case 0xC4: // DHT - define huffman table
3125 L = stbi__get16be(z->s)-2;
3126 while (L > 0) {
3127 stbi_uc *v;
3128 int sizes[16],i,n=0;
3129 int q = stbi__get8(z->s);
3130 int tc = q >> 4;
3131 int th = q & 15;
3132 if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
3133 for (i=0; i < 16; ++i) {
3134 sizes[i] = stbi__get8(z->s);
3135 n += sizes[i];
3136 }
3137 if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
3138 L -= 17;
3139 if (tc == 0) {
3140 if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
3141 v = z->huff_dc[th].values;
3142 } else {
3143 if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
3144 v = z->huff_ac[th].values;
3145 }
3146 for (i=0; i < n; ++i)
3147 v[i] = stbi__get8(z->s);
3148 if (tc != 0)
3149 stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
3150 L -= n;
3151 }
3152 return L==0;
3153 }
3154
3155 // check for comment block or APP blocks
3156 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
3157 L = stbi__get16be(z->s);
3158 if (L < 2) {
3159 if (m == 0xFE)
3160 return stbi__err("bad COM len","Corrupt JPEG");
3161 else
3162 return stbi__err("bad APP len","Corrupt JPEG");
3163 }
3164 L -= 2;
3165
3166 if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
3167 static const unsigned char tag[5] = {'J','F','I','F','\0'};
3168 int ok = 1;
3169 int i;
3170 for (i=0; i < 5; ++i)
3171 if (stbi__get8(z->s) != tag[i])
3172 ok = 0;
3173 L -= 5;
3174 if (ok)
3175 z->jfif = 1;
3176 } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
3177 static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
3178 int ok = 1;
3179 int i;
3180 for (i=0; i < 6; ++i)
3181 if (stbi__get8(z->s) != tag[i])
3182 ok = 0;
3183 L -= 6;
3184 if (ok) {
3185 stbi__get8(z->s); // version
3186 stbi__get16be(z->s); // flags0
3187 stbi__get16be(z->s); // flags1
3188 z->app14_color_transform = stbi__get8(z->s); // color transform
3189 L -= 6;
3190 }
3191 }
3192
3193 stbi__skip(z->s, L);
3194 return 1;
3195 }
3196
3197 return stbi__err("unknown marker","Corrupt JPEG");
3198}
3199
3200// after we see SOS
3201static int stbi__process_scan_header(stbi__jpeg *z)
3202{
3203 int i;
3204 int Ls = stbi__get16be(z->s);
3205 z->scan_n = stbi__get8(z->s);
3206 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
3207 if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
3208 for (i=0; i < z->scan_n; ++i) {
3209 int id = stbi__get8(z->s), which;
3210 int q = stbi__get8(z->s);
3211 for (which = 0; which < z->s->img_n; ++which)
3212 if (z->img_comp[which].id == id)
3213 break;
3214 if (which == z->s->img_n) return 0; // no match
3215 z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
3216 z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
3217 z->order[i] = which;
3218 }
3219
3220 {
3221 int aa;
3222 z->spec_start = stbi__get8(z->s);
3223 z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
3224 aa = stbi__get8(z->s);
3225 z->succ_high = (aa >> 4);
3226 z->succ_low = (aa & 15);
3227 if (z->progressive) {
3228 if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
3229 return stbi__err("bad SOS", "Corrupt JPEG");
3230 } else {
3231 if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
3232 if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
3233 z->spec_end = 63;
3234 }
3235 }
3236
3237 return 1;
3238}
3239
3240static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
3241{
3242 int i;
3243 for (i=0; i < ncomp; ++i) {
3244 if (z->img_comp[i].raw_data) {
3245 STBI_FREE(z->img_comp[i].raw_data);
3246 z->img_comp[i].raw_data = NULL;
3247 z->img_comp[i].data = NULL;
3248 }
3249 if (z->img_comp[i].raw_coeff) {
3250 STBI_FREE(z->img_comp[i].raw_coeff);
3251 z->img_comp[i].raw_coeff = 0;
3252 z->img_comp[i].coeff = 0;
3253 }
3254 if (z->img_comp[i].linebuf) {
3255 STBI_FREE(z->img_comp[i].linebuf);
3256 z->img_comp[i].linebuf = NULL;
3257 }
3258 }
3259 return why;
3260}
3261
3262static int stbi__process_frame_header(stbi__jpeg *z, int scan)
3263{
3264 stbi__context *s = z->s;
3265 int Lf,p,i,q, h_max=1,v_max=1,c;
3266 Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
3267 p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
3268 s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
3269 s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
3270 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
3271 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
3272 c = stbi__get8(s);
3273 if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
3274 s->img_n = c;
3275 for (i=0; i < c; ++i) {
3276 z->img_comp[i].data = NULL;
3277 z->img_comp[i].linebuf = NULL;
3278 }
3279
3280 if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
3281
3282 z->rgb = 0;
3283 for (i=0; i < s->img_n; ++i) {
3284 static const unsigned char rgb[3] = { 'R', 'G', 'B' };
3285 z->img_comp[i].id = stbi__get8(s);
3286 if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
3287 ++z->rgb;
3288 q = stbi__get8(s);
3289 z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
3290 z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
3291 z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
3292 }
3293
3294 if (scan != STBI__SCAN_load) return 1;
3295
3296 if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
3297
3298 for (i=0; i < s->img_n; ++i) {
3299 if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
3300 if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
3301 }
3302
3303 // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
3304 // and I've never seen a non-corrupted JPEG file actually use them
3305 for (i=0; i < s->img_n; ++i) {
3306 if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
3307 if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
3308 }
3309
3310 // compute interleaved mcu info
3311 z->img_h_max = h_max;
3312 z->img_v_max = v_max;
3313 z->img_mcu_w = h_max * 8;
3314 z->img_mcu_h = v_max * 8;
3315 // these sizes can't be more than 17 bits
3316 z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
3317 z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
3318
3319 for (i=0; i < s->img_n; ++i) {
3320 // number of effective pixels (e.g. for non-interleaved MCU)
3321 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
3322 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
3323 // to simplify generation, we'll allocate enough memory to decode
3324 // the bogus oversized data from using interleaved MCUs and their
3325 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3326 // discard the extra data until colorspace conversion
3327 //
3328 // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
3329 // so these muls can't overflow with 32-bit ints (which we require)
3330 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3331 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3332 z->img_comp[i].coeff = 0;
3333 z->img_comp[i].raw_coeff = 0;
3334 z->img_comp[i].linebuf = NULL;
3335 z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3336 if (z->img_comp[i].raw_data == NULL)
3337 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3338 // align blocks for idct using mmx/sse
3339 z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
3340 if (z->progressive) {
3341 // w2, h2 are multiples of 8 (see above)
3342 z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
3343 z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
3344 z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
3345 if (z->img_comp[i].raw_coeff == NULL)
3346 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3347 z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
3348 }
3349 }
3350
3351 return 1;
3352}
3353
3354 // use comparisons since in some cases we handle more than one case (e.g. SOF)
// Each predicate tests a marker code (as returned by stbi__get_marker()) against
// the value(s) it names. stbi__SOF expands its argument up to three times, so
// pass a side-effect-free expression.
3355 #define stbi__DNL(x) ((x) == 0xdc)
3356 #define stbi__SOI(x) ((x) == 0xd8)
3357 #define stbi__EOI(x) ((x) == 0xd9)
// any of the three frame-header markers this decoder accepts
3358 #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3359 #define stbi__SOS(x) ((x) == 0xda)
3360
// of the accepted frame-header markers, only 0xc2 selects the progressive path
3361 #define stbi__SOF_progressive(x) ((x) == 0xc2)
3362
3363static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3364{
3365 int m;
3366 z->jfif = 0;
3367 z->app14_color_transform = -1; // valid values are 0,1,2
3368 z->marker = STBI__MARKER_none; // initialize cached marker to empty
3369 m = stbi__get_marker(z);
3370 if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
3371 if (scan == STBI__SCAN_type) return 1;
3372 m = stbi__get_marker(z);
3373 while (!stbi__SOF(m)) {
3374 if (!stbi__process_marker(z,m)) return 0;
3375 m = stbi__get_marker(z);
3376 while (m == STBI__MARKER_none) {
3377 // some files have extra padding after their blocks, so ok, we'll scan
3378 if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3379 m = stbi__get_marker(z);
3380 }
3381 }
3382 z->progressive = stbi__SOF_progressive(m);
3383 if (!stbi__process_frame_header(z, scan)) return 0;
3384 return 1;
3385}
3386
3387static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
3388{
3389 // some JPEGs have junk at end, skip over it but if we find what looks
3390 // like a valid marker, resume there
3391 while (!stbi__at_eof(j->s)) {
3392 int x = stbi__get8(j->s);
3393 while (x == 255) { // might be a marker
3394 if (stbi__at_eof(j->s)) return STBI__MARKER_none;
3395 x = stbi__get8(j->s);
3396 if (x != 0x00 && x != 0xff) {
3397 // not a stuffed zero or lead-in to another marker, looks
3398 // like an actual marker, return it
3399 return x;
3400 }
3401 // stuffed zero has x=0 now which ends the loop, meaning we go
3402 // back to regular scan loop.
3403 // repeated 0xff keeps trying to read the next byte of the marker.
3404 }
3405 }
3406 return STBI__MARKER_none;
3407}
3408
3409// decode image to YCbCr format
3410static int stbi__decode_jpeg_image(stbi__jpeg *j)
3411{
3412 int m;
3413 for (m = 0; m < 4; m++) {
3414 j->img_comp[m].raw_data = NULL;
3415 j->img_comp[m].raw_coeff = NULL;
3416 }
3417 j->restart_interval = 0;
3418 if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3419 m = stbi__get_marker(j);
3420 while (!stbi__EOI(m)) {
3421 if (stbi__SOS(m)) {
3422 if (!stbi__process_scan_header(j)) return 0;
3423 if (!stbi__parse_entropy_coded_data(j)) return 0;
3424 if (j->marker == STBI__MARKER_none ) {
3425 j->marker = stbi__skip_jpeg_junk_at_end(j);
3426 // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3427 }
3428 m = stbi__get_marker(j);
3429 if (STBI__RESTART(m))
3430 m = stbi__get_marker(j);
3431 } else if (stbi__DNL(m)) {
3432 int Ld = stbi__get16be(j->s);
3433 stbi__uint32 NL = stbi__get16be(j->s);
3434 if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3435 if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3436 m = stbi__get_marker(j);
3437 } else {
3438 if (!stbi__process_marker(j, m)) return 1;
3439 m = stbi__get_marker(j);
3440 }
3441 }
3442 if (j->progressive)
3443 stbi__jpeg_finish(j);
3444 return 1;
3445}
3446
3447// static jfif-centered resampling (across block boundaries)
3448
// Common signature of the row resamplers. load_jpeg_image calls it as
// resample(linebuf, nearer_row, farther_row, w_lores, hs) and reads the
// expanded row through the returned pointer (which may be 'out' or an input row).
3449 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3450 int w, int hs);
3451
// divide by 4 with a right shift and truncate to stbi_uc; callers add 2 first to round
3452 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3453
3454static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3455{
3456 STBI_NOTUSED(out);
3457 STBI_NOTUSED(in_far);
3458 STBI_NOTUSED(w);
3459 STBI_NOTUSED(hs);
3460 return in_near;
3461}
3462
3463static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3464{
3465 // need to generate two samples vertically for every one in input
3466 int i;
3467 STBI_NOTUSED(hs);
3468 for (i=0; i < w; ++i)
3469 out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
3470 return out;
3471}
3472
3473static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3474{
3475 // need to generate two samples horizontally for every one in input
3476 int i;
3477 stbi_uc *input = in_near;
3478
3479 if (w == 1) {
3480 // if only one sample, can't do any interpolation
3481 out[0] = out[1] = input[0];
3482 return out;
3483 }
3484
3485 out[0] = input[0];
3486 out[1] = stbi__div4(input[0]*3 + input[1] + 2);
3487 for (i=1; i < w-1; ++i) {
3488 int n = 3*input[i]+2;
3489 out[i*2+0] = stbi__div4(n+input[i-1]);
3490 out[i*2+1] = stbi__div4(n+input[i+1]);
3491 }
3492 out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
3493 out[i*2+1] = input[w-1];
3494
3495 STBI_NOTUSED(in_far);
3496 STBI_NOTUSED(hs);
3497
3498 return out;
3499}
3500
3501#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3502
3503static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3504{
3505 // need to generate 2x2 samples for every one in input
3506 int i,t0,t1;
3507 if (w == 1) {
3508 out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3509 return out;
3510 }
3511
3512 t1 = 3*in_near[0] + in_far[0];
3513 out[0] = stbi__div4(t1+2);
3514 for (i=1; i < w; ++i) {
3515 t0 = t1;
3516 t1 = 3*in_near[i]+in_far[i];
3517 out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3518 out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3519 }
3520 out[w*2-1] = stbi__div4(t1+2);
3521
3522 STBI_NOTUSED(hs);
3523
3524 return out;
3525}
3526
3527 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SSE2/NEON variant of the 2x2 row resampler. Applies the same filter as the
// scalar stbi__resample_row_hv_2 (vertical 3*near + far, then a 3:1 horizontal
// blend with rounding), but handles 8 input samples per iteration where possible.
//   out      - receives 2*w samples
//   in_near  - input row nearer to the output row
//   in_far   - input row farther from the output row
//   w        - number of input samples
//   hs       - unused
// Returns 'out'.
3528 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3529 {
3530 // need to generate 2x2 samples for every one in input
3531 int i=0,t0,t1;
3532
3533 if (w == 1) {
3534 out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3535 return out;
3536 }
3537
3538 t1 = 3*in_near[0] + in_far[0];
3539 // process groups of 8 pixels for as long as we can.
3540 // note we can't handle the last pixel in a row in this loop
3541 // because we need to handle the filter boundary conditions.
// (the bound (w-1) & ~7 guarantees index i+8, read below, is still < w)
3542 for (; i < ((w-1) & ~7); i += 8) {
3543 #if defined(STBI_SSE2)
3544 // load and perform the vertical filtering pass
3545 // this uses 3*x + y = 4*x + (y - x)
3546 __m128i zero = _mm_setzero_si128();
3547 __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
3548 __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
3549 __m128i farw = _mm_unpacklo_epi8(farb, zero);
3550 __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
3551 __m128i diff = _mm_sub_epi16(farw, nearw);
3552 __m128i nears = _mm_slli_epi16(nearw, 2);
3553 __m128i curr = _mm_add_epi16(nears, diff); // current row
3554
3555 // horizontal filter works the same based on shifted vers of current
3556 // row. "prev" is current row shifted right by 1 pixel; we need to
3557 // insert the previous pixel value (from t1).
3558 // "next" is current row shifted left by 1 pixel, with first pixel
3559 // of next block of 8 pixels added in.
3560 __m128i prv0 = _mm_slli_si128(curr, 2);
3561 __m128i nxt0 = _mm_srli_si128(curr, 2);
3562 __m128i prev = _mm_insert_epi16(prv0, t1, 0);
3563 __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
3564
3565 // horizontal filter, polyphase implementation since it's convenient:
3566 // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3567 // odd pixels = 3*cur + next = cur*4 + (next - cur)
3568 // note the shared term.
3569 __m128i bias = _mm_set1_epi16(8);
3570 __m128i curs = _mm_slli_epi16(curr, 2);
3571 __m128i prvd = _mm_sub_epi16(prev, curr);
3572 __m128i nxtd = _mm_sub_epi16(next, curr);
3573 __m128i curb = _mm_add_epi16(curs, bias);
3574 __m128i even = _mm_add_epi16(prvd, curb);
3575 __m128i odd = _mm_add_epi16(nxtd, curb);
3576
3577 // interleave even and odd pixels, then undo scaling.
3578 __m128i int0 = _mm_unpacklo_epi16(even, odd);
3579 __m128i int1 = _mm_unpackhi_epi16(even, odd);
3580 __m128i de0 = _mm_srli_epi16(int0, 4);
3581 __m128i de1 = _mm_srli_epi16(int1, 4);
3582
3583 // pack and write output
3584 __m128i outv = _mm_packus_epi16(de0, de1);
3585 _mm_storeu_si128((__m128i *) (out + i*2), outv);
3586 #elif defined(STBI_NEON)
3587 // load and perform the vertical filtering pass
3588 // this uses 3*x + y = 4*x + (y - x)
3589 uint8x8_t farb = vld1_u8(in_far + i);
3590 uint8x8_t nearb = vld1_u8(in_near + i);
3591 int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
3592 int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
3593 int16x8_t curr = vaddq_s16(nears, diff); // current row
3594
3595 // horizontal filter works the same based on shifted vers of current
3596 // row. "prev" is current row shifted right by 1 pixel; we need to
3597 // insert the previous pixel value (from t1).
3598 // "next" is current row shifted left by 1 pixel, with first pixel
3599 // of next block of 8 pixels added in.
3600 int16x8_t prv0 = vextq_s16(curr, curr, 7);
3601 int16x8_t nxt0 = vextq_s16(curr, curr, 1);
3602 int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
3603 int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
3604
3605 // horizontal filter, polyphase implementation since it's convenient:
3606 // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3607 // odd pixels = 3*cur + next = cur*4 + (next - cur)
3608 // note the shared term.
3609 int16x8_t curs = vshlq_n_s16(curr, 2);
3610 int16x8_t prvd = vsubq_s16(prev, curr);
3611 int16x8_t nxtd = vsubq_s16(next, curr);
3612 int16x8_t even = vaddq_s16(curs, prvd);
3613 int16x8_t odd = vaddq_s16(curs, nxtd);
3614
3615 // undo scaling and round, then store with even/odd phases interleaved
// (no explicit +8 bias here, unlike the SSE2 path: the narrowing shift does the rounding)
3616 uint8x8x2_t o;
3617 o.val[0] = vqrshrun_n_s16(even, 4);
3618 o.val[1] = vqrshrun_n_s16(odd, 4);
3619 vst2_u8(out + i*2, o);
3620 #endif
3621
3622 // "previous" value for next iter
3623 t1 = 3*in_near[i+7] + in_far[i+7];
3624 }
3625
// scalar tail: first emit the even output of pixel i (its "previous" value
// is t1 carried over from the SIMD loop, or pixel 0 itself when i == 0) ...
3626 t0 = t1;
3627 t1 = 3*in_near[i] + in_far[i];
3628 out[i*2] = stbi__div16(3*t1 + t0 + 8);
3629
// ... then the remaining pixels exactly as in the scalar resampler
3630 for (++i; i < w; ++i) {
3631 t0 = t1;
3632 t1 = 3*in_near[i]+in_far[i];
3633 out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3634 out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3635 }
// right edge: the last output sample has no right-hand neighbour to blend with
3636 out[w*2-1] = stbi__div4(t1+2);
3637
3638 STBI_NOTUSED(hs);
3639
3640 return out;
3641 }
3642 #endif
3643
3644static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3645{
3646 // resample with nearest-neighbor
3647 int i,j;
3648 STBI_NOTUSED(in_far);
3649 for (i=0; i < w; ++i)
3650 for (j=0; j < hs; ++j)
3651 out[i*hs+j] = in_near[i];
3652 return out;
3653}
3654
3655// this is a reduced-precision calculation of YCbCr-to-RGB introduced
3656// to make sure the code produces the same results in both SIMD and scalar
3657#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
3658static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3659{
3660 int i;
3661 for (i=0; i < count; ++i) {
3662 int y_fixed = (y[i] << 20) + (1<<19); // rounding
3663 int r,g,b;
3664 int cr = pcr[i] - 128;
3665 int cb = pcb[i] - 128;
3666 r = y_fixed + cr* stbi__float2fixed(1.40200f);
3667 g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3668 b = y_fixed + cb* stbi__float2fixed(1.77200f);
3669 r >>= 20;
3670 g >>= 20;
3671 b >>= 20;
3672 if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3673 if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3674 if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3675 out[0] = (stbi_uc)r;
3676 out[1] = (stbi_uc)g;
3677 out[2] = (stbi_uc)b;
3678 out[3] = 255;
3679 out += step;
3680 }
3681}
3682
3683 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SSE2/NEON YCbCr -> RGB row converter with the same interface as
// stbi__YCbCr_to_RGB_row. The vector paths run only when step == 4 and
// convert 8 pixels (32 output bytes) per iteration; leftover pixels, and
// every pixel when step != 4, go through the scalar loop at the end.
//   out          - interleaved output, 'step' bytes between pixels
//   y, pcb, pcr  - input rows for the Y, Cb and Cr components
//   count        - number of pixels to convert
3684 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
3685 {
3686 int i = 0;
3687
3688 #ifdef STBI_SSE2
3689 // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3690 // it's useful in practice (you wouldn't use it for textures, for example).
3691 // so just accelerate step == 4 case.
3692 if (step == 4) {
3693 // this is a fairly straightforward implementation and not super-optimized.
3694 __m128i signflip = _mm_set1_epi8(-0x80);
3695 __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
3696 __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
3697 __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
3698 __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
3699 __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
3700 __m128i xw = _mm_set1_epi16(255); // alpha channel
3701
3702 for (; i+7 < count; i += 8) {
3703 // load
3704 __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
3705 __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
3706 __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
3707 __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3708 __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3709
3710 // unpack to short (and left-shift cr, cb by 8)
// (y lands in the high byte with the rounding bias 128 in the low byte)
3711 __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
3712 __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3713 __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3714
3715 // color transform
3716 __m128i yws = _mm_srli_epi16(yw, 4);
3717 __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3718 __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3719 __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3720 __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3721 __m128i rws = _mm_add_epi16(cr0, yws);
3722 __m128i gwt = _mm_add_epi16(cb0, yws);
3723 __m128i bws = _mm_add_epi16(yws, cb1);
3724 __m128i gws = _mm_add_epi16(gwt, cr1);
3725
3726 // descale
3727 __m128i rw = _mm_srai_epi16(rws, 4);
3728 __m128i bw = _mm_srai_epi16(bws, 4);
3729 __m128i gw = _mm_srai_epi16(gws, 4);
3730
3731 // back to byte, set up for transpose
// (the unsigned-saturating pack also clamps each channel to 0..255)
3732 __m128i brb = _mm_packus_epi16(rw, bw);
3733 __m128i gxb = _mm_packus_epi16(gw, xw);
3734
3735 // transpose to interleave channels
3736 __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3737 __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3738 __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3739 __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3740
3741 // store
3742 _mm_storeu_si128((__m128i *) (out + 0), o0);
3743 _mm_storeu_si128((__m128i *) (out + 16), o1);
3744 out += 32;
3745 }
3746 }
3747 #endif
3748
3749 #ifdef STBI_NEON
3750 // in this version, step=3 support would be easy to add. but is there demand?
3751 if (step == 4) {
3752 // this is a fairly straightforward implementation and not super-optimized.
3753 uint8x8_t signflip = vdup_n_u8(0x80);
3754 int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
3755 int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
3756 int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
3757 int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
3758
3759 for (; i+7 < count; i += 8) {
3760 // load
3761 uint8x8_t y_bytes = vld1_u8(y + i);
3762 uint8x8_t cr_bytes = vld1_u8(pcr + i);
3763 uint8x8_t cb_bytes = vld1_u8(pcb + i);
3764 int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3765 int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3766
3767 // expand to s16
3768 int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3769 int16x8_t crw = vshll_n_s8(cr_biased, 7);
3770 int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3771
3772 // color transform
3773 int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3774 int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3775 int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3776 int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3777 int16x8_t rws = vaddq_s16(yws, cr0);
3778 int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3779 int16x8_t bws = vaddq_s16(yws, cb1);
3780
3781 // undo scaling, round, convert to byte
3782 uint8x8x4_t o;
3783 o.val[0] = vqrshrun_n_s16(rws, 4);
3784 o.val[1] = vqrshrun_n_s16(gws, 4);
3785 o.val[2] = vqrshrun_n_s16(bws, 4);
3786 o.val[3] = vdup_n_u8(255);
3787
3788 // store, interleaving r/g/b/a
3789 vst4_u8(out, o);
3790 out += 8*4;
3791 }
3792 }
3793 #endif
3794
// scalar loop for whatever the vector paths left over; same arithmetic as
// stbi__YCbCr_to_RGB_row. out[3] is written even when step is 3, so the byte
// after the last pixel gets touched too.
// NOTE(review): presumably covered by the extra byte load_jpeg_image requests
// from stbi__malloc_mad3 - confirm.
3795 for (; i < count; ++i) {
3796 int y_fixed = (y[i] << 20) + (1<<19); // rounding
3797 int r,g,b;
3798 int cr = pcr[i] - 128;
3799 int cb = pcb[i] - 128;
3800 r = y_fixed + cr* stbi__float2fixed(1.40200f);
3801 g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3802 b = y_fixed + cb* stbi__float2fixed(1.77200f);
3803 r >>= 20;
3804 g >>= 20;
3805 b >>= 20;
3806 if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3807 if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3808 if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3809 out[0] = (stbi_uc)r;
3810 out[1] = (stbi_uc)g;
3811 out[2] = (stbi_uc)b;
3812 out[3] = 255;
3813 out += step;
3814 }
3815 }
3816 #endif
3817
3818// set up the kernels
3819static void stbi__setup_jpeg(stbi__jpeg *j)
3820{
3821 j->idct_block_kernel = stbi__idct_block;
3822 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3823 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3824
3825#ifdef STBI_SSE2
3826 if (stbi__sse2_available()) {
3827 j->idct_block_kernel = stbi__idct_simd;
3828 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3829 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3830 }
3831#endif
3832
3833#ifdef STBI_NEON
3834 j->idct_block_kernel = stbi__idct_simd;
3835 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3836 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3837#endif
3838}
3839
3840// clean up the temporary component buffers
3841static void stbi__cleanup_jpeg(stbi__jpeg *j)
3842{
3843 stbi__free_jpeg_components(j, j->s->img_n, 0);
3844}
3845
// Per-component resampling state used by load_jpeg_image while it walks the
// output rows from top to bottom.
3846 typedef struct
3847 {
// row resampler chosen from the component's hs/vs factors
3848 resample_row_func resample;
// two adjacent pre-expansion rows; line1 is advanced by the component's w2
// each time a source row is consumed, and line0 then takes line1's old value
3849 stbi_uc *line0,*line1;
3850 int hs,vs; // expansion factor in each axis
3851 int w_lores; // horizontal pixels pre-expansion
3852 int ystep; // how far through vertical expansion we are
3853 int ypos; // which pre-expansion row we're on
3854 } stbi__resample;
3855
3856// fast 0..255 * 0..255 => 0..255 rounded multiplication
3857static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3858{
3859 unsigned int t = x*y + 128;
3860 return (stbi_uc) ((t + (t >>8)) >> 8);
3861}
3862
// Decode a complete JPEG into an interleaved 8-bit pixel buffer.
//   z         - decoder state; z->s must already point at the input context
//   out_x/y   - receive the image width and height
//   comp      - if non-NULL, receives 3 when the file has >= 3 components, else 1
//   req_comp  - 0 for the default channel count (3 or 1), otherwise 1..4
// Returns the pixel buffer obtained from stbi__malloc_mad3 (n bytes per pixel,
// rows top to bottom), or NULL on failure. Once decoding has been attempted,
// the per-component temporaries are released through stbi__cleanup_jpeg on
// every exit path.
3863 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3864 {
3865 int n, decode_n, is_rgb;
3866 z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3867
3868 // validate req_comp
3869 if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3870
3871 // load a jpeg image from whichever source, but leave in YCbCr format
3872 if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3873
3874 // determine actual number of components to generate
3875 n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3876
// the three components are treated as already-RGB when all three component
// ids spelled 'R','G','B' (z->rgb == 3), or when app14_color_transform is 0
// and the jfif flag is not set
3877 is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3878
// grey output from a YCbCr image only needs the first (Y) component
3879 if (z->s->img_n == 3 && n < 3 && !is_rgb)
3880 decode_n = 1;
3881 else
3882 decode_n = z->s->img_n;
3883
3884 // nothing to do if no components requested; check this now to avoid
3885 // accessing uninitialized coutput[0] later
3886 if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
3887
3888 // resample and color-convert
3889 {
3890 int k;
3891 unsigned int i,j;
3892 stbi_uc *output;
3893 stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
3894
3895 stbi__resample res_comp[4];
3896
3897 for (k=0; k < decode_n; ++k) {
3898 stbi__resample *r = &res_comp[k];
3899
3900 // allocate line buffer big enough for upsampling off the edges
3901 // with upsample factor of 4
3902 z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3903 if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3904
3905 r->hs = z->img_h_max / z->img_comp[k].h;
3906 r->vs = z->img_v_max / z->img_comp[k].v;
3907 r->ystep = r->vs >> 1;
3908 r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3909 r->ypos = 0;
3910 r->line0 = r->line1 = z->img_comp[k].data;
3911
// pick the resampler matching this component's expansion factors
3912 if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3913 else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3914 else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3915 else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3916 else r->resample = stbi__resample_row_generic;
3917 }
3918
3919 // can't error after this so, this is safe
// NOTE(review): the trailing 1 presumably requests one spare byte for the
// out[3] = 255 stores made when n == 3 - confirm against stbi__malloc_mad3
3920 output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3921 if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3922
3923 // now go ahead and resample
3924 for (j=0; j < z->s->img_y; ++j) {
3925 stbi_uc *out = output + n * z->s->img_x * j;
3926 for (k=0; k < decode_n; ++k) {
3927 stbi__resample *r = &res_comp[k];
// y_bot: in the second half of the vertical expansion step line1 is the
// nearer source row, otherwise line0 is
3928 int y_bot = r->ystep >= (r->vs >> 1);
3929 coutput[k] = r->resample(z->img_comp[k].linebuf,
3930 y_bot ? r->line1 : r->line0,
3931 y_bot ? r->line0 : r->line1,
3932 r->w_lores, r->hs);
// after vs output rows move on to the next source row; line1 stops
// advancing once the component's last row has been reached
3933 if (++r->ystep >= r->vs) {
3934 r->ystep = 0;
3935 r->line0 = r->line1;
3936 if (++r->ypos < z->img_comp[k].y)
3937 r->line1 += z->img_comp[k].w2;
3938 }
3939 }
// colour output (3 or 4 channels)
3940 if (n >= 3) {
3941 stbi_uc *y = coutput[0];
3942 if (z->s->img_n == 3) {
3943 if (is_rgb) {
3944 for (i=0; i < z->s->img_x; ++i) {
3945 out[0] = y[i];
3946 out[1] = coutput[1][i];
3947 out[2] = coutput[2][i];
3948 out[3] = 255;
3949 out += n;
3950 }
3951 } else {
3952 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3953 }
3954 } else if (z->s->img_n == 4) {
3955 if (z->app14_color_transform == 0) { // CMYK
3956 for (i=0; i < z->s->img_x; ++i) {
3957 stbi_uc m = coutput[3][i];
3958 out[0] = stbi__blinn_8x8(coutput[0][i], m);
3959 out[1] = stbi__blinn_8x8(coutput[1][i], m);
3960 out[2] = stbi__blinn_8x8(coutput[2][i], m);
3961 out[3] = 255;
3962 out += n;
3963 }
3964 } else if (z->app14_color_transform == 2) { // YCCK
// convert the first three channels to RGB in place, then invert and scale by the fourth
3965 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3966 for (i=0; i < z->s->img_x; ++i) {
3967 stbi_uc m = coutput[3][i];
3968 out[0] = stbi__blinn_8x8(255 - out[0], m);
3969 out[1] = stbi__blinn_8x8(255 - out[1], m);
3970 out[2] = stbi__blinn_8x8(255 - out[2], m);
3971 out += n;
3972 }
3973 } else { // YCbCr + alpha? Ignore the fourth channel for now
3974 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3975 }
3976 } else
// any other component count: replicate the first component into R, G and B
3977 for (i=0; i < z->s->img_x; ++i) {
3978 out[0] = out[1] = out[2] = y[i];
3979 out[3] = 255; // not used if n==3
3980 out += n;
3981 }
3982 } else {
// grey output (1 or 2 channels)
3983 if (is_rgb) {
3984 if (n == 1)
3985 for (i=0; i < z->s->img_x; ++i)
3986 *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3987 else {
3988 for (i=0; i < z->s->img_x; ++i, out += 2) {
3989 out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3990 out[1] = 255;
3991 }
3992 }
3993 } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
// CMYK source: when n == 1 the out[1] store lands on the next pixel's byte
// and is overwritten by the following iteration
3994 for (i=0; i < z->s->img_x; ++i) {
3995 stbi_uc m = coutput[3][i];
3996 stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3997 stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3998 stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3999 out[0] = stbi__compute_y(r, g, b);
4000 out[1] = 255;
4001 out += n;
4002 }
4003 } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
// YCCK source: grey is the inverted first channel scaled by the fourth
4004 for (i=0; i < z->s->img_x; ++i) {
4005 out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
4006 out[1] = 255;
4007 out += n;
4008 }
4009 } else {
// otherwise the first component is used as the grey value directly
4010 stbi_uc *y = coutput[0];
4011 if (n == 1)
4012 for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
4013 else
4014 for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
4015 }
4016 }
4017 }
4018 stbi__cleanup_jpeg(z);
4019 *out_x = z->s->img_x;
4020 *out_y = z->s->img_y;
4021 if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
4022 return output;
4023 }
4024 }
4025
4026static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4027{
4028 unsigned char* result;
4029 stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
4030 if (!j) return stbi__errpuc("outofmem", "Out of memory");
4031 memset(j, 0, sizeof(stbi__jpeg));
4032 STBI_NOTUSED(ri);
4033 j->s = s;
4034 stbi__setup_jpeg(j);
4035 result = load_jpeg_image(j, x,y,comp,req_comp);
4036 STBI_FREE(j);
4037 return result;
4038}
4039
4040static int stbi__jpeg_test(stbi__context *s)
4041{
4042 int r;
4043 stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
4044 if (!j) return stbi__err("outofmem", "Out of memory");
4045 memset(j, 0, sizeof(stbi__jpeg));
4046 j->s = s;
4047 stbi__setup_jpeg(j);
4048 r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
4049 stbi__rewind(s);
4050 STBI_FREE(j);
4051 return r;
4052}
4053
4054static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
4055{
4056 if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
4057 stbi__rewind( j->s );
4058 return 0;
4059 }
4060 if (x) *x = j->s->img_x;
4061 if (y) *y = j->s->img_y;
4062 if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
4063 return 1;
4064}
4065
4066static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
4067{
4068 int result;
4069 stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
4070 if (!j) return stbi__err("outofmem", "Out of memory");
4071 memset(j, 0, sizeof(stbi__jpeg));
4072 j->s = s;
4073 result = stbi__jpeg_info_raw(j, x, y, comp);
4074 STBI_FREE(j);
4075 return result;
4076}
4077#endif
4078
4079// public domain zlib decode v0.2 Sean Barrett 2006-11-18
4080// simple implementation
4081// - all input must be provided in an upfront buffer
4082// - all output is written to a single output buffer (can malloc/realloc)
4083// performance
4084// - fast huffman
4085
4086#ifndef STBI_NO_ZLIB
4087
4088// fast-way is faster to check than jpeg huffman, but slow way is slower
// Sizing constants for the zlib Huffman tables: the fast lookup table is
// indexed by the low STBI__ZFAST_BITS bits of the bit buffer (selected with
// STBI__ZFAST_MASK); the per-symbol arrays hold STBI__ZNSYMS entries.
4089 #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
4090 #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
4091 #define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
4092
4093// zlib-style huffman encoding
4094 // (JPEG packs bits from the left, zlib from the right, so they can't share code)
// Decoding tables for one Huffman code, filled in by stbi__zbuild_huffman.
4095 typedef struct
4096 {
// fast path: indexed by the next STBI__ZFAST_BITS input bits; an entry is
// (code length << 9) | symbol, or 0 when the code is longer than the table covers
4097 stbi__uint16 fast[1 << STBI__ZFAST_BITS];
// first code value assigned to each code length
4098 stbi__uint16 firstcode[16];
// exclusive upper bound of the codes of each length, pre-shifted to 16 bits;
// entry 16 is a sentinel (0x10000) that stops the slow-path search
4099 int maxcode[17];
// index into size[]/value[] of the first symbol of each code length
4100 stbi__uint16 firstsymbol[16];
// code length and symbol value, stored in code order
4101 stbi_uc size[STBI__ZNSYMS];
4102 stbi__uint16 value[STBI__ZNSYMS];
4103 } stbi__zhuffman;
4104
4105stbi_inline static int stbi__bitreverse16(int n)
4106{
4107 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
4108 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
4109 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
4110 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
4111 return n;
4112}
4113
4114stbi_inline static int stbi__bit_reverse(int v, int bits)
4115{
4116 STBI_ASSERT(bits <= 16);
4117 // to bit reverse n bits, reverse 16 and shift
4118 // e.g. 11 bits, bit reverse and shift away 5
4119 return stbi__bitreverse16(v) >> (16-bits);
4120}
4121
4122static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
4123{
4124 int i,k=0;
4125 int code, next_code[16], sizes[17];
4126
4127 // DEFLATE spec for generating codes
4128 memset(sizes, 0, sizeof(sizes));
4129 memset(z->fast, 0, sizeof(z->fast));
4130 for (i=0; i < num; ++i)
4131 ++sizes[sizelist[i]];
4132 sizes[0] = 0;
4133 for (i=1; i < 16; ++i)
4134 if (sizes[i] > (1 << i))
4135 return stbi__err("bad sizes", "Corrupt PNG");
4136 code = 0;
4137 for (i=1; i < 16; ++i) {
4138 next_code[i] = code;
4139 z->firstcode[i] = (stbi__uint16) code;
4140 z->firstsymbol[i] = (stbi__uint16) k;
4141 code = (code + sizes[i]);
4142 if (sizes[i])
4143 if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
4144 z->maxcode[i] = code << (16-i); // preshift for inner loop
4145 code <<= 1;
4146 k += sizes[i];
4147 }
4148 z->maxcode[16] = 0x10000; // sentinel
4149 for (i=0; i < num; ++i) {
4150 int s = sizelist[i];
4151 if (s) {
4152 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
4153 stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
4154 z->size [c] = (stbi_uc ) s;
4155 z->value[c] = (stbi__uint16) i;
4156 if (s <= STBI__ZFAST_BITS) {
4157 int j = stbi__bit_reverse(next_code[s],s);
4158 while (j < (1 << STBI__ZFAST_BITS)) {
4159 z->fast[j] = fastv;
4160 j += (1 << s);
4161 }
4162 }
4163 ++next_code[s];
4164 }
4165 }
4166 return 1;
4167}
4168
4169// zlib-from-memory implementation for PNG reading
4170// because PNG allows splitting the zlib stream arbitrarily,
4171// and it's annoying structurally to have PNG call ZLIB call PNG,
4172// we require PNG read all the IDATs and combine them into a single
4173// memory buffer
4174
// State of one zlib decode: input cursor, bit buffer, output buffer and the
// two Huffman tables.
4175 typedef struct
4176 {
// next input byte and end of the input buffer
4177 stbi_uc *zbuffer, *zbuffer_end;
// number of valid bits currently held in code_buffer
4178 int num_bits;
// bit buffer; new input bytes are OR'ed in above the bits already present
4179 stbi__uint32 code_buffer;
4180
// output write cursor, start of the output buffer and its end
4181 char *zout;
4182 char *zout_start;
4183 char *zout_end;
// nonzero if the output buffer may be grown; stbi__zexpand fails otherwise
4184 int z_expandable;
4185
// NOTE(review): presumably the literal/length and distance code tables -
// their use is outside this excerpt, confirm
4186 stbi__zhuffman z_length, z_distance;
4187 } stbi__zbuf;
4188
4189stbi_inline static int stbi__zeof(stbi__zbuf *z)
4190{
4191 return (z->zbuffer >= z->zbuffer_end);
4192}
4193
4194stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
4195{
4196 return stbi__zeof(z) ? 0 : *z->zbuffer++;
4197}
4198
4199static void stbi__fill_bits(stbi__zbuf *z)
4200{
4201 do {
4202 if (z->code_buffer >= (1U << z->num_bits)) {
4203 z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
4204 return;
4205 }
4206 z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
4207 z->num_bits += 8;
4208 } while (z->num_bits <= 24);
4209}
4210
4211stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
4212{
4213 unsigned int k;
4214 if (z->num_bits < n) stbi__fill_bits(z);
4215 k = z->code_buffer & ((1 << n) - 1);
4216 z->code_buffer >>= n;
4217 z->num_bits -= n;
4218 return k;
4219}
4220
4221static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
4222{
4223 int b,s,k;
4224 // not resolved by fast table, so compute it the slow way
4225 // use jpeg approach, which requires MSbits at top
4226 k = stbi__bit_reverse(a->code_buffer, 16);
4227 for (s=STBI__ZFAST_BITS+1; ; ++s)
4228 if (k < z->maxcode[s])
4229 break;
4230 if (s >= 16) return -1; // invalid code!
4231 // code size is s, so:
4232 b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
4233 if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
4234 if (z->size[b] != s) return -1; // was originally an assert, but report failure instead.
4235 a->code_buffer >>= s;
4236 a->num_bits -= s;
4237 return z->value[b];
4238}
4239
4240stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
4241{
4242 int b,s;
4243 if (a->num_bits < 16) {
4244 if (stbi__zeof(a)) {
4245 return -1; /* report error for unexpected end of data. */
4246 }
4247 stbi__fill_bits(a);
4248 }
4249 b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
4250 if (b) {
4251 s = b >> 9;
4252 a->code_buffer >>= s;
4253 a->num_bits -= s;
4254 return b & 511;
4255 }
4256 return stbi__zhuffman_decode_slowpath(a, z);
4257}
4258
4259static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
4260{
4261 char *q;
4262 unsigned int cur, limit, old_limit;
4263 z->zout = zout;
4264 if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
4265 cur = (unsigned int) (z->zout - z->zout_start);
4266 limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
4267 if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
4268 while (cur + n > limit) {
4269 if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
4270 limit *= 2;
4271 }
4272 q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
4273 STBI_NOTUSED(old_limit);
4274 if (q == NULL) return stbi__err("outofmem", "Out of memory");
4275 z->zout_start = q;
4276 z->zout = q + cur;
4277 z->zout_end = q + limit;
4278 return 1;
4279}
4280
// DEFLATE length codes, indexed by (symbol - 257): base match length and
// number of extra bits to read. The last two entries are padding.
static const int stbi__zlength_base[31] = {
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0
};

static const int stbi__zlength_extra[31] = {
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0
};

// DEFLATE distance codes, indexed by symbol: base distance and number of
// extra bits to read. The last two entries are padding.
static const int stbi__zdist_base[32] = {
       1,     2,     3,     4,     5,     7,     9,    13,
      17,    25,    33,    49,    65,    97,   129,   193,
     257,   385,   513,   769,  1025,  1537,  2049,  3073,
    4097,  6145,  8193, 12289, 16385, 24577,     0,     0
};

static const int stbi__zdist_extra[32] = {
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,  0,  0
};
4294
4295static int stbi__parse_huffman_block(stbi__zbuf *a)
4296{
4297 char *zout = a->zout;
4298 for(;;) {
4299 int z = stbi__zhuffman_decode(a, &a->z_length);
4300 if (z < 256) {
4301 if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
4302 if (zout >= a->zout_end) {
4303 if (!stbi__zexpand(a, zout, 1)) return 0;
4304 zout = a->zout;
4305 }
4306 *zout++ = (char) z;
4307 } else {
4308 stbi_uc *p;
4309 int len,dist;
4310 if (z == 256) {
4311 a->zout = zout;
4312 return 1;
4313 }
4314 if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
4315 z -= 257;
4316 len = stbi__zlength_base[z];
4317 if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
4318 z = stbi__zhuffman_decode(a, &a->z_distance);
4319 if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
4320 dist = stbi__zdist_base[z];
4321 if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
4322 if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
4323 if (zout + len > a->zout_end) {
4324 if (!stbi__zexpand(a, zout, len)) return 0;
4325 zout = a->zout;
4326 }
4327 p = (stbi_uc *) (zout - dist);
4328 if (dist == 1) { // run of one byte; common in images.
4329 stbi_uc v = *p;
4330 if (len) { do *zout++ = v; while (--len); }
4331 } else {
4332 if (len) { do *zout++ = *p++; while (--len); }
4333 }
4334 }
4335 }
4336}
4337
4338static int stbi__compute_huffman_codes(stbi__zbuf *a)
4339{
4340 static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
4341 stbi__zhuffman z_codelength;
4342 stbi_uc lencodes[286+32+137];//padding for maximum single op
4343 stbi_uc codelength_sizes[19];
4344 int i,n;
4345
4346 int hlit = stbi__zreceive(a,5) + 257;
4347 int hdist = stbi__zreceive(a,5) + 1;
4348 int hclen = stbi__zreceive(a,4) + 4;
4349 int ntot = hlit + hdist;
4350
4351 memset(codelength_sizes, 0, sizeof(codelength_sizes));
4352 for (i=0; i < hclen; ++i) {
4353 int s = stbi__zreceive(a,3);
4354 codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
4355 }
4356 if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
4357
4358 n = 0;
4359 while (n < ntot) {
4360 int c = stbi__zhuffman_decode(a, &z_codelength);
4361 if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
4362 if (c < 16)
4363 lencodes[n++] = (stbi_uc) c;
4364 else {
4365 stbi_uc fill = 0;
4366 if (c == 16) {
4367 c = stbi__zreceive(a,2)+3;
4368 if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
4369 fill = lencodes[n-1];
4370 } else if (c == 17) {
4371 c = stbi__zreceive(a,3)+3;
4372 } else if (c == 18) {
4373 c = stbi__zreceive(a,7)+11;
4374 } else {
4375 return stbi__err("bad codelengths", "Corrupt PNG");
4376 }
4377 if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
4378 memset(lencodes+n, fill, c);
4379 n += c;
4380 }
4381 }
4382 if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
4383 if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
4384 if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
4385 return 1;
4386}
4387
4388static int stbi__parse_uncompressed_block(stbi__zbuf *a)
4389{
4390 stbi_uc header[4];
4391 int len,nlen,k;
4392 if (a->num_bits & 7)
4393 stbi__zreceive(a, a->num_bits & 7); // discard
4394 // drain the bit-packed data into header
4395 k = 0;
4396 while (a->num_bits > 0) {
4397 header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
4398 a->code_buffer >>= 8;
4399 a->num_bits -= 8;
4400 }
4401 if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
4402 // now fill header the normal way
4403 while (k < 4)
4404 header[k++] = stbi__zget8(a);
4405 len = header[1] * 256 + header[0];
4406 nlen = header[3] * 256 + header[2];
4407 if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
4408 if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
4409 if (a->zout + len > a->zout_end)
4410 if (!stbi__zexpand(a, a->zout, len)) return 0;
4411 memcpy(a->zout, a->zbuffer, len);
4412 a->zbuffer += len;
4413 a->zout += len;
4414 return 1;
4415}
4416
4417static int stbi__parse_zlib_header(stbi__zbuf *a)
4418{
4419 int cmf = stbi__zget8(a);
4420 int cm = cmf & 15;
4421 /* int cinfo = cmf >> 4; */
4422 int flg = stbi__zget8(a);
4423 if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
4424 if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
4425 if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
4426 if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
4427 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
4428 return 1;
4429}
4430
4431static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
4432{
4433 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4434 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4435 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4436 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4437 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4438 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4439 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4440 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4441 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4442};
4443static const stbi_uc stbi__zdefault_distance[32] =
4444{
4445 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4446};
4447/*
4448Init algorithm:
4449{
4450 int i; // use <= to match clearly with spec
4451 for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
4452 for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
4453 for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
4454 for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
4455
4456 for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
4457}
4458*/
4459
4460static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4461{
4462 int final, type;
4463 if (parse_header)
4464 if (!stbi__parse_zlib_header(a)) return 0;
4465 a->num_bits = 0;
4466 a->code_buffer = 0;
4467 do {
4468 final = stbi__zreceive(a,1);
4469 type = stbi__zreceive(a,2);
4470 if (type == 0) {
4471 if (!stbi__parse_uncompressed_block(a)) return 0;
4472 } else if (type == 3) {
4473 return 0;
4474 } else {
4475 if (type == 1) {
4476 // use fixed code lengths
4477 if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0;
4478 if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
4479 } else {
4480 if (!stbi__compute_huffman_codes(a)) return 0;
4481 }
4482 if (!stbi__parse_huffman_block(a)) return 0;
4483 }
4484 } while (!final);
4485 return 1;
4486}
4487
4488static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
4489{
4490 a->zout_start = obuf;
4491 a->zout = obuf;
4492 a->zout_end = obuf + olen;
4493 a->z_expandable = exp;
4494
4495 return stbi__parse_zlib(a, parse_header);
4496}
4497
4498STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4499{
4500 stbi__zbuf a;
4501 char *p = (char *) stbi__malloc(initial_size);
4502 if (p == NULL) return NULL;
4503 a.zbuffer = (stbi_uc *) buffer;
4504 a.zbuffer_end = (stbi_uc *) buffer + len;
4505 if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4506 if (outlen) *outlen = (int) (a.zout - a.zout_start);
4507 return a.zout_start;
4508 } else {
4509 STBI_FREE(a.zout_start);
4510 return NULL;
4511 }
4512}
4513
4514STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4515{
4516 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4517}
4518
4519STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4520{
4521 stbi__zbuf a;
4522 char *p = (char *) stbi__malloc(initial_size);
4523 if (p == NULL) return NULL;
4524 a.zbuffer = (stbi_uc *) buffer;
4525 a.zbuffer_end = (stbi_uc *) buffer + len;
4526 if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4527 if (outlen) *outlen = (int) (a.zout - a.zout_start);
4528 return a.zout_start;
4529 } else {
4530 STBI_FREE(a.zout_start);
4531 return NULL;
4532 }
4533}
4534
4535STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4536{
4537 stbi__zbuf a;
4538 a.zbuffer = (stbi_uc *) ibuffer;
4539 a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4540 if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4541 return (int) (a.zout - a.zout_start);
4542 else
4543 return -1;
4544}
4545
4546STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4547{
4548 stbi__zbuf a;
4549 char *p = (char *) stbi__malloc(16384);
4550 if (p == NULL) return NULL;
4551 a.zbuffer = (stbi_uc *) buffer;
4552 a.zbuffer_end = (stbi_uc *) buffer+len;
4553 if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4554 if (outlen) *outlen = (int) (a.zout - a.zout_start);
4555 return a.zout_start;
4556 } else {
4557 STBI_FREE(a.zout_start);
4558 return NULL;
4559 }
4560}
4561
4562STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4563{
4564 stbi__zbuf a;
4565 a.zbuffer = (stbi_uc *) ibuffer;
4566 a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4567 if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4568 return (int) (a.zout - a.zout_start);
4569 else
4570 return -1;
4571}
4572#endif
4573
4574// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
4575// simple implementation
4576// - only 8-bit samples
4577// - no CRC checking
4578// - allocates lots of intermediate memory
4579// - avoids problem of streaming data between subsystems
4580// - avoids explicit window management
4581// performance
4582// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4583
4584#ifndef STBI_NO_PNG
// Header of one PNG chunk, as filled in by stbi__get_chunk_header.
4585typedef struct
4586{
4587 stbi__uint32 length; // first 32-bit big-endian field of the chunk header
4588 stbi__uint32 type; // second 32-bit big-endian field; compared against STBI__PNG_TYPE(...) values
4589} stbi__pngchunk;
4590
4591static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4592{
4593 stbi__pngchunk c;
4594 c.length = stbi__get32be(s);
4595 c.type = stbi__get32be(s);
4596 return c;
4597}
4598
4599static int stbi__check_png_header(stbi__context *s)
4600{
4601 static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
4602 int i;
4603 for (i=0; i < 8; ++i)
4604 if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
4605 return 1;
4606}
4607
4608typedef struct
4609{
4610 stbi__context *s;
4611 stbi_uc *idata, *expanded, *out;
4612 int depth;
4613} stbi__png;
4614
4615
// Scanline filter identifiers. 0..4 are the filter-type bytes stored in the
// PNG data; the last two exist only inside the decoder.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4626
4627static stbi_uc first_row_filter[5] =
4628{
4629 STBI__F_none,
4630 STBI__F_sub,
4631 STBI__F_none,
4632 STBI__F_avg_first,
4633 STBI__F_paeth_first
4634};
4635
// Paeth predictor: of a, b and c, returns the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4646
4647static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4648
4649// create the png data from post-deflated data
// Undoes PNG scanline filtering for an x-by-y image (or one interlace pass).
//   raw / raw_len - inflated data: for each row, one filter-type byte followed
//                   by img_width_bytes of filtered data
//   out_n         - components per output pixel; either s->img_n or s->img_n+1
//                   (in the latter case the added component is filled with 255)
//   depth / color - bit depth and PNG color type
// Allocates a->out and writes the unfiltered pixels into it. For depth < 8 the
// packed samples are expanded to one byte each in a second pass; for depth 16
// the samples are converted from big-endian to native byte order.
// Returns 1 on success, 0 via stbi__err on failure; a->out is not freed here
// when a failure is detected after the allocation.
4650static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4651{
4652 int bytes = (depth == 16? 2 : 1);
4653 stbi__context *s = a->s;
4654 stbi__uint32 i,j,stride = x*out_n*bytes;
4655 stbi__uint32 img_len, img_width_bytes;
4656 int k;
4657 int img_n = s->img_n; // copy it into a local for later
4658
4659 int output_bytes = out_n*bytes;
4660 int filter_bytes = img_n*bytes;
4661 int width = x;
4662
4663 STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4664 a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4665 if (!a->out) return stbi__err("outofmem", "Out of memory");
4666
4667 if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
 // bytes of filtered data per row (rounded up to whole bytes), and total input
 // size including the one filter byte per row
4668 img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4669 img_len = (img_width_bytes + 1) * y;
4670
4671 // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4672 // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4673 // so just check for raw_len < img_len always.
4674 if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4675
4676 for (j=0; j < y; ++j) {
4677 stbi_uc *cur = a->out + stride*j;
4678 stbi_uc *prior;
4679 int filter = *raw++;
4680
4681 if (filter > 4)
4682 return stbi__err("invalid filter","Corrupt PNG");
4683
4684 if (depth < 8) {
4685 if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG");
4686 cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4687 filter_bytes = 1;
4688 width = img_width_bytes;
4689 }
4690 prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4691
4692 // if first row, use special filter that doesn't sample previous row
4693 if (j == 0) filter = first_row_filter[filter];
4694
4695 // handle first byte explicitly
4696 for (k=0; k < filter_bytes; ++k) {
4697 switch (filter) {
4698 case STBI__F_none : cur[k] = raw[k]; break;
4699 case STBI__F_sub : cur[k] = raw[k]; break;
4700 case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4701 case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4702 case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4703 case STBI__F_avg_first : cur[k] = raw[k]; break;
4704 case STBI__F_paeth_first: cur[k] = raw[k]; break;
4705 }
4706 }
4707
 // step past the first pixel (or first byte for depth < 8), filling its
 // added alpha component when out_n != img_n
4708 if (depth == 8) {
4709 if (img_n != out_n)
4710 cur[img_n] = 255; // first pixel
4711 raw += img_n;
4712 cur += out_n;
4713 prior += out_n;
4714 } else if (depth == 16) {
4715 if (img_n != out_n) {
4716 cur[filter_bytes] = 255; // first pixel top byte
4717 cur[filter_bytes+1] = 255; // first pixel bottom byte
4718 }
4719 raw += filter_bytes;
4720 cur += output_bytes;
4721 prior += output_bytes;
4722 } else {
4723 raw += 1;
4724 cur += 1;
4725 prior += 1;
4726 }
4727
4728 // this is a little gross, so that we don't switch per-pixel or per-component
4729 if (depth < 8 || img_n == out_n) {
 // input and output rows have the same layout: filter the rest of the row as one flat run of bytes
4730 int nk = (width - 1)*filter_bytes;
4731 #define STBI__CASE(f) \
4732 case f: \
4733 for (k=0; k < nk; ++k)
4734 switch (filter) {
4735 // "none" filter turns into a memcpy here; make that explicit.
4736 case STBI__F_none: memcpy(cur, raw, nk); break;
4737 STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4738 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4739 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4740 STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4741 STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4742 STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4743 }
4744 #undef STBI__CASE
4745 raw += nk;
4746 } else {
 // output has one more component than the input: filter pixel by pixel,
 // reading filter_bytes per pixel and advancing the output by output_bytes
4747 STBI_ASSERT(img_n+1 == out_n);
4748 #define STBI__CASE(f) \
4749 case f: \
4750 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4751 for (k=0; k < filter_bytes; ++k)
4752 switch (filter) {
4753 STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
4754 STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4755 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4756 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4757 STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4758 STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4759 STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4760 }
4761 #undef STBI__CASE
4762
4763 // the loop above sets the high byte of the pixels' alpha, but for
4764 // 16 bit png files we also need the low byte set. we'll do that here.
4765 if (depth == 16) {
4766 cur = a->out + stride*j; // start at the beginning of the row again
4767 for (i=0; i < x; ++i,cur+=output_bytes) {
4768 cur[filter_bytes+1] = 255;
4769 }
4770 }
4771 }
4772 }
4773
4774 // we make a separate pass to expand bits to pixels; for performance,
4775 // this could run two scanlines behind the above code, so it won't
4776 // interfere with filtering but will still be in the cache.
4777 if (depth < 8) {
4778 for (j=0; j < y; ++j) {
4779 stbi_uc *cur = a->out + stride*j;
4780 stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4781 // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4782 // png guarantees byte alignment; if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4783 stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4784
4785 // note that the final byte might overshoot and write more data than desired.
4786 // we can allocate enough data that this never writes out of memory, but it
4787 // could also overwrite the next scanline. can it overwrite non-empty data
4788 // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4789 // so we need to explicitly clamp the final ones
4790
4791 if (depth == 4) {
4792 for (k=x*img_n; k >= 2; k-=2, ++in) {
4793 *cur++ = scale * ((*in >> 4) );
4794 *cur++ = scale * ((*in ) & 0x0f);
4795 }
4796 if (k > 0) *cur++ = scale * ((*in >> 4) );
4797 } else if (depth == 2) {
4798 for (k=x*img_n; k >= 4; k-=4, ++in) {
4799 *cur++ = scale * ((*in >> 6) );
4800 *cur++ = scale * ((*in >> 4) & 0x03);
4801 *cur++ = scale * ((*in >> 2) & 0x03);
4802 *cur++ = scale * ((*in ) & 0x03);
4803 }
4804 if (k > 0) *cur++ = scale * ((*in >> 6) );
4805 if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4806 if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4807 } else if (depth == 1) {
4808 for (k=x*img_n; k >= 8; k-=8, ++in) {
4809 *cur++ = scale * ((*in >> 7) );
4810 *cur++ = scale * ((*in >> 6) & 0x01);
4811 *cur++ = scale * ((*in >> 5) & 0x01);
4812 *cur++ = scale * ((*in >> 4) & 0x01);
4813 *cur++ = scale * ((*in >> 3) & 0x01);
4814 *cur++ = scale * ((*in >> 2) & 0x01);
4815 *cur++ = scale * ((*in >> 1) & 0x01);
4816 *cur++ = scale * ((*in ) & 0x01);
4817 }
4818 if (k > 0) *cur++ = scale * ((*in >> 7) );
4819 if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4820 if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4821 if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4822 if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4823 if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4824 if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4825 }
4826 if (img_n != out_n) {
4827 int q;
4828 // insert alpha = 255
 // walk backwards so each source sample is read before its slot is overwritten
4829 cur = a->out + stride*j;
4830 if (img_n == 1) {
4831 for (q=x-1; q >= 0; --q) {
4832 cur[q*2+1] = 255;
4833 cur[q*2+0] = cur[q];
4834 }
4835 } else {
4836 STBI_ASSERT(img_n == 3);
4837 for (q=x-1; q >= 0; --q) {
4838 cur[q*4+3] = 255;
4839 cur[q*4+2] = cur[q*3+2];
4840 cur[q*4+1] = cur[q*3+1];
4841 cur[q*4+0] = cur[q*3+0];
4842 }
4843 }
4844 }
4845 }
4846 } else if (depth == 16) {
4847 // force the image data from big-endian to platform-native.
4848 // this is done in a separate pass due to the decoding relying
4849 // on the data being untouched, but could probably be done
4850 // per-line during decode if care is taken.
4851 stbi_uc *cur = a->out;
4852 stbi__uint16 *cur16 = (stbi__uint16*)cur;
4853
4854 for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4855 *cur16 = (cur[0] << 8) | cur[1];
4856 }
4857 }
4858
4859 return 1;
4860}
4861
4862static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4863{
4864 int bytes = (depth == 16 ? 2 : 1);
4865 int out_bytes = out_n * bytes;
4866 stbi_uc *final;
4867 int p;
4868 if (!interlaced)
4869 return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4870
4871 // de-interlacing
4872 final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4873 if (!final) return stbi__err("outofmem", "Out of memory");
4874 for (p=0; p < 7; ++p) {
4875 int xorig[] = { 0,4,0,2,0,1,0 };
4876 int yorig[] = { 0,0,4,0,2,0,1 };
4877 int xspc[] = { 8,8,4,4,2,2,1 };
4878 int yspc[] = { 8,8,8,4,4,2,2 };
4879 int i,j,x,y;
4880 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4881 x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4882 y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4883 if (x && y) {
4884 stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4885 if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4886 STBI_FREE(final);
4887 return 0;
4888 }
4889 for (j=0; j < y; ++j) {
4890 for (i=0; i < x; ++i) {
4891 int out_y = j*yspc[p]+yorig[p];
4892 int out_x = i*xspc[p]+xorig[p];
4893 memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4894 a->out + (j*x+i)*out_bytes, out_bytes);
4895 }
4896 }
4897 STBI_FREE(a->out);
4898 image_data += img_len;
4899 image_data_len -= img_len;
4900 }
4901 }
4902 a->out = final;
4903
4904 return 1;
4905}
4906
4907static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4908{
4909 stbi__context *s = z->s;
4910 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4911 stbi_uc *p = z->out;
4912
4913 // compute color-based transparency, assuming we've
4914 // already got 255 as the alpha value in the output
4915 STBI_ASSERT(out_n == 2 || out_n == 4);
4916
4917 if (out_n == 2) {
4918 for (i=0; i < pixel_count; ++i) {
4919 p[1] = (p[0] == tc[0] ? 0 : 255);
4920 p += 2;
4921 }
4922 } else {
4923 for (i=0; i < pixel_count; ++i) {
4924 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4925 p[3] = 0;
4926 p += 4;
4927 }
4928 }
4929 return 1;
4930}
4931
4932static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4933{
4934 stbi__context *s = z->s;
4935 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4936 stbi__uint16 *p = (stbi__uint16*) z->out;
4937
4938 // compute color-based transparency, assuming we've
4939 // already got 65535 as the alpha value in the output
4940 STBI_ASSERT(out_n == 2 || out_n == 4);
4941
4942 if (out_n == 2) {
4943 for (i = 0; i < pixel_count; ++i) {
4944 p[1] = (p[0] == tc[0] ? 0 : 65535);
4945 p += 2;
4946 }
4947 } else {
4948 for (i = 0; i < pixel_count; ++i) {
4949 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4950 p[3] = 0;
4951 p += 4;
4952 }
4953 }
4954 return 1;
4955}
4956
4957static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4958{
4959 stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4960 stbi_uc *p, *temp_out, *orig = a->out;
4961
4962 p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4963 if (p == NULL) return stbi__err("outofmem", "Out of memory");
4964
4965 // between here and free(out) below, exitting would leak
4966 temp_out = p;
4967
4968 if (pal_img_n == 3) {
4969 for (i=0; i < pixel_count; ++i) {
4970 int n = orig[i]*4;
4971 p[0] = palette[n ];
4972 p[1] = palette[n+1];
4973 p[2] = palette[n+2];
4974 p += 3;
4975 }
4976 } else {
4977 for (i=0; i < pixel_count; ++i) {
4978 int n = orig[i]*4;
4979 p[0] = palette[n ];
4980 p[1] = palette[n+1];
4981 p[2] = palette[n+2];
4982 p[3] = palette[n+3];
4983 p += 4;
4984 }
4985 }
4986 STBI_FREE(a->out);
4987 a->out = temp_out;
4988
4989 STBI_NOTUSED(len);
4990
4991 return 1;
4992}
4993
// Process-wide settings read by the PNG decoder through the
// stbi__unpremultiply_on_load and stbi__de_iphone_flag macros. Both default to 0.
4994static int stbi__unpremultiply_on_load_global = 0;
4995static int stbi__de_iphone_flag_global = 0;
4996
// Sets the global "unpremultiply on load" flag.
4997STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4998{
4999 stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
5000}
5001
// Sets the global "convert iPhone PNG to RGB" flag.
5002STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
5003{
5004 stbi__de_iphone_flag_global = flag_true_if_should_convert;
5005}
5006
// stbi__unpremultiply_on_load and stbi__de_iphone_flag resolve to the value
// the decoder should use. With thread-local storage available, a value set
// through the *_thread setters on the current thread overrides the global one.
#ifdef STBI_THREAD_LOCAL
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local;
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_set;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_set;

// Per-thread override of the "unpremultiply on load" flag.
STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
   stbi__unpremultiply_on_load_set   = 1;
}

// Per-thread override of the "convert iPhone PNG to RGB" flag.
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_local = flag_true_if_should_convert;
   stbi__de_iphone_flag_set   = 1;
}

#define stbi__unpremultiply_on_load \
   (stbi__unpremultiply_on_load_set ? stbi__unpremultiply_on_load_local : stbi__unpremultiply_on_load_global)
#define stbi__de_iphone_flag \
   (stbi__de_iphone_flag_set ? stbi__de_iphone_flag_local : stbi__de_iphone_flag_global)
#else
// no thread-local storage: only the global settings exist
#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
#endif // STBI_THREAD_LOCAL
5033
5034static void stbi__de_iphone(stbi__png *z)
5035{
5036 stbi__context *s = z->s;
5037 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
5038 stbi_uc *p = z->out;
5039
5040 if (s->img_out_n == 3) { // convert bgr to rgb
5041 for (i=0; i < pixel_count; ++i) {
5042 stbi_uc t = p[0];
5043 p[0] = p[2];
5044 p[2] = t;
5045 p += 3;
5046 }
5047 } else {
5048 STBI_ASSERT(s->img_out_n == 4);
5049 if (stbi__unpremultiply_on_load) {
5050 // convert bgr to rgb and unpremultiply
5051 for (i=0; i < pixel_count; ++i) {
5052 stbi_uc a = p[3];
5053 stbi_uc t = p[0];
5054 if (a) {
5055 stbi_uc half = a / 2;
5056 p[0] = (p[2] * 255 + half) / a;
5057 p[1] = (p[1] * 255 + half) / a;
5058 p[2] = ( t * 255 + half) / a;
5059 } else {
5060 p[0] = p[2];
5061 p[2] = t;
5062 }
5063 p += 4;
5064 }
5065 } else {
5066 // convert bgr to rgb
5067 for (i=0; i < pixel_count; ++i) {
5068 stbi_uc t = p[0];
5069 p[0] = p[2];
5070 p[2] = t;
5071 p += 4;
5072 }
5073 }
5074 }
5075}
5076
// Packs four chunk-name characters into one unsigned value, with `a` in the
// most significant byte, so it can be compared against a chunk type read
// with stbi__get32be.
5077#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
5078
// Walks the chunks of a PNG stream. How far it goes depends on `scan`:
//   STBI__SCAN_type   - only the PNG signature is checked.
//   STBI__SCAN_header - stops as soon as the component count is known
//                       (at a tRNS chunk, the first IDAT, or IEND).
//   STBI__SCAN_load   - collects all IDAT data and builds the image at IEND.
// Parameters:
//   z        - decoder state; z->s is the input context. z->expanded, z->idata
//              and z->out are reset to NULL on entry.
//   scan     - one of the STBI__SCAN_* modes above.
//   req_comp - requested number of output components; only consulted when the
//              image is built at IEND.
// Returns 1 on success. Failure paths either return 0 directly or return the
// result of stbi__err (presumably 0 - confirm against its definition).
// On failure z->idata / z->expanded may still be allocated; stbi__do_png
// frees them after calling this function.
5079static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
5080{
// palette holds up to 256 entries of 4 bytes each (R,G,B,A).
// pal_img_n is 0 for non-paletted images, 3 for paletted, 4 for paletted with tRNS.
5081 stbi_uc palette[1024], pal_img_n=0;
// tc / tc16 hold the transparent colour key from a non-paletted tRNS chunk
// (8-bit scaled values / raw 16-bit values respectively).
5082 stbi_uc has_trans=0, tc[3]={0};
5083 stbi__uint16 tc16[3];
// ioff = bytes of IDAT data collected so far, idata_limit = current capacity of z->idata.
5084 stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
5085 int first=1,k,interlace=0, color=0, is_iphone=0;
5086 stbi__context *s = z->s;
5087
5088 z->expanded = NULL;
5089 z->idata = NULL;
5090 z->out = NULL;
5091
5092 if (!stbi__check_png_header(s)) return 0;
5093
5094 if (scan == STBI__SCAN_type) return 1;
5095
5096 for (;;) {
5097 stbi__pngchunk c = stbi__get_chunk_header(s);
5098 switch (c.type) {
// CgBI only sets the is_iphone flag; it is the one chunk accepted before IHDR.
5099 case STBI__PNG_TYPE('C','g','B','I'):
5100 is_iphone = 1;
5101 stbi__skip(s, c.length);
5102 break;
5103 case STBI__PNG_TYPE('I','H','D','R'): {
5104 int comp,filter;
5105 if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
5106 first = 0;
5107 if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
5108 s->img_x = stbi__get32be(s);
5109 s->img_y = stbi__get32be(s);
5110 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
5111 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
5112 z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
// accepted colour types are 0, 2, 3, 4 and 6: bit 1 = colour, bit 2 = alpha,
// and 3 is the only odd value allowed (paletted, never with 16-bit depth).
5113 color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
5114 if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
5115 if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
5116 comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
5117 filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
5118 interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
5119 if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
5120 if (!pal_img_n) {
5121 s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
// reject images where width * height * components would exceed 2^30
5122 if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
5123 } else {
5124 // if paletted, then pal_n is our final components, and
5125 // img_n is # components to decompress/filter.
5126 s->img_n = 1;
// same 2^30 limit, assuming the worst case of 4 output components
5127 if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
5128 }
5129 // even with SCAN_header, have to scan to see if we have a tRNS
5130 break;
5131 }
5132
5133 case STBI__PNG_TYPE('P','L','T','E'): {
5134 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5135 if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
5136 pal_len = c.length / 3;
5137 if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
// entries are stored with a 4-byte stride; alpha defaults to opaque and may be
// overwritten by a later tRNS chunk.
5138 for (i=0; i < pal_len; ++i) {
5139 palette[i*4+0] = stbi__get8(s);
5140 palette[i*4+1] = stbi__get8(s);
5141 palette[i*4+2] = stbi__get8(s);
5142 palette[i*4+3] = 255;
5143 }
5144 break;
5145 }
5146
5147 case STBI__PNG_TYPE('t','R','N','S'): {
5148 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5149 if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
5150 if (pal_img_n) {
// paletted image: tRNS supplies one alpha byte per palette entry
5151 if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
5152 if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
5153 if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
5154 pal_img_n = 4;
5155 for (i=0; i < c.length; ++i)
5156 palette[i*4+3] = stbi__get8(s);
5157 } else {
// non-paletted image: tRNS supplies one 16-bit value per colour component;
// img_n must be odd here (1 or 3), i.e. the image has no alpha channel of its own.
5158 if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
5159 if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
5160 has_trans = 1;
5161 // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
5162 if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
5163 if (z->depth == 16) {
5164 for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
5165 } else {
5166 for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
5167 }
5168 }
5169 break;
5170 }
5171
5172 case STBI__PNG_TYPE('I','D','A','T'): {
5173 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5174 if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
5175 if (scan == STBI__SCAN_header) {
5176 // header scan definitely stops at first IDAT
5177 if (pal_img_n)
5178 s->img_n = pal_img_n;
5179 return 1;
5180 }
5181 if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
// overflow guard on the running total; note this path returns 0 without going through stbi__err
5182 if ((int)(ioff + c.length) < (int)ioff) return 0;
// grow z->idata by doubling (starting at max(chunk length, 4096)) until the chunk fits
5183 if (ioff + c.length > idata_limit) {
5184 stbi__uint32 idata_limit_old = idata_limit;
5185 stbi_uc *p;
5186 if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
5187 while (ioff + c.length > idata_limit)
5188 idata_limit *= 2;
5189 STBI_NOTUSED(idata_limit_old);
5190 p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
5191 z->idata = p;
5192 }
5193 if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
5194 ioff += c.length;
5195 break;
5196 }
5197
5198 case STBI__PNG_TYPE('I','E','N','D'): {
5199 stbi__uint32 raw_len, bpl;
5200 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5201 if (scan != STBI__SCAN_load) return 1;
5202 if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
5203 // initial guess for decoded data size to avoid unnecessary reallocs
5204 bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
5205 raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
// raw_len is passed in as the size guess and overwritten with the actual decoded length
5206 z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
5207 if (z->expanded == NULL) return 0; // zlib should set error
5208 STBI_FREE(z->idata); z->idata = NULL;
// add an alpha channel when the caller asked for exactly one more component
// than the source has (but not for 3, and not for paletted images), or when a
// tRNS colour key needs somewhere to store its alpha.
5209 if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
5210 s->img_out_n = s->img_n+1;
5211 else
5212 s->img_out_n = s->img_n;
5213 if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
5214 if (has_trans) {
5215 if (z->depth == 16) {
5216 if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
5217 } else {
5218 if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
5219 }
5220 }
5221 if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
5222 stbi__de_iphone(z);
5223 if (pal_img_n) {
5224 // pal_img_n == 3 or 4
5225 s->img_n = pal_img_n; // record the actual colors we had
5226 s->img_out_n = pal_img_n;
5227 if (req_comp >= 3) s->img_out_n = req_comp;
5228 if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
5229 return 0;
5230 } else if (has_trans) {
5231 // non-paletted image with tRNS -> source image has (constant) alpha
5232 ++s->img_n;
5233 }
5234 STBI_FREE(z->expanded); z->expanded = NULL;
5235 // end of PNG chunk, read and skip CRC
5236 stbi__get32be(s);
5237 return 1;
5238 }
5239
5240 default:
5241 // if critical, fail
5242 if (first) return stbi__err("first not IHDR", "Corrupt PNG");
// bit 29 of the packed type is bit 5 (0x20) of the first tag character; when it
// is clear the first letter is upper case and the chunk is treated as critical.
5243 if ((c.type & (1 << 29)) == 0) {
5244 #ifndef STBI_NO_FAILURE_STRINGS
5245 // not threadsafe
5246 static char invalid_chunk[] = "XXXX PNG chunk not known";
5247 invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
5248 invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
5249 invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
5250 invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
5251 #endif
5252 return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
5253 }
// unknown ancillary chunk: skip its payload
5254 stbi__skip(s, c.length);
5255 break;
5256 }
5257 // end of PNG chunk, read and skip CRC
5258 stbi__get32be(s);
5259 }
5260}
5261
5262static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
5263{
5264 void *result=NULL;
5265 if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
5266 if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
5267 if (p->depth <= 8)
5268 ri->bits_per_channel = 8;
5269 else if (p->depth == 16)
5270 ri->bits_per_channel = 16;
5271 else
5272 return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
5273 result = p->out;
5274 p->out = NULL;
5275 if (req_comp && req_comp != p->s->img_out_n) {
5276 if (ri->bits_per_channel == 8)
5277 result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
5278 else
5279 result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
5280 p->s->img_out_n = req_comp;
5281 if (result == NULL) return result;
5282 }
5283 *x = p->s->img_x;
5284 *y = p->s->img_y;
5285 if (n) *n = p->s->img_n;
5286 }
5287 STBI_FREE(p->out); p->out = NULL;
5288 STBI_FREE(p->expanded); p->expanded = NULL;
5289 STBI_FREE(p->idata); p->idata = NULL;
5290
5291 return result;
5292}
5293
5294static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5295{
5296 stbi__png p;
5297 p.s = s;
5298 return stbi__do_png(&p, x,y,comp,req_comp, ri);
5299}
5300
5301static int stbi__png_test(stbi__context *s)
5302{
5303 int r;
5304 r = stbi__check_png_header(s);
5305 stbi__rewind(s);
5306 return r;
5307}
5308
5309static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
5310{
5311 if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
5312 stbi__rewind( p->s );
5313 return 0;
5314 }
5315 if (x) *x = p->s->img_x;
5316 if (y) *y = p->s->img_y;
5317 if (comp) *comp = p->s->img_n;
5318 return 1;
5319}
5320
5321static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
5322{
5323 stbi__png p;
5324 p.s = s;
5325 return stbi__png_info_raw(&p, x, y, comp);
5326}
5327
5328static int stbi__png_is16(stbi__context *s)
5329{
5330 stbi__png p;
5331 p.s = s;
5332 if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
5333 return 0;
5334 if (p.depth != 16) {
5335 stbi__rewind(p.s);
5336 return 0;
5337 }
5338 return 1;
5339}
5340#endif
5341
5342// Microsoft/Windows BMP image
5343
5344#ifndef STBI_NO_BMP
5345static int stbi__bmp_test_raw(stbi__context *s)
5346{
5347 int r;
5348 int sz;
5349 if (stbi__get8(s) != 'B') return 0;
5350 if (stbi__get8(s) != 'M') return 0;
5351 stbi__get32le(s); // discard filesize
5352 stbi__get16le(s); // discard reserved
5353 stbi__get16le(s); // discard reserved
5354 stbi__get32le(s); // discard data offset
5355 sz = stbi__get32le(s);
5356 r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
5357 return r;
5358}
5359
5360static int stbi__bmp_test(stbi__context *s)
5361{
5362 int r = stbi__bmp_test_raw(s);
5363 stbi__rewind(s);
5364 return r;
5365}
5366
5367
// Returns the index (0..31) of the highest set bit of z, or -1 when z is 0.
static int stbi__high_bit(unsigned int z)
{
   static const int steps[5] = { 16, 8, 4, 2, 1 };
   int index = 0;
   int k;

   if (z == 0)
      return -1;

   // binary search: whenever anything remains above the low `step` bits,
   // the answer is at least `step` higher, so drop those low bits.
   for (k = 0; k < 5; ++k) {
      int step = steps[k];
      if ((z >> step) != 0) {
         index += step;
         z >>= step;
      }
   }
   return index;
}
5380
// Returns the number of set bits in a (0..32).
static int stbi__bitcount(unsigned int a)
{
   int count = 0;

   // each iteration clears the lowest set bit
   while (a != 0) {
      a &= a - 1;
      ++count;
   }
   return count;
}
5390
// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to the full range.
//   v     - already-masked field value
//   shift - right shift that puts the field's top bit at bit 7
//           (negative means shift left instead)
//   bits  - width of the field, 0..8
// Returns the field widened to 0..255 by replicating its bit pattern.
static int stbi__shiftsigned(unsigned int v, int shift, int bits)
{
   // multiplying an N-bit value by mul_table[N] repeats its bit pattern;
   // shifting right by shift_table[N] then trims the result to 8 bits.
   // e.g. N=5: v*0x21 == (v<<5)|v (10 bits), >>2 leaves 8 bits.
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   // check the range before bits is used as a shift amount and table index
   STBI_ASSERT(bits >= 0 && bits <= 8);
   if (shift < 0)
      v <<= -shift;
   else
      v >>= shift;
   STBI_ASSERT(v < 256);
   v >>= (8-bits);
   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
}
5413
// Values collected by stbi__bmp_parse_header for use by the BMP loader.
typedef struct
{
   int bpp;             // bits per pixel, as read from the info header
   int offset;          // offset field read from the file header
   int hsz;             // info header size (12, 40, 56, 108 or 124)
   unsigned int mr;     // red channel bit mask
   unsigned int mg;     // green channel bit mask
   unsigned int mb;     // blue channel bit mask
   unsigned int ma;     // alpha channel bit mask (0 when there is none)
   unsigned int all_a;  // seeded by the loader, which ORs decoded alpha values into its copy
   int extra_read;      // bytes consumed outside the hsz-sized header: 14, plus 12 when bitfield masks follow
} stbi__bmp_data;
5420
5421static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
5422{
5423 // BI_BITFIELDS specifies masks explicitly, don't override
5424 if (compress == 3)
5425 return 1;
5426
5427 if (compress == 0) {
5428 if (info->bpp == 16) {
5429 info->mr = 31u << 10;
5430 info->mg = 31u << 5;
5431 info->mb = 31u << 0;
5432 } else if (info->bpp == 32) {
5433 info->mr = 0xffu << 16;
5434 info->mg = 0xffu << 8;
5435 info->mb = 0xffu << 0;
5436 info->ma = 0xffu << 24;
5437 info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
5438 } else {
5439 // otherwise, use defaults, which is all-0
5440 info->mr = info->mg = info->mb = info->ma = 0;
5441 }
5442 return 1;
5443 }
5444 return 0; // error
5445}
5446
// Reads the BMP file header and info header from s, storing the image size in
// s->img_x / s->img_y and everything else in *info.
// Returns a non-NULL dummy pointer ((void *) 1) on success; callers only test
// the result against NULL. Failures return the result of stbi__errpuc.
// Supported info-header sizes are 12, 40, 56, 108 and 124 bytes. RLE (1, 2)
// and compression values >= 4 are rejected.
// NOTE(review): `compress` is a signed int, so values read with the top bit
// set are negative and are not caught by the `compress >= 4` test - confirm
// whether that is intended.
5447static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
5448{
5449 int hsz;
5450 if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
5451 stbi__get32le(s); // discard filesize
5452 stbi__get16le(s); // discard reserved
5453 stbi__get16le(s); // discard reserved
5454 info->offset = stbi__get32le(s);
5455 info->hsz = hsz = stbi__get32le(s);
5456 info->mr = info->mg = info->mb = info->ma = 0;
// 14 = size of the file header read above: 2 signature bytes + 4 + 2 + 2 + 4
5457 info->extra_read = 14;
5458
5459 if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
5460
5461 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
// the 12-byte header stores width/height as 16-bit values, all others as 32-bit
5462 if (hsz == 12) {
5463 s->img_x = stbi__get16le(s);
5464 s->img_y = stbi__get16le(s);
5465 } else {
5466 s->img_x = stbi__get32le(s);
5467 s->img_y = stbi__get32le(s);
5468 }
// this 16-bit field must be 1
5469 if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
5470 info->bpp = stbi__get16le(s);
5471 if (hsz != 12) {
5472 int compress = stbi__get32le(s);
5473 if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5474 if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
5475 if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
5476 stbi__get32le(s); // discard sizeof
5477 stbi__get32le(s); // discard hres
5478 stbi__get32le(s); // discard vres
5479 stbi__get32le(s); // discard colorsused
5480 stbi__get32le(s); // discard max important
5481 if (hsz == 40 || hsz == 56) {
// the 56-byte header carries 16 extra bytes, which are read and ignored
5482 if (hsz == 56) {
5483 stbi__get32le(s);
5484 stbi__get32le(s);
5485 stbi__get32le(s);
5486 stbi__get32le(s);
5487 }
5488 if (info->bpp == 16 || info->bpp == 32) {
5489 if (compress == 0) {
5490 stbi__bmp_set_mask_defaults(info, compress);
5491 } else if (compress == 3) {
// BI_BITFIELDS: three 32-bit masks follow the header; count them in extra_read
5492 info->mr = stbi__get32le(s);
5493 info->mg = stbi__get32le(s);
5494 info->mb = stbi__get32le(s);
5495 info->extra_read += 12;
5496 // not documented, but generated by photoshop and handled by mspaint
// identical red, green and blue masks are rejected here
5497 if (info->mr == info->mg && info->mg == info->mb) {
5498 // ?!?!?
5499 return stbi__errpuc("bad BMP", "bad BMP");
5500 }
5501 } else
5502 return stbi__errpuc("bad BMP", "bad BMP");
5503 }
5504 } else {
5505 // V4/V5 header
5506 int i;
5507 if (hsz != 108 && hsz != 124)
5508 return stbi__errpuc("bad BMP", "bad BMP");
5509 info->mr = stbi__get32le(s);
5510 info->mg = stbi__get32le(s);
5511 info->mb = stbi__get32le(s);
5512 info->ma = stbi__get32le(s);
// the return value of stbi__bmp_set_mask_defaults is not checked here
5513 if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
5514 stbi__bmp_set_mask_defaults(info, compress);
5515 stbi__get32le(s); // discard color space
5516 for (i=0; i < 12; ++i)
5517 stbi__get32le(s); // discard color space parameters
5518 if (hsz == 124) {
5519 stbi__get32le(s); // discard rendering intent
5520 stbi__get32le(s); // discard offset of profile data
5521 stbi__get32le(s); // discard size of profile data
5522 stbi__get32le(s); // discard reserved
5523 }
5524 }
5525 }
5526 return (void *) 1;
5527}
5528
5529
5530static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5531{
5532 stbi_uc *out;
5533 unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5534 stbi_uc pal[256][4];
5535 int psize=0,i,j,width;
5536 int flip_vertically, pad, target;
5537 stbi__bmp_data info;
5538 STBI_NOTUSED(ri);
5539
5540 info.all_a = 255;
5541 if (stbi__bmp_parse_header(s, &info) == NULL)
5542 return NULL; // error code already set
5543
5544 flip_vertically = ((int) s->img_y) > 0;
5545 s->img_y = abs((int) s->img_y);
5546
5547 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5548 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5549
5550 mr = info.mr;
5551 mg = info.mg;
5552 mb = info.mb;
5553 ma = info.ma;
5554 all_a = info.all_a;
5555
5556 if (info.hsz == 12) {
5557 if (info.bpp < 24)
5558 psize = (info.offset - info.extra_read - 24) / 3;
5559 } else {
5560 if (info.bpp < 16)
5561 psize = (info.offset - info.extra_read - info.hsz) >> 2;
5562 }
5563 if (psize == 0) {
5564 // accept some number of extra bytes after the header, but if the offset points either to before
5565 // the header ends or implies a large amount of extra data, reject the file as malformed
5566 int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
5567 int header_limit = 1024; // max we actually read is below 256 bytes currently.
5568 int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
5569 if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
5570 return stbi__errpuc("bad header", "Corrupt BMP");
5571 }
5572 // we established that bytes_read_so_far is positive and sensible.
5573 // the first half of this test rejects offsets that are either too small positives, or
5574 // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
5575 // ensures the number computed in the second half of the test can't overflow.
5576 if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
5577 return stbi__errpuc("bad offset", "Corrupt BMP");
5578 } else {
5579 stbi__skip(s, info.offset - bytes_read_so_far);
5580 }
5581 }
5582
5583 if (info.bpp == 24 && ma == 0xff000000)
5584 s->img_n = 3;
5585 else
5586 s->img_n = ma ? 4 : 3;
5587 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5588 target = req_comp;
5589 else
5590 target = s->img_n; // if they want monochrome, we'll post-convert
5591
5592 // sanity-check size
5593 if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5594 return stbi__errpuc("too large", "Corrupt BMP");
5595
5596 out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5597 if (!out) return stbi__errpuc("outofmem", "Out of memory");
5598 if (info.bpp < 16) {
5599 int z=0;
5600 if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5601 for (i=0; i < psize; ++i) {
5602 pal[i][2] = stbi__get8(s);
5603 pal[i][1] = stbi__get8(s);
5604 pal[i][0] = stbi__get8(s);
5605 if (info.hsz != 12) stbi__get8(s);
5606 pal[i][3] = 255;
5607 }
5608 stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5609 if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5610 else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5611 else if (info.bpp == 8) width = s->img_x;
5612 else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5613 pad = (-width)&3;
5614 if (info.bpp == 1) {
5615 for (j=0; j < (int) s->img_y; ++j) {
5616 int bit_offset = 7, v = stbi__get8(s);
5617 for (i=0; i < (int) s->img_x; ++i) {
5618 int color = (v>>bit_offset)&0x1;
5619 out[z++] = pal[color][0];
5620 out[z++] = pal[color][1];
5621 out[z++] = pal[color][2];
5622 if (target == 4) out[z++] = 255;
5623 if (i+1 == (int) s->img_x) break;
5624 if((--bit_offset) < 0) {
5625 bit_offset = 7;
5626 v = stbi__get8(s);
5627 }
5628 }
5629 stbi__skip(s, pad);
5630 }
5631 } else {
5632 for (j=0; j < (int) s->img_y; ++j) {
5633 for (i=0; i < (int) s->img_x; i += 2) {
5634 int v=stbi__get8(s),v2=0;
5635 if (info.bpp == 4) {
5636 v2 = v & 15;
5637 v >>= 4;
5638 }
5639 out[z++] = pal[v][0];
5640 out[z++] = pal[v][1];
5641 out[z++] = pal[v][2];
5642 if (target == 4) out[z++] = 255;
5643 if (i+1 == (int) s->img_x) break;
5644 v = (info.bpp == 8) ? stbi__get8(s) : v2;
5645 out[z++] = pal[v][0];
5646 out[z++] = pal[v][1];
5647 out[z++] = pal[v][2];
5648 if (target == 4) out[z++] = 255;
5649 }
5650 stbi__skip(s, pad);
5651 }
5652 }
5653 } else {
5654 int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5655 int z = 0;
5656 int easy=0;
5657 stbi__skip(s, info.offset - info.extra_read - info.hsz);
5658 if (info.bpp == 24) width = 3 * s->img_x;
5659 else if (info.bpp == 16) width = 2*s->img_x;
5660 else /* bpp = 32 and pad = 0 */ width=0;
5661 pad = (-width) & 3;
5662 if (info.bpp == 24) {
5663 easy = 1;
5664 } else if (info.bpp == 32) {
5665 if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5666 easy = 2;
5667 }
5668 if (!easy) {
5669 if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5670 // right shift amt to put high bit in position #7
5671 rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5672 gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5673 bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5674 ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5675 if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5676 }
5677 for (j=0; j < (int) s->img_y; ++j) {
5678 if (easy) {
5679 for (i=0; i < (int) s->img_x; ++i) {
5680 unsigned char a;
5681 out[z+2] = stbi__get8(s);
5682 out[z+1] = stbi__get8(s);
5683 out[z+0] = stbi__get8(s);
5684 z += 3;
5685 a = (easy == 2 ? stbi__get8(s) : 255);
5686 all_a |= a;
5687 if (target == 4) out[z++] = a;
5688 }
5689 } else {
5690 int bpp = info.bpp;
5691 for (i=0; i < (int) s->img_x; ++i) {
5692 stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5693 unsigned int a;
5694 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5695 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5696 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5697 a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5698 all_a |= a;
5699 if (target == 4) out[z++] = STBI__BYTECAST(a);
5700 }
5701 }
5702 stbi__skip(s, pad);
5703 }
5704 }
5705
5706 // if alpha channel is all 0s, replace with all 255s
5707 if (target == 4 && all_a == 0)
5708 for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5709 out[i] = 255;
5710
5711 if (flip_vertically) {
5712 stbi_uc t;
5713 for (j=0; j < (int) s->img_y>>1; ++j) {
5714 stbi_uc *p1 = out + j *s->img_x*target;
5715 stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5716 for (i=0; i < (int) s->img_x*target; ++i) {
5717 t = p1[i]; p1[i] = p2[i]; p2[i] = t;
5718 }
5719 }
5720 }
5721
5722 if (req_comp && req_comp != target) {
5723 out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5724 if (out == NULL) return out; // stbi__convert_format frees input on failure
5725 }
5726
5727 *x = s->img_x;
5728 *y = s->img_y;
5729 if (comp) *comp = s->img_n;
5730 return out;
5731}
5732#endif
5733
5734// Targa Truevision - TGA
5735// by Jonathan Dummer
5736#ifndef STBI_NO_TGA
5737// returns STBI_rgb or whatever, 0 on error
5738static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5739{
5740 // only RGB or RGBA (incl. 16bit) or grey allowed
5741 if (is_rgb16) *is_rgb16 = 0;
5742 switch(bits_per_pixel) {
5743 case 8: return STBI_grey;
5744 case 16: if(is_grey) return STBI_grey_alpha;
5745 // fallthrough
5746 case 15: if(is_rgb16) *is_rgb16 = 1;
5747 return STBI_rgb;
5748 case 24: // fallthrough
5749 case 32: return bits_per_pixel/8;
5750 default: return 0;
5751 }
5752}
5753
5754static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
5755{
5756 int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
5757 int sz, tga_colormap_type;
5758 stbi__get8(s); // discard Offset
5759 tga_colormap_type = stbi__get8(s); // colormap type
5760 if( tga_colormap_type > 1 ) {
5761 stbi__rewind(s);
5762 return 0; // only RGB or indexed allowed
5763 }
5764 tga_image_type = stbi__get8(s); // image type
5765 if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
5766 if (tga_image_type != 1 && tga_image_type != 9) {
5767 stbi__rewind(s);
5768 return 0;
5769 }
5770 stbi__skip(s,4); // skip index of first colormap entry and number of entries
5771 sz = stbi__get8(s); // check bits per palette color entry
5772 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
5773 stbi__rewind(s);
5774 return 0;
5775 }
5776 stbi__skip(s,4); // skip image x and y origin
5777 tga_colormap_bpp = sz;
5778 } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
5779 if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
5780 stbi__rewind(s);
5781 return 0; // only RGB or grey allowed, +/- RLE
5782 }
5783 stbi__skip(s,9); // skip colormap specification and image x/y origin
5784 tga_colormap_bpp = 0;
5785 }
5786 tga_w = stbi__get16le(s);
5787 if( tga_w < 1 ) {
5788 stbi__rewind(s);
5789 return 0; // test width
5790 }
5791 tga_h = stbi__get16le(s);
5792 if( tga_h < 1 ) {
5793 stbi__rewind(s);
5794 return 0; // test height
5795 }
5796 tga_bits_per_pixel = stbi__get8(s); // bits per pixel
5797 stbi__get8(s); // ignore alpha bits
5798 if (tga_colormap_bpp != 0) {
5799 if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
5800 // when using a colormap, tga_bits_per_pixel is the size of the indexes
5801 // I don't think anything but 8 or 16bit indexes makes sense
5802 stbi__rewind(s);
5803 return 0;
5804 }
5805 tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
5806 } else {
5807 tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
5808 }
5809 if(!tga_comp) {
5810 stbi__rewind(s);
5811 return 0;
5812 }
5813 if (x) *x = tga_w;
5814 if (y) *y = tga_h;
5815 if (comp) *comp = tga_comp;
5816 return 1; // seems to have passed everything
5817}
5818
5819static int stbi__tga_test(stbi__context *s)
5820{
5821 int res = 0;
5822 int sz, tga_color_type;
5823 stbi__get8(s); // discard Offset
5824 tga_color_type = stbi__get8(s); // color type
5825 if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
5826 sz = stbi__get8(s); // image type
5827 if ( tga_color_type == 1 ) { // colormapped (paletted) image
5828 if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
5829 stbi__skip(s,4); // skip index of first colormap entry and number of entries
5830 sz = stbi__get8(s); // check bits per palette color entry
5831 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5832 stbi__skip(s,4); // skip image x and y origin
5833 } else { // "normal" image w/o colormap
5834 if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
5835 stbi__skip(s,9); // skip colormap specification and image x/y origin
5836 }
5837 if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
5838 if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
5839 sz = stbi__get8(s); // bits per pixel
5840 if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
5841 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5842
5843 res = 1; // if we got this far, everything's good and we can return 1 instead of 0
5844
5845errorEnd:
5846 stbi__rewind(s);
5847 return res;
5848}
5849
5850// read 16bit value and convert to 24bit RGB
5851static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5852{
5853 stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5854 stbi__uint16 fiveBitMask = 31;
5855 // we have 3 channels with 5bits each
5856 int r = (px >> 10) & fiveBitMask;
5857 int g = (px >> 5) & fiveBitMask;
5858 int b = px & fiveBitMask;
5859 // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5860 out[0] = (stbi_uc)((r * 255)/31);
5861 out[1] = (stbi_uc)((g * 255)/31);
5862 out[2] = (stbi_uc)((b * 255)/31);
5863
5864 // some people claim that the most significant bit might be used for alpha
5865 // (possibly if an alpha-bit is set in the "image descriptor byte")
5866 // but that only made 16bit test images completely translucent..
5867 // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5868}
5869
5870static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5871{
5872 // read in the TGA header stuff
5873 int tga_offset = stbi__get8(s);
5874 int tga_indexed = stbi__get8(s);
5875 int tga_image_type = stbi__get8(s);
5876 int tga_is_RLE = 0;
5877 int tga_palette_start = stbi__get16le(s);
5878 int tga_palette_len = stbi__get16le(s);
5879 int tga_palette_bits = stbi__get8(s);
5880 int tga_x_origin = stbi__get16le(s);
5881 int tga_y_origin = stbi__get16le(s);
5882 int tga_width = stbi__get16le(s);
5883 int tga_height = stbi__get16le(s);
5884 int tga_bits_per_pixel = stbi__get8(s);
5885 int tga_comp, tga_rgb16=0;
5886 int tga_inverted = stbi__get8(s);
5887 // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
5888 // image data
5889 unsigned char *tga_data;
5890 unsigned char *tga_palette = NULL;
5891 int i, j;
5892 unsigned char raw_data[4] = {0};
5893 int RLE_count = 0;
5894 int RLE_repeating = 0;
5895 int read_next_pixel = 1;
5896 STBI_NOTUSED(ri);
5897 STBI_NOTUSED(tga_x_origin); // @TODO
5898 STBI_NOTUSED(tga_y_origin); // @TODO
5899
5900 if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5901 if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5902
5903 // do a tiny bit of precessing
5904 if ( tga_image_type >= 8 )
5905 {
5906 tga_image_type -= 8;
5907 tga_is_RLE = 1;
5908 }
5909 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
5910
5911 // If I'm paletted, then I'll use the number of bits from the palette
5912 if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
5913 else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
5914
5915 if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
5916 return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
5917
5918 // tga info
5919 *x = tga_width;
5920 *y = tga_height;
5921 if (comp) *comp = tga_comp;
5922
5923 if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
5924 return stbi__errpuc("too large", "Corrupt TGA");
5925
5926 tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
5927 if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
5928
5929 // skip to the data's starting position (offset usually = 0)
5930 stbi__skip(s, tga_offset );
5931
5932 if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
5933 for (i=0; i < tga_height; ++i) {
5934 int row = tga_inverted ? tga_height -i - 1 : i;
5935 stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
5936 stbi__getn(s, tga_row, tga_width * tga_comp);
5937 }
5938 } else {
5939 // do I need to load a palette?
5940 if ( tga_indexed)
5941 {
5942 if (tga_palette_len == 0) { /* you have to have at least one entry! */
5943 STBI_FREE(tga_data);
5944 return stbi__errpuc("bad palette", "Corrupt TGA");
5945 }
5946
5947 // any data to skip? (offset usually = 0)
5948 stbi__skip(s, tga_palette_start );
5949 // load the palette
5950 tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
5951 if (!tga_palette) {
5952 STBI_FREE(tga_data);
5953 return stbi__errpuc("outofmem", "Out of memory");
5954 }
5955 if (tga_rgb16) {
5956 stbi_uc *pal_entry = tga_palette;
5957 STBI_ASSERT(tga_comp == STBI_rgb);
5958 for (i=0; i < tga_palette_len; ++i) {
5959 stbi__tga_read_rgb16(s, pal_entry);
5960 pal_entry += tga_comp;
5961 }
5962 } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
5963 STBI_FREE(tga_data);
5964 STBI_FREE(tga_palette);
5965 return stbi__errpuc("bad palette", "Corrupt TGA");
5966 }
5967 }
5968 // load the data
5969 for (i=0; i < tga_width * tga_height; ++i)
5970 {
5971 // if I'm in RLE mode, do I need to get a RLE chunk?
5972 if ( tga_is_RLE )
5973 {
5974 if ( RLE_count == 0 )
5975 {
5976 // yep, get the next byte as a RLE command
5977 int RLE_cmd = stbi__get8(s);
5978 RLE_count = 1 + (RLE_cmd & 127);
5979 RLE_repeating = RLE_cmd >> 7;
5980 read_next_pixel = 1;
5981 } else if ( !RLE_repeating )
5982 {
5983 read_next_pixel = 1;
5984 }
5985 } else
5986 {
5987 read_next_pixel = 1;
5988 }
5989 // OK, if I need to read a pixel, do it now
5990 if ( read_next_pixel )
5991 {
5992 // load however much data we did have
5993 if ( tga_indexed )
5994 {
5995 // read in index, then perform the lookup
5996 int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5997 if ( pal_idx >= tga_palette_len ) {
5998 // invalid index
5999 pal_idx = 0;
6000 }
6001 pal_idx *= tga_comp;
6002 for (j = 0; j < tga_comp; ++j) {
6003 raw_data[j] = tga_palette[pal_idx+j];
6004 }
6005 } else if(tga_rgb16) {
6006 STBI_ASSERT(tga_comp == STBI_rgb);
6007 stbi__tga_read_rgb16(s, raw_data);
6008 } else {
6009 // read in the data raw
6010 for (j = 0; j < tga_comp; ++j) {
6011 raw_data[j] = stbi__get8(s);
6012 }
6013 }
6014 // clear the reading flag for the next pixel
6015 read_next_pixel = 0;
6016 } // end of reading a pixel
6017
6018 // copy data
6019 for (j = 0; j < tga_comp; ++j)
6020 tga_data[i*tga_comp+j] = raw_data[j];
6021
6022 // in case we're in RLE mode, keep counting down
6023 --RLE_count;
6024 }
6025 // do I need to invert the image?
6026 if ( tga_inverted )
6027 {
6028 for (j = 0; j*2 < tga_height; ++j)
6029 {
6030 int index1 = j * tga_width * tga_comp;
6031 int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
6032 for (i = tga_width * tga_comp; i > 0; --i)
6033 {
6034 unsigned char temp = tga_data[index1];
6035 tga_data[index1] = tga_data[index2];
6036 tga_data[index2] = temp;
6037 ++index1;
6038 ++index2;
6039 }
6040 }
6041 }
6042 // clear my palette, if I had one
6043 if ( tga_palette != NULL )
6044 {
6045 STBI_FREE( tga_palette );
6046 }
6047 }
6048
6049 // swap RGB - if the source data was RGB16, it already is in the right order
6050 if (tga_comp >= 3 && !tga_rgb16)
6051 {
6052 unsigned char* tga_pixel = tga_data;
6053 for (i=0; i < tga_width * tga_height; ++i)
6054 {
6055 unsigned char temp = tga_pixel[0];
6056 tga_pixel[0] = tga_pixel[2];
6057 tga_pixel[2] = temp;
6058 tga_pixel += tga_comp;
6059 }
6060 }
6061
6062 // convert to target component count
6063 if (req_comp && req_comp != tga_comp)
6064 tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
6065
6066 // the things I do to get rid of an error message, and yet keep
6067 // Microsoft's C compilers happy... [8^(
6068 tga_palette_start = tga_palette_len = tga_palette_bits =
6069 tga_x_origin = tga_y_origin = 0;
6070 STBI_NOTUSED(tga_palette_start);
6071 // OK, done
6072 return tga_data;
6073}
6074#endif
6075
6076// *************************************************************************************************
6077// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
6078
6079#ifndef STBI_NO_PSD
6080static int stbi__psd_test(stbi__context *s)
6081{
6082 int r = (stbi__get32be(s) == 0x38425053);
6083 stbi__rewind(s);
6084 return r;
6085}
6086
6087static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
6088{
6089 int count, nleft, len;
6090
6091 count = 0;
6092 while ((nleft = pixelCount - count) > 0) {
6093 len = stbi__get8(s);
6094 if (len == 128) {
6095 // No-op.
6096 } else if (len < 128) {
6097 // Copy next len+1 bytes literally.
6098 len++;
6099 if (len > nleft) return 0; // corrupt data
6100 count += len;
6101 while (len) {
6102 *p = stbi__get8(s);
6103 p += 4;
6104 len--;
6105 }
6106 } else if (len > 128) {
6107 stbi_uc val;
6108 // Next -len+1 bytes in the dest are replicated from next source byte.
6109 // (Interpret len as a negative 8-bit int.)
6110 len = 257 - len;
6111 if (len > nleft) return 0; // corrupt data
6112 val = stbi__get8(s);
6113 count += len;
6114 while (len) {
6115 *p = val;
6116 p += 4;
6117 len--;
6118 }
6119 }
6120 }
6121
6122 return 1;
6123}
6124
6125static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
6126{
6127 int pixelCount;
6128 int channelCount, compression;
6129 int channel, i;
6130 int bitdepth;
6131 int w,h;
6132 stbi_uc *out;
6133 STBI_NOTUSED(ri);
6134
6135 // Check identifier
6136 if (stbi__get32be(s) != 0x38425053) // "8BPS"
6137 return stbi__errpuc("not PSD", "Corrupt PSD image");
6138
6139 // Check file type version.
6140 if (stbi__get16be(s) != 1)
6141 return stbi__errpuc("wrong version", "Unsupported version of PSD image");
6142
6143 // Skip 6 reserved bytes.
6144 stbi__skip(s, 6 );
6145
6146 // Read the number of channels (R, G, B, A, etc).
6147 channelCount = stbi__get16be(s);
6148 if (channelCount < 0 || channelCount > 16)
6149 return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
6150
6151 // Read the rows and columns of the image.
6152 h = stbi__get32be(s);
6153 w = stbi__get32be(s);
6154
6155 if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6156 if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6157
6158 // Make sure the depth is 8 bits.
6159 bitdepth = stbi__get16be(s);
6160 if (bitdepth != 8 && bitdepth != 16)
6161 return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
6162
6163 // Make sure the color mode is RGB.
6164 // Valid options are:
6165 // 0: Bitmap
6166 // 1: Grayscale
6167 // 2: Indexed color
6168 // 3: RGB color
6169 // 4: CMYK color
6170 // 7: Multichannel
6171 // 8: Duotone
6172 // 9: Lab color
6173 if (stbi__get16be(s) != 3)
6174 return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
6175
6176 // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
6177 stbi__skip(s,stbi__get32be(s) );
6178
6179 // Skip the image resources. (resolution, pen tool paths, etc)
6180 stbi__skip(s, stbi__get32be(s) );
6181
6182 // Skip the reserved data.
6183 stbi__skip(s, stbi__get32be(s) );
6184
6185 // Find out if the data is compressed.
6186 // Known values:
6187 // 0: no compression
6188 // 1: RLE compressed
6189 compression = stbi__get16be(s);
6190 if (compression > 1)
6191 return stbi__errpuc("bad compression", "PSD has an unknown compression format");
6192
6193 // Check size
6194 if (!stbi__mad3sizes_valid(4, w, h, 0))
6195 return stbi__errpuc("too large", "Corrupt PSD");
6196
6197 // Create the destination image.
6198
6199 if (!compression && bitdepth == 16 && bpc == 16) {
6200 out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
6201 ri->bits_per_channel = 16;
6202 } else
6203 out = (stbi_uc *) stbi__malloc(4 * w*h);
6204
6205 if (!out) return stbi__errpuc("outofmem", "Out of memory");
6206 pixelCount = w*h;
6207
6208 // Initialize the data to zero.
6209 //memset( out, 0, pixelCount * 4 );
6210
6211 // Finally, the image data.
6212 if (compression) {
6213 // RLE as used by .PSD and .TIFF
6214 // Loop until you get the number of unpacked bytes you are expecting:
6215 // Read the next source byte into n.
6216 // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
6217 // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
6218 // Else if n is 128, noop.
6219 // Endloop
6220
6221 // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
6222 // which we're going to just skip.
6223 stbi__skip(s, h * channelCount * 2 );
6224
6225 // Read the RLE data by channel.
6226 for (channel = 0; channel < 4; channel++) {
6227 stbi_uc *p;
6228
6229 p = out+channel;
6230 if (channel >= channelCount) {
6231 // Fill this channel with default data.
6232 for (i = 0; i < pixelCount; i++, p += 4)
6233 *p = (channel == 3 ? 255 : 0);
6234 } else {
6235 // Read the RLE data.
6236 if (!stbi__psd_decode_rle(s, p, pixelCount)) {
6237 STBI_FREE(out);
6238 return stbi__errpuc("corrupt", "bad RLE data");
6239 }
6240 }
6241 }
6242
6243 } else {
6244 // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
6245 // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
6246
6247 // Read the data by channel.
6248 for (channel = 0; channel < 4; channel++) {
6249 if (channel >= channelCount) {
6250 // Fill this channel with default data.
6251 if (bitdepth == 16 && bpc == 16) {
6252 stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
6253 stbi__uint16 val = channel == 3 ? 65535 : 0;
6254 for (i = 0; i < pixelCount; i++, q += 4)
6255 *q = val;
6256 } else {
6257 stbi_uc *p = out+channel;
6258 stbi_uc val = channel == 3 ? 255 : 0;
6259 for (i = 0; i < pixelCount; i++, p += 4)
6260 *p = val;
6261 }
6262 } else {
6263 if (ri->bits_per_channel == 16) { // output bpc
6264 stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
6265 for (i = 0; i < pixelCount; i++, q += 4)
6266 *q = (stbi__uint16) stbi__get16be(s);
6267 } else {
6268 stbi_uc *p = out+channel;
6269 if (bitdepth == 16) { // input bpc
6270 for (i = 0; i < pixelCount; i++, p += 4)
6271 *p = (stbi_uc) (stbi__get16be(s) >> 8);
6272 } else {
6273 for (i = 0; i < pixelCount; i++, p += 4)
6274 *p = stbi__get8(s);
6275 }
6276 }
6277 }
6278 }
6279 }
6280
6281 // remove weird white matte from PSD
6282 if (channelCount >= 4) {
6283 if (ri->bits_per_channel == 16) {
6284 for (i=0; i < w*h; ++i) {
6285 stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
6286 if (pixel[3] != 0 && pixel[3] != 65535) {
6287 float a = pixel[3] / 65535.0f;
6288 float ra = 1.0f / a;
6289 float inv_a = 65535.0f * (1 - ra);
6290 pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
6291 pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
6292 pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
6293 }
6294 }
6295 } else {
6296 for (i=0; i < w*h; ++i) {
6297 unsigned char *pixel = out + 4*i;
6298 if (pixel[3] != 0 && pixel[3] != 255) {
6299 float a = pixel[3] / 255.0f;
6300 float ra = 1.0f / a;
6301 float inv_a = 255.0f * (1 - ra);
6302 pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
6303 pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
6304 pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
6305 }
6306 }
6307 }
6308 }
6309
6310 // convert to desired output format
6311 if (req_comp && req_comp != 4) {
6312 if (ri->bits_per_channel == 16)
6313 out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
6314 else
6315 out = stbi__convert_format(out, 4, req_comp, w, h);
6316 if (out == NULL) return out; // stbi__convert_format frees input on failure
6317 }
6318
6319 if (comp) *comp = 4;
6320 *y = h;
6321 *x = w;
6322
6323 return out;
6324}
6325#endif
6326
6327// *************************************************************************************************
6328// Softimage PIC loader
6329// by Tom Seddon
6330//
6331// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
6332// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
6333
6334#ifndef STBI_NO_PIC
6335static int stbi__pic_is4(stbi__context *s,const char *str)
6336{
6337 int i;
6338 for (i=0; i<4; ++i)
6339 if (stbi__get8(s) != (stbi_uc)str[i])
6340 return 0;
6341
6342 return 1;
6343}
6344
6345static int stbi__pic_test_core(stbi__context *s)
6346{
6347 int i;
6348
6349 if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
6350 return 0;
6351
6352 for(i=0;i<84;++i)
6353 stbi__get8(s);
6354
6355 if (!stbi__pic_is4(s,"PICT"))
6356 return 0;
6357
6358 return 1;
6359}
6360
6361typedef struct
6362{
6363 stbi_uc size,type,channel;
6364} stbi__pic_packet;
6365
6366static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
6367{
6368 int mask=0x80, i;
6369
6370 for (i=0; i<4; ++i, mask>>=1) {
6371 if (channel & mask) {
6372 if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
6373 dest[i]=stbi__get8(s);
6374 }
6375 }
6376
6377 return dest;
6378}
6379
6380static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
6381{
6382 int mask=0x80,i;
6383
6384 for (i=0;i<4; ++i, mask>>=1)
6385 if (channel&mask)
6386 dest[i]=src[i];
6387}
6388
6389static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
6390{
6391 int act_comp=0,num_packets=0,y,chained;
6392 stbi__pic_packet packets[10];
6393
6394 // this will (should...) cater for even some bizarre stuff like having data
6395 // for the same channel in multiple packets.
6396 do {
6397 stbi__pic_packet *packet;
6398
6399 if (num_packets==sizeof(packets)/sizeof(packets[0]))
6400 return stbi__errpuc("bad format","too many packets");
6401
6402 packet = &packets[num_packets++];
6403
6404 chained = stbi__get8(s);
6405 packet->size = stbi__get8(s);
6406 packet->type = stbi__get8(s);
6407 packet->channel = stbi__get8(s);
6408
6409 act_comp |= packet->channel;
6410
6411 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
6412 if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
6413 } while (chained);
6414
6415 *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
6416
6417 for(y=0; y<height; ++y) {
6418 int packet_idx;
6419
6420 for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
6421 stbi__pic_packet *packet = &packets[packet_idx];
6422 stbi_uc *dest = result+y*width*4;
6423
6424 switch (packet->type) {
6425 default:
6426 return stbi__errpuc("bad format","packet has bad compression type");
6427
6428 case 0: {//uncompressed
6429 int x;
6430
6431 for(x=0;x<width;++x, dest+=4)
6432 if (!stbi__readval(s,packet->channel,dest))
6433 return 0;
6434 break;
6435 }
6436
6437 case 1://Pure RLE
6438 {
6439 int left=width, i;
6440
6441 while (left>0) {
6442 stbi_uc count,value[4];
6443
6444 count=stbi__get8(s);
6445 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
6446
6447 if (count > left)
6448 count = (stbi_uc) left;
6449
6450 if (!stbi__readval(s,packet->channel,value)) return 0;
6451
6452 for(i=0; i<count; ++i,dest+=4)
6453 stbi__copyval(packet->channel,dest,value);
6454 left -= count;
6455 }
6456 }
6457 break;
6458
6459 case 2: {//Mixed RLE
6460 int left=width;
6461 while (left>0) {
6462 int count = stbi__get8(s), i;
6463 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
6464
6465 if (count >= 128) { // Repeated
6466 stbi_uc value[4];
6467
6468 if (count==128)
6469 count = stbi__get16be(s);
6470 else
6471 count -= 127;
6472 if (count > left)
6473 return stbi__errpuc("bad file","scanline overrun");
6474
6475 if (!stbi__readval(s,packet->channel,value))
6476 return 0;
6477
6478 for(i=0;i<count;++i, dest += 4)
6479 stbi__copyval(packet->channel,dest,value);
6480 } else { // Raw
6481 ++count;
6482 if (count>left) return stbi__errpuc("bad file","scanline overrun");
6483
6484 for(i=0;i<count;++i, dest+=4)
6485 if (!stbi__readval(s,packet->channel,dest))
6486 return 0;
6487 }
6488 left-=count;
6489 }
6490 break;
6491 }
6492 }
6493 }
6494 }
6495
6496 return result;
6497}
6498
6499static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
6500{
6501 stbi_uc *result;
6502 int i, x,y, internal_comp;
6503 STBI_NOTUSED(ri);
6504
6505 if (!comp) comp = &internal_comp;
6506
6507 for (i=0; i<92; ++i)
6508 stbi__get8(s);
6509
6510 x = stbi__get16be(s);
6511 y = stbi__get16be(s);
6512
6513 if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6514 if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6515
6516 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
6517 if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
6518
6519 stbi__get32be(s); //skip `ratio'
6520 stbi__get16be(s); //skip `fields'
6521 stbi__get16be(s); //skip `pad'
6522
6523 // intermediate buffer is RGBA
6524 result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
6525 if (!result) return stbi__errpuc("outofmem", "Out of memory");
6526 memset(result, 0xff, x*y*4);
6527
6528 if (!stbi__pic_load_core(s,x,y,comp, result)) {
6529 STBI_FREE(result);
6530 result=0;
6531 }
6532 *px = x;
6533 *py = y;
6534 if (req_comp == 0) req_comp = *comp;
6535 result=stbi__convert_format(result,4,req_comp,x,y);
6536
6537 return result;
6538}
6539
6540static int stbi__pic_test(stbi__context *s)
6541{
6542 int r = stbi__pic_test_core(s);
6543 stbi__rewind(s);
6544 return r;
6545}
6546#endif
6547
6548// *************************************************************************************************
6549// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6550
6551#ifndef STBI_NO_GIF
6552typedef struct
6553{
6554 stbi__int16 prefix;
6555 stbi_uc first;
6556 stbi_uc suffix;
6557} stbi__gif_lzw;
6558
6559typedef struct
6560{
6561 int w,h;
6562 stbi_uc *out; // output buffer (always 4 components)
6563 stbi_uc *background; // The current "background" as far as a gif is concerned
6564 stbi_uc *history;
6565 int flags, bgindex, ratio, transparent, eflags;
6566 stbi_uc pal[256][4];
6567 stbi_uc lpal[256][4];
6568 stbi__gif_lzw codes[8192];
6569 stbi_uc *color_table;
6570 int parse, step;
6571 int lflags;
6572 int start_x, start_y;
6573 int max_x, max_y;
6574 int cur_x, cur_y;
6575 int line_size;
6576 int delay;
6577} stbi__gif;
6578
6579static int stbi__gif_test_raw(stbi__context *s)
6580{
6581 int sz;
6582 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
6583 sz = stbi__get8(s);
6584 if (sz != '9' && sz != '7') return 0;
6585 if (stbi__get8(s) != 'a') return 0;
6586 return 1;
6587}
6588
6589static int stbi__gif_test(stbi__context *s)
6590{
6591 int r = stbi__gif_test_raw(s);
6592 stbi__rewind(s);
6593 return r;
6594}
6595
6596static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
6597{
6598 int i;
6599 for (i=0; i < num_entries; ++i) {
6600 pal[i][2] = stbi__get8(s);
6601 pal[i][1] = stbi__get8(s);
6602 pal[i][0] = stbi__get8(s);
6603 pal[i][3] = transp == i ? 0 : 255;
6604 }
6605}
6606
6607static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
6608{
6609 stbi_uc version;
6610 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
6611 return stbi__err("not GIF", "Corrupt GIF");
6612
6613 version = stbi__get8(s);
6614 if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
6615 if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
6616
6617 stbi__g_failure_reason = "";
6618 g->w = stbi__get16le(s);
6619 g->h = stbi__get16le(s);
6620 g->flags = stbi__get8(s);
6621 g->bgindex = stbi__get8(s);
6622 g->ratio = stbi__get8(s);
6623 g->transparent = -1;
6624
6625 if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
6626 if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
6627
6628 if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
6629
6630 if (is_info) return 1;
6631
6632 if (g->flags & 0x80)
6633 stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
6634
6635 return 1;
6636}
6637
6638static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
6639{
6640 stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
6641 if (!g) return stbi__err("outofmem", "Out of memory");
6642 if (!stbi__gif_header(s, g, comp, 1)) {
6643 STBI_FREE(g);
6644 stbi__rewind( s );
6645 return 0;
6646 }
6647 if (x) *x = g->w;
6648 if (y) *y = g->h;
6649 STBI_FREE(g);
6650 return 1;
6651}
6652
6653static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
6654{
6655 stbi_uc *p, *c;
6656 int idx;
6657
6658 // recurse to decode the prefixes, since the linked-list is backwards,
6659 // and working backwards through an interleaved image would be nasty
6660 if (g->codes[code].prefix >= 0)
6661 stbi__out_gif_code(g, g->codes[code].prefix);
6662
6663 if (g->cur_y >= g->max_y) return;
6664
6665 idx = g->cur_x + g->cur_y;
6666 p = &g->out[idx];
6667 g->history[idx / 4] = 1;
6668
6669 c = &g->color_table[g->codes[code].suffix * 4];
6670 if (c[3] > 128) { // don't render transparent pixels;
6671 p[0] = c[2];
6672 p[1] = c[1];
6673 p[2] = c[0];
6674 p[3] = c[3];
6675 }
6676 g->cur_x += 4;
6677
6678 if (g->cur_x >= g->max_x) {
6679 g->cur_x = g->start_x;
6680 g->cur_y += g->step;
6681
6682 while (g->cur_y >= g->max_y && g->parse > 0) {
6683 g->step = (1 << g->parse) * g->line_size;
6684 g->cur_y = g->start_y + (g->step >> 1);
6685 --g->parse;
6686 }
6687 }
6688}
6689
6690static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
6691{
6692 stbi_uc lzw_cs;
6693 stbi__int32 len, init_code;
6694 stbi__uint32 first;
6695 stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
6696 stbi__gif_lzw *p;
6697
6698 lzw_cs = stbi__get8(s);
6699 if (lzw_cs > 12) return NULL;
6700 clear = 1 << lzw_cs;
6701 first = 1;
6702 codesize = lzw_cs + 1;
6703 codemask = (1 << codesize) - 1;
6704 bits = 0;
6705 valid_bits = 0;
6706 for (init_code = 0; init_code < clear; init_code++) {
6707 g->codes[init_code].prefix = -1;
6708 g->codes[init_code].first = (stbi_uc) init_code;
6709 g->codes[init_code].suffix = (stbi_uc) init_code;
6710 }
6711
6712 // support no starting clear code
6713 avail = clear+2;
6714 oldcode = -1;
6715
6716 len = 0;
6717 for(;;) {
6718 if (valid_bits < codesize) {
6719 if (len == 0) {
6720 len = stbi__get8(s); // start new block
6721 if (len == 0)
6722 return g->out;
6723 }
6724 --len;
6725 bits |= (stbi__int32) stbi__get8(s) << valid_bits;
6726 valid_bits += 8;
6727 } else {
6728 stbi__int32 code = bits & codemask;
6729 bits >>= codesize;
6730 valid_bits -= codesize;
6731 // @OPTIMIZE: is there some way we can accelerate the non-clear path?
6732 if (code == clear) { // clear code
6733 codesize = lzw_cs + 1;
6734 codemask = (1 << codesize) - 1;
6735 avail = clear + 2;
6736 oldcode = -1;
6737 first = 0;
6738 } else if (code == clear + 1) { // end of stream code
6739 stbi__skip(s, len);
6740 while ((len = stbi__get8(s)) > 0)
6741 stbi__skip(s,len);
6742 return g->out;
6743 } else if (code <= avail) {
6744 if (first) {
6745 return stbi__errpuc("no clear code", "Corrupt GIF");
6746 }
6747
6748 if (oldcode >= 0) {
6749 p = &g->codes[avail++];
6750 if (avail > 8192) {
6751 return stbi__errpuc("too many codes", "Corrupt GIF");
6752 }
6753
6754 p->prefix = (stbi__int16) oldcode;
6755 p->first = g->codes[oldcode].first;
6756 p->suffix = (code == avail) ? p->first : g->codes[code].first;
6757 } else if (code == avail)
6758 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6759
6760 stbi__out_gif_code(g, (stbi__uint16) code);
6761
6762 if ((avail & codemask) == 0 && avail <= 0x0FFF) {
6763 codesize++;
6764 codemask = (1 << codesize) - 1;
6765 }
6766
6767 oldcode = code;
6768 } else {
6769 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6770 }
6771 }
6772 }
6773}
6774
6775// this function is designed to support animated gifs, although stb_image doesn't support it
6776// two back is the image from two frames ago, used for a very specific disposal format
6777static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
6778{
6779 int dispose;
6780 int first_frame;
6781 int pi;
6782 int pcount;
6783 STBI_NOTUSED(req_comp);
6784
6785 // on first frame, any non-written pixels get the background colour (non-transparent)
6786 first_frame = 0;
6787 if (g->out == 0) {
6788 if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
6789 if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
6790 return stbi__errpuc("too large", "GIF image is too large");
6791 pcount = g->w * g->h;
6792 g->out = (stbi_uc *) stbi__malloc(4 * pcount);
6793 g->background = (stbi_uc *) stbi__malloc(4 * pcount);
6794 g->history = (stbi_uc *) stbi__malloc(pcount);
6795 if (!g->out || !g->background || !g->history)
6796 return stbi__errpuc("outofmem", "Out of memory");
6797
6798 // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
6799 // background colour is only used for pixels that are not rendered first frame, after that "background"
6800 // color refers to the color that was there the previous frame.
6801 memset(g->out, 0x00, 4 * pcount);
6802 memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
6803 memset(g->history, 0x00, pcount); // pixels that were affected previous frame
6804 first_frame = 1;
6805 } else {
6806 // second frame - how do we dispose of the previous one?
6807 dispose = (g->eflags & 0x1C) >> 2;
6808 pcount = g->w * g->h;
6809
6810 if ((dispose == 3) && (two_back == 0)) {
6811 dispose = 2; // if I don't have an image to revert back to, default to the old background
6812 }
6813
6814 if (dispose == 3) { // use previous graphic
6815 for (pi = 0; pi < pcount; ++pi) {
6816 if (g->history[pi]) {
6817 memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
6818 }
6819 }
6820 } else if (dispose == 2) {
6821 // restore what was changed last frame to background before that frame;
6822 for (pi = 0; pi < pcount; ++pi) {
6823 if (g->history[pi]) {
6824 memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
6825 }
6826 }
6827 } else {
6828 // This is a non-disposal case eithe way, so just
6829 // leave the pixels as is, and they will become the new background
6830 // 1: do not dispose
6831 // 0: not specified.
6832 }
6833
6834 // background is what out is after the undoing of the previou frame;
6835 memcpy( g->background, g->out, 4 * g->w * g->h );
6836 }
6837
6838 // clear my history;
6839 memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame
6840
6841 for (;;) {
6842 int tag = stbi__get8(s);
6843 switch (tag) {
6844 case 0x2C: /* Image Descriptor */
6845 {
6846 stbi__int32 x, y, w, h;
6847 stbi_uc *o;
6848
6849 x = stbi__get16le(s);
6850 y = stbi__get16le(s);
6851 w = stbi__get16le(s);
6852 h = stbi__get16le(s);
6853 if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6854 return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6855
6856 g->line_size = g->w * 4;
6857 g->start_x = x * 4;
6858 g->start_y = y * g->line_size;
6859 g->max_x = g->start_x + w * 4;
6860 g->max_y = g->start_y + h * g->line_size;
6861 g->cur_x = g->start_x;
6862 g->cur_y = g->start_y;
6863
6864 // if the width of the specified rectangle is 0, that means
6865 // we may not see *any* pixels or the image is malformed;
6866 // to make sure this is caught, move the current y down to
6867 // max_y (which is what out_gif_code checks).
6868 if (w == 0)
6869 g->cur_y = g->max_y;
6870
6871 g->lflags = stbi__get8(s);
6872
6873 if (g->lflags & 0x40) {
6874 g->step = 8 * g->line_size; // first interlaced spacing
6875 g->parse = 3;
6876 } else {
6877 g->step = g->line_size;
6878 g->parse = 0;
6879 }
6880
6881 if (g->lflags & 0x80) {
6882 stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6883 g->color_table = (stbi_uc *) g->lpal;
6884 } else if (g->flags & 0x80) {
6885 g->color_table = (stbi_uc *) g->pal;
6886 } else
6887 return stbi__errpuc("missing color table", "Corrupt GIF");
6888
6889 o = stbi__process_gif_raster(s, g);
6890 if (!o) return NULL;
6891
6892 // if this was the first frame,
6893 pcount = g->w * g->h;
6894 if (first_frame && (g->bgindex > 0)) {
6895 // if first frame, any pixel not drawn to gets the background color
6896 for (pi = 0; pi < pcount; ++pi) {
6897 if (g->history[pi] == 0) {
6898 g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
6899 memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
6900 }
6901 }
6902 }
6903
6904 return o;
6905 }
6906
6907 case 0x21: // Comment Extension.
6908 {
6909 int len;
6910 int ext = stbi__get8(s);
6911 if (ext == 0xF9) { // Graphic Control Extension.
6912 len = stbi__get8(s);
6913 if (len == 4) {
6914 g->eflags = stbi__get8(s);
6915 g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
6916
6917 // unset old transparent
6918 if (g->transparent >= 0) {
6919 g->pal[g->transparent][3] = 255;
6920 }
6921 if (g->eflags & 0x01) {
6922 g->transparent = stbi__get8(s);
6923 if (g->transparent >= 0) {
6924 g->pal[g->transparent][3] = 0;
6925 }
6926 } else {
6927 // don't need transparent
6928 stbi__skip(s, 1);
6929 g->transparent = -1;
6930 }
6931 } else {
6932 stbi__skip(s, len);
6933 break;
6934 }
6935 }
6936 while ((len = stbi__get8(s)) != 0) {
6937 stbi__skip(s, len);
6938 }
6939 break;
6940 }
6941
6942 case 0x3B: // gif stream termination code
6943 return (stbi_uc *) s; // using '1' causes warning on some compilers
6944
6945 default:
6946 return stbi__errpuc("unknown code", "Corrupt GIF");
6947 }
6948 }
6949}
6950
6951static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
6952{
6953 STBI_FREE(g->out);
6954 STBI_FREE(g->history);
6955 STBI_FREE(g->background);
6956
6957 if (out) STBI_FREE(out);
6958 if (delays && *delays) STBI_FREE(*delays);
6959 return stbi__errpuc("outofmem", "Out of memory");
6960}
6961
6962static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
6963{
6964 if (stbi__gif_test(s)) {
6965 int layers = 0;
6966 stbi_uc *u = 0;
6967 stbi_uc *out = 0;
6968 stbi_uc *two_back = 0;
6969 stbi__gif g;
6970 int stride;
6971 int out_size = 0;
6972 int delays_size = 0;
6973
6974 STBI_NOTUSED(out_size);
6975 STBI_NOTUSED(delays_size);
6976
6977 memset(&g, 0, sizeof(g));
6978 if (delays) {
6979 *delays = 0;
6980 }
6981
6982 do {
6983 u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
6984 if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
6985
6986 if (u) {
6987 *x = g.w;
6988 *y = g.h;
6989 ++layers;
6990 stride = g.w * g.h * 4;
6991
6992 if (out) {
6993 void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
6994 if (!tmp)
6995 return stbi__load_gif_main_outofmem(&g, out, delays);
6996 else {
6997 out = (stbi_uc*) tmp;
6998 out_size = layers * stride;
6999 }
7000
7001 if (delays) {
7002 int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
7003 if (!new_delays)
7004 return stbi__load_gif_main_outofmem(&g, out, delays);
7005 *delays = new_delays;
7006 delays_size = layers * sizeof(int);
7007 }
7008 } else {
7009 out = (stbi_uc*)stbi__malloc( layers * stride );
7010 if (!out)
7011 return stbi__load_gif_main_outofmem(&g, out, delays);
7012 out_size = layers * stride;
7013 if (delays) {
7014 *delays = (int*) stbi__malloc( layers * sizeof(int) );
7015 if (!*delays)
7016 return stbi__load_gif_main_outofmem(&g, out, delays);
7017 delays_size = layers * sizeof(int);
7018 }
7019 }
7020 memcpy( out + ((layers - 1) * stride), u, stride );
7021 if (layers >= 2) {
7022 two_back = out - 2 * stride;
7023 }
7024
7025 if (delays) {
7026 (*delays)[layers - 1U] = g.delay;
7027 }
7028 }
7029 } while (u != 0);
7030
7031 // free temp buffer;
7032 STBI_FREE(g.out);
7033 STBI_FREE(g.history);
7034 STBI_FREE(g.background);
7035
7036 // do the final conversion after loading everything;
7037 if (req_comp && req_comp != 4)
7038 out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
7039
7040 *z = layers;
7041 return out;
7042 } else {
7043 return stbi__errpuc("not GIF", "Image was not as a gif type.");
7044 }
7045}
7046
7047static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7048{
7049 stbi_uc *u = 0;
7050 stbi__gif g;
7051 memset(&g, 0, sizeof(g));
7052 STBI_NOTUSED(ri);
7053
7054 u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
7055 if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
7056 if (u) {
7057 *x = g.w;
7058 *y = g.h;
7059
7060 // moved conversion to after successful load so that the same
7061 // can be done for multiple frames.
7062 if (req_comp && req_comp != 4)
7063 u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
7064 } else if (g.out) {
7065 // if there was an error and we allocated an image buffer, free it!
7066 STBI_FREE(g.out);
7067 }
7068
7069 // free buffers needed for multiple frame loading;
7070 STBI_FREE(g.history);
7071 STBI_FREE(g.background);
7072
7073 return u;
7074}
7075
7076static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
7077{
7078 return stbi__gif_info_raw(s,x,y,comp);
7079}
7080#endif
7081
7082// *************************************************************************************************
7083// Radiance RGBE HDR loader
7084// originally by Nicolas Schulz
7085#ifndef STBI_NO_HDR
7086static int stbi__hdr_test_core(stbi__context *s, const char *signature)
7087{
7088 int i;
7089 for (i=0; signature[i]; ++i)
7090 if (stbi__get8(s) != signature[i])
7091 return 0;
7092 stbi__rewind(s);
7093 return 1;
7094}
7095
7096static int stbi__hdr_test(stbi__context* s)
7097{
7098 int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
7099 stbi__rewind(s);
7100 if(!r) {
7101 r = stbi__hdr_test_core(s, "#?RGBE\n");
7102 stbi__rewind(s);
7103 }
7104 return r;
7105}
7106
7107#define STBI__HDR_BUFLEN 1024
7108static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
7109{
7110 int len=0;
7111 char c = '\0';
7112
7113 c = (char) stbi__get8(z);
7114
7115 while (!stbi__at_eof(z) && c != '\n') {
7116 buffer[len++] = c;
7117 if (len == STBI__HDR_BUFLEN-1) {
7118 // flush to end of line
7119 while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
7120 ;
7121 break;
7122 }
7123 c = (char) stbi__get8(z);
7124 }
7125
7126 buffer[len] = 0;
7127 return buffer;
7128}
7129
7130static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
7131{
7132 if ( input[3] != 0 ) {
7133 float f1;
7134 // Exponent
7135 f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
7136 if (req_comp <= 2)
7137 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
7138 else {
7139 output[0] = input[0] * f1;
7140 output[1] = input[1] * f1;
7141 output[2] = input[2] * f1;
7142 }
7143 if (req_comp == 2) output[1] = 1;
7144 if (req_comp == 4) output[3] = 1;
7145 } else {
7146 switch (req_comp) {
7147 case 4: output[3] = 1; /* fallthrough */
7148 case 3: output[0] = output[1] = output[2] = 0;
7149 break;
7150 case 2: output[1] = 1; /* fallthrough */
7151 case 1: output[0] = 0;
7152 break;
7153 }
7154 }
7155}
7156
// Decodes a Radiance RGBE (.hdr) image into a newly allocated float buffer.
//
//   s        - input stream, positioned at the start of the file
//   x, y     - receive the parsed width and height
//   comp     - if non-NULL, receives 3
//   req_comp - number of float components to produce per pixel; 0 means 3
//   ri       - unused
//
// Returns the pixel buffer (height rows of width pixels, req_comp floats each,
// as indexed by the stbi__hdr_convert calls below), or the result of
// stbi__errpf() on a bad header, unsupported layout, oversized image,
// allocation failure or corrupt RLE data.
//
// NOTE(review): the non-RLE fallback inside the RLE branch uses a goto that
// jumps INTO the nested flat-data loops after forcing i = 1, j = 0; keep the
// statement order intact when touching this function.
7157static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7158{
7159 char buffer[STBI__HDR_BUFLEN];
7160 char *token;
7161 int valid = 0;
7162 int width, height;
7163 stbi_uc *scanline;
7164 float *hdr_data;
7165 int len;
7166 unsigned char count, value;
7167 int i, j, k, c1,c2, z;
7168 const char *headerToken;
7169 STBI_NOTUSED(ri);
7170
7171 // Check identifier
// only the two signature lines below are accepted as the first header line
7172 headerToken = stbi__hdr_gettoken(s,buffer);
7173 if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
7174 return stbi__errpf("not HDR", "Corrupt HDR image");
7175
7176 // Parse header
// header lines are read until an empty one; the FORMAT line must be present
7177 for(;;) {
7178 token = stbi__hdr_gettoken(s,buffer);
7179 if (token[0] == 0) break;
7180 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
7181 }
7182
7183 if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
7184
7185 // Parse width and height
7186 // can't use sscanf() if we're not using stdio!
// the resolution line must look like "-Y <height> +X <width>"
7187 token = stbi__hdr_gettoken(s,buffer);
7188 if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
7189 token += 3;
7190 height = (int) strtol(token, &token, 10);
7191 while (*token == ' ') ++token;
7192 if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
7193 token += 3;
7194 width = (int) strtol(token, NULL, 10);
7195
7196 if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
7197 if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
7198
7199 *x = width;
7200 *y = height;
7201
7202 if (comp) *comp = 3;
7203 if (req_comp == 0) req_comp = 3;
7204
// size check happens before the allocation; nothing has been allocated on the
// error returns above this point
7205 if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
7206 return stbi__errpf("too large", "HDR image is too large");
7207
7208 // Read data
7209 hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
7210 if (!hdr_data)
7211 return stbi__errpf("outofmem", "Out of memory");
7212
7213 // Load image data
7214 // image data is stored as some number of scanlines
// widths outside [8, 32768) are always read as flat (uncompressed) RGBE
7215 if ( width < 8 || width >= 32768) {
7216 // Read flat data
7217 for (j=0; j < height; ++j) {
7218 for (i=0; i < width; ++i) {
7219 stbi_uc rgbe[4];
// entry point for the goto in the RLE branch below
// NOTE(review): the return value of stbi__getn is not checked here
7220 main_decode_loop:
7221 stbi__getn(s, rgbe, 4);
7222 stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
7223 }
7224 }
7225 } else {
7226 // Read RLE-encoded data
7227 scanline = NULL;
7228
7229 for (j = 0; j < height; ++j) {
// an RLE scanline starts with the bytes 2, 2 followed by a 16-bit length
// whose top bit is clear
7230 c1 = stbi__get8(s);
7231 c2 = stbi__get8(s);
7232 len = stbi__get8(s);
7233 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
7234 // not run-length encoded, so we have to actually use THIS data as a decoded
7235 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
7236 stbi_uc rgbe[4];
7237 rgbe[0] = (stbi_uc) c1;
7238 rgbe[1] = (stbi_uc) c2;
7239 rgbe[2] = (stbi_uc) len;
7240 rgbe[3] = (stbi_uc) stbi__get8(s);
// the four bytes already consumed become pixel 0; decoding then continues in
// the flat loop at pixel (row 0, column 1)
// NOTE(review): j is reset to 0 even if this is reached on a later scanline
7241 stbi__hdr_convert(hdr_data, rgbe, req_comp);
7242 i = 1;
7243 j = 0;
7244 STBI_FREE(scanline);
7245 goto main_decode_loop; // yes, this makes no sense
7246 }
7247 len <<= 8;
7248 len |= stbi__get8(s);
7249 if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
// the scratch scanline (width * 4 bytes) is allocated once and reused
7250 if (scanline == NULL) {
7251 scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
7252 if (!scanline) {
7253 STBI_FREE(hdr_data);
7254 return stbi__errpf("outofmem", "Out of memory");
7255 }
7256 }
7257
// the four byte planes (k = 0..3) are stored one after another, each
// separately run-length encoded; they are interleaved into scanline[i*4 + k]
7258 for (k = 0; k < 4; ++k) {
7259 int nleft;
7260 i = 0;
7261 while ((nleft = width - i) > 0) {
7262 count = stbi__get8(s);
7263 if (count > 128) {
7264 // Run
// (count - 128) copies of the next byte
7265 value = stbi__get8(s);
7266 count -= 128;
7267 if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
7268 for (z = 0; z < count; ++z)
7269 scanline[i++ * 4 + k] = value;
7270 } else {
7271 // Dump
// `count` literal bytes follow
7272 if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
7273 for (z = 0; z < count; ++z)
7274 scanline[i++ * 4 + k] = stbi__get8(s);
7275 }
7276 }
7277 }
// convert the reassembled RGBE scanline into row j of the float output
7278 for (i=0; i < width; ++i)
7279 stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
7280 }
7281 if (scanline)
7282 STBI_FREE(scanline);
7283 }
7284
7285 return hdr_data;
7286}
7287
7288static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
7289{
7290 char buffer[STBI__HDR_BUFLEN];
7291 char *token;
7292 int valid = 0;
7293 int dummy;
7294
7295 if (!x) x = &dummy;
7296 if (!y) y = &dummy;
7297 if (!comp) comp = &dummy;
7298
7299 if (stbi__hdr_test(s) == 0) {
7300 stbi__rewind( s );
7301 return 0;
7302 }
7303
7304 for(;;) {
7305 token = stbi__hdr_gettoken(s,buffer);
7306 if (token[0] == 0) break;
7307 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
7308 }
7309
7310 if (!valid) {
7311 stbi__rewind( s );
7312 return 0;
7313 }
7314 token = stbi__hdr_gettoken(s,buffer);
7315 if (strncmp(token, "-Y ", 3)) {
7316 stbi__rewind( s );
7317 return 0;
7318 }
7319 token += 3;
7320 *y = (int) strtol(token, &token, 10);
7321 while (*token == ' ') ++token;
7322 if (strncmp(token, "+X ", 3)) {
7323 stbi__rewind( s );
7324 return 0;
7325 }
7326 token += 3;
7327 *x = (int) strtol(token, NULL, 10);
7328 *comp = 3;
7329 return 1;
7330}
7331#endif // STBI_NO_HDR
7332
7333#ifndef STBI_NO_BMP
7334static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
7335{
7336 void *p;
7337 stbi__bmp_data info;
7338
7339 info.all_a = 255;
7340 p = stbi__bmp_parse_header(s, &info);
7341 if (p == NULL) {
7342 stbi__rewind( s );
7343 return 0;
7344 }
7345 if (x) *x = s->img_x;
7346 if (y) *y = s->img_y;
7347 if (comp) {
7348 if (info.bpp == 24 && info.ma == 0xff000000)
7349 *comp = 3;
7350 else
7351 *comp = info.ma ? 4 : 3;
7352 }
7353 return 1;
7354}
7355#endif
7356
7357#ifndef STBI_NO_PSD
7358static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
7359{
7360 int channelCount, dummy, depth;
7361 if (!x) x = &dummy;
7362 if (!y) y = &dummy;
7363 if (!comp) comp = &dummy;
7364 if (stbi__get32be(s) != 0x38425053) {
7365 stbi__rewind( s );
7366 return 0;
7367 }
7368 if (stbi__get16be(s) != 1) {
7369 stbi__rewind( s );
7370 return 0;
7371 }
7372 stbi__skip(s, 6);
7373 channelCount = stbi__get16be(s);
7374 if (channelCount < 0 || channelCount > 16) {
7375 stbi__rewind( s );
7376 return 0;
7377 }
7378 *y = stbi__get32be(s);
7379 *x = stbi__get32be(s);
7380 depth = stbi__get16be(s);
7381 if (depth != 8 && depth != 16) {
7382 stbi__rewind( s );
7383 return 0;
7384 }
7385 if (stbi__get16be(s) != 3) {
7386 stbi__rewind( s );
7387 return 0;
7388 }
7389 *comp = 4;
7390 return 1;
7391}
7392
7393static int stbi__psd_is16(stbi__context *s)
7394{
7395 int channelCount, depth;
7396 if (stbi__get32be(s) != 0x38425053) {
7397 stbi__rewind( s );
7398 return 0;
7399 }
7400 if (stbi__get16be(s) != 1) {
7401 stbi__rewind( s );
7402 return 0;
7403 }
7404 stbi__skip(s, 6);
7405 channelCount = stbi__get16be(s);
7406 if (channelCount < 0 || channelCount > 16) {
7407 stbi__rewind( s );
7408 return 0;
7409 }
7410 STBI_NOTUSED(stbi__get32be(s));
7411 STBI_NOTUSED(stbi__get32be(s));
7412 depth = stbi__get16be(s);
7413 if (depth != 16) {
7414 stbi__rewind( s );
7415 return 0;
7416 }
7417 return 1;
7418}
7419#endif
7420
7421#ifndef STBI_NO_PIC
7422static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
7423{
7424 int act_comp=0,num_packets=0,chained,dummy;
7425 stbi__pic_packet packets[10];
7426
7427 if (!x) x = &dummy;
7428 if (!y) y = &dummy;
7429 if (!comp) comp = &dummy;
7430
7431 if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
7432 stbi__rewind(s);
7433 return 0;
7434 }
7435
7436 stbi__skip(s, 88);
7437
7438 *x = stbi__get16be(s);
7439 *y = stbi__get16be(s);
7440 if (stbi__at_eof(s)) {
7441 stbi__rewind( s);
7442 return 0;
7443 }
7444 if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
7445 stbi__rewind( s );
7446 return 0;
7447 }
7448
7449 stbi__skip(s, 8);
7450
7451 do {
7452 stbi__pic_packet *packet;
7453
7454 if (num_packets==sizeof(packets)/sizeof(packets[0]))
7455 return 0;
7456
7457 packet = &packets[num_packets++];
7458 chained = stbi__get8(s);
7459 packet->size = stbi__get8(s);
7460 packet->type = stbi__get8(s);
7461 packet->channel = stbi__get8(s);
7462 act_comp |= packet->channel;
7463
7464 if (stbi__at_eof(s)) {
7465 stbi__rewind( s );
7466 return 0;
7467 }
7468 if (packet->size != 8) {
7469 stbi__rewind( s );
7470 return 0;
7471 }
7472 } while (chained);
7473
7474 *comp = (act_comp & 0x10 ? 4 : 3);
7475
7476 return 1;
7477}
7478#endif
7479
7480// *************************************************************************************************
7481// Portable Gray Map and Portable Pixel Map loader
7482// by Ken Miller
7483//
7484// PGM: http://netpbm.sourceforge.net/doc/pgm.html
7485// PPM: http://netpbm.sourceforge.net/doc/ppm.html
7486//
7487// Known limitations:
7488// Does not support comments in the header section
7489// Does not support ASCII image data (formats P2 and P3)
7490
7491#ifndef STBI_NO_PNM
7492
7493static int stbi__pnm_test(stbi__context *s)
7494{
7495 char p, t;
7496 p = (char) stbi__get8(s);
7497 t = (char) stbi__get8(s);
7498 if (p != 'P' || (t != '5' && t != '6')) {
7499 stbi__rewind( s );
7500 return 0;
7501 }
7502 return 1;
7503}
7504
7505static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7506{
7507 stbi_uc *out;
7508 STBI_NOTUSED(ri);
7509
7510 ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
7511 if (ri->bits_per_channel == 0)
7512 return 0;
7513
7514 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
7515 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
7516
7517 *x = s->img_x;
7518 *y = s->img_y;
7519 if (comp) *comp = s->img_n;
7520
7521 if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
7522 return stbi__errpuc("too large", "PNM too large");
7523
7524 out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
7525 if (!out) return stbi__errpuc("outofmem", "Out of memory");
7526 if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
7527 STBI_FREE(out);
7528 return stbi__errpuc("bad PNM", "PNM file truncated");
7529 }
7530
7531 if (req_comp && req_comp != s->img_n) {
7532 if (ri->bits_per_channel == 16) {
7533 out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
7534 } else {
7535 out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
7536 }
7537 if (out == NULL) return out; // stbi__convert_format frees input on failure
7538 }
7539 return out;
7540}
7541
// Returns 1 for the six whitespace characters recognized in a PNM header
// (space, tab, newline, vertical tab, form feed, carriage return), else 0.
static int stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7546
7547static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
7548{
7549 for (;;) {
7550 while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
7551 *c = (char) stbi__get8(s);
7552
7553 if (stbi__at_eof(s) || *c != '#')
7554 break;
7555
7556 while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
7557 *c = (char) stbi__get8(s);
7558 }
7559}
7560
// Returns 1 when c is one of the ASCII digits '0'..'9', else 0.
static int stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   if (c > '9')
      return 0;
   return 1;
}
7565
7566static int stbi__pnm_getinteger(stbi__context *s, char *c)
7567{
7568 int value = 0;
7569
7570 while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
7571 value = value*10 + (*c - '0');
7572 *c = (char) stbi__get8(s);
7573 if((value > 214748364) || (value == 214748364 && *c > '7'))
7574 return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
7575 }
7576
7577 return value;
7578}
7579
7580static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
7581{
7582 int maxv, dummy;
7583 char c, p, t;
7584
7585 if (!x) x = &dummy;
7586 if (!y) y = &dummy;
7587 if (!comp) comp = &dummy;
7588
7589 stbi__rewind(s);
7590
7591 // Get identifier
7592 p = (char) stbi__get8(s);
7593 t = (char) stbi__get8(s);
7594 if (p != 'P' || (t != '5' && t != '6')) {
7595 stbi__rewind(s);
7596 return 0;
7597 }
7598
7599 *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
7600
7601 c = (char) stbi__get8(s);
7602 stbi__pnm_skip_whitespace(s, &c);
7603
7604 *x = stbi__pnm_getinteger(s, &c); // read width
7605 if(*x == 0)
7606 return stbi__err("invalid width", "PPM image header had zero or overflowing width");
7607 stbi__pnm_skip_whitespace(s, &c);
7608
7609 *y = stbi__pnm_getinteger(s, &c); // read height
7610 if (*y == 0)
7611 return stbi__err("invalid width", "PPM image header had zero or overflowing width");
7612 stbi__pnm_skip_whitespace(s, &c);
7613
7614 maxv = stbi__pnm_getinteger(s, &c); // read max value
7615 if (maxv > 65535)
7616 return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
7617 else if (maxv > 255)
7618 return 16;
7619 else
7620 return 8;
7621}
7622
7623static int stbi__pnm_is16(stbi__context *s)
7624{
7625 if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
7626 return 1;
7627 return 0;
7628}
7629#endif
7630
7631static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
7632{
7633 #ifndef STBI_NO_JPEG
7634 if (stbi__jpeg_info(s, x, y, comp)) return 1;
7635 #endif
7636
7637 #ifndef STBI_NO_PNG
7638 if (stbi__png_info(s, x, y, comp)) return 1;
7639 #endif
7640
7641 #ifndef STBI_NO_GIF
7642 if (stbi__gif_info(s, x, y, comp)) return 1;
7643 #endif
7644
7645 #ifndef STBI_NO_BMP
7646 if (stbi__bmp_info(s, x, y, comp)) return 1;
7647 #endif
7648
7649 #ifndef STBI_NO_PSD
7650 if (stbi__psd_info(s, x, y, comp)) return 1;
7651 #endif
7652
7653 #ifndef STBI_NO_PIC
7654 if (stbi__pic_info(s, x, y, comp)) return 1;
7655 #endif
7656
7657 #ifndef STBI_NO_PNM
7658 if (stbi__pnm_info(s, x, y, comp)) return 1;
7659 #endif
7660
7661 #ifndef STBI_NO_HDR
7662 if (stbi__hdr_info(s, x, y, comp)) return 1;
7663 #endif
7664
7665 // test tga last because it's a crappy test!
7666 #ifndef STBI_NO_TGA
7667 if (stbi__tga_info(s, x, y, comp))
7668 return 1;
7669 #endif
7670 return stbi__err("unknown image type", "Image not of any known type, or corrupt");
7671}
7672
7673static int stbi__is_16_main(stbi__context *s)
7674{
7675 #ifndef STBI_NO_PNG
7676 if (stbi__png_is16(s)) return 1;
7677 #endif
7678
7679 #ifndef STBI_NO_PSD
7680 if (stbi__psd_is16(s)) return 1;
7681 #endif
7682
7683 #ifndef STBI_NO_PNM
7684 if (stbi__pnm_is16(s)) return 1;
7685 #endif
7686 return 0;
7687}
7688
7689#ifndef STBI_NO_STDIO
7690STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7691{
7692 FILE *f = stbi__fopen(filename, "rb");
7693 int result;
7694 if (!f) return stbi__err("can't fopen", "Unable to open file");
7695 result = stbi_info_from_file(f, x, y, comp);
7696 fclose(f);
7697 return result;
7698}
7699
7700STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7701{
7702 int r;
7703 stbi__context s;
7704 long pos = ftell(f);
7705 stbi__start_file(&s, f);
7706 r = stbi__info_main(&s,x,y,comp);
7707 fseek(f,pos,SEEK_SET);
7708 return r;
7709}
7710
7711STBIDEF int stbi_is_16_bit(char const *filename)
7712{
7713 FILE *f = stbi__fopen(filename, "rb");
7714 int result;
7715 if (!f) return stbi__err("can't fopen", "Unable to open file");
7716 result = stbi_is_16_bit_from_file(f);
7717 fclose(f);
7718 return result;
7719}
7720
7721STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7722{
7723 int r;
7724 stbi__context s;
7725 long pos = ftell(f);
7726 stbi__start_file(&s, f);
7727 r = stbi__is_16_main(&s);
7728 fseek(f,pos,SEEK_SET);
7729 return r;
7730}
7731#endif // !STBI_NO_STDIO
7732
7733STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7734{
7735 stbi__context s;
7736 stbi__start_mem(&s,buffer,len);
7737 return stbi__info_main(&s,x,y,comp);
7738}
7739
7740STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7741{
7742 stbi__context s;
7743 stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7744 return stbi__info_main(&s,x,y,comp);
7745}
7746
7747STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7748{
7749 stbi__context s;
7750 stbi__start_mem(&s,buffer,len);
7751 return stbi__is_16_main(&s);
7752}
7753
7754STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7755{
7756 stbi__context s;
7757 stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7758 return stbi__is_16_main(&s);
7759}
7760
7761#endif // STB_IMAGE_IMPLEMENTATION
7762
548/* 7763/*
549 revision history: 7764 revision history:
550 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 7765 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
diff --git a/externals/stb/stb_image_resize.cpp b/externals/stb/stb_image_resize.cpp
deleted file mode 100644
index 6f023629e..000000000
--- a/externals/stb/stb_image_resize.cpp
+++ /dev/null
@@ -1,2282 +0,0 @@
1// SPDX-FileCopyrightText: Jorge L Rodriguez
2// SPDX-License-Identifier: MIT
3
4/* stb_image_resize - v0.97 - public domain image resizing
5 by Jorge L Rodriguez (@VinoBS) - 2014
6 http://github.com/nothings/stb
7
8 CONTRIBUTORS
9 Jorge L Rodriguez: Implementation
10 Sean Barrett: API design, optimizations
11 Aras Pranckevicius: bugfix
12 Nathan Reed: warning fixes
13
14 REVISIONS
15 0.97 (2020-02-02) fixed warning
16 0.96 (2019-03-04) fixed warnings
17 0.95 (2017-07-23) fixed warnings
18 0.94 (2017-03-18) fixed warnings
19 0.93 (2017-03-03) fixed bug with certain combinations of heights
20 0.92 (2017-01-02) fix integer overflow on large (>2GB) images
21 0.91 (2016-04-02) fix warnings; fix handling of subpixel regions
22 0.90 (2014-09-17) first released version
23
24 LICENSE
25 See end of file for license information.
26
27 TODO
28 Don't decode all of the image data when only processing a partial tile
29 Don't use full-width decode buffers when only processing a partial tile
30 When processing wide images, break processing into tiles so data fits in L1 cache
31 Installable filters?
32 Resize that respects alpha test coverage
33 (Reference code: FloatImage::alphaTestCoverage and FloatImage::scaleAlphaToCoverage:
34 https://code.google.com/p/nvidia-texture-tools/source/browse/trunk/src/nvimage/FloatImage.cpp )
35*/
36
37#include <stb_image_resize.h>
38
39#ifndef STBIR_ASSERT
40#include <assert.h>
41#define STBIR_ASSERT(x) assert(x)
42#endif
43
44// For memset
45#include <string.h>
46
47#include <math.h>
48
49#ifndef STBIR_MALLOC
50#include <stdlib.h>
51// use comma operator to evaluate c, to avoid "unused parameter" warnings
52#define STBIR_MALLOC(size,c) ((void)(c), malloc(size))
53#define STBIR_FREE(ptr,c) ((void)(c), free(ptr))
54#endif
55
56#ifndef _MSC_VER
57#ifdef __cplusplus
58#define stbir__inline inline
59#else
60#define stbir__inline
61#endif
62#else
63#define stbir__inline __forceinline
64#endif
65
66
67// should produce compiler error if size is wrong
68typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1];
69
70#ifdef _MSC_VER
71#define STBIR__NOTUSED(v) (void)(v)
72#else
73#define STBIR__NOTUSED(v) (void)sizeof(v)
74#endif
75
76#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0]))
77
78#ifndef STBIR_DEFAULT_FILTER_UPSAMPLE
79#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM
80#endif
81
82#ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE
83#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL
84#endif
85
86#ifndef STBIR_PROGRESS_REPORT
87#define STBIR_PROGRESS_REPORT(float_0_to_1)
88#endif
89
90#ifndef STBIR_MAX_CHANNELS
91#define STBIR_MAX_CHANNELS 64
92#endif
93
94#if STBIR_MAX_CHANNELS > 65536
95#error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536."
96// because we store the indices in 16-bit variables
97#endif
98
99// This value is added to alpha just before premultiplication to avoid
100// zeroing out color values. It is equivalent to 2^-80. If you don't want
101// that behavior (it may interfere if you have floating point images with
102// very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to
103// disable it.
104#ifndef STBIR_ALPHA_EPSILON
105#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
106#endif
107
108
109
110#ifdef _MSC_VER
111#define STBIR__UNUSED_PARAM(v) (void)(v)
112#else
113#define STBIR__UNUSED_PARAM(v) (void)sizeof(v)
114#endif
115
116// must match stbir_datatype
// One entry per datatype named in the trailing comments, in that order.
// NOTE(review): the values look like the per-sample size in bytes and the
// table is presumably indexed by the stbir_datatype enum value -- confirm
// against the enum declaration before reordering either.
117static unsigned char stbir__type_size[] = {
118 1, // STBIR_TYPE_UINT8
119 2, // STBIR_TYPE_UINT16
120 4, // STBIR_TYPE_UINT32
121 4, // STBIR_TYPE_FLOAT
122};
123
124// Kernel function centered at 0
// Function types for a filter: a kernel taking (x, scale) and a support
// function taking (scale), both returning float.
125typedef float (stbir__kernel_fn)(float x, float scale);
126typedef float (stbir__support_fn)(float scale);
127
// Pairs a filter's kernel function with its support function.
128typedef struct
129{
130 stbir__kernel_fn* kernel;
131 stbir__support_fn* support;
132} stbir__filter_info;
133
134// When upsampling, the contributors are which source pixels contribute.
135// When downsampling, the contributors are which destination pixels are contributed to.
// Range of contributing pixel indices, stored as first (n0) and last (n1).
136typedef struct
137{
138 int n0; // First contributing pixel
139 int n1; // Last contributing pixel
140} stbir__contributors;
141
// State for one resize operation: input/output image descriptions, sampling
// options, per-axis contributor/coefficient tables, scratch buffers and the
// byte sizes of those allocations.
// NOTE(review): the code that fills and consumes these fields is outside this
// view; the section comments below are inferred from field names -- confirm.
142typedef struct
143{
// source image
144 const void* input_data;
145 int input_w;
146 int input_h;
147 int input_stride_bytes;
148
// destination image
149 void* output_data;
150 int output_w;
151 int output_h;
152 int output_stride_bytes;
153
154 float s0, t0, s1, t1;
155
156 float horizontal_shift; // Units: output pixels
157 float vertical_shift; // Units: output pixels
158 float horizontal_scale;
159 float vertical_scale;
160
// pixel format and sampling options
161 int channels;
162 int alpha_channel;
163 stbir_uint32 flags;
164 stbir_datatype type;
165 stbir_filter horizontal_filter;
166 stbir_filter vertical_filter;
167 stbir_edge edge_horizontal;
168 stbir_edge edge_vertical;
169 stbir_colorspace colorspace;
170
171 stbir__contributors* horizontal_contributors;
172 float* horizontal_coefficients;
173
174 stbir__contributors* vertical_contributors;
175 float* vertical_coefficients;
176
177 int decode_buffer_pixels;
178 float* decode_buffer;
179
180 float* horizontal_buffer;
181
182 // cache these because ceil/floor are inexplicably showing up in profile
183 int horizontal_coefficient_width;
184 int vertical_coefficient_width;
185 int horizontal_filter_pixel_width;
186 int vertical_filter_pixel_width;
187 int horizontal_filter_pixel_margin;
188 int vertical_filter_pixel_margin;
189 int horizontal_num_contributors;
190 int vertical_num_contributors;
191
192 int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
193 int ring_buffer_num_entries; // Total number of entries in the ring buffer.
194 int ring_buffer_first_scanline;
195 int ring_buffer_last_scanline;
196 int ring_buffer_begin_index; // first_scanline is at this index in the ring buffer
197 float* ring_buffer;
198
199 float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.
200
// sizes of the buffers above (presumably in bytes -- TODO confirm)
201 int horizontal_contributors_size;
202 int horizontal_coefficients_size;
203 int vertical_contributors_size;
204 int vertical_coefficients_size;
205 int decode_buffer_size;
206 int horizontal_buffer_size;
207 int ring_buffer_size;
208 int encode_buffer_size;
209} stbir__info;
210
211
// Largest value of each unsigned integer sample type, as a floating-point
// constant. Note that the 32-bit one is a double despite its "_as_float"
// name.
212static const float stbir__max_uint8_as_float = 255.0f;
213static const float stbir__max_uint16_as_float = 65535.0f;
214static const double stbir__max_uint32_as_float = 4294967295.0;
215
216
217static stbir__inline int stbir__min(int a, int b)
218{
219 return a < b ? a : b;
220}
221
222static stbir__inline float stbir__saturate(float x)
223{
224 if (x < 0)
225 return 0;
226
227 if (x > 1)
228 return 1;
229
230 return x;
231}
232
233#ifdef STBIR_SATURATE_INT
234static stbir__inline stbir_uint8 stbir__saturate8(int x)
235{
236 if ((unsigned int) x <= 255)
237 return x;
238
239 if (x < 0)
240 return 0;
241
242 return 255;
243}
244
245static stbir__inline stbir_uint16 stbir__saturate16(int x)
246{
247 if ((unsigned int) x <= 65535)
248 return x;
249
250 if (x < 0)
251 return 0;
252
253 return 65535;
254}
255#endif
256
// 256-entry lookup table of floats rising from 0.0 (index 0) to 1.0
// (index 255). Going by the name, entry i is the linear-light value of the
// 8-bit sRGB code i.
257static float stbir__srgb_uchar_to_linear_float[256] = {
258 0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
259 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
260 0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
261 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
262 0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
263 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
264 0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
265 0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
266 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
267 0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
268 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
269 0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
270 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
271 0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
272 0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
273 0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
274 0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
275 0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
276 0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
277 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
278 0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
279 0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
280 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
281 0.982251f, 0.991102f, 1.0f
282};
283
284static float stbir__srgb_to_linear(float f)
285{
286 if (f <= 0.04045f)
287 return f / 12.92f;
288 else
289 return (float)pow((f + 0.055f) / 1.055f, 2.4f);
290}
291
292static float stbir__linear_to_srgb(float f)
293{
294 if (f <= 0.0031308f)
295 return f * 12.92f;
296 else
297 return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f;
298}
299
300#ifndef STBIR_NON_IEEE_FLOAT
301// From https://gist.github.com/rygorous/2203834
302
// Overlays a float with a 32-bit unsigned integer so that the bit pattern of
// the float can be read or written directly. `u` is the first member, so a
// brace initializer with a single value sets the bit pattern.
303typedef union
304{
305 stbir_uint32 u;
306 float f;
307} stbir__FP32;
308
// Lookup table for stbir__linear_to_srgb_uchar. Each 32-bit entry packs two
// 16-bit fields, as unpacked by that function: the high half is the bias
// (shifted left by 9 before use) and the low half is the interpolation scale.
309static const stbir_uint32 fp32_to_srgb8_tab4[104] = {
310 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
311 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
312 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
313 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
314 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
315 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
316 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
317 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
318 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
319 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
320 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
321 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
322 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
323};
324
// Converts a linear float to an 8-bit sRGB value without calling pow(): the
// input is clamped, its bit pattern selects an entry of fp32_to_srgb8_tab4,
// and the next 8 mantissa bits drive a linear interpolation within that
// entry. NaN inputs take the lower clamp.
325static stbir_uint8 stbir__linear_to_srgb_uchar(float in)
326{
// both constants are given as raw bit patterns (first union member is `u`)
327 static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
328 static const stbir__FP32 minval = { (127-13) << 23 };
329 stbir_uint32 tab,bias,scale,t;
330 stbir__FP32 f;
331
332 // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
333 // The tests are carefully written so that NaNs map to 0, same as in the reference
334 // implementation.
335 if (!(in > minval.f)) // written this way to catch NaNs
336 in = minval.f;
337 if (in > almostone.f)
338 in = almostone.f;
339
340 // Do the table lookup and unpack bias, scale
// the index is the distance from minval in units of 2^20 bit-pattern steps
341 f.f = in;
342 tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
343 bias = (tab >> 16) << 9;
344 scale = tab & 0xffff;
345
346 // Grab next-highest mantissa bits and perform linear interpolation
347 t = (f.u >> 12) & 0xff;
348 return (unsigned char) ((bias + scale*t) >> 16);
349}
350
351#else
// sRGB transition values, scaled by 1<<28.
// Entry i is the smallest scaled linear value that encodes to sRGB code i;
// the table is strictly increasing so it can be binary-searched.
// It is read-only lookup data, hence const.
static const int stbir__srgb_offset_to_linear_scaled[256] =
{
            0,     40738,    122216,    203693,    285170,    366648,    448125,    529603,
       611080,    692557,    774035,    855852,    942009,   1033024,   1128971,   1229926,
      1335959,   1447142,   1563542,   1685229,   1812268,   1944725,   2082664,   2226148,
      2375238,   2529996,   2690481,   2856753,   3028870,   3206888,   3390865,   3580856,
      3776916,   3979100,   4187460,   4402049,   4622919,   4850123,   5083710,   5323731,
      5570236,   5823273,   6082892,   6349140,   6622065,   6901714,   7188133,   7481369,
      7781466,   8088471,   8402427,   8723380,   9051372,   9386448,   9728650,  10078021,
     10434603,  10798439,  11169569,  11548036,  11933879,  12327139,  12727857,  13136073,
     13551826,  13975156,  14406100,  14844697,  15290987,  15745007,  16206795,  16676389,
     17153826,  17639142,  18132374,  18633560,  19142734,  19659934,  20185196,  20718552,
     21260042,  21809696,  22367554,  22933648,  23508010,  24090680,  24681686,  25281066,
     25888850,  26505076,  27129772,  27762974,  28404716,  29055026,  29713942,  30381490,
     31057708,  31742624,  32436272,  33138682,  33849884,  34569912,  35298800,  36036568,
     36783260,  37538896,  38303512,  39077136,  39859796,  40651528,  41452360,  42262316,
     43081432,  43909732,  44747252,  45594016,  46450052,  47315392,  48190064,  49074096,
     49967516,  50870356,  51782636,  52704392,  53635648,  54576432,  55526772,  56486700,
     57456236,  58435408,  59424248,  60422780,  61431036,  62449032,  63476804,  64514376,
     65561776,  66619028,  67686160,  68763192,  69850160,  70947088,  72053992,  73170912,
     74297864,  75434880,  76581976,  77739184,  78906536,  80084040,  81271736,  82469648,
     83677792,  84896192,  86124888,  87363888,  88613232,  89872928,  91143016,  92423512,
     93714432,  95015816,  96327688,  97650056,  98982952, 100326408, 101680440, 103045072,
    104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
    115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
    127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
    140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
    154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
    168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
    183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
    199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
    215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
    232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
    250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
};
388
389static stbir_uint8 stbir__linear_to_srgb_uchar(float f)
390{
391 int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
392 int v = 0;
393 int i;
394
395 // Refine the guess with a short binary search.
396 i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
397 i = v + 64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
398 i = v + 32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
399 i = v + 16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
400 i = v + 8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
401 i = v + 4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
402 i = v + 2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
403 i = v + 1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
404
405 return (stbir_uint8) v;
406}
407#endif
408
// Trapezoid kernel: 1 on the flat top (|x| <= 0.5 - scale/2), 0 outside
// (|x| >= 0.5 + scale/2), and a linear ramp in between.
// scale is asserted to be <= 1.
static float stbir__filter_trapezoid(float x, float scale)
{
    float halfscale = scale / 2;
    float outer = 0.5f + halfscale;
    float inner = 0.5f - halfscale;
    float dist;
    STBIR_ASSERT(scale <= 1);

    dist = (float)fabs(x);

    if (dist >= outer)
        return 0;

    if (dist <= inner)
        return 1;

    return (outer - dist) / scale;
}
428
// Support radius of the trapezoid kernel: the distance at which
// stbir__filter_trapezoid reaches zero. scale is asserted to be <= 1.
static float stbir__support_trapezoid(float scale)
{
    STBIR_ASSERT(scale <= 1);

    return 0.5f + scale / 2;
}
434
// Triangle (tent) kernel: 1 - |x| for |x| <= 1, otherwise 0.
// The scale argument s is ignored.
static float stbir__filter_triangle(float x, float s)
{
    float dist;

    STBIR__UNUSED_PARAM(s);

    dist = (float)fabs(x);

    return (dist <= 1.0f) ? 1 - dist : 0;
}
446
// Piecewise cubic kernel with support 2; the polynomials are those of the
// uniform cubic B-spline (value 4/6 at the centre).
// The scale argument s is ignored.
static float stbir__filter_cubic(float x, float s)
{
    float r;

    STBIR__UNUSED_PARAM(s);

    r = (float)fabs(x);

    // Inner piece, |x| in [0, 1).
    if (r < 1.0f)
        return (4 + r*r*(3*r - 6))/6;

    // Outer piece, |x| in [1, 2).
    if (r < 2.0f)
        return (8 + r*(-12 + r*(6 - r)))/6;

    return (0.0f);
}
460
// Catmull-Rom cubic kernel with support 2 (1 at the centre, 0 at |x| = 1
// and beyond |x| = 2). The scale argument s is ignored.
static float stbir__filter_catmullrom(float x, float s)
{
    float r;

    STBIR__UNUSED_PARAM(s);

    r = (float)fabs(x);

    // Inner piece, |x| in [0, 1).
    if (r < 1.0f)
        return 1 - r*r*(2.5f - 1.5f*r);

    // Outer piece, |x| in [1, 2).
    if (r < 2.0f)
        return 2 - r*(4 + r*(0.5f*r - 2.5f));

    return (0.0f);
}
474
// Mitchell-Netravali cubic kernel (the B = C = 1/3 member of the family)
// with support 2. The scale argument s is ignored.
static float stbir__filter_mitchell(float x, float s)
{
    float r;

    STBIR__UNUSED_PARAM(s);

    r = (float)fabs(x);

    // Inner piece, |x| in [0, 1).
    if (r < 1.0f)
        return (16 + r*r*(21 * r - 36))/18;

    // Outer piece, |x| in [1, 2).
    if (r < 2.0f)
        return (32 + r*(-60 + r*(36 - 7*r)))/18;

    return (0.0f);
}
488
// Support callback for the table entry that has no kernel: always 0.
// The scale argument s is ignored.
static float stbir__support_zero(float s)
{
    STBIR__UNUSED_PARAM(s);

    return 0;
}
494
// Support callback for kernels that vanish beyond |x| = 1: always 1.
// The scale argument s is ignored.
static float stbir__support_one(float s)
{
    STBIR__UNUSED_PARAM(s);

    return 1;
}
500
// Support callback for kernels that vanish beyond |x| = 2: always 2.
// The scale argument s is ignored.
static float stbir__support_two(float s)
{
    STBIR__UNUSED_PARAM(s);

    return 2;
}
506
507static stbir__filter_info stbir__filter_info_table[] = {
508 { NULL, stbir__support_zero },
509 { stbir__filter_trapezoid, stbir__support_trapezoid },
510 { stbir__filter_triangle, stbir__support_one },
511 { stbir__filter_cubic, stbir__support_two },
512 { stbir__filter_catmullrom, stbir__support_two },
513 { stbir__filter_mitchell, stbir__support_two },
514};
515
516stbir__inline static int stbir__use_upsampling(float ratio)
517{
518 return ratio > 1;
519}
520
521stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info)
522{
523 return stbir__use_upsampling(stbir_info->horizontal_scale);
524}
525
526stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info)
527{
528 return stbir__use_upsampling(stbir_info->vertical_scale);
529}
530
531// This is the maximum number of input samples that can affect an output sample
532// with the given filter
533static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
534{
535 STBIR_ASSERT(filter != 0);
536 STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
537
538 if (stbir__use_upsampling(scale))
539 return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2);
540 else
541 return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale);
542}
543
544// This is how much to expand buffers to account for filters seeking outside
545// the image boundaries.
546static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
547{
548 return stbir__get_filter_pixel_width(filter, scale) / 2;
549}
550
551static int stbir__get_coefficient_width(stbir_filter filter, float scale)
552{
553 if (stbir__use_upsampling(scale))
554 return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2);
555 else
556 return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2);
557}
558
559static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size)
560{
561 if (stbir__use_upsampling(scale))
562 return output_size;
563 else
564 return (input_size + stbir__get_filter_pixel_margin(filter, scale) * 2);
565}
566
567static int stbir__get_total_horizontal_coefficients(stbir__info* info)
568{
569 return info->horizontal_num_contributors
570 * stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale);
571}
572
573static int stbir__get_total_vertical_coefficients(stbir__info* info)
574{
575 return info->vertical_num_contributors
576 * stbir__get_coefficient_width (info->vertical_filter, info->vertical_scale);
577}
578
579static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n)
580{
581 return &contributors[n];
582}
583
584// For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
585// if you change it here change it there too.
586static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
587{
588 int width = stbir__get_coefficient_width(filter, scale);
589 return &coefficients[width*n + c];
590}
591
592static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max)
593{
594 switch (edge)
595 {
596 case STBIR_EDGE_ZERO:
597 return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later
598
599 case STBIR_EDGE_CLAMP:
600 if (n < 0)
601 return 0;
602
603 if (n >= max)
604 return max - 1;
605
606 return n; // NOTREACHED
607
608 case STBIR_EDGE_REFLECT:
609 {
610 if (n < 0)
611 {
612 if (n < max)
613 return -n;
614 else
615 return max - 1;
616 }
617
618 if (n >= max)
619 {
620 int max2 = max * 2;
621 if (n >= max2)
622 return 0;
623 else
624 return max2 - n - 1;
625 }
626
627 return n; // NOTREACHED
628 }
629
630 case STBIR_EDGE_WRAP:
631 if (n >= 0)
632 return (n % max);
633 else
634 {
635 int m = (-n) % max;
636
637 if (m != 0)
638 m = max - m;
639
640 return (m);
641 }
642 // NOTREACHED
643
644 default:
645 STBIR_ASSERT(!"Unimplemented edge type");
646 return 0;
647 }
648}
649
650stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
651{
652 // avoid per-pixel switch
653 if (n >= 0 && n < max)
654 return n;
655 return stbir__edge_wrap_slow(edge, n, max);
656}
657
658// What input pixels contribute to this output pixel?
659static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
660{
661 float out_pixel_center = (float)n + 0.5f;
662 float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
663 float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
664
665 float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio;
666 float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio;
667
668 *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio;
669 *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5));
670 *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5));
671}
672
673// What output pixels does this input pixel contribute to?
674static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in)
675{
676 float in_pixel_center = (float)n + 0.5f;
677 float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
678 float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
679
680 float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift;
681 float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift;
682
683 *out_center_of_in = in_pixel_center * scale_ratio - out_shift;
684 *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5));
685 *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5));
686}
687
688static void stbir__calculate_coefficients_upsample(stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group)
689{
690 int i;
691 float total_filter = 0;
692 float filter_scale;
693
694 STBIR_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
695
696 contributor->n0 = in_first_pixel;
697 contributor->n1 = in_last_pixel;
698
699 STBIR_ASSERT(contributor->n1 >= contributor->n0);
700
701 for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
702 {
703 float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
704 coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale);
705
706 // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.)
707 if (i == 0 && !coefficient_group[i])
708 {
709 contributor->n0 = ++in_first_pixel;
710 i--;
711 continue;
712 }
713
714 total_filter += coefficient_group[i];
715 }
716
717 // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be.
718 // It would be true in exact math but is at best approximately true in floating-point math,
719 // and it would not make sense to try and put actual bounds on this here because it depends
720 // on the image aspect ratio which can get pretty extreme.
721 //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0);
722
723 STBIR_ASSERT(total_filter > 0.9);
724 STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
725
726 // Make sure the sum of all coefficients is 1.
727 filter_scale = 1 / total_filter;
728
729 for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
730 coefficient_group[i] *= filter_scale;
731
732 for (i = in_last_pixel - in_first_pixel; i >= 0; i--)
733 {
734 if (coefficient_group[i])
735 break;
736
737 // This line has no weight. We can skip it.
738 contributor->n1 = contributor->n0 + i - 1;
739 }
740}
741
742static void stbir__calculate_coefficients_downsample(stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group)
743{
744 int i;
745
746 STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
747
748 contributor->n0 = out_first_pixel;
749 contributor->n1 = out_last_pixel;
750
751 STBIR_ASSERT(contributor->n1 >= contributor->n0);
752
753 for (i = 0; i <= out_last_pixel - out_first_pixel; i++)
754 {
755 float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
756 float x = out_pixel_center - out_center_of_in;
757 coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
758 }
759
760 // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be.
761 // It would be true in exact math but is at best approximately true in floating-point math,
762 // and it would not make sense to try and put actual bounds on this here because it depends
763 // on the image aspect ratio which can get pretty extreme.
764 //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
765
766 for (i = out_last_pixel - out_first_pixel; i >= 0; i--)
767 {
768 if (coefficient_group[i])
769 break;
770
771 // This line has no weight. We can skip it.
772 contributor->n1 = contributor->n0 + i - 1;
773 }
774}
775
776static void stbir__normalize_downsample_coefficients(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, int input_size, int output_size)
777{
778 int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
779 int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio);
780 int i, j;
781 int skip;
782
783 for (i = 0; i < output_size; i++)
784 {
785 float scale;
786 float total = 0;
787
788 for (j = 0; j < num_contributors; j++)
789 {
790 if (i >= contributors[j].n0 && i <= contributors[j].n1)
791 {
792 float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0);
793 total += coefficient;
794 }
795 else if (i < contributors[j].n0)
796 break;
797 }
798
799 STBIR_ASSERT(total > 0.9f);
800 STBIR_ASSERT(total < 1.1f);
801
802 scale = 1 / total;
803
804 for (j = 0; j < num_contributors; j++)
805 {
806 if (i >= contributors[j].n0 && i <= contributors[j].n1)
807 *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale;
808 else if (i < contributors[j].n0)
809 break;
810 }
811 }
812
813 // Optimize: Skip zero coefficients and contributions outside of image bounds.
814 // Do this after normalizing because normalization depends on the n0/n1 values.
815 for (j = 0; j < num_contributors; j++)
816 {
817 int range, max, width;
818
819 skip = 0;
820 while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0)
821 skip++;
822
823 contributors[j].n0 += skip;
824
825 while (contributors[j].n0 < 0)
826 {
827 contributors[j].n0++;
828 skip++;
829 }
830
831 range = contributors[j].n1 - contributors[j].n0 + 1;
832 max = stbir__min(num_coefficients, range);
833
834 width = stbir__get_coefficient_width(filter, scale_ratio);
835 for (i = 0; i < max; i++)
836 {
837 if (i + skip >= width)
838 break;
839
840 *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip);
841 }
842
843 continue;
844 }
845
846 // Using min to avoid writing into invalid pixels.
847 for (i = 0; i < num_contributors; i++)
848 contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1);
849}
850
851// Each scan line uses the same kernel values so we should calculate the kernel
852// values once and then we can use them for every scan line.
853static void stbir__calculate_filters(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
854{
855 int n;
856 int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
857
858 if (stbir__use_upsampling(scale_ratio))
859 {
860 float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio;
861
862 // Looping through out pixels
863 for (n = 0; n < total_contributors; n++)
864 {
865 float in_center_of_out; // Center of the current out pixel in the in pixel space
866 int in_first_pixel, in_last_pixel;
867
868 stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out);
869
870 stbir__calculate_coefficients_upsample(filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
871 }
872 }
873 else
874 {
875 float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio;
876
877 // Looping through in pixels
878 for (n = 0; n < total_contributors; n++)
879 {
880 float out_center_of_in; // Center of the current out pixel in the in pixel space
881 int out_first_pixel, out_last_pixel;
882 int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio);
883
884 stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in);
885
886 stbir__calculate_coefficients_downsample(filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
887 }
888
889 stbir__normalize_downsample_coefficients(contributors, coefficients, filter, scale_ratio, input_size, output_size);
890 }
891}
892
893static float* stbir__get_decode_buffer(stbir__info* stbir_info)
894{
895 // The 0 index of the decode buffer starts after the margin. This makes
896 // it okay to use negative indexes on the decode buffer.
897 return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels];
898}
899
900#define STBIR__DECODE(type, colorspace) ((int)(type) * (STBIR_MAX_COLORSPACES) + (int)(colorspace))
901
902static void stbir__decode_scanline(stbir__info* stbir_info, int n)
903{
904 int c;
905 int channels = stbir_info->channels;
906 int alpha_channel = stbir_info->alpha_channel;
907 int type = stbir_info->type;
908 int colorspace = stbir_info->colorspace;
909 int input_w = stbir_info->input_w;
910 size_t input_stride_bytes = stbir_info->input_stride_bytes;
911 float* decode_buffer = stbir__get_decode_buffer(stbir_info);
912 stbir_edge edge_horizontal = stbir_info->edge_horizontal;
913 stbir_edge edge_vertical = stbir_info->edge_vertical;
914 size_t in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes;
915 const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset;
916 int max_x = input_w + stbir_info->horizontal_filter_pixel_margin;
917 int decode = STBIR__DECODE(type, colorspace);
918
919 int x = -stbir_info->horizontal_filter_pixel_margin;
920
921 // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input,
922 // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO
923 if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h))
924 {
925 for (; x < max_x; x++)
926 for (c = 0; c < channels; c++)
927 decode_buffer[x*channels + c] = 0;
928 return;
929 }
930
931 switch (decode)
932 {
933 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
934 for (; x < max_x; x++)
935 {
936 int decode_pixel_index = x * channels;
937 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
938 for (c = 0; c < channels; c++)
939 decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / stbir__max_uint8_as_float;
940 }
941 break;
942
943 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
944 for (; x < max_x; x++)
945 {
946 int decode_pixel_index = x * channels;
947 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
948 for (c = 0; c < channels; c++)
949 decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]];
950
951 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
952 decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint8_as_float;
953 }
954 break;
955
956 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
957 for (; x < max_x; x++)
958 {
959 int decode_pixel_index = x * channels;
960 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
961 for (c = 0; c < channels; c++)
962 decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float;
963 }
964 break;
965
966 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
967 for (; x < max_x; x++)
968 {
969 int decode_pixel_index = x * channels;
970 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
971 for (c = 0; c < channels; c++)
972 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float);
973
974 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
975 decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint16_as_float;
976 }
977 break;
978
979 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
980 for (; x < max_x; x++)
981 {
982 int decode_pixel_index = x * channels;
983 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
984 for (c = 0; c < channels; c++)
985 decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float);
986 }
987 break;
988
989 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
990 for (; x < max_x; x++)
991 {
992 int decode_pixel_index = x * channels;
993 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
994 for (c = 0; c < channels; c++)
995 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float));
996
997 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
998 decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint32_as_float);
999 }
1000 break;
1001
1002 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
1003 for (; x < max_x; x++)
1004 {
1005 int decode_pixel_index = x * channels;
1006 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1007 for (c = 0; c < channels; c++)
1008 decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c];
1009 }
1010 break;
1011
1012 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
1013 for (; x < max_x; x++)
1014 {
1015 int decode_pixel_index = x * channels;
1016 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1017 for (c = 0; c < channels; c++)
1018 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]);
1019
1020 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1021 decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel];
1022 }
1023
1024 break;
1025
1026 default:
1027 STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
1028 break;
1029 }
1030
1031 if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED))
1032 {
1033 for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++)
1034 {
1035 int decode_pixel_index = x * channels;
1036
1037 // If the alpha value is 0 it will clobber the color values. Make sure it's not.
1038 float alpha = decode_buffer[decode_pixel_index + alpha_channel];
1039#ifndef STBIR_NO_ALPHA_EPSILON
1040 if (stbir_info->type != STBIR_TYPE_FLOAT) {
1041 alpha += STBIR_ALPHA_EPSILON;
1042 decode_buffer[decode_pixel_index + alpha_channel] = alpha;
1043 }
1044#endif
1045 for (c = 0; c < channels; c++)
1046 {
1047 if (c == alpha_channel)
1048 continue;
1049
1050 decode_buffer[decode_pixel_index + c] *= alpha;
1051 }
1052 }
1053 }
1054
1055 if (edge_horizontal == STBIR_EDGE_ZERO)
1056 {
1057 for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++)
1058 {
1059 for (c = 0; c < channels; c++)
1060 decode_buffer[x*channels + c] = 0;
1061 }
1062 for (x = input_w; x < max_x; x++)
1063 {
1064 for (c = 0; c < channels; c++)
1065 decode_buffer[x*channels + c] = 0;
1066 }
1067 }
1068}
1069
// Returns the start of entry `index` in a ring buffer whose entries are
// ring_buffer_length floats each.
static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length)
{
    int offset = index * ring_buffer_length;

    return ring_buffer + offset;
}
1074
1075static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n)
1076{
1077 int ring_buffer_index;
1078 float* ring_buffer;
1079
1080 stbir_info->ring_buffer_last_scanline = n;
1081
1082 if (stbir_info->ring_buffer_begin_index < 0)
1083 {
1084 ring_buffer_index = stbir_info->ring_buffer_begin_index = 0;
1085 stbir_info->ring_buffer_first_scanline = n;
1086 }
1087 else
1088 {
1089 ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
1090 STBIR_ASSERT(ring_buffer_index != stbir_info->ring_buffer_begin_index);
1091 }
1092
1093 ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float));
1094 memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes);
1095
1096 return ring_buffer;
1097}
1098
1099
1100static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, float* output_buffer)
1101{
1102 int x, k;
1103 int output_w = stbir_info->output_w;
1104 int channels = stbir_info->channels;
1105 float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1106 stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
1107 float* horizontal_coefficients = stbir_info->horizontal_coefficients;
1108 int coefficient_width = stbir_info->horizontal_coefficient_width;
1109
1110 for (x = 0; x < output_w; x++)
1111 {
1112 int n0 = horizontal_contributors[x].n0;
1113 int n1 = horizontal_contributors[x].n1;
1114
1115 int out_pixel_index = x * channels;
1116 int coefficient_group = coefficient_width * x;
1117 int coefficient_counter = 0;
1118
1119 STBIR_ASSERT(n1 >= n0);
1120 STBIR_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin);
1121 STBIR_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin);
1122 STBIR_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
1123 STBIR_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
1124
1125 switch (channels) {
1126 case 1:
1127 for (k = n0; k <= n1; k++)
1128 {
1129 int in_pixel_index = k * 1;
1130 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1131 STBIR_ASSERT(coefficient != 0);
1132 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1133 }
1134 break;
1135 case 2:
1136 for (k = n0; k <= n1; k++)
1137 {
1138 int in_pixel_index = k * 2;
1139 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1140 STBIR_ASSERT(coefficient != 0);
1141 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1142 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1143 }
1144 break;
1145 case 3:
1146 for (k = n0; k <= n1; k++)
1147 {
1148 int in_pixel_index = k * 3;
1149 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1150 STBIR_ASSERT(coefficient != 0);
1151 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1152 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1153 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1154 }
1155 break;
1156 case 4:
1157 for (k = n0; k <= n1; k++)
1158 {
1159 int in_pixel_index = k * 4;
1160 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1161 STBIR_ASSERT(coefficient != 0);
1162 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1163 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1164 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1165 output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
1166 }
1167 break;
1168 default:
1169 for (k = n0; k <= n1; k++)
1170 {
1171 int in_pixel_index = k * channels;
1172 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1173 int c;
1174 STBIR_ASSERT(coefficient != 0);
1175 for (c = 0; c < channels; c++)
1176 output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
1177 }
1178 break;
1179 }
1180 }
1181}
1182
1183static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float* output_buffer)
1184{
1185 int x, k;
1186 int input_w = stbir_info->input_w;
1187 int channels = stbir_info->channels;
1188 float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1189 stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
1190 float* horizontal_coefficients = stbir_info->horizontal_coefficients;
1191 int coefficient_width = stbir_info->horizontal_coefficient_width;
1192 int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin;
1193 int max_x = input_w + filter_pixel_margin * 2;
1194
1195 STBIR_ASSERT(!stbir__use_width_upsampling(stbir_info));
1196
1197 switch (channels) {
1198 case 1:
1199 for (x = 0; x < max_x; x++)
1200 {
1201 int n0 = horizontal_contributors[x].n0;
1202 int n1 = horizontal_contributors[x].n1;
1203
1204 int in_x = x - filter_pixel_margin;
1205 int in_pixel_index = in_x * 1;
1206 int max_n = n1;
1207 int coefficient_group = coefficient_width * x;
1208
1209 for (k = n0; k <= max_n; k++)
1210 {
1211 int out_pixel_index = k * 1;
1212 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1213 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1214 }
1215 }
1216 break;
1217
1218 case 2:
1219 for (x = 0; x < max_x; x++)
1220 {
1221 int n0 = horizontal_contributors[x].n0;
1222 int n1 = horizontal_contributors[x].n1;
1223
1224 int in_x = x - filter_pixel_margin;
1225 int in_pixel_index = in_x * 2;
1226 int max_n = n1;
1227 int coefficient_group = coefficient_width * x;
1228
1229 for (k = n0; k <= max_n; k++)
1230 {
1231 int out_pixel_index = k * 2;
1232 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1233 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1234 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1235 }
1236 }
1237 break;
1238
1239 case 3:
1240 for (x = 0; x < max_x; x++)
1241 {
1242 int n0 = horizontal_contributors[x].n0;
1243 int n1 = horizontal_contributors[x].n1;
1244
1245 int in_x = x - filter_pixel_margin;
1246 int in_pixel_index = in_x * 3;
1247 int max_n = n1;
1248 int coefficient_group = coefficient_width * x;
1249
1250 for (k = n0; k <= max_n; k++)
1251 {
1252 int out_pixel_index = k * 3;
1253 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1254 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1255 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1256 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1257 }
1258 }
1259 break;
1260
1261 case 4:
1262 for (x = 0; x < max_x; x++)
1263 {
1264 int n0 = horizontal_contributors[x].n0;
1265 int n1 = horizontal_contributors[x].n1;
1266
1267 int in_x = x - filter_pixel_margin;
1268 int in_pixel_index = in_x * 4;
1269 int max_n = n1;
1270 int coefficient_group = coefficient_width * x;
1271
1272 for (k = n0; k <= max_n; k++)
1273 {
1274 int out_pixel_index = k * 4;
1275 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1276 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1277 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1278 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1279 output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
1280 }
1281 }
1282 break;
1283
1284 default:
1285 for (x = 0; x < max_x; x++)
1286 {
1287 int n0 = horizontal_contributors[x].n0;
1288 int n1 = horizontal_contributors[x].n1;
1289
1290 int in_x = x - filter_pixel_margin;
1291 int in_pixel_index = in_x * channels;
1292 int max_n = n1;
1293 int coefficient_group = coefficient_width * x;
1294
1295 for (k = n0; k <= max_n; k++)
1296 {
1297 int c;
1298 int out_pixel_index = k * channels;
1299 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1300 for (c = 0; c < channels; c++)
1301 output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
1302 }
1303 }
1304 break;
1305 }
1306}
1307
1308static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n)
1309{
1310 // Decode the nth scanline from the source image into the decode buffer.
1311 stbir__decode_scanline(stbir_info, n);
1312
1313 // Now resample it into the ring buffer.
1314 if (stbir__use_width_upsampling(stbir_info))
1315 stbir__resample_horizontal_upsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
1316 else
1317 stbir__resample_horizontal_downsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
1318
1319 // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling.
1320}
1321
1322static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n)
1323{
1324 // Decode the nth scanline from the source image into the decode buffer.
1325 stbir__decode_scanline(stbir_info, n);
1326
1327 memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float));
1328
1329 // Now resample it into the horizontal buffer.
1330 if (stbir__use_width_upsampling(stbir_info))
1331 stbir__resample_horizontal_upsample(stbir_info, stbir_info->horizontal_buffer);
1332 else
1333 stbir__resample_horizontal_downsample(stbir_info, stbir_info->horizontal_buffer);
1334
1335 // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers.
1336}
1337
// Return the ring buffer entry that holds scanline `get_scanline`.
// The entry at `begin_index` holds `first_scanline`; later scanlines follow
// it, wrapping around after `ring_buffer_num_entries` entries.
static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_num_entries, int ring_buffer_length)
{
    int distance_from_first = get_scanline - first_scanline;
    int slot = (begin_index + distance_from_first) % ring_buffer_num_entries;

    return stbir__get_ring_buffer_entry(ring_buffer, slot, ring_buffer_length);
}
1344
1345
1346static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode)
1347{
1348 int x;
1349 int n;
1350 int num_nonalpha;
1351 stbir_uint16 nonalpha[STBIR_MAX_CHANNELS];
1352
1353 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED))
1354 {
1355 for (x=0; x < num_pixels; ++x)
1356 {
1357 int pixel_index = x*channels;
1358
1359 float alpha = encode_buffer[pixel_index + alpha_channel];
1360 float reciprocal_alpha = alpha ? 1.0f / alpha : 0;
1361
1362 // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb
1363 for (n = 0; n < channels; n++)
1364 if (n != alpha_channel)
1365 encode_buffer[pixel_index + n] *= reciprocal_alpha;
1366
1367 // We added in a small epsilon to prevent the color channel from being deleted with zero alpha.
1368 // Because we only add it for integer types, it will automatically be discarded on integer
1369 // conversion, so we don't need to subtract it back out (which would be problematic for
1370 // numeric precision reasons).
1371 }
1372 }
1373
1374 // build a table of all channels that need colorspace correction, so
1375 // we don't perform colorspace correction on channels that don't need it.
1376 for (x = 0, num_nonalpha = 0; x < channels; ++x)
1377 {
1378 if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
1379 {
1380 nonalpha[num_nonalpha++] = (stbir_uint16)x;
1381 }
1382 }
1383
1384 #define STBIR__ROUND_INT(f) ((int) ((f)+0.5))
1385 #define STBIR__ROUND_UINT(f) ((stbir_uint32) ((f)+0.5))
1386
1387 #ifdef STBIR__SATURATE_INT
1388 #define STBIR__ENCODE_LINEAR8(f) stbir__saturate8 (STBIR__ROUND_INT((f) * stbir__max_uint8_as_float ))
1389 #define STBIR__ENCODE_LINEAR16(f) stbir__saturate16(STBIR__ROUND_INT((f) * stbir__max_uint16_as_float))
1390 #else
1391 #define STBIR__ENCODE_LINEAR8(f) (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint8_as_float )
1392 #define STBIR__ENCODE_LINEAR16(f) (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint16_as_float)
1393 #endif
1394
1395 switch (decode)
1396 {
1397 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
1398 for (x=0; x < num_pixels; ++x)
1399 {
1400 int pixel_index = x*channels;
1401
1402 for (n = 0; n < channels; n++)
1403 {
1404 int index = pixel_index + n;
1405 ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]);
1406 }
1407 }
1408 break;
1409
1410 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
1411 for (x=0; x < num_pixels; ++x)
1412 {
1413 int pixel_index = x*channels;
1414
1415 for (n = 0; n < num_nonalpha; n++)
1416 {
1417 int index = pixel_index + nonalpha[n];
1418 ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
1419 }
1420
1421 if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
1422 ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]);
1423 }
1424 break;
1425
1426 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
1427 for (x=0; x < num_pixels; ++x)
1428 {
1429 int pixel_index = x*channels;
1430
1431 for (n = 0; n < channels; n++)
1432 {
1433 int index = pixel_index + n;
1434 ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]);
1435 }
1436 }
1437 break;
1438
1439 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
1440 for (x=0; x < num_pixels; ++x)
1441 {
1442 int pixel_index = x*channels;
1443
1444 for (n = 0; n < num_nonalpha; n++)
1445 {
1446 int index = pixel_index + nonalpha[n];
1447 ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * stbir__max_uint16_as_float);
1448 }
1449
1450 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1451 ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]);
1452 }
1453
1454 break;
1455
1456 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
1457 for (x=0; x < num_pixels; ++x)
1458 {
1459 int pixel_index = x*channels;
1460
1461 for (n = 0; n < channels; n++)
1462 {
1463 int index = pixel_index + n;
1464 ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * stbir__max_uint32_as_float);
1465 }
1466 }
1467 break;
1468
1469 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
1470 for (x=0; x < num_pixels; ++x)
1471 {
1472 int pixel_index = x*channels;
1473
1474 for (n = 0; n < num_nonalpha; n++)
1475 {
1476 int index = pixel_index + nonalpha[n];
1477 ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * stbir__max_uint32_as_float);
1478 }
1479
1480 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1481 ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * stbir__max_uint32_as_float);
1482 }
1483 break;
1484
1485 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
1486 for (x=0; x < num_pixels; ++x)
1487 {
1488 int pixel_index = x*channels;
1489
1490 for (n = 0; n < channels; n++)
1491 {
1492 int index = pixel_index + n;
1493 ((float*)output_buffer)[index] = encode_buffer[index];
1494 }
1495 }
1496 break;
1497
1498 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
1499 for (x=0; x < num_pixels; ++x)
1500 {
1501 int pixel_index = x*channels;
1502
1503 for (n = 0; n < num_nonalpha; n++)
1504 {
1505 int index = pixel_index + nonalpha[n];
1506 ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
1507 }
1508
1509 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1510 ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
1511 }
1512 break;
1513
1514 default:
1515 STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
1516 break;
1517 }
1518}
1519
1520static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n)
1521{
1522 int x, k;
1523 int output_w = stbir_info->output_w;
1524 stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
1525 float* vertical_coefficients = stbir_info->vertical_coefficients;
1526 int channels = stbir_info->channels;
1527 int alpha_channel = stbir_info->alpha_channel;
1528 int type = stbir_info->type;
1529 int colorspace = stbir_info->colorspace;
1530 int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
1531 void* output_data = stbir_info->output_data;
1532 float* encode_buffer = stbir_info->encode_buffer;
1533 int decode = STBIR__DECODE(type, colorspace);
1534 int coefficient_width = stbir_info->vertical_coefficient_width;
1535 int coefficient_counter;
1536 int contributor = n;
1537
1538 float* ring_buffer = stbir_info->ring_buffer;
1539 int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
1540 int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
1541 int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
1542
1543 int n0,n1, output_row_start;
1544 int coefficient_group = coefficient_width * contributor;
1545
1546 n0 = vertical_contributors[contributor].n0;
1547 n1 = vertical_contributors[contributor].n1;
1548
1549 output_row_start = n * stbir_info->output_stride_bytes;
1550
1551 STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
1552
1553 memset(encode_buffer, 0, output_w * sizeof(float) * channels);
1554
1555 // I tried reblocking this for better cache usage of encode_buffer
1556 // (using x_outer, k, x_inner), but it lost speed. -- stb
1557
1558 coefficient_counter = 0;
1559 switch (channels) {
1560 case 1:
1561 for (k = n0; k <= n1; k++)
1562 {
1563 int coefficient_index = coefficient_counter++;
1564 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1565 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1566 for (x = 0; x < output_w; ++x)
1567 {
1568 int in_pixel_index = x * 1;
1569 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1570 }
1571 }
1572 break;
1573 case 2:
1574 for (k = n0; k <= n1; k++)
1575 {
1576 int coefficient_index = coefficient_counter++;
1577 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1578 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1579 for (x = 0; x < output_w; ++x)
1580 {
1581 int in_pixel_index = x * 2;
1582 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1583 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1584 }
1585 }
1586 break;
1587 case 3:
1588 for (k = n0; k <= n1; k++)
1589 {
1590 int coefficient_index = coefficient_counter++;
1591 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1592 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1593 for (x = 0; x < output_w; ++x)
1594 {
1595 int in_pixel_index = x * 3;
1596 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1597 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1598 encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
1599 }
1600 }
1601 break;
1602 case 4:
1603 for (k = n0; k <= n1; k++)
1604 {
1605 int coefficient_index = coefficient_counter++;
1606 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1607 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1608 for (x = 0; x < output_w; ++x)
1609 {
1610 int in_pixel_index = x * 4;
1611 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1612 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1613 encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
1614 encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient;
1615 }
1616 }
1617 break;
1618 default:
1619 for (k = n0; k <= n1; k++)
1620 {
1621 int coefficient_index = coefficient_counter++;
1622 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1623 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1624 for (x = 0; x < output_w; ++x)
1625 {
1626 int in_pixel_index = x * channels;
1627 int c;
1628 for (c = 0; c < channels; c++)
1629 encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
1630 }
1631 }
1632 break;
1633 }
1634 stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode);
1635}
1636
1637static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n)
1638{
1639 int x, k;
1640 int output_w = stbir_info->output_w;
1641 stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
1642 float* vertical_coefficients = stbir_info->vertical_coefficients;
1643 int channels = stbir_info->channels;
1644 int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
1645 float* horizontal_buffer = stbir_info->horizontal_buffer;
1646 int coefficient_width = stbir_info->vertical_coefficient_width;
1647 int contributor = n + stbir_info->vertical_filter_pixel_margin;
1648
1649 float* ring_buffer = stbir_info->ring_buffer;
1650 int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
1651 int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
1652 int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
1653 int n0,n1;
1654
1655 n0 = vertical_contributors[contributor].n0;
1656 n1 = vertical_contributors[contributor].n1;
1657
1658 STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
1659
1660 for (k = n0; k <= n1; k++)
1661 {
1662 int coefficient_index = k - n0;
1663 int coefficient_group = coefficient_width * contributor;
1664 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1665
1666 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1667
1668 switch (channels) {
1669 case 1:
1670 for (x = 0; x < output_w; x++)
1671 {
1672 int in_pixel_index = x * 1;
1673 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
1674 }
1675 break;
1676 case 2:
1677 for (x = 0; x < output_w; x++)
1678 {
1679 int in_pixel_index = x * 2;
1680 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
1681 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
1682 }
1683 break;
1684 case 3:
1685 for (x = 0; x < output_w; x++)
1686 {
1687 int in_pixel_index = x * 3;
1688 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
1689 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
1690 ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
1691 }
1692 break;
1693 case 4:
1694 for (x = 0; x < output_w; x++)
1695 {
1696 int in_pixel_index = x * 4;
1697 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
1698 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
1699 ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
1700 ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
1701 }
1702 break;
1703 default:
1704 for (x = 0; x < output_w; x++)
1705 {
1706 int in_pixel_index = x * channels;
1707
1708 int c;
1709 for (c = 0; c < channels; c++)
1710 ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
1711 }
1712 break;
1713 }
1714 }
1715}
1716
1717static void stbir__buffer_loop_upsample(stbir__info* stbir_info)
1718{
1719 int y;
1720 float scale_ratio = stbir_info->vertical_scale;
1721 float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio;
1722
1723 STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
1724
1725 for (y = 0; y < stbir_info->output_h; y++)
1726 {
1727 float in_center_of_out = 0; // Center of the current out scanline in the in scanline space
1728 int in_first_scanline = 0, in_last_scanline = 0;
1729
1730 stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out);
1731
1732 STBIR_ASSERT(in_last_scanline - in_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
1733
1734 if (stbir_info->ring_buffer_begin_index >= 0)
1735 {
1736 // Get rid of whatever we don't need anymore.
1737 while (in_first_scanline > stbir_info->ring_buffer_first_scanline)
1738 {
1739 if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
1740 {
1741 // We just popped the last scanline off the ring buffer.
1742 // Reset it to the empty state.
1743 stbir_info->ring_buffer_begin_index = -1;
1744 stbir_info->ring_buffer_first_scanline = 0;
1745 stbir_info->ring_buffer_last_scanline = 0;
1746 break;
1747 }
1748 else
1749 {
1750 stbir_info->ring_buffer_first_scanline++;
1751 stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
1752 }
1753 }
1754 }
1755
1756 // Load in new ones.
1757 if (stbir_info->ring_buffer_begin_index < 0)
1758 stbir__decode_and_resample_upsample(stbir_info, in_first_scanline);
1759
1760 while (in_last_scanline > stbir_info->ring_buffer_last_scanline)
1761 stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
1762
1763 // Now all buffers should be ready to write a row of vertical sampling.
1764 stbir__resample_vertical_upsample(stbir_info, y);
1765
1766 STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h);
1767 }
1768}
1769
1770static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline)
1771{
1772 int output_stride_bytes = stbir_info->output_stride_bytes;
1773 int channels = stbir_info->channels;
1774 int alpha_channel = stbir_info->alpha_channel;
1775 int type = stbir_info->type;
1776 int colorspace = stbir_info->colorspace;
1777 int output_w = stbir_info->output_w;
1778 void* output_data = stbir_info->output_data;
1779 int decode = STBIR__DECODE(type, colorspace);
1780
1781 float* ring_buffer = stbir_info->ring_buffer;
1782 int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
1783
1784 if (stbir_info->ring_buffer_begin_index >= 0)
1785 {
1786 // Get rid of whatever we don't need anymore.
1787 while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline)
1788 {
1789 if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h)
1790 {
1791 int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes;
1792 float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length);
1793 stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel, decode);
1794 STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h);
1795 }
1796
1797 if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
1798 {
1799 // We just popped the last scanline off the ring buffer.
1800 // Reset it to the empty state.
1801 stbir_info->ring_buffer_begin_index = -1;
1802 stbir_info->ring_buffer_first_scanline = 0;
1803 stbir_info->ring_buffer_last_scanline = 0;
1804 break;
1805 }
1806 else
1807 {
1808 stbir_info->ring_buffer_first_scanline++;
1809 stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
1810 }
1811 }
1812 }
1813}
1814
1815static void stbir__buffer_loop_downsample(stbir__info* stbir_info)
1816{
1817 int y;
1818 float scale_ratio = stbir_info->vertical_scale;
1819 int output_h = stbir_info->output_h;
1820 float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio;
1821 int pixel_margin = stbir_info->vertical_filter_pixel_margin;
1822 int max_y = stbir_info->input_h + pixel_margin;
1823
1824 STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
1825
1826 for (y = -pixel_margin; y < max_y; y++)
1827 {
1828 float out_center_of_in; // Center of the current out scanline in the in scanline space
1829 int out_first_scanline, out_last_scanline;
1830
1831 stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, &out_center_of_in);
1832
1833 STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
1834
1835 if (out_last_scanline < 0 || out_first_scanline >= output_h)
1836 continue;
1837
1838 stbir__empty_ring_buffer(stbir_info, out_first_scanline);
1839
1840 stbir__decode_and_resample_downsample(stbir_info, y);
1841
1842 // Load in new ones.
1843 if (stbir_info->ring_buffer_begin_index < 0)
1844 stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline);
1845
1846 while (out_last_scanline > stbir_info->ring_buffer_last_scanline)
1847 stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
1848
1849 // Now the horizontal buffer is ready to write to all ring buffer rows.
1850 stbir__resample_vertical_downsample(stbir_info, y);
1851 }
1852
1853 stbir__empty_ring_buffer(stbir_info, stbir_info->output_h);
1854}
1855
1856static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels)
1857{
1858 info->input_w = input_w;
1859 info->input_h = input_h;
1860 info->output_w = output_w;
1861 info->output_h = output_h;
1862 info->channels = channels;
1863}
1864
1865static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform)
1866{
1867 info->s0 = s0;
1868 info->t0 = t0;
1869 info->s1 = s1;
1870 info->t1 = t1;
1871
1872 if (transform)
1873 {
1874 info->horizontal_scale = transform[0];
1875 info->vertical_scale = transform[1];
1876 info->horizontal_shift = transform[2];
1877 info->vertical_shift = transform[3];
1878 }
1879 else
1880 {
1881 info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0);
1882 info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0);
1883
1884 info->horizontal_shift = s0 * info->output_w / (s1 - s0);
1885 info->vertical_shift = t0 * info->output_h / (t1 - t0);
1886 }
1887}
1888
1889static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter)
1890{
1891 if (h_filter == 0)
1892 h_filter = stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
1893 if (v_filter == 0)
1894 v_filter = stbir__use_upsampling(info->vertical_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
1895 info->horizontal_filter = h_filter;
1896 info->vertical_filter = v_filter;
1897}
1898
1899static stbir_uint32 stbir__calculate_memory(stbir__info *info)
1900{
1901 int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
1902 int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale);
1903
1904 info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w);
1905 info->vertical_num_contributors = stbir__get_contributors(info->vertical_scale , info->vertical_filter , info->input_h, info->output_h);
1906
1907 // One extra entry because floating point precision problems sometimes cause an extra to be necessary.
1908 info->ring_buffer_num_entries = filter_height + 1;
1909
1910 info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors);
1911 info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float);
1912 info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors);
1913 info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float);
1914 info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float);
1915 info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float);
1916 info->ring_buffer_size = info->output_w * info->channels * info->ring_buffer_num_entries * sizeof(float);
1917 info->encode_buffer_size = info->output_w * info->channels * sizeof(float);
1918
1919 STBIR_ASSERT(info->horizontal_filter != 0);
1920 STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
1921 STBIR_ASSERT(info->vertical_filter != 0);
1922 STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
1923
1924 if (stbir__use_height_upsampling(info))
1925 // The horizontal buffer is for when we're downsampling the height and we
1926 // can't output the result of sampling the decode buffer directly into the
1927 // ring buffers.
1928 info->horizontal_buffer_size = 0;
1929 else
1930 // The encode buffer is to retain precision in the height upsampling method
1931 // and isn't used when height downsampling.
1932 info->encode_buffer_size = 0;
1933
1934 return info->horizontal_contributors_size + info->horizontal_coefficients_size
1935 + info->vertical_contributors_size + info->vertical_coefficients_size
1936 + info->decode_buffer_size + info->horizontal_buffer_size
1937 + info->ring_buffer_size + info->encode_buffer_size;
1938}
1939
1940static int stbir__resize_allocated(stbir__info *info,
1941 const void* input_data, int input_stride_in_bytes,
1942 void* output_data, int output_stride_in_bytes,
1943 int alpha_channel, stbir_uint32 flags, stbir_datatype type,
1944 stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace,
1945 void* tempmem, size_t tempmem_size_in_bytes)
1946{
1947 size_t memory_required = stbir__calculate_memory(info);
1948
1949 int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type];
1950 int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type];
1951
1952#ifdef STBIR_DEBUG_OVERWRITE_TEST
1953#define OVERWRITE_ARRAY_SIZE 8
1954 unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE];
1955 unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE];
1956 unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE];
1957 unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE];
1958
1959 size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type];
1960 memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
1961 memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE);
1962 memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
1963 memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE);
1964#endif
1965
1966 STBIR_ASSERT(info->channels >= 0);
1967 STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS);
1968
1969 if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS)
1970 return 0;
1971
1972 STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
1973 STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
1974
1975 if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
1976 return 0;
1977 if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
1978 return 0;
1979
1980 if (alpha_channel < 0)
1981 flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED;
1982
1983 if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) {
1984 STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels);
1985 }
1986
1987 if (alpha_channel >= info->channels)
1988 return 0;
1989
1990 STBIR_ASSERT(tempmem);
1991
1992 if (!tempmem)
1993 return 0;
1994
1995 STBIR_ASSERT(tempmem_size_in_bytes >= memory_required);
1996
1997 if (tempmem_size_in_bytes < memory_required)
1998 return 0;
1999
2000 memset(tempmem, 0, tempmem_size_in_bytes);
2001
2002 info->input_data = input_data;
2003 info->input_stride_bytes = width_stride_input;
2004
2005 info->output_data = output_data;
2006 info->output_stride_bytes = width_stride_output;
2007
2008 info->alpha_channel = alpha_channel;
2009 info->flags = flags;
2010 info->type = type;
2011 info->edge_horizontal = edge_horizontal;
2012 info->edge_vertical = edge_vertical;
2013 info->colorspace = colorspace;
2014
2015 info->horizontal_coefficient_width = stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale);
2016 info->vertical_coefficient_width = stbir__get_coefficient_width (info->vertical_filter , info->vertical_scale );
2017 info->horizontal_filter_pixel_width = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale);
2018 info->vertical_filter_pixel_width = stbir__get_filter_pixel_width (info->vertical_filter , info->vertical_scale );
2019 info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
2020 info->vertical_filter_pixel_margin = stbir__get_filter_pixel_margin(info->vertical_filter , info->vertical_scale );
2021
2022 info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float);
2023 info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2;
2024
2025#define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size)
2026
2027 info->horizontal_contributors = (stbir__contributors *) tempmem;
2028 info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float);
2029 info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors);
2030 info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float);
2031 info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float);
2032
2033 if (stbir__use_height_upsampling(info))
2034 {
2035 info->horizontal_buffer = NULL;
2036 info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
2037 info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float);
2038
2039 STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
2040 }
2041 else
2042 {
2043 info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
2044 info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float);
2045 info->encode_buffer = NULL;
2046
2047 STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
2048 }
2049
2050#undef STBIR__NEXT_MEMPTR
2051
2052 // This signals that the ring buffer is empty
2053 info->ring_buffer_begin_index = -1;
2054
2055 stbir__calculate_filters(info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w);
2056 stbir__calculate_filters(info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h);
2057
2058 STBIR_PROGRESS_REPORT(0);
2059
2060 if (stbir__use_height_upsampling(info))
2061 stbir__buffer_loop_upsample(info);
2062 else
2063 stbir__buffer_loop_downsample(info);
2064
2065 STBIR_PROGRESS_REPORT(1);
2066
2067#ifdef STBIR_DEBUG_OVERWRITE_TEST
2068 STBIR_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
2069 STBIR_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
2070 STBIR_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
2071 STBIR_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0);
2072#endif
2073
2074 return 1;
2075}
2076
2077
2078static int stbir__resize_arbitrary(
2079 void *alloc_context,
2080 const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
2081 void* output_data, int output_w, int output_h, int output_stride_in_bytes,
2082 float s0, float t0, float s1, float t1, float *transform,
2083 int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type,
2084 stbir_filter h_filter, stbir_filter v_filter,
2085 stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace)
2086{
2087 stbir__info info;
2088 int result;
2089 size_t memory_required;
2090 void* extra_memory;
2091
2092 stbir__setup(&info, input_w, input_h, output_w, output_h, channels);
2093 stbir__calculate_transform(&info, s0,t0,s1,t1,transform);
2094 stbir__choose_filter(&info, h_filter, v_filter);
2095 memory_required = stbir__calculate_memory(&info);
2096 extra_memory = STBIR_MALLOC(memory_required, alloc_context);
2097
2098 if (!extra_memory)
2099 return 0;
2100
2101 result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes,
2102 output_data, output_stride_in_bytes,
2103 alpha_channel, flags, type,
2104 edge_horizontal, edge_vertical,
2105 colorspace, extra_memory, memory_required);
2106
2107 STBIR_FREE(extra_memory, alloc_context);
2108
2109 return result;
2110}
2111
2112STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2113 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2114 int num_channels)
2115{
2116 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2117 output_pixels, output_w, output_h, output_stride_in_bytes,
2118 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2119 STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
2120}
2121
2122STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2123 float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2124 int num_channels)
2125{
2126 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2127 output_pixels, output_w, output_h, output_stride_in_bytes,
2128 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_FLOAT, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2129 STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
2130}
2131
2132STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2133 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2134 int num_channels, int alpha_channel, int flags)
2135{
2136 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2137 output_pixels, output_w, output_h, output_stride_in_bytes,
2138 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2139 STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB);
2140}
2141
2142STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2143 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2144 int num_channels, int alpha_channel, int flags,
2145 stbir_edge edge_wrap_mode)
2146{
2147 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2148 output_pixels, output_w, output_h, output_stride_in_bytes,
2149 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2150 edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB);
2151}
2152
2153STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2154 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2155 int num_channels, int alpha_channel, int flags,
2156 stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2157 void *alloc_context)
2158{
2159 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2160 output_pixels, output_w, output_h, output_stride_in_bytes,
2161 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, filter, filter,
2162 edge_wrap_mode, edge_wrap_mode, space);
2163}
2164
2165STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2166 stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
2167 int num_channels, int alpha_channel, int flags,
2168 stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2169 void *alloc_context)
2170{
2171 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2172 output_pixels, output_w, output_h, output_stride_in_bytes,
2173 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT16, filter, filter,
2174 edge_wrap_mode, edge_wrap_mode, space);
2175}
2176
2177
2178STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2179 float *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
2180 int num_channels, int alpha_channel, int flags,
2181 stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2182 void *alloc_context)
2183{
2184 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2185 output_pixels, output_w, output_h, output_stride_in_bytes,
2186 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_FLOAT, filter, filter,
2187 edge_wrap_mode, edge_wrap_mode, space);
2188}
2189
2190
2191STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2192 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2193 stbir_datatype datatype,
2194 int num_channels, int alpha_channel, int flags,
2195 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2196 stbir_filter filter_horizontal, stbir_filter filter_vertical,
2197 stbir_colorspace space, void *alloc_context)
2198{
2199 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2200 output_pixels, output_w, output_h, output_stride_in_bytes,
2201 0,0,1,1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2202 edge_mode_horizontal, edge_mode_vertical, space);
2203}
2204
2205
2206STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2207 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2208 stbir_datatype datatype,
2209 int num_channels, int alpha_channel, int flags,
2210 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2211 stbir_filter filter_horizontal, stbir_filter filter_vertical,
2212 stbir_colorspace space, void *alloc_context,
2213 float x_scale, float y_scale,
2214 float x_offset, float y_offset)
2215{
2216 float transform[4];
2217 transform[0] = x_scale;
2218 transform[1] = y_scale;
2219 transform[2] = x_offset;
2220 transform[3] = y_offset;
2221 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2222 output_pixels, output_w, output_h, output_stride_in_bytes,
2223 0,0,1,1,transform,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2224 edge_mode_horizontal, edge_mode_vertical, space);
2225}
2226
2227STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2228 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2229 stbir_datatype datatype,
2230 int num_channels, int alpha_channel, int flags,
2231 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2232 stbir_filter filter_horizontal, stbir_filter filter_vertical,
2233 stbir_colorspace space, void *alloc_context,
2234 float s0, float t0, float s1, float t1)
2235{
2236 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2237 output_pixels, output_w, output_h, output_stride_in_bytes,
2238 s0,t0,s1,t1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2239 edge_mode_horizontal, edge_mode_vertical, space);
2240}
2241
2242/*
2243------------------------------------------------------------------------------
2244This software is available under 2 licenses -- choose whichever you prefer.
2245------------------------------------------------------------------------------
2246ALTERNATIVE A - MIT License
2247Copyright (c) 2017 Sean Barrett
2248Permission is hereby granted, free of charge, to any person obtaining a copy of
2249this software and associated documentation files (the "Software"), to deal in
2250the Software without restriction, including without limitation the rights to
2251use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
2252of the Software, and to permit persons to whom the Software is furnished to do
2253so, subject to the following conditions:
2254The above copyright notice and this permission notice shall be included in all
2255copies or substantial portions of the Software.
2256THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2257IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2258FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2259AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2260LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2261OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2262SOFTWARE.
2263------------------------------------------------------------------------------
2264ALTERNATIVE B - Public Domain (www.unlicense.org)
2265This is free and unencumbered software released into the public domain.
2266Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
2267software, either in source code form or as a compiled binary, for any purpose,
2268commercial or non-commercial, and by any means.
2269In jurisdictions that recognize copyright laws, the author or authors of this
2270software dedicate any and all copyright interest in the software to the public
2271domain. We make this dedication for the benefit of the public at large and to
2272the detriment of our heirs and successors. We intend this dedication to be an
2273overt act of relinquishment in perpetuity of all present and future rights to
2274this software under copyright law.
2275THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2276IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2277FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2278AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
2279ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2280WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2281------------------------------------------------------------------------------
2282*/
diff --git a/externals/stb/stb_image_resize.h b/externals/stb/stb_image_resize.h
index 3107e0670..ef9e6fe87 100644
--- a/externals/stb/stb_image_resize.h
+++ b/externals/stb/stb_image_resize.h
@@ -1,6 +1,3 @@
1// SPDX-FileCopyrightText: Jorge L Rodriguez
2// SPDX-License-Identifier: MIT
3
4/* stb_image_resize - v0.97 - public domain image resizing 1/* stb_image_resize - v0.97 - public domain image resizing
5 by Jorge L Rodriguez (@VinoBS) - 2014 2 by Jorge L Rodriguez (@VinoBS) - 2014
6 http://github.com/nothings/stb 3 http://github.com/nothings/stb
@@ -383,6 +380,2217 @@ STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int
383//// end header file ///////////////////////////////////////////////////// 380//// end header file /////////////////////////////////////////////////////
384#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H 381#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H
385 382
383
384
385
386
387#ifdef STB_IMAGE_RESIZE_IMPLEMENTATION
388
// Assertion hook: falls back to the C library assert unless the user supplied one.
#if !defined(STBIR_ASSERT)
#include <assert.h>
#define STBIR_ASSERT(x) assert(x)
#endif

// For memset
#include <string.h>

#include <math.h>

// Allocation hooks: fall back to malloc/free unless the user supplied STBIR_MALLOC.
#if !defined(STBIR_MALLOC)
#include <stdlib.h>
// use comma operator to evaluate c, to avoid "unused parameter" warnings
#define STBIR_MALLOC(size,c) ((void)(c), malloc(size))
#define STBIR_FREE(ptr,c)    ((void)(c), free(ptr))
#endif

// Inline qualifier: __forceinline on MSVC, `inline` for other C++ compilers,
// and nothing at all for other C compilers.
#if defined(_MSC_VER)
#define stbir__inline __forceinline
#elif defined(__cplusplus)
#define stbir__inline inline
#else
#define stbir__inline
#endif
415
416
417// should produce compiler error if size is wrong
418typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1];
419
// Marks a value as intentionally unused.
#if defined(_MSC_VER)
#define STBIR__NOTUSED(v)  (void)(v)
#else
#define STBIR__NOTUSED(v)  (void)sizeof(v)
#endif

// Element count of a true array (not of a pointer).
#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0]))

// Default filters; each can be overridden before this point.
#if !defined(STBIR_DEFAULT_FILTER_UPSAMPLE)
#define STBIR_DEFAULT_FILTER_UPSAMPLE    STBIR_FILTER_CATMULLROM
#endif

#if !defined(STBIR_DEFAULT_FILTER_DOWNSAMPLE)
#define STBIR_DEFAULT_FILTER_DOWNSAMPLE  STBIR_FILTER_MITCHELL
#endif

// Progress hook; expands to nothing unless the user defines it.
#if !defined(STBIR_PROGRESS_REPORT)
#define STBIR_PROGRESS_REPORT(float_0_to_1)
#endif

#if !defined(STBIR_MAX_CHANNELS)
#define STBIR_MAX_CHANNELS 64
#endif

// Channel indices are stored in 16-bit variables, hence the upper bound.
#if STBIR_MAX_CHANNELS > 65536
#error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536."
#endif

// This value is added to alpha just before premultiplication to avoid
// zeroing out color values. It is equivalent to 2^-80. If you don't want
// that behavior (it may interfere if you have floating point images with
// very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to
// disable it.
#if !defined(STBIR_ALPHA_EPSILON)
#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
#endif

// Marks a function parameter as intentionally unused.
#if defined(_MSC_VER)
#define STBIR__UNUSED_PARAM(v)  (void)(v)
#else
#define STBIR__UNUSED_PARAM(v)  (void)sizeof(v)
#endif
465
// Size in bytes of one component of each datatype, indexed by stbir_datatype.
// Must match stbir_datatype. The table is only ever read, so it is const.
static const unsigned char stbir__type_size[] = {
    1, // STBIR_TYPE_UINT8
    2, // STBIR_TYPE_UINT16
    4, // STBIR_TYPE_UINT32
    4, // STBIR_TYPE_FLOAT
};
473
// Kernel function centered at 0; also receives the scale.
typedef float stbir__kernel_fn(float x, float scale);
// Support function of a filter; receives the scale.
typedef float stbir__support_fn(float scale);

// A filter is described by its kernel together with its support function.
typedef struct
{
    stbir__kernel_fn  *kernel;
    stbir__support_fn *support;
} stbir__filter_info;
483
// Range of contributing pixels for one pixel:
//  - when upsampling, the source pixels that contribute;
//  - when downsampling, the destination pixels that are contributed to.
// n0 is the first contributing pixel and n1 the last.
typedef struct
{
    int n0, n1;
} stbir__contributors;
491
492typedef struct
493{
494 const void* input_data;
495 int input_w;
496 int input_h;
497 int input_stride_bytes;
498
499 void* output_data;
500 int output_w;
501 int output_h;
502 int output_stride_bytes;
503
504 float s0, t0, s1, t1;
505
506 float horizontal_shift; // Units: output pixels
507 float vertical_shift; // Units: output pixels
508 float horizontal_scale;
509 float vertical_scale;
510
511 int channels;
512 int alpha_channel;
513 stbir_uint32 flags;
514 stbir_datatype type;
515 stbir_filter horizontal_filter;
516 stbir_filter vertical_filter;
517 stbir_edge edge_horizontal;
518 stbir_edge edge_vertical;
519 stbir_colorspace colorspace;
520
521 stbir__contributors* horizontal_contributors;
522 float* horizontal_coefficients;
523
524 stbir__contributors* vertical_contributors;
525 float* vertical_coefficients;
526
527 int decode_buffer_pixels;
528 float* decode_buffer;
529
530 float* horizontal_buffer;
531
532 // cache these because ceil/floor are inexplicably showing up in profile
533 int horizontal_coefficient_width;
534 int vertical_coefficient_width;
535 int horizontal_filter_pixel_width;
536 int vertical_filter_pixel_width;
537 int horizontal_filter_pixel_margin;
538 int vertical_filter_pixel_margin;
539 int horizontal_num_contributors;
540 int vertical_num_contributors;
541
542 int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
543 int ring_buffer_num_entries; // Total number of entries in the ring buffer.
544 int ring_buffer_first_scanline;
545 int ring_buffer_last_scanline;
546 int ring_buffer_begin_index; // first_scanline is at this index in the ring buffer
547 float* ring_buffer;
548
549 float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.
550
551 int horizontal_contributors_size;
552 int horizontal_coefficients_size;
553 int vertical_contributors_size;
554 int vertical_coefficients_size;
555 int decode_buffer_size;
556 int horizontal_buffer_size;
557 int ring_buffer_size;
558 int encode_buffer_size;
559} stbir__info;
560
561
// Largest value of each unsigned integer sample type, as a floating-point
// number. The 32-bit maximum is a double despite the "_as_float" suffix.
static const float  stbir__max_uint8_as_float  = 255.0f;
static const float  stbir__max_uint16_as_float = 65535.0f;
static const double stbir__max_uint32_as_float = 4294967295.0;
565
566
567static stbir__inline int stbir__min(int a, int b)
568{
569 return a < b ? a : b;
570}
571
572static stbir__inline float stbir__saturate(float x)
573{
574 if (x < 0)
575 return 0;
576
577 if (x > 1)
578 return 1;
579
580 return x;
581}
582
583#ifdef STBIR_SATURATE_INT
584static stbir__inline stbir_uint8 stbir__saturate8(int x)
585{
586 if ((unsigned int) x <= 255)
587 return x;
588
589 if (x < 0)
590 return 0;
591
592 return 255;
593}
594
595static stbir__inline stbir_uint16 stbir__saturate16(int x)
596{
597 if ((unsigned int) x <= 65535)
598 return x;
599
600 if (x < 0)
601 return 0;
602
603 return 65535;
604}
605#endif
606
// Lookup table with one float in [0, 1] per 8-bit value (256 entries), used to
// turn an 8-bit sRGB sample into a linear float. The table is only ever read,
// so it is const.
static const float stbir__srgb_uchar_to_linear_float[256] = {
    0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
    0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
    0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
    0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
    0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
    0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
    0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
    0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
    0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
    0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
    0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
    0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
    0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
    0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
    0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
    0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
    0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
    0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
    0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
    0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
    0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
    0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
    0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
    0.982251f, 0.991102f, 1.0f
};
633
634static float stbir__srgb_to_linear(float f)
635{
636 if (f <= 0.04045f)
637 return f / 12.92f;
638 else
639 return (float)pow((f + 0.055f) / 1.055f, 2.4f);
640}
641
642static float stbir__linear_to_srgb(float f)
643{
644 if (f <= 0.0031308f)
645 return f * 12.92f;
646 else
647 return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f;
648}
649
650#ifndef STBIR_NON_IEEE_FLOAT
651// From https://gist.github.com/rygorous/2203834
652
653typedef union
654{
655 stbir_uint32 u;
656 float f;
657} stbir__FP32;
658
659static const stbir_uint32 fp32_to_srgb8_tab4[104] = {
660 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
661 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
662 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
663 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
664 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
665 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
666 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
667 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
668 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
669 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
670 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
671 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
672 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
673};
674
675static stbir_uint8 stbir__linear_to_srgb_uchar(float in)
676{
677 static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
678 static const stbir__FP32 minval = { (127-13) << 23 };
679 stbir_uint32 tab,bias,scale,t;
680 stbir__FP32 f;
681
682 // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
683 // The tests are carefully written so that NaNs map to 0, same as in the reference
684 // implementation.
685 if (!(in > minval.f)) // written this way to catch NaNs
686 in = minval.f;
687 if (in > almostone.f)
688 in = almostone.f;
689
690 // Do the table lookup and unpack bias, scale
691 f.f = in;
692 tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
693 bias = (tab >> 16) << 9;
694 scale = tab & 0xffff;
695
696 // Grab next-highest mantissa bits and perform linear interpolation
697 t = (f.u >> 12) & 0xff;
698 return (unsigned char) ((bias + scale*t) >> 16);
699}
700
701#else
// sRGB transition values, scaled by 1<<28. Entry i is the smallest scaled
// linear value that stbir__linear_to_srgb_uchar maps to output i. The table is
// only ever read, so it is const.
static const int stbir__srgb_offset_to_linear_scaled[256] =
{
            0,     40738,    122216,    203693,    285170,    366648,    448125,    529603,
       611080,    692557,    774035,    855852,    942009,   1033024,   1128971,   1229926,
      1335959,   1447142,   1563542,   1685229,   1812268,   1944725,   2082664,   2226148,
      2375238,   2529996,   2690481,   2856753,   3028870,   3206888,   3390865,   3580856,
      3776916,   3979100,   4187460,   4402049,   4622919,   4850123,   5083710,   5323731,
      5570236,   5823273,   6082892,   6349140,   6622065,   6901714,   7188133,   7481369,
      7781466,   8088471,   8402427,   8723380,   9051372,   9386448,   9728650,  10078021,
     10434603,  10798439,  11169569,  11548036,  11933879,  12327139,  12727857,  13136073,
     13551826,  13975156,  14406100,  14844697,  15290987,  15745007,  16206795,  16676389,
     17153826,  17639142,  18132374,  18633560,  19142734,  19659934,  20185196,  20718552,
     21260042,  21809696,  22367554,  22933648,  23508010,  24090680,  24681686,  25281066,
     25888850,  26505076,  27129772,  27762974,  28404716,  29055026,  29713942,  30381490,
     31057708,  31742624,  32436272,  33138682,  33849884,  34569912,  35298800,  36036568,
     36783260,  37538896,  38303512,  39077136,  39859796,  40651528,  41452360,  42262316,
     43081432,  43909732,  44747252,  45594016,  46450052,  47315392,  48190064,  49074096,
     49967516,  50870356,  51782636,  52704392,  53635648,  54576432,  55526772,  56486700,
     57456236,  58435408,  59424248,  60422780,  61431036,  62449032,  63476804,  64514376,
     65561776,  66619028,  67686160,  68763192,  69850160,  70947088,  72053992,  73170912,
     74297864,  75434880,  76581976,  77739184,  78906536,  80084040,  81271736,  82469648,
     83677792,  84896192,  86124888,  87363888,  88613232,  89872928,  91143016,  92423512,
     93714432,  95015816,  96327688,  97650056,  98982952, 100326408, 101680440, 103045072,
    104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
    115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
    127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
    140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
    154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
    168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
    183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
    199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
    215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
    232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
    250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
};
738
739static stbir_uint8 stbir__linear_to_srgb_uchar(float f)
740{
741 int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
742 int v = 0;
743 int i;
744
745 // Refine the guess with a short binary search.
746 i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
747 i = v + 64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
748 i = v + 32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
749 i = v + 16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
750 i = v + 8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
751 i = v + 4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
752 i = v + 2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
753 i = v + 1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
754
755 return (stbir_uint8) v;
756}
757#endif
758
// Trapezoid kernel: 1 on the flat top (|x| <= 0.5 - scale/2), 0 outside the
// support (|x| >= 0.5 + scale/2), and a linear ramp in between.
// Asserts scale <= 1.
static float stbir__filter_trapezoid(float x, float scale)
{
    float halfscale = scale / 2;
    float t = 0.5f + halfscale;   // outer edge of the ramp
    float r = 0.5f - halfscale;   // inner edge of the ramp
    STBIR_ASSERT(scale <= 1);

    x = (float)fabs(x);

    if (x >= t)
        return 0;
    if (x <= r)
        return 1;
    return (t - x) / scale;
}
778
// Support radius of the trapezoid kernel: 0.5 + scale/2. Asserts scale <= 1.
static float stbir__support_trapezoid(float scale)
{
    STBIR_ASSERT(scale <= 1);
    return scale / 2 + 0.5f;
}
784
// Triangle (tent) kernel: 1 - |x| for |x| <= 1, otherwise 0.
// The second parameter is unused.
static float stbir__filter_triangle(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    return (x <= 1.0f) ? 1 - x : 0;
}
796
// Piecewise-cubic kernel with radius 2: one polynomial for |x| < 1, another
// for 1 <= |x| < 2, and 0 beyond. The second parameter is unused.
static float stbir__filter_cubic(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
        return (4 + x*x*(3*x - 6))/6;

    if (x < 2.0f)
        return (8 + x*(-12 + x*(6 - x)))/6;

    return (0.0f);
}
810
// Catmull-Rom kernel with radius 2: one polynomial for |x| < 1, another for
// 1 <= |x| < 2, and 0 beyond. The second parameter is unused.
static float stbir__filter_catmullrom(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
        return 1 - x*x*(2.5f - 1.5f*x);

    if (x < 2.0f)
        return 2 - x*(4 + x*(0.5f*x - 2.5f));

    return (0.0f);
}
824
// Mitchell kernel with radius 2: one polynomial for |x| < 1, another for
// 1 <= |x| < 2, and 0 beyond. The second parameter is unused.
static float stbir__filter_mitchell(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
        return (16 + x*x*(21 * x - 36))/18;

    if (x < 2.0f)
        return (32 + x*(-60 + x*(36 - 7*x)))/18;

    return (0.0f);
}
838
// Support callback that always reports a radius of 0; the argument is ignored.
static float stbir__support_zero(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 0.0f;
}
844
// Support callback that always reports a radius of 1; the argument is ignored.
static float stbir__support_one(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 1.0f;
}
850
// Support callback that always reports a radius of 2; the argument is ignored.
static float stbir__support_two(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 2.0f;
}
856
857static stbir__filter_info stbir__filter_info_table[] = {
858 { NULL, stbir__support_zero },
859 { stbir__filter_trapezoid, stbir__support_trapezoid },
860 { stbir__filter_triangle, stbir__support_one },
861 { stbir__filter_cubic, stbir__support_two },
862 { stbir__filter_catmullrom, stbir__support_two },
863 { stbir__filter_mitchell, stbir__support_two },
864};
865
866stbir__inline static int stbir__use_upsampling(float ratio)
867{
868 return ratio > 1;
869}
870
871stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info)
872{
873 return stbir__use_upsampling(stbir_info->horizontal_scale);
874}
875
876stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info)
877{
878 return stbir__use_upsampling(stbir_info->vertical_scale);
879}
880
881// This is the maximum number of input samples that can affect an output sample
882// with the given filter
883static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
884{
885 STBIR_ASSERT(filter != 0);
886 STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
887
888 if (stbir__use_upsampling(scale))
889 return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2);
890 else
891 return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale);
892}
893
894// This is how much to expand buffers to account for filters seeking outside
895// the image boundaries.
896static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
897{
898 return stbir__get_filter_pixel_width(filter, scale) / 2;
899}
900
901static int stbir__get_coefficient_width(stbir_filter filter, float scale)
902{
903 if (stbir__use_upsampling(scale))
904 return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2);
905 else
906 return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2);
907}
908
909static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size)
910{
911 if (stbir__use_upsampling(scale))
912 return output_size;
913 else
914 return (input_size + stbir__get_filter_pixel_margin(filter, scale) * 2);
915}
916
917static int stbir__get_total_horizontal_coefficients(stbir__info* info)
918{
919 return info->horizontal_num_contributors
920 * stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale);
921}
922
923static int stbir__get_total_vertical_coefficients(stbir__info* info)
924{
925 return info->vertical_num_contributors
926 * stbir__get_coefficient_width (info->vertical_filter, info->vertical_scale);
927}
928
929static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n)
930{
931 return &contributors[n];
932}
933
934// For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
935// if you change it here change it there too.
936static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
937{
938 int width = stbir__get_coefficient_width(filter, scale);
939 return &coefficients[width*n + c];
940}
941
942static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max)
943{
944 switch (edge)
945 {
946 case STBIR_EDGE_ZERO:
947 return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later
948
949 case STBIR_EDGE_CLAMP:
950 if (n < 0)
951 return 0;
952
953 if (n >= max)
954 return max - 1;
955
956 return n; // NOTREACHED
957
958 case STBIR_EDGE_REFLECT:
959 {
960 if (n < 0)
961 {
962 if (n < max)
963 return -n;
964 else
965 return max - 1;
966 }
967
968 if (n >= max)
969 {
970 int max2 = max * 2;
971 if (n >= max2)
972 return 0;
973 else
974 return max2 - n - 1;
975 }
976
977 return n; // NOTREACHED
978 }
979
980 case STBIR_EDGE_WRAP:
981 if (n >= 0)
982 return (n % max);
983 else
984 {
985 int m = (-n) % max;
986
987 if (m != 0)
988 m = max - m;
989
990 return (m);
991 }
992 // NOTREACHED
993
994 default:
995 STBIR_ASSERT(!"Unimplemented edge type");
996 return 0;
997 }
998}
999
1000stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
1001{
1002 // avoid per-pixel switch
1003 if (n >= 0 && n < max)
1004 return n;
1005 return stbir__edge_wrap_slow(edge, n, max);
1006}
1007
1008// What input pixels contribute to this output pixel?
1009static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
1010{
1011 float out_pixel_center = (float)n + 0.5f;
1012 float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
1013 float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
1014
1015 float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio;
1016 float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio;
1017
1018 *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio;
1019 *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5));
1020 *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5));
1021}
1022
1023// What output pixels does this input pixel contribute to?
1024static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in)
1025{
1026 float in_pixel_center = (float)n + 0.5f;
1027 float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
1028 float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
1029
1030 float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift;
1031 float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift;
1032
1033 *out_center_of_in = in_pixel_center * scale_ratio - out_shift;
1034 *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5));
1035 *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5));
1036}
1037
// Computes the filter weights for one output pixel when upsampling.
// coefficient_group receives the kernel value for each input pixel in
// in_first_pixel..in_last_pixel, evaluated at its distance from
// in_center_of_out; contributor->n0/n1 record the input range actually used.
// Leading zero weights are dropped by advancing n0, the weights are rescaled
// to sum to 1, and trailing zero weights are dropped by pulling n1 in.
1038static void stbir__calculate_coefficients_upsample(stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group)
1039{
1040 int i;
1041 float total_filter = 0;
1042 float filter_scale;
1043
1044 STBIR_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
1045
1046 contributor->n0 = in_first_pixel;
1047 contributor->n1 = in_last_pixel;
1048
1049 STBIR_ASSERT(contributor->n1 >= contributor->n0);
1050
1051 for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
1052 {
1053 float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
1054 coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale);
1055
1056 // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.)
// Only slot 0 is tested: the range start moves forward one pixel and the
// loop index is rewound, so slot 0 is recomputed for the next input pixel
// and the group stays packed from index 0.
1057 if (i == 0 && !coefficient_group[i])
1058 {
1059 contributor->n0 = ++in_first_pixel;
1060 i--;
1061 continue;
1062 }
1063
1064 total_filter += coefficient_group[i];
1065 }
1066
1067 // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be.
1068 // It would be true in exact math but is at best approximately true in floating-point math,
1069 // and it would not make sense to try and put actual bounds on this here because it depends
1070 // on the image aspect ratio which can get pretty extreme.
1071 //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0);
1072
1073 STBIR_ASSERT(total_filter > 0.9);
1074 STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
1075
1076 // Make sure the sum of all coefficients is 1.
1077 filter_scale = 1 / total_filter;
1078
1079 for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
1080 coefficient_group[i] *= filter_scale;
1081
// Walk back from the last slot; every zero weight found before the first
// nonzero one shortens the range by moving n1 just below that slot.
1082 for (i = in_last_pixel - in_first_pixel; i >= 0; i--)
1083 {
1084 if (coefficient_group[i])
1085 break;
1086
1087 // This line has no weight. We can skip it.
1088 contributor->n1 = contributor->n0 + i - 1;
1089 }
1090}
1091
1092static void stbir__calculate_coefficients_downsample(stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group)
1093{
1094 int i;
1095
1096 STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
1097
1098 contributor->n0 = out_first_pixel;
1099 contributor->n1 = out_last_pixel;
1100
1101 STBIR_ASSERT(contributor->n1 >= contributor->n0);
1102
1103 for (i = 0; i <= out_last_pixel - out_first_pixel; i++)
1104 {
1105 float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
1106 float x = out_pixel_center - out_center_of_in;
1107 coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
1108 }
1109
1110 // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be.
1111 // It would be true in exact math but is at best approximately true in floating-point math,
1112 // and it would not make sense to try and put actual bounds on this here because it depends
1113 // on the image aspect ratio which can get pretty extreme.
1114 //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
1115
1116 for (i = out_last_pixel - out_first_pixel; i >= 0; i--)
1117 {
1118 if (coefficient_group[i])
1119 break;
1120
1121 // This line has no weight. We can skip it.
1122 contributor->n1 = contributor->n0 + i - 1;
1123 }
1124}
1125
// Post-pass for downsampling filters, run after every contributor's raw
// weights have been computed.
//  1. For each output pixel i, sums the weights that all contributors assign
//     to it and rescales those weights so they total 1.
//  2. For each contributor, skips leading zero weights and any output indices
//     below 0, then shifts the remaining weights down to slot 0.
//  3. Clamps every contributor's n1 to output_size - 1.
1126static void stbir__normalize_downsample_coefficients(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, int input_size, int output_size)
1127{
1128 int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
1129 int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio);
1130 int i, j;
1131 int skip;
1132
1133 for (i = 0; i < output_size; i++)
1134 {
1135 float scale;
1136 float total = 0;
1137
1138 for (j = 0; j < num_contributors; j++)
1139 {
1140 if (i >= contributors[j].n0 && i <= contributors[j].n1)
1141 {
1142 float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0);
1143 total += coefficient;
1144 }
// Stop at the first contributor whose range starts past i.
// NOTE(review): presumably relies on n0 being non-decreasing in j - confirm.
1145 else if (i < contributors[j].n0)
1146 break;
1147 }
1148
1149 STBIR_ASSERT(total > 0.9f);
1150 STBIR_ASSERT(total < 1.1f);
1151
1152 scale = 1 / total;
1153
1154 for (j = 0; j < num_contributors; j++)
1155 {
1156 if (i >= contributors[j].n0 && i <= contributors[j].n1)
1157 *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale;
1158 else if (i < contributors[j].n0)
1159 break;
1160 }
1161 }
1162
1163 // Optimize: Skip zero coefficients and contributions outside of image bounds.
1164 // Do this after normalizing because normalization depends on the n0/n1 values.
1165 for (j = 0; j < num_contributors; j++)
1166 {
1167 int range, max, width;
1168
1169 skip = 0;
// NOTE(review): this scan has no upper bound on skip; it appears to rely on
// each contributor having at least one nonzero weight - confirm.
1170 while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0)
1171 skip++;
1172
1173 contributors[j].n0 += skip;
1174
// Also drop slots that would land on negative output indices.
1175 while (contributors[j].n0 < 0)
1176 {
1177 contributors[j].n0++;
1178 skip++;
1179 }
1180
1181 range = contributors[j].n1 - contributors[j].n0 + 1;
1182 max = stbir__min(num_coefficients, range);
1183
1184 width = stbir__get_coefficient_width(filter, scale_ratio);
// Shift the surviving weights down by skip slots, never reading past the
// end of this contributor's coefficient group.
1185 for (i = 0; i < max; i++)
1186 {
1187 if (i + skip >= width)
1188 break;
1189
1190 *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip);
1191 }
1192
1193 continue;
1194 }
1195
1196 // Using min to avoid writing into invalid pixels.
1197 for (i = 0; i < num_contributors; i++)
1198 contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1);
1199}
1200
1201// Each scan line uses the same kernel values so we should calculate the kernel
1202// values once and then we can use them for every scan line.
1203static void stbir__calculate_filters(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
1204{
1205 int n;
1206 int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
1207
1208 if (stbir__use_upsampling(scale_ratio))
1209 {
1210 float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio;
1211
1212 // Looping through out pixels
1213 for (n = 0; n < total_contributors; n++)
1214 {
1215 float in_center_of_out; // Center of the current out pixel in the in pixel space
1216 int in_first_pixel, in_last_pixel;
1217
1218 stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out);
1219
1220 stbir__calculate_coefficients_upsample(filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
1221 }
1222 }
1223 else
1224 {
1225 float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio;
1226
1227 // Looping through in pixels
1228 for (n = 0; n < total_contributors; n++)
1229 {
1230 float out_center_of_in; // Center of the current out pixel in the in pixel space
1231 int out_first_pixel, out_last_pixel;
1232 int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio);
1233
1234 stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in);
1235
1236 stbir__calculate_coefficients_downsample(filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
1237 }
1238
1239 stbir__normalize_downsample_coefficients(contributors, coefficients, filter, scale_ratio, input_size, output_size);
1240 }
1241}
1242
1243static float* stbir__get_decode_buffer(stbir__info* stbir_info)
1244{
1245 // The 0 index of the decode buffer starts after the margin. This makes
1246 // it okay to use negative indexes on the decode buffer.
1247 return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels];
1248}
1249
// Packs a (type, colorspace) pair into one int so a single switch can
// dispatch on both.
1250#define STBIR__DECODE(type, colorspace) ((int)(type) * (STBIR_MAX_COLORSPACES) + (int)(colorspace))
1251
// Decodes input scanline n into the float decode buffer.
// The row is chosen by applying the vertical edge mode to n; pixels are
// written for x from -margin up to input_w + margin - 1, with the horizontal
// edge mode applied to each x. Samples are converted to float per data type
// and colorspace. In the sRGB cases the alpha channel is re-decoded linearly
// unless STBIR_FLAG_ALPHA_USES_COLORSPACE is set. Unless
// STBIR_FLAG_ALPHA_PREMULTIPLIED is set, the non-alpha channels are then
// multiplied by alpha. With STBIR_EDGE_ZERO, out-of-image rows and the
// horizontal margins are filled with zeros.
1252static void stbir__decode_scanline(stbir__info* stbir_info, int n)
1253{
1254 int c;
1255 int channels = stbir_info->channels;
1256 int alpha_channel = stbir_info->alpha_channel;
1257 int type = stbir_info->type;
1258 int colorspace = stbir_info->colorspace;
1259 int input_w = stbir_info->input_w;
1260 size_t input_stride_bytes = stbir_info->input_stride_bytes;
1261 float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1262 stbir_edge edge_horizontal = stbir_info->edge_horizontal;
1263 stbir_edge edge_vertical = stbir_info->edge_vertical;
1264 size_t in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes;
1265 const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset;
1266 int max_x = input_w + stbir_info->horizontal_filter_pixel_margin;
1267 int decode = STBIR__DECODE(type, colorspace);
1268
1269 int x = -stbir_info->horizontal_filter_pixel_margin;
1270
1271 // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input,
1272 // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO
1273 if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h))
1274 {
1275 for (; x < max_x; x++)
1276 for (c = 0; c < channels; c++)
1277 decode_buffer[x*channels + c] = 0;
1278 return;
1279 }
1280
// Every case below continues from the shared x = -margin start and walks to
// max_x, reading the edge-wrapped source pixel for each x.
1281 switch (decode)
1282 {
1283 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
1284 for (; x < max_x; x++)
1285 {
1286 int decode_pixel_index = x * channels;
1287 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1288 for (c = 0; c < channels; c++)
1289 decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / stbir__max_uint8_as_float;
1290 }
1291 break;
1292
1293 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
1294 for (; x < max_x; x++)
1295 {
1296 int decode_pixel_index = x * channels;
1297 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1298 for (c = 0; c < channels; c++)
1299 decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]];
1300
// Alpha was just decoded through the sRGB table like the color channels;
// overwrite it with a plain linear decode unless the flag says otherwise.
1301 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1302 decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint8_as_float;
1303 }
1304 break;
1305
1306 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
1307 for (; x < max_x; x++)
1308 {
1309 int decode_pixel_index = x * channels;
1310 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1311 for (c = 0; c < channels; c++)
1312 decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float;
1313 }
1314 break;
1315
1316 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
1317 for (; x < max_x; x++)
1318 {
1319 int decode_pixel_index = x * channels;
1320 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1321 for (c = 0; c < channels; c++)
1322 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float);
1323
1324 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1325 decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint16_as_float;
1326 }
1327 break;
1328
// The 32-bit cases divide in double before narrowing to float.
1329 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
1330 for (; x < max_x; x++)
1331 {
1332 int decode_pixel_index = x * channels;
1333 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1334 for (c = 0; c < channels; c++)
1335 decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float);
1336 }
1337 break;
1338
1339 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
1340 for (; x < max_x; x++)
1341 {
1342 int decode_pixel_index = x * channels;
1343 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1344 for (c = 0; c < channels; c++)
1345 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float));
1346
1347 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1348 decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint32_as_float);
1349 }
1350 break;
1351
1352 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
1353 for (; x < max_x; x++)
1354 {
1355 int decode_pixel_index = x * channels;
1356 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1357 for (c = 0; c < channels; c++)
1358 decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c];
1359 }
1360 break;
1361
1362 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
1363 for (; x < max_x; x++)
1364 {
1365 int decode_pixel_index = x * channels;
1366 int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1367 for (c = 0; c < channels; c++)
1368 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]);
1369
1370 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1371 decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel];
1372 }
1373
1374 break;
1375
1376 default:
1377 STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
1378 break;
1379 }
1380
// Premultiply: scale every non-alpha channel by the pixel's alpha.
// NOTE(review): this indexes with alpha_channel unconditionally; presumably
// the caller sets STBIR_FLAG_ALPHA_PREMULTIPLIED when there is no alpha
// channel - confirm.
1381 if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED))
1382 {
1383 for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++)
1384 {
1385 int decode_pixel_index = x * channels;
1386
1387 // If the alpha value is 0 it will clobber the color values. Make sure it's not.
1388 float alpha = decode_buffer[decode_pixel_index + alpha_channel];
1389#ifndef STBIR_NO_ALPHA_EPSILON
1390 if (stbir_info->type != STBIR_TYPE_FLOAT) {
1391 alpha += STBIR_ALPHA_EPSILON;
1392 decode_buffer[decode_pixel_index + alpha_channel] = alpha;
1393 }
1394#endif
1395 for (c = 0; c < channels; c++)
1396 {
1397 if (c == alpha_channel)
1398 continue;
1399
1400 decode_buffer[decode_pixel_index + c] *= alpha;
1401 }
1402 }
1403 }
1404
// For STBIR_EDGE_ZERO the margins were decoded from pixel 0 by the edge
// wrap; replace both margins with zeros now.
1405 if (edge_horizontal == STBIR_EDGE_ZERO)
1406 {
1407 for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++)
1408 {
1409 for (c = 0; c < channels; c++)
1410 decode_buffer[x*channels + c] = 0;
1411 }
1412 for (x = input_w; x < max_x; x++)
1413 {
1414 for (c = 0; c < channels; c++)
1415 decode_buffer[x*channels + c] = 0;
1416 }
1417 }
1418}
1419
// Returns the start of ring buffer entry `index`, where every entry is
// ring_buffer_length floats long.
static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length)
{
    int first_float = index * ring_buffer_length;
    return ring_buffer + first_float;
}
1424
1425static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n)
1426{
1427 int ring_buffer_index;
1428 float* ring_buffer;
1429
1430 stbir_info->ring_buffer_last_scanline = n;
1431
1432 if (stbir_info->ring_buffer_begin_index < 0)
1433 {
1434 ring_buffer_index = stbir_info->ring_buffer_begin_index = 0;
1435 stbir_info->ring_buffer_first_scanline = n;
1436 }
1437 else
1438 {
1439 ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
1440 STBIR_ASSERT(ring_buffer_index != stbir_info->ring_buffer_begin_index);
1441 }
1442
1443 ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float));
1444 memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes);
1445
1446 return ring_buffer;
1447}
1448
1449
// Horizontal gather pass used when the width is upsampled.
// For each output pixel x, adds decode-buffer pixels n0..n1 (from that
// pixel's contributor record) into output_buffer, each weighted by the next
// coefficient in the pixel's coefficient group. Results are accumulated with
// +=, so output_buffer is expected to start zeroed (the entry returned by
// stbir__add_empty_ring_buffer_entry is memset to 0). Channel counts 1-4 have
// unrolled loops; any other count uses the generic per-channel loop.
1450static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, float* output_buffer)
1451{
1452 int x, k;
1453 int output_w = stbir_info->output_w;
1454 int channels = stbir_info->channels;
1455 float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1456 stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
1457 float* horizontal_coefficients = stbir_info->horizontal_coefficients;
1458 int coefficient_width = stbir_info->horizontal_coefficient_width;
1459
1460 for (x = 0; x < output_w; x++)
1461 {
1462 int n0 = horizontal_contributors[x].n0;
1463 int n1 = horizontal_contributors[x].n1;
1464
1465 int out_pixel_index = x * channels;
// Same indexing as stbir__get_coefficient(), duplicated here for speed.
1466 int coefficient_group = coefficient_width * x;
1467 int coefficient_counter = 0;
1468
// The input range must stay inside the decode buffer including its margins.
1469 STBIR_ASSERT(n1 >= n0);
1470 STBIR_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin);
1471 STBIR_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin);
1472 STBIR_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
1473 STBIR_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
1474
1475 switch (channels) {
1476 case 1:
1477 for (k = n0; k <= n1; k++)
1478 {
1479 int in_pixel_index = k * 1;
1480 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1481 STBIR_ASSERT(coefficient != 0);
1482 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1483 }
1484 break;
1485 case 2:
1486 for (k = n0; k <= n1; k++)
1487 {
1488 int in_pixel_index = k * 2;
1489 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1490 STBIR_ASSERT(coefficient != 0);
1491 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1492 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1493 }
1494 break;
1495 case 3:
1496 for (k = n0; k <= n1; k++)
1497 {
1498 int in_pixel_index = k * 3;
1499 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1500 STBIR_ASSERT(coefficient != 0);
1501 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1502 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1503 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1504 }
1505 break;
1506 case 4:
1507 for (k = n0; k <= n1; k++)
1508 {
1509 int in_pixel_index = k * 4;
1510 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1511 STBIR_ASSERT(coefficient != 0);
1512 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1513 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1514 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1515 output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
1516 }
1517 break;
1518 default:
1519 for (k = n0; k <= n1; k++)
1520 {
1521 int in_pixel_index = k * channels;
1522 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1523 int c;
1524 STBIR_ASSERT(coefficient != 0);
1525 for (c = 0; c < channels; c++)
1526 output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
1527 }
1528 break;
1529 }
1530 }
1531}
1532
// Horizontal scatter pass used when the width is not upsampled.
// Walks every contributor x in [0, input_w + 2*margin); contributor x
// describes input pixel in_x = x - margin (so margin pixels take part too).
// That input pixel is added into output pixels n0..n1, weighted by
// coefficient k - n0 of the contributor's coefficient group. Results are
// accumulated with +=, so output_buffer is expected to start zeroed (the
// entry returned by stbir__add_empty_ring_buffer_entry is memset to 0).
// Channel counts 1-4 have unrolled loops; any other count uses the generic
// per-channel loop.
1533static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float* output_buffer)
1534{
1535 int x, k;
1536 int input_w = stbir_info->input_w;
1537 int channels = stbir_info->channels;
1538 float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1539 stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
1540 float* horizontal_coefficients = stbir_info->horizontal_coefficients;
1541 int coefficient_width = stbir_info->horizontal_coefficient_width;
1542 int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin;
1543 int max_x = input_w + filter_pixel_margin * 2;
1544
1545 STBIR_ASSERT(!stbir__use_width_upsampling(stbir_info));
1546
1547 switch (channels) {
1548 case 1:
1549 for (x = 0; x < max_x; x++)
1550 {
1551 int n0 = horizontal_contributors[x].n0;
1552 int n1 = horizontal_contributors[x].n1;
1553
1554 int in_x = x - filter_pixel_margin;
1555 int in_pixel_index = in_x * 1;
1556 int max_n = n1;
// Same indexing as stbir__get_coefficient(), duplicated here for speed.
1557 int coefficient_group = coefficient_width * x;
1558
1559 for (k = n0; k <= max_n; k++)
1560 {
1561 int out_pixel_index = k * 1;
1562 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1563 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1564 }
1565 }
1566 break;
1567
1568 case 2:
1569 for (x = 0; x < max_x; x++)
1570 {
1571 int n0 = horizontal_contributors[x].n0;
1572 int n1 = horizontal_contributors[x].n1;
1573
1574 int in_x = x - filter_pixel_margin;
1575 int in_pixel_index = in_x * 2;
1576 int max_n = n1;
1577 int coefficient_group = coefficient_width * x;
1578
1579 for (k = n0; k <= max_n; k++)
1580 {
1581 int out_pixel_index = k * 2;
1582 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1583 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1584 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1585 }
1586 }
1587 break;
1588
1589 case 3:
1590 for (x = 0; x < max_x; x++)
1591 {
1592 int n0 = horizontal_contributors[x].n0;
1593 int n1 = horizontal_contributors[x].n1;
1594
1595 int in_x = x - filter_pixel_margin;
1596 int in_pixel_index = in_x * 3;
1597 int max_n = n1;
1598 int coefficient_group = coefficient_width * x;
1599
1600 for (k = n0; k <= max_n; k++)
1601 {
1602 int out_pixel_index = k * 3;
1603 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1604 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1605 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1606 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1607 }
1608 }
1609 break;
1610
1611 case 4:
1612 for (x = 0; x < max_x; x++)
1613 {
1614 int n0 = horizontal_contributors[x].n0;
1615 int n1 = horizontal_contributors[x].n1;
1616
1617 int in_x = x - filter_pixel_margin;
1618 int in_pixel_index = in_x * 4;
1619 int max_n = n1;
1620 int coefficient_group = coefficient_width * x;
1621
1622 for (k = n0; k <= max_n; k++)
1623 {
1624 int out_pixel_index = k * 4;
1625 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1626 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1627 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1628 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1629 output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
1630 }
1631 }
1632 break;
1633
1634 default:
1635 for (x = 0; x < max_x; x++)
1636 {
1637 int n0 = horizontal_contributors[x].n0;
1638 int n1 = horizontal_contributors[x].n1;
1639
1640 int in_x = x - filter_pixel_margin;
1641 int in_pixel_index = in_x * channels;
1642 int max_n = n1;
1643 int coefficient_group = coefficient_width * x;
1644
1645 for (k = n0; k <= max_n; k++)
1646 {
1647 int c;
1648 int out_pixel_index = k * channels;
1649 float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1650 for (c = 0; c < channels; c++)
1651 output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
1652 }
1653 }
1654 break;
1655 }
1656}
1657
1658static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n)
1659{
1660 // Decode the nth scanline from the source image into the decode buffer.
1661 stbir__decode_scanline(stbir_info, n);
1662
1663 // Now resample it into the ring buffer.
1664 if (stbir__use_width_upsampling(stbir_info))
1665 stbir__resample_horizontal_upsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
1666 else
1667 stbir__resample_horizontal_downsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
1668
1669 // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling.
1670}
1671
1672static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n)
1673{
1674 // Decode the nth scanline from the source image into the decode buffer.
1675 stbir__decode_scanline(stbir_info, n);
1676
1677 memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float));
1678
1679 // Now resample it into the horizontal buffer.
1680 if (stbir__use_width_upsampling(stbir_info))
1681 stbir__resample_horizontal_upsample(stbir_info, stbir_info->horizontal_buffer);
1682 else
1683 stbir__resample_horizontal_downsample(stbir_info, stbir_info->horizontal_buffer);
1684
1685 // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers.
1686}
1687
// Return the ring buffer entry holding scanline `get_scanline`, given that the
// entry at `begin_index` holds scanline `first_scanline`. The slot index wraps
// modulo `ring_buffer_num_entries`.
static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_num_entries, int ring_buffer_length)
{
    int distance_from_first = get_scanline - first_scanline;
    int slot = (begin_index + distance_from_first) % ring_buffer_num_entries;

    return stbir__get_ring_buffer_entry(ring_buffer, slot, ring_buffer_length);
}
1694
1695
1696static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode)
1697{
1698 int x;
1699 int n;
1700 int num_nonalpha;
1701 stbir_uint16 nonalpha[STBIR_MAX_CHANNELS];
1702
1703 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED))
1704 {
1705 for (x=0; x < num_pixels; ++x)
1706 {
1707 int pixel_index = x*channels;
1708
1709 float alpha = encode_buffer[pixel_index + alpha_channel];
1710 float reciprocal_alpha = alpha ? 1.0f / alpha : 0;
1711
1712 // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb
1713 for (n = 0; n < channels; n++)
1714 if (n != alpha_channel)
1715 encode_buffer[pixel_index + n] *= reciprocal_alpha;
1716
1717 // We added in a small epsilon to prevent the color channel from being deleted with zero alpha.
1718 // Because we only add it for integer types, it will automatically be discarded on integer
1719 // conversion, so we don't need to subtract it back out (which would be problematic for
1720 // numeric precision reasons).
1721 }
1722 }
1723
1724 // build a table of all channels that need colorspace correction, so
1725 // we don't perform colorspace correction on channels that don't need it.
1726 for (x = 0, num_nonalpha = 0; x < channels; ++x)
1727 {
1728 if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
1729 {
1730 nonalpha[num_nonalpha++] = (stbir_uint16)x;
1731 }
1732 }
1733
1734 #define STBIR__ROUND_INT(f) ((int) ((f)+0.5))
1735 #define STBIR__ROUND_UINT(f) ((stbir_uint32) ((f)+0.5))
1736
1737 #ifdef STBIR__SATURATE_INT
1738 #define STBIR__ENCODE_LINEAR8(f) stbir__saturate8 (STBIR__ROUND_INT((f) * stbir__max_uint8_as_float ))
1739 #define STBIR__ENCODE_LINEAR16(f) stbir__saturate16(STBIR__ROUND_INT((f) * stbir__max_uint16_as_float))
1740 #else
1741 #define STBIR__ENCODE_LINEAR8(f) (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint8_as_float )
1742 #define STBIR__ENCODE_LINEAR16(f) (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint16_as_float)
1743 #endif
1744
1745 switch (decode)
1746 {
1747 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
1748 for (x=0; x < num_pixels; ++x)
1749 {
1750 int pixel_index = x*channels;
1751
1752 for (n = 0; n < channels; n++)
1753 {
1754 int index = pixel_index + n;
1755 ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]);
1756 }
1757 }
1758 break;
1759
1760 case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
1761 for (x=0; x < num_pixels; ++x)
1762 {
1763 int pixel_index = x*channels;
1764
1765 for (n = 0; n < num_nonalpha; n++)
1766 {
1767 int index = pixel_index + nonalpha[n];
1768 ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
1769 }
1770
1771 if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
1772 ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]);
1773 }
1774 break;
1775
1776 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
1777 for (x=0; x < num_pixels; ++x)
1778 {
1779 int pixel_index = x*channels;
1780
1781 for (n = 0; n < channels; n++)
1782 {
1783 int index = pixel_index + n;
1784 ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]);
1785 }
1786 }
1787 break;
1788
1789 case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
1790 for (x=0; x < num_pixels; ++x)
1791 {
1792 int pixel_index = x*channels;
1793
1794 for (n = 0; n < num_nonalpha; n++)
1795 {
1796 int index = pixel_index + nonalpha[n];
1797 ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * stbir__max_uint16_as_float);
1798 }
1799
1800 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1801 ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]);
1802 }
1803
1804 break;
1805
1806 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
1807 for (x=0; x < num_pixels; ++x)
1808 {
1809 int pixel_index = x*channels;
1810
1811 for (n = 0; n < channels; n++)
1812 {
1813 int index = pixel_index + n;
1814 ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * stbir__max_uint32_as_float);
1815 }
1816 }
1817 break;
1818
1819 case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
1820 for (x=0; x < num_pixels; ++x)
1821 {
1822 int pixel_index = x*channels;
1823
1824 for (n = 0; n < num_nonalpha; n++)
1825 {
1826 int index = pixel_index + nonalpha[n];
1827 ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * stbir__max_uint32_as_float);
1828 }
1829
1830 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1831 ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * stbir__max_uint32_as_float);
1832 }
1833 break;
1834
1835 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
1836 for (x=0; x < num_pixels; ++x)
1837 {
1838 int pixel_index = x*channels;
1839
1840 for (n = 0; n < channels; n++)
1841 {
1842 int index = pixel_index + n;
1843 ((float*)output_buffer)[index] = encode_buffer[index];
1844 }
1845 }
1846 break;
1847
1848 case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
1849 for (x=0; x < num_pixels; ++x)
1850 {
1851 int pixel_index = x*channels;
1852
1853 for (n = 0; n < num_nonalpha; n++)
1854 {
1855 int index = pixel_index + nonalpha[n];
1856 ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
1857 }
1858
1859 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1860 ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
1861 }
1862 break;
1863
1864 default:
1865 STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
1866 break;
1867 }
1868}
1869
1870static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n)
1871{
1872 int x, k;
1873 int output_w = stbir_info->output_w;
1874 stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
1875 float* vertical_coefficients = stbir_info->vertical_coefficients;
1876 int channels = stbir_info->channels;
1877 int alpha_channel = stbir_info->alpha_channel;
1878 int type = stbir_info->type;
1879 int colorspace = stbir_info->colorspace;
1880 int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
1881 void* output_data = stbir_info->output_data;
1882 float* encode_buffer = stbir_info->encode_buffer;
1883 int decode = STBIR__DECODE(type, colorspace);
1884 int coefficient_width = stbir_info->vertical_coefficient_width;
1885 int coefficient_counter;
1886 int contributor = n;
1887
1888 float* ring_buffer = stbir_info->ring_buffer;
1889 int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
1890 int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
1891 int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
1892
1893 int n0,n1, output_row_start;
1894 int coefficient_group = coefficient_width * contributor;
1895
1896 n0 = vertical_contributors[contributor].n0;
1897 n1 = vertical_contributors[contributor].n1;
1898
1899 output_row_start = n * stbir_info->output_stride_bytes;
1900
1901 STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
1902
1903 memset(encode_buffer, 0, output_w * sizeof(float) * channels);
1904
1905 // I tried reblocking this for better cache usage of encode_buffer
1906 // (using x_outer, k, x_inner), but it lost speed. -- stb
1907
1908 coefficient_counter = 0;
1909 switch (channels) {
1910 case 1:
1911 for (k = n0; k <= n1; k++)
1912 {
1913 int coefficient_index = coefficient_counter++;
1914 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1915 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1916 for (x = 0; x < output_w; ++x)
1917 {
1918 int in_pixel_index = x * 1;
1919 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1920 }
1921 }
1922 break;
1923 case 2:
1924 for (k = n0; k <= n1; k++)
1925 {
1926 int coefficient_index = coefficient_counter++;
1927 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1928 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1929 for (x = 0; x < output_w; ++x)
1930 {
1931 int in_pixel_index = x * 2;
1932 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1933 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1934 }
1935 }
1936 break;
1937 case 3:
1938 for (k = n0; k <= n1; k++)
1939 {
1940 int coefficient_index = coefficient_counter++;
1941 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1942 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1943 for (x = 0; x < output_w; ++x)
1944 {
1945 int in_pixel_index = x * 3;
1946 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1947 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1948 encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
1949 }
1950 }
1951 break;
1952 case 4:
1953 for (k = n0; k <= n1; k++)
1954 {
1955 int coefficient_index = coefficient_counter++;
1956 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1957 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1958 for (x = 0; x < output_w; ++x)
1959 {
1960 int in_pixel_index = x * 4;
1961 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1962 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1963 encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
1964 encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient;
1965 }
1966 }
1967 break;
1968 default:
1969 for (k = n0; k <= n1; k++)
1970 {
1971 int coefficient_index = coefficient_counter++;
1972 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1973 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1974 for (x = 0; x < output_w; ++x)
1975 {
1976 int in_pixel_index = x * channels;
1977 int c;
1978 for (c = 0; c < channels; c++)
1979 encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
1980 }
1981 }
1982 break;
1983 }
1984 stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode);
1985}
1986
1987static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n)
1988{
1989 int x, k;
1990 int output_w = stbir_info->output_w;
1991 stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
1992 float* vertical_coefficients = stbir_info->vertical_coefficients;
1993 int channels = stbir_info->channels;
1994 int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
1995 float* horizontal_buffer = stbir_info->horizontal_buffer;
1996 int coefficient_width = stbir_info->vertical_coefficient_width;
1997 int contributor = n + stbir_info->vertical_filter_pixel_margin;
1998
1999 float* ring_buffer = stbir_info->ring_buffer;
2000 int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
2001 int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
2002 int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
2003 int n0,n1;
2004
2005 n0 = vertical_contributors[contributor].n0;
2006 n1 = vertical_contributors[contributor].n1;
2007
2008 STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
2009
2010 for (k = n0; k <= n1; k++)
2011 {
2012 int coefficient_index = k - n0;
2013 int coefficient_group = coefficient_width * contributor;
2014 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
2015
2016 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
2017
2018 switch (channels) {
2019 case 1:
2020 for (x = 0; x < output_w; x++)
2021 {
2022 int in_pixel_index = x * 1;
2023 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2024 }
2025 break;
2026 case 2:
2027 for (x = 0; x < output_w; x++)
2028 {
2029 int in_pixel_index = x * 2;
2030 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2031 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
2032 }
2033 break;
2034 case 3:
2035 for (x = 0; x < output_w; x++)
2036 {
2037 int in_pixel_index = x * 3;
2038 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2039 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
2040 ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
2041 }
2042 break;
2043 case 4:
2044 for (x = 0; x < output_w; x++)
2045 {
2046 int in_pixel_index = x * 4;
2047 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2048 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
2049 ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
2050 ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
2051 }
2052 break;
2053 default:
2054 for (x = 0; x < output_w; x++)
2055 {
2056 int in_pixel_index = x * channels;
2057
2058 int c;
2059 for (c = 0; c < channels; c++)
2060 ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
2061 }
2062 break;
2063 }
2064 }
2065}
2066
2067static void stbir__buffer_loop_upsample(stbir__info* stbir_info)
2068{
2069 int y;
2070 float scale_ratio = stbir_info->vertical_scale;
2071 float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio;
2072
2073 STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
2074
2075 for (y = 0; y < stbir_info->output_h; y++)
2076 {
2077 float in_center_of_out = 0; // Center of the current out scanline in the in scanline space
2078 int in_first_scanline = 0, in_last_scanline = 0;
2079
2080 stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out);
2081
2082 STBIR_ASSERT(in_last_scanline - in_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
2083
2084 if (stbir_info->ring_buffer_begin_index >= 0)
2085 {
2086 // Get rid of whatever we don't need anymore.
2087 while (in_first_scanline > stbir_info->ring_buffer_first_scanline)
2088 {
2089 if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
2090 {
2091 // We just popped the last scanline off the ring buffer.
2092 // Reset it to the empty state.
2093 stbir_info->ring_buffer_begin_index = -1;
2094 stbir_info->ring_buffer_first_scanline = 0;
2095 stbir_info->ring_buffer_last_scanline = 0;
2096 break;
2097 }
2098 else
2099 {
2100 stbir_info->ring_buffer_first_scanline++;
2101 stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
2102 }
2103 }
2104 }
2105
2106 // Load in new ones.
2107 if (stbir_info->ring_buffer_begin_index < 0)
2108 stbir__decode_and_resample_upsample(stbir_info, in_first_scanline);
2109
2110 while (in_last_scanline > stbir_info->ring_buffer_last_scanline)
2111 stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
2112
2113 // Now all buffers should be ready to write a row of vertical sampling.
2114 stbir__resample_vertical_upsample(stbir_info, y);
2115
2116 STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h);
2117 }
2118}
2119
2120static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline)
2121{
2122 int output_stride_bytes = stbir_info->output_stride_bytes;
2123 int channels = stbir_info->channels;
2124 int alpha_channel = stbir_info->alpha_channel;
2125 int type = stbir_info->type;
2126 int colorspace = stbir_info->colorspace;
2127 int output_w = stbir_info->output_w;
2128 void* output_data = stbir_info->output_data;
2129 int decode = STBIR__DECODE(type, colorspace);
2130
2131 float* ring_buffer = stbir_info->ring_buffer;
2132 int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
2133
2134 if (stbir_info->ring_buffer_begin_index >= 0)
2135 {
2136 // Get rid of whatever we don't need anymore.
2137 while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline)
2138 {
2139 if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h)
2140 {
2141 int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes;
2142 float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length);
2143 stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel, decode);
2144 STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h);
2145 }
2146
2147 if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
2148 {
2149 // We just popped the last scanline off the ring buffer.
2150 // Reset it to the empty state.
2151 stbir_info->ring_buffer_begin_index = -1;
2152 stbir_info->ring_buffer_first_scanline = 0;
2153 stbir_info->ring_buffer_last_scanline = 0;
2154 break;
2155 }
2156 else
2157 {
2158 stbir_info->ring_buffer_first_scanline++;
2159 stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
2160 }
2161 }
2162 }
2163}
2164
2165static void stbir__buffer_loop_downsample(stbir__info* stbir_info)
2166{
2167 int y;
2168 float scale_ratio = stbir_info->vertical_scale;
2169 int output_h = stbir_info->output_h;
2170 float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio;
2171 int pixel_margin = stbir_info->vertical_filter_pixel_margin;
2172 int max_y = stbir_info->input_h + pixel_margin;
2173
2174 STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
2175
2176 for (y = -pixel_margin; y < max_y; y++)
2177 {
2178 float out_center_of_in; // Center of the current out scanline in the in scanline space
2179 int out_first_scanline, out_last_scanline;
2180
2181 stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, &out_center_of_in);
2182
2183 STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
2184
2185 if (out_last_scanline < 0 || out_first_scanline >= output_h)
2186 continue;
2187
2188 stbir__empty_ring_buffer(stbir_info, out_first_scanline);
2189
2190 stbir__decode_and_resample_downsample(stbir_info, y);
2191
2192 // Load in new ones.
2193 if (stbir_info->ring_buffer_begin_index < 0)
2194 stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline);
2195
2196 while (out_last_scanline > stbir_info->ring_buffer_last_scanline)
2197 stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
2198
2199 // Now the horizontal buffer is ready to write to all ring buffer rows.
2200 stbir__resample_vertical_downsample(stbir_info, y);
2201 }
2202
2203 stbir__empty_ring_buffer(stbir_info, stbir_info->output_h);
2204}
2205
2206static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels)
2207{
2208 info->input_w = input_w;
2209 info->input_h = input_h;
2210 info->output_w = output_w;
2211 info->output_h = output_h;
2212 info->channels = channels;
2213}
2214
2215static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform)
2216{
2217 info->s0 = s0;
2218 info->t0 = t0;
2219 info->s1 = s1;
2220 info->t1 = t1;
2221
2222 if (transform)
2223 {
2224 info->horizontal_scale = transform[0];
2225 info->vertical_scale = transform[1];
2226 info->horizontal_shift = transform[2];
2227 info->vertical_shift = transform[3];
2228 }
2229 else
2230 {
2231 info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0);
2232 info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0);
2233
2234 info->horizontal_shift = s0 * info->output_w / (s1 - s0);
2235 info->vertical_shift = t0 * info->output_h / (t1 - t0);
2236 }
2237}
2238
2239static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter)
2240{
2241 if (h_filter == 0)
2242 h_filter = stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
2243 if (v_filter == 0)
2244 v_filter = stbir__use_upsampling(info->vertical_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
2245 info->horizontal_filter = h_filter;
2246 info->vertical_filter = v_filter;
2247}
2248
2249static stbir_uint32 stbir__calculate_memory(stbir__info *info)
2250{
2251 int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
2252 int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale);
2253
2254 info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w);
2255 info->vertical_num_contributors = stbir__get_contributors(info->vertical_scale , info->vertical_filter , info->input_h, info->output_h);
2256
2257 // One extra entry because floating point precision problems sometimes cause an extra to be necessary.
2258 info->ring_buffer_num_entries = filter_height + 1;
2259
2260 info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors);
2261 info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float);
2262 info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors);
2263 info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float);
2264 info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float);
2265 info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float);
2266 info->ring_buffer_size = info->output_w * info->channels * info->ring_buffer_num_entries * sizeof(float);
2267 info->encode_buffer_size = info->output_w * info->channels * sizeof(float);
2268
2269 STBIR_ASSERT(info->horizontal_filter != 0);
2270 STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
2271 STBIR_ASSERT(info->vertical_filter != 0);
2272 STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
2273
2274 if (stbir__use_height_upsampling(info))
2275 // The horizontal buffer is for when we're downsampling the height and we
2276 // can't output the result of sampling the decode buffer directly into the
2277 // ring buffers.
2278 info->horizontal_buffer_size = 0;
2279 else
2280 // The encode buffer is to retain precision in the height upsampling method
2281 // and isn't used when height downsampling.
2282 info->encode_buffer_size = 0;
2283
2284 return info->horizontal_contributors_size + info->horizontal_coefficients_size
2285 + info->vertical_contributors_size + info->vertical_coefficients_size
2286 + info->decode_buffer_size + info->horizontal_buffer_size
2287 + info->ring_buffer_size + info->encode_buffer_size;
2288}
2289
2290static int stbir__resize_allocated(stbir__info *info,
2291 const void* input_data, int input_stride_in_bytes,
2292 void* output_data, int output_stride_in_bytes,
2293 int alpha_channel, stbir_uint32 flags, stbir_datatype type,
2294 stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace,
2295 void* tempmem, size_t tempmem_size_in_bytes)
2296{
2297 size_t memory_required = stbir__calculate_memory(info);
2298
2299 int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type];
2300 int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type];
2301
2302#ifdef STBIR_DEBUG_OVERWRITE_TEST
2303#define OVERWRITE_ARRAY_SIZE 8
2304 unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE];
2305 unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE];
2306 unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE];
2307 unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE];
2308
2309 size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type];
2310 memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
2311 memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE);
2312 memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
2313 memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE);
2314#endif
2315
2316 STBIR_ASSERT(info->channels >= 0);
2317 STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS);
2318
2319 if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS)
2320 return 0;
2321
2322 STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
2323 STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
2324
2325 if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
2326 return 0;
2327 if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
2328 return 0;
2329
2330 if (alpha_channel < 0)
2331 flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED;
2332
2333 if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) {
2334 STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels);
2335 }
2336
2337 if (alpha_channel >= info->channels)
2338 return 0;
2339
2340 STBIR_ASSERT(tempmem);
2341
2342 if (!tempmem)
2343 return 0;
2344
2345 STBIR_ASSERT(tempmem_size_in_bytes >= memory_required);
2346
2347 if (tempmem_size_in_bytes < memory_required)
2348 return 0;
2349
2350 memset(tempmem, 0, tempmem_size_in_bytes);
2351
2352 info->input_data = input_data;
2353 info->input_stride_bytes = width_stride_input;
2354
2355 info->output_data = output_data;
2356 info->output_stride_bytes = width_stride_output;
2357
2358 info->alpha_channel = alpha_channel;
2359 info->flags = flags;
2360 info->type = type;
2361 info->edge_horizontal = edge_horizontal;
2362 info->edge_vertical = edge_vertical;
2363 info->colorspace = colorspace;
2364
2365 info->horizontal_coefficient_width = stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale);
2366 info->vertical_coefficient_width = stbir__get_coefficient_width (info->vertical_filter , info->vertical_scale );
2367 info->horizontal_filter_pixel_width = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale);
2368 info->vertical_filter_pixel_width = stbir__get_filter_pixel_width (info->vertical_filter , info->vertical_scale );
2369 info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
2370 info->vertical_filter_pixel_margin = stbir__get_filter_pixel_margin(info->vertical_filter , info->vertical_scale );
2371
2372 info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float);
2373 info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2;
2374
2375#define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size)
2376
2377 info->horizontal_contributors = (stbir__contributors *) tempmem;
2378 info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float);
2379 info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors);
2380 info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float);
2381 info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float);
2382
2383 if (stbir__use_height_upsampling(info))
2384 {
2385 info->horizontal_buffer = NULL;
2386 info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
2387 info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float);
2388
2389 STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
2390 }
2391 else
2392 {
2393 info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
2394 info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float);
2395 info->encode_buffer = NULL;
2396
2397 STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
2398 }
2399
2400#undef STBIR__NEXT_MEMPTR
2401
2402 // This signals that the ring buffer is empty
2403 info->ring_buffer_begin_index = -1;
2404
2405 stbir__calculate_filters(info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w);
2406 stbir__calculate_filters(info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h);
2407
2408 STBIR_PROGRESS_REPORT(0);
2409
2410 if (stbir__use_height_upsampling(info))
2411 stbir__buffer_loop_upsample(info);
2412 else
2413 stbir__buffer_loop_downsample(info);
2414
2415 STBIR_PROGRESS_REPORT(1);
2416
2417#ifdef STBIR_DEBUG_OVERWRITE_TEST
2418 STBIR_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
2419 STBIR_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
2420 STBIR_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
2421 STBIR_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0);
2422#endif
2423
2424 return 1;
2425}
2426
2427
2428static int stbir__resize_arbitrary(
2429 void *alloc_context,
2430 const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
2431 void* output_data, int output_w, int output_h, int output_stride_in_bytes,
2432 float s0, float t0, float s1, float t1, float *transform,
2433 int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type,
2434 stbir_filter h_filter, stbir_filter v_filter,
2435 stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace)
2436{
2437 stbir__info info;
2438 int result;
2439 size_t memory_required;
2440 void* extra_memory;
2441
2442 stbir__setup(&info, input_w, input_h, output_w, output_h, channels);
2443 stbir__calculate_transform(&info, s0,t0,s1,t1,transform);
2444 stbir__choose_filter(&info, h_filter, v_filter);
2445 memory_required = stbir__calculate_memory(&info);
2446 extra_memory = STBIR_MALLOC(memory_required, alloc_context);
2447
2448 if (!extra_memory)
2449 return 0;
2450
2451 result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes,
2452 output_data, output_stride_in_bytes,
2453 alpha_channel, flags, type,
2454 edge_horizontal, edge_vertical,
2455 colorspace, extra_memory, memory_required);
2456
2457 STBIR_FREE(extra_memory, alloc_context);
2458
2459 return result;
2460}
2461
2462STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2463 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2464 int num_channels)
2465{
2466 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2467 output_pixels, output_w, output_h, output_stride_in_bytes,
2468 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2469 STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
2470}
2471
2472STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2473 float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2474 int num_channels)
2475{
2476 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2477 output_pixels, output_w, output_h, output_stride_in_bytes,
2478 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_FLOAT, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2479 STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
2480}
2481
2482STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2483 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2484 int num_channels, int alpha_channel, int flags)
2485{
2486 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2487 output_pixels, output_w, output_h, output_stride_in_bytes,
2488 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2489 STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB);
2490}
2491
2492STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2493 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2494 int num_channels, int alpha_channel, int flags,
2495 stbir_edge edge_wrap_mode)
2496{
2497 return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2498 output_pixels, output_w, output_h, output_stride_in_bytes,
2499 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2500 edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB);
2501}
2502
2503STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2504 unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2505 int num_channels, int alpha_channel, int flags,
2506 stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2507 void *alloc_context)
2508{
2509 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2510 output_pixels, output_w, output_h, output_stride_in_bytes,
2511 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, filter, filter,
2512 edge_wrap_mode, edge_wrap_mode, space);
2513}
2514
2515STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2516 stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
2517 int num_channels, int alpha_channel, int flags,
2518 stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2519 void *alloc_context)
2520{
2521 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2522 output_pixels, output_w, output_h, output_stride_in_bytes,
2523 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT16, filter, filter,
2524 edge_wrap_mode, edge_wrap_mode, space);
2525}
2526
2527
2528STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2529 float *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
2530 int num_channels, int alpha_channel, int flags,
2531 stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2532 void *alloc_context)
2533{
2534 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2535 output_pixels, output_w, output_h, output_stride_in_bytes,
2536 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_FLOAT, filter, filter,
2537 edge_wrap_mode, edge_wrap_mode, space);
2538}
2539
2540
2541STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2542 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2543 stbir_datatype datatype,
2544 int num_channels, int alpha_channel, int flags,
2545 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2546 stbir_filter filter_horizontal, stbir_filter filter_vertical,
2547 stbir_colorspace space, void *alloc_context)
2548{
2549 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2550 output_pixels, output_w, output_h, output_stride_in_bytes,
2551 0,0,1,1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2552 edge_mode_horizontal, edge_mode_vertical, space);
2553}
2554
2555
2556STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2557 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2558 stbir_datatype datatype,
2559 int num_channels, int alpha_channel, int flags,
2560 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2561 stbir_filter filter_horizontal, stbir_filter filter_vertical,
2562 stbir_colorspace space, void *alloc_context,
2563 float x_scale, float y_scale,
2564 float x_offset, float y_offset)
2565{
2566 float transform[4];
2567 transform[0] = x_scale;
2568 transform[1] = y_scale;
2569 transform[2] = x_offset;
2570 transform[3] = y_offset;
2571 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2572 output_pixels, output_w, output_h, output_stride_in_bytes,
2573 0,0,1,1,transform,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2574 edge_mode_horizontal, edge_mode_vertical, space);
2575}
2576
2577STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2578 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2579 stbir_datatype datatype,
2580 int num_channels, int alpha_channel, int flags,
2581 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2582 stbir_filter filter_horizontal, stbir_filter filter_vertical,
2583 stbir_colorspace space, void *alloc_context,
2584 float s0, float t0, float s1, float t1)
2585{
2586 return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2587 output_pixels, output_w, output_h, output_stride_in_bytes,
2588 s0,t0,s1,t1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2589 edge_mode_horizontal, edge_mode_vertical, space);
2590}
2591
2592#endif // STB_IMAGE_RESIZE_IMPLEMENTATION
2593
386/* 2594/*
387------------------------------------------------------------------------------ 2595------------------------------------------------------------------------------
388This software is available under 2 licenses -- choose whichever you prefer. 2596This software is available under 2 licenses -- choose whichever you prefer.
diff --git a/externals/stb/stb_image_write.cpp b/externals/stb/stb_image_write.cpp
deleted file mode 100644
index eda7e7cb1..000000000
--- a/externals/stb/stb_image_write.cpp
+++ /dev/null
@@ -1,1677 +0,0 @@
1// SPDX-FileCopyrightText: stb http://nothings.org/stb
2// SPDX-License-Identifier: MIT
3
4/* stb_image_write - v1.16 - public domain - http://nothings.org/stb
5 writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
6 no warranty implied; use at your own risk
7
8 Before #including,
9
10 #define STB_IMAGE_WRITE_IMPLEMENTATION
11
12 in the file that you want to have the implementation.
13
14 Will probably not work correctly with strict-aliasing optimizations.
15
16ABOUT:
17
18 This header file is a library for writing images to C stdio or a callback.
19
20 The PNG output is not optimal; it is 20-50% larger than the file
21 written by a decent optimizing implementation; though providing a custom
22 zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.
23 This library is designed for source code compactness and simplicity,
24 not optimal image file size or run-time performance.
25
26BUILDING:
27
28 You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.
29 You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace
30 malloc,realloc,free.
31 You can #define STBIW_MEMMOVE() to replace memmove()
32 You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function
33 for PNG compression (instead of the builtin one), it must have the following signature:
34 unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality);
35 The returned data will be freed with STBIW_FREE() (free() by default),
36 so it must be heap allocated with STBIW_MALLOC() (malloc() by default).
37
38UNICODE:
39
40 If compiling for Windows and you wish to use Unicode filenames, compile
41 with
42 #define STBIW_WINDOWS_UTF8
43 and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert
44 Windows wchar_t filenames to utf8.
45
46USAGE:
47
48 There are five functions, one for each image file format:
49
50 int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
51 int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
52 int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
53 int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality);
54 int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
55
56 void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically
57
58 There are also five equivalent functions that use an arbitrary write function. You are
59 expected to open/close your file-equivalent before and after calling these:
60
61 int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes);
62 int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
63 int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
64 int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
65 int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
66
67 where the callback is:
68 void stbi_write_func(void *context, void *data, int size);
69
70 You can configure it with these global variables:
71 int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE
72 int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression
73 int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode
74
75
76 You can define STBI_WRITE_NO_STDIO to disable the file variant of these
77 functions, so the library will not use stdio.h at all. However, this will
78 also disable HDR writing, because it requires stdio for formatted output.
79
80 Each function returns 0 on failure and non-0 on success.
81
82 The functions create an image file defined by the parameters. The image
83 is a rectangle of pixels stored from left-to-right, top-to-bottom.
84 Each pixel contains 'comp' channels of data stored interleaved with 8-bits
85 per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is
86 monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.
87 The *data pointer points to the first byte of the top-left-most pixel.
88 For PNG, "stride_in_bytes" is the distance in bytes from the first byte of
89 a row of pixels to the first byte of the next row of pixels.
90
91 PNG creates output files with the same number of components as the input.
92 The BMP format expands Y to RGB in the file format and does not
93 output alpha.
94
95 PNG supports writing rectangles of data even when the bytes storing rows of
96 data are not consecutive in memory (e.g. sub-rectangles of a larger image),
97 by supplying the stride between the beginning of adjacent rows. The other
98 formats do not. (Thus you cannot write a native-format BMP through the BMP
99 writer, both because it is in BGR order and because it may have padding
100 at the end of the line.)
101
102 PNG allows you to set the deflate compression level by setting the global
103 variable 'stbi_write_png_compression_level' (it defaults to 8).
104
105 HDR expects linear float data. Since the format is always 32-bit rgb(e)
106 data, alpha (if provided) is discarded, and for monochrome data it is
107 replicated across all three channels.
108
109 TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
110 data, set the global variable 'stbi_write_tga_with_rle' to 0.
111
112 JPEG ignores alpha channels in input data; quality is between 1 and 100.
113 Higher quality looks better but results in a bigger image.
114 JPEG baseline (no JPEG progressive).
115
116CREDITS:
117
118
119 Sean Barrett - PNG/BMP/TGA
120 Baldur Karlsson - HDR
121 Jean-Sebastien Guay - TGA monochrome
122 Tim Kelsey - misc enhancements
123 Alan Hickman - TGA RLE
124 Emmanuel Julien - initial file IO callback implementation
125 Jon Olick - original jo_jpeg.cpp code
126 Daniel Gibson - integrate JPEG, allow external zlib
127 Aarni Koskela - allow choosing PNG filter
128
129 bugfixes:
130 github:Chribba
131 Guillaume Chereau
132 github:jry2
133 github:romigrou
134 Sergio Gonzalez
135 Jonas Karlsson
136 Filip Wasil
137 Thatcher Ulrich
138 github:poppolopoppo
139 Patrick Boettcher
140 github:xeekworx
141 Cap Petschulat
142 Simon Rodriguez
143 Ivan Tikhonov
144 github:ignotion
145 Adam Schackart
146 Andrew Kensler
147
148LICENSE
149
150 See end of file for license information.
151
152*/
153
154#include <stb_image_write.h>
155
156#ifdef _WIN32
157 #ifndef _CRT_SECURE_NO_WARNINGS
158 #define _CRT_SECURE_NO_WARNINGS
159 #endif
160 #ifndef _CRT_NONSTDC_NO_DEPRECATE
161 #define _CRT_NONSTDC_NO_DEPRECATE
162 #endif
163#endif
164
165#ifndef STBI_WRITE_NO_STDIO
166#include <stdio.h>
167#endif // STBI_WRITE_NO_STDIO
168
169#include <stdarg.h>
170#include <stdlib.h>
171#include <string.h>
172#include <math.h>
173
174#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))
175// ok
176#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)
177// ok
178#else
179#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
180#endif
181
182#ifndef STBIW_MALLOC
183#define STBIW_MALLOC(sz) malloc(sz)
184#define STBIW_REALLOC(p,newsz) realloc(p,newsz)
185#define STBIW_FREE(p) free(p)
186#endif
187
188#ifndef STBIW_REALLOC_SIZED
189#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz)
190#endif
191
192
193#ifndef STBIW_MEMMOVE
194#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
195#endif
196
197
198#ifndef STBIW_ASSERT
199#include <assert.h>
200#define STBIW_ASSERT(x) assert(x)
201#endif
202
203#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff)
204
205#ifdef STB_IMAGE_WRITE_STATIC
206static int stbi_write_png_compression_level = 8;
207static int stbi_write_tga_with_rle = 1;
208static int stbi_write_force_png_filter = -1;
209#else
210int stbi_write_png_compression_level = 8;
211int stbi_write_tga_with_rle = 1;
212int stbi_write_force_png_filter = -1;
213#endif
214
215static int stbi__flip_vertically_on_write = 0;
216
217STBIWDEF void stbi_flip_vertically_on_write(int flag)
218{
219 stbi__flip_vertically_on_write = flag;
220}
221
222typedef struct
223{
224 stbi_write_func *func;
225 void *context;
226 unsigned char buffer[64];
227 int buf_used;
228} stbi__write_context;
229
230// initialize a callback-based context
231static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context)
232{
233 s->func = c;
234 s->context = context;
235}
236
237#ifndef STBI_WRITE_NO_STDIO
238
// Write callback for stdio output: context is the FILE* to write to.
// The fwrite() result is not inspected (the callback type returns void).
static void stbi__stdio_write(void *context, void *data, int size)
{
   FILE *out = (FILE *) context;
   fwrite(data, 1, size, out);
}
243
244#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
245#ifdef __cplusplus
246#define STBIW_EXTERN extern "C"
247#else
248#define STBIW_EXTERN extern
249#endif
250STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
251STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
252
253STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
254{
255 return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
256}
257#endif
258
// Opens filename with the given mode and returns the stream, or 0 on failure.
// With _WIN32 and STBIW_WINDOWS_UTF8 both defined, the name and mode are
// converted from UTF-8 to wide strings and opened with the wide-character
// API; otherwise fopen_s (MSVC 2005 and later) or plain fopen is used.
static FILE *stbiw__fopen(char const *filename, char const *mode)
{
   FILE *fp;
#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
   wchar_t wide_mode[64];
   wchar_t wide_name[1024];

   // A zero return from the conversion is treated as failure.
   if (MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wide_name, sizeof(wide_name)/sizeof(*wide_name)) == 0)
      return 0;
   if (MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wide_mode, sizeof(wide_mode)/sizeof(*wide_mode)) == 0)
      return 0;

#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (_wfopen_s(&fp, wide_name, wide_mode) != 0)
      fp = 0;
#else
   fp = _wfopen(wide_name, wide_mode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (fopen_s(&fp, filename, mode) != 0)
      fp = 0;
#else
   fp = fopen(filename, mode);
#endif
   return fp;
}
286
287static int stbi__start_write_file(stbi__write_context *s, const char *filename)
288{
289 FILE *f = stbiw__fopen(filename, "wb");
290 stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f);
291 return f != NULL;
292}
293
294static void stbi__end_write_file(stbi__write_context *s)
295{
296 fclose((FILE *)s->context);
297}
298
299#endif // !STBI_WRITE_NO_STDIO
300
301typedef unsigned int stbiw_uint32;
302typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1];
303
304static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v)
305{
306 while (*fmt) {
307 switch (*fmt++) {
308 case ' ': break;
309 case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int));
310 s->func(s->context,&x,1);
311 break; }
312 case '2': { int x = va_arg(v,int);
313 unsigned char b[2];
314 b[0] = STBIW_UCHAR(x);
315 b[1] = STBIW_UCHAR(x>>8);
316 s->func(s->context,b,2);
317 break; }
318 case '4': { stbiw_uint32 x = va_arg(v,int);
319 unsigned char b[4];
320 b[0]=STBIW_UCHAR(x);
321 b[1]=STBIW_UCHAR(x>>8);
322 b[2]=STBIW_UCHAR(x>>16);
323 b[3]=STBIW_UCHAR(x>>24);
324 s->func(s->context,b,4);
325 break; }
326 default:
327 STBIW_ASSERT(0);
328 return;
329 }
330 }
331}
332
333static void stbiw__writef(stbi__write_context *s, const char *fmt, ...)
334{
335 va_list v;
336 va_start(v, fmt);
337 stbiw__writefv(s, fmt, v);
338 va_end(v);
339}
340
341static void stbiw__write_flush(stbi__write_context *s)
342{
343 if (s->buf_used) {
344 s->func(s->context, &s->buffer, s->buf_used);
345 s->buf_used = 0;
346 }
347}
348
349static void stbiw__putc(stbi__write_context *s, unsigned char c)
350{
351 s->func(s->context, &c, 1);
352}
353
354static void stbiw__write1(stbi__write_context *s, unsigned char a)
355{
356 if ((size_t)s->buf_used + 1 > sizeof(s->buffer))
357 stbiw__write_flush(s);
358 s->buffer[s->buf_used++] = a;
359}
360
361static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
362{
363 int n;
364 if ((size_t)s->buf_used + 3 > sizeof(s->buffer))
365 stbiw__write_flush(s);
366 n = s->buf_used;
367 s->buf_used = n+3;
368 s->buffer[n+0] = a;
369 s->buffer[n+1] = b;
370 s->buffer[n+2] = c;
371}
372
373static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d)
374{
375 unsigned char bg[3] = { 255, 0, 255}, px[3];
376 int k;
377
378 if (write_alpha < 0)
379 stbiw__write1(s, d[comp - 1]);
380
381 switch (comp) {
382 case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case
383 case 1:
384 if (expand_mono)
385 stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
386 else
387 stbiw__write1(s, d[0]); // monochrome TGA
388 break;
389 case 4:
390 if (!write_alpha) {
391 // composite against pink background
392 for (k = 0; k < 3; ++k)
393 px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
394 stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
395 break;
396 }
397 /* FALLTHROUGH */
398 case 3:
399 stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
400 break;
401 }
402 if (write_alpha > 0)
403 stbiw__write1(s, d[comp - 1]);
404}
405
406static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
407{
408 stbiw_uint32 zero = 0;
409 int i,j, j_end;
410
411 if (y <= 0)
412 return;
413
414 if (stbi__flip_vertically_on_write)
415 vdir *= -1;
416
417 if (vdir < 0) {
418 j_end = -1; j = y-1;
419 } else {
420 j_end = y; j = 0;
421 }
422
423 for (; j != j_end; j += vdir) {
424 for (i=0; i < x; ++i) {
425 unsigned char *d = (unsigned char *) data + (j*x+i)*comp;
426 stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);
427 }
428 stbiw__write_flush(s);
429 s->func(s->context, &zero, scanline_pad);
430 }
431}
432
433static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
434{
435 if (y < 0 || x < 0) {
436 return 0;
437 } else {
438 va_list v;
439 va_start(v, fmt);
440 stbiw__writefv(s, fmt, v);
441 va_end(v);
442 stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
443 return 1;
444 }
445}
446
447static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
448{
449 if (comp != 4) {
450 // write RGB bitmap
451 int pad = (-x*3) & 3;
452 return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
453 "11 4 22 4" "4 44 22 444444",
454 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header
455 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header
456 } else {
457 // RGBA bitmaps need a v4 header
458 // use BI_BITFIELDS mode with 32bpp and alpha mask
459 // (straight BI_RGB with alpha mask doesn't work in most readers)
460 return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0,
461 "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444",
462 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header
463 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header
464 }
465}
466
467STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
468{
469 stbi__write_context s = { 0 };
470 stbi__start_write_callbacks(&s, func, context);
471 return stbi_write_bmp_core(&s, x, y, comp, data);
472}
473
474#ifndef STBI_WRITE_NO_STDIO
475STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
476{
477 stbi__write_context s = { 0 };
478 if (stbi__start_write_file(&s,filename)) {
479 int r = stbi_write_bmp_core(&s, x, y, comp, data);
480 stbi__end_write_file(&s);
481 return r;
482 } else
483 return 0;
484}
485#endif //!STBI_WRITE_NO_STDIO
486
487static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data)
488{
489 int has_alpha = (comp == 2 || comp == 4);
490 int colorbytes = has_alpha ? comp-1 : comp;
491 int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3
492
493 if (y < 0 || x < 0)
494 return 0;
495
496 if (!stbi_write_tga_with_rle) {
497 return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
498 "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
499 } else {
500 int i,j,k;
501 int jend, jdir;
502
503 stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8);
504
505 if (stbi__flip_vertically_on_write) {
506 j = 0;
507 jend = y;
508 jdir = 1;
509 } else {
510 j = y-1;
511 jend = -1;
512 jdir = -1;
513 }
514 for (; j != jend; j += jdir) {
515 unsigned char *row = (unsigned char *) data + j * x * comp;
516 int len;
517
518 for (i = 0; i < x; i += len) {
519 unsigned char *begin = row + i * comp;
520 int diff = 1;
521 len = 1;
522
523 if (i < x - 1) {
524 ++len;
525 diff = memcmp(begin, row + (i + 1) * comp, comp);
526 if (diff) {
527 const unsigned char *prev = begin;
528 for (k = i + 2; k < x && len < 128; ++k) {
529 if (memcmp(prev, row + k * comp, comp)) {
530 prev += comp;
531 ++len;
532 } else {
533 --len;
534 break;
535 }
536 }
537 } else {
538 for (k = i + 2; k < x && len < 128; ++k) {
539 if (!memcmp(begin, row + k * comp, comp)) {
540 ++len;
541 } else {
542 break;
543 }
544 }
545 }
546 }
547
548 if (diff) {
549 unsigned char header = STBIW_UCHAR(len - 1);
550 stbiw__write1(s, header);
551 for (k = 0; k < len; ++k) {
552 stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
553 }
554 } else {
555 unsigned char header = STBIW_UCHAR(len - 129);
556 stbiw__write1(s, header);
557 stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);
558 }
559 }
560 }
561 stbiw__write_flush(s);
562 }
563 return 1;
564}
565
566STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
567{
568 stbi__write_context s = { 0 };
569 stbi__start_write_callbacks(&s, func, context);
570 return stbi_write_tga_core(&s, x, y, comp, (void *) data);
571}
572
573#ifndef STBI_WRITE_NO_STDIO
574STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
575{
576 stbi__write_context s = { 0 };
577 if (stbi__start_write_file(&s,filename)) {
578 int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
579 stbi__end_write_file(&s);
580 return r;
581 } else
582 return 0;
583}
584#endif
585
586// *************************************************************************************************
587// Radiance RGBE HDR writer
588// by Baldur Karlsson
589
590#define stbiw__max(a, b) ((a) > (b) ? (a) : (b))
591
592#ifndef STBI_WRITE_NO_STDIO
593
// Converts a scaled component to a mantissa byte, clamping to [0,255].
// Negative and NaN inputs map to 0. Converting an out-of-range float to
// unsigned char is undefined behaviour, so the clamp must precede the cast.
static unsigned char stbiw__rgbe_mantissa(float v)
{
   if (!(v > 0.0f))      // also catches NaN
      return 0;
   if (v >= 255.0f)      // rounding in the scale factor can reach 256.0
      return 255;
   return (unsigned char) v;
}

// Packs three linear float components into four RGBE bytes: three mantissas
// sharing the exponent of the largest component, plus that exponent biased
// by 128.
//   rgbe   receives 4 bytes
//   linear 3 input components
// If the largest component is below 1e-32 all four bytes are zero.
static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
{
   int exponent;
   float inner = linear[1] > linear[2] ? linear[1] : linear[2];
   float maxcomp = linear[0] > inner ? linear[0] : inner;

   if (maxcomp < 1e-32f) {
      rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
   } else {
      // Scale so that the largest component lands in [128,256).
      float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp;

      rgbe[0] = stbiw__rgbe_mantissa(linear[0] * normalize);
      rgbe[1] = stbiw__rgbe_mantissa(linear[1] * normalize);
      rgbe[2] = stbiw__rgbe_mantissa(linear[2] * normalize);
      rgbe[3] = (unsigned char)(exponent + 128);
   }
}
610
611static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
612{
613 unsigned char lengthbyte = STBIW_UCHAR(length+128);
614 STBIW_ASSERT(length+128 <= 255);
615 s->func(s->context, &lengthbyte, 1);
616 s->func(s->context, &databyte, 1);
617}
618
619static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data)
620{
621 unsigned char lengthbyte = STBIW_UCHAR(length);
622 STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
623 s->func(s->context, &lengthbyte, 1);
624 s->func(s->context, data, length);
625}
626
627static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline)
628{
629 unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
630 unsigned char rgbe[4];
631 float linear[3];
632 int x;
633
634 scanlineheader[2] = (width&0xff00)>>8;
635 scanlineheader[3] = (width&0x00ff);
636
637 /* skip RLE for images too small or large */
638 if (width < 8 || width >= 32768) {
639 for (x=0; x < width; x++) {
640 switch (ncomp) {
641 case 4: /* fallthrough */
642 case 3: linear[2] = scanline[x*ncomp + 2];
643 linear[1] = scanline[x*ncomp + 1];
644 linear[0] = scanline[x*ncomp + 0];
645 break;
646 default:
647 linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
648 break;
649 }
650 stbiw__linear_to_rgbe(rgbe, linear);
651 s->func(s->context, rgbe, 4);
652 }
653 } else {
654 int c,r;
655 /* encode into scratch buffer */
656 for (x=0; x < width; x++) {
657 switch(ncomp) {
658 case 4: /* fallthrough */
659 case 3: linear[2] = scanline[x*ncomp + 2];
660 linear[1] = scanline[x*ncomp + 1];
661 linear[0] = scanline[x*ncomp + 0];
662 break;
663 default:
664 linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
665 break;
666 }
667 stbiw__linear_to_rgbe(rgbe, linear);
668 scratch[x + width*0] = rgbe[0];
669 scratch[x + width*1] = rgbe[1];
670 scratch[x + width*2] = rgbe[2];
671 scratch[x + width*3] = rgbe[3];
672 }
673
674 s->func(s->context, scanlineheader, 4);
675
676 /* RLE each component separately */
677 for (c=0; c < 4; c++) {
678 unsigned char *comp = &scratch[width*c];
679
680 x = 0;
681 while (x < width) {
682 // find first run
683 r = x;
684 while (r+2 < width) {
685 if (comp[r] == comp[r+1] && comp[r] == comp[r+2])
686 break;
687 ++r;
688 }
689 if (r+2 >= width)
690 r = width;
691 // dump up to first run
692 while (x < r) {
693 int len = r-x;
694 if (len > 128) len = 128;
695 stbiw__write_dump_data(s, len, &comp[x]);
696 x += len;
697 }
698 // if there's a run, output it
699 if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
700 // find next byte after run
701 while (r < width && comp[r] == comp[x])
702 ++r;
703 // output run up to r
704 while (x < r) {
705 int len = r-x;
706 if (len > 127) len = 127;
707 stbiw__write_run_data(s, len, comp[x]);
708 x += len;
709 }
710 }
711 }
712 }
713 }
714}
715
716static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data)
717{
718 if (y <= 0 || x <= 0 || data == NULL)
719 return 0;
720 else {
721 // Each component is stored separately. Allocate scratch space for full output scanline.
722 unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4);
723 int i, len;
724 char buffer[128];
725 char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
726 s->func(s->context, header, sizeof(header)-1);
727
728#ifdef __STDC_LIB_EXT1__
729 len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
730#else
731 len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
732#endif
733 s->func(s->context, buffer, len);
734
735 for(i=0; i < y; i++)
736 stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i));
737 STBIW_FREE(scratch);
738 return 1;
739 }
740}
741
742STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data)
743{
744 stbi__write_context s = { 0 };
745 stbi__start_write_callbacks(&s, func, context);
746 return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
747}
748
749STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
750{
751 stbi__write_context s = { 0 };
752 if (stbi__start_write_file(&s,filename)) {
753 int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data);
754 stbi__end_write_file(&s);
755 return r;
756 } else
757 return 0;
758}
759#endif // STBI_WRITE_NO_STDIO
760
761
762//////////////////////////////////////////////////////////////////////////////
763//
764// PNG writer
765//
766
767#ifndef STBIW_ZLIB_COMPRESS
// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
// The array pointer `a` points just past a two-int header: [0] = capacity, [1] = element count.
// A NULL pointer is a valid empty buffer. These are macros: `a` is evaluated several times.
#define stbiw__sbraw(a) ((int *) (void *) (a) - 2)
#define stbiw__sbm(a)   stbiw__sbraw(a)[0]
#define stbiw__sbn(a)   stbiw__sbraw(a)[1]

// `n` is parenthesized so that expression arguments (e.g. a ternary) group correctly.
#define stbiw__sbneedgrow(a,n)  ((a)==0 || stbiw__sbn(a)+(n) >= stbiw__sbm(a))
#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
#define stbiw__sbgrow(a,n)  stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a)))

#define stbiw__sbpush(a, v)      (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v))
#define stbiw__sbcount(a)        ((a) ? stbiw__sbn(a) : 0)
#define stbiw__sbfree(a)         ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
780
// Grows (or first allocates) a stretchy buffer so it can hold more elements.
//   arr:       address of the array pointer; updated in place on success
//   increment: number of extra elements the caller is about to add
//   itemsize:  size of one element in bytes
// New capacity is 2*old + increment, or increment+1 for a fresh buffer.
// Returns the (possibly moved) array pointer. If reallocation fails, *arr is
// left unchanged and returned as-is.
static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
{
   int new_cap;
   void *block;

   if (*arr) {
      new_cap = 2*stbiw__sbm(*arr)+increment;
      block = STBIW_REALLOC_SIZED(stbiw__sbraw(*arr), stbiw__sbm(*arr)*itemsize + sizeof(int)*2, itemsize * new_cap + sizeof(int)*2);
   } else {
      new_cap = increment+1;
      block = STBIW_REALLOC_SIZED(0, 0, itemsize * new_cap + sizeof(int)*2);
   }

   STBIW_ASSERT(block);
   if (block) {
      int *header = (int *) block;
      if (!*arr)
         header[1] = 0;          // fresh buffer starts with zero elements
      *arr = (void *) (header + 2);
      header[0] = new_cap;       // capacity slot, i.e. stbiw__sbm(*arr)
   }
   return *arr;
}
793
// Moves every complete byte out of the bit accumulator into the stretchy output buffer.
//   data:      stretchy output buffer (may be reallocated by the push)
//   bitbuffer: pending bits, least-significant bits are output first
//   bitcount:  number of valid bits in *bitbuffer; ends up below 8
// Returns the (possibly moved) output buffer.
static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
{
   unsigned int pending = *bitbuffer;
   int npending = *bitcount;

   while (npending >= 8) {
      stbiw__sbpush(data, STBIW_UCHAR(pending));
      pending >>= 8;
      npending -= 8;
   }

   *bitbuffer = pending;
   *bitcount = npending;
   return data;
}
803
// Returns the low `codebits` bits of `code` in reversed bit order.
static int stbiw__zlib_bitrev(int code, int codebits)
{
   int reversed = 0;

   for (; codebits != 0; --codebits, code >>= 1)
      reversed = (reversed << 1) | (code & 1);

   return reversed;
}
813
// Counts how many leading bytes of a and b are equal, stopping at `limit` bytes
// or at 258 bytes, whichever comes first.
static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
{
   int matched = 0;

   while (matched < limit && matched < 258 && a[matched] == b[matched])
      ++matched;

   return matched;
}
821
822static unsigned int stbiw__zhash(unsigned char *data)
823{
824 stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
825 hash ^= hash << 3;
826 hash += hash >> 5;
827 hash ^= hash << 4;
828 hash += hash >> 17;
829 hash ^= hash << 25;
830 hash += hash >> 6;
831 return hash;
832}
833
// Bit-output helpers for the built-in compressor. They expand to expressions that
// read and modify variables named `out`, `bitbuf` and `bitcount`, which must exist
// in the scope where the macro is used.
// stbiw__zlib_flush: move every complete byte of bitbuf into the output buffer.
834#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
// stbiw__zlib_add: OR `code` into bitbuf at the current bit position, advance the
// position by `codebits`, then flush.
835#define stbiw__zlib_add(code,codebits) \
836 (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())
// stbiw__zlib_huffa: emit the c-bit code b with its bits reversed.
837#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)
838// default huffman tables
// huff1: symbols 0..143, 8-bit codes starting at 0x30
// huff2: symbols 144..255, 9-bit codes starting at 0x190
// huff3: symbols 256..279, 7-bit codes starting at 0
// huff4: symbols 280 and above, 8-bit codes starting at 0xc0
839#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8)
840#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9)
841#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7)
842#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8)
// stbiw__zlib_huff picks the right table for any symbol; stbiw__zlib_huffb only
// distinguishes huff1/huff2 and is used for literal bytes.
843#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
844#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))
845
// Number of hash buckets; a power of two because the hash is masked with stbiw__ZHASH-1.
846#define stbiw__ZHASH 16384
847
848#endif // STBIW_ZLIB_COMPRESS
849
// Compresses data_len bytes at `data` into a zlib stream: a two-byte header, deflate
// data, and a trailing Adler-32 of the input.
//   data, data_len: input bytes
//   out_len:        receives the length of the returned stream in bytes
//   quality:        raised to 5 if smaller; each hash bucket holds up to 2*quality
//                   candidate positions before its older half is discarded
// When STBIW_ZLIB_COMPRESS is defined the call is forwarded to it unchanged.
// Otherwise the built-in path emits one fixed-Huffman block, or stored
// (uncompressed) blocks when the compressed form came out larger.
// Returns NULL if the hash table cannot be allocated. The returned pointer is the
// start of the underlying allocation (see "make returned pointer freeable" below).
850STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
851{
852#ifdef STBIW_ZLIB_COMPRESS
853 // user provided a zlib compress implementation, use that
854 return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);
855#else // use builtin
 // lengthc/distc: first length/distance covered by each code, with one extra
 // trailing entry so the lookup loops below terminate; lengtheb/disteb: number of
 // extra bits that follow each code.
856 static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
857 static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
858 static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
859 static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
 // bitbuf, bitcount and out are referenced by name from the stbiw__zlib_* macros.
860 unsigned int bitbuf=0;
861 int i,j, bitcount=0;
862 unsigned char *out = NULL;
 // one stretchy list of earlier input positions per hash bucket
863 unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));
864 if (hash_table == NULL)
865 return NULL;
866 if (quality < 5) quality = 5;
867
868 stbiw__sbpush(out, 0x78); // DEFLATE 32K window
869 stbiw__sbpush(out, 0x5e); // FLEVEL = 1
870 stbiw__zlib_add(1,1); // BFINAL = 1
871 stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman
872
873 for (i=0; i < stbiw__ZHASH; ++i)
874 hash_table[i] = NULL;
875
876 i=0;
 // the hash reads three bytes, so the last three input bytes are handled by the
 // literal loop after this one
877 while (i < data_len-3) {
878 // hash next 3 bytes of data to be compressed
879 int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
880 unsigned char *bestloc = 0;
881 unsigned char **hlist = hash_table[h];
882 int n = stbiw__sbcount(hlist);
883 for (j=0; j < n; ++j) {
884 if (hlist[j]-data > i-32768) { // if entry lies within window
885 int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
886 if (d >= best) { best=d; bestloc=hlist[j]; }
887 }
888 }
889 // when hash table entry is too long, delete half the entries
890 if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
891 STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);
892 stbiw__sbn(hash_table[h]) = quality;
893 }
894 stbiw__sbpush(hash_table[h],data+i);
895
896 if (bestloc) {
897 // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
898 h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
899 hlist = hash_table[h];
900 n = stbiw__sbcount(hlist);
901 for (j=0; j < n; ++j) {
902 if (hlist[j]-data > i-32767) {
903 int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
904 if (e > best) { // if next match is better, bail on current match
905 bestloc = NULL;
906 break;
907 }
908 }
909 }
910 }
911
912 if (bestloc) {
913 int d = (int) (data+i - bestloc); // distance back
914 STBIW_ASSERT(d <= 32767 && best <= 258);
 // find the length code whose range contains `best`, emit it plus any extra bits
915 for (j=0; best > lengthc[j+1]-1; ++j);
916 stbiw__zlib_huff(j+257);
917 if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
 // same for the distance; distance codes are 5 bits, emitted bit-reversed
918 for (j=0; d > distc[j+1]-1; ++j);
919 stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5);
920 if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
921 i += best;
922 } else {
923 stbiw__zlib_huffb(data[i]);
924 ++i;
925 }
926 }
927 // write out final bytes
928 for (;i < data_len; ++i)
929 stbiw__zlib_huffb(data[i]);
930 stbiw__zlib_huff(256); // end of block
931 // pad with 0 bits to byte boundary
932 while (bitcount)
933 stbiw__zlib_add(0,1);
934
935 for (i=0; i < stbiw__ZHASH; ++i)
936 (void) stbiw__sbfree(hash_table[i]);
937 STBIW_FREE(hash_table);
938
939 // store uncompressed instead if compression was worse
 // (threshold: input size + 2 header bytes + 5 bytes of overhead per 32767-byte block)
940 if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) {
941 stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1
942 for (j = 0; j < data_len;) {
943 int blocklen = data_len - j;
944 if (blocklen > 32767) blocklen = 32767;
945 stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression
946 stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN
947 stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8));
948 stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN
949 stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8));
 // copied directly without growing: this branch is only taken when the buffer
 // already held more bytes than the stored form needs
950 memcpy(out+stbiw__sbn(out), data+j, blocklen);
951 stbiw__sbn(out) += blocklen;
952 j += blocklen;
953 }
954 }
955
956 {
957 // compute adler32 on input
958 unsigned int s1=1, s2=0;
 // the first chunk is the remainder so every later chunk is exactly 5552 bytes;
 // the modulo is applied once per chunk
959 int blocklen = (int) (data_len % 5552);
960 j=0;
961 while (j < data_len) {
962 for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; }
963 s1 %= 65521; s2 %= 65521;
964 j += blocklen;
965 blocklen = 5552;
966 }
 // checksum is appended most-significant byte first: s2 high, s2 low, s1 high, s1 low
967 stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));
968 stbiw__sbpush(out, STBIW_UCHAR(s2));
969 stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
970 stbiw__sbpush(out, STBIW_UCHAR(s1));
971 }
972 *out_len = stbiw__sbn(out);
973 // make returned pointer freeable
 // (slides the payload down over the two-int stretchy-buffer header)
974 STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
975 return (unsigned char *) stbiw__sbraw(out);
976#endif // STBIW_ZLIB_COMPRESS
977}
978
// Computes the CRC-32 of `len` bytes at `buffer` using a 256-entry lookup table.
// When STBIW_CRC32 is defined the call is forwarded to it instead.
// A non-positive len yields 0.
static unsigned int stbiw__crc32(unsigned char *buffer, int len)
{
#ifdef STBIW_CRC32
    return STBIW_CRC32(buffer, len);
#else
   static unsigned int crc_table[256] =
   {
      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
      0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
      0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
      0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
      0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
      0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
      0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
      0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
      0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
      0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
      0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
      0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
      0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
      0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
      0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
      0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
      0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
      0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
      0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
      0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
      0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
      0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
      0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
      0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
      0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
      0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
      0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
      0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
      0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
      0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
      0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
   };

   unsigned int crc = ~0u;
   unsigned char *cursor = buffer;
   int remaining;

   // one table lookup per input byte; the register is inverted on entry and exit
   for (remaining = len; remaining > 0; --remaining) {
      unsigned int index = *cursor++ ^ (crc & 0xff);
      crc = (crc >> 8) ^ crc_table[index];
   }
   return ~crc;
#endif
}
1027
// Byte writers used when assembling PNG chunks. Each one stores four bytes at the
// output pointer and then advances that pointer by 4 (the pointer argument is modified).
//   stbiw__wpng4: stores a, b, c, d in that order
//   stbiw__wp32:  stores a 32-bit value most-significant byte first
//   stbiw__wptag: stores the first four characters of s
// NOTE: stbiw__wp32 already ends in ';', so `stbiw__wp32(o, v);` expands to an extra
// empty statement.
1028#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4)
1029#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v));
1030#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3])
1031
// Appends the CRC of the chunk that was just written.
//   data: address of the output cursor, positioned right after the chunk payload;
//         advanced by 4 on return
//   len:  payload length in bytes
// The CRC covers the 4 bytes preceding the payload plus the payload itself.
static void stbiw__wpcrc(unsigned char **data, int len)
{
   unsigned char *crc_begin = *data - len - 4;
   unsigned int crc = stbiw__crc32(crc_begin, len+4);

   stbiw__wp32(*data, crc);
}
1037
// Paeth predictor: returns whichever of a, b, c is closest to a + b - c.
// Ties prefer a, then b.
static unsigned char stbiw__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate-a);
   int dist_b = abs(estimate-b);
   int dist_c = abs(estimate-c);
   int chosen = c;

   if (dist_a <= dist_b && dist_a <= dist_c)
      chosen = a;
   else if (dist_b <= dist_c)
      chosen = b;

   return STBIW_UCHAR(chosen);
}
1045
1046// @OPTIMIZE: provide an option that always forces left-predict or paeth predict
1047static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer)
1048{
1049 static int mapping[] = { 0,1,2,3,4 };
1050 static int firstmap[] = { 0,1,0,5,6 };
1051 int *mymap = (y != 0) ? mapping : firstmap;
1052 int i;
1053 int type = mymap[filter_type];
1054 unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y);
1055 int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;
1056
1057 if (type==0) {
1058 memcpy(line_buffer, z, width*n);
1059 return;
1060 }
1061
1062 // first loop isn't optimized since it's just one pixel
1063 for (i = 0; i < n; ++i) {
1064 switch (type) {
1065 case 1: line_buffer[i] = z[i]; break;
1066 case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break;
1067 case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break;
1068 case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break;
1069 case 5: line_buffer[i] = z[i]; break;
1070 case 6: line_buffer[i] = z[i]; break;
1071 }
1072 }
1073 switch (type) {
1074 case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break;
1075 case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break;
1076 case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break;
1077 case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break;
1078 case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break;
1079 case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break;
1080 }
1081}
1082
1083STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len)
1084{
1085 int force_filter = stbi_write_force_png_filter;
1086 int ctype[5] = { -1, 0, 4, 2, 6 };
1087 unsigned char sig[8] = { 137,80,78,71,13,10,26,10 };
1088 unsigned char *out,*o, *filt, *zlib;
1089 signed char *line_buffer;
1090 int j,zlen;
1091
1092 if (stride_bytes == 0)
1093 stride_bytes = x * n;
1094
1095 if (force_filter >= 5) {
1096 force_filter = -1;
1097 }
1098
1099 filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0;
1100 line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
1101 for (j=0; j < y; ++j) {
1102 int filter_type;
1103 if (force_filter > -1) {
1104 filter_type = force_filter;
1105 stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);
1106 } else { // Estimate the best filter by running through all of them:
1107 int best_filter = 0, best_filter_val = 0x7fffffff, est, i;
1108 for (filter_type = 0; filter_type < 5; filter_type++) {
1109 stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);
1110
1111 // Estimate the entropy of the line using this filter; the less, the better.
1112 est = 0;
1113 for (i = 0; i < x*n; ++i) {
1114 est += abs((signed char) line_buffer[i]);
1115 }
1116 if (est < best_filter_val) {
1117 best_filter_val = est;
1118 best_filter = filter_type;
1119 }
1120 }
1121 if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it
1122 stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);
1123 filter_type = best_filter;
1124 }
1125 }
1126 // when we get here, filter_type contains the filter type, and line_buffer contains the data
1127 filt[j*(x*n+1)] = (unsigned char) filter_type;
1128 STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
1129 }
1130 STBIW_FREE(line_buffer);
1131 zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level);
1132 STBIW_FREE(filt);
1133 if (!zlib) return 0;
1134
1135 // each tag requires 12 bytes of overhead
1136 out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12);
1137 if (!out) return 0;
1138 *out_len = 8 + 12+13 + 12+zlen + 12;
1139
1140 o=out;
1141 STBIW_MEMMOVE(o,sig,8); o+= 8;
1142 stbiw__wp32(o, 13); // header length
1143 stbiw__wptag(o, "IHDR");
1144 stbiw__wp32(o, x);
1145 stbiw__wp32(o, y);
1146 *o++ = 8;
1147 *o++ = STBIW_UCHAR(ctype[n]);
1148 *o++ = 0;
1149 *o++ = 0;
1150 *o++ = 0;
1151 stbiw__wpcrc(&o,13);
1152
1153 stbiw__wp32(o, zlen);
1154 stbiw__wptag(o, "IDAT");
1155 STBIW_MEMMOVE(o, zlib, zlen);
1156 o += zlen;
1157 STBIW_FREE(zlib);
1158 stbiw__wpcrc(&o, zlen);
1159
1160 stbiw__wp32(o,0);
1161 stbiw__wptag(o, "IEND");
1162 stbiw__wpcrc(&o,0);
1163
1164 STBIW_ASSERT(o == out + *out_len);
1165
1166 return out;
1167}
1168
1169#ifndef STBI_WRITE_NO_STDIO
1170STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes)
1171{
1172 FILE *f;
1173 int len;
1174 unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len);
1175 if (png == NULL) return 0;
1176
1177 f = stbiw__fopen(filename, "wb");
1178 if (!f) { STBIW_FREE(png); return 0; }
1179 fwrite(png, 1, len, f);
1180 fclose(f);
1181 STBIW_FREE(png);
1182 return 1;
1183}
1184#endif
1185
1186STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes)
1187{
1188 int len;
1189 unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len);
1190 if (png == NULL) return 0;
1191 func(context, png, len);
1192 STBIW_FREE(png);
1193 return 1;
1194}
1195
1196
1197/* ***************************************************************************
1198 *
1199 * JPEG writer
1200 *
1201 * This is based on Jon Olick's jo_jpeg.cpp:
1202 * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html
1203 */
1204
// For each coefficient in row-major order within an 8x8 block, its position in the
// zigzag-ordered output array. Laid out here as the 8x8 block it describes.
static const unsigned char stbiw__jpg_ZigZag[] = {
    0,  1,  5,  6, 14, 15, 27, 28,
    2,  4,  7, 13, 16, 26, 29, 42,
    3,  8, 12, 17, 25, 30, 41, 43,
    9, 11, 18, 24, 31, 40, 44, 53,
   10, 19, 23, 32, 39, 45, 52, 54,
   20, 22, 33, 38, 46, 51, 55, 60,
   21, 34, 37, 47, 50, 56, 59, 61,
   35, 36, 48, 49, 57, 58, 62, 63
};
1207
1208static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) {
1209 int bitBuf = *bitBufP, bitCnt = *bitCntP;
1210 bitCnt += bs[1];
1211 bitBuf |= bs[0] << (24 - bitCnt);
1212 while(bitCnt >= 8) {
1213 unsigned char c = (bitBuf >> 16) & 255;
1214 stbiw__putc(s, c);
1215 if(c == 255) {
1216 stbiw__putc(s, 0);
1217 }
1218 bitBuf <<= 8;
1219 bitCnt -= 8;
1220 }
1221 *bitBufP = bitBuf;
1222 *bitCntP = bitCnt;
1223}
1224
// In-place 8-point forward DCT over eight floats reached through separate pointers
// (so the same routine serves rows and columns of a block). Outputs are unnormalized:
// eight equal inputs v produce 8*v in *d0p and zero elsewhere.
static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) {
   float in0 = *d0p, in1 = *d1p, in2 = *d2p, in3 = *d3p;
   float in4 = *d4p, in5 = *d5p, in6 = *d6p, in7 = *d7p;

   // first butterfly stage: sums feed the even outputs, differences the odd ones
   float sum07 = in0 + in7, dif07 = in0 - in7;
   float sum16 = in1 + in6, dif16 = in1 - in6;
   float sum25 = in2 + in5, dif25 = in2 - in5;
   float sum34 = in3 + in4, dif34 = in3 - in4;

   // Even part
   float even_a = sum07 + sum34; // phase 2
   float even_d = sum07 - sum34;
   float even_b = sum16 + sum25;
   float even_c = sum16 - sum25;
   float z1 = (even_c + even_d) * 0.707106781f; // c4

   // Odd part
   float odd_a = dif34 + dif25; // phase 2
   float odd_b = dif25 + dif16;
   float odd_c = dif16 + dif07;

   // The rotator is modified from fig 4-8 to avoid extra negations.
   float z5 = (odd_a - odd_c) * 0.382683433f; // c6
   float z2 = odd_a * 0.541196100f + z5; // c2-c6
   float z4 = odd_c * 1.306562965f + z5; // c2+c6
   float z3 = odd_b * 0.707106781f; // c4

   float z11 = dif07 + z3; // phase 5
   float z13 = dif07 - z3;

   float out0 = even_a + even_b; // phase 3
   float out4 = even_a - even_b;
   float out2 = even_d + z1; // phase 5
   float out6 = even_d - z1;

   *d5p = z13 + z2; // phase 6
   *d3p = z13 - z2;
   *d1p = z11 + z4;
   *d7p = z11 - z4;

   *d0p = out0;
   *d2p = out2;
   *d4p = out4;
   *d6p = out6;
}
1272
// Computes the JPEG magnitude category and extra bits for a coefficient.
//   bits[1] receives the number of bits needed for |val| (at least 1)
//   bits[0] receives the low bits[1] bits of val, or of val-1 when val is negative
static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
   int magnitude = val < 0 ? -val : val;
   int payload = val < 0 ? val-1 : val;
   int nbits = 1;

   for (magnitude >>= 1; magnitude != 0; magnitude >>= 1)
      ++nbits;

   bits[1] = nbits;
   bits[0] = payload & ((1<<nbits)-1);
}
1282
1283static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, int du_stride, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
1284 const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] };
1285 const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] };
1286 int dataOff, i, j, n, diff, end0pos, x, y;
1287 int DU[64];
1288
1289 // DCT rows
1290 for(dataOff=0, n=du_stride*8; dataOff<n; dataOff+=du_stride) {
1291 stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]);
1292 }
1293 // DCT columns
1294 for(dataOff=0; dataOff<8; ++dataOff) {
1295 stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+du_stride], &CDU[dataOff+du_stride*2], &CDU[dataOff+du_stride*3], &CDU[dataOff+du_stride*4],
1296 &CDU[dataOff+du_stride*5], &CDU[dataOff+du_stride*6], &CDU[dataOff+du_stride*7]);
1297 }
1298 // Quantize/descale/zigzag the coefficients
1299 for(y = 0, j=0; y < 8; ++y) {
1300 for(x = 0; x < 8; ++x,++j) {
1301 float v;
1302 i = y*du_stride+x;
1303 v = CDU[i]*fdtbl[j];
1304 // DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f));
1305 // ceilf() and floorf() are C99, not C89, but I /think/ they're not needed here anyway?
1306 DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);
1307 }
1308 }
1309
1310 // Encode DC
1311 diff = DU[0] - DC;
1312 if (diff == 0) {
1313 stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);
1314 } else {
1315 unsigned short bits[2];
1316 stbiw__jpg_calcBits(diff, bits);
1317 stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);
1318 stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1319 }
1320 // Encode ACs
1321 end0pos = 63;
1322 for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) {
1323 }
1324 // end0pos = first element in reverse order !=0
1325 if(end0pos == 0) {
1326 stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1327 return DU[0];
1328 }
1329 for(i = 1; i <= end0pos; ++i) {
1330 int startpos = i;
1331 int nrzeroes;
1332 unsigned short bits[2];
1333 for (; DU[i]==0 && i<=end0pos; ++i) {
1334 }
1335 nrzeroes = i-startpos;
1336 if ( nrzeroes >= 16 ) {
1337 int lng = nrzeroes>>4;
1338 int nrmarker;
1339 for (nrmarker=1; nrmarker <= lng; ++nrmarker)
1340 stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
1341 nrzeroes &= 15;
1342 }
1343 stbiw__jpg_calcBits(DU[i], bits);
1344 stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]);
1345 stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1346 }
1347 if(end0pos != 63) {
1348 stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1349 }
1350 return DU[0];
1351}
1352
1353static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
1354 // Constants that don't pollute global namespace
1355 static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
1356 static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1357 static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d};
1358 static const unsigned char std_ac_luminance_values[] = {
1359 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,
1360 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,
1361 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,
1362 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
1363 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,
1364 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,
1365 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1366 };
1367 static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0};
1368 static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1369 static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77};
1370 static const unsigned char std_ac_chrominance_values[] = {
1371 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,
1372 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
1373 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,
1374 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
1375 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,
1376 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
1377 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1378 };
1379 // Huffman tables
1380 static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}};
1381 static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}};
1382 static const unsigned short YAC_HT[256][2] = {
1383 {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1384 {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1385 {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1386 {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1387 {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1388 {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1389 {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1390 {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1391 {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1392 {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1393 {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1394 {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1395 {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1396 {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1397 {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1398 {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1399 };
1400 static const unsigned short UVAC_HT[256][2] = {
1401 {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1402 {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1403 {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1404 {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1405 {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1406 {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1407 {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1408 {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1409 {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1410 {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1411 {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1412 {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1413 {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1414 {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1415 {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1416 {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1417 };
1418 static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22,
1419 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
1420 static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
1421 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
1422 static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
1423 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
1424
1425 int row, col, i, k, subsample;
1426 float fdtbl_Y[64], fdtbl_UV[64];
1427 unsigned char YTable[64], UVTable[64];
1428
1429 if(!data || !width || !height || comp > 4 || comp < 1) {
1430 return 0;
1431 }
1432
1433 quality = quality ? quality : 90;
1434 subsample = quality <= 90 ? 1 : 0;
1435 quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;
1436 quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
1437
1438 for(i = 0; i < 64; ++i) {
1439 int uvti, yti = (YQT[i]*quality+50)/100;
1440 YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti);
1441 uvti = (UVQT[i]*quality+50)/100;
1442 UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
1443 }
1444
1445 for(row = 0, k = 0; row < 8; ++row) {
1446 for(col = 0; col < 8; ++col, ++k) {
1447 fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1448 fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1449 }
1450 }
1451
1452 // Write Headers
1453 {
1454 static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 };
1455 static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 };
1456 const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width),
1457 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 };
1458 s->func(s->context, (void*)head0, sizeof(head0));
1459 s->func(s->context, (void*)YTable, sizeof(YTable));
1460 stbiw__putc(s, 1);
1461 s->func(s->context, UVTable, sizeof(UVTable));
1462 s->func(s->context, (void*)head1, sizeof(head1));
1463 s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
1464 s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
1465 stbiw__putc(s, 0x10); // HTYACinfo
1466 s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1);
1467 s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));
1468 stbiw__putc(s, 1); // HTUDCinfo
1469 s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1);
1470 s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));
1471 stbiw__putc(s, 0x11); // HTUACinfo
1472 s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1);
1473 s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));
1474 s->func(s->context, (void*)head2, sizeof(head2));
1475 }
1476
1477 // Encode 8x8 macroblocks
1478 {
1479 static const unsigned short fillBits[] = {0x7F, 7};
1480 int DCY=0, DCU=0, DCV=0;
1481 int bitBuf=0, bitCnt=0;
1482 // comp == 2 is grey+alpha (alpha is ignored)
1483 int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;
1484 const unsigned char *dataR = (const unsigned char *)data;
1485 const unsigned char *dataG = dataR + ofsG;
1486 const unsigned char *dataB = dataR + ofsB;
1487 int x, y, pos;
1488 if(subsample) {
1489 for(y = 0; y < height; y += 16) {
1490 for(x = 0; x < width; x += 16) {
1491 float Y[256], U[256], V[256];
1492 for(row = y, pos = 0; row < y+16; ++row) {
1493 // row >= height => use last input row
1494 int clamped_row = (row < height) ? row : height - 1;
1495 int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1496 for(col = x; col < x+16; ++col, ++pos) {
1497 // if col >= width => use pixel from last input column
1498 int p = base_p + ((col < width) ? col : (width-1))*comp;
1499 float r = dataR[p], g = dataG[p], b = dataB[p];
1500 Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128;
1501 U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b;
1502 V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b;
1503 }
1504 }
1505 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1506 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1507 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1508 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1509
1510 // subsample U,V
1511 {
1512 float subU[64], subV[64];
1513 int yy, xx;
1514 for(yy = 0, pos = 0; yy < 8; ++yy) {
1515 for(xx = 0; xx < 8; ++xx, ++pos) {
1516 int j = yy*32+xx*2;
1517 subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f;
1518 subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f;
1519 }
1520 }
1521 DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1522 DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1523 }
1524 }
1525 }
1526 } else {
1527 for(y = 0; y < height; y += 8) {
1528 for(x = 0; x < width; x += 8) {
1529 float Y[64], U[64], V[64];
1530 for(row = y, pos = 0; row < y+8; ++row) {
1531 // row >= height => use last input row
1532 int clamped_row = (row < height) ? row : height - 1;
1533 int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1534 for(col = x; col < x+8; ++col, ++pos) {
1535 // if col >= width => use pixel from last input column
1536 int p = base_p + ((col < width) ? col : (width-1))*comp;
1537 float r = dataR[p], g = dataG[p], b = dataB[p];
1538 Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128;
1539 U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b;
1540 V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b;
1541 }
1542 }
1543
1544 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1545 DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1546 DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1547 }
1548 }
1549 }
1550
1551 // Do the bit alignment of the EOI marker
1552 stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);
1553 }
1554
1555 // EOI
1556 stbiw__putc(s, 0xFF);
1557 stbiw__putc(s, 0xD9);
1558
1559 return 1;
1560}
1561
1562STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality)
1563{
1564 stbi__write_context s = { 0 };
1565 stbi__start_write_callbacks(&s, func, context);
1566 return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
1567}
1568
1569
1570#ifndef STBI_WRITE_NO_STDIO
1571STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
1572{
1573 stbi__write_context s = { 0 };
1574 if (stbi__start_write_file(&s,filename)) {
1575 int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
1576 stbi__end_write_file(&s);
1577 return r;
1578 } else
1579 return 0;
1580}
1581#endif
1582
1583/* Revision history
1584 1.16 (2021-07-11)
1585 make Deflate code emit uncompressed blocks when it would otherwise expand
1586 support writing BMPs with alpha channel
1587 1.15 (2020-07-13) unknown
1588 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels
1589 1.13
1590 1.12
1591 1.11 (2019-08-11)
1592
1593 1.10 (2019-02-07)
1594 support utf8 filenames in Windows; fix warnings and platform ifdefs
1595 1.09 (2018-02-11)
1596 fix typo in zlib quality API, improve STB_I_W_STATIC in C++
1597 1.08 (2018-01-29)
1598 add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter
1599 1.07 (2017-07-24)
1600 doc fix
1601 1.06 (2017-07-23)
1602 writing JPEG (using Jon Olick's code)
1603 1.05 ???
1604 1.04 (2017-03-03)
1605 monochrome BMP expansion
1606 1.03 ???
1607 1.02 (2016-04-02)
1608 avoid allocating large structures on the stack
1609 1.01 (2016-01-16)
1610 STBIW_REALLOC_SIZED: support allocators with no realloc support
1611 avoid race-condition in crc initialization
1612 minor compile issues
1613 1.00 (2015-09-14)
1614 installable file IO function
1615 0.99 (2015-09-13)
1616 warning fixes; TGA rle support
1617 0.98 (2015-04-08)
1618 added STBIW_MALLOC, STBIW_ASSERT etc
1619 0.97 (2015-01-18)
1620 fixed HDR asserts, rewrote HDR rle logic
1621 0.96 (2015-01-17)
1622 add HDR output
1623 fix monochrome BMP
1624 0.95 (2014-08-17)
1625 add monochrome TGA output
1626 0.94 (2014-05-31)
1627 rename private functions to avoid conflicts with stb_image.h
1628 0.93 (2014-05-27)
1629 warning fixes
1630 0.92 (2010-08-01)
1631 casts to unsigned char to fix warnings
1632 0.91 (2010-07-17)
1633 first public release
1634 0.90 first internal release
1635*/
1636
1637/*
1638------------------------------------------------------------------------------
1639This software is available under 2 licenses -- choose whichever you prefer.
1640------------------------------------------------------------------------------
1641ALTERNATIVE A - MIT License
1642Copyright (c) 2017 Sean Barrett
1643Permission is hereby granted, free of charge, to any person obtaining a copy of
1644this software and associated documentation files (the "Software"), to deal in
1645the Software without restriction, including without limitation the rights to
1646use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
1647of the Software, and to permit persons to whom the Software is furnished to do
1648so, subject to the following conditions:
1649The above copyright notice and this permission notice shall be included in all
1650copies or substantial portions of the Software.
1651THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1652IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1653FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1654AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1655LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1656OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1657SOFTWARE.
1658------------------------------------------------------------------------------
1659ALTERNATIVE B - Public Domain (www.unlicense.org)
1660This is free and unencumbered software released into the public domain.
1661Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
1662software, either in source code form or as a compiled binary, for any purpose,
1663commercial or non-commercial, and by any means.
1664In jurisdictions that recognize copyright laws, the author or authors of this
1665software dedicate any and all copyright interest in the software to the public
1666domain. We make this dedication for the benefit of the public at large and to
1667the detriment of our heirs and successors. We intend this dedication to be an
1668overt act of relinquishment in perpetuity of all present and future rights to
1669this software under copyright law.
1670THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1671IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1672FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1673AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1674ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1675WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1676------------------------------------------------------------------------------
1677*/
diff --git a/externals/stb/stb_image_write.h b/externals/stb/stb_image_write.h
index b81082bba..e4b32ed1b 100644
--- a/externals/stb/stb_image_write.h
+++ b/externals/stb/stb_image_write.h
@@ -1,6 +1,3 @@
1// SPDX-FileCopyrightText: stb http://nothings.org/stb
2// SPDX-License-Identifier: MIT
3
4/* stb_image_write - v1.16 - public domain - http://nothings.org/stb 1/* stb_image_write - v1.16 - public domain - http://nothings.org/stb
5 writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 2 writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
6 no warranty implied; use at your own risk 3 no warranty implied; use at your own risk
@@ -194,12 +191,1442 @@ STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w,
194STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 191STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
195STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); 192STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
196STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 193STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
197STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len);
198 194
199STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); 195STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);
200 196
201#endif//INCLUDE_STB_IMAGE_WRITE_H 197#endif//INCLUDE_STB_IMAGE_WRITE_H
202 198
199#ifdef STB_IMAGE_WRITE_IMPLEMENTATION
200
#ifdef _WIN32
   // Opt out of the MSVC CRT deprecation warnings unless the user already did.
   #ifndef _CRT_SECURE_NO_WARNINGS
   #define _CRT_SECURE_NO_WARNINGS
   #endif
   #ifndef _CRT_NONSTDC_NO_DEPRECATE
   #define _CRT_NONSTDC_NO_DEPRECATE
   #endif
#endif

#ifndef STBI_WRITE_NO_STDIO
#include <stdio.h>
#endif // STBI_WRITE_NO_STDIO

#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

// The allocator hooks must be overridden as a complete set or not at all.
#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))
// ok
#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)
// ok
#else
#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
#endif

// Default allocator: the C runtime heap.
#ifndef STBIW_MALLOC
#define STBIW_MALLOC(sz)        malloc(sz)
#define STBIW_REALLOC(p,newsz)  realloc(p,newsz)
#define STBIW_FREE(p)           free(p)
#endif

// Allocators that need the old size can define this; the default ignores oldsz.
#ifndef STBIW_REALLOC_SIZED
#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz)
#endif


#ifndef STBIW_MEMMOVE
#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
#endif


#ifndef STBIW_ASSERT
#include <assert.h>
#define STBIW_ASSERT(x) assert(x)
#endif

// Truncate an integer expression to its low 8 bits as an unsigned char.
// The whole expansion is parenthesized so the macro behaves as a single
// primary expression in every context (e.g. `sizeof STBIW_UCHAR(x)`).
#define STBIW_UCHAR(x) ((unsigned char) ((x) & 0xff))
249
250#ifdef STB_IMAGE_WRITE_STATIC
251static int stbi_write_png_compression_level = 8;
252static int stbi_write_tga_with_rle = 1;
253static int stbi_write_force_png_filter = -1;
254#else
255int stbi_write_png_compression_level = 8;
256int stbi_write_tga_with_rle = 1;
257int stbi_write_force_png_filter = -1;
258#endif
259
260static int stbi__flip_vertically_on_write = 0;
261
262STBIWDEF void stbi_flip_vertically_on_write(int flag)
263{
264 stbi__flip_vertically_on_write = flag;
265}
266
267typedef struct
268{
269 stbi_write_func *func;
270 void *context;
271 unsigned char buffer[64];
272 int buf_used;
273} stbi__write_context;
274
275// initialize a callback-based context
276static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context)
277{
278 s->func = c;
279 s->context = context;
280}
281
282#ifndef STBI_WRITE_NO_STDIO
283
// Write callback for file output: `context` is the FILE* to write to.
// The fwrite result is not checked.
static void stbi__stdio_write(void *context, void *data, int size)
{
   FILE *out = (FILE *) context;
   fwrite(data, 1, size, out);
}
288
#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
// The two Win32 conversion routines are declared by hand here, with C linkage
// when compiled as C++.
#ifdef __cplusplus
#define STBIW_EXTERN extern "C"
#else
#define STBIW_EXTERN extern
#endif
STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);

// Convert a NUL-terminated wide string to UTF-8 in `buffer` (capacity `bufferlen`).
// Returns the value reported by WideCharToMultiByte.
STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
   int capacity = (int) bufferlen;
   return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, capacity, NULL, NULL);
}
#endif
303
// Open `filename` with the given stdio `mode`; returns NULL on failure.
// With STBIW_WINDOWS_UTF8 on Windows, both strings are treated as UTF-8 and
// converted to wide strings first; MSVC 2005+ uses the *_s open variants.
static FILE *stbiw__fopen(char const *filename, char const *mode)
{
   FILE *fp;
#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
   wchar_t wide_name[1024];
   wchar_t wide_mode[64];

   // A conversion result of 0 means failure (e.g. the path does not fit).
   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wide_name, sizeof(wide_name)/sizeof(*wide_name)))
      return NULL;

   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wide_mode, sizeof(wide_mode)/sizeof(*wide_mode)))
      return NULL;

#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != _wfopen_s(&fp, wide_name, wide_mode))
      fp = NULL;
#else
   fp = _wfopen(wide_name, wide_mode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&fp, filename, mode))
      fp = NULL;
#else
   fp = fopen(filename, mode);
#endif
   return fp;
}
331
332static int stbi__start_write_file(stbi__write_context *s, const char *filename)
333{
334 FILE *f = stbiw__fopen(filename, "wb");
335 stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f);
336 return f != NULL;
337}
338
339static void stbi__end_write_file(stbi__write_context *s)
340{
341 fclose((FILE *)s->context);
342}
343
344#endif // !STBI_WRITE_NO_STDIO
345
346typedef unsigned int stbiw_uint32;
347typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1];
348
349static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v)
350{
351 while (*fmt) {
352 switch (*fmt++) {
353 case ' ': break;
354 case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int));
355 s->func(s->context,&x,1);
356 break; }
357 case '2': { int x = va_arg(v,int);
358 unsigned char b[2];
359 b[0] = STBIW_UCHAR(x);
360 b[1] = STBIW_UCHAR(x>>8);
361 s->func(s->context,b,2);
362 break; }
363 case '4': { stbiw_uint32 x = va_arg(v,int);
364 unsigned char b[4];
365 b[0]=STBIW_UCHAR(x);
366 b[1]=STBIW_UCHAR(x>>8);
367 b[2]=STBIW_UCHAR(x>>16);
368 b[3]=STBIW_UCHAR(x>>24);
369 s->func(s->context,b,4);
370 break; }
371 default:
372 STBIW_ASSERT(0);
373 return;
374 }
375 }
376}
377
378static void stbiw__writef(stbi__write_context *s, const char *fmt, ...)
379{
380 va_list v;
381 va_start(v, fmt);
382 stbiw__writefv(s, fmt, v);
383 va_end(v);
384}
385
386static void stbiw__write_flush(stbi__write_context *s)
387{
388 if (s->buf_used) {
389 s->func(s->context, &s->buffer, s->buf_used);
390 s->buf_used = 0;
391 }
392}
393
394static void stbiw__putc(stbi__write_context *s, unsigned char c)
395{
396 s->func(s->context, &c, 1);
397}
398
399static void stbiw__write1(stbi__write_context *s, unsigned char a)
400{
401 if ((size_t)s->buf_used + 1 > sizeof(s->buffer))
402 stbiw__write_flush(s);
403 s->buffer[s->buf_used++] = a;
404}
405
406static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
407{
408 int n;
409 if ((size_t)s->buf_used + 3 > sizeof(s->buffer))
410 stbiw__write_flush(s);
411 n = s->buf_used;
412 s->buf_used = n+3;
413 s->buffer[n+0] = a;
414 s->buffer[n+1] = b;
415 s->buffer[n+2] = c;
416}
417
418static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d)
419{
420 unsigned char bg[3] = { 255, 0, 255}, px[3];
421 int k;
422
423 if (write_alpha < 0)
424 stbiw__write1(s, d[comp - 1]);
425
426 switch (comp) {
427 case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case
428 case 1:
429 if (expand_mono)
430 stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
431 else
432 stbiw__write1(s, d[0]); // monochrome TGA
433 break;
434 case 4:
435 if (!write_alpha) {
436 // composite against pink background
437 for (k = 0; k < 3; ++k)
438 px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
439 stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
440 break;
441 }
442 /* FALLTHROUGH */
443 case 3:
444 stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
445 break;
446 }
447 if (write_alpha > 0)
448 stbiw__write1(s, d[comp - 1]);
449}
450
451static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
452{
453 stbiw_uint32 zero = 0;
454 int i,j, j_end;
455
456 if (y <= 0)
457 return;
458
459 if (stbi__flip_vertically_on_write)
460 vdir *= -1;
461
462 if (vdir < 0) {
463 j_end = -1; j = y-1;
464 } else {
465 j_end = y; j = 0;
466 }
467
468 for (; j != j_end; j += vdir) {
469 for (i=0; i < x; ++i) {
470 unsigned char *d = (unsigned char *) data + (j*x+i)*comp;
471 stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);
472 }
473 stbiw__write_flush(s);
474 s->func(s->context, &zero, scanline_pad);
475 }
476}
477
478static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
479{
480 if (y < 0 || x < 0) {
481 return 0;
482 } else {
483 va_list v;
484 va_start(v, fmt);
485 stbiw__writefv(s, fmt, v);
486 va_end(v);
487 stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
488 return 1;
489 }
490}
491
492static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
493{
494 if (comp != 4) {
495 // write RGB bitmap
496 int pad = (-x*3) & 3;
497 return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
498 "11 4 22 4" "4 44 22 444444",
499 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header
500 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header
501 } else {
502 // RGBA bitmaps need a v4 header
503 // use BI_BITFIELDS mode with 32bpp and alpha mask
504 // (straight BI_RGB with alpha mask doesn't work in most readers)
505 return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0,
506 "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444",
507 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header
508 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header
509 }
510}
511
512STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
513{
514 stbi__write_context s = { 0 };
515 stbi__start_write_callbacks(&s, func, context);
516 return stbi_write_bmp_core(&s, x, y, comp, data);
517}
518
519#ifndef STBI_WRITE_NO_STDIO
520STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
521{
522 stbi__write_context s = { 0 };
523 if (stbi__start_write_file(&s,filename)) {
524 int r = stbi_write_bmp_core(&s, x, y, comp, data);
525 stbi__end_write_file(&s);
526 return r;
527 } else
528 return 0;
529}
530#endif //!STBI_WRITE_NO_STDIO
531
532static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data)
533{
534 int has_alpha = (comp == 2 || comp == 4);
535 int colorbytes = has_alpha ? comp-1 : comp;
536 int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3
537
538 if (y < 0 || x < 0)
539 return 0;
540
541 if (!stbi_write_tga_with_rle) {
542 return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
543 "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
544 } else {
545 int i,j,k;
546 int jend, jdir;
547
548 stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8);
549
550 if (stbi__flip_vertically_on_write) {
551 j = 0;
552 jend = y;
553 jdir = 1;
554 } else {
555 j = y-1;
556 jend = -1;
557 jdir = -1;
558 }
559 for (; j != jend; j += jdir) {
560 unsigned char *row = (unsigned char *) data + j * x * comp;
561 int len;
562
563 for (i = 0; i < x; i += len) {
564 unsigned char *begin = row + i * comp;
565 int diff = 1;
566 len = 1;
567
568 if (i < x - 1) {
569 ++len;
570 diff = memcmp(begin, row + (i + 1) * comp, comp);
571 if (diff) {
572 const unsigned char *prev = begin;
573 for (k = i + 2; k < x && len < 128; ++k) {
574 if (memcmp(prev, row + k * comp, comp)) {
575 prev += comp;
576 ++len;
577 } else {
578 --len;
579 break;
580 }
581 }
582 } else {
583 for (k = i + 2; k < x && len < 128; ++k) {
584 if (!memcmp(begin, row + k * comp, comp)) {
585 ++len;
586 } else {
587 break;
588 }
589 }
590 }
591 }
592
593 if (diff) {
594 unsigned char header = STBIW_UCHAR(len - 1);
595 stbiw__write1(s, header);
596 for (k = 0; k < len; ++k) {
597 stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
598 }
599 } else {
600 unsigned char header = STBIW_UCHAR(len - 129);
601 stbiw__write1(s, header);
602 stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);
603 }
604 }
605 }
606 stbiw__write_flush(s);
607 }
608 return 1;
609}
610
611STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
612{
613 stbi__write_context s = { 0 };
614 stbi__start_write_callbacks(&s, func, context);
615 return stbi_write_tga_core(&s, x, y, comp, (void *) data);
616}
617
618#ifndef STBI_WRITE_NO_STDIO
619STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
620{
621 stbi__write_context s = { 0 };
622 if (stbi__start_write_file(&s,filename)) {
623 int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
624 stbi__end_write_file(&s);
625 return r;
626 } else
627 return 0;
628}
629#endif
630
631// *************************************************************************************************
632// Radiance RGBE HDR writer
633// by Baldur Karlsson
634
635#define stbiw__max(a, b) ((a) > (b) ? (a) : (b))
636
637#ifndef STBI_WRITE_NO_STDIO
638
// Convert three linear float channels into a 4-byte shared-exponent value.
// The largest channel is split with frexp into mantissa and exponent; every
// channel is scaled by the same factor and the exponent is stored biased by
// 128 in rgbe[3]. If the largest channel is below 1e-32 all four bytes are zero.
static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
{
   int exponent, c;
   float scale;
   float brightest = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));

   if (brightest < 1e-32f) {
      for (c = 0; c < 4; ++c)
         rgbe[c] = 0;
      return;
   }

   scale = (float) frexp(brightest, &exponent) * 256.0f/brightest;

   for (c = 0; c < 3; ++c)
      rgbe[c] = (unsigned char)(linear[c] * scale);
   rgbe[3] = (unsigned char)(exponent + 128);
}
655
656static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
657{
658 unsigned char lengthbyte = STBIW_UCHAR(length+128);
659 STBIW_ASSERT(length+128 <= 255);
660 s->func(s->context, &lengthbyte, 1);
661 s->func(s->context, &databyte, 1);
662}
663
664static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data)
665{
666 unsigned char lengthbyte = STBIW_UCHAR(length);
667 STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
668 s->func(s->context, &lengthbyte, 1);
669 s->func(s->context, data, length);
670}
671
672static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline)
673{
674 unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
675 unsigned char rgbe[4];
676 float linear[3];
677 int x;
678
679 scanlineheader[2] = (width&0xff00)>>8;
680 scanlineheader[3] = (width&0x00ff);
681
682 /* skip RLE for images too small or large */
683 if (width < 8 || width >= 32768) {
684 for (x=0; x < width; x++) {
685 switch (ncomp) {
686 case 4: /* fallthrough */
687 case 3: linear[2] = scanline[x*ncomp + 2];
688 linear[1] = scanline[x*ncomp + 1];
689 linear[0] = scanline[x*ncomp + 0];
690 break;
691 default:
692 linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
693 break;
694 }
695 stbiw__linear_to_rgbe(rgbe, linear);
696 s->func(s->context, rgbe, 4);
697 }
698 } else {
699 int c,r;
700 /* encode into scratch buffer */
701 for (x=0; x < width; x++) {
702 switch(ncomp) {
703 case 4: /* fallthrough */
704 case 3: linear[2] = scanline[x*ncomp + 2];
705 linear[1] = scanline[x*ncomp + 1];
706 linear[0] = scanline[x*ncomp + 0];
707 break;
708 default:
709 linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
710 break;
711 }
712 stbiw__linear_to_rgbe(rgbe, linear);
713 scratch[x + width*0] = rgbe[0];
714 scratch[x + width*1] = rgbe[1];
715 scratch[x + width*2] = rgbe[2];
716 scratch[x + width*3] = rgbe[3];
717 }
718
719 s->func(s->context, scanlineheader, 4);
720
721 /* RLE each component separately */
722 for (c=0; c < 4; c++) {
723 unsigned char *comp = &scratch[width*c];
724
725 x = 0;
726 while (x < width) {
727 // find first run
728 r = x;
729 while (r+2 < width) {
730 if (comp[r] == comp[r+1] && comp[r] == comp[r+2])
731 break;
732 ++r;
733 }
734 if (r+2 >= width)
735 r = width;
736 // dump up to first run
737 while (x < r) {
738 int len = r-x;
739 if (len > 128) len = 128;
740 stbiw__write_dump_data(s, len, &comp[x]);
741 x += len;
742 }
743 // if there's a run, output it
744 if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
745 // find next byte after run
746 while (r < width && comp[r] == comp[x])
747 ++r;
748 // output run up to r
749 while (x < r) {
750 int len = r-x;
751 if (len > 127) len = 127;
752 stbiw__write_run_data(s, len, comp[x]);
753 x += len;
754 }
755 }
756 }
757 }
758 }
759}
760
761static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data)
762{
763 if (y <= 0 || x <= 0 || data == NULL)
764 return 0;
765 else {
766 // Each component is stored separately. Allocate scratch space for full output scanline.
767 unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4);
768 int i, len;
769 char buffer[128];
770 char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
771 s->func(s->context, header, sizeof(header)-1);
772
773#ifdef __STDC_LIB_EXT1__
774 len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
775#else
776 len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
777#endif
778 s->func(s->context, buffer, len);
779
780 for(i=0; i < y; i++)
781 stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i));
782 STBIW_FREE(scratch);
783 return 1;
784 }
785}
786
787STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data)
788{
789 stbi__write_context s = { 0 };
790 stbi__start_write_callbacks(&s, func, context);
791 return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
792}
793
794STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
795{
796 stbi__write_context s = { 0 };
797 if (stbi__start_write_file(&s,filename)) {
798 int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data);
799 stbi__end_write_file(&s);
800 return r;
801 } else
802 return 0;
803}
804#endif // STBI_WRITE_NO_STDIO
805
806
807//////////////////////////////////////////////////////////////////////////////
808//
809// PNG writer
810//
811
812#ifndef STBIW_ZLIB_COMPRESS
// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
// Layout: two ints are stored immediately before the user-visible array:
//   raw[0] = capacity in elements (stbiw__sbm), raw[1] = element count (stbiw__sbn).
#define stbiw__sbraw(a) ((int *) (void *) (a) - 2)
#define stbiw__sbm(a)   stbiw__sbraw(a)[0]
#define stbiw__sbn(a)   stbiw__sbraw(a)[1]

// True when the buffer is unallocated or cannot take n more elements.
// The n argument is parenthesized so that expression arguments expand correctly.
#define stbiw__sbneedgrow(a,n)  ((a)==0 || stbiw__sbn(a)+(n) >= stbiw__sbm(a))
#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
#define stbiw__sbgrow(a,n)  stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a)))

// Append v (growing first if needed); count is 0 for a null buffer;
// free releases the raw allocation, not the user-visible pointer.
#define stbiw__sbpush(a, v)      (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v))
#define stbiw__sbcount(a)        ((a) ? stbiw__sbn(a) : 0)
#define stbiw__sbfree(a)         ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
825
826static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
827{
828 int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1;
829 void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2);
830 STBIW_ASSERT(p);
831 if (p) {
832 if (!*arr) ((int *) p)[1] = 0;
833 *arr = (void *) ((int *) p + 2);
834 stbiw__sbm(*arr) = m;
835 }
836 return *arr;
837}
838
/* Move every complete byte out of the bit accumulator into the stretchy
   output buffer `data`, low byte first. Returns the (possibly reallocated)
   buffer pointer. */
static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
{
   for (; *bitcount >= 8; *bitcount -= 8) {
      stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
      *bitbuffer >>= 8;
   }
   return data;
}
848
/* Reverse the order of the low `codebits` bits of `code`. */
static int stbiw__zlib_bitrev(int code, int codebits)
{
   int reversed = 0;

   for (; codebits != 0; --codebits) {
      reversed = (reversed << 1) | (code & 1);
      code >>= 1;
   }
   return reversed;
}
858
/* Count how many leading bytes of `a` and `b` are equal, looking at no more
   than `limit` bytes and never reporting more than 258. */
static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
{
   int matched = 0;

   while (matched < limit && matched < 258 && a[matched] == b[matched])
      ++matched;
   return matched;
}
866
867static unsigned int stbiw__zhash(unsigned char *data)
868{
869 stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
870 hash ^= hash << 3;
871 hash += hash >> 5;
872 hash ^= hash << 4;
873 hash += hash >> 17;
874 hash ^= hash << 25;
875 hash += hash >> 6;
876 return hash;
877}
878
// Bit-writer helpers for the built-in compressor. These macros are not
// self-contained: they read and assign the caller's local variables named
// `out`, `bitbuf` and `bitcount`, so they can only be used in a scope that
// declares all three.
// stbiw__zlib_flush: drain whole bytes from the bit accumulator into `out`.
879 #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
// stbiw__zlib_add: OR `code` into the accumulator at the current bit position,
// advance the position by `codebits`, then flush complete bytes.
880 #define stbiw__zlib_add(code,codebits) \
881 (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())
// stbiw__zlib_huffa: emit the `c`-bit code `b` with its bit order reversed.
882 #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)
// Each huffN macro covers one symbol range with its own base code and length:
// huff1 is 8 bits from 0x30, huff2 is 9 bits from 0x190 (for n >= 144),
// huff3 is 7 bits from 0 (for n >= 256), huff4 is 8 bits from 0xc0 (for n >= 280).
883 // default huffman tables
884 #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8)
885 #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9)
886 #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7)
887 #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8)
// stbiw__zlib_huff picks the range for any symbol; stbiw__zlib_huffb only
// distinguishes the two ranges at or below 255, so it is for byte values.
888 #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
889 #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))
890
// Number of hash buckets; used as a mask (stbiw__ZHASH-1), so it must stay a power of two.
891 #define stbiw__ZHASH 16384
892
893#endif // STBIW_ZLIB_COMPRESS
894
// Compress `data_len` bytes at `data` into a zlib stream.
// If STBIW_ZLIB_COMPRESS is defined, the call is forwarded to it unchanged.
// Otherwise the built-in encoder writes a 2-byte header, one block coded with
// the fixed Huffman macros (or stored blocks if that came out larger), and a
// 4-byte checksum of the input.
//   quality : values below 5 are raised to 5; a hash bucket is cut back to
//             `quality` entries once it holds 2*quality.
//   out_len : receives the number of bytes in the returned buffer.
// Returns NULL if the hash table cannot be allocated. The returned pointer is
// the start of the underlying allocation (see the final memmove), so the
// caller releases it with STBIW_FREE.
895 STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
896 {
897 #ifdef STBIW_ZLIB_COMPRESS
898 // user provided a zlib compress implementation, use that
899 return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);
900 #else // use builtin
// lengthc/distc hold the base value of each length/distance code plus one
// trailing sentinel used by the search loops below; lengtheb/disteb hold the
// number of extra bits that follow each code.
901 static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
902 static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
903 static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
904 static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
// NOTE: the names out, bitbuf and bitcount are referenced by the
// stbiw__zlib_* macros and must not be renamed.
905 unsigned int bitbuf=0;
906 int i,j, bitcount=0;
907 unsigned char *out = NULL;
// hash_table[h] is a stretchy buffer of pointers to earlier input positions
// whose 3-byte hash is h.
908 unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));
909 if (hash_table == NULL)
910 return NULL;
911 if (quality < 5) quality = 5;
912
913 stbiw__sbpush(out, 0x78); // DEFLATE 32K window
914 stbiw__sbpush(out, 0x5e); // FLEVEL = 1
915 stbiw__zlib_add(1,1); // BFINAL = 1
916 stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman
917
918 for (i=0; i < stbiw__ZHASH; ++i)
919 hash_table[i] = NULL;
920
921 i=0;
// Main loop stops 3 bytes short of the end because the hash reads 3 bytes.
922 while (i < data_len-3) {
923 // hash next 3 bytes of data to be compressed
924 int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
925 unsigned char *bestloc = 0;
926 unsigned char **hlist = hash_table[h];
927 int n = stbiw__sbcount(hlist);
// Scan the bucket for the longest match of at least 3 bytes; on ties the
// later bucket entry wins because the comparison is >=.
928 for (j=0; j < n; ++j) {
929 if (hlist[j]-data > i-32768) { // if entry lies within window
930 int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
931 if (d >= best) { best=d; bestloc=hlist[j]; }
932 }
933 }
934 // when hash table entry is too long, delete half the entries
935 if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
936 STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);
937 stbiw__sbn(hash_table[h]) = quality;
938 }
939 stbiw__sbpush(hash_table[h],data+i);
940
941 if (bestloc) {
942 // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
943 h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
944 hlist = hash_table[h];
945 n = stbiw__sbcount(hlist);
946 for (j=0; j < n; ++j) {
947 if (hlist[j]-data > i-32767) {
948 int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
949 if (e > best) { // if next match is better, bail on current match
950 bestloc = NULL;
951 break;
952 }
953 }
954 }
955 }
956
957 if (bestloc) {
958 int d = (int) (data+i - bestloc); // distance back
959 STBIW_ASSERT(d <= 32767 && best <= 258);
// Find the length code whose range contains `best`, emit it as symbol j+257,
// then emit the offset within that range as extra bits.
960 for (j=0; best > lengthc[j+1]-1; ++j);
961 stbiw__zlib_huff(j+257);
962 if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
// Same scheme for the distance: a 5-bit bit-reversed code plus extra bits.
963 for (j=0; d > distc[j+1]-1; ++j);
964 stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5);
965 if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
966 i += best;
967 } else {
968 stbiw__zlib_huffb(data[i]);
969 ++i;
970 }
971 }
972 // write out final bytes
973 for (;i < data_len; ++i)
974 stbiw__zlib_huffb(data[i]);
975 stbiw__zlib_huff(256); // end of block
976 // pad with 0 bits to byte boundary
977 while (bitcount)
978 stbiw__zlib_add(0,1);
979
980 for (i=0; i < stbiw__ZHASH; ++i)
981 (void) stbiw__sbfree(hash_table[i]);
982 STBIW_FREE(hash_table);
983
984 // store uncompressed instead if compression was worse
// The threshold is the stored size: 2 header bytes, the data, and 5 bytes of
// overhead per block of up to 32767 bytes. The memcpy below writes past the
// current count without growing; this branch only runs when the buffer
// already held more bytes than the stored form needs.
985 if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) {
986 stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1
987 for (j = 0; j < data_len;) {
988 int blocklen = data_len - j;
989 if (blocklen > 32767) blocklen = 32767;
990 stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression
991 stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN
992 stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8));
993 stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN
994 stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8));
995 memcpy(out+stbiw__sbn(out), data+j, blocklen);
996 stbiw__sbn(out) += blocklen;
997 j += blocklen;
998 }
999 }
1000
1001 {
1002 // compute adler32 on input
// The first chunk is data_len % 5552 bytes (possibly 0) and every later chunk
// is 5552 bytes; both sums are reduced modulo 65521 after each chunk.
1003 unsigned int s1=1, s2=0;
1004 int blocklen = (int) (data_len % 5552);
1005 j=0;
1006 while (j < data_len) {
1007 for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; }
1008 s1 %= 65521; s2 %= 65521;
1009 j += blocklen;
1010 blocklen = 5552;
1011 }
// Checksum bytes are appended high byte first: s2 then s1.
1012 stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));
1013 stbiw__sbpush(out, STBIW_UCHAR(s2));
1014 stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
1015 stbiw__sbpush(out, STBIW_UCHAR(s1));
1016 }
1017 *out_len = stbiw__sbn(out);
1018 // make returned pointer freeable
// Slide the payload down over the stretchy-buffer header so that the
// returned pointer is the start of the allocation.
1019 STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
1020 return (unsigned char *) stbiw__sbraw(out);
1021 #endif // STBIW_ZLIB_COMPRESS
1022 }
1023
/* Compute the CRC-32 of `len` bytes at `buffer`.
   When STBIW_CRC32 is defined the call is forwarded to it; otherwise a
   byte-at-a-time table lookup is used, starting from all-ones and
   complementing the result at the end. */
static unsigned int stbiw__crc32(unsigned char *buffer, int len)
{
#ifdef STBIW_CRC32
    return STBIW_CRC32(buffer, len);
#else
   static unsigned int crc_table[256] =
   {
      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
      0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
      0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
      0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
      0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
      0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
      0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
      0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
      0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
      0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
      0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
      0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
      0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
      0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
      0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
      0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
      0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
      0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
      0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
      0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
      0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
      0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
      0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
      0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
      0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
      0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
      0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
      0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
      0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
      0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
      0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
   };

   unsigned int state = ~0u;
   unsigned char *cursor = buffer;
   int remaining = len;

   // fold one input byte per step through the lookup table
   while (remaining-- > 0) {
      unsigned int slot = *cursor++ ^ (state & 0xff);
      state = (state >> 8) ^ crc_table[slot];
   }
   return ~state;
#endif
}
1072
// Byte writers for building PNG chunks in memory. Each one stores four bytes
// at the cursor `o` (or `data`) and advances that cursor by 4, so the argument
// must be a modifiable pointer lvalue.
// stbiw__wpng4 : store the four bytes a, b, c, d in that order.
// stbiw__wp32  : store v high byte first. NOTE: this definition ends with its
//                own ';', so a use followed by ';' expands to two statements.
// stbiw__wptag : store the four characters of the string s.
1073 #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4)
1074 #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v));
1075 #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3])
1076
/* Append the CRC of the chunk that ends at *data. The CRC covers the 4-byte
   tag plus `len` payload bytes immediately preceding the cursor; the cursor
   is advanced past the 4 CRC bytes written. */
static void stbiw__wpcrc(unsigned char **data, int len)
{
   unsigned char *chunk_start = *data - len - 4;
   unsigned int crc = stbiw__crc32(chunk_start, len+4);

   stbiw__wp32(*data, crc);
}
1082
/* Paeth predictor: of a, b and c, return the one closest to a + b - c,
   preferring a, then b, then c when distances tie. */
static unsigned char stbiw__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate-a);
   int dist_b = abs(estimate-b);
   int dist_c = abs(estimate-c);

   if (dist_a <= dist_b && dist_a <= dist_c)
      return STBIW_UCHAR(a);
   else if (dist_b <= dist_c)
      return STBIW_UCHAR(b);
   else
      return STBIW_UCHAR(c);
}
1090
1091// @OPTIMIZE: provide an option that always forces left-predict or paeth predict
1092static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer)
1093{
1094 static int mapping[] = { 0,1,2,3,4 };
1095 static int firstmap[] = { 0,1,0,5,6 };
1096 int *mymap = (y != 0) ? mapping : firstmap;
1097 int i;
1098 int type = mymap[filter_type];
1099 unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y);
1100 int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;
1101
1102 if (type==0) {
1103 memcpy(line_buffer, z, width*n);
1104 return;
1105 }
1106
1107 // first loop isn't optimized since it's just one pixel
1108 for (i = 0; i < n; ++i) {
1109 switch (type) {
1110 case 1: line_buffer[i] = z[i]; break;
1111 case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break;
1112 case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break;
1113 case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break;
1114 case 5: line_buffer[i] = z[i]; break;
1115 case 6: line_buffer[i] = z[i]; break;
1116 }
1117 }
1118 switch (type) {
1119 case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break;
1120 case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break;
1121 case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break;
1122 case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break;
1123 case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break;
1124 case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break;
1125 }
1126}
1127
1128STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len)
1129{
1130 int force_filter = stbi_write_force_png_filter;
1131 int ctype[5] = { -1, 0, 4, 2, 6 };
1132 unsigned char sig[8] = { 137,80,78,71,13,10,26,10 };
1133 unsigned char *out,*o, *filt, *zlib;
1134 signed char *line_buffer;
1135 int j,zlen;
1136
1137 if (stride_bytes == 0)
1138 stride_bytes = x * n;
1139
1140 if (force_filter >= 5) {
1141 force_filter = -1;
1142 }
1143
1144 filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0;
1145 line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
1146 for (j=0; j < y; ++j) {
1147 int filter_type;
1148 if (force_filter > -1) {
1149 filter_type = force_filter;
1150 stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);
1151 } else { // Estimate the best filter by running through all of them:
1152 int best_filter = 0, best_filter_val = 0x7fffffff, est, i;
1153 for (filter_type = 0; filter_type < 5; filter_type++) {
1154 stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);
1155
1156 // Estimate the entropy of the line using this filter; the less, the better.
1157 est = 0;
1158 for (i = 0; i < x*n; ++i) {
1159 est += abs((signed char) line_buffer[i]);
1160 }
1161 if (est < best_filter_val) {
1162 best_filter_val = est;
1163 best_filter = filter_type;
1164 }
1165 }
1166 if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it
1167 stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);
1168 filter_type = best_filter;
1169 }
1170 }
1171 // when we get here, filter_type contains the filter type, and line_buffer contains the data
1172 filt[j*(x*n+1)] = (unsigned char) filter_type;
1173 STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
1174 }
1175 STBIW_FREE(line_buffer);
1176 zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level);
1177 STBIW_FREE(filt);
1178 if (!zlib) return 0;
1179
1180 // each tag requires 12 bytes of overhead
1181 out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12);
1182 if (!out) return 0;
1183 *out_len = 8 + 12+13 + 12+zlen + 12;
1184
1185 o=out;
1186 STBIW_MEMMOVE(o,sig,8); o+= 8;
1187 stbiw__wp32(o, 13); // header length
1188 stbiw__wptag(o, "IHDR");
1189 stbiw__wp32(o, x);
1190 stbiw__wp32(o, y);
1191 *o++ = 8;
1192 *o++ = STBIW_UCHAR(ctype[n]);
1193 *o++ = 0;
1194 *o++ = 0;
1195 *o++ = 0;
1196 stbiw__wpcrc(&o,13);
1197
1198 stbiw__wp32(o, zlen);
1199 stbiw__wptag(o, "IDAT");
1200 STBIW_MEMMOVE(o, zlib, zlen);
1201 o += zlen;
1202 STBIW_FREE(zlib);
1203 stbiw__wpcrc(&o, zlen);
1204
1205 stbiw__wp32(o,0);
1206 stbiw__wptag(o, "IEND");
1207 stbiw__wpcrc(&o,0);
1208
1209 STBIW_ASSERT(o == out + *out_len);
1210
1211 return out;
1212}
1213
1214#ifndef STBI_WRITE_NO_STDIO
1215STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes)
1216{
1217 FILE *f;
1218 int len;
1219 unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len);
1220 if (png == NULL) return 0;
1221
1222 f = stbiw__fopen(filename, "wb");
1223 if (!f) { STBIW_FREE(png); return 0; }
1224 fwrite(png, 1, len, f);
1225 fclose(f);
1226 STBIW_FREE(png);
1227 return 1;
1228}
1229#endif
1230
1231STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes)
1232{
1233 int len;
1234 unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len);
1235 if (png == NULL) return 0;
1236 func(context, png, len);
1237 STBIW_FREE(png);
1238 return 1;
1239}
1240
1241
1242/* ***************************************************************************
1243 *
1244 * JPEG writer
1245 *
1246 * This is based on Jon Olick's jo_jpeg.cpp:
1247 * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html
1248 */
1249
// Maps a row-major index in an 8x8 block to its position in zigzag order;
// laid out here as the 8 rows of the block.
static const unsigned char stbiw__jpg_ZigZag[] = {
    0,  1,  5,  6, 14, 15, 27, 28,
    2,  4,  7, 13, 16, 26, 29, 42,
    3,  8, 12, 17, 25, 30, 41, 43,
    9, 11, 18, 24, 31, 40, 44, 53,
   10, 19, 23, 32, 39, 45, 52, 54,
   20, 22, 33, 38, 46, 51, 55, 60,
   21, 34, 37, 47, 50, 56, 59, 61,
   35, 36, 48, 49, 57, 58, 62, 63
};
1252
1253static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) {
1254 int bitBuf = *bitBufP, bitCnt = *bitCntP;
1255 bitCnt += bs[1];
1256 bitBuf |= bs[0] << (24 - bitCnt);
1257 while(bitCnt >= 8) {
1258 unsigned char c = (bitBuf >> 16) & 255;
1259 stbiw__putc(s, c);
1260 if(c == 255) {
1261 stbiw__putc(s, 0);
1262 }
1263 bitBuf <<= 8;
1264 bitCnt -= 8;
1265 }
1266 *bitBufP = bitBuf;
1267 *bitCntP = bitCnt;
1268}
1269
/* In-place 8-point forward DCT on eight floats addressed through separate
   pointers (so it can walk either a row or a column). All inputs are read
   before any output is written. */
static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) {
   const float in0 = *d0p, in1 = *d1p, in2 = *d2p, in3 = *d3p;
   const float in4 = *d4p, in5 = *d5p, in6 = *d6p, in7 = *d7p;

   // butterfly: sums feed the even outputs, differences feed the odd outputs
   const float sum07 = in0 + in7, dif07 = in0 - in7;
   const float sum16 = in1 + in6, dif16 = in1 - in6;
   const float sum25 = in2 + in5, dif25 = in2 - in5;
   const float sum34 = in3 + in4, dif34 = in3 - in4;

   // Even part
   const float e10 = sum07 + sum34; // phase 2
   const float e13 = sum07 - sum34;
   const float e11 = sum16 + sum25;
   const float e12 = sum16 - sum25;

   const float out0 = e10 + e11; // phase 3
   const float out4 = e10 - e11;

   const float rot = (e12 + e13) * 0.707106781f; // c4
   const float out2 = e13 + rot; // phase 5
   const float out6 = e13 - rot;

   // Odd part
   const float o10 = dif34 + dif25; // phase 2
   const float o11 = dif25 + dif16;
   const float o12 = dif16 + dif07;

   // The rotator is modified from fig 4-8 to avoid extra negations.
   const float z5 = (o10 - o12) * 0.382683433f; // c6
   const float z2 = o10 * 0.541196100f + z5; // c2-c6
   const float z4 = o12 * 1.306562965f + z5; // c2+c6
   const float z3 = o11 * 0.707106781f; // c4

   const float z11 = dif07 + z3; // phase 5
   const float z13 = dif07 - z3;

   *d5p = z13 + z2; // phase 6
   *d3p = z13 - z2;
   *d1p = z11 + z4;
   *d7p = z11 - z4;

   *d0p = out0;
   *d2p = out2;
   *d4p = out4;
   *d6p = out6;
}
1317
/* Compute the JPEG magnitude coding of `val`.
   bits[1] receives the number of bits needed for |val| (minimum 1);
   bits[0] receives the low bits[1] bits of val, or of val-1 when val is negative. */
static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
   int magnitude = val < 0 ? -val : val;
   int coded = val < 0 ? val-1 : val;
   unsigned short nbits = 1;

   for (magnitude >>= 1; magnitude != 0; magnitude >>= 1)
      ++nbits;

   bits[1] = nbits;
   bits[0] = coded & ((1<<nbits)-1);
}
1327
1328static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, int du_stride, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
1329 const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] };
1330 const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] };
1331 int dataOff, i, j, n, diff, end0pos, x, y;
1332 int DU[64];
1333
1334 // DCT rows
1335 for(dataOff=0, n=du_stride*8; dataOff<n; dataOff+=du_stride) {
1336 stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]);
1337 }
1338 // DCT columns
1339 for(dataOff=0; dataOff<8; ++dataOff) {
1340 stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+du_stride], &CDU[dataOff+du_stride*2], &CDU[dataOff+du_stride*3], &CDU[dataOff+du_stride*4],
1341 &CDU[dataOff+du_stride*5], &CDU[dataOff+du_stride*6], &CDU[dataOff+du_stride*7]);
1342 }
1343 // Quantize/descale/zigzag the coefficients
1344 for(y = 0, j=0; y < 8; ++y) {
1345 for(x = 0; x < 8; ++x,++j) {
1346 float v;
1347 i = y*du_stride+x;
1348 v = CDU[i]*fdtbl[j];
1349 // DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f));
1350 // ceilf() and floorf() are C99, not C89, but I /think/ they're not needed here anyway?
1351 DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);
1352 }
1353 }
1354
1355 // Encode DC
1356 diff = DU[0] - DC;
1357 if (diff == 0) {
1358 stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);
1359 } else {
1360 unsigned short bits[2];
1361 stbiw__jpg_calcBits(diff, bits);
1362 stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);
1363 stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1364 }
1365 // Encode ACs
1366 end0pos = 63;
1367 for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) {
1368 }
1369 // end0pos = first element in reverse order !=0
1370 if(end0pos == 0) {
1371 stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1372 return DU[0];
1373 }
1374 for(i = 1; i <= end0pos; ++i) {
1375 int startpos = i;
1376 int nrzeroes;
1377 unsigned short bits[2];
1378 for (; DU[i]==0 && i<=end0pos; ++i) {
1379 }
1380 nrzeroes = i-startpos;
1381 if ( nrzeroes >= 16 ) {
1382 int lng = nrzeroes>>4;
1383 int nrmarker;
1384 for (nrmarker=1; nrmarker <= lng; ++nrmarker)
1385 stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
1386 nrzeroes &= 15;
1387 }
1388 stbiw__jpg_calcBits(DU[i], bits);
1389 stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]);
1390 stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1391 }
1392 if(end0pos != 63) {
1393 stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1394 }
1395 return DU[0];
1396}
1397
1398static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
1399 // Constants that don't pollute global namespace
1400 static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
1401 static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1402 static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d};
1403 static const unsigned char std_ac_luminance_values[] = {
1404 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,
1405 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,
1406 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,
1407 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
1408 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,
1409 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,
1410 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1411 };
1412 static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0};
1413 static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1414 static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77};
1415 static const unsigned char std_ac_chrominance_values[] = {
1416 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,
1417 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
1418 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,
1419 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
1420 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,
1421 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
1422 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1423 };
1424 // Huffman tables
1425 static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}};
1426 static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}};
1427 static const unsigned short YAC_HT[256][2] = {
1428 {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1429 {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1430 {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1431 {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1432 {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1433 {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1434 {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1435 {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1436 {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1437 {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1438 {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1439 {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1440 {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1441 {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1442 {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1443 {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1444 };
1445 static const unsigned short UVAC_HT[256][2] = {
1446 {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1447 {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1448 {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1449 {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1450 {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1451 {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1452 {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1453 {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1454 {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1455 {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1456 {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1457 {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1458 {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1459 {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1460 {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1461 {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1462 };
1463 static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22,
1464 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
1465 static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
1466 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
1467 static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
1468 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
1469
1470 int row, col, i, k, subsample;
1471 float fdtbl_Y[64], fdtbl_UV[64];
1472 unsigned char YTable[64], UVTable[64];
1473
1474 if(!data || !width || !height || comp > 4 || comp < 1) {
1475 return 0;
1476 }
1477
1478 quality = quality ? quality : 90;
1479 subsample = quality <= 90 ? 1 : 0;
1480 quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;
1481 quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
1482
1483 for(i = 0; i < 64; ++i) {
1484 int uvti, yti = (YQT[i]*quality+50)/100;
1485 YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti);
1486 uvti = (UVQT[i]*quality+50)/100;
1487 UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
1488 }
1489
1490 for(row = 0, k = 0; row < 8; ++row) {
1491 for(col = 0; col < 8; ++col, ++k) {
1492 fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1493 fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1494 }
1495 }
1496
1497 // Write Headers
1498 {
1499 static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 };
1500 static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 };
1501 const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width),
1502 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 };
1503 s->func(s->context, (void*)head0, sizeof(head0));
1504 s->func(s->context, (void*)YTable, sizeof(YTable));
1505 stbiw__putc(s, 1);
1506 s->func(s->context, UVTable, sizeof(UVTable));
1507 s->func(s->context, (void*)head1, sizeof(head1));
1508 s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
1509 s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
1510 stbiw__putc(s, 0x10); // HTYACinfo
1511 s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1);
1512 s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));
1513 stbiw__putc(s, 1); // HTUDCinfo
1514 s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1);
1515 s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));
1516 stbiw__putc(s, 0x11); // HTUACinfo
1517 s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1);
1518 s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));
1519 s->func(s->context, (void*)head2, sizeof(head2));
1520 }
1521
1522 // Encode 8x8 macroblocks
1523 {
1524 static const unsigned short fillBits[] = {0x7F, 7};
1525 int DCY=0, DCU=0, DCV=0;
1526 int bitBuf=0, bitCnt=0;
1527 // comp == 2 is grey+alpha (alpha is ignored)
1528 int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;
1529 const unsigned char *dataR = (const unsigned char *)data;
1530 const unsigned char *dataG = dataR + ofsG;
1531 const unsigned char *dataB = dataR + ofsB;
1532 int x, y, pos;
1533 if(subsample) {
1534 for(y = 0; y < height; y += 16) {
1535 for(x = 0; x < width; x += 16) {
1536 float Y[256], U[256], V[256];
1537 for(row = y, pos = 0; row < y+16; ++row) {
1538 // row >= height => use last input row
1539 int clamped_row = (row < height) ? row : height - 1;
1540 int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1541 for(col = x; col < x+16; ++col, ++pos) {
1542 // if col >= width => use pixel from last input column
1543 int p = base_p + ((col < width) ? col : (width-1))*comp;
1544 float r = dataR[p], g = dataG[p], b = dataB[p];
1545 Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128;
1546 U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b;
1547 V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b;
1548 }
1549 }
1550 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1551 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1552 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1553 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1554
1555 // subsample U,V
1556 {
1557 float subU[64], subV[64];
1558 int yy, xx;
1559 for(yy = 0, pos = 0; yy < 8; ++yy) {
1560 for(xx = 0; xx < 8; ++xx, ++pos) {
1561 int j = yy*32+xx*2;
1562 subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f;
1563 subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f;
1564 }
1565 }
1566 DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1567 DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1568 }
1569 }
1570 }
1571 } else {
1572 for(y = 0; y < height; y += 8) {
1573 for(x = 0; x < width; x += 8) {
1574 float Y[64], U[64], V[64];
1575 for(row = y, pos = 0; row < y+8; ++row) {
1576 // row >= height => use last input row
1577 int clamped_row = (row < height) ? row : height - 1;
1578 int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1579 for(col = x; col < x+8; ++col, ++pos) {
1580 // if col >= width => use pixel from last input column
1581 int p = base_p + ((col < width) ? col : (width-1))*comp;
1582 float r = dataR[p], g = dataG[p], b = dataB[p];
1583 Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128;
1584 U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b;
1585 V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b;
1586 }
1587 }
1588
1589 DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1590 DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1591 DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1592 }
1593 }
1594 }
1595
1596 // Do the bit alignment of the EOI marker
1597 stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);
1598 }
1599
1600 // EOI
1601 stbiw__putc(s, 0xFF);
1602 stbiw__putc(s, 0xD9);
1603
1604 return 1;
1605}
1606
1607STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality)
1608{
1609 stbi__write_context s = { 0 };
1610 stbi__start_write_callbacks(&s, func, context);
1611 return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
1612}
1613
1614
1615#ifndef STBI_WRITE_NO_STDIO
1616STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
1617{
1618 stbi__write_context s = { 0 };
1619 if (stbi__start_write_file(&s,filename)) {
1620 int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
1621 stbi__end_write_file(&s);
1622 return r;
1623 } else
1624 return 0;
1625}
1626#endif
1627
1628#endif // STB_IMAGE_WRITE_IMPLEMENTATION
1629
203/* Revision history 1630/* Revision history
204 1.16 (2021-07-11) 1631 1.16 (2021-07-11)
205 make Deflate code emit uncompressed blocks when it would otherwise expand 1632 make Deflate code emit uncompressed blocks when it would otherwise expand