mirror of
https://git.freebsd.org/src.git
synced 2026-01-16 23:02:24 +00:00
Vendor import of xz 5.8.1 (trimmed)
This commit is contained in:
parent
956197bcea
commit
12eff5f0d8
82 changed files with 7494 additions and 2210 deletions
2
AUTHORS
2
AUTHORS
|
|
@ -24,7 +24,7 @@ Authors of XZ Utils
|
|||
by Michał Górny.
|
||||
|
||||
Architecture-specific CRC optimizations were contributed by
|
||||
Ilya Kurdyukov, Hans Jansen, and Chenxi Mao.
|
||||
Ilya Kurdyukov, Chenxi Mao, and Xi Ruoyao.
|
||||
|
||||
Other authors:
|
||||
- Jonathan Nieder
|
||||
|
|
|
|||
25
COPYING
25
COPYING
|
|
@ -40,6 +40,12 @@ XZ Utils Licensing
|
|||
free software licenses. These aren't built or installed as
|
||||
part of XZ Utils.
|
||||
|
||||
The following command may be helpful in finding per-file license
|
||||
information. It works on xz.git and on a clean file tree extracted
|
||||
from a release tarball.
|
||||
|
||||
sh build-aux/license-check.sh -v
|
||||
|
||||
For the files under the BSD Zero Clause License (0BSD), if
|
||||
a copyright notice is needed, the following is sufficient:
|
||||
|
||||
|
|
@ -59,25 +65,6 @@ XZ Utils Licensing
|
|||
- COPYING.GPLv2: GNU General Public License version 2
|
||||
- COPYING.GPLv3: GNU General Public License version 3
|
||||
|
||||
A note about old XZ Utils releases:
|
||||
|
||||
XZ Utils releases 5.4.6 and older and 5.5.1alpha have a
|
||||
significant amount of code put into the public domain and
|
||||
that obviously remains so. The switch from public domain to
|
||||
0BSD for newer releases was made in February 2024 because
|
||||
public domain has (real or perceived) legal ambiguities in
|
||||
some jurisdictions.
|
||||
|
||||
There is very little *practical* difference between public
|
||||
domain and 0BSD. The main difference likely is that one
|
||||
shouldn't claim that 0BSD-licensed code is in the public
|
||||
domain; 0BSD-licensed code is copyrighted but available under
|
||||
an extremely permissive license. Neither 0BSD nor public domain
|
||||
require retaining or reproducing author, copyright holder, or
|
||||
license notices when distributing the software. (Compare to,
|
||||
for example, BSD 2-Clause "Simplified" License which does have
|
||||
such requirements.)
|
||||
|
||||
If you have questions, don't hesitate to ask for more information.
|
||||
The contact information is in the README file.
|
||||
|
||||
|
|
|
|||
103
README
103
README
|
|
@ -10,6 +10,7 @@ XZ Utils
|
|||
2. Version numbering
|
||||
3. Reporting bugs
|
||||
4. Translations
|
||||
4.1. Testing translations
|
||||
5. Other implementations of the .xz format
|
||||
6. Contact information
|
||||
|
||||
|
|
@ -203,77 +204,47 @@ XZ Utils
|
|||
|
||||
https://translationproject.org/html/translators.html
|
||||
|
||||
Below are notes and testing instructions specific to xz
|
||||
translations.
|
||||
Updates to translations won't be accepted by methods that bypass
|
||||
the Translation Project because there is a risk of duplicate work:
|
||||
translation updates made in the xz repository aren't seen by the
|
||||
translators in the Translation Project. If you have found bugs in
|
||||
a translation, please report them to the Language-Team address
|
||||
which can be found near the beginning of the PO file.
|
||||
|
||||
Testing can be done by installing xz into a temporary directory:
|
||||
If you find language problems in the original English strings,
|
||||
feel free to suggest improvements. Ask if something is unclear.
|
||||
|
||||
|
||||
4.1. Testing translations
|
||||
|
||||
Testing can be done by installing xz into a temporary directory.
|
||||
|
||||
If building from Git repository (not tarball), generate the
|
||||
Autotools files:
|
||||
|
||||
./autogen.sh
|
||||
|
||||
Create a subdirectory for the build files. The tmp-build directory
|
||||
can be deleted after testing.
|
||||
|
||||
mkdir tmp-build
|
||||
cd tmp-build
|
||||
../configure --disable-shared --enable-debug --prefix=$PWD/inst
|
||||
|
||||
Edit the .po file in the po directory. Then build and install to
|
||||
the "tmp-build/inst" directory, and use translation.bash to see
|
||||
how some of the messages look. Repeat these steps if needed:
|
||||
|
||||
./configure --disable-shared --prefix=/tmp/xz-test
|
||||
# <Edit the .po file in the po directory.>
|
||||
make -C po update-po
|
||||
make install
|
||||
bash debug/translation.bash | less
|
||||
bash debug/translation.bash | less -S # For --list outputs
|
||||
make -j"$(nproc)" install
|
||||
bash ../debug/translation.bash | less
|
||||
bash ../debug/translation.bash | less -S # For --list outputs
|
||||
|
||||
Repeat the above as needed (no need to re-run configure though).
|
||||
To test other languages, set the LANGUAGE environment variable
|
||||
before running translation.bash. The value should match the PO file
|
||||
name without the .po suffix. Example:
|
||||
|
||||
Note especially the following:
|
||||
|
||||
- The output of --help and --long-help must look nice on
|
||||
an 80-column terminal. It's OK to add extra lines if needed.
|
||||
|
||||
- In contrast, don't add extra lines to error messages and such.
|
||||
They are often preceded with e.g. a filename on the same line,
|
||||
so you have no way to predict where to put a \n. Let the terminal
|
||||
do the wrapping even if it looks ugly. Adding new lines will be
|
||||
even uglier in the generic case even if it looks nice in a few
|
||||
limited examples.
|
||||
|
||||
- Be careful with column alignment in tables and table-like output
|
||||
(--list, --list --verbose --verbose, --info-memory, --help, and
|
||||
--long-help):
|
||||
|
||||
* All descriptions of options in --help should start in the
|
||||
same column (but it doesn't need to be the same column as
|
||||
in the English messages; just be consistent if you change it).
|
||||
Check that both --help and --long-help look OK, since they
|
||||
share several strings.
|
||||
|
||||
* --list --verbose and --info-memory print lines that have
|
||||
the format "Description: %s". If you need a longer
|
||||
description, you can put extra space between the colon
|
||||
and %s. Then you may need to add extra space to other
|
||||
strings too so that the result as a whole looks good (all
|
||||
values start at the same column).
|
||||
|
||||
* The columns of the actual tables in --list --verbose --verbose
|
||||
should be aligned properly. Abbreviate if necessary. It might
|
||||
be good to keep at least 2 or 3 spaces between column headings
|
||||
and avoid spaces in the headings so that the columns stand out
|
||||
better, but this is a matter of opinion. Do what you think
|
||||
looks best.
|
||||
|
||||
- Be careful to put a period at the end of a sentence when the
|
||||
original version has it, and don't put it when the original
|
||||
doesn't have it. Similarly, be careful with \n characters
|
||||
at the beginning and end of the strings.
|
||||
|
||||
- Read the TRANSLATORS comments that have been extracted from the
|
||||
source code and included in xz.pot. Some comments suggest
|
||||
testing with a specific command which needs an .xz file. You
|
||||
may use e.g. any tests/files/good-*.xz. However, these test
|
||||
commands are included in translations.bash output, so reading
|
||||
translations.bash output carefully can be enough.
|
||||
|
||||
- If you find language problems in the original English strings,
|
||||
feel free to suggest improvements. Ask if something is unclear.
|
||||
|
||||
- The translated messages should be understandable (sometimes this
|
||||
may be a problem with the original English messages too). Don't
|
||||
make a direct word-by-word translation from English especially if
|
||||
the result doesn't sound good in your language.
|
||||
|
||||
Thanks for your help!
|
||||
export LANGUAGE=fi
|
||||
|
||||
|
||||
5. Other implementations of the .xz format
|
||||
|
|
|
|||
37
THANKS
37
THANKS
|
|
@ -20,6 +20,7 @@ has been important. :-) In alphabetical order:
|
|||
- Jakub Bogusz
|
||||
- Adam Borowski
|
||||
- Maarten Bosmans
|
||||
- Roel Bouckaert
|
||||
- Lukas Braune
|
||||
- Benjamin Buch
|
||||
- Trent W. Buck
|
||||
|
|
@ -29,26 +30,35 @@ has been important. :-) In alphabetical order:
|
|||
- Frank Busse
|
||||
- Daniel Mealha Cabrita
|
||||
- Milo Casagrande
|
||||
- Cristiano Ceglia
|
||||
- Marek Černocký
|
||||
- Tomer Chachamu
|
||||
- Vitaly Chikunov
|
||||
- Antoine Cœur
|
||||
- Elijah Almeida Coimbra
|
||||
- Felix Collin
|
||||
- Ryan Colyer
|
||||
- Marcus Comstedt
|
||||
- Vincent Cruz
|
||||
- Gabi Davar
|
||||
- Ron Desmond
|
||||
- İhsan Doğan
|
||||
- Chris Donawa
|
||||
- Andrew Dudman
|
||||
- Markus Duft
|
||||
- İsmail Dönmez
|
||||
- Dexter Castor Döpping
|
||||
- Paul Eggert
|
||||
- Robert Elz
|
||||
- Gilles Espinasse
|
||||
- Denis Excoffier
|
||||
- Vincent Fazio
|
||||
- Michael Felt
|
||||
- Sean Fenian
|
||||
- Michael Fox
|
||||
- Andres Freund
|
||||
- Mike Frysinger
|
||||
- Collin Funk
|
||||
- Daniel Richard G.
|
||||
- Tomasz Gajc
|
||||
- Bjarni Ingi Gislason
|
||||
|
|
@ -57,10 +67,14 @@ has been important. :-) In alphabetical order:
|
|||
- Matthew Good
|
||||
- Michał Górny
|
||||
- Jason Gorski
|
||||
- Alexander M. Greenham
|
||||
- Juan Manuel Guerrero
|
||||
- Gabriela Gutierrez
|
||||
- Diederik de Haas
|
||||
- Jan Terje Hansen
|
||||
- Tobias Lahrmann Hansen
|
||||
- Joachim Henke
|
||||
- Lizandro Heredia
|
||||
- Christian Hesse
|
||||
- Vincenzo Innocente
|
||||
- Peter Ivanov
|
||||
|
|
@ -76,9 +90,11 @@ has been important. :-) In alphabetical order:
|
|||
- Per Øyvind Karlsen
|
||||
- Firas Khalil Khana
|
||||
- Iouri Kharon
|
||||
- Kim Jinyeong
|
||||
- Thomas Klausner
|
||||
- Richard Koch
|
||||
- Anton Kochkov
|
||||
- Harri K. Koskinen
|
||||
- Ville Koskinen
|
||||
- Sergey Kosukhin
|
||||
- Marcin Kowalczyk
|
||||
|
|
@ -103,14 +119,20 @@ has been important. :-) In alphabetical order:
|
|||
- Chenxi Mao
|
||||
- Gregory Margo
|
||||
- Julien Marrec
|
||||
- Pierre-Yves Martin
|
||||
- Ed Maste
|
||||
- Martin Matuška
|
||||
- Scott McAllister
|
||||
- Chris McCrohan
|
||||
- Derwin McGeary
|
||||
- Ivan A. Melnikov
|
||||
- Jim Meyering
|
||||
- Arkadiusz Miskiewicz
|
||||
- Nathan Moinvaziri
|
||||
- Étienne Mollier
|
||||
- Conley Moorhous
|
||||
- Dirk Müller
|
||||
- Rainer Müller
|
||||
- Andrew Murray
|
||||
- Rafał Mużyło
|
||||
- Adrien Nader
|
||||
|
|
@ -118,28 +140,34 @@ has been important. :-) In alphabetical order:
|
|||
- Alexander Neumann
|
||||
- Hongbo Ni
|
||||
- Jonathan Nieder
|
||||
- Asgeir Storesund Nilsen
|
||||
- Andre Noll
|
||||
- Ruarí Ødegaard
|
||||
- Peter O'Gorman
|
||||
- Dimitri Papadopoulos Orfanos
|
||||
- Daniel Packard
|
||||
- Filip Palian
|
||||
- Peter Pallinger
|
||||
- Kai Pastor
|
||||
- Keith Patton
|
||||
- Rui Paulo
|
||||
- Igor Pavlov
|
||||
- Diego Elio Pettenò
|
||||
- Elbert Pol
|
||||
- Guiorgy Potskhishvili
|
||||
- Mikko Pouru
|
||||
- Frank Prochnow
|
||||
- Rich Prohaska
|
||||
- Trần Ngọc Quân
|
||||
- Pavel Raiskup
|
||||
- Matthieu Rakotojaona
|
||||
- Ole André Vadla Ravnås
|
||||
- Eric S. Raymond
|
||||
- Robert Readman
|
||||
- Bernhard Reutner-Fischer
|
||||
- Markus Rickert
|
||||
- Cristian Rodríguez
|
||||
- Jeroen Roovers
|
||||
- Christian von Roques
|
||||
- Boud Roukema
|
||||
- Torsten Rupp
|
||||
|
|
@ -156,6 +184,7 @@ has been important. :-) In alphabetical order:
|
|||
- Dan Shechter
|
||||
- Stuart Shelton
|
||||
- Sebastian Andrzej Siewior
|
||||
- Andrej Skenderija
|
||||
- Ville Skyttä
|
||||
- Brad Smith
|
||||
- Bruce Stark
|
||||
|
|
@ -181,20 +210,28 @@ has been important. :-) In alphabetical order:
|
|||
- Christian Weisgerber
|
||||
- Dan Weiss
|
||||
- Bert Wesarg
|
||||
- Mark Wielaard
|
||||
- Fredrik Wikstrom
|
||||
- Jim Wilcoxson
|
||||
- Ralf Wildenhues
|
||||
- Charles Wilson
|
||||
- Lars Wirzenius
|
||||
- Vincent Wixsom
|
||||
- Pilorz Wojciech
|
||||
- Chien Wong
|
||||
- Xi Ruoyao
|
||||
- Ryan Young
|
||||
- Andreas Zieringer
|
||||
- 榆柳松 (ZhengSen Wang)
|
||||
|
||||
Companies:
|
||||
- Google
|
||||
- Sandfly Security
|
||||
|
||||
Other credits:
|
||||
- cleemy desu wayo working with Trend Micro Zero Day Initiative
|
||||
- Orange Tsai and splitline from DEVCORE Research Team
|
||||
|
||||
Also thanks to all the people who have participated in the Tukaani project.
|
||||
|
||||
I have probably forgotten to add some names to the above list. Sorry about
|
||||
|
|
|
|||
25
TODO
25
TODO
|
|
@ -5,12 +5,7 @@ XZ Utils To-Do List
|
|||
Known bugs
|
||||
----------
|
||||
|
||||
The test suite is too incomplete.
|
||||
|
||||
If the memory usage limit is less than about 13 MiB, xz is unable to
|
||||
automatically scale down the compression settings enough even though
|
||||
it would be possible by switching from BT2/BT3/BT4 match finder to
|
||||
HC3/HC4.
|
||||
The test suite is incomplete.
|
||||
|
||||
XZ Utils compresses some files significantly worse than LZMA Utils.
|
||||
This is due to faster compression presets used by XZ Utils, and
|
||||
|
|
@ -19,9 +14,6 @@ Known bugs
|
|||
compress extremely well, so going from compression ratio of 0.003
|
||||
to 0.004 means big relative increase in the compressed file size.
|
||||
|
||||
xz doesn't quote unprintable characters when it displays file names
|
||||
given on the command line.
|
||||
|
||||
tuklib_exit() doesn't block signals => EINTR is possible.
|
||||
|
||||
If liblzma has created threads and fork() gets called, liblzma
|
||||
|
|
@ -41,9 +33,6 @@ Missing features
|
|||
be mostly useful when using a preset dictionary in LZMA2, but
|
||||
it may have other uses too. Compare to deflateCopy() in zlib.
|
||||
|
||||
Support LZMA_FINISH in raw decoder to indicate end of LZMA1 and
|
||||
other streams that don't have an end of payload marker.
|
||||
|
||||
Adjust dictionary size when the input file size is known.
|
||||
Maybe do this only if an option is given.
|
||||
|
||||
|
|
@ -67,9 +56,9 @@ Missing features
|
|||
Support LZMA_FULL_FLUSH for lzma_stream_decoder() to stop at
|
||||
Block and Stream boundaries.
|
||||
|
||||
lzma_strerror() to convert lzma_ret to human readable form?
|
||||
This is tricky, because the same error codes are used with
|
||||
slightly different meanings, and this cannot be fixed anymore.
|
||||
Error codes from lzma_code() aren't very specific. A more detailed
|
||||
error message (string) could be provided too. It could be returned
|
||||
by a new function or use a currently-reserved member of lzma_stream.
|
||||
|
||||
Make it possible to adjust LZMA2 options in the middle of a Block
|
||||
so that the encoding speed vs. compression ratio can be optimized
|
||||
|
|
@ -97,9 +86,3 @@ Documentation
|
|||
|
||||
Document the LZMA1 and LZMA2 algorithms.
|
||||
|
||||
|
||||
Miscellaneous
|
||||
------------
|
||||
|
||||
Try to get the media type for .xz registered at IANA.
|
||||
|
||||
|
|
|
|||
141
src/common/my_landlock.h
Normal file
141
src/common/my_landlock.h
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file my_landlock.h
|
||||
/// \brief Linux Landlock sandbox helper functions
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef MY_LANDLOCK_H
|
||||
#define MY_LANDLOCK_H
|
||||
|
||||
#include "sysdefs.h"
|
||||
|
||||
#include <linux/landlock.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
|
||||
/// \brief      Fill in Landlock ruleset attributes so that everything is
///             forbidden
///
/// The Landlock ABI version of the running kernel is queried at runtime.
/// Only the access rights and scopes that are known both to the build-time
/// headers and to that ABI version are set in *attr. Thus, passing the
/// attributes to my_landlock_create_ruleset() shouldn't fail.
///
/// \param      attr    Ruleset attributes to initialize. The structure is
///                     cleared with memzero() before the ABI version is
///                     queried.
///
/// \return     On success, the Landlock ABI version is returned (a positive
///             integer). If Landlock isn't supported, -1 is returned.
static int
my_landlock_ruleset_attr_forbid_all(struct landlock_ruleset_attr *attr)
{
	memzero(attr, sizeof(*attr));

	// Query the ABI version instead of creating a ruleset.
	const int abi_version = syscall(SYS_landlock_create_ruleset,
			(void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
	if (abi_version <= 0)
		return -1;

	// ABI 1: These are available in every Landlock version.
	attr->handled_access_fs
			= LANDLOCK_ACCESS_FS_EXECUTE
			| LANDLOCK_ACCESS_FS_WRITE_FILE
			| LANDLOCK_ACCESS_FS_READ_FILE
			| LANDLOCK_ACCESS_FS_READ_DIR
			| LANDLOCK_ACCESS_FS_REMOVE_DIR
			| LANDLOCK_ACCESS_FS_REMOVE_FILE
			| LANDLOCK_ACCESS_FS_MAKE_CHAR
			| LANDLOCK_ACCESS_FS_MAKE_DIR
			| LANDLOCK_ACCESS_FS_MAKE_REG
			| LANDLOCK_ACCESS_FS_MAKE_SOCK
			| LANDLOCK_ACCESS_FS_MAKE_FIFO
			| LANDLOCK_ACCESS_FS_MAKE_BLOCK
			| LANDLOCK_ACCESS_FS_MAKE_SYM;

	// The rest are enabled only if both the headers used at build time
	// and the kernel at runtime are new enough. We only know about
	// the features of the ABIs 1-6.

#ifdef LANDLOCK_ACCESS_FS_REFER
	// ABI 2
	if (abi_version >= 2)
		attr->handled_access_fs |= LANDLOCK_ACCESS_FS_REFER;
#endif

#ifdef LANDLOCK_ACCESS_FS_TRUNCATE
	// ABI 3
	if (abi_version >= 3)
		attr->handled_access_fs |= LANDLOCK_ACCESS_FS_TRUNCATE;
#endif

#ifdef LANDLOCK_ACCESS_NET_BIND_TCP
	// ABI 4
	if (abi_version >= 4)
		attr->handled_access_net
				= LANDLOCK_ACCESS_NET_BIND_TCP
				| LANDLOCK_ACCESS_NET_CONNECT_TCP;
#endif

#ifdef LANDLOCK_ACCESS_FS_IOCTL_DEV
	// ABI 5
	if (abi_version >= 5)
		attr->handled_access_fs |= LANDLOCK_ACCESS_FS_IOCTL_DEV;
#endif

#ifdef LANDLOCK_SCOPE_SIGNAL
	// ABI 6
	if (abi_version >= 6)
		attr->scoped
				= LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET
				| LANDLOCK_SCOPE_SIGNAL;
#endif

	return abi_version;
}
|
||||
|
||||
|
||||
/// \brief      Typed wrapper for the landlock_create_ruleset(2) syscall
///
/// Going through this function instead of calling syscall() directly
/// provides argument type checking.
///
/// \note       Remember to call `prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)` too!
static inline int
my_landlock_create_ruleset(const struct landlock_ruleset_attr *attr,
		size_t size, uint32_t flags)
{
	const long ret = syscall(SYS_landlock_create_ruleset,
			attr, size, flags);
	return (int)ret;
}
|
||||
|
||||
|
||||
/// \brief      Typed wrapper for the landlock_restrict_self(2) syscall
///
/// Going through this function instead of calling syscall() directly
/// provides argument type checking.
static inline int
my_landlock_restrict_self(int ruleset_fd, uint32_t flags)
{
	const long ret = syscall(SYS_landlock_restrict_self,
			ruleset_fd, flags);
	return (int)ret;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -23,17 +23,29 @@
|
|||
# include <config.h>
|
||||
#endif
|
||||
|
||||
// This #define ensures that C99 and POSIX compliant stdio functions are
|
||||
// available with MinGW-w64 (both 32-bit and 64-bit). Modern MinGW-w64 adds
|
||||
// this automatically, for example, when the compiler is in C99 (or later)
|
||||
// mode when building against msvcrt.dll. It still doesn't hurt to be explicit
|
||||
// that we always want this and #define this unconditionally.
|
||||
// Choose if MinGW-w64's stdio replacement functions should be used.
|
||||
// The default has varied slightly in the past so it's clearest to always
|
||||
// set it explicitly.
|
||||
//
|
||||
// With Universal CRT (UCRT) this is less important because UCRT contains
|
||||
// C99-compatible stdio functions. It's still nice to #define this as UCRT
|
||||
// doesn't support the POSIX thousand separator flag in printf (like "%'u").
|
||||
#ifdef __MINGW32__
|
||||
// Modern MinGW-w64 enables the replacement functions even with UCRT
|
||||
// when _GNU_SOURCE is defined. That's good because UCRT doesn't support
|
||||
// the POSIX thousand separator flag in printf (like "%'u"). Otherwise
|
||||
// XZ Utils works with the UCRT stdio functions.
|
||||
//
|
||||
// The replacement functions add over 20 KiB to each executable. For
|
||||
// size-optimized builds (HAVE_SMALL), disable the replacements.
|
||||
// Then thousand separators aren't shown in xz's messages but this is
|
||||
// a minor downside compared to the slower speed of the HAVE_SMALL builds.
|
||||
//
|
||||
// The legacy MSVCRT is pre-C99 and it's best to always use the stdio
|
||||
// replacement functions from MinGW-w64.
|
||||
#if defined(__MINGW32__) && !defined(__USE_MINGW_ANSI_STDIO)
|
||||
# define __USE_MINGW_ANSI_STDIO 1
|
||||
# include <_mingw.h>
|
||||
# if defined(_UCRT) && defined(HAVE_SMALL)
|
||||
# undef __USE_MINGW_ANSI_STDIO
|
||||
# define __USE_MINGW_ANSI_STDIO 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// size_t and NULL
|
||||
|
|
@ -156,17 +168,26 @@ typedef unsigned char _Bool;
|
|||
# define __bool_true_false_are_defined 1
|
||||
#endif
|
||||
|
||||
// We may need alignas from C11/C17/C23.
|
||||
#if __STDC_VERSION__ >= 202311
|
||||
// alignas is a keyword in C23. Do nothing.
|
||||
#elif __STDC_VERSION__ >= 201112
|
||||
// Oracle Developer Studio 12.6 lacks <stdalign.h>.
|
||||
// For simplicity, avoid the header with all C11/C17 compilers.
|
||||
# define alignas _Alignas
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
# define alignas(n) __attribute__((__aligned__(n)))
|
||||
#else
|
||||
# define alignas(n)
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
// Visual Studio 2013 update 2 supports only __inline, not inline.
|
||||
// MSVC v19.0 / VS 2015 and newer support both.
|
||||
// MSVC v19.00 (VS 2015 version 14.0) and later should work.
|
||||
//
|
||||
// MSVC v19.27 (VS 2019 version 16.7) added support for restrict.
|
||||
// Older ones support only __restrict.
|
||||
#ifdef _MSC_VER
|
||||
# if _MSC_VER < 1900 && !defined(inline)
|
||||
# define inline __inline
|
||||
# endif
|
||||
# if _MSC_VER < 1927 && !defined(restrict)
|
||||
# define restrict __restrict
|
||||
# endif
|
||||
|
|
@ -196,4 +217,13 @@ typedef unsigned char _Bool;
|
|||
# define lzma_attr_alloc_size(x)
|
||||
#endif
|
||||
|
||||
#if __STDC_VERSION__ >= 202311
|
||||
# define FALLTHROUGH [[__fallthrough__]]
|
||||
#elif (defined(__GNUC__) && __GNUC__ >= 7) \
|
||||
|| (defined(__clang_major__) && __clang_major__ >= 10)
|
||||
# define FALLTHROUGH __attribute__((__fallthrough__))
|
||||
#else
|
||||
# define FALLTHROUGH ((void)0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -56,6 +56,13 @@
|
|||
# define TUKLIB_GNUC_REQ(major, minor) 0
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
# define tuklib_attr_format_printf(fmt_index, args_index) \
|
||||
__attribute__((__format__(__printf__, fmt_index, args_index)))
|
||||
#else
|
||||
# define tuklib_attr_format_printf(fmt_index, args_index)
|
||||
#endif
|
||||
|
||||
// tuklib_attr_noreturn attribute is used to mark functions as non-returning.
|
||||
// We cannot use "noreturn" as the macro name because then C23 code that
|
||||
// uses [[noreturn]] would break as it would expand to [[ [[noreturn]] ]].
|
||||
|
|
@ -68,9 +75,7 @@
|
|||
// __attribute__((nonnull(1)))
|
||||
// extern void foo(const char *s);
|
||||
//
|
||||
// FIXME: Update __STDC_VERSION__ for the final C23 version. 202000 is used
|
||||
// by GCC 13 and Clang 15 with -std=c2x.
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311
|
||||
# define tuklib_attr_noreturn [[noreturn]]
|
||||
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112
|
||||
# define tuklib_attr_noreturn _Noreturn
|
||||
|
|
|
|||
|
|
@ -40,4 +40,15 @@
|
|||
#endif
|
||||
#define N_(msgid) msgid
|
||||
|
||||
// Optional: Strings that are word wrapped using tuklib_mbstr_wrap may be
|
||||
// marked with W_("foo") in the source code. xgettext can then add a comment
|
||||
// to all such strings to inform translators. The following option needs to
|
||||
// be added to XGETTEXT_OPTIONS in po/Makevars or in an equivalent place:
|
||||
//
|
||||
// '--keyword=W_:1,"This is word wrapped at spaces. The Unicode character U+00A0 works as a non-breaking space. Tab (\t) is interpret as a zero-width space (the tab itself is not displayed); U+200B is NOT supported. Manual word wrapping with \n is supported but requires care."'
|
||||
//
|
||||
// NOTE: The double-quotes in the --keyword argument above must be passed to
|
||||
// xgettext as is, thus one needs the single-quotes in Makevars.
|
||||
#define W_(msgid) _(msgid)
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -27,10 +27,7 @@ extern size_t tuklib_mbstr_width(const char *str, size_t *bytes);
|
|||
///
|
||||
/// This is somewhat similar to wcswidth() but works on multibyte strings.
|
||||
///
|
||||
/// \param str String whose width is to be calculated. If the
|
||||
/// current locale uses a multibyte character set
|
||||
/// that has shift states, the string must begin
|
||||
/// and end in the initial shift state.
|
||||
/// \param str String whose width is to be calculated.
|
||||
/// \param bytes If this is not NULL, *bytes is set to the
|
||||
/// value returned by strlen(str) (even if an
|
||||
/// error occurs when calculating the width).
|
||||
|
|
@ -38,8 +35,24 @@ extern size_t tuklib_mbstr_width(const char *str, size_t *bytes);
|
|||
/// \return On success, the number of columns needed to display the
|
||||
/// string e.g. in a terminal emulator is returned. On error,
|
||||
/// (size_t)-1 is returned. Possible errors include invalid,
|
||||
/// partial, or non-printable multibyte character in str, or
|
||||
/// that str doesn't end in the initial shift state.
|
||||
/// partial, or non-printable multibyte character in str.
|
||||
|
||||
#define tuklib_mbstr_width_mem TUKLIB_SYMBOL(tuklib_mbstr_width_mem)
|
||||
extern size_t tuklib_mbstr_width_mem(const char *str, size_t len);
|
||||
///<
|
||||
/// \brief Get the number of columns needed for the multibyte buffer
|
||||
///
|
||||
/// This is like tuklib_mbstr_width() except that this takes the buffer
|
||||
/// length in bytes as the second argument. This allows using the function
|
||||
/// for buffers that aren't terminated with '\0'.
|
||||
///
|
||||
/// \param str String whose width is to be calculated.
|
||||
/// \param len Number of bytes to read from str.
|
||||
///
|
||||
/// \return On success, the number of columns needed to display the
|
||||
/// string e.g. in a terminal emulator is returned. On error,
|
||||
/// (size_t)-1 is returned. Possible errors include invalid,
|
||||
/// partial, or non-printable multibyte character in str.
|
||||
|
||||
#define tuklib_mbstr_fw TUKLIB_SYMBOL(tuklib_mbstr_fw)
|
||||
extern int tuklib_mbstr_fw(const char *str, int columns_min);
|
||||
|
|
|
|||
162
src/common/tuklib_mbstr_nonprint.c
Normal file
162
src/common/tuklib_mbstr_nonprint.c
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file tuklib_mbstr_nonprint.c
|
||||
/// \brief Find and replace non-printable characters with question marks
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tuklib_mbstr_nonprint.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifdef HAVE_MBRTOWC
|
||||
# include <wchar.h>
|
||||
# include <wctype.h>
|
||||
#else
|
||||
# include <ctype.h>
|
||||
#endif
|
||||
|
||||
|
||||
/// \brief      Check if the next character in the buffer is printable
///
/// \param      str         Beginning of the character to examine
/// \param      len         Number of bytes available in str
/// \param[out] next_len    Set to the number of bytes to skip to reach
///                         the following character. This is at least 1
///                         as long as len is non-zero.
///
/// \return     true if the next character is valid and printable,
///             false otherwise.
static bool
is_next_printable(const char *str, size_t len, size_t *next_len)
{
#ifdef HAVE_MBRTOWC
	// Decoding always starts from a zeroed conversion state. This
	// assumes that character sets with locking shift states aren't
	// used, and thus mbsinit() is never needed.
	mbstate_t state;
	memset(&state, 0, sizeof(state));

	wchar_t wc;
	const size_t ret = mbrtowc(&wc, str, len, &state);

	if (ret == (size_t)-2) {
		// Incomplete multibyte sequence: The rest of the buffer
		// is treated as one non-printable multibyte character
		// that ends the string.
		*next_len = len;
		return false;
	}

	// Check more broadly than just ret == (size_t)-1 to be safe
	// in case mbrtowc() returns something weird. This check covers
	// (size_t)-1 (that is, SIZE_MAX) too because len is from strlen()
	// and the terminating '\0' isn't part of the length.
	if (ret < 1 || ret > len) {
		// Invalid multibyte sequence: Only the first byte is
		// consumed as a non-printable single-byte character.
		// The next call to this function restarts decoding from
		// the byte after it.
		*next_len = 1;
		return false;
	}

	*next_len = ret;

#	if defined(_WIN32) && !defined(__CYGWIN__)
	// On Windows, wchar_t stores UTF-16 code units, thus characters
	// outside the Basic Multilingual Plane (BMP) don't fit into
	// a single wchar_t. In an UTF-8 locale, UCRT's mbrtowc() returns
	// successfully when the input is a non-BMP character but the
	// output is the replacement character U+FFFD.
	//
	// iswprint() returns 0 for U+FFFD on Windows for some reason. Treat
	// U+FFFD as printable and thus also all non-BMP chars as printable.
	if (wc == 0xFFFD)
		return true;
#	endif

	return iswprint((wint_t)wc) != 0;
#else
	// Without mbrtowc(), every byte is examined as its own character.
	(void)len;
	*next_len = 1;
	return isprint((unsigned char)str[0]) != 0;
#endif
}
|
||||
|
||||
|
||||
/// \brief      Check if a buffer contains any non-printable characters
///
/// \param      str     Buffer to examine
/// \param      len     Number of bytes to examine from str
///
/// \return     true as soon as is_next_printable() rejects a character;
///             false if the whole buffer was accepted (including when
///             len is zero).
static bool
has_nonprint(const char *str, size_t len)
{
	size_t pos = 0;

	while (pos < len) {
		size_t char_len;
		if (!is_next_printable(str + pos, len - pos, &char_len))
			return true;

		pos += char_len;
	}

	return false;
}
|
||||
|
||||
|
||||
// Public entry point: Check a '\0'-terminated string for non-printable
// characters. errno is saved at entry and restored before returning so
// that the caller sees the value it had before the call.
extern bool
tuklib_has_nonprint(const char *str)
{
	const int saved_errno = errno;

	const size_t len = strlen(str);
	const bool found = has_nonprint(str, len);

	errno = saved_errno;
	return found;
}
|
||||
|
||||
|
||||
// Return a version of str in which every non-printable character has been
// replaced with a single '?'.
//
// *mem is passed to free() at the beginning of every call, so it must be
// NULL or a pointer that can be freed. If masking is needed, *mem is set to
// a newly allocated buffer that holds the result; otherwise *mem is left
// as NULL.
//
// The return value is one of:
//   - str itself if it contains only printable characters
//   - *mem if something had to be masked
//   - the string literal "???" if memory allocation failed
//
// errno is saved at entry and restored before returning.
extern const char *
tuklib_mask_nonprint_r(const char *str, char **mem)
{
	const int saved_errno = errno;

	// Release the buffer from the previous call, if any.
	free(*mem);
	*mem = NULL;

	const size_t len = strlen(str);

	// The input string itself is the result unless something
	// needs to be masked.
	const char *result = str;

	if (has_nonprint(str, len)) {
		// Since the single-byte character '?' is used to mask
		// non-printable characters, the result never needs more
		// space than the input. A few bytes less could be enough
		// if multibyte characters get masked.
		*mem = malloc(len + 1);

		if (*mem == NULL) {
			// Returning "???" should be safer than returning
			// the unmasked string.
			result = "???";
		} else {
			char *out = *mem;
			size_t pos = 0;

			while (pos < len) {
				size_t char_len;
				if (is_next_printable(str + pos, len - pos,
						&char_len)) {
					// Printable: Copy it as is.
					memcpy(out, str + pos, char_len);
					out += char_len;
				} else {
					// Non-printable: Replace with '?'.
					*out++ = '?';
				}

				pos += char_len;
			}

			*out = '\0';
			result = *mem;
		}
	}

	errno = saved_errno;
	return result;
}
|
||||
|
||||
|
||||
// Convenience wrapper around tuklib_mask_nonprint_r() that keeps the
// allocation in a function-local static variable. Because that variable
// is shared by all callers, the string returned by one call may be freed
// by the next call.
extern const char *
tuklib_mask_nonprint(const char *str)
{
	static char *prev_alloc = NULL;

	const char *masked = tuklib_mask_nonprint_r(str, &prev_alloc);
	return masked;
}
|
||||
71
src/common/tuklib_mbstr_nonprint.h
Normal file
71
src/common/tuklib_mbstr_nonprint.h
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file tuklib_mbstr_nonprint.h
|
||||
/// \brief Find and replace non-printable characters with question marks
|
||||
///
|
||||
/// If mbrtowc(3) is available, it and iswprint(3) are used to check if all
|
||||
/// characters are printable. Otherwise single-byte character set is assumed
|
||||
/// and isprint(3) is used.
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TUKLIB_MBSTR_NONPRINT_H
|
||||
#define TUKLIB_MBSTR_NONPRINT_H
|
||||
|
||||
#include "tuklib_common.h"
|
||||
TUKLIB_DECLS_BEGIN
|
||||
|
||||
#define tuklib_has_nonprint TUKLIB_SYMBOL(tuklib_has_nonprint)
|
||||
extern bool tuklib_has_nonprint(const char *str);
|
||||
///<
|
||||
/// \brief Check if a string contains any non-printable characters
|
||||
///
|
||||
/// \return false if str contains only valid multibyte characters and
|
||||
/// iswprint(3) returns non-zero for all of them; true otherwise.
|
||||
/// The value of errno is preserved.
|
||||
///
|
||||
/// \note In case mbrtowc(3) isn't available, single-byte character set
|
||||
/// is assumed and isprint(3) is used instead of iswprint(3).
|
||||
|
||||
#define tuklib_mask_nonprint_r TUKLIB_SYMBOL(tuklib_mask_nonprint_r)
|
||||
extern const char *tuklib_mask_nonprint_r(const char *str, char **mem);
|
||||
///<
|
||||
/// \brief Replace non-printable characters with question marks
|
||||
///
|
||||
/// \param str Untrusted string, for example, a filename
|
||||
/// \param mem This function always calls free(*mem) to free the old
|
||||
/// allocation and then sets *mem = NULL. Before the first
|
||||
/// call, *mem should be initialized to NULL. If this
|
||||
/// function needs to allocate memory for a modified
|
||||
/// string, a pointer to the allocated memory will be
|
||||
/// stored to *mem. Otherwise *mem will remain NULL.
|
||||
///
|
||||
/// \return If tuklib_has_nonprint(str) returns false, this function
|
||||
/// returns str. Otherwise memory is allocated to hold a modified
|
||||
/// string and a pointer to that is returned. The pointer to the
|
||||
/// allocated memory is also stored to *mem. A modified string
|
||||
/// has the problematic characters replaced by '?'. If memory
|
||||
/// allocation fails, "???" is returned and *mem is NULL.
|
||||
/// The value of errno is preserved.
|
||||
|
||||
#define tuklib_mask_nonprint TUKLIB_SYMBOL(tuklib_mask_nonprint)
|
||||
extern const char *tuklib_mask_nonprint(const char *str);
|
||||
///<
|
||||
/// \brief Replace non-printable characters with question marks
|
||||
///
|
||||
/// This is a convenience function for single-threaded use. This calls
|
||||
/// tuklib_mask_nonprint_r() using an internal static variable to hold
|
||||
/// the possible allocation.
|
||||
///
|
||||
/// \param str Untrusted string, for example, a filename
|
||||
///
|
||||
/// \return See tuklib_mask_nonprint_r().
|
||||
///
|
||||
/// \note This function is not thread safe!
|
||||
|
||||
TUKLIB_DECLS_END
|
||||
#endif
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
#include "tuklib_mbstr.h"
|
||||
#include <string.h>
|
||||
|
||||
#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
|
||||
#ifdef HAVE_MBRTOWC
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
|
|
@ -24,9 +24,17 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
|
|||
if (bytes != NULL)
|
||||
*bytes = len;
|
||||
|
||||
#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
|
||||
return tuklib_mbstr_width_mem(str, len);
|
||||
}
|
||||
|
||||
|
||||
extern size_t
|
||||
tuklib_mbstr_width_mem(const char *str, size_t len)
|
||||
{
|
||||
#ifndef HAVE_MBRTOWC
|
||||
// In single-byte mode, the width of the string is the same
|
||||
// as its length.
|
||||
(void)str;
|
||||
return len;
|
||||
|
||||
#else
|
||||
|
|
@ -41,21 +49,35 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
|
|||
while (i < len) {
|
||||
wchar_t wc;
|
||||
const size_t ret = mbrtowc(&wc, str + i, len - i, &state);
|
||||
if (ret < 1 || ret > len)
|
||||
if (ret < 1 || ret > len - i)
|
||||
return (size_t)-1;
|
||||
|
||||
i += ret;
|
||||
|
||||
#ifdef HAVE_WCWIDTH
|
||||
const int wc_width = wcwidth(wc);
|
||||
if (wc_width < 0)
|
||||
return (size_t)-1;
|
||||
|
||||
width += (size_t)wc_width;
|
||||
#else
|
||||
// Without wcwidth() (like in a native Windows build),
|
||||
// assume that one multibyte char == one column. With
|
||||
// UTF-8, this is less bad than one byte == one column.
|
||||
// This way quite a few languages will be handled correctly
|
||||
// in practice; CJK chars will be very wrong though.
|
||||
++width;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Require that the string ends in the initial shift state.
|
||||
// This way the caller can be combine the string with other
|
||||
// strings without needing to worry about the shift states.
|
||||
// It's good to check that the string ended in the initial state.
|
||||
// However, in practice this is redundant:
|
||||
//
|
||||
// - No one will use this code with character sets that have
|
||||
// locking shift states.
|
||||
//
|
||||
// - We already checked that mbrtowc() didn't return (size_t)-2
|
||||
// which would indicate a partial multibyte character.
|
||||
if (!mbsinit(&state))
|
||||
return (size_t)-1;
|
||||
|
||||
|
|
|
|||
294
src/common/tuklib_mbstr_wrap.c
Normal file
294
src/common/tuklib_mbstr_wrap.c
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file tuklib_mbstr_wrap.c
|
||||
/// \brief Word wraps a string and prints it to a FILE stream
|
||||
///
|
||||
/// This depends on tuklib_mbstr_width.c.
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tuklib_mbstr.h"
|
||||
#include "tuklib_mbstr_wrap.h"
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
// Word wraps the null-terminated string str and writes it to outfile.
//
// Return value:
//   - TUKLIB_WRAP_ERR_OPT if the margins in opt are inconsistent. This is
//     checked first, before anything is written.
//   - TUKLIB_WRAP_ERR_STR if tuklib_mbstr_width_mem() fails on a chunk.
//   - TUKLIB_WRAP_ERR_IO if putc() fails.
//   - Otherwise TUKLIB_WRAP_WARN_OVERLONG if at least one printed chunk
//     extended past opt->right_margin, or 0 if none did.
extern int
tuklib_wraps(FILE *outfile, const struct tuklib_wrap_opt *opt, const char *str)
{
	// left_cont may be less than left_margin. In that case, if the first
	// word is extremely long, it will stay on the first line even if
	// the line then gets overlong.
	//
	// On the other hand, left2_cont < left2_margin isn't allowed because
	// it could result in inconsistent behavior when a very long word
	// comes right after a \v.
	//
	// It is fine to have left2_margin < left_margin although it would be
	// an odd use case.
	if (!(opt->left_margin < opt->right_margin
			&& opt->left_cont < opt->right_margin
			&& opt->left2_margin <= opt->left2_cont
			&& opt->left2_cont < opt->right_margin))
		return TUKLIB_WRAP_ERR_OPT;

	// This is set to TUKLIB_WRAP_WARN_OVERLONG if one or more
	// output lines extend past opt->right_margin columns.
	int warn_overlong = 0;

	// Indentation of the first output line after \n or \r.
	// \v sets this to opt->left2_margin.
	// \r resets this back to the original value.
	size_t first_indent = opt->left_margin;

	// Indentation of the output lines that occur due to word wrapping.
	// \v sets this to opt->left2_cont and \r back to the original value.
	size_t cont_indent = opt->left_cont;

	// If word wrapping occurs, the newline isn't printed unless more
	// text would be put on the continuation line. This is also used
	// when \v needs to start on a new line.
	bool pending_newline = false;

	// Spaces are printed only when there is something else to put
	// after the spaces on the line. This avoids unwanted empty lines
	// in the output and makes it possible to ignore possible spaces
	// before a \v character.
	size_t pending_spaces = first_indent;

	// Current output column. When cur_col == pending_spaces, nothing
	// has been actually printed to the current output line.
	size_t cur_col = pending_spaces;

	// Each iteration handles one chunk of text (possibly empty) that
	// starts at str, followed by the special character that ends it.
	// The only way out of the loop is a return statement.
	while (true) {
		// Number of bytes until the *next* line-break opportunity.
		size_t len = 0;

		// Number of columns until the *next* line-break opportunity.
		size_t width = 0;

		// Text between a pair of \b characters is treated as
		// an unbreakable block even if it contains spaces.
		// It must not contain any control characters before
		// the closing \b.
		bool unbreakable = false;

		while (true) {
			// Find the next character that we handle specially.
			// In an unbreakable block, search only for the
			// closing \b; if missing, the unbreakable block
			// extends to the end of the string.
			const size_t n = strcspn(str + len,
					unbreakable ? "\b" : " \t\n\r\v\b");

			// Calculate how many columns the characters need.
			const size_t w = tuklib_mbstr_width_mem(str + len, n);
			if (w == (size_t)-1)
				return TUKLIB_WRAP_ERR_STR;

			width += w;
			len += n;

			// \b isn't a line-break opportunity so it has to
			// be handled here. For simplicity, empty blocks
			// are treated as zero-width characters.
			//
			// The \b byte is counted in len but adds nothing to
			// width; it is filtered out when the chunk is printed.
			if (str[len] == '\b') {
				++len;
				unbreakable = !unbreakable;
				continue;
			}

			break;
		}

		// Determine if adding this chunk of text would make the
		// current output line exceed opt->right_margin columns.
		const bool too_long = cur_col + width > opt->right_margin;

		// Wrap the line if needed. However:
		//
		//   - Don't wrap if the current column is less than where
		//     the continuation line would begin. In that case
		//     the chunk wouldn't fit on the next line either so
		//     we just have to produce an overlong line.
		//
		//   - Don't wrap if so far the line only contains spaces.
		//     Wrapping in that case would leave a weird empty line.
		//     NOTE: This "only contains spaces" condition is the
		//     reason why left2_margin > left2_cont isn't allowed.
		if (too_long && cur_col > cont_indent
				&& cur_col > pending_spaces) {
			// There might be trailing spaces or zero-width spaces
			// which need to be ignored to keep the output pretty.
			//
			// Spaces need to be ignored because in some
			// writing styles there are two spaces after
			// a full stop. Example string:
			//
			//     "Foo bar.  Abc def."
			//              ^
			// If the first space after the first full stop
			// triggers word wrapping, both spaces must be
			// ignored. Otherwise the next line would be
			// indented too much.
			//
			// Zero-width spaces are ignored the same way
			// because they are meaningless if an adjacent
			// character is a space.
			while (*str == ' ' || *str == '\t')
				++str;

			// Don't print the newline here; only mark it as
			// pending. This avoids an unwanted empty line if
			// there is a \n or \r or \0 after the spaces have
			// been ignored.
			//
			// Overwriting pending_spaces also discards any spaces
			// that were pending at the end of the wrapped line.
			pending_newline = true;
			pending_spaces = cont_indent;
			cur_col = pending_spaces;

			// Since str may have been incremented due to the
			// ignored spaces, the loop needs to be restarted.
			continue;
		}

		// Print the current chunk of text before the next
		// line-break opportunity. If the chunk was empty,
		// don't print anything so that the pending newline
		// and pending spaces aren't printed on their own.
		if (len > 0) {
			if (pending_newline) {
				pending_newline = false;
				if (putc('\n', outfile) == EOF)
					return TUKLIB_WRAP_ERR_IO;
			}

			while (pending_spaces > 0) {
				if (putc(' ', outfile) == EOF)
					return TUKLIB_WRAP_ERR_IO;

				--pending_spaces;
			}

			for (size_t i = 0; i < len; ++i) {
				// Ignore unbreakable block characters (\b).
				// The cast to unsigned char makes the value
				// passed to putc() non-negative.
				const int c = (unsigned char)str[i];
				if (c != '\b' && putc(c, outfile) == EOF)
					return TUKLIB_WRAP_ERR_IO;
			}

			str += len;
			cur_col += width;

			// Remember if the line got overlong. If no other
			// errors occur, we return warn_overlong. It might
			// help in catching problematic strings.
			if (too_long)
				warn_overlong = TUKLIB_WRAP_WARN_OVERLONG;
		}

		// Handle the special character after the chunk of text.
		//
		// \t has no case below: a zero-width space produces no
		// output and changes no state, so only the ++str after
		// the switch applies to it.
		switch (*str) {
		case ' ':
			// Regular space.
			++cur_col;
			++pending_spaces;
			break;

		case '\v':
			// Set the alternative indentation settings.
			first_indent = opt->left2_margin;
			cont_indent = opt->left2_cont;

			if (first_indent > cur_col) {
				// Add one or more spaces to reach
				// the column specified in first_indent.
				pending_spaces += first_indent - cur_col;
			} else {
				// There is no room to add even one space
				// before reaching the column first_indent.
				pending_newline = true;
				pending_spaces = first_indent;
			}

			cur_col = first_indent;
			break;

		case '\0': // Implicit newline at the end of the string.
		case '\r': // Newline that also resets the effect of \v.
		case '\n': // Newline without resetting the indentation mode.
			if (putc('\n', outfile) == EOF)
				return TUKLIB_WRAP_ERR_IO;

			// End of the string: this is the only successful
			// exit from the function.
			if (*str == '\0')
				return warn_overlong;

			if (*str == '\r') {
				first_indent = opt->left_margin;
				cont_indent = opt->left_cont;
			}

			// A newline was just written, so a newline that was
			// still pending from word wrapping is dropped.
			pending_newline = false;
			pending_spaces = first_indent;
			cur_col = first_indent;
			break;
		}

		// Skip the specially-handled character.
		++str;
	}
}
|
||||
|
||||
|
||||
extern int
|
||||
tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
char *buf;
|
||||
|
||||
#ifdef HAVE_VASPRINTF
|
||||
va_start(ap, fmt);
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wformat-nonliteral"
|
||||
#endif
|
||||
const int n = vasprintf(&buf, fmt, ap);
|
||||
#ifdef __clang__
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
va_end(ap);
|
||||
if (n == -1)
|
||||
return TUKLIB_WRAP_ERR_FORMAT;
|
||||
#else
|
||||
// Fixed buffer size is dumb but in practice one shouldn't need
|
||||
// huge strings for *formatted* output. This simple method is safe
|
||||
// with pre-C99 vsnprintf() implementations too which don't return
|
||||
// the required buffer size (they return -1 or buf_size - 1) or
|
||||
// which might not null-terminate the buffer in case it's too small.
|
||||
const size_t buf_size = 128 * 1024;
|
||||
buf = malloc(buf_size);
|
||||
if (buf == NULL)
|
||||
return TUKLIB_WRAP_ERR_FORMAT;
|
||||
|
||||
va_start(ap, fmt);
|
||||
const int n = vsnprintf(buf, buf_size, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (n <= 0 || n >= (int)(buf_size - 1)) {
|
||||
free(buf);
|
||||
return TUKLIB_WRAP_ERR_FORMAT;
|
||||
}
|
||||
#endif
|
||||
|
||||
const int ret = tuklib_wraps(stream, opt, buf);
|
||||
free(buf);
|
||||
return ret;
|
||||
}
|
||||
204
src/common/tuklib_mbstr_wrap.h
Normal file
204
src/common/tuklib_mbstr_wrap.h
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file tuklib_mbstr_wrap.h
|
||||
/// \brief Word wrapping for multibyte strings
|
||||
///
|
||||
/// The word wrapping functions are intended to be usable, for example,
|
||||
/// for printing --help text in command line tools. While manually-wrapped
|
||||
/// --help text allows precise formatting, such freedom requires translators
|
||||
/// to count spaces and determine where line breaks should occur. It's
|
||||
/// tedious and error prone, and experience has shown that only some
|
||||
/// translators do it well. Automatic word wrapping is less flexible but
|
||||
/// results in polished-enough look with less effort from everyone.
|
||||
/// Right-to-left languages and languages that don't use spaces between
|
||||
/// words will still need extra effort though.
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TUKLIB_MBSTR_WRAP_H
|
||||
#define TUKLIB_MBSTR_WRAP_H
|
||||
|
||||
#include "tuklib_common.h"
|
||||
#include <stdio.h>
|
||||
|
||||
TUKLIB_DECLS_BEGIN
|
||||
|
||||
/// One or more output lines exceeded right_margin.
|
||||
/// This is only a warning; everything was still printed successfully.
|
||||
#define TUKLIB_WRAP_WARN_OVERLONG 0x01
|
||||
|
||||
/// Error writing to the output FILE. The error flag in the FILE
|
||||
/// should have been set as well.
|
||||
#define TUKLIB_WRAP_ERR_IO 0x02
|
||||
|
||||
/// Invalid options in struct tuklib_wrap_opt.
|
||||
/// Nothing was printed.
|
||||
#define TUKLIB_WRAP_ERR_OPT 0x04
|
||||
|
||||
/// Invalid or unsupported multibyte character in the input string:
|
||||
/// either mbrtowc() failed or wcwidth() returned a negative value.
|
||||
#define TUKLIB_WRAP_ERR_STR 0x08
|
||||
|
||||
/// Only tuklib_wrapf(): Error in converting the format string.
|
||||
/// It's either a memory allocation failure or something bad with the
|
||||
/// format string or arguments.
|
||||
#define TUKLIB_WRAP_ERR_FORMAT 0x10
|
||||
|
||||
/// Options for tuklib_wraps() and tuklib_wrapf()
///
/// All members are measured in columns. tuklib_wraps() returns
/// TUKLIB_WRAP_ERR_OPT without printing anything unless all of these hold:
///
///   - left_margin < right_margin
///   - left_cont < right_margin
///   - left2_margin <= left2_cont
///   - left2_cont < right_margin
struct tuklib_wrap_opt {
	/// Indentation of the first output line after `\n` or `\r`.
	/// This can be anything less than right_margin.
	unsigned short left_margin;

	/// Column where word-wrapped continuation lines start.
	/// This can be anything less than right_margin. It may also be
	/// less than left_margin.
	unsigned short left_cont;

	/// Column where the text after `\v` will start, either on the current
	/// line (when there is room to add at least one space) or on a new
	/// empty line. This may be less than left_margin.
	unsigned short left2_margin;

	/// Like left_cont but for text after a `\v`. However, this must
	/// be greater than or equal to left2_margin in addition to being
	/// less than right_margin.
	unsigned short left2_cont;

	/// Output lines are wrapped so that they don't extend past this
	/// column when possible.
	///
	/// For 80-column terminals, it is recommended to use 79 here for
	/// maximum portability. 80 will work most of the time but it will
	/// result in unwanted empty lines in the rare case where a terminal
	/// moves the cursor to the beginning of the next line immediately
	/// when the last column has been used.
	unsigned short right_margin;
};
|
||||
|
||||
#define tuklib_wraps TUKLIB_SYMBOL(tuklib_wraps)
|
||||
extern int tuklib_wraps(FILE *stream, const struct tuklib_wrap_opt *opt,
|
||||
const char *str);
|
||||
///<
|
||||
/// \brief Word wrap a multibyte string and write it to a FILE
|
||||
///
|
||||
/// Word wrapping is done only at spaces and at the special control characters
|
||||
/// described below. Multiple consecutive spaces are handled properly: strings
|
||||
/// that have two (or more) spaces after a full sentence will look good even
|
||||
/// when the spaces occur at a word wrapping boundary. Trailing spaces are
|
||||
/// ignored at the end of a line or at the end of a string.
|
||||
///
|
||||
/// The following control characters have been repurposed:
|
||||
///
|
||||
/// - `\t` = Zero-width space allows a line break without producing any
|
||||
/// output by itself. This can be useful after hard hyphens as
|
||||
/// hyphens aren't otherwise used for line breaking. This can also
|
||||
/// be useful in languages that don't use spaces between words.
|
||||
/// (The Unicode character U+200B isn't supported.)
|
||||
/// - `\b` = Text between a pair of `\b` characters is treated as an
|
||||
/// unbreakable block (not wrapped even if there are spaces).
|
||||
/// For example, a non-breaking space can be done like
|
||||
/// in `"123\b \bMiB"`. Control characters (like `\n` or `\t`)
|
||||
/// aren't allowed before the closing `\b`. If closing `\b` is
|
||||
/// missing, the block extends to the end of the string. Empty
|
||||
/// blocks are treated as zero-width characters. If line breaks
|
||||
/// are possible around an empty block (like in `"foo \b\b bar"`
|
||||
/// or `"foo \b"`), it can result in weird output.
|
||||
/// - `\v` = Change to alternative indentation (left2_margin).
|
||||
/// - `\r` = Reset back to the initial indentation and add a newline.
|
||||
/// The next line will be indented by left_margin.
|
||||
/// - `\n` = Add a newline without resetting the effect of `\v`. The
|
||||
/// next line will be indented by left_margin or left2_margin
|
||||
/// (not left_cont or left2_cont).
|
||||
///
|
||||
/// Only `\n` should appear in translatable strings. `\t` works too but
|
||||
/// even that might confuse some translators even if there is a TRANSLATORS
|
||||
/// comment explaining its meaning.
|
||||
///
|
||||
/// To use the other control characters in messages, one should use
|
||||
/// tuklib_wrapf() with appropriate printf format string to combine
|
||||
/// translatable strings with non-translatable portions. For example:
|
||||
///
|
||||
/// \code{.c}
|
||||
/// static const struct tuklib_wrap_opt wrap2 = { 2, 2, 22, 22, 79 };
|
||||
/// int e = 0;
|
||||
/// ...
|
||||
/// e |= tuklib_wrapf(stdout, &wrap2,
|
||||
/// "-h, --help\v%s\r"
|
||||
/// " --version\v%s",
|
||||
/// W_("display this help and exit"),
|
||||
/// W_("display version information and exit"));
|
||||
/// ...
|
||||
/// if (e != 0) {
|
||||
/// // Handle warning or error.
|
||||
/// ...
|
||||
/// }
|
||||
/// \endcode
|
||||
///
|
||||
/// Control characters other than `\n` and `\t` are unusable in
|
||||
/// translatable strings:
|
||||
///
|
||||
/// - Gettext tools show annoying warnings if C escape sequences other
|
||||
/// than `\n` or `\t` are seen. (Otherwise they still work perfectly
|
||||
/// fine though.)
|
||||
///
|
||||
/// - While at least Poedit and Lokalize support all escapes, some
|
||||
/// editors only support `\n` and `\t`.
|
||||
///
|
||||
/// - They could confuse some translators, resulting in broken
|
||||
/// translations.
|
||||
///
|
||||
/// Using non-control characters would solve some issues but it wouldn't
|
||||
/// help with the unfortunate real-world issue that some translators would
|
||||
/// likely have trouble understanding a new syntax. The Gettext manual
|
||||
/// specifically warns about this, see the subheading "No unusual markup"
|
||||
/// in `info (gettext)Preparing Strings`. (While using `\t` for zero-width
|
||||
/// space is such custom markup, most translators will never need it.)
|
||||
///
|
||||
/// Translators can use the Unicode character U+00A0 (or U+202F) if they
|
||||
/// need a non-breaking space. For example, in French a non-breaking space
|
||||
/// may be needed before colons and question marks (U+00A0 is common in
|
||||
/// real-world French PO files).
|
||||
///
|
||||
/// Using a non-ASCII char in a string in the C code (like `"123\u00A0MiB"`)
|
||||
/// can work if one tells xgettext that input encoding is UTF-8, one
|
||||
/// ensures that the C compiler uses UTF-8 as the input charset, and one
|
||||
/// is certain that the program is *always* run under an UTF-8 locale.
|
||||
/// Unfortunately a portable program cannot make these kinds of assumptions,
|
||||
/// which means that there is no pretty way to have a non-breaking space in
|
||||
/// a translatable string.
|
||||
///
|
||||
/// Optional: To tell translators which strings are automatically word
|
||||
/// wrapped, see the macro `W_` in tuklib_gettext.h.
|
||||
///
|
||||
/// \param stream Output FILE stream. For decent performance, it
|
||||
/// should be in buffered mode because this function
|
||||
/// writes the output one byte at a time with putc().
|
||||
/// \param opt Word wrapping options.
|
||||
/// \param str Null-terminated multibyte string that is in
|
||||
/// the encoding used by the current locale.
|
||||
///
|
||||
/// \return Returns 0 on success. If an error or warning occurs, one of
|
||||
/// TUKLIB_WRAP_* codes is returned. Those codes are powers
|
||||
/// of two. When warning/error detection can be delayed, the
|
||||
/// return values can be accumulated from multiple calls using
|
||||
/// bitwise-or into a single variable which can be checked after
|
||||
/// all strings have (hopefully) been printed.
|
||||
|
||||
#define tuklib_wrapf TUKLIB_SYMBOL(tuklib_wrapf)
|
||||
tuklib_attr_format_printf(3, 4)
|
||||
extern int tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
|
||||
const char *fmt, ...);
|
||||
///<
|
||||
/// \brief Format and word-wrap a multibyte string and write it to a FILE
|
||||
///
|
||||
/// This is like tuklib_wraps() except that this takes a printf
|
||||
/// format string.
|
||||
///
|
||||
/// \note On platforms that lack vasprintf(), the intermediate
|
||||
/// result from vsnprintf() must fit into a 128 KiB buffer.
|
||||
/// TUKLIB_WRAP_ERR_FORMAT is returned if it doesn't but
|
||||
/// only on platforms that lack vasprintf().
|
||||
|
||||
TUKLIB_DECLS_END
|
||||
#endif
|
||||
|
|
@ -91,18 +91,11 @@ tuklib_physmem(void)
|
|||
// supports reporting values greater than 4 GiB. To keep the
|
||||
// code working also on older Windows versions, use
|
||||
// GlobalMemoryStatusEx() conditionally.
|
||||
HMODULE kernel32 = GetModuleHandle(TEXT("kernel32.dll"));
|
||||
HMODULE kernel32 = GetModuleHandleA("kernel32.dll");
|
||||
if (kernel32 != NULL) {
|
||||
typedef BOOL (WINAPI *gmse_type)(LPMEMORYSTATUSEX);
|
||||
#ifdef CAN_DISABLE_WCAST_FUNCTION_TYPE
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wcast-function-type"
|
||||
#endif
|
||||
gmse_type gmse = (gmse_type)GetProcAddress(
|
||||
kernel32, "GlobalMemoryStatusEx");
|
||||
#ifdef CAN_DISABLE_WCAST_FUNCTION_TYPE
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
if (gmse != NULL) {
|
||||
MEMORYSTATUSEX meminfo;
|
||||
meminfo.dwLength = sizeof(meminfo);
|
||||
|
|
@ -155,7 +148,7 @@ tuklib_physmem(void)
|
|||
ret += entries[i].end - entries[i].start + 1;
|
||||
|
||||
#elif defined(TUKLIB_PHYSMEM_AIX)
|
||||
ret = _system_configuration.physmem;
|
||||
ret = (uint64_t)_system_configuration.physmem;
|
||||
|
||||
#elif defined(TUKLIB_PHYSMEM_SYSCONF)
|
||||
const long pagesize = sysconf(_SC_PAGESIZE);
|
||||
|
|
|
|||
|
|
@ -67,6 +67,13 @@ This is useful for programs that use main():
|
|||
to the UTF-8 code page and aren't distinguishable from
|
||||
filenames that contain the actual replacement character U+FFFD.
|
||||
|
||||
FindFirstFileA() and FindFirstFileExA() also suffer from the above
|
||||
issue where unpaired surrogates become U+FFFD. Another issue is
|
||||
that filenames may require more bytes in UTF-8 than in a legacy
|
||||
code page. In UTF-8, a very long filename may exceed MAX_PATH bytes
|
||||
and thus these APIs cannot list such filenames anymore because
|
||||
WIN32_FIND_DATAA has a member "CHAR cFileName[MAX_PATH]".
|
||||
|
||||
If different programs use different code pages, compatibility issues
|
||||
are possible. For example, if one program produces a list of
|
||||
filenames and another program reads it, both programs should use
|
||||
|
|
@ -82,11 +89,18 @@ when writing to console with printf(). With UCRT it works.
|
|||
Long path names
|
||||
---------------
|
||||
|
||||
The manifest enables support for path names longer than 259
|
||||
characters if the feature has been enabled in the Windows registry.
|
||||
Omit the longPathAware element from the manifest if the application
|
||||
isn't compatible with it. For example, uses of MAX_PATH might be
|
||||
a sign of incompatibility.
|
||||
The manifest enables support for path names longer than 260 wide
|
||||
characters (UTF-16 code units) if the feature has been enabled in
|
||||
the Windows registry. Omit the longPathAware element from the manifest
|
||||
if the application isn't compatible with it. For example, some uses
|
||||
of MAX_PATH might be a sign of incompatibility.
|
||||
|
||||
Note that UTF-8 encoded filenames can exceed MAX_PATH (260) bytes when
|
||||
the UTF-16 form is still within MAX_PATH wide characters. In this
|
||||
situation the application doesn't need to be long path aware: functions
|
||||
like _open() work with UTF-8 names that exceed MAX_PATH bytes if the
|
||||
wide character form stays within MAX_PATH wide characters. (MAX_PATH
|
||||
includes the terminating null character.)
|
||||
|
||||
Documentation of the registry setting:
|
||||
https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry#enable-long-paths-in-windows-10-version-1607-and-later
|
||||
|
|
@ -123,9 +137,9 @@ trustInfo
|
|||
longPathAware
|
||||
|
||||
Declare the application as long path aware. This way many file
|
||||
system operations aren't limited by MAX_PATH (260 characters
|
||||
including the terminating null character) if the feature has
|
||||
also been enabled in the Windows registry.
|
||||
system operations aren't limited to MAX_PATH (260) wide characters
|
||||
(including the terminating null character). The feature has to be
|
||||
enabled in the Windows registry too.
|
||||
|
||||
activeCodePage
|
||||
|
||||
|
|
|
|||
|
|
@ -96,3 +96,100 @@ typedef struct {
|
|||
uint32_t start_offset;
|
||||
|
||||
} lzma_options_bcj;
|
||||
|
||||
|
||||
/**
|
||||
* \brief Raw ARM64 BCJ encoder
|
||||
*
|
||||
* This is for special use cases only.
|
||||
*
|
||||
* \param start_offset The lowest 32 bits of the offset in the
|
||||
* executable being filtered. For the ARM64
|
||||
* filter, this must be a multiple of four.
|
||||
* For the very best results, this should also
|
||||
* be in sync with 4096-byte page boundaries
|
||||
* in the executable due to how ARM64's ADRP
|
||||
* instruction works.
|
||||
* \param buf Buffer to be filtered in place
|
||||
* \param size Size of the buffer
|
||||
*
|
||||
* \return Number of bytes that were processed in `buf`. This is at most
|
||||
* `size`. With the ARM64 filter, the return value is always
|
||||
* a multiple of 4, and at most 3 bytes are left unfiltered.
|
||||
*
|
||||
* \since 5.7.1alpha
|
||||
*/
|
||||
extern LZMA_API(size_t) lzma_bcj_arm64_encode(
|
||||
uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow;
|
||||
|
||||
/**
|
||||
* \brief Raw ARM64 BCJ decoder
|
||||
*
|
||||
* See lzma_bcj_arm64_encode().
|
||||
*
|
||||
* \since 5.7.1alpha
|
||||
*/
|
||||
extern LZMA_API(size_t) lzma_bcj_arm64_decode(
|
||||
uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow;
|
||||
|
||||
|
||||
/**
|
||||
* \brief Raw RISC-V BCJ encoder
|
||||
*
|
||||
* This is for special use cases only.
|
||||
*
|
||||
* \param start_offset The lowest 32 bits of the offset in the
|
||||
* executable being filtered. For the RISC-V
|
||||
* filter, this must be a multiple of 2.
|
||||
* \param buf Buffer to be filtered in place
|
||||
* \param size Size of the buffer
|
||||
*
|
||||
* \return Number of bytes that were processed in `buf`. This is at most
|
||||
* `size`. With the RISC-V filter, the return value is always
|
||||
* a multiple of 2, and at most 7 bytes are left unfiltered.
|
||||
*
|
||||
* \since 5.7.1alpha
|
||||
*/
|
||||
extern LZMA_API(size_t) lzma_bcj_riscv_encode(
|
||||
uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow;
|
||||
|
||||
/**
|
||||
* \brief Raw RISC-V BCJ decoder
|
||||
*
|
||||
* See lzma_bcj_riscv_encode().
|
||||
*
|
||||
* \since 5.7.1alpha
|
||||
*/
|
||||
extern LZMA_API(size_t) lzma_bcj_riscv_decode(
|
||||
uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow;
|
||||
|
||||
|
||||
/**
|
||||
* \brief Raw x86 BCJ encoder
|
||||
*
|
||||
* This is for special use cases only.
|
||||
*
|
||||
* \param start_offset The lowest 32 bits of the offset in the
|
||||
* executable being filtered. For the x86
|
||||
* filter, all values are valid.
|
||||
* \param buf Buffer to be filtered in place
|
||||
* \param size Size of the buffer
|
||||
*
|
||||
* \return Number of bytes that were processed in `buf`. This is at most
|
||||
* `size`. For the x86 filter, the return value is always
|
||||
* a multiple of 1, and at most 4 bytes are left unfiltered.
|
||||
*
|
||||
* \since 5.7.1alpha
|
||||
*/
|
||||
extern LZMA_API(size_t) lzma_bcj_x86_encode(
|
||||
uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow;
|
||||
|
||||
/**
|
||||
* \brief Raw x86 BCJ decoder
|
||||
*
|
||||
* See lzma_bcj_x86_encode().
|
||||
*
|
||||
* \since 5.7.1alpha
|
||||
*/
|
||||
extern LZMA_API(size_t) lzma_bcj_x86_decode(
|
||||
uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow;
|
||||
|
|
|
|||
|
|
@ -573,7 +573,7 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
|
|||
* The action argument must be LZMA_FINISH and the return value will never be
|
||||
* LZMA_OK. Thus the encoding is always done with a single lzma_code() after
|
||||
* the initialization. The benefit of the combination of initialization
|
||||
* function and lzma_code() is that memory allocations can be re-used for
|
||||
* function and lzma_code() is that memory allocations can be reused for
|
||||
* better performance.
|
||||
*
|
||||
* lzma_code() will try to encode as much input as is possible to fit into
|
||||
|
|
|
|||
|
|
@ -461,7 +461,7 @@ typedef struct {
|
|||
*
|
||||
* ext_size_low holds the least significant 32 bits of the
|
||||
* uncompressed size. The most significant 32 bits must be set
|
||||
* in ext_size_high. The macro lzma_ext_size_set(opt_lzma, u64size)
|
||||
* in ext_size_high. The macro lzma_set_ext_size(opt_lzma, u64size)
|
||||
* can be used to set these members.
|
||||
*
|
||||
* The 64-bit uncompressed size is split into two uint32_t variables
|
||||
|
|
|
|||
|
|
@ -19,10 +19,10 @@
|
|||
#define LZMA_VERSION_MAJOR 5
|
||||
|
||||
/** \brief Minor version number of the liblzma release. */
|
||||
#define LZMA_VERSION_MINOR 6
|
||||
#define LZMA_VERSION_MINOR 8
|
||||
|
||||
/** \brief Patch version number of the liblzma release. */
|
||||
#define LZMA_VERSION_PATCH 3
|
||||
#define LZMA_VERSION_PATCH 1
|
||||
|
||||
/**
|
||||
* \brief Version stability marker
|
||||
|
|
|
|||
|
|
@ -95,24 +95,6 @@ typedef struct {
|
|||
} lzma_check_state;
|
||||
|
||||
|
||||
/// lzma_crc32_table[0] is needed by LZ encoder so we need to keep
|
||||
/// the array two-dimensional.
|
||||
#ifdef HAVE_SMALL
|
||||
lzma_attr_visibility_hidden
|
||||
extern uint32_t lzma_crc32_table[1][256];
|
||||
|
||||
extern void lzma_crc32_init(void);
|
||||
|
||||
#else
|
||||
|
||||
lzma_attr_visibility_hidden
|
||||
extern const uint32_t lzma_crc32_table[8][256];
|
||||
|
||||
lzma_attr_visibility_hidden
|
||||
extern const uint64_t lzma_crc64_table[4][256];
|
||||
#endif
|
||||
|
||||
|
||||
/// \brief Initialize *check depending on type
|
||||
extern void lzma_check_init(lzma_check_state *check, lzma_check type);
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
//
|
||||
// Authors: Chenxi Mao
|
||||
// Jia Tan
|
||||
// Hans Jansen
|
||||
// Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
@ -49,25 +49,50 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
|
|||
{
|
||||
crc = ~crc;
|
||||
|
||||
// Align the input buffer because this was shown to be
|
||||
// significantly faster than unaligned accesses.
|
||||
const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);
|
||||
if (size >= 8) {
|
||||
// Align the input buffer because this was shown to be
|
||||
// significantly faster than unaligned accesses.
|
||||
const size_t align = (0 - (uintptr_t)buf) & 7;
|
||||
|
||||
for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
|
||||
crc = __crc32b(crc, *buf);
|
||||
if (align & 1)
|
||||
crc = __crc32b(crc, *buf++);
|
||||
|
||||
size -= align_amount;
|
||||
if (align & 2) {
|
||||
crc = __crc32h(crc, aligned_read16le(buf));
|
||||
buf += 2;
|
||||
}
|
||||
|
||||
// Process 8 bytes at a time. The end point is determined by
|
||||
// ignoring the least significant three bits of size to ensure
|
||||
// we do not process past the bounds of the buffer. This guarantees
|
||||
// that limit is a multiple of 8 and is strictly less than size.
|
||||
for (const uint8_t *limit = buf + (size & ~(size_t)7);
|
||||
buf < limit; buf += 8)
|
||||
crc = __crc32d(crc, aligned_read64le(buf));
|
||||
if (align & 4) {
|
||||
crc = __crc32w(crc, aligned_read32le(buf));
|
||||
buf += 4;
|
||||
}
|
||||
|
||||
size -= align;
|
||||
|
||||
// Process 8 bytes at a time. The end point is determined by
|
||||
// ignoring the least significant three bits of size to
|
||||
// ensure we do not process past the bounds of the buffer.
|
||||
// This guarantees that the number of bytes processed here
|
||||
// is a multiple of 8 and does not exceed size.
|
||||
for (const uint8_t *limit = buf + (size & ~(size_t)7);
|
||||
buf < limit; buf += 8)
|
||||
crc = __crc32d(crc, aligned_read64le(buf));
|
||||
|
||||
size &= 7;
|
||||
}
|
||||
|
||||
// Process the remaining bytes that are not 8 byte aligned.
|
||||
for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)
|
||||
if (size & 4) {
|
||||
crc = __crc32w(crc, aligned_read32le(buf));
|
||||
buf += 4;
|
||||
}
|
||||
|
||||
if (size & 2) {
|
||||
crc = __crc32h(crc, aligned_read16le(buf));
|
||||
buf += 2;
|
||||
}
|
||||
|
||||
if (size & 1)
|
||||
crc = __crc32b(crc, *buf);
|
||||
|
||||
return ~crc;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
//
|
||||
// Authors: Lasse Collin
|
||||
// Ilya Kurdyukov
|
||||
// Hans Jansen
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
@ -15,10 +14,12 @@
|
|||
#include "crc_common.h"
|
||||
|
||||
#if defined(CRC_X86_CLMUL)
|
||||
# define BUILDING_CRC32_CLMUL
|
||||
# define BUILDING_CRC_CLMUL 32
|
||||
# include "crc_x86_clmul.h"
|
||||
#elif defined(CRC32_ARM64)
|
||||
# include "crc32_arm64.h"
|
||||
#elif defined(CRC32_LOONGARCH)
|
||||
# include "crc32_loongarch.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -28,8 +29,19 @@
|
|||
// Generic CRC32 //
|
||||
///////////////////
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
# include "crc32_table_be.h"
|
||||
#else
|
||||
# include "crc32_table_le.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_CRC_X86_ASM
|
||||
extern uint32_t lzma_crc32_generic(
|
||||
const uint8_t *buf, size_t size, uint32_t crc);
|
||||
#else
|
||||
static uint32_t
|
||||
crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
|
||||
lzma_crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
|
||||
{
|
||||
crc = ~crc;
|
||||
|
||||
|
|
@ -85,7 +97,8 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
|
|||
|
||||
return ~crc;
|
||||
}
|
||||
#endif
|
||||
#endif // HAVE_CRC_X86_ASM
|
||||
#endif // CRC32_GENERIC
|
||||
|
||||
|
||||
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
|
||||
|
|
@ -119,7 +132,7 @@ static crc32_func_type
|
|||
crc32_resolve(void)
|
||||
{
|
||||
return is_arch_extension_supported()
|
||||
? &crc32_arch_optimized : &crc32_generic;
|
||||
? &crc32_arch_optimized : &lzma_crc32_generic;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -164,27 +177,6 @@ extern LZMA_API(uint32_t)
|
|||
lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
|
||||
{
|
||||
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
|
||||
// On x86-64, if CLMUL is available, it is the best for non-tiny
|
||||
// inputs, being over twice as fast as the generic slice-by-four
|
||||
// version. However, for size <= 16 it's different. In the extreme
|
||||
// case of size == 1 the generic version can be five times faster.
|
||||
// At size >= 8 the CLMUL starts to become reasonable. It
|
||||
// varies depending on the alignment of buf too.
|
||||
//
|
||||
// The above doesn't include the overhead of mythread_once().
|
||||
// At least on x86-64 GNU/Linux, pthread_once() is very fast but
|
||||
// it still makes lzma_crc32(buf, 1, crc) 50-100 % slower. When
|
||||
// size reaches 12-16 bytes the overhead becomes negligible.
|
||||
//
|
||||
// So using the generic version for size <= 16 may give better
|
||||
// performance with tiny inputs but if such inputs happen rarely
|
||||
// it's not so obvious because then the lookup table of the
|
||||
// generic version may not be in the processor cache.
|
||||
#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
||||
if (size <= 16)
|
||||
return crc32_generic(buf, size, crc);
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
|
||||
// See crc32_dispatch(). This would be the alternative which uses
|
||||
|
|
@ -199,6 +191,6 @@ lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
|
|||
return crc32_arch_optimized(buf, size, crc);
|
||||
|
||||
#else
|
||||
return crc32_generic(buf, size, crc);
|
||||
return lzma_crc32_generic(buf, size, crc);
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
65
src/liblzma/check/crc32_loongarch.h
Normal file
65
src/liblzma/check/crc32_loongarch.h
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file crc32_loongarch.h
|
||||
/// \brief CRC32 calculation with LoongArch optimization
|
||||
//
|
||||
// Authors: Xi Ruoyao
|
||||
// Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef LZMA_CRC32_LOONGARCH_H
|
||||
#define LZMA_CRC32_LOONGARCH_H
|
||||
|
||||
#include <larchintrin.h>
|
||||
|
||||
|
||||
/// \brief      Calculate CRC32 using the LoongArch CRC intrinsics
///
/// \param      buf           Input bytes
/// \param      size          Number of bytes in buf
/// \param      crc_unsigned  CRC value to continue from
///
/// \return     Updated CRC32 value
static uint32_t
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc_unsigned)
{
	// The intrinsics are called with signed operands below, so the
	// running value is kept as int32_t and converted back on return.
	int32_t crc = (int32_t)~crc_unsigned;

	if (size >= 8) {
		// Number of bytes (0-7) until buf is 8-byte aligned.
		const size_t head = (0 - (uintptr_t)buf) & 7;

		// Consume the head in 1-, 2-, and 4-byte steps so that
		// each wider read is done from a suitably aligned address.
		if (head & 1) {
			crc = __crc_w_b_w((int8_t)buf[0], crc);
			buf += 1;
		}

		if (head & 2) {
			crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc);
			buf += 2;
		}

		if (head & 4) {
			crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc);
			buf += 4;
		}

		size -= head;

		// Main loop: whole 8-byte groups only.
		const uint8_t *const end = buf + (size & ~(size_t)7);
		while (buf < end) {
			crc = __crc_w_d_w((int64_t)aligned_read64le(buf), crc);
			buf += 8;
		}

		// At most 7 bytes remain.
		size &= 7;
	}

	// Tail (or the whole input when it was shorter than 8 bytes):
	// 4, 2, and 1 bytes as indicated by the low bits of size.
	if (size & 4) {
		crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc);
		buf += 4;
	}

	if (size & 2) {
		crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc);
		buf += 2;
	}

	if (size & 1) {
		crc = __crc_w_b_w((int8_t)buf[0], crc);
	}

	return (uint32_t)~crc;
}
|
||||
|
||||
#endif // LZMA_CRC32_LOONGARCH_H
|
||||
|
|
@ -10,8 +10,11 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "check.h"
|
||||
#include "crc_common.h"
|
||||
|
||||
|
||||
// The table is used by the LZ encoder too, thus it's not static like
|
||||
// in crc64_small.c.
|
||||
uint32_t lzma_crc32_table[1][256];
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file crc32_table.c
|
||||
/// \brief Precalculated CRC32 table with correct endianness
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
// FIXME: Compared to crc_common.h this has to check for __x86_64__ too
|
||||
// so that in 32-bit builds crc32_x86.S won't break due to a missing table.
|
||||
#if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \
|
||||
&& defined(__SSE4_1__) && defined(__PCLMUL__)) \
|
||||
|| (defined(__e2k__) && __iset__ >= 6))
|
||||
# define NO_CRC32_TABLE
|
||||
|
||||
#elif defined(HAVE_ARM64_CRC32) \
|
||||
&& !defined(WORDS_BIGENDIAN) \
|
||||
&& defined(__ARM_FEATURE_CRC32)
|
||||
# define NO_CRC32_TABLE
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(HAVE_ENCODERS) && defined(NO_CRC32_TABLE)
|
||||
// No table needed. Use a typedef to avoid an empty translation unit.
|
||||
typedef void lzma_crc32_dummy;
|
||||
|
||||
#else
|
||||
// Having the declaration here silences clang -Wmissing-variable-declarations.
|
||||
extern const uint32_t lzma_crc32_table[8][256];
|
||||
|
||||
# ifdef WORDS_BIGENDIAN
|
||||
# include "crc32_table_be.h"
|
||||
# else
|
||||
# include "crc32_table_le.h"
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -67,7 +67,7 @@ init_table(void)
|
|||
#endif
|
||||
#define MAKE_SYM_CAT(prefix, sym) prefix ## sym
|
||||
#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym)
|
||||
#define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32)
|
||||
#define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_generic)
|
||||
#define LZMA_CRC32_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_table)
|
||||
|
||||
/*
|
||||
|
|
@ -82,6 +82,9 @@ init_table(void)
|
|||
|
||||
.text
|
||||
.globl LZMA_CRC32
|
||||
#ifdef __ELF__
|
||||
.hidden LZMA_CRC32
|
||||
#endif
|
||||
|
||||
#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \
|
||||
&& !defined(__MSDOS__)
|
||||
|
|
@ -290,14 +293,7 @@ LZMA_CRC32:
|
|||
.indirect_symbol LZMA_CRC32_TABLE
|
||||
.long 0
|
||||
|
||||
#elif defined(_WIN32) || defined(__CYGWIN__)
|
||||
# ifdef DLL_EXPORT
|
||||
/* This is equivalent of __declspec(dllexport). */
|
||||
.section .drectve
|
||||
.ascii " -export:lzma_crc32"
|
||||
# endif
|
||||
|
||||
#elif !defined(__MSDOS__)
|
||||
#elif !defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MSDOS__)
|
||||
/* ELF */
|
||||
.size LZMA_CRC32, .-LZMA_CRC32
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
#include "crc_common.h"
|
||||
|
||||
#if defined(CRC_X86_CLMUL)
|
||||
# define BUILDING_CRC64_CLMUL
|
||||
# define BUILDING_CRC_CLMUL 64
|
||||
# include "crc_x86_clmul.h"
|
||||
#endif
|
||||
|
||||
|
|
@ -25,6 +25,18 @@
|
|||
// Generic slice-by-four CRC64 //
|
||||
/////////////////////////////////
|
||||
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
# include "crc64_table_be.h"
|
||||
#else
|
||||
# include "crc64_table_le.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_CRC_X86_ASM
|
||||
extern uint64_t lzma_crc64_generic(
|
||||
const uint8_t *buf, size_t size, uint64_t crc);
|
||||
#else
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
# define A1(x) ((x) >> 56)
|
||||
#else
|
||||
|
|
@ -34,7 +46,7 @@
|
|||
|
||||
// See the comments in crc32_fast.c. They aren't duplicated here.
|
||||
static uint64_t
|
||||
crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
|
||||
lzma_crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
|
||||
{
|
||||
crc = ~crc;
|
||||
|
||||
|
|
@ -78,7 +90,8 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
|
|||
|
||||
return ~crc;
|
||||
}
|
||||
#endif
|
||||
#endif // HAVE_CRC_X86_ASM
|
||||
#endif // CRC64_GENERIC
|
||||
|
||||
|
||||
#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED)
|
||||
|
|
@ -97,7 +110,7 @@ static crc64_func_type
|
|||
crc64_resolve(void)
|
||||
{
|
||||
return is_arch_extension_supported()
|
||||
? &crc64_arch_optimized : &crc64_generic;
|
||||
? &crc64_arch_optimized : &lzma_crc64_generic;
|
||||
}
|
||||
|
||||
#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
|
||||
|
|
@ -133,24 +146,24 @@ crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc)
|
|||
extern LZMA_API(uint64_t)
|
||||
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
|
||||
{
|
||||
#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED)
|
||||
|
||||
#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
||||
if (size <= 16)
|
||||
return crc64_generic(buf, size, crc);
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
|
||||
&& defined(_M_IX86) && defined(CRC64_ARCH_OPTIMIZED)
|
||||
// VS2015-2022 might corrupt the ebx register on 32-bit x86 when
|
||||
// the CLMUL code is enabled. This hack forces MSVC to store and
|
||||
// restore ebx. This is only needed here, not in lzma_crc32().
|
||||
__asm mov ebx, ebx
|
||||
#endif
|
||||
|
||||
#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED)
|
||||
return crc64_func(buf, size, crc);
|
||||
|
||||
#elif defined(CRC64_ARCH_OPTIMIZED)
|
||||
// If arch-optimized version is used unconditionally without runtime
|
||||
// CPU detection then omitting the generic version and its 8 KiB
|
||||
// lookup table makes the library smaller.
|
||||
//
|
||||
// FIXME: Lookup table isn't currently omitted on 32-bit x86,
|
||||
// see crc64_table.c.
|
||||
return crc64_arch_optimized(buf, size, crc);
|
||||
|
||||
#else
|
||||
return crc64_generic(buf, size, crc);
|
||||
return lzma_crc64_generic(buf, size, crc);
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file crc64_table.c
|
||||
/// \brief Precalculated CRC64 table with correct endianness
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
// FIXME: Compared to crc_common.h this has to check for __x86_64__ too
|
||||
// so that in 32-bit builds crc64_x86.S won't break due to a missing table.
|
||||
#if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \
|
||||
&& defined(__SSE4_1__) && defined(__PCLMUL__)) \
|
||||
|| (defined(__e2k__) && __iset__ >= 6))
|
||||
# define NO_CRC64_TABLE
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef NO_CRC64_TABLE
|
||||
// No table needed. Use a typedef to avoid an empty translation unit.
|
||||
typedef void lzma_crc64_dummy;
|
||||
|
||||
#else
|
||||
// Having the declaration here silences clang -Wmissing-variable-declarations.
|
||||
extern const uint64_t lzma_crc64_table[4][256];
|
||||
|
||||
# if defined(WORDS_BIGENDIAN)
|
||||
# include "crc64_table_be.h"
|
||||
# else
|
||||
# include "crc64_table_le.h"
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -57,7 +57,7 @@ init_table(void)
|
|||
#endif
|
||||
#define MAKE_SYM_CAT(prefix, sym) prefix ## sym
|
||||
#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym)
|
||||
#define LZMA_CRC64 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64)
|
||||
#define LZMA_CRC64 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_generic)
|
||||
#define LZMA_CRC64_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_table)
|
||||
|
||||
/*
|
||||
|
|
@ -72,6 +72,9 @@ init_table(void)
|
|||
|
||||
.text
|
||||
.globl LZMA_CRC64
|
||||
#ifdef __ELF__
|
||||
.hidden LZMA_CRC64
|
||||
#endif
|
||||
|
||||
#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \
|
||||
&& !defined(__MSDOS__)
|
||||
|
|
@ -273,14 +276,7 @@ LZMA_CRC64:
|
|||
.indirect_symbol LZMA_CRC64_TABLE
|
||||
.long 0
|
||||
|
||||
#elif defined(_WIN32) || defined(__CYGWIN__)
|
||||
# ifdef DLL_EXPORT
|
||||
/* This is equivalent of __declspec(dllexport). */
|
||||
.section .drectve
|
||||
.ascii " -export:lzma_crc64"
|
||||
# endif
|
||||
|
||||
#elif !defined(__MSDOS__)
|
||||
#elif !defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MSDOS__)
|
||||
/* ELF */
|
||||
.size LZMA_CRC64, .-LZMA_CRC64
|
||||
#endif
|
||||
|
|
|
|||
160
src/liblzma/check/crc_clmul_consts_gen.c
Normal file
160
src/liblzma/check/crc_clmul_consts_gen.c
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
// SPDX-License-Identifier: 0BSD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file crc_clmul_consts_gen.c
|
||||
/// \brief Generate constants for CLMUL CRC code
|
||||
///
|
||||
/// Compiling: gcc -std=c99 -o crc_clmul_consts_gen crc_clmul_consts_gen.c
|
||||
///
|
||||
/// This is for CRCs that use reversed bit order (bit reflection).
|
||||
/// The same CLMUL CRC code can be used with CRC64 and smaller ones like
|
||||
/// CRC32 apart from one special case: CRC64 needs an extra step in the
|
||||
/// Barrett reduction to handle the 65th bit; the smaller ones don't.
|
||||
/// Otherwise it's enough to just change the polynomial and the derived
|
||||
/// constants and use the same code.
|
||||
///
|
||||
/// See the Intel white paper "Fast CRC Computation for Generic Polynomials
|
||||
/// Using PCLMULQDQ Instruction" from 2009.
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
/// CRC32 (Ethernet) polynomial in reversed representation
|
||||
static const uint64_t p32 = 0xedb88320;
|
||||
|
||||
// CRC64 (ECMA-182) polynomial in reversed representation
|
||||
static const uint64_t p64 = 0xc96c5795d7870f42;
|
||||
|
||||
|
||||
/// Carryless (polynomial) division: returns floor(x^128 / p) where p is
/// a CRC64 polynomial in reversed representation. The returned quotient
/// is in reversed representation as well.
static uint64_t
calc_cldiv(uint64_t p)
{
	// The highest bit of the quotient is an implied 1 (conceptually at
	// position "1 << -1"), so the stored quotient starts from zero.
	uint64_t quotient = 0;

	// First step of the long division: with the x^64 term of the
	// divisor aligned to x^128 of the dividend, the first remainder
	// is simply p. The remainder has no implied high bit; only the
	// quotient and p do.
	uint64_t rem = p;

	// Handle the remaining 64 terms. Because of the reversed bit
	// order, the highest term of the polynomial lives in the lowest
	// bit of the variable.
	for (unsigned bit = 0; bit < 64; ++bit) {
		const uint64_t low = rem & 1;

		quotient |= low << bit;

		rem >>= 1;
		if (low)
			rem ^= p;
	}

	return quotient;
}
|
||||
|
||||
|
||||
/// Remainder of carryless division:
///
///     x^(bits + n - 1) % p, where n=64 (for CRC64)
///
/// p must be in reversed representation, which leaves out the bit of
/// the highest term of the polynomial. That bit is implied, as if it sat
/// at a "1 << -1" position and had just been shifted out.
///
/// The return value is in the reversed bit order and has no implied bits.
static uint64_t
calc_clrem(uint64_t p, unsigned bits)
{
	// The first step of the polynomial long division yields p itself.
	uint64_t rem = p;

	// Process the remaining terms. Counting starts from 1 instead of 0
	// to account for the -1 in x^(bits + n - 1), which is convenient
	// with the reversed bit order. See the "Bit-Reflection" section in
	// the Intel white paper.
	unsigned step = 1;
	while (step < bits) {
		const uint64_t feedback = (rem & 1) ? p : 0;
		rem = (rem >> 1) ^ feedback;
		++step;
	}

	return rem;
}
|
||||
|
||||
|
||||
extern int
|
||||
main(void)
|
||||
{
|
||||
puts("// CRC64");
|
||||
|
||||
// The order of the two 64-bit constants in a vector don't matter.
|
||||
// It feels logical to put them in this order as it matches the
|
||||
// order in which the input bytes are read.
|
||||
printf("const __m128i fold512 = _mm_set_epi64x("
|
||||
"0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
|
||||
calc_clrem(p64, 4 * 128 - 64),
|
||||
calc_clrem(p64, 4 * 128));
|
||||
|
||||
printf("const __m128i fold128 = _mm_set_epi64x("
|
||||
"0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
|
||||
calc_clrem(p64, 128 - 64),
|
||||
calc_clrem(p64, 128));
|
||||
|
||||
// When we multiply by mu, we care about the high bits of the result
|
||||
// (in reversed bit order!). It doesn't matter that the low bit gets
|
||||
// shifted out because the affected output bits will be ignored.
|
||||
// Below we add the implied high bit with "| 1" after the shifting
|
||||
// so that the high bits of the multiplication will be correct.
|
||||
//
|
||||
// p64 is shifted left by one so that the final multiplication
|
||||
// in Barrett reduction won't be misaligned by one bit. We could
|
||||
// use "(p64 << 1) | 1" instead of "p64 << 1" too but it makes
|
||||
// no difference as that bit won't affect the relevant output bits
|
||||
// (we only care about the lowest 64 bits of the result, that is,
|
||||
// lowest in the reversed bit order).
|
||||
//
|
||||
// NOTE: The 65rd bit of p64 gets shifted out. It needs to be
|
||||
// compensated with 64-bit shift and xor in the CRC64 code.
|
||||
printf("const __m128i mu_p = _mm_set_epi64x("
|
||||
"0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
|
||||
(calc_cldiv(p64) << 1) | 1,
|
||||
p64 << 1);
|
||||
|
||||
puts("");
|
||||
|
||||
puts("// CRC32");
|
||||
|
||||
printf("const __m128i fold512 = _mm_set_epi64x("
|
||||
"0x%08" PRIx64 ", 0x%08" PRIx64 ");\n",
|
||||
calc_clrem(p32, 4 * 128 - 64),
|
||||
calc_clrem(p32, 4 * 128));
|
||||
|
||||
printf("const __m128i fold128 = _mm_set_epi64x("
|
||||
"0x%08" PRIx64 ", 0x%08" PRIx64 ");\n",
|
||||
calc_clrem(p32, 128 - 64),
|
||||
calc_clrem(p32, 128));
|
||||
|
||||
// CRC32 calculation is done by modulus scaling it to a CRC64.
|
||||
// Since the CRC is in reversed representation, only the mu
|
||||
// constant changes with the modulus scaling. This method avoids
|
||||
// one additional constant and one additional clmul in the final
|
||||
// reduction steps, making the code both simpler and faster.
|
||||
//
|
||||
// p32 is shifted left by one so that the final multiplication
|
||||
// in Barrett reduction won't be misaligned by one bit. We could
|
||||
// use "(p32 << 1) | 1" instead of "p32 << 1" too but it makes
|
||||
// no difference as that bit won't affect the relevant output bits.
|
||||
//
|
||||
// NOTE: The 33-bit value fits in 64 bits so, unlike with CRC64,
|
||||
// there is no need to compensate for any missing bits in the code.
|
||||
printf("const __m128i mu_p = _mm_set_epi64x("
|
||||
"0x%016" PRIx64 ", 0x%" PRIx64 ");\n",
|
||||
(calc_cldiv(p32) << 1) | 1,
|
||||
p32 << 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -3,11 +3,10 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file crc_common.h
|
||||
/// \brief Some functions and macros for CRC32 and CRC64
|
||||
/// \brief Macros and declarations for CRC32 and CRC64
|
||||
//
|
||||
// Authors: Lasse Collin
|
||||
// Ilya Kurdyukov
|
||||
// Hans Jansen
|
||||
// Jia Tan
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -18,6 +17,10 @@
|
|||
#include "common.h"
|
||||
|
||||
|
||||
/////////////
|
||||
// Generic //
|
||||
/////////////
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
# define A(x) ((x) >> 24)
|
||||
# define B(x) (((x) >> 16) & 0xFF)
|
||||
|
|
@ -38,43 +41,63 @@
|
|||
#endif
|
||||
|
||||
|
||||
// CRC CLMUL code needs this because accessing input buffers that aren't
|
||||
// aligned to the vector size will inherently trip the address sanitizer.
|
||||
#if lzma_has_attribute(__no_sanitize_address__)
|
||||
# define crc_attr_no_sanitize_address \
|
||||
__attribute__((__no_sanitize_address__))
|
||||
/// lzma_crc32_table[0] is needed by LZ encoder so we need to keep
|
||||
/// the array two-dimensional.
|
||||
#ifdef HAVE_SMALL
|
||||
lzma_attr_visibility_hidden
|
||||
extern uint32_t lzma_crc32_table[1][256];
|
||||
|
||||
extern void lzma_crc32_init(void);
|
||||
|
||||
#else
|
||||
# define crc_attr_no_sanitize_address
|
||||
#endif
|
||||
|
||||
// Keep this in sync with changes to crc32_arm64.h
|
||||
#if defined(_WIN32) || defined(HAVE_GETAUXVAL) \
|
||||
|| defined(HAVE_ELF_AUX_INFO) \
|
||||
|| (defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME))
|
||||
# define ARM64_RUNTIME_DETECTION 1
|
||||
lzma_attr_visibility_hidden
|
||||
extern const uint32_t lzma_crc32_table[8][256];
|
||||
|
||||
lzma_attr_visibility_hidden
|
||||
extern const uint64_t lzma_crc64_table[4][256];
|
||||
#endif
|
||||
|
||||
|
||||
///////////////////
|
||||
// Configuration //
|
||||
///////////////////
|
||||
|
||||
// NOTE: This config isn't used if HAVE_SMALL is defined!
|
||||
|
||||
// These are defined if the generic slicing-by-n implementations and their
|
||||
// lookup tables are built.
|
||||
#undef CRC32_GENERIC
|
||||
#undef CRC64_GENERIC
|
||||
|
||||
// These are defined if an arch-specific version is built. If both this
|
||||
// and matching _GENERIC is defined then runtime detection must be used.
|
||||
#undef CRC32_ARCH_OPTIMIZED
|
||||
#undef CRC64_ARCH_OPTIMIZED
|
||||
|
||||
// The x86 CLMUL is used for both CRC32 and CRC64.
|
||||
#undef CRC_X86_CLMUL
|
||||
|
||||
// Many ARM64 processors have CRC32 instructions.
|
||||
// CRC64 could be done with CLMUL but it's not implemented yet.
|
||||
#undef CRC32_ARM64
|
||||
#undef CRC64_ARM64_CLMUL
|
||||
|
||||
#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
||||
// 64-bit LoongArch has CRC32 instructions.
|
||||
#undef CRC32_LOONGARCH
|
||||
|
||||
|
||||
// ARM64
|
||||
//
|
||||
// Keep this in sync with changes to crc32_arm64.h
|
||||
#if defined(_WIN32) || defined(HAVE_GETAUXVAL) \
|
||||
|| defined(HAVE_ELF_AUX_INFO) \
|
||||
|| (defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME))
|
||||
# define CRC_ARM64_RUNTIME_DETECTION 1
|
||||
#endif
|
||||
|
||||
// ARM64 CRC32 instruction is only useful for CRC32. Currently, only
|
||||
// little endian is supported since we were unable to test on a big
|
||||
// endian machine.
|
||||
//
|
||||
// NOTE: Keep this and the next check in sync with the macro
|
||||
// NO_CRC32_TABLE in crc32_table.c
|
||||
#if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN)
|
||||
// Allow ARM64 CRC32 instruction without a runtime check if
|
||||
// __ARM_FEATURE_CRC32 is defined. GCC and Clang only define
|
||||
|
|
@ -82,21 +105,40 @@
|
|||
# if defined(__ARM_FEATURE_CRC32)
|
||||
# define CRC32_ARCH_OPTIMIZED 1
|
||||
# define CRC32_ARM64 1
|
||||
# elif defined(ARM64_RUNTIME_DETECTION)
|
||||
# elif defined(CRC_ARM64_RUNTIME_DETECTION)
|
||||
# define CRC32_ARCH_OPTIMIZED 1
|
||||
# define CRC32_ARM64 1
|
||||
# define CRC32_GENERIC 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_USABLE_CLMUL)
|
||||
// If CLMUL is allowed unconditionally in the compiler options then the
|
||||
// generic version can be omitted. Note that this doesn't work with MSVC
|
||||
// as I don't know how to detect the features here.
|
||||
|
||||
// LoongArch
|
||||
//
|
||||
// NOTE: Keep this in sync with the NO_CRC32_TABLE macro in crc32_table.c
|
||||
// and NO_CRC64_TABLE in crc64_table.c.
|
||||
# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \
|
||||
// Only 64-bit LoongArch is supported for now. No runtime detection
|
||||
// is needed because the LoongArch specification says that the CRC32
|
||||
// instructions are a part of the Basic Integer Instructions and
|
||||
// they shall be implemented by 64-bit LoongArch implementations.
|
||||
#ifdef HAVE_LOONGARCH_CRC32
|
||||
# define CRC32_ARCH_OPTIMIZED 1
|
||||
# define CRC32_LOONGARCH 1
|
||||
#endif
|
||||
|
||||
|
||||
// x86 and E2K
|
||||
#if defined(HAVE_USABLE_CLMUL)
|
||||
// If CLMUL is allowed unconditionally in the compiler options then
|
||||
// the generic version and the tables can be omitted. Exceptions:
|
||||
//
|
||||
// - If 32-bit x86 assembly files are enabled then those are always
|
||||
// built and runtime detection is used even if compiler flags
|
||||
// were set to allow CLMUL unconditionally.
|
||||
//
|
||||
// - This doesn't work with MSVC as I don't know how to detect
|
||||
// the features here.
|
||||
//
|
||||
# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__) \
|
||||
&& !defined(HAVE_CRC_X86_ASM)) \
|
||||
|| (defined(__e2k__) && __iset__ >= 6)
|
||||
# define CRC32_ARCH_OPTIMIZED 1
|
||||
# define CRC64_ARCH_OPTIMIZED 1
|
||||
|
|
@ -107,21 +149,12 @@
|
|||
# define CRC32_ARCH_OPTIMIZED 1
|
||||
# define CRC64_ARCH_OPTIMIZED 1
|
||||
# define CRC_X86_CLMUL 1
|
||||
|
||||
/*
|
||||
// The generic code is much faster with 1-8-byte inputs and
|
||||
// has similar performance up to 16 bytes at least in
|
||||
// microbenchmarks (it depends on input buffer alignment
|
||||
// too). If both versions are built, this #define will use
|
||||
// the generic version for inputs up to 16 bytes and CLMUL
|
||||
// for bigger inputs. It saves a little in code size since
|
||||
// the special cases for 0-16-byte inputs will be omitted
|
||||
// from the CLMUL code.
|
||||
# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
|
||||
*/
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// Fallback configuration
|
||||
//
|
||||
// For CRC32 use the generic slice-by-eight implementation if no optimized
|
||||
// version is available.
|
||||
#if !defined(CRC32_ARCH_OPTIMIZED) && !defined(CRC32_GENERIC)
|
||||
|
|
|
|||
|
|
@ -8,26 +8,20 @@
|
|||
/// The CRC32 and CRC64 implementations use 32/64-bit x86 SSSE3, SSE4.1, and
|
||||
/// CLMUL instructions. This is compatible with Elbrus 2000 (E2K) too.
|
||||
///
|
||||
/// They were derived from
|
||||
/// See the Intel white paper "Fast CRC Computation for Generic Polynomials
|
||||
/// Using PCLMULQDQ Instruction" from 2009. The original file seems to be
|
||||
/// gone from Intel's website but a version is available here:
|
||||
/// https://www.researchgate.net/publication/263424619_Fast_CRC_computation
|
||||
/// and the public domain code from https://github.com/rawrunprotected/crc
|
||||
/// (URLs were checked on 2023-10-14).
|
||||
/// (The link was checked on 2024-06-11.)
|
||||
///
|
||||
/// While this file has both CRC32 and CRC64 implementations, only one
|
||||
/// should be built at a time to ensure that crc_simd_body() is inlined
|
||||
/// even with compilers with which lzma_always_inline expands to plain inline.
|
||||
/// The version to build is selected by defining BUILDING_CRC32_CLMUL or
|
||||
/// BUILDING_CRC64_CLMUL before including this file.
|
||||
/// can be built at a time. The version to build is selected by defining
|
||||
/// BUILDING_CRC_CLMUL to 32 or 64 before including this file.
|
||||
///
|
||||
/// FIXME: Builds for 32-bit x86 use the assembly .S files by default
|
||||
/// unless configured with --disable-assembler. Even then the lookup table
|
||||
/// isn't omitted in crc64_table.c since it doesn't know that assembly
|
||||
/// code has been disabled.
|
||||
/// NOTE: The x86 CLMUL CRC implementation was rewritten for XZ Utils 5.8.0.
|
||||
//
|
||||
// Authors: Ilya Kurdyukov
|
||||
// Hans Jansen
|
||||
// Lasse Collin
|
||||
// Jia Tan
|
||||
// Authors: Lasse Collin
|
||||
// Ilya Kurdyukov
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
@ -37,6 +31,10 @@
|
|||
#endif
|
||||
#define LZMA_CRC_X86_CLMUL_H
|
||||
|
||||
#if BUILDING_CRC_CLMUL != 32 && BUILDING_CRC_CLMUL != 64
|
||||
# error BUILDING_CRC_CLMUL is undefined or has an invalid value
|
||||
#endif
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
|
@ -59,330 +57,277 @@
|
|||
#endif
|
||||
|
||||
|
||||
#define MASK_L(in, mask, r) r = _mm_shuffle_epi8(in, mask)
|
||||
|
||||
#define MASK_H(in, mask, r) \
|
||||
r = _mm_shuffle_epi8(in, _mm_xor_si128(mask, vsign))
|
||||
|
||||
#define MASK_LH(in, mask, low, high) \
|
||||
MASK_L(in, mask, low); \
|
||||
MASK_H(in, mask, high)
|
||||
|
||||
|
||||
crc_attr_target
|
||||
crc_attr_no_sanitize_address
|
||||
static lzma_always_inline void
|
||||
crc_simd_body(const uint8_t *buf, const size_t size, __m128i *v0, __m128i *v1,
|
||||
const __m128i vfold16, const __m128i initial_crc)
|
||||
{
|
||||
// Create a vector with 8-bit values 0 to 15. This is used to
|
||||
// construct control masks for _mm_blendv_epi8 and _mm_shuffle_epi8.
|
||||
const __m128i vramp = _mm_setr_epi32(
|
||||
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c);
|
||||
|
||||
// This is used to invert the control mask of _mm_shuffle_epi8
|
||||
// so that bytes that wouldn't be picked with the original mask
|
||||
// will be picked and vice versa.
|
||||
const __m128i vsign = _mm_set1_epi8(-0x80);
|
||||
|
||||
// Memory addresses A to D and the distances between them:
|
||||
//
|
||||
// A B C D
|
||||
// [skip_start][size][skip_end]
|
||||
// [ size2 ]
|
||||
//
|
||||
// A and D are 16-byte aligned. B and C are 1-byte aligned.
|
||||
// skip_start and skip_end are 0-15 bytes. size is at least 1 byte.
|
||||
//
|
||||
// A = aligned_buf will initially point to this address.
|
||||
// B = The address pointed by the caller-supplied buf.
|
||||
// C = buf + size == aligned_buf + size2
|
||||
// D = buf + size + skip_end == aligned_buf + size2 + skip_end
|
||||
const size_t skip_start = (size_t)((uintptr_t)buf & 15);
|
||||
const size_t skip_end = (size_t)((0U - (uintptr_t)(buf + size)) & 15);
|
||||
const __m128i *aligned_buf = (const __m128i *)(
|
||||
(uintptr_t)buf & ~(uintptr_t)15);
|
||||
|
||||
// If size2 <= 16 then the whole input fits into a single 16-byte
|
||||
// vector. If size2 > 16 then at least two 16-byte vectors must
|
||||
// be processed. If size2 > 16 && size <= 16 then there is only
|
||||
// one 16-byte vector's worth of input but it is unaligned in memory.
|
||||
//
|
||||
// NOTE: There is no integer overflow here if the arguments
|
||||
// are valid. If this overflowed, buf + size would too.
|
||||
const size_t size2 = skip_start + size;
|
||||
|
||||
// Masks to be used with _mm_blendv_epi8 and _mm_shuffle_epi8:
|
||||
// The first skip_start or skip_end bytes in the vectors will have
|
||||
// the high bit (0x80) set. _mm_blendv_epi8 and _mm_shuffle_epi8
|
||||
// will produce zeros for these positions. (Bitwise-xor of these
|
||||
// masks with vsign will produce the opposite behavior.)
|
||||
const __m128i mask_start
|
||||
= _mm_sub_epi8(vramp, _mm_set1_epi8((char)skip_start));
|
||||
const __m128i mask_end
|
||||
= _mm_sub_epi8(vramp, _mm_set1_epi8((char)skip_end));
|
||||
|
||||
// Get the first 1-16 bytes into data0. If loading less than 16
|
||||
// bytes, the bytes are loaded to the high bits of the vector and
|
||||
// the least significant positions are filled with zeros.
|
||||
const __m128i data0 = _mm_blendv_epi8(_mm_load_si128(aligned_buf),
|
||||
_mm_setzero_si128(), mask_start);
|
||||
aligned_buf++;
|
||||
|
||||
__m128i v2, v3;
|
||||
|
||||
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
||||
if (size <= 16) {
|
||||
// Right-shift initial_crc by 1-16 bytes based on "size"
|
||||
// and store the result in v1 (high bytes) and v0 (low bytes).
|
||||
//
|
||||
// NOTE: The highest 8 bytes of initial_crc are zeros so
|
||||
// v1 will be filled with zeros if size >= 8. The highest
|
||||
// 8 bytes of v1 will always become zeros.
|
||||
//
|
||||
// [ v1 ][ v0 ]
|
||||
// [ initial_crc ] size == 1
|
||||
// [ initial_crc ] size == 2
|
||||
// [ initial_crc ] size == 15
|
||||
// [ initial_crc ] size == 16 (all in v0)
|
||||
const __m128i mask_low = _mm_add_epi8(
|
||||
vramp, _mm_set1_epi8((char)(size - 16)));
|
||||
MASK_LH(initial_crc, mask_low, *v0, *v1);
|
||||
|
||||
if (size2 <= 16) {
|
||||
// There are 1-16 bytes of input and it is all
|
||||
// in data0. Copy the input bytes to v3. If there
|
||||
// are fewer than 16 bytes, the low bytes in v3
|
||||
// will be filled with zeros. That is, the input
|
||||
// bytes are stored to the same position as
|
||||
// (part of) initial_crc is in v0.
|
||||
MASK_L(data0, mask_end, v3);
|
||||
} else {
|
||||
// There are 2-16 bytes of input but not all bytes
|
||||
// are in data0.
|
||||
const __m128i data1 = _mm_load_si128(aligned_buf);
|
||||
|
||||
// Collect the 2-16 input bytes from data0 and data1
|
||||
// to v2 and v3, and bitwise-xor them with the
|
||||
// low bits of initial_crc in v0. Note that the
|
||||
// second xor is below this else-block as it
|
||||
// is shared with the other branch.
|
||||
MASK_H(data0, mask_end, v2);
|
||||
MASK_L(data1, mask_end, v3);
|
||||
*v0 = _mm_xor_si128(*v0, v2);
|
||||
}
|
||||
|
||||
*v0 = _mm_xor_si128(*v0, v3);
|
||||
*v1 = _mm_alignr_epi8(*v1, *v0, 8);
|
||||
} else
|
||||
// GCC and Clang would produce good code with _mm_set_epi64x
|
||||
// but MSVC needs _mm_cvtsi64_si128 on x86-64.
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
# define my_set_low64(a) _mm_set_epi64x(0, (a))
|
||||
#else
|
||||
# define my_set_low64(a) _mm_cvtsi64_si128(a)
|
||||
#endif
|
||||
{
|
||||
// There is more than 16 bytes of input.
|
||||
const __m128i data1 = _mm_load_si128(aligned_buf);
|
||||
const __m128i *end = (const __m128i*)(
|
||||
(const char *)aligned_buf - 16 + size2);
|
||||
aligned_buf++;
|
||||
|
||||
MASK_LH(initial_crc, mask_start, *v0, *v1);
|
||||
*v0 = _mm_xor_si128(*v0, data0);
|
||||
*v1 = _mm_xor_si128(*v1, data1);
|
||||
|
||||
while (aligned_buf < end) {
|
||||
*v1 = _mm_xor_si128(*v1, _mm_clmulepi64_si128(
|
||||
*v0, vfold16, 0x00));
|
||||
*v0 = _mm_xor_si128(*v1, _mm_clmulepi64_si128(
|
||||
*v0, vfold16, 0x11));
|
||||
*v1 = _mm_load_si128(aligned_buf++);
|
||||
}
|
||||
|
||||
if (aligned_buf != end) {
|
||||
MASK_H(*v0, mask_end, v2);
|
||||
MASK_L(*v0, mask_end, *v0);
|
||||
MASK_L(*v1, mask_end, v3);
|
||||
*v1 = _mm_or_si128(v2, v3);
|
||||
}
|
||||
|
||||
*v1 = _mm_xor_si128(*v1, _mm_clmulepi64_si128(
|
||||
*v0, vfold16, 0x00));
|
||||
*v0 = _mm_xor_si128(*v1, _mm_clmulepi64_si128(
|
||||
*v0, vfold16, 0x11));
|
||||
*v1 = _mm_srli_si128(*v0, 8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/////////////////////
|
||||
// x86 CLMUL CRC32 //
|
||||
/////////////////////
|
||||
// Byte masks used by keep_high_bytes(), shift_left(), and shift_right().
// Each of those does one unaligned 16-byte load starting at an offset
// into this table that depends on the byte count or shift amount.
//
// Align it so that the whole array is within the same cache line.
// More than one unaligned load can be done from this during the
// same CRC function call.
//
// The bytes [0] to [31] are used with AND to clear the low bytes. (With ANDN
// those could be used to clear the high bytes too but it's not needed here.)
//
// The bytes [16] to [47] are for left shifts.
// The bytes [32] to [63] are for right shifts.
//
// In the shift ranges, 0xFF has the high bit set and thus makes
// _mm_shuffle_epi8 produce a zero byte, while 0x00-0x0F select the
// source byte with that index.
//
// The table is only read (all accesses go through a pointer to const),
// so it is declared const to keep it out of writable data.
alignas(64)
static const uint8_t vmasks[64] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
|
||||
|
||||
/*
|
||||
// These functions were used to generate the constants
|
||||
// at the top of crc32_arch_optimized().
|
||||
static uint64_t
|
||||
calc_lo(uint64_t p, uint64_t a, int n)
|
||||
|
||||
// *Unaligned* 128-bit load
|
||||
crc_attr_target
|
||||
static inline __m128i
|
||||
my_load128(const uint8_t *p)
|
||||
{
|
||||
uint64_t b = 0; int i;
|
||||
for (i = 0; i < n; i++) {
|
||||
b = b >> 1 | (a & 1) << (n - 1);
|
||||
a = (a >> 1) ^ ((0 - (a & 1)) & p);
|
||||
}
|
||||
return b;
|
||||
return _mm_loadu_si128((const __m128i *)p);
|
||||
}
|
||||
|
||||
// same as ~crc(&a, sizeof(a), ~0)
|
||||
static uint64_t
|
||||
calc_hi(uint64_t p, uint64_t a, int n)
|
||||
|
||||
// Keep the highest "count" bytes as is and clear the remaining low bytes.
|
||||
crc_attr_target
|
||||
static inline __m128i
|
||||
keep_high_bytes(__m128i v, size_t count)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
a = (a >> 1) ^ ((0 - (a & 1)) & p);
|
||||
return a;
|
||||
return _mm_and_si128(my_load128((vmasks + count)), v);
|
||||
}
|
||||
|
||||
|
||||
// Shift the 128-bit value left by "amount" bytes (not bits).
//
// The control mask for _mm_shuffle_epi8 is read with an unaligned load
// from vmasks + 32 - amount. For amount in the range 0-16 the load stays
// within bytes [16] to [47] of vmasks: the leading 0xFF entries (high bit
// set) make the lowest "amount" result bytes zero and the 0x00-0x0F
// entries that follow move the low bytes of v up by "amount" positions.
// The bytes shifted past the top of the vector are discarded.
//
// NOTE: "amount" isn't range-checked here; the table layout is meant
// for values 0-16.
|
||||
crc_attr_target
|
||||
static inline __m128i
|
||||
shift_left(__m128i v, size_t amount)
|
||||
{
|
||||
return _mm_shuffle_epi8(v, my_load128((vmasks + 32 - amount)));
|
||||
}
|
||||
|
||||
|
||||
// Shift the 128-bit value right by "amount" bytes (not bits).
//
// The control mask for _mm_shuffle_epi8 is read with an unaligned load
// from vmasks + 32 + amount. For amount in the range 0-16 the load stays
// within bytes [32] to [63] of vmasks: the index entries (up to 0x0F) at
// the start of the mask move the high bytes of v down by "amount"
// positions and the trailing 0xFF entries (high bit set) make the
// highest "amount" result bytes zero.
//
// NOTE: "amount" isn't range-checked here; a value above 16 would make
// the 16-byte load read past the end of the 64-byte vmasks table.
|
||||
crc_attr_target
|
||||
static inline __m128i
|
||||
shift_right(__m128i v, size_t amount)
|
||||
{
|
||||
return _mm_shuffle_epi8(v, my_load128((vmasks + 32 + amount)));
|
||||
}
|
||||
*/
|
||||
|
||||
#ifdef BUILDING_CRC32_CLMUL
|
||||
|
||||
// Fold the 128-bit value v using the two 64-bit constants stored in k.
//
// The low 64 bits of v are carry-less multiplied by the low 64 bits of k
// (selector 0x00) and the high 64 bits of v by the high 64 bits of k
// (selector 0x11). The two 128-bit products are xorred together and
// returned. The caller is responsible for xorring the result with
// the next block of data.
crc_attr_target
|
||||
crc_attr_no_sanitize_address
|
||||
static inline __m128i
|
||||
fold(__m128i v, __m128i k)
|
||||
{
|
||||
__m128i a = _mm_clmulepi64_si128(v, k, 0x00);
|
||||
__m128i b = _mm_clmulepi64_si128(v, k, 0x11);
|
||||
return _mm_xor_si128(a, b);
|
||||
}
|
||||
|
||||
|
||||
// Fold v with the constants in k (see fold()) and xor the result with
// 16 bytes read from buf.
//
// The read is done with my_load128() so buf doesn't need to be aligned,
// but there must be at least 16 readable bytes at buf.
crc_attr_target
|
||||
static inline __m128i
|
||||
fold_xor(__m128i v, __m128i k, const uint8_t *buf)
|
||||
{
|
||||
return _mm_xor_si128(my_load128(buf), fold(v, k));
|
||||
}
|
||||
|
||||
|
||||
#if BUILDING_CRC_CLMUL == 32
|
||||
crc_attr_target
|
||||
static uint32_t
|
||||
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
|
||||
{
|
||||
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
||||
// The code assumes that there is at least one byte of input.
|
||||
if (size == 0)
|
||||
return crc;
|
||||
#endif
|
||||
|
||||
// uint32_t poly = 0xedb88320;
|
||||
const int64_t p = 0x1db710640; // p << 1
|
||||
const int64_t mu = 0x1f7011641; // calc_lo(p, p, 32) << 1 | 1
|
||||
const int64_t k5 = 0x163cd6124; // calc_hi(p, p, 32) << 1
|
||||
const int64_t k4 = 0x0ccaa009e; // calc_hi(p, p, 64) << 1
|
||||
const int64_t k3 = 0x1751997d0; // calc_hi(p, p, 128) << 1
|
||||
|
||||
const __m128i vfold4 = _mm_set_epi64x(mu, p);
|
||||
const __m128i vfold8 = _mm_set_epi64x(0, k5);
|
||||
const __m128i vfold16 = _mm_set_epi64x(k4, k3);
|
||||
|
||||
__m128i v0, v1, v2;
|
||||
|
||||
crc_simd_body(buf, size, &v0, &v1, vfold16,
|
||||
_mm_cvtsi32_si128((int32_t)~crc));
|
||||
|
||||
v1 = _mm_xor_si128(
|
||||
_mm_clmulepi64_si128(v0, vfold16, 0x10), v1); // xxx0
|
||||
v2 = _mm_shuffle_epi32(v1, 0xe7); // 0xx0
|
||||
v0 = _mm_slli_epi64(v1, 32); // [0]
|
||||
v0 = _mm_clmulepi64_si128(v0, vfold8, 0x00);
|
||||
v0 = _mm_xor_si128(v0, v2); // [1] [2]
|
||||
v2 = _mm_clmulepi64_si128(v0, vfold4, 0x10);
|
||||
v2 = _mm_clmulepi64_si128(v2, vfold4, 0x00);
|
||||
v0 = _mm_xor_si128(v0, v2); // [2]
|
||||
return ~(uint32_t)_mm_extract_epi32(v0, 2);
|
||||
}
|
||||
#endif // BUILDING_CRC32_CLMUL
|
||||
|
||||
|
||||
/////////////////////
|
||||
// x86 CLMUL CRC64 //
|
||||
/////////////////////
|
||||
|
||||
/*
|
||||
// These functions were used to generate the constants
|
||||
// at the top of crc64_arch_optimized().
|
||||
static uint64_t
|
||||
calc_lo(uint64_t poly)
|
||||
{
|
||||
uint64_t a = poly;
|
||||
uint64_t b = 0;
|
||||
|
||||
for (unsigned i = 0; i < 64; ++i) {
|
||||
b = (b >> 1) | (a << 63);
|
||||
a = (a >> 1) ^ (a & 1 ? poly : 0);
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
calc_hi(uint64_t poly, uint64_t a)
|
||||
{
|
||||
for (unsigned i = 0; i < 64; ++i)
|
||||
a = (a >> 1) ^ (a & 1 ? poly : 0);
|
||||
|
||||
return a;
|
||||
}
|
||||
*/
|
||||
|
||||
#ifdef BUILDING_CRC64_CLMUL
|
||||
|
||||
// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC
|
||||
// code when optimizations are enabled (release build). According to the bug
|
||||
// report, the ebx register is corrupted and the calculated result is wrong.
|
||||
// Trying to workaround the problem with "__asm mov ebx, ebx" didn't help.
|
||||
// The following pragma works and performance is still good. x86-64 builds
|
||||
// and CRC32 CLMUL aren't affected by this problem. The problem does not
|
||||
// happen in crc_simd_body() either (which is shared with CRC32 CLMUL anyway).
|
||||
//
|
||||
// NOTE: Another pragma after crc64_arch_optimized() restores
|
||||
// the optimizations. If the #if condition here is updated,
|
||||
// the other one must be updated too.
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
|
||||
&& defined(_M_IX86)
|
||||
# pragma optimize("g", off)
|
||||
#endif
|
||||
|
||||
#else
|
||||
crc_attr_target
|
||||
crc_attr_no_sanitize_address
|
||||
static uint64_t
|
||||
crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc)
|
||||
#endif
|
||||
{
|
||||
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
||||
// The code assumes that there is at least one byte of input.
|
||||
// We will assume that there is at least one byte of input.
|
||||
if (size == 0)
|
||||
return crc;
|
||||
|
||||
// See crc_clmul_consts_gen.c.
|
||||
#if BUILDING_CRC_CLMUL == 32
|
||||
const __m128i fold512 = _mm_set_epi64x(0x1d9513d7, 0x8f352d95);
|
||||
const __m128i fold128 = _mm_set_epi64x(0xccaa009e, 0xae689191);
|
||||
const __m128i mu_p = _mm_set_epi64x(
|
||||
(int64_t)0xb4e5b025f7011641, 0x1db710640);
|
||||
#else
|
||||
const __m128i fold512 = _mm_set_epi64x(
|
||||
(int64_t)0x081f6054a7842df4, (int64_t)0x6ae3efbb9dd441f3);
|
||||
|
||||
const __m128i fold128 = _mm_set_epi64x(
|
||||
(int64_t)0xdabe95afc7875f40, (int64_t)0xe05dd497ca393ae4);
|
||||
|
||||
const __m128i mu_p = _mm_set_epi64x(
|
||||
(int64_t)0x9c3e466c172963d5, (int64_t)0x92d8af2baf0e1e84);
|
||||
#endif
|
||||
|
||||
// const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial
|
||||
const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1
|
||||
const uint64_t mu = 0x9c3e466c172963d5; // (calc_lo(poly) << 1) | 1
|
||||
const uint64_t k2 = 0xdabe95afc7875f40; // calc_hi(poly, 1)
|
||||
const uint64_t k1 = 0xe05dd497ca393ae4; // calc_hi(poly, k2)
|
||||
__m128i v0, v1, v2, v3;
|
||||
|
||||
const __m128i vfold8 = _mm_set_epi64x((int64_t)p, (int64_t)mu);
|
||||
const __m128i vfold16 = _mm_set_epi64x((int64_t)k2, (int64_t)k1);
|
||||
crc = ~crc;
|
||||
|
||||
__m128i v0, v1, v2;
|
||||
if (size < 8) {
|
||||
uint64_t x = crc;
|
||||
size_t i = 0;
|
||||
|
||||
// Checking the bit instead of comparing the size means
|
||||
// that we don't need to update the size between the steps.
|
||||
if (size & 4) {
|
||||
x ^= read32le(buf);
|
||||
buf += 4;
|
||||
i = 32;
|
||||
}
|
||||
|
||||
if (size & 2) {
|
||||
x ^= (uint64_t)read16le(buf) << i;
|
||||
buf += 2;
|
||||
i += 16;
|
||||
}
|
||||
|
||||
if (size & 1)
|
||||
x ^= (uint64_t)*buf << i;
|
||||
|
||||
v0 = my_set_low64((int64_t)x);
|
||||
v0 = shift_left(v0, 8 - size);
|
||||
|
||||
} else if (size < 16) {
|
||||
v0 = my_set_low64((int64_t)(crc ^ read64le(buf)));
|
||||
|
||||
// NOTE: buf is intentionally left 8 bytes behind so that
|
||||
// we can read the last 1-7 bytes with read64le(buf + size).
|
||||
size -= 8;
|
||||
|
||||
// Handling 8-byte input specially is a speed optimization
|
||||
// as the clmul can be skipped. A branch is also needed to
|
||||
// avoid a too high shift amount.
|
||||
if (size > 0) {
|
||||
const size_t padding = 8 - size;
|
||||
uint64_t high = read64le(buf + size) >> (padding * 8);
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
crc_simd_body(buf, size, &v0, &v1, vfold16,
|
||||
_mm_set_epi64x(0, (int64_t)~crc));
|
||||
// Simple but likely not the best code for 32-bit x86.
|
||||
v0 = _mm_insert_epi32(v0, (int32_t)high, 2);
|
||||
v0 = _mm_insert_epi32(v0, (int32_t)(high >> 32), 3);
|
||||
#else
|
||||
// GCC and Clang would produce good code with _mm_set_epi64x
|
||||
// but MSVC needs _mm_cvtsi64_si128 on x86-64.
|
||||
crc_simd_body(buf, size, &v0, &v1, vfold16,
|
||||
_mm_cvtsi64_si128((int64_t)~crc));
|
||||
v0 = _mm_insert_epi64(v0, (int64_t)high, 1);
|
||||
#endif
|
||||
|
||||
v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold16, 0x10), v1);
|
||||
v0 = _mm_clmulepi64_si128(v1, vfold8, 0x00);
|
||||
v2 = _mm_clmulepi64_si128(v0, vfold8, 0x10);
|
||||
v0 = _mm_xor_si128(_mm_xor_si128(v1, _mm_slli_si128(v0, 8)), v2);
|
||||
v0 = shift_left(v0, padding);
|
||||
|
||||
v1 = _mm_srli_si128(v0, 8);
|
||||
v0 = _mm_clmulepi64_si128(v0, fold128, 0x10);
|
||||
v0 = _mm_xor_si128(v0, v1);
|
||||
}
|
||||
} else {
|
||||
v0 = my_set_low64((int64_t)crc);
|
||||
|
||||
// To align or not to align the buf pointer? If the end of
|
||||
// the buffer isn't aligned, aligning the pointer here would
|
||||
// make us do an extra folding step with the associated byte
|
||||
// shuffling overhead. The cost of that would need to be
|
||||
// lower than the benefit of aligned reads. Testing on an old
|
||||
// Intel Ivy Bridge processor suggested that aligning isn't
|
||||
// worth the cost but it likely depends on the processor and
|
||||
// buffer size. Unaligned loads (MOVDQU) should be fast on
|
||||
// x86 processors that support PCLMULQDQ, so we don't align
|
||||
// the buf pointer here.
|
||||
|
||||
// Read the first (and possibly the only) full 16 bytes.
|
||||
v0 = _mm_xor_si128(v0, my_load128(buf));
|
||||
buf += 16;
|
||||
size -= 16;
|
||||
|
||||
if (size >= 48) {
|
||||
v1 = my_load128(buf);
|
||||
v2 = my_load128(buf + 16);
|
||||
v3 = my_load128(buf + 32);
|
||||
buf += 48;
|
||||
size -= 48;
|
||||
|
||||
while (size >= 64) {
|
||||
v0 = fold_xor(v0, fold512, buf);
|
||||
v1 = fold_xor(v1, fold512, buf + 16);
|
||||
v2 = fold_xor(v2, fold512, buf + 32);
|
||||
v3 = fold_xor(v3, fold512, buf + 48);
|
||||
buf += 64;
|
||||
size -= 64;
|
||||
}
|
||||
|
||||
v0 = _mm_xor_si128(v1, fold(v0, fold128));
|
||||
v0 = _mm_xor_si128(v2, fold(v0, fold128));
|
||||
v0 = _mm_xor_si128(v3, fold(v0, fold128));
|
||||
}
|
||||
|
||||
while (size >= 16) {
|
||||
v0 = fold_xor(v0, fold128, buf);
|
||||
buf += 16;
|
||||
size -= 16;
|
||||
}
|
||||
|
||||
if (size > 0) {
|
||||
// We want the last "size" number of input bytes to
|
||||
// be at the high bits of v1. First do a full 16-byte
|
||||
// load and then mask the low bytes to zeros.
|
||||
v1 = my_load128(buf + size - 16);
|
||||
v1 = keep_high_bytes(v1, size);
|
||||
|
||||
// Shift high bytes from v0 to the low bytes of v1.
|
||||
//
|
||||
// Alternatively we could replace the combination
|
||||
// keep_high_bytes + shift_right + _mm_or_si128 with
|
||||
// _mm_shuffle_epi8 + _mm_blendv_epi8 but that would
|
||||
// require larger tables for the masks. Now there are
|
||||
// three loads (instead of two) from the mask tables
|
||||
// but they all are from the same cache line.
|
||||
v1 = _mm_or_si128(v1, shift_right(v0, size));
|
||||
|
||||
// Shift high bytes of v0 away, padding the
|
||||
// low bytes with zeros.
|
||||
v0 = shift_left(v0, 16 - size);
|
||||
|
||||
v0 = _mm_xor_si128(v1, fold(v0, fold128));
|
||||
}
|
||||
|
||||
v1 = _mm_srli_si128(v0, 8);
|
||||
v0 = _mm_clmulepi64_si128(v0, fold128, 0x10);
|
||||
v0 = _mm_xor_si128(v0, v1);
|
||||
}
|
||||
|
||||
// Barrett reduction
|
||||
|
||||
#if BUILDING_CRC_CLMUL == 32
|
||||
v1 = _mm_clmulepi64_si128(v0, mu_p, 0x10); // v0 * mu
|
||||
v1 = _mm_clmulepi64_si128(v1, mu_p, 0x00); // v1 * p
|
||||
v0 = _mm_xor_si128(v0, v1);
|
||||
return ~(uint32_t)_mm_extract_epi32(v0, 2);
|
||||
#else
|
||||
// Because p is 65 bits but one bit doesn't fit into the 64-bit
|
||||
// half of __m128i, finish the second clmul by shifting v1 left
|
||||
// by 64 bits and xorring it to the final result.
|
||||
v1 = _mm_clmulepi64_si128(v0, mu_p, 0x10); // v0 * mu
|
||||
v2 = _mm_slli_si128(v1, 8);
|
||||
v1 = _mm_clmulepi64_si128(v1, mu_p, 0x00); // v1 * p
|
||||
v0 = _mm_xor_si128(v0, v2);
|
||||
v0 = _mm_xor_si128(v0, v1);
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
return ~(((uint64_t)(uint32_t)_mm_extract_epi32(v0, 3) << 32) |
|
||||
(uint64_t)(uint32_t)_mm_extract_epi32(v0, 2));
|
||||
#else
|
||||
return ~(uint64_t)_mm_extract_epi64(v0, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
|
||||
&& defined(_M_IX86)
|
||||
# pragma optimize("", on)
|
||||
#endif
|
||||
|
||||
#endif // BUILDING_CRC64_CLMUL
|
||||
}
|
||||
|
||||
|
||||
// Even though this is an inline function, compile it only when needed.
|
||||
|
|
|
|||
|
|
@ -134,8 +134,7 @@ alone_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
coder->pos = 0;
|
||||
coder->sequence = SEQ_CODER_INIT;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CODER_INIT: {
|
||||
if (coder->memusage > coder->memlimit)
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ auto_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return LZMA_GET_CHECK;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CODE: {
|
||||
const lzma_ret ret = coder->next.code(
|
||||
|
|
@ -91,10 +91,9 @@ auto_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return ret;
|
||||
|
||||
coder->sequence = SEQ_FINISH;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_FINISH:
|
||||
// When LZMA_CONCATENATED was used and we were decoding
|
||||
// a LZMA_Alone file, we need to check that there is no
|
||||
|
|
|
|||
|
|
@ -146,10 +146,9 @@ block_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
coder->block->uncompressed_size = coder->uncompressed_size;
|
||||
|
||||
coder->sequence = SEQ_PADDING;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_PADDING:
|
||||
// Compressed Data is padded to a multiple of four bytes.
|
||||
while (coder->compressed_size & 3) {
|
||||
|
|
@ -173,8 +172,7 @@ block_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
lzma_check_finish(&coder->check, coder->block->check);
|
||||
|
||||
coder->sequence = SEQ_CHECK;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CHECK: {
|
||||
const size_t check_size = lzma_check_size(coder->block->check);
|
||||
|
|
|
|||
|
|
@ -94,10 +94,9 @@ block_encode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
coder->block->uncompressed_size = coder->uncompressed_size;
|
||||
|
||||
coder->sequence = SEQ_PADDING;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_PADDING:
|
||||
// Pad Compressed Data to a multiple of four bytes. We can
|
||||
// use coder->compressed_size for this since we don't need
|
||||
|
|
@ -117,8 +116,7 @@ block_encode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
lzma_check_finish(&coder->check, coder->block->check);
|
||||
|
||||
coder->sequence = SEQ_CHECK;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CHECK: {
|
||||
const size_t check_size = lzma_check_size(coder->block->check);
|
||||
|
|
|
|||
|
|
@ -96,6 +96,12 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
|
|||
size_t in_size, uint8_t *restrict out,
|
||||
size_t *restrict out_pos, size_t out_size)
|
||||
{
|
||||
assert(in != NULL || *in_pos == in_size);
|
||||
assert(out != NULL || *out_pos == out_size);
|
||||
|
||||
assert(*in_pos <= in_size);
|
||||
assert(*out_pos <= out_size);
|
||||
|
||||
const size_t in_avail = in_size - *in_pos;
|
||||
const size_t out_avail = out_size - *out_pos;
|
||||
const size_t copy_size = my_min(in_avail, out_avail);
|
||||
|
|
@ -348,7 +354,7 @@ lzma_code(lzma_stream *strm, lzma_action action)
|
|||
else
|
||||
strm->internal->sequence = ISEQ_END;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case LZMA_NO_CHECK:
|
||||
case LZMA_UNSUPPORTED_CHECK:
|
||||
|
|
|
|||
|
|
@ -298,15 +298,13 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
// Start looking for Stream Padding and Stream Footer
|
||||
// at the end of the file.
|
||||
coder->file_target_pos = coder->file_size;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_PADDING_SEEK:
|
||||
coder->sequence = SEQ_PADDING_DECODE;
|
||||
return_if_error(reverse_seek(
|
||||
coder, in_start, in_pos, in_size));
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_PADDING_DECODE: {
|
||||
// Copy to coder->temp first. This keeps the code simpler if
|
||||
|
|
@ -356,9 +354,9 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
|
||||
return_if_error(reverse_seek(
|
||||
coder, in_start, in_pos, in_size));
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
case SEQ_FOOTER:
|
||||
// Copy the Stream Footer field into coder->temp.
|
||||
|
|
@ -414,7 +412,7 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return LZMA_SEEK_NEEDED;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_INDEX_INIT: {
|
||||
// Calculate the amount of memory already used by the earlier
|
||||
|
|
@ -444,10 +442,9 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
coder->index_remaining = coder->footer_flags.backward_size;
|
||||
coder->sequence = SEQ_INDEX_DECODE;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_INDEX_DECODE: {
|
||||
// Decode (a part of) the Index. If the whole Index is already
|
||||
// in coder->temp, read it from there. Otherwise read from
|
||||
|
|
@ -574,9 +571,9 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return_if_error(reverse_seek(coder,
|
||||
in_start, in_pos, in_size));
|
||||
}
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
case SEQ_HEADER_DECODE:
|
||||
// Copy the Stream Header field into coder->temp.
|
||||
|
|
@ -596,8 +593,7 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
coder->temp + coder->temp_size)));
|
||||
|
||||
coder->sequence = SEQ_HEADER_COMPARE;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_HEADER_COMPARE:
|
||||
// Compare Stream Header against Stream Footer. They must
|
||||
|
|
|
|||
|
|
@ -93,8 +93,7 @@ index_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
coder->pos = 0;
|
||||
coder->sequence = SEQ_MEMUSAGE;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_MEMUSAGE:
|
||||
if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
|
||||
|
|
@ -153,8 +152,7 @@ index_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
case SEQ_PADDING_INIT:
|
||||
coder->pos = lzma_index_padding_size(coder->index);
|
||||
coder->sequence = SEQ_PADDING;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_PADDING:
|
||||
if (coder->pos > 0) {
|
||||
|
|
@ -170,8 +168,7 @@ index_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
*in_pos - in_start, coder->crc32);
|
||||
|
||||
coder->sequence = SEQ_CRC32;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CRC32:
|
||||
do {
|
||||
|
|
|
|||
|
|
@ -93,8 +93,7 @@ index_encode(void *coder_ptr,
|
|||
}
|
||||
|
||||
coder->sequence = SEQ_UNPADDED;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_UNPADDED:
|
||||
case SEQ_UNCOMPRESSED: {
|
||||
|
|
@ -127,8 +126,7 @@ index_encode(void *coder_ptr,
|
|||
*out_pos - out_start, coder->crc32);
|
||||
|
||||
coder->sequence = SEQ_CRC32;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CRC32:
|
||||
// We don't use the main loop, because we don't want
|
||||
|
|
|
|||
|
|
@ -267,9 +267,9 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
|
|||
index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
|
||||
index_hash->records.count,
|
||||
index_hash->records.index_list_size)) & 3;
|
||||
index_hash->sequence = SEQ_PADDING;
|
||||
|
||||
// Fall through
|
||||
index_hash->sequence = SEQ_PADDING;
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_PADDING:
|
||||
if (index_hash->pos > 0) {
|
||||
|
|
@ -302,8 +302,7 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
|
|||
*in_pos - in_start, index_hash->crc32);
|
||||
|
||||
index_hash->sequence = SEQ_CRC32;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_CRC32:
|
||||
do {
|
||||
|
|
|
|||
|
|
@ -150,10 +150,9 @@ lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
coder->member_size = sizeof(lzip_id_string);
|
||||
|
||||
coder->sequence = SEQ_VERSION;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_VERSION:
|
||||
if (*in_pos >= in_size)
|
||||
return LZMA_OK;
|
||||
|
|
@ -173,7 +172,7 @@ lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
if (coder->tell_any_check)
|
||||
return LZMA_GET_CHECK;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_DICT_SIZE: {
|
||||
if (*in_pos >= in_size)
|
||||
|
|
@ -220,10 +219,9 @@ lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
// LZMA_MEMLIMIT_ERROR we need to be able to restart after
|
||||
// the memlimit has been increased.
|
||||
coder->sequence = SEQ_CODER_INIT;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_CODER_INIT: {
|
||||
if (coder->memusage > coder->memlimit)
|
||||
return LZMA_MEMLIMIT_ERROR;
|
||||
|
|
@ -243,10 +241,9 @@ lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
coder->crc32 = 0;
|
||||
coder->sequence = SEQ_LZMA_STREAM;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_LZMA_STREAM: {
|
||||
const size_t in_start = *in_pos;
|
||||
const size_t out_start = *out_pos;
|
||||
|
|
@ -273,10 +270,9 @@ lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return ret;
|
||||
|
||||
coder->sequence = SEQ_MEMBER_FOOTER;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_MEMBER_FOOTER: {
|
||||
// The footer of .lz version 0 lacks the Member size field.
|
||||
// This is the only difference between version 0 and
|
||||
|
|
|
|||
|
|
@ -58,15 +58,13 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
|
|||
|
||||
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
|
||||
&& (((TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) \
|
||||
&& (defined(__x86_64__) \
|
||||
|| defined(__aarch64__))) \
|
||||
&& SIZE_MAX == UINT64_MAX) \
|
||||
|| (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
|
||||
|| (defined(__INTEL_COMPILER) && defined(_M_X64)) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_X64) \
|
||||
|| defined(_M_ARM64) || defined(_M_ARM64EC))))
|
||||
// This is only for x86-64 and ARM64 for now. This might be fine on
|
||||
// other 64-bit processors too. On big endian one should use xor
|
||||
// instead of subtraction and switch to __builtin_clzll().
|
||||
// other 64-bit processors too.
|
||||
//
|
||||
// Reasons to use subtraction instead of xor:
|
||||
//
|
||||
|
|
@ -82,7 +80,11 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
|
|||
// version 2023-05-26. https://www.agner.org/optimize/
|
||||
#define LZMA_MEMCMPLEN_EXTRA 8
|
||||
while (len < limit) {
|
||||
# ifdef WORDS_BIGENDIAN
|
||||
const uint64_t x = read64ne(buf1 + len) ^ read64ne(buf2 + len);
|
||||
# else
|
||||
const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len);
|
||||
# endif
|
||||
if (x != 0) {
|
||||
// MSVC or Intel C compiler on Windows
|
||||
# if defined(_MSC_VER) || defined(__INTEL_COMPILER)
|
||||
|
|
@ -90,6 +92,8 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
|
|||
_BitScanForward64(&tmp, x);
|
||||
len += (uint32_t)tmp >> 3;
|
||||
// GCC, Clang, or Intel C compiler
|
||||
# elif defined(WORDS_BIGENDIAN)
|
||||
len += (uint32_t)__builtin_clzll(x) >> 3;
|
||||
# else
|
||||
len += (uint32_t)__builtin_ctzll(x) >> 3;
|
||||
# endif
|
||||
|
|
|
|||
|
|
@ -154,9 +154,9 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
if (coder->tell_any_check)
|
||||
return LZMA_GET_CHECK;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
case SEQ_BLOCK_HEADER: {
|
||||
if (*in_pos >= in_size)
|
||||
|
|
@ -187,10 +187,9 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
coder->pos = 0;
|
||||
coder->sequence = SEQ_BLOCK_INIT;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_BLOCK_INIT: {
|
||||
// Checking memusage and doing the initialization needs
|
||||
// its own sequence point because we need to be able to
|
||||
|
|
@ -252,10 +251,9 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return ret;
|
||||
|
||||
coder->sequence = SEQ_BLOCK_RUN;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_BLOCK_RUN: {
|
||||
const lzma_ret ret = coder->block_decoder.code(
|
||||
coder->block_decoder.coder, allocator,
|
||||
|
|
@ -291,10 +289,9 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return ret;
|
||||
|
||||
coder->sequence = SEQ_STREAM_FOOTER;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_STREAM_FOOTER: {
|
||||
// Copy the Stream Footer to the internal buffer.
|
||||
lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
|
||||
|
|
@ -331,10 +328,9 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return LZMA_STREAM_END;
|
||||
|
||||
coder->sequence = SEQ_STREAM_PADDING;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_STREAM_PADDING:
|
||||
assert(coder->concatenated);
|
||||
|
||||
|
|
|
|||
|
|
@ -23,15 +23,10 @@ typedef enum {
|
|||
THR_IDLE,
|
||||
|
||||
/// Decoding is in progress.
|
||||
/// Main thread may change this to THR_STOP or THR_EXIT.
|
||||
/// Main thread may change this to THR_IDLE or THR_EXIT.
|
||||
/// The worker thread may change this to THR_IDLE.
|
||||
THR_RUN,
|
||||
|
||||
/// The main thread wants the thread to stop whatever it was doing
|
||||
/// but not exit. Main thread may change this to THR_EXIT.
|
||||
/// The worker thread may change this to THR_IDLE.
|
||||
THR_STOP,
|
||||
|
||||
/// The main thread wants the thread to exit.
|
||||
THR_EXIT,
|
||||
|
||||
|
|
@ -346,27 +341,6 @@ worker_enable_partial_update(void *thr_ptr)
|
|||
}
|
||||
|
||||
|
||||
/// Things do to at THR_STOP or when finishing a Block.
|
||||
/// This is called with thr->mutex locked.
|
||||
static void
|
||||
worker_stop(struct worker_thread *thr)
|
||||
{
|
||||
// Update memory usage counters.
|
||||
thr->coder->mem_in_use -= thr->in_size;
|
||||
thr->in_size = 0; // thr->in was freed above.
|
||||
|
||||
thr->coder->mem_in_use -= thr->mem_filters;
|
||||
thr->coder->mem_cached += thr->mem_filters;
|
||||
|
||||
// Put this thread to the stack of free threads.
|
||||
thr->next = thr->coder->threads_free;
|
||||
thr->coder->threads_free = thr;
|
||||
|
||||
mythread_cond_signal(&thr->coder->cond);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static MYTHREAD_RET_TYPE
|
||||
worker_decoder(void *thr_ptr)
|
||||
{
|
||||
|
|
@ -397,17 +371,6 @@ next_loop_unlocked:
|
|||
return MYTHREAD_RET_VALUE;
|
||||
}
|
||||
|
||||
if (thr->state == THR_STOP) {
|
||||
thr->state = THR_IDLE;
|
||||
mythread_mutex_unlock(&thr->mutex);
|
||||
|
||||
mythread_sync(thr->coder->mutex) {
|
||||
worker_stop(thr);
|
||||
}
|
||||
|
||||
goto next_loop_lock;
|
||||
}
|
||||
|
||||
assert(thr->state == THR_RUN);
|
||||
|
||||
// Update progress info for get_progress().
|
||||
|
|
@ -472,8 +435,7 @@ next_loop_unlocked:
|
|||
}
|
||||
|
||||
// Either we finished successfully (LZMA_STREAM_END) or an error
|
||||
// occurred. Both cases are handled almost identically. The error
|
||||
// case requires updating thr->coder->thread_error.
|
||||
// occurred.
|
||||
//
|
||||
// The sizes are in the Block Header and the Block decoder
|
||||
// checks that they match, thus we know these:
|
||||
|
|
@ -481,16 +443,30 @@ next_loop_unlocked:
|
|||
assert(ret != LZMA_STREAM_END
|
||||
|| thr->out_pos == thr->block_options.uncompressed_size);
|
||||
|
||||
// Free the input buffer. Don't update in_size as we need
|
||||
// it later to update thr->coder->mem_in_use.
|
||||
lzma_free(thr->in, thr->allocator);
|
||||
thr->in = NULL;
|
||||
|
||||
mythread_sync(thr->mutex) {
|
||||
// Block decoder ensures this, but do a sanity check anyway
|
||||
// because thr->in_filled < thr->in_size means that the main
|
||||
// thread is still writing to thr->in.
|
||||
if (ret == LZMA_STREAM_END && thr->in_filled != thr->in_size) {
|
||||
assert(0);
|
||||
ret = LZMA_PROG_ERROR;
|
||||
}
|
||||
|
||||
if (thr->state != THR_EXIT)
|
||||
thr->state = THR_IDLE;
|
||||
}
|
||||
|
||||
// Free the input buffer. Don't update in_size as we need
|
||||
// it later to update thr->coder->mem_in_use.
|
||||
//
|
||||
// This step is skipped if an error occurred because the main thread
|
||||
// might still be writing to thr->in. The memory will be freed after
|
||||
// threads_end() sets thr->state = THR_EXIT.
|
||||
if (ret == LZMA_STREAM_END) {
|
||||
lzma_free(thr->in, thr->allocator);
|
||||
thr->in = NULL;
|
||||
}
|
||||
|
||||
mythread_sync(thr->coder->mutex) {
|
||||
// Move our progress info to the main thread.
|
||||
thr->coder->progress_in += thr->in_pos;
|
||||
|
|
@ -510,7 +486,20 @@ next_loop_unlocked:
|
|||
&& thr->coder->thread_error == LZMA_OK)
|
||||
thr->coder->thread_error = ret;
|
||||
|
||||
worker_stop(thr);
|
||||
// Return the worker thread to the stack of available
|
||||
// threads only if no errors occurred.
|
||||
if (ret == LZMA_STREAM_END) {
|
||||
// Update memory usage counters.
|
||||
thr->coder->mem_in_use -= thr->in_size;
|
||||
thr->coder->mem_in_use -= thr->mem_filters;
|
||||
thr->coder->mem_cached += thr->mem_filters;
|
||||
|
||||
// Put this thread to the stack of free threads.
|
||||
thr->next = thr->coder->threads_free;
|
||||
thr->coder->threads_free = thr;
|
||||
}
|
||||
|
||||
mythread_cond_signal(&thr->coder->cond);
|
||||
}
|
||||
|
||||
goto next_loop_lock;
|
||||
|
|
@ -544,17 +533,22 @@ threads_end(struct lzma_stream_coder *coder, const lzma_allocator *allocator)
|
|||
}
|
||||
|
||||
|
||||
/// Tell worker threads to stop without doing any cleaning up.
|
||||
/// The clean up will be done when threads_exit() is called;
|
||||
/// it's not possible to reuse the threads after threads_stop().
|
||||
///
|
||||
/// This is called before returning an unrecoverable error code
|
||||
/// to the application. It would be waste of processor time
|
||||
/// to keep the threads running in such a situation.
|
||||
static void
|
||||
threads_stop(struct lzma_stream_coder *coder)
|
||||
{
|
||||
for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
|
||||
// The threads that are in the THR_RUN state will stop
|
||||
// when they check the state the next time. There's no
|
||||
// need to signal coder->threads[i].cond.
|
||||
mythread_sync(coder->threads[i].mutex) {
|
||||
// The state must be changed conditionally because
|
||||
// THR_IDLE -> THR_STOP is not a valid state change.
|
||||
if (coder->threads[i].state != THR_IDLE) {
|
||||
coder->threads[i].state = THR_STOP;
|
||||
mythread_cond_signal(&coder->threads[i].cond);
|
||||
}
|
||||
coder->threads[i].state = THR_IDLE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1077,9 +1071,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
if (coder->tell_any_check)
|
||||
return LZMA_GET_CHECK;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
case SEQ_BLOCK_HEADER: {
|
||||
const size_t in_old = *in_pos;
|
||||
|
|
@ -1214,10 +1208,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
}
|
||||
|
||||
coder->sequence = SEQ_BLOCK_INIT;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_BLOCK_INIT: {
|
||||
// Check if decoding is possible at all with the current
|
||||
// memlimit_stop which we must never exceed.
|
||||
|
|
@ -1303,10 +1296,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
}
|
||||
|
||||
coder->sequence = SEQ_BLOCK_THR_INIT;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_BLOCK_THR_INIT: {
|
||||
// We need to wait for a multiple conditions to become true
|
||||
// until we can initialize the Block decoder and let a worker
|
||||
|
|
@ -1508,10 +1500,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
}
|
||||
|
||||
coder->sequence = SEQ_BLOCK_THR_RUN;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_BLOCK_THR_RUN: {
|
||||
if (action == LZMA_FINISH && coder->fail_fast) {
|
||||
// We know that we won't get more input and that
|
||||
|
|
@ -1549,10 +1540,17 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
// Read output from the output queue. Just like in
|
||||
// SEQ_BLOCK_HEADER, we wait to fill the output buffer
|
||||
// only if waiting_allowed was set to true in the beginning
|
||||
// of this function (see the comment there).
|
||||
// of this function (see the comment there) and there is
|
||||
// no input available. In SEQ_BLOCK_HEADER, there is never
|
||||
// input available when read_output_and_wait() is called,
|
||||
// but here there can be when LZMA_FINISH is used, thus we
|
||||
// need to check if *in_pos == in_size. Otherwise we would
|
||||
// wait here instead of using the available input to start
|
||||
// a new thread.
|
||||
return_if_error(read_output_and_wait(coder, allocator,
|
||||
out, out_pos, out_size,
|
||||
NULL, waiting_allowed,
|
||||
NULL,
|
||||
waiting_allowed && *in_pos == in_size,
|
||||
&wait_abs, &has_blocked));
|
||||
|
||||
if (coder->pending_error != LZMA_OK) {
|
||||
|
|
@ -1561,6 +1559,10 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
}
|
||||
|
||||
// Return if the input didn't contain the whole Block.
|
||||
//
|
||||
// NOTE: When we updated coder->thr->in_filled a few lines
|
||||
// above, the worker thread might by now have finished its
|
||||
// work and returned itself back to the stack of free threads.
|
||||
if (coder->thr->in_filled < coder->thr->in_size) {
|
||||
assert(*in_pos == in_size);
|
||||
return LZMA_OK;
|
||||
|
|
@ -1613,10 +1615,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
coder->mem_direct_mode = coder->mem_next_filters;
|
||||
|
||||
coder->sequence = SEQ_BLOCK_DIRECT_RUN;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_BLOCK_DIRECT_RUN: {
|
||||
const size_t in_old = *in_pos;
|
||||
const size_t out_old = *out_pos;
|
||||
|
|
@ -1652,8 +1653,7 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return LZMA_OK;
|
||||
|
||||
coder->sequence = SEQ_INDEX_DECODE;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_INDEX_DECODE: {
|
||||
// If we don't have any input, don't call
|
||||
|
|
@ -1672,10 +1672,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return ret;
|
||||
|
||||
coder->sequence = SEQ_STREAM_FOOTER;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_STREAM_FOOTER: {
|
||||
// Copy the Stream Footer to the internal buffer.
|
||||
const size_t in_old = *in_pos;
|
||||
|
|
@ -1714,10 +1713,9 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return LZMA_STREAM_END;
|
||||
|
||||
coder->sequence = SEQ_STREAM_PADDING;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_STREAM_PADDING:
|
||||
assert(coder->concatenated);
|
||||
|
||||
|
|
@ -1948,7 +1946,7 @@ stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
|||
// accounting from scratch, too. Changes in filter and block sizes may
|
||||
// affect number of threads.
|
||||
//
|
||||
// FIXME? Reusing should be easy but unlike the single-threaded
|
||||
// Reusing threads doesn't seem worth it. Unlike the single-threaded
|
||||
// decoder, with some types of input file combinations reusing
|
||||
// could leave quite a lot of memory allocated but unused (first
|
||||
// file could allocate a lot, the next files could use fewer
|
||||
|
|
|
|||
|
|
@ -731,8 +731,7 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
|
||||
coder->header_pos = 0;
|
||||
coder->sequence = SEQ_BLOCK;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_BLOCK: {
|
||||
// Initialized to silence warnings.
|
||||
|
|
@ -851,9 +850,9 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
// to be ready to be copied out.
|
||||
coder->progress_out += lzma_index_size(coder->index)
|
||||
+ LZMA_STREAM_HEADER_SIZE;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
case SEQ_INDEX: {
|
||||
// Call the Index encoder. It doesn't take any input, so
|
||||
|
|
@ -873,10 +872,9 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
|
|||
return LZMA_PROG_ERROR;
|
||||
|
||||
coder->sequence = SEQ_STREAM_FOOTER;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_STREAM_FOOTER:
|
||||
lzma_bufcpy(coder->header, &coder->header_pos,
|
||||
sizeof(coder->header),
|
||||
|
|
|
|||
|
|
@ -12,6 +12,11 @@
|
|||
#include "filter_common.h"
|
||||
|
||||
|
||||
// liblzma itself doesn't use gettext to translate messages.
|
||||
// Mark the strings still so that xz can translate them.
|
||||
#define N_(msgid) msgid
|
||||
|
||||
|
||||
/////////////////////
|
||||
// String building //
|
||||
/////////////////////
|
||||
|
|
@ -317,6 +322,10 @@ parse_lzma12_preset(const char **const str, const char *str_end,
|
|||
uint32_t *preset)
|
||||
{
|
||||
assert(*str < str_end);
|
||||
|
||||
if (!(**str >= '0' && **str <= '9'))
|
||||
return N_("Unsupported preset");
|
||||
|
||||
*preset = (uint32_t)(**str - '0');
|
||||
|
||||
// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
|
||||
|
|
@ -327,7 +336,7 @@ parse_lzma12_preset(const char **const str, const char *str_end,
|
|||
break;
|
||||
|
||||
default:
|
||||
return "Unsupported preset flag";
|
||||
return N_("Unsupported flag in the preset");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -346,7 +355,7 @@ set_lzma12_preset(const char **const str, const char *str_end,
|
|||
|
||||
lzma_options_lzma *opts = filter_options;
|
||||
if (lzma_lzma_preset(opts, preset))
|
||||
return "Unsupported preset";
|
||||
return N_("Unsupported preset");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -438,7 +447,7 @@ parse_lzma12(const char **const str, const char *str_end, void *filter_options)
|
|||
return errmsg;
|
||||
|
||||
if (opts->lc + opts->lp > LZMA_LCLP_MAX)
|
||||
return "The sum of lc and lp must not exceed 4";
|
||||
return N_("The sum of lc and lp must not exceed 4");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -574,21 +583,21 @@ parse_options(const char **const str, const char *str_end,
|
|||
// Fail if the '=' wasn't found or the option name is missing
|
||||
// (the first char is '=').
|
||||
if (equals_sign == NULL || **str == '=')
|
||||
return "Options must be 'name=value' pairs separated "
|
||||
"with commas";
|
||||
return N_("Options must be 'name=value' pairs "
|
||||
"separated with commas");
|
||||
|
||||
// Reject a too long option name so that the memcmp()
|
||||
// in the loop below won't read past the end of the
|
||||
// string in optmap[i].name.
|
||||
const size_t name_len = (size_t)(equals_sign - *str);
|
||||
if (name_len > NAME_LEN_MAX)
|
||||
return "Unknown option name";
|
||||
return N_("Unknown option name");
|
||||
|
||||
// Find the option name from optmap[].
|
||||
size_t i = 0;
|
||||
while (true) {
|
||||
if (i == optmap_size)
|
||||
return "Unknown option name";
|
||||
return N_("Unknown option name");
|
||||
|
||||
if (memcmp(*str, optmap[i].name, name_len) == 0
|
||||
&& optmap[i].name[name_len] == '\0')
|
||||
|
|
@ -605,7 +614,7 @@ parse_options(const char **const str, const char *str_end,
|
|||
// string so check it here.
|
||||
const size_t value_len = (size_t)(name_eq_value_end - *str);
|
||||
if (value_len == 0)
|
||||
return "Option value cannot be empty";
|
||||
return N_("Option value cannot be empty");
|
||||
|
||||
// LZMA1/2 preset has its own parsing function.
|
||||
if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
|
||||
|
|
@ -626,14 +635,14 @@ parse_options(const char **const str, const char *str_end,
|
|||
// in the loop below won't read past the end of the
|
||||
// string in optmap[i].u.map[j].name.
|
||||
if (value_len > NAME_LEN_MAX)
|
||||
return "Invalid option value";
|
||||
return N_("Invalid option value");
|
||||
|
||||
const name_value_map *map = optmap[i].u.map;
|
||||
size_t j = 0;
|
||||
while (true) {
|
||||
// The array is terminated with an empty name.
|
||||
if (map[j].name[0] == '\0')
|
||||
return "Invalid option value";
|
||||
return N_("Invalid option value");
|
||||
|
||||
if (memcmp(*str, map[j].name, value_len) == 0
|
||||
&& map[j].name[value_len]
|
||||
|
|
@ -647,7 +656,8 @@ parse_options(const char **const str, const char *str_end,
|
|||
} else if (**str < '0' || **str > '9') {
|
||||
// Note that "max" isn't supported while it is
|
||||
// supported in xz. It's not useful here.
|
||||
return "Value is not a non-negative decimal integer";
|
||||
return N_("Value is not a non-negative "
|
||||
"decimal integer");
|
||||
} else {
|
||||
// strtoul() has locale-specific behavior so it cannot
|
||||
// be relied on to get reproducible results since we
|
||||
|
|
@ -661,13 +671,13 @@ parse_options(const char **const str, const char *str_end,
|
|||
v = 0;
|
||||
do {
|
||||
if (v > UINT32_MAX / 10)
|
||||
return "Value out of range";
|
||||
return N_("Value out of range");
|
||||
|
||||
v *= 10;
|
||||
|
||||
const uint32_t add = (uint32_t)(*p - '0');
|
||||
if (UINT32_MAX - add < v)
|
||||
return "Value out of range";
|
||||
return N_("Value out of range");
|
||||
|
||||
v += add;
|
||||
++p;
|
||||
|
|
@ -692,8 +702,9 @@ parse_options(const char **const str, const char *str_end,
|
|||
if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
|
||||
== 0) {
|
||||
*str = multiplier_start;
|
||||
return "This option does not support "
|
||||
"any integer suffixes";
|
||||
return N_("This option does not "
|
||||
"support any multiplier "
|
||||
"suffixes");
|
||||
}
|
||||
|
||||
uint32_t shift;
|
||||
|
|
@ -716,8 +727,13 @@ parse_options(const char **const str, const char *str_end,
|
|||
|
||||
default:
|
||||
*str = multiplier_start;
|
||||
return "Invalid multiplier suffix "
|
||||
"(KiB, MiB, or GiB)";
|
||||
|
||||
// TRANSLATORS: Don't translate the
|
||||
// suffixes "KiB", "MiB", or "GiB"
|
||||
// because a user can only specify
|
||||
// untranslated suffixes.
|
||||
return N_("Invalid multiplier suffix "
|
||||
"(KiB, MiB, or GiB)");
|
||||
}
|
||||
|
||||
++p;
|
||||
|
|
@ -736,19 +752,19 @@ parse_options(const char **const str, const char *str_end,
|
|||
// Now we must have no chars remaining.
|
||||
if (p < name_eq_value_end) {
|
||||
*str = multiplier_start;
|
||||
return "Invalid multiplier suffix "
|
||||
"(KiB, MiB, or GiB)";
|
||||
return N_("Invalid multiplier suffix "
|
||||
"(KiB, MiB, or GiB)");
|
||||
}
|
||||
|
||||
if (v > (UINT32_MAX >> shift))
|
||||
return "Value out of range";
|
||||
return N_("Value out of range");
|
||||
|
||||
v <<= shift;
|
||||
}
|
||||
|
||||
if (v < optmap[i].u.range.min
|
||||
|| v > optmap[i].u.range.max)
|
||||
return "Value out of range";
|
||||
return N_("Value out of range");
|
||||
}
|
||||
|
||||
// Set the value in filter_options. Enums are handled
|
||||
|
|
@ -810,15 +826,15 @@ parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
|
|||
// string in filter_name_map[i].name.
|
||||
const size_t name_len = (size_t)(name_end - *str);
|
||||
if (name_len > NAME_LEN_MAX)
|
||||
return "Unknown filter name";
|
||||
return N_("Unknown filter name");
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
|
||||
if (memcmp(*str, filter_name_map[i].name, name_len) == 0
|
||||
&& filter_name_map[i].name[name_len] == '\0') {
|
||||
if (only_xz && filter_name_map[i].id
|
||||
>= LZMA_FILTER_RESERVED_START)
|
||||
return "This filter cannot be used in "
|
||||
"the .xz format";
|
||||
return N_("This filter cannot be used in "
|
||||
"the .xz format");
|
||||
|
||||
// Allocate the filter-specific options and
|
||||
// initialize the memory with zeros.
|
||||
|
|
@ -826,7 +842,7 @@ parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
|
|||
filter_name_map[i].opts_size,
|
||||
allocator);
|
||||
if (options == NULL)
|
||||
return "Memory allocation failed";
|
||||
return N_("Memory allocation failed");
|
||||
|
||||
// Filter name was found so the input string is good
|
||||
// at least this far.
|
||||
|
|
@ -846,7 +862,7 @@ parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
|
|||
}
|
||||
}
|
||||
|
||||
return "Unknown filter name";
|
||||
return N_("Unknown filter name");
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -865,8 +881,8 @@ str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
|
|||
++*str;
|
||||
|
||||
if (**str == '\0')
|
||||
return "Empty string is not allowed, "
|
||||
"try \"6\" if a default value is needed";
|
||||
return N_("Empty string is not allowed, "
|
||||
"try '6' if a default value is needed");
|
||||
|
||||
// Detect the type of the string.
|
||||
//
|
||||
|
|
@ -889,7 +905,7 @@ str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
|
|||
// there are no chars other than spaces.
|
||||
for (size_t i = 1; str_end[i] != '\0'; ++i)
|
||||
if (str_end[i] != ' ')
|
||||
return "Unsupported preset";
|
||||
return N_("Unsupported preset");
|
||||
} else {
|
||||
// There are no trailing spaces. Use the whole string.
|
||||
str_end = *str + str_len;
|
||||
|
|
@ -902,11 +918,11 @@ str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
|
|||
|
||||
lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
|
||||
if (opts == NULL)
|
||||
return "Memory allocation failed";
|
||||
return N_("Memory allocation failed");
|
||||
|
||||
if (lzma_lzma_preset(opts, preset)) {
|
||||
lzma_free(opts, allocator);
|
||||
return "Unsupported preset";
|
||||
return N_("Unsupported preset");
|
||||
}
|
||||
|
||||
filters[0].id = LZMA_FILTER_LZMA2;
|
||||
|
|
@ -930,7 +946,7 @@ str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
|
|||
size_t i = 0;
|
||||
do {
|
||||
if (i == LZMA_FILTERS_MAX) {
|
||||
errmsg = "The maximum number of filters is four";
|
||||
errmsg = N_("The maximum number of filters is four");
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
|
@ -952,7 +968,7 @@ str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
|
|||
// Inputs that have "--" at the end or "-- " in the middle
|
||||
// will result in an empty filter name.
|
||||
if (filter_end == *str) {
|
||||
errmsg = "Filter name is missing";
|
||||
errmsg = N_("Filter name is missing");
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
|
@ -979,8 +995,8 @@ str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
|
|||
const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
|
||||
assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
|
||||
if (ret != LZMA_OK) {
|
||||
errmsg = "Invalid filter chain "
|
||||
"('lzma2' missing at the end?)";
|
||||
errmsg = N_("Invalid filter chain "
|
||||
"('lzma2' missing at the end?)");
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
|
@ -1008,17 +1024,26 @@ lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
|
|||
if (error_pos != NULL)
|
||||
*error_pos = 0;
|
||||
|
||||
if (str == NULL || filters == NULL)
|
||||
if (str == NULL || filters == NULL) {
|
||||
// Don't translate this because it's only shown in case of
|
||||
// a programming error.
|
||||
return "Unexpected NULL pointer argument(s) "
|
||||
"to lzma_str_to_filters()";
|
||||
}
|
||||
|
||||
// Validate the flags.
|
||||
const uint32_t supported_flags
|
||||
= LZMA_STR_ALL_FILTERS
|
||||
| LZMA_STR_NO_VALIDATION;
|
||||
|
||||
if (flags & ~supported_flags)
|
||||
if (flags & ~supported_flags) {
|
||||
// This message is possible only if the caller uses flags
|
||||
// that are only supported in a newer liblzma version (or
|
||||
// the flags are simply buggy). Don't translate this at least
|
||||
// when liblzma itself doesn't use gettext; xz and liblzma
|
||||
// are usually upgraded at the same time.
|
||||
return "Unsupported flags to lzma_str_to_filters()";
|
||||
}
|
||||
|
||||
const char *used = str;
|
||||
const char *errmsg = str_to_filters(&used, filters, flags, allocator);
|
||||
|
|
|
|||
|
|
@ -126,3 +126,13 @@ XZ_5.6.0 {
|
|||
global:
|
||||
lzma_mt_block_size;
|
||||
} XZ_5.4;
|
||||
|
||||
XZ_5.8 {
|
||||
global:
|
||||
lzma_bcj_arm64_encode;
|
||||
lzma_bcj_arm64_decode;
|
||||
lzma_bcj_riscv_encode;
|
||||
lzma_bcj_riscv_decode;
|
||||
lzma_bcj_x86_encode;
|
||||
lzma_bcj_x86_decode;
|
||||
} XZ_5.6.0;
|
||||
|
|
|
|||
|
|
@ -141,3 +141,13 @@ XZ_5.6.0 {
|
|||
global:
|
||||
lzma_mt_block_size;
|
||||
} XZ_5.4;
|
||||
|
||||
XZ_5.8 {
|
||||
global:
|
||||
lzma_bcj_arm64_encode;
|
||||
lzma_bcj_arm64_decode;
|
||||
lzma_bcj_riscv_encode;
|
||||
lzma_bcj_riscv_decode;
|
||||
lzma_bcj_x86_encode;
|
||||
lzma_bcj_x86_decode;
|
||||
} XZ_5.6.0;
|
||||
|
|
|
|||
|
|
@ -53,9 +53,9 @@ typedef struct {
|
|||
static void
|
||||
lz_decoder_reset(lzma_coder *coder)
|
||||
{
|
||||
coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX;
|
||||
coder->dict.pos = LZ_DICT_INIT_POS;
|
||||
coder->dict.full = 0;
|
||||
coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0';
|
||||
coder->dict.buf[LZ_DICT_INIT_POS - 1] = '\0';
|
||||
coder->dict.has_wrapped = false;
|
||||
coder->dict.need_reset = false;
|
||||
return;
|
||||
|
|
@ -261,10 +261,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
|||
// recommended to give aligned buffers to liblzma.
|
||||
//
|
||||
// Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is
|
||||
// needed for alloc_size.
|
||||
// needed for alloc_size. Reserve also LZ_DICT_EXTRA bytes of extra
|
||||
// space which is *not* counted in alloc_size or coder->dict.size.
|
||||
//
|
||||
// Avoid integer overflow.
|
||||
if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX)
|
||||
if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX
|
||||
- LZ_DICT_EXTRA)
|
||||
return LZMA_MEM_ERROR;
|
||||
|
||||
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
|
||||
|
|
@ -277,7 +279,13 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
|||
// Allocate and initialize the dictionary.
|
||||
if (coder->dict.size != alloc_size) {
|
||||
lzma_free(coder->dict.buf, allocator);
|
||||
coder->dict.buf = lzma_alloc(alloc_size, allocator);
|
||||
|
||||
// The LZ_DICT_EXTRA bytes at the end of the buffer aren't
|
||||
// included in alloc_size. These extra bytes allow
|
||||
// dict_repeat() to read and write more data than requested.
|
||||
// Otherwise this extra space is ignored.
|
||||
coder->dict.buf = lzma_alloc(alloc_size + LZ_DICT_EXTRA,
|
||||
allocator);
|
||||
if (coder->dict.buf == NULL)
|
||||
return LZMA_MEM_ERROR;
|
||||
|
||||
|
|
@ -320,5 +328,6 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
|||
extern uint64_t
|
||||
lzma_lz_decoder_memusage(size_t dictionary_size)
|
||||
{
|
||||
return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
|
||||
return sizeof(lzma_coder) + (uint64_t)(dictionary_size)
|
||||
+ 2 * LZ_DICT_REPEAT_MAX + LZ_DICT_EXTRA;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,10 +15,40 @@
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#ifdef HAVE_IMMINTRIN_H
|
||||
# include <immintrin.h>
|
||||
#endif
|
||||
|
||||
/// Maximum length of a match rounded up to a nice power of 2 which is
|
||||
/// a good size for aligned memcpy(). The allocated dictionary buffer will
|
||||
/// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
|
||||
|
||||
// dict_repeat() implementation variant:
|
||||
// 0 = Byte-by-byte copying only.
|
||||
// 1 = Use memcpy() for non-overlapping copies.
|
||||
// 2 = Use x86 SSE2 for non-overlapping copies.
|
||||
#ifndef LZMA_LZ_DECODER_CONFIG
|
||||
# if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
|
||||
&& defined(HAVE_IMMINTRIN_H) \
|
||||
&& (defined(__SSE2__) || defined(_M_X64) \
|
||||
|| (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
|
||||
# define LZMA_LZ_DECODER_CONFIG 2
|
||||
# else
|
||||
# define LZMA_LZ_DECODER_CONFIG 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods
|
||||
/// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount
|
||||
/// of extra space is needed at the end of the allocated dictionary buffer.
|
||||
///
|
||||
/// NOTE: If this is increased, update LZ_DICT_REPEAT_MAX too.
|
||||
#if LZMA_LZ_DECODER_CONFIG >= 2
|
||||
# define LZ_DICT_EXTRA 32
|
||||
#else
|
||||
# define LZ_DICT_EXTRA 0
|
||||
#endif
|
||||
|
||||
/// Maximum number of bytes that dict_repeat() may copy. The allocated
|
||||
/// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZ_DICT_EXTRA bytes
|
||||
/// larger than the actual dictionary size:
|
||||
///
|
||||
/// (1) Every time the decoder reaches the end of the dictionary buffer,
|
||||
/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
|
||||
|
|
@ -27,14 +57,26 @@
|
|||
///
|
||||
/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
|
||||
/// the oldest byte still in the dictionary and the current write
|
||||
/// position. This way dict_repeat(dict, dict->size - 1, &len)
|
||||
/// position. This way dict_repeat() with the maximum valid distance
|
||||
/// won't need memmove() as the copying cannot overlap.
|
||||
///
|
||||
/// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer
|
||||
/// so that extra copying done by dict_repeat() won't write or read past
|
||||
/// the end of the allocated buffer. This amount is *not* counted as part
|
||||
/// of lzma_dict.size.
|
||||
///
|
||||
/// Note that memcpy() still cannot be used if distance < len.
|
||||
///
|
||||
/// LZMA's longest match length is 273 so pick a multiple of 16 above that.
|
||||
/// LZMA's longest match length is 273 bytes. The LZMA decoder looks at
|
||||
/// the lowest four bits of the dictionary position, thus 273 must be
|
||||
/// rounded up to the next multiple of 16 (288). In addition, optimized
|
||||
/// dict_repeat() copies 32 bytes at a time, thus this must also be
|
||||
/// a multiple of 32.
|
||||
#define LZ_DICT_REPEAT_MAX 288
|
||||
|
||||
/// Initial position in lzma_dict.buf when the dictionary is empty.
|
||||
#define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX)
|
||||
|
||||
|
||||
typedef struct {
|
||||
/// Pointer to the dictionary buffer.
|
||||
|
|
@ -158,7 +200,8 @@ dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
|
|||
|
||||
/// Repeat *len bytes at distance.
|
||||
static inline bool
|
||||
dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
|
||||
dict_repeat(lzma_dict *restrict dict,
|
||||
uint32_t distance, uint32_t *restrict len)
|
||||
{
|
||||
// Don't write past the end of the dictionary.
|
||||
const size_t dict_avail = dict->limit - dict->pos;
|
||||
|
|
@ -169,9 +212,17 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
|
|||
if (distance >= dict->pos)
|
||||
back += dict->size - LZ_DICT_REPEAT_MAX;
|
||||
|
||||
// Repeat a block of data from the history. Because memcpy() is faster
|
||||
// than copying byte by byte in a loop, the copying process gets split
|
||||
// into two cases.
|
||||
#if LZMA_LZ_DECODER_CONFIG == 0
|
||||
// Minimal byte-by-byte method. This might be the least bad choice
|
||||
// if memcpy() isn't fast and there's no replacement for it below.
|
||||
while (left-- > 0) {
|
||||
dict->buf[dict->pos++] = dict->buf[back++];
|
||||
}
|
||||
|
||||
#else
|
||||
// Because memcpy() or a similar method can be faster than copying
|
||||
// byte by byte in a loop, the copying process is split into
|
||||
// two cases.
|
||||
if (distance < left) {
|
||||
// Source and target areas overlap, thus we can't use
|
||||
// memcpy() nor even memmove() safely.
|
||||
|
|
@ -179,32 +230,56 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
|
|||
dict->buf[dict->pos++] = dict->buf[back++];
|
||||
} while (--left > 0);
|
||||
} else {
|
||||
# if LZMA_LZ_DECODER_CONFIG == 1
|
||||
memcpy(dict->buf + dict->pos, dict->buf + back, left);
|
||||
dict->pos += left;
|
||||
|
||||
# elif LZMA_LZ_DECODER_CONFIG == 2
|
||||
// This can copy up to 32 bytes more than required.
|
||||
// (If left == 0, we still copy 32 bytes.)
|
||||
size_t pos = dict->pos;
|
||||
dict->pos += left;
|
||||
do {
|
||||
const __m128i x0 = _mm_loadu_si128(
|
||||
(__m128i *)(dict->buf + back));
|
||||
const __m128i x1 = _mm_loadu_si128(
|
||||
(__m128i *)(dict->buf + back + 16));
|
||||
back += 32;
|
||||
_mm_storeu_si128(
|
||||
(__m128i *)(dict->buf + pos), x0);
|
||||
_mm_storeu_si128(
|
||||
(__m128i *)(dict->buf + pos + 16), x1);
|
||||
pos += 32;
|
||||
} while (pos < dict->pos);
|
||||
|
||||
# else
|
||||
# error "Invalid LZMA_LZ_DECODER_CONFIG value"
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Update how full the dictionary is.
|
||||
if (!dict->has_wrapped)
|
||||
dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
|
||||
dict->full = dict->pos - LZ_DICT_INIT_POS;
|
||||
|
||||
return *len != 0;
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
dict_put(lzma_dict *dict, uint8_t byte)
|
||||
dict_put(lzma_dict *restrict dict, uint8_t byte)
|
||||
{
|
||||
dict->buf[dict->pos++] = byte;
|
||||
|
||||
if (!dict->has_wrapped)
|
||||
dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
|
||||
dict->full = dict->pos - LZ_DICT_INIT_POS;
|
||||
}
|
||||
|
||||
|
||||
/// Puts one byte into the dictionary. Returns true if the dictionary was
|
||||
/// already full and the byte couldn't be added.
|
||||
static inline bool
|
||||
dict_put_safe(lzma_dict *dict, uint8_t byte)
|
||||
dict_put_safe(lzma_dict *restrict dict, uint8_t byte)
|
||||
{
|
||||
if (unlikely(dict->pos == dict->limit))
|
||||
return true;
|
||||
|
|
@ -234,7 +309,7 @@ dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
|
|||
dict->buf, &dict->pos, dict->limit);
|
||||
|
||||
if (!dict->has_wrapped)
|
||||
dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
|
||||
dict->full = dict->pos - LZ_DICT_INIT_POS;
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
// See lz_encoder_hash.h. This is a bit hackish but avoids making
|
||||
// endianness a conditional in makefiles.
|
||||
#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
|
||||
#ifdef LZMA_LZ_HASH_TABLE_IS_NEEDED
|
||||
# include "lz_encoder_hash_table.h"
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -5,23 +5,37 @@
|
|||
/// \file lz_encoder_hash.h
|
||||
/// \brief Hash macros for match finders
|
||||
//
|
||||
// Author: Igor Pavlov
|
||||
// Authors: Igor Pavlov
|
||||
// Lasse Collin
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef LZMA_LZ_ENCODER_HASH_H
|
||||
#define LZMA_LZ_ENCODER_HASH_H
|
||||
|
||||
#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
|
||||
// This is to make liblzma produce the same output on big endian
|
||||
// systems that it does on little endian systems. lz_encoder.c
|
||||
// takes care of including the actual table.
|
||||
// We need to know if CRC32_GENERIC is defined and we may need the declaration
|
||||
// of lzma_crc32_table[][].
|
||||
#include "crc_common.h"
|
||||
|
||||
// If HAVE_SMALL is defined, then lzma_crc32_table[][] exists and
|
||||
// it's little endian even on big endian systems.
|
||||
//
|
||||
// If HAVE_SMALL isn't defined, lzma_crc32_table[][] is in native endian
|
||||
// but we want a little endian one so that the compressed output won't
|
||||
// depend on the processor endianness. Big endian systems are less common
|
||||
// so those get the burden of an extra 1 KiB table.
|
||||
//
|
||||
// If HAVE_SMALL isn't defined and CRC32_GENERIC isn't defined either,
|
||||
// then lzma_crc32_table[][] doesn't exist.
|
||||
#if defined(HAVE_SMALL) \
|
||||
|| (defined(CRC32_GENERIC) && !defined(WORDS_BIGENDIAN))
|
||||
# define hash_table lzma_crc32_table[0]
|
||||
#else
|
||||
// lz_encoder.c takes care of including the actual table.
|
||||
lzma_attr_visibility_hidden
|
||||
extern const uint32_t lzma_lz_hash_table[256];
|
||||
# define hash_table lzma_lz_hash_table
|
||||
#else
|
||||
# include "check.h"
|
||||
# define hash_table lzma_crc32_table[0]
|
||||
# define LZMA_LZ_HASH_TABLE_IS_NEEDED 1
|
||||
#endif
|
||||
|
||||
#define HASH_2_SIZE (UINT32_C(1) << 10)
|
||||
|
|
|
|||
|
|
@ -159,8 +159,7 @@ lzma2_encode(void *coder_ptr, lzma_mf *restrict mf,
|
|||
coder->uncompressed_size = 0;
|
||||
coder->compressed_size = 0;
|
||||
coder->sequence = SEQ_LZMA_ENCODE;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_LZMA_ENCODE: {
|
||||
// Calculate how much more uncompressed data this chunk
|
||||
|
|
@ -219,10 +218,9 @@ lzma2_encode(void *coder_ptr, lzma_mf *restrict mf,
|
|||
lzma2_header_lzma(coder);
|
||||
|
||||
coder->sequence = SEQ_LZMA_COPY;
|
||||
FALLTHROUGH;
|
||||
}
|
||||
|
||||
// Fall through
|
||||
|
||||
case SEQ_LZMA_COPY:
|
||||
// Copy the compressed chunk along its headers to the
|
||||
// output buffer.
|
||||
|
|
@ -244,8 +242,7 @@ lzma2_encode(void *coder_ptr, lzma_mf *restrict mf,
|
|||
return LZMA_OK;
|
||||
|
||||
coder->sequence = SEQ_UNCOMPRESSED_COPY;
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case SEQ_UNCOMPRESSED_COPY:
|
||||
// Copy the uncompressed data as is from the dictionary
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
// The macros unroll loops with switch statements.
|
||||
// Silence warnings about missing fall-through comments.
|
||||
#if TUKLIB_GNUC_REQ(7, 0)
|
||||
#if TUKLIB_GNUC_REQ(7, 0) || defined(__clang__)
|
||||
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -18,8 +18,10 @@ arm_code(void *simple lzma_attribute((__unused__)),
|
|||
uint32_t now_pos, bool is_encoder,
|
||||
uint8_t *buffer, size_t size)
|
||||
{
|
||||
size &= ~(size_t)3;
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
for (i = 0; i < size; i += 4) {
|
||||
if (buffer[i + 3] == 0xEB) {
|
||||
uint32_t src = ((uint32_t)(buffer[i + 2]) << 16)
|
||||
| ((uint32_t)(buffer[i + 1]) << 8)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ arm64_code(void *simple lzma_attribute((__unused__)),
|
|||
uint32_t now_pos, bool is_encoder,
|
||||
uint8_t *buffer, size_t size)
|
||||
{
|
||||
size &= ~(size_t)3;
|
||||
|
||||
size_t i;
|
||||
|
||||
// Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
|
||||
|
|
@ -37,7 +39,7 @@ arm64_code(void *simple lzma_attribute((__unused__)),
|
|||
#ifdef __clang__
|
||||
# pragma clang loop vectorize(disable)
|
||||
#endif
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
for (i = 0; i < size; i += 4) {
|
||||
uint32_t pc = (uint32_t)(now_pos + i);
|
||||
uint32_t instr = read32le(buffer + i);
|
||||
|
||||
|
|
@ -122,6 +124,15 @@ lzma_simple_arm64_encoder_init(lzma_next_coder *next,
|
|||
{
|
||||
return arm64_coder_init(next, allocator, filters, true);
|
||||
}
|
||||
|
||||
|
||||
extern LZMA_API(size_t)
|
||||
lzma_bcj_arm64_encode(uint32_t start_offset, uint8_t *buf, size_t size)
|
||||
{
|
||||
// start_offset must be a multiple of four.
|
||||
start_offset &= ~UINT32_C(3);
|
||||
return arm64_code(NULL, start_offset, true, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -133,4 +144,13 @@ lzma_simple_arm64_decoder_init(lzma_next_coder *next,
|
|||
{
|
||||
return arm64_coder_init(next, allocator, filters, false);
|
||||
}
|
||||
|
||||
|
||||
extern LZMA_API(size_t)
|
||||
lzma_bcj_arm64_decode(uint32_t start_offset, uint8_t *buf, size_t size)
|
||||
{
|
||||
// start_offset must be a multiple of four.
|
||||
start_offset &= ~UINT32_C(3);
|
||||
return arm64_code(NULL, start_offset, false, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -18,8 +18,13 @@ armthumb_code(void *simple lzma_attribute((__unused__)),
|
|||
uint32_t now_pos, bool is_encoder,
|
||||
uint8_t *buffer, size_t size)
|
||||
{
|
||||
if (size < 4)
|
||||
return 0;
|
||||
|
||||
size -= 4;
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= size; i += 2) {
|
||||
for (i = 0; i <= size; i += 2) {
|
||||
if ((buffer[i + 1] & 0xF8) == 0xF0
|
||||
&& (buffer[i + 3] & 0xF8) == 0xF8) {
|
||||
uint32_t src = (((uint32_t)(buffer[i + 1]) & 7) << 19)
|
||||
|
|
|
|||
|
|
@ -25,8 +25,10 @@ ia64_code(void *simple lzma_attribute((__unused__)),
|
|||
4, 4, 0, 0, 4, 4, 0, 0
|
||||
};
|
||||
|
||||
size &= ~(size_t)15;
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 16 <= size; i += 16) {
|
||||
for (i = 0; i < size; i += 16) {
|
||||
const uint32_t instr_template = buffer[i] & 0x1F;
|
||||
const uint32_t mask = BRANCH_TABLE[instr_template];
|
||||
uint32_t bit_pos = 5;
|
||||
|
|
|
|||
|
|
@ -18,8 +18,10 @@ powerpc_code(void *simple lzma_attribute((__unused__)),
|
|||
uint32_t now_pos, bool is_encoder,
|
||||
uint8_t *buffer, size_t size)
|
||||
{
|
||||
size &= ~(size_t)3;
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
for (i = 0; i < size; i += 4) {
|
||||
// PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link)
|
||||
if ((buffer[i] >> 2) == 0x12
|
||||
&& ((buffer[i + 3] & 3) == 1)) {
|
||||
|
|
|
|||
|
|
@ -617,6 +617,15 @@ lzma_simple_riscv_encoder_init(lzma_next_coder *next,
|
|||
return lzma_simple_coder_init(next, allocator, filters,
|
||||
&riscv_encode, 0, 8, 2, true);
|
||||
}
|
||||
|
||||
|
||||
extern LZMA_API(size_t)
|
||||
lzma_bcj_riscv_encode(uint32_t start_offset, uint8_t *buf, size_t size)
|
||||
{
|
||||
// start_offset must be a multiple of two.
|
||||
start_offset &= ~UINT32_C(1);
|
||||
return riscv_encode(NULL, start_offset, true, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -752,4 +761,13 @@ lzma_simple_riscv_decoder_init(lzma_next_coder *next,
|
|||
return lzma_simple_coder_init(next, allocator, filters,
|
||||
&riscv_decode, 0, 8, 2, false);
|
||||
}
|
||||
|
||||
|
||||
extern LZMA_API(size_t)
|
||||
lzma_bcj_riscv_decode(uint32_t start_offset, uint8_t *buf, size_t size)
|
||||
{
|
||||
// start_offset must be a multiple of two.
|
||||
start_offset &= ~UINT32_C(1);
|
||||
return riscv_decode(NULL, start_offset, false, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -18,9 +18,10 @@ sparc_code(void *simple lzma_attribute((__unused__)),
|
|||
uint32_t now_pos, bool is_encoder,
|
||||
uint8_t *buffer, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
size &= ~(size_t)3;
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i < size; i += 4) {
|
||||
if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00)
|
||||
|| (buffer[i] == 0x7F
|
||||
&& (buffer[i + 1] & 0xC0) == 0xC0)) {
|
||||
|
|
|
|||
|
|
@ -143,6 +143,18 @@ lzma_simple_x86_encoder_init(lzma_next_coder *next,
|
|||
{
|
||||
return x86_coder_init(next, allocator, filters, true);
|
||||
}
|
||||
|
||||
|
||||
extern LZMA_API(size_t)
|
||||
lzma_bcj_x86_encode(uint32_t start_offset, uint8_t *buf, size_t size)
|
||||
{
|
||||
lzma_simple_x86 simple = {
|
||||
.prev_mask = 0,
|
||||
.prev_pos = (uint32_t)(-5),
|
||||
};
|
||||
|
||||
return x86_code(&simple, start_offset, true, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -154,4 +166,16 @@ lzma_simple_x86_decoder_init(lzma_next_coder *next,
|
|||
{
|
||||
return x86_coder_init(next, allocator, filters, false);
|
||||
}
|
||||
|
||||
|
||||
extern LZMA_API(size_t)
|
||||
lzma_bcj_x86_decode(uint32_t start_offset, uint8_t *buf, size_t size)
|
||||
{
|
||||
lzma_simple_x86 simple = {
|
||||
.prev_mask = 0,
|
||||
.prev_pos = (uint32_t)(-5),
|
||||
};
|
||||
|
||||
return x86_code(&simple, start_offset, false, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@
|
|||
#include "getopt.h"
|
||||
#include "tuklib_gettext.h"
|
||||
#include "tuklib_progname.h"
|
||||
#include "tuklib_mbstr_nonprint.h"
|
||||
#include "tuklib_mbstr_wrap.h"
|
||||
#include "tuklib_exit.h"
|
||||
|
||||
#ifdef TUKLIB_DOSLIKE
|
||||
|
|
@ -29,17 +31,36 @@ tuklib_attr_noreturn
|
|||
static void
|
||||
help(void)
|
||||
{
|
||||
printf(
|
||||
_("Usage: %s [--help] [--version] [FILE]...\n"
|
||||
"Show information stored in the .lzma file header"), progname);
|
||||
// A few languages use so long strings that we need automatic
|
||||
// wrapping. A few strings are the same as in xz/message.c and
|
||||
// should be kept in sync.
|
||||
static const struct tuklib_wrap_opt wrap0 = { 0, 0, 0, 0, 79 };
|
||||
int e = 0;
|
||||
|
||||
printf(_(
|
||||
"\nWith no FILE, or when FILE is -, read standard input.\n"));
|
||||
printf("\n");
|
||||
printf(_("Usage: %s [--help] [--version] [FILE]...\n"), progname);
|
||||
|
||||
printf(_("Report bugs to <%s> (in English or Finnish).\n"),
|
||||
e |= tuklib_wraps(stdout, &wrap0,
|
||||
W_("Show information stored in the .lzma file header."));
|
||||
e |= tuklib_wraps(stdout, &wrap0,
|
||||
W_("With no FILE, or when FILE is -, read standard input."));
|
||||
|
||||
putchar('\n');
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap0,
|
||||
W_("Report bugs to <%s> (in English or Finnish)."),
|
||||
PACKAGE_BUGREPORT);
|
||||
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap0,
|
||||
W_("%s home page: <%s>"), PACKAGE_NAME, PACKAGE_URL);
|
||||
|
||||
if (e != 0) {
|
||||
// Avoid new translatable strings by printing the message
|
||||
// in pieces.
|
||||
fprintf(stderr, _("%s: "), progname);
|
||||
fprintf(stderr, _("Error printing the help text "
|
||||
"(error code %d)"), e);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true);
|
||||
}
|
||||
|
|
@ -104,7 +125,8 @@ lzmainfo(const char *name, FILE *f)
|
|||
uint8_t buf[13];
|
||||
const size_t size = fread(buf, 1, sizeof(buf), f);
|
||||
if (size != 13) {
|
||||
fprintf(stderr, "%s: %s: %s\n", progname, name,
|
||||
fprintf(stderr, "%s: %s: %s\n", progname,
|
||||
tuklib_mask_nonprint(name),
|
||||
ferror(f) ? strerror(errno)
|
||||
: _("File is too small to be a .lzma file"));
|
||||
return true;
|
||||
|
|
@ -118,7 +140,8 @@ lzmainfo(const char *name, FILE *f)
|
|||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
fprintf(stderr, "%s: %s: %s\n", progname, name,
|
||||
fprintf(stderr, "%s: %s: %s\n", progname,
|
||||
tuklib_mask_nonprint(name),
|
||||
_("Not a .lzma file"));
|
||||
return true;
|
||||
|
||||
|
|
@ -142,7 +165,7 @@ lzmainfo(const char *name, FILE *f)
|
|||
// this output and we don't want to break that when people move
|
||||
// from LZMA Utils to XZ Utils.
|
||||
if (f != stdin)
|
||||
printf("%s\n", name);
|
||||
printf("%s\n", tuklib_mask_nonprint(name));
|
||||
|
||||
printf("Uncompressed size: ");
|
||||
if (uncompressed_size == UINT64_MAX)
|
||||
|
|
@ -200,9 +223,10 @@ main(int argc, char **argv)
|
|||
if (f == NULL) {
|
||||
ret = EXIT_FAILURE;
|
||||
fprintf(stderr, "%s: %s: %s\n",
|
||||
progname,
|
||||
argv[optind],
|
||||
strerror(errno));
|
||||
progname,
|
||||
tuklib_mask_nonprint(
|
||||
argv[optind]),
|
||||
strerror(errno));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
bool opt_stdout = false;
|
||||
bool opt_force = false;
|
||||
bool opt_keep_original = false;
|
||||
bool opt_synchronous = true;
|
||||
bool opt_robot = false;
|
||||
bool opt_ignore_check = false;
|
||||
|
||||
|
|
@ -217,6 +218,7 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
OPT_LZMA1,
|
||||
OPT_LZMA2,
|
||||
|
||||
OPT_NO_SYNC,
|
||||
OPT_SINGLE_STREAM,
|
||||
OPT_NO_SPARSE,
|
||||
OPT_FILES,
|
||||
|
|
@ -249,6 +251,7 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
{ "force", no_argument, NULL, 'f' },
|
||||
{ "stdout", no_argument, NULL, 'c' },
|
||||
{ "to-stdout", no_argument, NULL, 'c' },
|
||||
{ "no-sync", no_argument, NULL, OPT_NO_SYNC },
|
||||
{ "single-stream", no_argument, NULL, OPT_SINGLE_STREAM },
|
||||
{ "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
|
||||
{ "suffix", required_argument, NULL, 'S' },
|
||||
|
|
@ -275,17 +278,17 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
{ "best", no_argument, NULL, '9' },
|
||||
|
||||
// Filters
|
||||
{ "filters", optional_argument, NULL, OPT_FILTERS},
|
||||
{ "filters1", optional_argument, NULL, OPT_FILTERS1},
|
||||
{ "filters2", optional_argument, NULL, OPT_FILTERS2},
|
||||
{ "filters3", optional_argument, NULL, OPT_FILTERS3},
|
||||
{ "filters4", optional_argument, NULL, OPT_FILTERS4},
|
||||
{ "filters5", optional_argument, NULL, OPT_FILTERS5},
|
||||
{ "filters6", optional_argument, NULL, OPT_FILTERS6},
|
||||
{ "filters7", optional_argument, NULL, OPT_FILTERS7},
|
||||
{ "filters8", optional_argument, NULL, OPT_FILTERS8},
|
||||
{ "filters9", optional_argument, NULL, OPT_FILTERS9},
|
||||
{ "filters-help", optional_argument, NULL, OPT_FILTERS_HELP},
|
||||
{ "filters", required_argument, NULL, OPT_FILTERS},
|
||||
{ "filters1", required_argument, NULL, OPT_FILTERS1},
|
||||
{ "filters2", required_argument, NULL, OPT_FILTERS2},
|
||||
{ "filters3", required_argument, NULL, OPT_FILTERS3},
|
||||
{ "filters4", required_argument, NULL, OPT_FILTERS4},
|
||||
{ "filters5", required_argument, NULL, OPT_FILTERS5},
|
||||
{ "filters6", required_argument, NULL, OPT_FILTERS6},
|
||||
{ "filters7", required_argument, NULL, OPT_FILTERS7},
|
||||
{ "filters8", required_argument, NULL, OPT_FILTERS8},
|
||||
{ "filters9", required_argument, NULL, OPT_FILTERS9},
|
||||
{ "filters-help", no_argument, NULL, OPT_FILTERS_HELP},
|
||||
|
||||
{ "lzma1", optional_argument, NULL, OPT_LZMA1 },
|
||||
{ "lzma2", optional_argument, NULL, OPT_LZMA2 },
|
||||
|
|
@ -612,6 +615,9 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
|
||||
case OPT_SINGLE_STREAM:
|
||||
opt_single_stream = true;
|
||||
|
||||
// Since 5.7.1alpha --single-stream implies --keep.
|
||||
opt_keep_original = true;
|
||||
break;
|
||||
|
||||
case OPT_NO_SPARSE:
|
||||
|
|
@ -621,7 +627,7 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
case OPT_FILES:
|
||||
args->files_delim = '\n';
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case OPT_FILES0:
|
||||
if (args->files_name != NULL)
|
||||
|
|
@ -655,6 +661,10 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
optarg, 0, UINT64_MAX);
|
||||
break;
|
||||
|
||||
case OPT_NO_SYNC:
|
||||
opt_synchronous = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
message_try_help();
|
||||
tuklib_exit(E_ERROR, E_ERROR, false);
|
||||
|
|
@ -823,6 +833,13 @@ args_parse(args_info *args, int argc, char **argv)
|
|||
opt_stdout = true;
|
||||
}
|
||||
|
||||
// Don't use fsync() if --keep is specified or implied.
|
||||
// However, don't document this as "--keep implies --no-sync"
|
||||
// because if syncing support was added to --flush-timeout,
|
||||
// it would sync even if --keep was specified.
|
||||
if (opt_keep_original)
|
||||
opt_synchronous = false;
|
||||
|
||||
// When compressing, if no --format flag was used, or it
|
||||
// was --format=auto, we compress to the .xz format.
|
||||
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ typedef struct {
|
|||
extern bool opt_stdout;
|
||||
extern bool opt_force;
|
||||
extern bool opt_keep_original;
|
||||
// extern bool opt_recursive;
|
||||
extern bool opt_synchronous;
|
||||
extern bool opt_robot;
|
||||
extern bool opt_ignore_check;
|
||||
|
||||
|
|
|
|||
|
|
@ -168,16 +168,13 @@ str_to_filters(const char *str, uint32_t index, uint32_t flags)
|
|||
if (index > 0)
|
||||
filter_num[0] = '0' + index;
|
||||
|
||||
// FIXME? The message in err isn't translated.
|
||||
// Including the translations in the xz translations is
|
||||
// slightly ugly but possible. Creating a new domain for
|
||||
// liblzma might not be worth it especially since on some
|
||||
// OSes it adds extra dependencies to translation libraries.
|
||||
// liblzma doesn't translate the error messages but
|
||||
// the messages are included in xz's translations.
|
||||
message(V_ERROR, _("Error in --filters%s=FILTERS option:"),
|
||||
filter_num);
|
||||
message(V_ERROR, "%s", str);
|
||||
message(V_ERROR, "%*s^", error_pos, "");
|
||||
message_fatal("%s", err);
|
||||
message_fatal("%s", _(err));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1003,8 +1000,9 @@ coder_init(file_pair *pair)
|
|||
strm.avail_out = 0;
|
||||
while ((ret = lzma_code(&strm, LZMA_RUN))
|
||||
== LZMA_UNSUPPORTED_CHECK)
|
||||
message_warning(_("%s: %s"), pair->src_name,
|
||||
message_strm(ret));
|
||||
message_warning(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(ret));
|
||||
|
||||
// With --single-stream lzma_code won't wait for
|
||||
// LZMA_FINISH and thus it can return LZMA_STREAM_END
|
||||
|
|
@ -1019,7 +1017,9 @@ coder_init(file_pair *pair)
|
|||
}
|
||||
|
||||
if (ret != LZMA_OK) {
|
||||
message_error(_("%s: %s"), pair->src_name, message_strm(ret));
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(ret));
|
||||
if (ret == LZMA_MEMLIMIT_ERROR)
|
||||
message_mem_needed(V_ERROR, lzma_memusage(&strm));
|
||||
|
||||
|
|
@ -1320,11 +1320,13 @@ coder_normal(file_pair *pair)
|
|||
// wrong and we print an error. Otherwise it's just
|
||||
// a warning and coding can continue.
|
||||
if (stop) {
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_strm(ret));
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(ret));
|
||||
} else {
|
||||
message_warning(_("%s: %s"), pair->src_name,
|
||||
message_strm(ret));
|
||||
message_warning(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(ret));
|
||||
|
||||
// When compressing, all possible errors set
|
||||
// stop to true.
|
||||
|
|
|
|||
260
src/xz/file_io.c
260
src/xz/file_io.c
|
|
@ -17,6 +17,7 @@
|
|||
# include <io.h>
|
||||
#else
|
||||
# include <poll.h>
|
||||
# include <libgen.h>
|
||||
static bool warn_fchown;
|
||||
#endif
|
||||
|
||||
|
|
@ -56,6 +57,10 @@ static bool warn_fchown;
|
|||
# define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
# define fsync _commit
|
||||
#endif
|
||||
|
||||
#ifndef O_BINARY
|
||||
# define O_BINARY 0
|
||||
#endif
|
||||
|
|
@ -64,6 +69,25 @@ static bool warn_fchown;
|
|||
# define O_NOCTTY 0
|
||||
#endif
|
||||
|
||||
// In musl 1.2.5, O_SEARCH is defined to O_PATH. As of Linux 6.12,
|
||||
// a file descriptor from open("dir", O_SEARCH | O_DIRECTORY) cannot be
|
||||
// used with fsync() (fails with EBADF). musl 1.2.5 doesn't emulate it
|
||||
// using /proc/self/fd. Even if it did, it might need to do it with
|
||||
// fd = open("/proc/...", O_RDONLY); fsync(fd); which fails if the
|
||||
// directory lacks read permission. Since we need a working fsync(),
|
||||
// O_RDONLY imitates O_SEARCH better than O_PATH.
|
||||
#if defined(O_SEARCH) && defined(O_PATH) && O_SEARCH == O_PATH
|
||||
# undef O_SEARCH
|
||||
#endif
|
||||
|
||||
#ifndef O_SEARCH
|
||||
# define O_SEARCH O_RDONLY
|
||||
#endif
|
||||
|
||||
#ifndef O_DIRECTORY
|
||||
# define O_DIRECTORY 0
|
||||
#endif
|
||||
|
||||
// Using this macro to silence a warning from gcc -Wlogical-op.
|
||||
#if EAGAIN == EWOULDBLOCK
|
||||
# define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
|
||||
|
|
@ -205,8 +229,9 @@ io_wait(file_pair *pair, int timeout, bool is_reading)
|
|||
continue;
|
||||
|
||||
message_error(_("%s: poll() failed: %s"),
|
||||
is_reading ? pair->src_name
|
||||
: pair->dest_name,
|
||||
tuklib_mask_nonprint(is_reading
|
||||
? pair->src_name
|
||||
: pair->dest_name),
|
||||
strerror(errno));
|
||||
return IO_WAIT_ERROR;
|
||||
}
|
||||
|
|
@ -272,14 +297,15 @@ io_unlink(const char *name, const struct stat *known_st)
|
|||
// of the original file, and in that case it obviously
|
||||
// shouldn't be removed.
|
||||
message_warning(_("%s: File seems to have been moved, "
|
||||
"not removing"), name);
|
||||
"not removing"), tuklib_mask_nonprint(name));
|
||||
else
|
||||
#endif
|
||||
// There's a race condition between lstat() and unlink()
|
||||
// but at least we have tried to avoid removing wrong file.
|
||||
if (unlink(name))
|
||||
message_warning(_("%s: Cannot remove: %s"),
|
||||
name, strerror(errno));
|
||||
tuklib_mask_nonprint(name),
|
||||
strerror(errno));
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
@ -305,7 +331,8 @@ io_copy_attrs(const file_pair *pair)
|
|||
if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
|
||||
&& warn_fchown)
|
||||
message_warning(_("%s: Cannot set the file owner: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
|
||||
mode_t mode;
|
||||
|
||||
|
|
@ -318,7 +345,8 @@ io_copy_attrs(const file_pair *pair)
|
|||
&& fchown(pair->dest_fd, (uid_t)(-1),
|
||||
pair->src_st.st_gid)) {
|
||||
message_warning(_("%s: Cannot set the file group: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
// We can still safely copy some additional permissions:
|
||||
// 'group' must be at least as strict as 'other' and
|
||||
// also vice versa.
|
||||
|
|
@ -337,7 +365,8 @@ io_copy_attrs(const file_pair *pair)
|
|||
|
||||
if (fchmod(pair->dest_fd, mode))
|
||||
message_warning(_("%s: Cannot set the file permissions: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
#endif
|
||||
|
||||
// Copy the timestamps. We have several possible ways to do this, of
|
||||
|
|
@ -445,6 +474,39 @@ io_copy_attrs(const file_pair *pair)
|
|||
}
|
||||
|
||||
|
||||
/// \brief Synchronizes the destination file to permanent storage
|
||||
///
/// The destination file itself is synced first with fsync(). On systems
/// that aren't TUKLIB_DOSLIKE, the directory referred to by pair->dir_fd
/// is synced after that.
///
|
||||
/// \param pair File pair having the destination file open for writing
|
||||
///
|
||||
/// \return On success, false is returned. On error, error message
|
||||
/// is printed and true is returned.
|
||||
static bool
|
||||
io_sync_dest(file_pair *pair)
|
||||
{
|
||||
// The destination must be an open file that isn't standard output.
assert(pair->dest_fd != -1);
|
||||
assert(pair->dest_fd != STDOUT_FILENO);
|
||||
|
||||
// Sync the file first. If this fails, the directory isn't synced at all.
if (fsync(pair->dest_fd)) {
|
||||
message_error(_("%s: Synchronizing the file failed: %s"),
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef TUKLIB_DOSLIKE
|
||||
// Sync the directory of the destination file too. The dir_fd member
// exists only when TUKLIB_DOSLIKE isn't defined. The error message
// still names the destination file, not the directory.
if (fsync(pair->dir_fd)) {
|
||||
message_error(_("%s: Synchronizing the directory of "
|
||||
"the file failed: %s"),
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// Opens the source file. Returns false on success, true on error.
|
||||
static bool
|
||||
io_open_src_real(file_pair *pair)
|
||||
|
|
@ -515,13 +577,15 @@ io_open_src_real(file_pair *pair)
|
|||
if (!follow_symlinks) {
|
||||
struct stat st;
|
||||
if (lstat(pair->src_name, &st)) {
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
strerror(errno));
|
||||
return true;
|
||||
|
||||
} else if (S_ISLNK(st.st_mode)) {
|
||||
message_warning(_("%s: Is a symbolic link, "
|
||||
"skipping"), pair->src_name);
|
||||
"skipping"),
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -583,13 +647,15 @@ io_open_src_real(file_pair *pair)
|
|||
|
||||
if (was_symlink)
|
||||
message_warning(_("%s: Is a symbolic link, "
|
||||
"skipping"), pair->src_name);
|
||||
"skipping"),
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
else
|
||||
#endif
|
||||
// Something else than O_NOFOLLOW failing
|
||||
// (assuming that the race conditions didn't
|
||||
// confuse us).
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
strerror(errno));
|
||||
|
||||
return true;
|
||||
|
|
@ -612,13 +678,13 @@ io_open_src_real(file_pair *pair)
|
|||
|
||||
if (S_ISDIR(pair->src_st.st_mode)) {
|
||||
message_warning(_("%s: Is a directory, skipping"),
|
||||
pair->src_name);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
|
||||
message_warning(_("%s: Not a regular file, skipping"),
|
||||
pair->src_name);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
|
@ -636,21 +702,21 @@ io_open_src_real(file_pair *pair)
|
|||
// explicitly in io_copy_attrs().
|
||||
message_warning(_("%s: File has setuid or "
|
||||
"setgid bit set, skipping"),
|
||||
pair->src_name);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (pair->src_st.st_mode & S_ISVTX) {
|
||||
message_warning(_("%s: File has sticky bit "
|
||||
"set, skipping"),
|
||||
pair->src_name);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (pair->src_st.st_nlink > 1) {
|
||||
message_warning(_("%s: Input file has more "
|
||||
"than one hard link, "
|
||||
"skipping"), pair->src_name);
|
||||
"than one hard link, skipping"),
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
|
@ -679,7 +745,8 @@ io_open_src_real(file_pair *pair)
|
|||
return false;
|
||||
|
||||
error_msg:
|
||||
message_error(_("%s: %s"), pair->src_name, strerror(errno));
|
||||
message_error(_("%s: %s"), tuklib_mask_nonprint(pair->src_name),
|
||||
strerror(errno));
|
||||
error:
|
||||
(void)close(pair->src_fd);
|
||||
return true;
|
||||
|
|
@ -707,6 +774,9 @@ io_open_src(const char *src_name)
|
|||
.dest_name = NULL,
|
||||
.src_fd = -1,
|
||||
.dest_fd = -1,
|
||||
#ifndef TUKLIB_DOSLIKE
|
||||
.dir_fd = -1,
|
||||
#endif
|
||||
.src_eof = false,
|
||||
.src_has_seen_input = false,
|
||||
.flush_needed = false,
|
||||
|
|
@ -809,6 +879,56 @@ io_open_dest_real(file_pair *pair)
|
|||
if (pair->dest_name == NULL)
|
||||
return true;
|
||||
|
||||
#ifndef TUKLIB_DOSLIKE
|
||||
if (opt_synchronous) {
|
||||
// Open the directory where the destination file will
|
||||
// be created (the file descriptor is needed for
|
||||
// fsync()). Do this before creating the destination
|
||||
// file:
|
||||
//
|
||||
// - We currently have no files to clean up if
|
||||
// opening the directory fails. (We aren't
|
||||
// reading from stdin so there are no stdin_flags
|
||||
// to restore either.)
|
||||
//
|
||||
// - Allocating memory with xstrdup() is safe only
|
||||
// when we have nothing to clean up.
|
||||
char *buf = xstrdup(pair->dest_name);
|
||||
const char *dir_name = dirname(buf);
|
||||
|
||||
// O_NOCTTY and O_NONBLOCK are there in case
|
||||
// O_DIRECTORY is 0 and dir_name doesn't refer
|
||||
// to a directory. (We opened the source file
|
||||
// already but directories might have been renamed
|
||||
// after the source file was opened.)
|
||||
pair->dir_fd = open(dir_name, O_SEARCH | O_DIRECTORY
|
||||
| O_NOCTTY | O_NONBLOCK);
|
||||
if (pair->dir_fd == -1) {
|
||||
// Since we did open the source file
|
||||
// successfully, we should rarely get here.
|
||||
// Perhaps something has been renamed or
|
||||
// had its permissions changed.
|
||||
//
|
||||
// In an odd case, the directory has write
|
||||
// and search permissions but not read
|
||||
// permission (d-wx------), and O_SEARCH is
|
||||
// actually O_RDONLY. Then we would be able
|
||||
// to create a new file and only the directory
|
||||
// syncing would be impossible. But let's be
|
||||
// strict about syncing and require users to
|
||||
// explicitly disable it if they don't want it.
|
||||
message_error(_("%s: Opening the directory "
|
||||
"failed: %s"),
|
||||
tuklib_mask_nonprint(dir_name),
|
||||
strerror(errno));
|
||||
free(buf);
|
||||
goto error;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DJGPP__
|
||||
struct stat st;
|
||||
if (stat(pair->dest_name, &st) == 0) {
|
||||
|
|
@ -816,9 +936,9 @@ io_open_dest_real(file_pair *pair)
|
|||
if (st.st_dev == -1) {
|
||||
message_error("%s: Refusing to write to "
|
||||
"a DOS special file",
|
||||
pair->dest_name);
|
||||
free(pair->dest_name);
|
||||
return true;
|
||||
tuklib_mask_nonprint(
|
||||
pair->dest_name));
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Check that we aren't overwriting the source file.
|
||||
|
|
@ -826,9 +946,9 @@ io_open_dest_real(file_pair *pair)
|
|||
&& st.st_ino == pair->src_st.st_ino) {
|
||||
message_error("%s: Output file is the same "
|
||||
"as the input file",
|
||||
pair->dest_name);
|
||||
free(pair->dest_name);
|
||||
return true;
|
||||
tuklib_mask_nonprint(
|
||||
pair->dest_name));
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -836,9 +956,9 @@ io_open_dest_real(file_pair *pair)
|
|||
// If --force was used, unlink the target file first.
|
||||
if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
|
||||
message_error(_("%s: Cannot remove: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
free(pair->dest_name);
|
||||
return true;
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Open the file.
|
||||
|
|
@ -851,11 +971,15 @@ io_open_dest_real(file_pair *pair)
|
|||
pair->dest_fd = open(pair->dest_name, flags, mode);
|
||||
|
||||
if (pair->dest_fd == -1) {
|
||||
message_error(_("%s: %s"), pair->dest_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
free(pair->dest_name);
|
||||
return true;
|
||||
goto error;
|
||||
}
|
||||
|
||||
// We could sync dir_fd now and close it. However, performance
|
||||
// can be better if this is delayed until dest_fd has been
|
||||
// synced in io_sync_dest().
|
||||
}
|
||||
|
||||
if (fstat(pair->dest_fd, &pair->dest_st)) {
|
||||
|
|
@ -881,15 +1005,13 @@ io_open_dest_real(file_pair *pair)
|
|||
// With fstat()/_fstat64() it works.
|
||||
else if (pair->dest_fd != STDOUT_FILENO
|
||||
&& !S_ISREG(pair->dest_st.st_mode)) {
|
||||
message_error("%s: Destination is not a regular file",
|
||||
pair->dest_name);
|
||||
message_error(_("%s: Destination is not a regular file"),
|
||||
tuklib_mask_nonprint(pair->dest_name));
|
||||
|
||||
// dest_fd needs to be reset to -1 to keep io_close() working.
|
||||
(void)close(pair->dest_fd);
|
||||
pair->dest_fd = -1;
|
||||
|
||||
free(pair->dest_name);
|
||||
return true;
|
||||
goto error;
|
||||
}
|
||||
#elif !defined(TUKLIB_DOSLIKE)
|
||||
else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
|
||||
|
|
@ -961,6 +1083,18 @@ io_open_dest_real(file_pair *pair)
|
|||
#endif
|
||||
|
||||
return false;
|
||||
|
||||
error:
|
||||
#ifndef TUKLIB_DOSLIKE
|
||||
// io_close() closes pair->dir_fd but let's do it here anyway.
|
||||
if (pair->dir_fd != -1) {
|
||||
(void)close(pair->dir_fd);
|
||||
pair->dir_fd = -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
free(pair->dest_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -979,8 +1113,8 @@ io_open_dest(file_pair *pair)
|
|||
/// \param pair File whose dest_fd should be closed
|
||||
/// \param success If false, the file will be removed from the disk.
|
||||
///
|
||||
/// \return Zero if closing succeeds. On error, -1 is returned and
|
||||
/// error message printed.
|
||||
/// \return If closing succeeds, false is returned. On error, an error
|
||||
/// message is printed and true is returned.
|
||||
static bool
|
||||
io_close_dest(file_pair *pair, bool success)
|
||||
{
|
||||
|
|
@ -1003,9 +1137,17 @@ io_close_dest(file_pair *pair, bool success)
|
|||
if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
|
||||
return false;
|
||||
|
||||
#ifndef TUKLIB_DOSLIKE
|
||||
// dir_fd was only used for syncing the directory.
|
||||
// Error checking was done when syncing.
|
||||
if (pair->dir_fd != -1)
|
||||
(void)close(pair->dir_fd);
|
||||
#endif
|
||||
|
||||
if (close(pair->dest_fd)) {
|
||||
message_error(_("%s: Closing the file failed: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
|
||||
// Closing destination file failed, so we cannot trust its
|
||||
// contents. Get rid of junk:
|
||||
|
|
@ -1042,7 +1184,8 @@ io_close(file_pair *pair, bool success)
|
|||
SEEK_CUR) == -1) {
|
||||
message_error(_("%s: Seeking failed when trying "
|
||||
"to create a sparse file: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
success = false;
|
||||
} else {
|
||||
const uint8_t zero[1] = { '\0' };
|
||||
|
|
@ -1053,11 +1196,16 @@ io_close(file_pair *pair, bool success)
|
|||
|
||||
signals_block();
|
||||
|
||||
// Copy the file attributes. We need to skip this if destination
|
||||
// file isn't open or it is standard output.
|
||||
if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
|
||||
if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) {
|
||||
// Copy the file attributes. This may produce warnings but
|
||||
// not errors so "success" isn't affected.
|
||||
io_copy_attrs(pair);
|
||||
|
||||
// Synchronize the file and its directory if needed.
|
||||
if (opt_synchronous)
|
||||
success = !io_sync_dest(pair);
|
||||
}
|
||||
|
||||
// Close the destination first. If it fails, we must not remove
|
||||
// the source file!
|
||||
if (io_close_dest(pair, success))
|
||||
|
|
@ -1141,7 +1289,8 @@ io_read(file_pair *pair, io_buf *buf, size_t size)
|
|||
#endif
|
||||
|
||||
message_error(_("%s: Read error: %s"),
|
||||
pair->src_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
strerror(errno));
|
||||
|
||||
return SIZE_MAX;
|
||||
}
|
||||
|
|
@ -1171,7 +1320,8 @@ io_seek_src(file_pair *pair, uint64_t pos)
|
|||
|
||||
if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) {
|
||||
message_error(_("%s: Error seeking the file: %s"),
|
||||
pair->src_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
strerror(errno));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1195,7 +1345,7 @@ io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos)
|
|||
|
||||
if (amount != size) {
|
||||
message_error(_("%s: Unexpected end of file"),
|
||||
pair->src_name);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1240,6 +1390,19 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
// On native Windows, broken pipe is reported as
|
||||
// EINVAL. Don't show an error message in this case.
|
||||
// Try: xz -dc bigfile.xz | head -n1
|
||||
if (errno == EINVAL
|
||||
&& pair->dest_fd == STDOUT_FILENO) {
|
||||
// Emulate SIGPIPE by setting user_abort here.
|
||||
user_abort = true;
|
||||
set_exit_status(E_ERROR);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Handle broken pipe specially. gzip and bzip2
|
||||
// don't print anything on SIGPIPE. In addition,
|
||||
// gzip --quiet uses exit status 2 (warning) on
|
||||
|
|
@ -1254,7 +1417,8 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
|
|||
// user_abort, and get EPIPE here.
|
||||
if (errno != EPIPE)
|
||||
message_error(_("%s: Write error: %s"),
|
||||
pair->dest_name, strerror(errno));
|
||||
tuklib_mask_nonprint(pair->dest_name),
|
||||
strerror(errno));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1304,7 +1468,9 @@ io_write(file_pair *pair, const io_buf *buf, size_t size)
|
|||
SEEK_CUR) == -1) {
|
||||
message_error(_("%s: Seeking failed when "
|
||||
"trying to create a sparse "
|
||||
"file: %s"), pair->dest_name,
|
||||
"file: %s"),
|
||||
tuklib_mask_nonprint(
|
||||
pair->dest_name),
|
||||
strerror(errno));
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,6 +55,12 @@ typedef struct {
|
|||
/// File descriptor of the target file
|
||||
int dest_fd;
|
||||
|
||||
#ifndef TUKLIB_DOSLIKE
|
||||
/// File descriptor of the directory of the target file (which is
|
||||
/// also the directory of the source file)
|
||||
int dir_fd;
|
||||
#endif
|
||||
|
||||
/// True once end of the source file has been detected.
|
||||
bool src_eof;
|
||||
|
||||
|
|
@ -177,6 +183,6 @@ extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos);
|
|||
/// \param buf Buffer containing the data to be written
|
||||
/// \param size Size of the buffer; must be at most IO_BUFFER_SIZE
|
||||
///
|
||||
/// \return On success, zero is returned. On error, -1 is returned
|
||||
/// and error message printed.
|
||||
/// \return On success, false is returned. On error, error message
|
||||
/// is printed and true is returned.
|
||||
extern bool io_write(file_pair *pair, const io_buf *buf, size_t size);
|
||||
|
|
|
|||
|
|
@ -347,13 +347,14 @@ static bool
|
|||
parse_indexes(xz_file_info *xfi, file_pair *pair)
|
||||
{
|
||||
if (pair->src_st.st_size <= 0) {
|
||||
message_error(_("%s: File is empty"), pair->src_name);
|
||||
message_error(_("%s: File is empty"),
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
return true;
|
||||
}
|
||||
|
||||
if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
|
||||
message_error(_("%s: Too small to be a valid .xz file"),
|
||||
pair->src_name);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -365,7 +366,9 @@ parse_indexes(xz_file_info *xfi, file_pair *pair)
|
|||
hardware_memlimit_get(MODE_LIST),
|
||||
(uint64_t)(pair->src_st.st_size));
|
||||
if (ret != LZMA_OK) {
|
||||
message_error(_("%s: %s"), pair->src_name, message_strm(ret));
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(ret));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -411,7 +414,8 @@ parse_indexes(xz_file_info *xfi, file_pair *pair)
|
|||
}
|
||||
|
||||
default:
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(ret));
|
||||
|
||||
// If the error was too low memory usage limit,
|
||||
|
|
@ -473,7 +477,8 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
|
|||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(LZMA_OPTIONS_ERROR));
|
||||
return true;
|
||||
|
||||
|
|
@ -520,8 +525,7 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
|
|||
|
||||
// If the above fails, the file is corrupt so
|
||||
// LZMA_DATA_ERROR is a good error code.
|
||||
|
||||
// Fall through
|
||||
FALLTHROUGH;
|
||||
|
||||
case LZMA_DATA_ERROR:
|
||||
// Free the memory allocated by lzma_block_header_decode().
|
||||
|
|
@ -587,7 +591,8 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
|
|||
|
||||
// Check if the stringification succeeded.
|
||||
if (str_ret != LZMA_OK) {
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(str_ret));
|
||||
return true;
|
||||
}
|
||||
|
|
@ -596,7 +601,8 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
|
|||
|
||||
data_error:
|
||||
// Show the error message.
|
||||
message_error(_("%s: %s"), pair->src_name,
|
||||
message_error(_("%s: %s"),
|
||||
tuklib_mask_nonprint(pair->src_name),
|
||||
message_strm(LZMA_DATA_ERROR));
|
||||
return true;
|
||||
}
|
||||
|
|
@ -744,7 +750,7 @@ print_info_basic(const xz_file_info *xfi, file_pair *pair)
|
|||
char checks[CHECKS_STR_SIZE];
|
||||
get_check_names(checks, lzma_index_checks(xfi->idx), false);
|
||||
|
||||
const char *cols[7] = {
|
||||
const char *cols[6] = {
|
||||
uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
|
||||
uint64_to_str(lzma_index_block_count(xfi->idx), 1),
|
||||
uint64_to_nicestr(lzma_index_file_size(xfi->idx),
|
||||
|
|
@ -754,7 +760,6 @@ print_info_basic(const xz_file_info *xfi, file_pair *pair)
|
|||
get_ratio(lzma_index_file_size(xfi->idx),
|
||||
lzma_index_uncompressed_size(xfi->idx)),
|
||||
checks,
|
||||
pair->src_name,
|
||||
};
|
||||
printf("%*s %*s %*s %*s %*s %-*s %s\n",
|
||||
tuklib_mbstr_fw(cols[0], 5), cols[0],
|
||||
|
|
@ -763,7 +768,7 @@ print_info_basic(const xz_file_info *xfi, file_pair *pair)
|
|||
tuklib_mbstr_fw(cols[3], 11), cols[3],
|
||||
tuklib_mbstr_fw(cols[4], 5), cols[4],
|
||||
tuklib_mbstr_fw(cols[5], 7), cols[5],
|
||||
cols[6]);
|
||||
tuklib_mask_nonprint(pair->src_name));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1034,7 +1039,7 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
|
|||
printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS),
|
||||
xfi->all_have_sizes ? _("Yes") : _("No"));
|
||||
//printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION),
|
||||
printf(_(" Minimum XZ Utils version: %s\n"),
|
||||
printf(" %s %s\n", _("Minimum XZ Utils version:"),
|
||||
xz_ver_to_str(xfi->min_version));
|
||||
}
|
||||
|
||||
|
|
@ -1048,7 +1053,11 @@ print_info_robot(xz_file_info *xfi, file_pair *pair)
|
|||
char checks[CHECKS_STR_SIZE];
|
||||
get_check_names(checks, lzma_index_checks(xfi->idx), false);
|
||||
|
||||
printf("name\t%s\n", pair->src_name);
|
||||
// Robot mode has to mask at least some control chars to prevent
|
||||
// the output from getting out of sync if filename is malicious.
|
||||
// Masking all non-printable chars is more than we need but
|
||||
// perhaps this is good enough in practice.
|
||||
printf("name\t%s\n", tuklib_mask_nonprint(pair->src_name));
|
||||
|
||||
printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
|
||||
"\t%s\t%s\t%" PRIu64 "\n",
|
||||
|
|
@ -1219,7 +1228,7 @@ print_totals_adv(void)
|
|||
printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS),
|
||||
totals.all_have_sizes ? _("Yes") : _("No"));
|
||||
//printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION),
|
||||
printf(_(" Minimum XZ Utils version: %s\n"),
|
||||
printf(" %s %s\n", _("Minimum XZ Utils version:"),
|
||||
xz_ver_to_str(totals.min_version));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,8 @@ read_name(const args_info *args)
|
|||
continue;
|
||||
|
||||
message_error(_("%s: Error reading filenames: %s"),
|
||||
args->files_name, strerror(errno));
|
||||
tuklib_mask_nonprint(args->files_name),
|
||||
strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +96,8 @@ read_name(const args_info *args)
|
|||
if (pos != 0)
|
||||
message_error(_("%s: Unexpected end of input "
|
||||
"when reading filenames"),
|
||||
args->files_name);
|
||||
tuklib_mask_nonprint(
|
||||
args->files_name));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -120,7 +122,9 @@ read_name(const args_info *args)
|
|||
message_error(_("%s: Null character found when "
|
||||
"reading filenames; maybe you meant "
|
||||
"to use '--files0' instead "
|
||||
"of '--files'?"), args->files_name);
|
||||
"of '--files'?"),
|
||||
tuklib_mask_nonprint(
|
||||
args->files_name));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
|||
514
src/xz/message.c
514
src/xz/message.c
|
|
@ -11,7 +11,7 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "private.h"
|
||||
|
||||
#include "tuklib_mbstr_wrap.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
|
||||
|
|
@ -196,10 +196,12 @@ print_filename(void)
|
|||
// If we don't know how many files there will be due
|
||||
// to usage of --files or --files0.
|
||||
if (files_total == 0)
|
||||
fprintf(file, "%s (%u)\n", filename,
|
||||
fprintf(file, "%s (%u)\n",
|
||||
tuklib_mask_nonprint(filename),
|
||||
files_pos);
|
||||
else
|
||||
fprintf(file, "%s (%u/%u)\n", filename,
|
||||
fprintf(file, "%s (%u/%u)\n",
|
||||
tuklib_mask_nonprint(filename),
|
||||
files_pos, files_total);
|
||||
|
||||
signals_unblock();
|
||||
|
|
@ -648,7 +650,7 @@ progress_flush(bool finished)
|
|||
cols[4]);
|
||||
} else {
|
||||
// The filename is always printed.
|
||||
fprintf(stderr, _("%s: "), filename);
|
||||
fprintf(stderr, _("%s: "), tuklib_mask_nonprint(filename));
|
||||
|
||||
// Percentage is printed only if we didn't finish yet.
|
||||
if (!finished) {
|
||||
|
|
@ -936,213 +938,360 @@ message_version(void)
|
|||
}
|
||||
|
||||
|
||||
/// \brief Reports problems found while printing wrapped help text
///
/// \param error_mask Bitmask of TUKLIB_WRAP_* flags; presumably the
///                   OR-ed return values of the tuklib_wraps() and
///                   tuklib_wrapf() calls (confirm against the caller)
///
/// If TUKLIB_WRAP_WARN_OVERLONG is set, it is reported via
/// message_fatal() only when NDEBUG isn't defined. If any other bit is
/// set, message_fatal() is called with the whole mask as the error code.
static void
|
||||
detect_wrapping_errors(int error_mask)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
// This might help in catching problematic strings in translations.
|
||||
// It's a debug message so don't translate this.
|
||||
if (error_mask & TUKLIB_WRAP_WARN_OVERLONG)
|
||||
message_fatal("The help text contains overlong lines");
|
||||
#endif
|
||||
|
||||
// Every bit other than the overlong-line warning is treated as
// an error in all builds.
if (error_mask & ~TUKLIB_WRAP_WARN_OVERLONG)
|
||||
message_fatal(_("Error printing the help text "
|
||||
"(error code %d)"), error_mask);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
extern void
|
||||
message_help(bool long_help)
|
||||
{
|
||||
printf(_("Usage: %s [OPTION]... [FILE]...\n"
|
||||
"Compress or decompress FILEs in the .xz format.\n\n"),
|
||||
progname);
|
||||
static const struct tuklib_wrap_opt wrap0 = { 0, 0, 0, 0, 79 };
|
||||
static const struct tuklib_wrap_opt wrap1 = { 1, 1, 1, 1, 79 };
|
||||
static const struct tuklib_wrap_opt wrap2 = { 2, 2, 22, 22, 79 };
|
||||
static const struct tuklib_wrap_opt wrap3 = { 24, 24, 36, 36, 79 };
|
||||
|
||||
// NOTE: The short help doesn't currently have options that
|
||||
// take arguments.
|
||||
if (long_help)
|
||||
puts(_("Mandatory arguments to long options are mandatory "
|
||||
"for short options too.\n"));
|
||||
// Accumulated error codes from tuklib_wraps() and tuklib_wrapf()
|
||||
int e = 0;
|
||||
|
||||
if (long_help)
|
||||
puts(_(" Operation mode:\n"));
|
||||
printf(_("Usage: %s [OPTION]... [FILE]...\n"), progname);
|
||||
e |= tuklib_wraps(stdout, &wrap0,
|
||||
W_("Compress or decompress FILEs in the .xz format."));
|
||||
putchar('\n');
|
||||
|
||||
puts(_(
|
||||
" -z, --compress force compression\n"
|
||||
" -d, --decompress force decompression\n"
|
||||
" -t, --test test compressed file integrity\n"
|
||||
" -l, --list list information about .xz files"));
|
||||
e |= tuklib_wraps(stdout, &wrap0,
|
||||
W_("Mandatory arguments to long options are "
|
||||
"mandatory for short options too."));
|
||||
putchar('\n');
|
||||
|
||||
if (long_help)
|
||||
puts(_("\n Operation modifiers:\n"));
|
||||
if (long_help) {
|
||||
e |= tuklib_wraps(stdout, &wrap1, W_("Operation mode:"));
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
puts(_(
|
||||
" -k, --keep keep (don't delete) input files\n"
|
||||
" -f, --force force overwrite of output file and (de)compress links\n"
|
||||
" -c, --stdout write to standard output and don't delete input files"));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"-z, --compress\v%s\r"
|
||||
"-d, --decompress\v%s\r"
|
||||
"-t, --test\v%s\r"
|
||||
"-l, --list\v%s",
|
||||
W_("force compression"),
|
||||
W_("force decompression"),
|
||||
W_("test compressed file integrity"),
|
||||
W_("list information about .xz files"));
|
||||
|
||||
if (long_help) {
|
||||
putchar('\n');
|
||||
e |= tuklib_wraps(stdout, &wrap1, W_("Operation modifiers:"));
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"-k, --keep\v%s\r"
|
||||
"-f, --force\v%s\r"
|
||||
"-c, --stdout\v%s",
|
||||
W_("keep (don't delete) input files"),
|
||||
W_("force overwrite of output file and (de)compress links"),
|
||||
W_("write to standard output and don't delete input files"));
|
||||
// NOTE: --to-stdout isn't included above because it's not
|
||||
// the recommended spelling. It was copied from gzip but other
|
||||
// compressors with gzip-like syntax don't support it.
|
||||
|
||||
if (long_help) {
|
||||
puts(_(
|
||||
" --single-stream decompress only the first stream, and silently\n"
|
||||
" ignore possible remaining input data"));
|
||||
puts(_(
|
||||
" --no-sparse do not create sparse files when decompressing\n"
|
||||
" -S, --suffix=.SUF use the suffix '.SUF' on compressed files\n"
|
||||
" --files[=FILE] read filenames to process from FILE; if FILE is\n"
|
||||
" omitted, filenames are read from the standard input;\n"
|
||||
" filenames must be terminated with the newline character\n"
|
||||
" --files0[=FILE] like --files but use the null character as terminator"));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
" --no-sync\v%s\r"
|
||||
" --single-stream\v%s\r"
|
||||
" --no-sparse\v%s\r"
|
||||
"-S, --suffix=%s\v%s\r"
|
||||
" --files[=%s]\v%s\r"
|
||||
" --files0[=%s]\v%s\r",
|
||||
W_("don't synchronize the output file to the storage "
|
||||
"device before removing the input file"),
|
||||
W_("decompress only the first stream, and silently "
|
||||
"ignore possible remaining input data"),
|
||||
W_("do not create sparse files when decompressing"),
|
||||
_(".SUF"),
|
||||
W_("use the suffix '.SUF' on compressed files"),
|
||||
_("FILE"),
|
||||
W_("read filenames to process from FILE; "
|
||||
"if FILE is omitted, "
|
||||
"filenames are read from the standard input; "
|
||||
"filenames must be terminated with "
|
||||
"the newline character"),
|
||||
_("FILE"),
|
||||
W_("like --files but use the null character as "
|
||||
"terminator"));
|
||||
|
||||
e |= tuklib_wraps(stdout, &wrap1,
|
||||
W_("Basic file format and compression options:"));
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"\n"
|
||||
"-F, --format=%s\v%s\r"
|
||||
"-C, --check=%s\v%s\r"
|
||||
" --ignore-check\v%s",
|
||||
_("FORMAT"),
|
||||
W_("file format to encode or decode; possible values "
|
||||
"are 'auto' (default), 'xz', 'lzma', 'lzip', "
|
||||
"and 'raw'"),
|
||||
_("NAME"),
|
||||
W_("integrity check type: 'none' (use with caution), "
|
||||
"'crc32', 'crc64' (default), or 'sha256'"),
|
||||
W_("don't verify the integrity check when "
|
||||
"decompressing"));
|
||||
}
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"-0 ... -9\v%s\r"
|
||||
"-e, --extreme\v%s\r"
|
||||
"-T, --threads=%s\v%s",
|
||||
W_("compression preset; default is 6; take compressor *and* "
|
||||
"decompressor memory usage into account before "
|
||||
"using 7-9!"),
|
||||
W_("try to improve compression ratio by using more CPU time; "
|
||||
"does not affect decompressor memory requirements"),
|
||||
// TRANSLATORS: Short for NUMBER. A longer string is fine but
|
||||
// wider than 5 columns makes --long-help a few lines longer.
|
||||
_("NUM"),
|
||||
W_("use at most NUM threads; the default is 0 which uses "
|
||||
"as many threads as there are processor cores"));
|
||||
|
||||
if (long_help) {
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
" --block-size=%s\v%s\r"
|
||||
" --block-list=%s\v%s\r"
|
||||
" --flush-timeout=%s\v%s",
|
||||
_("SIZE"),
|
||||
W_("start a new .xz block after every SIZE bytes "
|
||||
"of input; use this to set the block size "
|
||||
"for threaded compression"),
|
||||
_("BLOCKS"),
|
||||
W_("start a new .xz block after the given "
|
||||
"comma-separated intervals of uncompressed "
|
||||
"data; optionally, specify a "
|
||||
"filter chain number (0-9) followed by "
|
||||
"a ':' before the uncompressed data size"),
|
||||
_("NUM"),
|
||||
W_("when compressing, if more than NUM "
|
||||
"milliseconds has passed since the previous "
|
||||
"flush and reading more input would block, "
|
||||
"all pending data is flushed out"));
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
" --memlimit-compress=%s\n"
|
||||
" --memlimit-decompress=%s\n"
|
||||
" --memlimit-mt-decompress=%s\n"
|
||||
"-M, --memlimit=%s\v%s\r"
|
||||
" --no-adjust\v%s",
|
||||
_("LIMIT"),
|
||||
_("LIMIT"),
|
||||
_("LIMIT"),
|
||||
_("LIMIT"),
|
||||
// xgettext:no-c-format
|
||||
W_("set memory usage limit for compression, "
|
||||
"decompression, threaded decompression, "
|
||||
"or all of these; LIMIT is in "
|
||||
"bytes, % of RAM, or 0 for defaults"),
|
||||
W_("if compression settings exceed the "
|
||||
"memory usage limit, "
|
||||
"give an error instead of adjusting "
|
||||
"the settings downwards"));
|
||||
}
|
||||
|
||||
if (long_help) {
|
||||
puts(_("\n Basic file format and compression options:\n"));
|
||||
puts(_(
|
||||
" -F, --format=FMT file format to encode or decode; possible values are\n"
|
||||
" 'auto' (default), 'xz', 'lzma', 'lzip', and 'raw'\n"
|
||||
" -C, --check=CHECK integrity check type: 'none' (use with caution),\n"
|
||||
" 'crc32', 'crc64' (default), or 'sha256'"));
|
||||
puts(_(
|
||||
" --ignore-check don't verify the integrity check when decompressing"));
|
||||
}
|
||||
putchar('\n');
|
||||
|
||||
puts(_(
|
||||
" -0 ... -9 compression preset; default is 6; take compressor *and*\n"
|
||||
" decompressor memory usage into account before using 7-9!"));
|
||||
e |= tuklib_wraps(stdout, &wrap1,
|
||||
W_("Custom filter chain for compression "
|
||||
"(an alternative to using presets):"));
|
||||
|
||||
puts(_(
|
||||
" -e, --extreme try to improve compression ratio by using more CPU time;\n"
|
||||
" does not affect decompressor memory requirements"));
|
||||
|
||||
puts(_(
|
||||
" -T, --threads=NUM use at most NUM threads; the default is 0 which uses\n"
|
||||
" as many threads as there are processor cores"));
|
||||
|
||||
if (long_help) {
|
||||
puts(_(
|
||||
" --block-size=SIZE\n"
|
||||
" start a new .xz block after every SIZE bytes of input;\n"
|
||||
" use this to set the block size for threaded compression"));
|
||||
puts(_(
|
||||
" --block-list=BLOCKS\n"
|
||||
" start a new .xz block after the given comma-separated\n"
|
||||
" intervals of uncompressed data; optionally, specify a\n"
|
||||
" filter chain number (0-9) followed by a ':' before the\n"
|
||||
" uncompressed data size"));
|
||||
puts(_(
|
||||
" --flush-timeout=TIMEOUT\n"
|
||||
" when compressing, if more than TIMEOUT milliseconds has\n"
|
||||
" passed since the previous flush and reading more input\n"
|
||||
" would block, all pending data is flushed out"
|
||||
));
|
||||
puts(_( // xgettext:no-c-format
|
||||
" --memlimit-compress=LIMIT\n"
|
||||
" --memlimit-decompress=LIMIT\n"
|
||||
" --memlimit-mt-decompress=LIMIT\n"
|
||||
" -M, --memlimit=LIMIT\n"
|
||||
" set memory usage limit for compression, decompression,\n"
|
||||
" threaded decompression, or all of these; LIMIT is in\n"
|
||||
" bytes, % of RAM, or 0 for defaults"));
|
||||
|
||||
puts(_(
|
||||
" --no-adjust if compression settings exceed the memory usage limit,\n"
|
||||
" give an error instead of adjusting the settings downwards"));
|
||||
}
|
||||
|
||||
if (long_help) {
|
||||
puts(_(
|
||||
"\n Custom filter chain for compression (alternative for using presets):"));
|
||||
|
||||
puts(_(
|
||||
"\n"
|
||||
" --filters=FILTERS set the filter chain using the liblzma filter string\n"
|
||||
" syntax; use --filters-help for more information"
|
||||
));
|
||||
|
||||
puts(_(
|
||||
" --filters1=FILTERS ... --filters9=FILTERS\n"
|
||||
" set additional filter chains using the liblzma filter\n"
|
||||
" string syntax to use with --block-list"
|
||||
));
|
||||
|
||||
puts(_(
|
||||
" --filters-help display more information about the liblzma filter string\n"
|
||||
" syntax and exit."
|
||||
));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"\n"
|
||||
"--filters=%s\v%s\r"
|
||||
"--filters1=%s ... --filters9=%s\v%s\r"
|
||||
"--filters-help\v%s",
|
||||
_("FILTERS"),
|
||||
W_("set the filter chain using the "
|
||||
"liblzma filter string syntax; "
|
||||
"use --filters-help for more information"),
|
||||
_("FILTERS"),
|
||||
_("FILTERS"),
|
||||
W_("set additional filter chains using the "
|
||||
"liblzma filter string syntax to use "
|
||||
"with --block-list"),
|
||||
W_("display more information about the "
|
||||
"liblzma filter string syntax and exit"));
|
||||
|
||||
#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \
|
||||
|| defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
|
||||
// TRANSLATORS: The word "literal" in "literal context bits"
|
||||
// means how many "context bits" to use when encoding
|
||||
// literals. A literal is a single 8-bit byte. It doesn't
|
||||
// mean "literally" here.
|
||||
puts(_(
|
||||
"\n"
|
||||
" --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n"
|
||||
" --lzma2[=OPTS] more of the following options (valid values; default):\n"
|
||||
" preset=PRE reset options to a preset (0-9[e])\n"
|
||||
" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n"
|
||||
" lc=NUM number of literal context bits (0-4; 3)\n"
|
||||
" lp=NUM number of literal position bits (0-4; 0)\n"
|
||||
" pb=NUM number of position bits (0-4; 2)\n"
|
||||
" mode=MODE compression mode (fast, normal; normal)\n"
|
||||
" nice=NUM nice length of a match (2-273; 64)\n"
|
||||
" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
|
||||
" depth=NUM maximum search depth; 0=automatic (default)"));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"\n"
|
||||
"--lzma1[=%s]\n"
|
||||
"--lzma2[=%s]\v%s",
|
||||
// TRANSLATORS: Short for OPTIONS.
|
||||
_("OPTS"),
|
||||
_("OPTS"),
|
||||
// TRANSLATORS: Use semicolon (or its fullwidth form)
|
||||
// in "(valid values; default)" even if it is weird in
|
||||
// your language. There are non-translatable strings
|
||||
// that look like "(foo, bar, baz; foo)" which list
|
||||
// the supported values and the default value.
|
||||
W_("LZMA1 or LZMA2; OPTS is a comma-separated list "
|
||||
"of zero or more of the following options "
|
||||
"(valid values; default):"));
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap3,
|
||||
"preset=%s\v%s (0-9[e])\r"
|
||||
"dict=%s\v%s \b(4KiB - 1536MiB; 8MiB)\b\r"
|
||||
"lc=%s\v%s \b(0-4; 3)\b\r"
|
||||
"lp=%s\v%s \b(0-4; 0)\b\r"
|
||||
"pb=%s\v%s \b(0-4; 2)\b\r"
|
||||
"mode=%s\v%s (fast, normal; normal)\r"
|
||||
"nice=%s\v%s \b(2-273; 64)\b\r"
|
||||
"mf=%s\v%s (hc3, hc4, bt2, bt3, bt4; bt4)\r"
|
||||
"depth=%s\v%s",
|
||||
// TRANSLATORS: Short for PRESET. A longer string is
|
||||
// fine but wider than 4 columns makes --long-help
|
||||
// one line longer.
|
||||
_("PRE"),
|
||||
W_("reset options to a preset"),
|
||||
_("NUM"), W_("dictionary size"),
|
||||
_("NUM"),
|
||||
// TRANSLATORS: The word "literal" in "literal context
|
||||
// bits" means how many "context bits" to use when
|
||||
// encoding literals. A literal is a single 8-bit
|
||||
// byte. It doesn't mean "literally" here.
|
||||
W_("number of literal context bits"),
|
||||
_("NUM"), W_("number of literal position bits"),
|
||||
_("NUM"), W_("number of position bits"),
|
||||
_("MODE"), W_("compression mode"),
|
||||
_("NUM"), W_("nice length of a match"),
|
||||
_("NAME"), W_("match finder"),
|
||||
_("NUM"), W_("maximum search depth; "
|
||||
"0=automatic (default)"));
|
||||
#endif
|
||||
|
||||
puts(_(
|
||||
"\n"
|
||||
" --x86[=OPTS] x86 BCJ filter (32-bit and 64-bit)\n"
|
||||
" --arm[=OPTS] ARM BCJ filter\n"
|
||||
" --armthumb[=OPTS] ARM-Thumb BCJ filter\n"
|
||||
" --arm64[=OPTS] ARM64 BCJ filter\n"
|
||||
" --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n"
|
||||
" --ia64[=OPTS] IA-64 (Itanium) BCJ filter\n"
|
||||
" --sparc[=OPTS] SPARC BCJ filter\n"
|
||||
" --riscv[=OPTS] RISC-V BCJ filter\n"
|
||||
" Valid OPTS for all BCJ filters:\n"
|
||||
" start=NUM start offset for conversions (default=0)"));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"\n"
|
||||
"--x86[=%s]\v%s\r"
|
||||
"--arm[=%s]\v%s\r"
|
||||
"--armthumb[=%s]\v%s\r"
|
||||
"--arm64[=%s]\v%s\r"
|
||||
"--powerpc[=%s]\v%s\r"
|
||||
"--ia64[=%s]\v%s\r"
|
||||
"--sparc[=%s]\v%s\r"
|
||||
"--riscv[=%s]\v%s\r"
|
||||
"\v%s",
|
||||
_("OPTS"),
|
||||
W_("x86 BCJ filter (32-bit and 64-bit)"),
|
||||
_("OPTS"),
|
||||
W_("ARM BCJ filter"),
|
||||
_("OPTS"),
|
||||
W_("ARM-Thumb BCJ filter"),
|
||||
_("OPTS"),
|
||||
W_("ARM64 BCJ filter"),
|
||||
_("OPTS"),
|
||||
W_("PowerPC BCJ filter (big endian only)"),
|
||||
_("OPTS"),
|
||||
W_("IA-64 (Itanium) BCJ filter"),
|
||||
_("OPTS"),
|
||||
W_("SPARC BCJ filter"),
|
||||
_("OPTS"),
|
||||
W_("RISC-V BCJ filter"),
|
||||
W_("Valid OPTS for all BCJ filters:"));
|
||||
e |= tuklib_wrapf(stdout, &wrap3,
|
||||
"start=%s\v%s",
|
||||
_("NUM"),
|
||||
W_("start offset for conversions (default=0)"));
|
||||
|
||||
#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
|
||||
puts(_(
|
||||
"\n"
|
||||
" --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n"
|
||||
" dist=NUM distance between bytes being subtracted\n"
|
||||
" from each other (1-256; 1)"));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"\n"
|
||||
"--delta[=%s]\v%s",
|
||||
_("OPTS"),
|
||||
W_("Delta filter; valid OPTS "
|
||||
"(valid values; default):"));
|
||||
e |= tuklib_wrapf(stdout, &wrap3,
|
||||
"dist=%s\v%s \b(1-256; 1)\b",
|
||||
_("NUM"),
|
||||
W_("distance between bytes being subtracted "
|
||||
"from each other"));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (long_help)
|
||||
puts(_("\n Other options:\n"));
|
||||
|
||||
puts(_(
|
||||
" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
|
||||
" -v, --verbose be verbose; specify twice for even more verbose"));
|
||||
|
||||
if (long_help) {
|
||||
puts(_(
|
||||
" -Q, --no-warn make warnings not affect the exit status"));
|
||||
puts(_(
|
||||
" --robot use machine-parsable messages (useful for scripts)"));
|
||||
puts("");
|
||||
puts(_(
|
||||
" --info-memory display the total amount of RAM and the currently active\n"
|
||||
" memory usage limits, and exit"));
|
||||
puts(_(
|
||||
" -h, --help display the short help (lists only the basic options)\n"
|
||||
" -H, --long-help display this long help and exit"));
|
||||
} else {
|
||||
puts(_(
|
||||
" -h, --help display this short help and exit\n"
|
||||
" -H, --long-help display the long help (lists also the advanced options)"));
|
||||
putchar('\n');
|
||||
e |= tuklib_wraps(stdout, &wrap1, W_("Other options:"));
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
puts(_(
|
||||
" -V, --version display the version number and exit"));
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"-q, --quiet\v%s\r"
|
||||
"-v, --verbose\v%s",
|
||||
W_("suppress warnings; specify twice to suppress errors too"),
|
||||
W_("be verbose; specify twice for even more verbose"));
|
||||
|
||||
puts(_("\nWith no FILE, or when FILE is -, read standard input.\n"));
|
||||
if (long_help) {
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"-Q, --no-warn\v%s\r"
|
||||
" --robot\v%s\r"
|
||||
"\n"
|
||||
" --info-memory\v%s\r"
|
||||
"-h, --help\v%s\r"
|
||||
"-H, --long-help\v%s",
|
||||
W_("make warnings not affect the exit status"),
|
||||
W_("use machine-parsable messages (useful for scripts)"),
|
||||
W_("display the total amount of RAM and the currently active "
|
||||
"memory usage limits, and exit"),
|
||||
W_("display the short help (lists only the basic options)"),
|
||||
W_("display this long help and exit"));
|
||||
} else {
|
||||
e |= tuklib_wrapf(stdout, &wrap2,
|
||||
"-h, --help\v%s\r"
|
||||
"-H, --long-help\v%s",
|
||||
W_("display this short help and exit"),
|
||||
W_("display the long help (lists also the advanced options)"));
|
||||
}
|
||||
|
||||
// TRANSLATORS: This message indicates the bug reporting address
|
||||
// for this package. Please add _another line_ saying
|
||||
// "Report translation bugs to <...>\n" with the email or WWW
|
||||
// address for translation bugs. Thanks.
|
||||
printf(_("Report bugs to <%s> (in English or Finnish).\n"),
|
||||
PACKAGE_BUGREPORT);
|
||||
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
|
||||
e |= tuklib_wrapf(stdout, &wrap2, "-V, --version\v%s",
|
||||
W_("display the version number and exit"));
|
||||
|
||||
putchar('\n');
|
||||
e |= tuklib_wraps(stdout, &wrap0,
|
||||
W_("With no FILE, or when FILE is -, read standard input."));
|
||||
putchar('\n');
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap0,
|
||||
// TRANSLATORS: This message indicates the bug reporting
|
||||
// address for this package. Please add another line saying
|
||||
// "\nReport translation bugs to <...>." with the email or WWW
|
||||
// address for translation bugs. Thanks!
|
||||
W_("Report bugs to <%s> (in English or Finnish)."),
|
||||
PACKAGE_BUGREPORT);
|
||||
|
||||
e |= tuklib_wrapf(stdout, &wrap0,
|
||||
// TRANSLATORS: The first %s is the name of this software.
|
||||
// The second <%s> is an URL.
|
||||
W_("%s home page: <%s>"), PACKAGE_NAME, PACKAGE_URL);
|
||||
|
||||
#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE
|
||||
puts(_(
|
||||
e |= tuklib_wraps(stdout, &wrap0, W_(
|
||||
"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE."));
|
||||
#endif
|
||||
|
||||
detect_wrapping_errors(e);
|
||||
tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT);
|
||||
}
|
||||
|
||||
|
|
@ -1150,20 +1299,25 @@ message_help(bool long_help)
|
|||
extern void
|
||||
message_filters_help(void)
|
||||
{
|
||||
static const struct tuklib_wrap_opt wrap = { .right_margin = 76 };
|
||||
|
||||
char *encoder_options;
|
||||
if (lzma_str_list_filters(&encoder_options, LZMA_VLI_UNKNOWN,
|
||||
LZMA_STR_ENCODER, NULL) != LZMA_OK)
|
||||
message_bug();
|
||||
|
||||
if (!opt_robot) {
|
||||
puts(_(
|
||||
"Filter chains are set using the --filters=FILTERS or\n"
|
||||
"--filters1=FILTERS ... --filters9=FILTERS options. Each filter in the chain\n"
|
||||
"can be separated by spaces or '--'. Alternatively a preset <0-9>[e] can be\n"
|
||||
"specified instead of a filter chain.\n"
|
||||
));
|
||||
int e = tuklib_wrapf(stdout, &wrap,
|
||||
W_("Filter chains are set using the --filters=FILTERS or "
|
||||
"--filters1=FILTERS ... --filters9=FILTERS options. "
|
||||
"Each filter in the chain can be separated by spaces or '--'. "
|
||||
"Alternatively a preset %s can be specified instead of a filter chain."),
|
||||
"<0-9>[e]");
|
||||
putchar('\n');
|
||||
e |= tuklib_wraps(stdout, &wrap,
|
||||
W_("The supported filters and their options are:"));
|
||||
|
||||
puts(_("The supported filters and their options are:"));
|
||||
detect_wrapping_errors(e);
|
||||
}
|
||||
|
||||
puts(encoder_options);
|
||||
|
|
|
|||
|
|
@ -82,15 +82,16 @@ parse_options(const char *str, const option_map *opts,
|
|||
*value++ = '\0';
|
||||
|
||||
if (value == NULL || value[0] == '\0')
|
||||
message_fatal(_("%s: Options must be 'name=value' "
|
||||
"pairs separated with commas"), str);
|
||||
message_fatal(_("%s: %s"), tuklib_mask_nonprint(str),
|
||||
_("Options must be 'name=value' "
|
||||
"pairs separated with commas"));
|
||||
|
||||
// Look for the option name from the option map.
|
||||
unsigned i = 0;
|
||||
while (true) {
|
||||
if (opts[i].name == NULL)
|
||||
message_fatal(_("%s: Invalid option name"),
|
||||
name);
|
||||
tuklib_mask_nonprint(name));
|
||||
|
||||
if (strcmp(name, opts[i].name) == 0)
|
||||
break;
|
||||
|
|
@ -109,8 +110,9 @@ parse_options(const char *str, const option_map *opts,
|
|||
}
|
||||
|
||||
if (opts[i].map[j].name == NULL)
|
||||
message_fatal(_("%s: Invalid option value"),
|
||||
value);
|
||||
message_fatal(_("%s: %s"),
|
||||
tuklib_mask_nonprint(value),
|
||||
_("Invalid option value"));
|
||||
|
||||
set(filter_options, i, opts[i].map[j].id, value);
|
||||
|
||||
|
|
@ -244,7 +246,8 @@ tuklib_attr_noreturn
|
|||
static void
|
||||
error_lzma_preset(const char *valuestr)
|
||||
{
|
||||
message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), valuestr);
|
||||
message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"),
|
||||
tuklib_mask_nonprint(valuestr));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "tuklib_gettext.h"
|
||||
#include "tuklib_progname.h"
|
||||
#include "tuklib_exit.h"
|
||||
#include "tuklib_mbstr_nonprint.h"
|
||||
#include "tuklib_mbstr.h"
|
||||
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
|
|
|
|||
|
|
@ -115,26 +115,7 @@ sandbox_enable_strict_if_allowed(int src_fd lzma_attribute((__unused__)),
|
|||
// Landlock //
|
||||
//////////////
|
||||
|
||||
#include <linux/landlock.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
|
||||
// Highest Landlock ABI version supported by this file:
|
||||
// - For ABI versions 1-3 we don't need anything from <linux/landlock.h>
|
||||
// that isn't part of version 1.
|
||||
// - For ABI version 4 we need the larger struct landlock_ruleset_attr
|
||||
// with the handled_access_net member. That is bundled with the macros
|
||||
// LANDLOCK_ACCESS_NET_BIND_TCP and LANDLOCK_ACCESS_NET_CONNECT_TCP.
|
||||
#ifdef LANDLOCK_ACCESS_NET_BIND_TCP
|
||||
# define LANDLOCK_ABI_MAX 4
|
||||
#else
|
||||
# define LANDLOCK_ABI_MAX 3
|
||||
#endif
|
||||
|
||||
|
||||
/// Landlock ABI version supported by the kernel
|
||||
static int landlock_abi;
|
||||
#include "my_landlock.h"
|
||||
|
||||
|
||||
// The required_rights should have those bits set that must not be restricted.
|
||||
|
|
@ -144,40 +125,19 @@ static int landlock_abi;
|
|||
static void
|
||||
enable_landlock(uint64_t required_rights)
|
||||
{
|
||||
assert(landlock_abi <= LANDLOCK_ABI_MAX);
|
||||
|
||||
if (landlock_abi <= 0)
|
||||
// Initialize the ruleset to forbid all actions that the available
|
||||
// Landlock ABI version supports. Return if Landlock isn't supported
|
||||
// at all.
|
||||
struct landlock_ruleset_attr attr;
|
||||
if (my_landlock_ruleset_attr_forbid_all(&attr) == -1)
|
||||
return;
|
||||
|
||||
// We want to set all supported flags in handled_access_fs.
|
||||
// This way the ruleset will initially forbid access to all
|
||||
// actions that the available Landlock ABI version supports.
|
||||
// Exceptions can be added using landlock_add_rule(2) to
|
||||
// allow certain actions on certain files or directories.
|
||||
//
|
||||
// The same flag values are used on all archs. ABI v2 and v3
|
||||
// both add one new flag.
|
||||
//
|
||||
// First in ABI v1: LANDLOCK_ACCESS_FS_EXECUTE = 1ULL << 0
|
||||
// Last in ABI v1: LANDLOCK_ACCESS_FS_MAKE_SYM = 1ULL << 12
|
||||
// Last in ABI v2: LANDLOCK_ACCESS_FS_REFER = 1ULL << 13
|
||||
// Last in ABI v3: LANDLOCK_ACCESS_FS_TRUNCATE = 1ULL << 14
|
||||
//
|
||||
// This makes it simple to set the mask based on the ABI
|
||||
// version and we don't need to care which flags are #defined
|
||||
// in the installed <linux/landlock.h> for ABI versions 1-3.
|
||||
const struct landlock_ruleset_attr attr = {
|
||||
.handled_access_fs = ~required_rights
|
||||
& ((1ULL << (12 + my_min(3, landlock_abi))) - 1),
|
||||
#if LANDLOCK_ABI_MAX >= 4
|
||||
.handled_access_net = landlock_abi < 4 ? 0 :
|
||||
(LANDLOCK_ACCESS_NET_BIND_TCP
|
||||
| LANDLOCK_ACCESS_NET_CONNECT_TCP),
|
||||
#endif
|
||||
};
|
||||
// Allow the required rights.
|
||||
attr.handled_access_fs &= ~required_rights;
|
||||
|
||||
const int ruleset_fd = syscall(SYS_landlock_create_ruleset,
|
||||
&attr, sizeof(attr), 0U);
|
||||
// Create the ruleset in the kernel. This shouldn't fail.
|
||||
const int ruleset_fd = my_landlock_create_ruleset(
|
||||
&attr, sizeof(attr), 0);
|
||||
if (ruleset_fd < 0)
|
||||
message_fatal(_("Failed to enable the sandbox"));
|
||||
|
||||
|
|
@ -193,9 +153,10 @@ enable_landlock(uint64_t required_rights)
|
|||
//
|
||||
// prctl(PR_SET_NO_NEW_PRIVS, ...) was already called in
|
||||
// sandbox_init() so we don't do it here again.
|
||||
if (syscall(SYS_landlock_restrict_self, ruleset_fd, 0U) != 0)
|
||||
if (my_landlock_restrict_self(ruleset_fd, 0) != 0)
|
||||
message_fatal(_("Failed to enable the sandbox"));
|
||||
|
||||
(void)close(ruleset_fd);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -213,19 +174,14 @@ sandbox_init(void)
|
|||
// fails here the error will still be detected when it matters.
|
||||
(void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
|
||||
// Get the highest Landlock ABI version supported by the kernel.
|
||||
landlock_abi = syscall(SYS_landlock_create_ruleset,
|
||||
(void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
|
||||
|
||||
// The kernel might support a newer ABI than this file.
|
||||
if (landlock_abi > LANDLOCK_ABI_MAX)
|
||||
landlock_abi = LANDLOCK_ABI_MAX;
|
||||
|
||||
// These are all in ABI version 1 already. We don't need truncate
|
||||
// rights because files are created with open() using O_EXCL and
|
||||
// without O_TRUNC.
|
||||
//
|
||||
// LANDLOCK_ACCESS_FS_READ_DIR is included here to get a clear error
|
||||
// LANDLOCK_ACCESS_FS_READ_DIR is required to synchronize the
|
||||
// directory before removing the source file.
|
||||
//
|
||||
// LANDLOCK_ACCESS_FS_READ_DIR is also helpful to show a clear error
|
||||
// message if xz is given a directory name. Without this permission
|
||||
// the message would be "Permission denied" but with this permission
|
||||
// it's "Is a directory, skipping". It could be worked around with
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ uncompressed_name(const char *src_name, const size_t src_len)
|
|||
|
||||
if (new_len == 0) {
|
||||
message_warning(_("%s: Filename has an unknown suffix, "
|
||||
"skipping"), src_name);
|
||||
"skipping"), tuklib_mask_nonprint(src_name));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -178,13 +178,14 @@ uncompressed_name(const char *src_name, const size_t src_len)
|
|||
}
|
||||
|
||||
|
||||
/// This message is needed in multiple places in compressed_name(),
|
||||
/// so the message has been put into its own function.
|
||||
static void
|
||||
msg_suffix(const char *src_name, const char *suffix)
|
||||
{
|
||||
char *mem = NULL;
|
||||
message_warning(_("%s: File already has '%s' suffix, skipping"),
|
||||
src_name, suffix);
|
||||
tuklib_mask_nonprint(src_name),
|
||||
tuklib_mask_nonprint_r(suffix, &mem));
|
||||
free(mem);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -390,7 +391,8 @@ suffix_set(const char *suffix)
|
|||
// Empty suffix and suffixes having a directory separator are
|
||||
// rejected. Such suffixes would break things later.
|
||||
if (suffix[0] == '\0' || has_dir_sep(suffix))
|
||||
message_fatal(_("%s: Invalid filename suffix"), suffix);
|
||||
message_fatal(_("%s: Invalid filename suffix"),
|
||||
tuklib_mask_nonprint(suffix));
|
||||
|
||||
// Replace the old custom_suffix (if any) with the new suffix.
|
||||
free(custom_suffix);
|
||||
|
|
|
|||
|
|
@ -25,7 +25,11 @@ static char bufs[4][128];
|
|||
// for DJGPP builds.
|
||||
//
|
||||
// MSVC doesn't support thousand separators.
|
||||
#if defined(__DJGPP__) || defined(_MSC_VER)
|
||||
//
|
||||
// MinGW-w64 supports thousand separators only with its own stdio functions
|
||||
// which our sysdefs.h disables when _UCRT && HAVE_SMALL.
|
||||
#if defined(__DJGPP__) || defined(_MSC_VER) \
|
||||
|| (defined(__MINGW32__) && __USE_MINGW_ANSI_STDIO == 0)
|
||||
# define FORMAT_THOUSAND_SEP(prefix, suffix) prefix suffix
|
||||
# define check_thousand_sep(slot) do { } while (0)
|
||||
#else
|
||||
|
|
@ -103,8 +107,8 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
|
|||
return max;
|
||||
|
||||
if (*value < '0' || *value > '9')
|
||||
message_fatal(_("%s: Value is not a non-negative "
|
||||
"decimal integer"), value);
|
||||
message_fatal(_("%s: %s"), value,
|
||||
_("Value is not a non-negative decimal integer"));
|
||||
|
||||
do {
|
||||
// Don't overflow.
|
||||
|
|
|
|||
94
src/xz/xz.1
94
src/xz/xz.1
|
|
@ -4,7 +4,7 @@
|
|||
.\" Authors: Lasse Collin
|
||||
.\" Jia Tan
|
||||
.\"
|
||||
.TH XZ 1 "2024-04-08" "Tukaani" "XZ Utils"
|
||||
.TH XZ 1 "2025-03-08" "Tukaani" "XZ Utils"
|
||||
.
|
||||
.SH NAME
|
||||
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
|
||||
|
|
@ -237,6 +237,8 @@ The memory usage limiter can be enabled with
|
|||
the command line option \fB\-\-memlimit=\fIlimit\fR.
|
||||
Often it is more convenient to enable the limiter
|
||||
by default by setting the environment variable
|
||||
.\" TRANSLATORS: Don't translate the uppercase XZ_DEFAULTS.
|
||||
.\" It's a name of an environment variable.
|
||||
.BR XZ_DEFAULTS ,
|
||||
for example,
|
||||
.BR XZ_DEFAULTS=\-\-memlimit=150MiB .
|
||||
|
|
@ -351,9 +353,24 @@ the command name (for example,
|
|||
.B unxz
|
||||
implies
|
||||
.BR \-\-decompress ).
|
||||
.IP ""
|
||||
.\" The DESCRIPTION section already says this but it's good to repeat it
|
||||
.\" here because the default behavior is a bit dangerous and new users
|
||||
.\" in a hurry may skip reading the DESCRIPTION section.
|
||||
After successful compression, the source file is removed
|
||||
unless writing to standard output or
|
||||
.B \-\-keep
|
||||
was specified.
|
||||
.TP
|
||||
.BR \-d ", " \-\-decompress ", " \-\-uncompress
|
||||
Decompress.
|
||||
.\" The DESCRIPTION section already says this but it's good to repeat it
|
||||
.\" here because the default behavior is a bit dangerous and new users
|
||||
.\" in a hurry may skip reading the DESCRIPTION section.
|
||||
After successful decompression, the source file is removed
|
||||
unless writing to standard output or
|
||||
.B \-\-keep
|
||||
was specified.
|
||||
.TP
|
||||
.BR \-t ", " \-\-test
|
||||
Test the integrity of compressed
|
||||
|
|
@ -482,6 +499,13 @@ This option has no effect if the operation mode is not
|
|||
.B \-\-decompress
|
||||
or
|
||||
.BR \-\-test .
|
||||
.IP ""
|
||||
Since
|
||||
.B xz
|
||||
5.7.1alpha,
|
||||
.B \-\-single\-stream
|
||||
implies
|
||||
.BR \-\-keep .
|
||||
.TP
|
||||
.B \-\-no\-sparse
|
||||
Disable creation of sparse files.
|
||||
|
|
@ -553,6 +577,7 @@ Specify the file
|
|||
to compress or decompress:
|
||||
.RS
|
||||
.TP
|
||||
.\" TRANSLATORS: Don't translate bold string B<auto>.
|
||||
.B auto
|
||||
This is the default.
|
||||
When compressing,
|
||||
|
|
@ -639,6 +664,9 @@ Supported
|
|||
types:
|
||||
.RS
|
||||
.TP
|
||||
.\" TRANSLATORS: Don't translate the bold strings B<none>, B<crc32>,
|
||||
.\" B<crc64>, and B<sha256>. The command line option --check accepts
|
||||
.\" only the untranslated strings.
|
||||
.B none
|
||||
Don't calculate an integrity check at all.
|
||||
This is usually a bad idea.
|
||||
|
|
@ -1039,6 +1067,28 @@ is unsuitable for decompressing the stream in real time due to how
|
|||
.B xz
|
||||
does buffering.
|
||||
.TP
|
||||
.B \-\-no\-sync
|
||||
Do not synchronize the target file and its directory
|
||||
to the storage device before removing the source file.
|
||||
This can improve performance if compressing or decompressing
|
||||
many small files.
|
||||
However, if the system crashes soon after the deletion,
|
||||
it is possible that the target file was not written
|
||||
to the storage device but the delete operation was.
|
||||
In that case neither the original source file
|
||||
nor the target file is available.
|
||||
.IP ""
|
||||
This option has an effect only when
|
||||
.B xz
|
||||
is going to remove the source file.
|
||||
In other cases synchronization is never done.
|
||||
.IP ""
|
||||
The synchronization and
|
||||
.B \-\-no\-sync
|
||||
were added in
|
||||
.B xz
|
||||
5.7.1alpha.
|
||||
.TP
|
||||
.BI \-\-memlimit\-compress= limit
|
||||
Set a memory usage limit for compression.
|
||||
If this option is specified multiple times,
|
||||
|
|
@ -1453,6 +1503,11 @@ LZMA1 and LZMA2 share the same set of
|
|||
.IR options :
|
||||
.RS
|
||||
.TP
|
||||
.\" TRANSLATORS: Don't translate bold strings like B<preset>, B<dict>,
|
||||
.\" B<mode>, B<nice>, B<fast>, or B<normal> because those are command line
|
||||
.\" options. On the other hand, do translate the italic strings like
|
||||
.\" I<preset>, I<size>, and I<mode>, because such italic strings are
|
||||
.\" placeholders which a user replaces with an actual value.
|
||||
.BI preset= preset
|
||||
Reset all LZMA1 or LZMA2
|
||||
.I options
|
||||
|
|
@ -2103,6 +2158,11 @@ uses tab-separated output.
|
|||
The first column of every line has a string
|
||||
that indicates the type of the information found on that line:
|
||||
.TP
|
||||
.\" TRANSLATORS: The bold strings B<name>, B<file>, B<stream>, B<block>,
|
||||
.\" B<summary>, and B<totals> are produced by the xz tool for scripts to
|
||||
.\" parse, thus the untranslated strings must be included in the translated
|
||||
.\" man page. It may be useful to provide a translated string in parenthesis
|
||||
.\" without bold, for example: "B<name> (nimi)"
|
||||
.B name
|
||||
This is always the first line when starting to list a file.
|
||||
The second column on the line is the filename.
|
||||
|
|
@ -2181,6 +2241,9 @@ are displayed instead of the ratio.
|
|||
.IP 7. 4
|
||||
Comma-separated list of integrity check names.
|
||||
The following strings are used for the known check types:
|
||||
.\" TRANSLATORS: Don't translate the bold strings B<None>, B<CRC32>,
|
||||
.\" B<CRC64>, B<SHA-256>, or B<Unknown-> here. In robot mode, xz produces
|
||||
.\" them in untranslated form for scripts to parse.
|
||||
.BR None ,
|
||||
.BR CRC32 ,
|
||||
.BR CRC64 ,
|
||||
|
|
@ -2467,6 +2530,7 @@ prints the version number of
|
|||
.B xz
|
||||
and liblzma in the following format:
|
||||
.PP
|
||||
.\" TRANSLATORS: Don't translate the uppercase XZ_VERSION or LIBLZMA_VERSION.
|
||||
.BI XZ_VERSION= XYYYZZZS
|
||||
.br
|
||||
.BI LIBLZMA_VERSION= XYYYZZZS
|
||||
|
|
@ -2521,6 +2585,8 @@ don't affect the exit status.
|
|||
.B xz
|
||||
parses space-separated lists of options
|
||||
from the environment variables
|
||||
.\" TRANSLATORS: Don't translate the uppercase XZ_DEFAULTS or XZ_OPT.
|
||||
.\" They are names of environment variables.
|
||||
.B XZ_DEFAULTS
|
||||
and
|
||||
.BR XZ_OPT ,
|
||||
|
|
@ -2530,14 +2596,36 @@ all non-options are silently ignored.
|
|||
Parsing is done with
|
||||
.BR getopt_long (3)
|
||||
which is used also for the command line arguments.
|
||||
.PP
|
||||
.B Warning:
|
||||
By setting these environment variables,
|
||||
one is effectively modifying programs and scripts that run
|
||||
.BR xz .
|
||||
Most of the time it is safe to set memory usage limits, number of threads,
|
||||
and compression options via the environment variables.
|
||||
However, some options can break scripts.
|
||||
An obvious example is
|
||||
.B \-\-help
|
||||
which makes
|
||||
.B xz
|
||||
show the help text instead of compressing or decompressing a file.
|
||||
More subtle examples are
|
||||
.B \-\-quiet
|
||||
and
|
||||
.BR \-\-verbose .
|
||||
In many cases it works well to enable the progress indicator using
|
||||
.BR \-\-verbose ,
|
||||
but in some situations the extra messages create problems.
|
||||
The verbosity level also affects the behavior of
|
||||
.BR \-\-list .
|
||||
.TP
|
||||
.B XZ_DEFAULTS
|
||||
User-specific or system-wide default options.
|
||||
Typically this is set in a shell initialization script to enable
|
||||
.BR xz 's
|
||||
memory usage limiter by default.
|
||||
memory usage limiter by default or set the default number of threads.
|
||||
Excluding shell initialization scripts
|
||||
and similar special cases, scripts must never set or unset
|
||||
and similar special cases, scripts should never set or unset
|
||||
.BR XZ_DEFAULTS .
|
||||
.TP
|
||||
.B XZ_OPT
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifndef _MSC_VER
|
||||
|
|
@ -25,14 +26,7 @@
|
|||
#endif
|
||||
|
||||
#ifdef HAVE_LINUX_LANDLOCK
|
||||
# include <linux/landlock.h>
|
||||
# include <sys/prctl.h>
|
||||
# include <sys/syscall.h>
|
||||
# ifdef LANDLOCK_ACCESS_NET_BIND_TCP
|
||||
# define LANDLOCK_ABI_MAX 4
|
||||
# else
|
||||
# define LANDLOCK_ABI_MAX 3
|
||||
# endif
|
||||
# include "my_landlock.h"
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \
|
||||
|
|
@ -42,6 +36,7 @@
|
|||
|
||||
#include "getopt.h"
|
||||
#include "tuklib_progname.h"
|
||||
#include "tuklib_mbstr_nonprint.h"
|
||||
#include "tuklib_exit.h"
|
||||
|
||||
#ifdef TUKLIB_DOSLIKE
|
||||
|
|
@ -209,7 +204,8 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename)
|
|||
// an error occurred. ferror() doesn't
|
||||
// touch errno.
|
||||
my_errorf("%s: Error reading input file: %s",
|
||||
filename, strerror(errno));
|
||||
tuklib_mask_nonprint(filename),
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
|
@ -234,8 +230,17 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename)
|
|||
// Wouldn't be a surprise if writing to stderr
|
||||
// would fail too but at least try to show an
|
||||
// error message.
|
||||
my_errorf("Cannot write to standard output: "
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
// On native Windows, broken pipe is reported
|
||||
// as EINVAL. Don't show an error message
|
||||
// in this case.
|
||||
if (errno != EINVAL)
|
||||
#endif
|
||||
{
|
||||
my_errorf("Cannot write to "
|
||||
"standard output: "
|
||||
"%s", strerror(errno));
|
||||
}
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
|
@ -292,7 +297,8 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename)
|
|||
break;
|
||||
}
|
||||
|
||||
my_errorf("%s: %s", filename, msg);
|
||||
my_errorf("%s: %s", tuklib_mask_nonprint(filename),
|
||||
msg);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
|
@ -334,33 +340,20 @@ sandbox_enter(int src_fd)
|
|||
(void)src_fd;
|
||||
|
||||
#elif defined(HAVE_LINUX_LANDLOCK)
|
||||
int landlock_abi = syscall(SYS_landlock_create_ruleset,
|
||||
(void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
|
||||
|
||||
if (landlock_abi > 0) {
|
||||
if (landlock_abi > LANDLOCK_ABI_MAX)
|
||||
landlock_abi = LANDLOCK_ABI_MAX;
|
||||
|
||||
const struct landlock_ruleset_attr attr = {
|
||||
.handled_access_fs = (1ULL
|
||||
<< (12 + my_min(3, landlock_abi))) - 1,
|
||||
# if LANDLOCK_ABI_MAX >= 4
|
||||
.handled_access_net = landlock_abi < 4 ? 0 :
|
||||
(LANDLOCK_ACCESS_NET_BIND_TCP
|
||||
| LANDLOCK_ACCESS_NET_CONNECT_TCP),
|
||||
# endif
|
||||
};
|
||||
|
||||
const int ruleset_fd = syscall(SYS_landlock_create_ruleset,
|
||||
&attr, sizeof(attr), 0U);
|
||||
struct landlock_ruleset_attr attr;
|
||||
if (my_landlock_ruleset_attr_forbid_all(&attr) > 0) {
|
||||
const int ruleset_fd = my_landlock_create_ruleset(
|
||||
&attr, sizeof(attr), 0);
|
||||
if (ruleset_fd < 0)
|
||||
goto error;
|
||||
|
||||
// All files we need should have already been opened. Thus,
|
||||
// we don't need to add any rules using landlock_add_rule(2)
|
||||
// before activating the sandbox.
|
||||
if (syscall(SYS_landlock_restrict_self, ruleset_fd, 0U) != 0)
|
||||
if (my_landlock_restrict_self(ruleset_fd, 0) != 0)
|
||||
goto error;
|
||||
|
||||
(void)close(ruleset_fd);
|
||||
}
|
||||
|
||||
(void)src_fd;
|
||||
|
|
@ -391,6 +384,9 @@ error:
|
|||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
// Initialize progname which will be used in error messages.
|
||||
tuklib_progname_init(argv);
|
||||
|
||||
#ifdef HAVE_PLEDGE
|
||||
// OpenBSD's pledge(2) sandbox.
|
||||
// Initially enable the sandbox slightly more relaxed so that
|
||||
|
|
@ -416,8 +412,15 @@ main(int argc, char **argv)
|
|||
(void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
#endif
|
||||
|
||||
// Initialize progname which we will be used in error messages.
|
||||
tuklib_progname_init(argv);
|
||||
// We need to set the locale even though we don't have any
|
||||
// translated messages:
|
||||
//
|
||||
// - tuklib_mask_nonprint() has locale-specific behavior (LC_CTYPE).
|
||||
//
|
||||
// - This is needed on Windows to make non-ASCII filenames display
|
||||
// properly when the active code page has been set to UTF-8
|
||||
// in the application manifest.
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
// Parse the command line options.
|
||||
parse_options(argc, argv);
|
||||
|
|
@ -453,8 +456,10 @@ main(int argc, char **argv)
|
|||
src_name = argv[optind];
|
||||
src_file = fopen(src_name, "rb");
|
||||
if (src_file == NULL) {
|
||||
my_errorf("%s: %s", src_name,
|
||||
strerror(errno));
|
||||
my_errorf("%s: %s",
|
||||
tuklib_mask_nonprint(
|
||||
src_name),
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue