author     CoprDistGit <infra@openeuler.org>   2025-02-28 10:03:49 +0000
committer  CoprDistGit <infra@openeuler.org>   2025-02-28 10:03:49 +0000
commit     73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree       8e28b63e478c43c252f18b49836dff7313affe54 /0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
parent     49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gcc (openeuler24.03_LTS_SP1)
Diffstat (limited to '0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch')
-rw-r--r--  0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch  2216
1 file changed, 2216 insertions(+), 0 deletions(-)
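Note on the mechanism (this note is not part of the imported patch): the ipa-prefetch.cc hunks below build prefetch calls with three arguments (address, write_p, locality), which mirrors GCC's __builtin_prefetch builtin, and the params.opt hunk clamps -param=ipa-prefetch-locality to the same 0-3 range that builtin accepts. A minimal C sketch of the source-level equivalent, with illustrative names:

```c
/* Minimal sketch, not taken from the patch: the source-level shape of the
   prefetch calls the pass emits.  __builtin_prefetch takes an address, a
   read/write flag (0 or 1, cf. the write_p argument in the ipa-prefetch.cc
   hunk below), and a temporal-locality hint 0..3 (cf. the IntegerRange(0, 3)
   added for ipa-prefetch-locality).  */
struct node { struct node *next; long payload; };

long
sum_list (struct node *n)
{
  long s = 0;
  for (; n; n = n->next)
    {
      if (n->next)
	__builtin_prefetch (n->next, /*write=*/0, /*locality=*/3);
      s += n->payload;
    }
  return s;
}
```

Locality 0 marks streaming data with no temporal reuse, while 3 asks to keep the line in all cache levels, which matches the old pstl1strm..pstl1keep wording removed from params.opt.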
diff --git a/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch b/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch new file mode 100644 index 0000000..dae19fa --- /dev/null +++ b/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch @@ -0,0 +1,2216 @@ +From 4c262af8e178ac7c81b32be5b159b4d09a5841c9 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Fri, 8 Mar 2024 07:07:50 +0800 +Subject: [PATCH 1/2] Port fixes for IPA prefetch to GCC 12 + +--- + gcc/ipa-devirt.cc | 9 +- + gcc/ipa-prefetch.cc | 174 +- + gcc/ipa-sra.cc | 7 + + gcc/params.opt | 4 +- + gcc/testsuite/gcc.dg/completion-1.c | 1 + + gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c | 1843 ++++++++++++++++++++ + 6 files changed, 1974 insertions(+), 64 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c + +diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc +index dd3562d56..dd000b401 100644 +--- a/gcc/ipa-devirt.cc ++++ b/gcc/ipa-devirt.cc +@@ -5029,9 +5029,12 @@ analyze_assign_stmt (gimple *stmt) + } + else + { +- fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ", +- get_tree_code_name (TREE_CODE (rhs))); +- print_gimple_stmt (dump_file, stmt, 0); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ", ++ get_tree_code_name (TREE_CODE (rhs))); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } + gcc_unreachable (); + } + } +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index aeea51105..9537e4835 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -167,6 +167,7 @@ analyse_cgraph () + } + + /* TODO: maybe remove loop info here. */ ++ n->get_body (); + push_cfun (DECL_STRUCT_FUNCTION (n->decl)); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (LOOPS_NORMAL); +@@ -942,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2) + (*mr_candidate_map)[mr] = mr2; + return; + } ++ /* Probably we shouldn't leave nulls in the map. */ ++ if ((*mr_candidate_map)[mr] == NULL) ++ return; + /* TODO: support analysis with incrementation of different fields. */ + if ((*mr_candidate_map)[mr]->offset != mr2->offset) + { +@@ -1090,6 +1094,15 @@ analyse_loops () + memref_t *mr = it->first, *mr2 = it->second; + if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr)) + continue; ++ /* For now optimize only MRs that mem is MEM_REF. ++ TODO: support other MR types. */ ++ if (TREE_CODE (mr->mem) != MEM_REF) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n", ++ mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem))); ++ continue; ++ } + if (!optimize_mrs_map->count (fn)) + (*optimize_mrs_map)[fn] = new memref_set; + (*optimize_mrs_map)[fn]->insert (mr); +@@ -1102,7 +1115,7 @@ analyse_loops () + it != (*optimize_mrs_map)[fn]->end (); it++) + { + memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr]; +- fprintf (dump_file, "MRs %d,%d with incremental offset ", ++ fprintf (dump_file, "MRs %d, %d with incremental offset ", + mr->mr_id, mr2->mr_id); + print_generic_expr (dump_file, mr2->offset); + fprintf (dump_file, "\n"); +@@ -1435,6 +1448,52 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data) + return NULL_TREE; + } + ++/* Copy stmt and remap its operands. */ ++ ++static gimple * ++gimple_copy_and_remap (gimple *stmt) ++{ ++ gimple *copy = gimple_copy (stmt); ++ gcc_checking_assert (!is_gimple_debug (copy)); ++ ++ /* Remap all the operands in COPY. 
*/ ++ struct walk_stmt_info wi; ++ memset (&wi, 0, sizeof (wi)); ++ wi.info = copy; ++ walk_gimple_op (copy, remap_gimple_op_r, &wi); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Stmt copy after remap:\n"); ++ print_gimple_stmt (dump_file, copy, 0); ++ } ++ return copy; ++} ++ ++/* Copy and remap stmts listed in MR in reverse order to last_idx, skipping ++ processed ones. Insert new stmts to the sequence. */ ++ ++static gimple * ++gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, ++ int last_idx, stmt_set &processed) ++{ ++ gimple *last_stmt = NULL; ++ for (int i = mr->stmts.length () - 1; i >= last_idx ; i--) ++ { ++ if (processed.count (mr->stmts[i])) ++ continue; ++ processed.insert (mr->stmts[i]); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Copy stmt %d from used MR (%d):\n", ++ i, mr->mr_id); ++ print_gimple_stmt (dump_file, mr->stmts[i], 0); ++ } ++ last_stmt = gimple_copy_and_remap (mr->stmts[i]); ++ gimple_seq_add_stmt (&stmts, last_stmt); ++ } ++ return last_stmt; ++} ++ + static void + create_cgraph_edge (cgraph_node *n, gimple *stmt) + { +@@ -1490,6 +1549,13 @@ optimize_function (cgraph_node *n, function *fn) + "Skip the case.\n"); + return 0; + } ++ if (!tree_fits_shwi_p (inc_mr->step)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Cannot represent incremental MR's step as " ++ "integer. Skip the case.\n"); ++ return 0; ++ } + if (dump_file && !used_mrs.empty ()) + print_mrs_ids (used_mrs, "Common list of used mrs:\n"); + +@@ -1539,16 +1605,44 @@ optimize_function (cgraph_node *n, function *fn) + return 0; + } + else if (dump_file) +- fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index); ++ { ++ fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index); ++ gimple_dump_bb (dump_file, dom_bb, 0, dump_flags); ++ fprintf (dump_file, "\n"); ++ } + +- split_block (dom_bb, (gimple *) NULL); ++ /* Try to find comp_mr's stmt in the dominator bb. */ ++ gimple *last_used = NULL; ++ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); ++ gsi_prev (&si)) ++ if (comp_mr->stmts[0] == gsi_stmt (si)) ++ { ++ last_used = gsi_stmt (si); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Last used stmt in dominator bb:\n"); ++ print_gimple_stmt (dump_file, last_used, 0); ++ } ++ break; ++ } ++ ++ split_block (dom_bb, last_used); + gimple_stmt_iterator gsi = gsi_last_bb (dom_bb); + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; + gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0])); + tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]); ++ /* If old_var definition dominates the current use, just use it, otherwise ++ evaluate it just before new inc var evaluation. 
*/ + gimple_seq stmts = NULL; ++ stmt_set processed_stmts; ++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0]))) ++ { ++ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, ++ processed_stmts); ++ inc_var = gimple_assign_lhs (tmp); ++ } + tree var_type = TREE_TYPE (inc_var); + enum tree_code inc_code; + if (TREE_CODE (var_type) == POINTER_TYPE) +@@ -1556,52 +1650,28 @@ optimize_function (cgraph_node *n, function *fn) + else + inc_code = PLUS_EXPR; + tree step = inc_mr->step; +- unsigned dist_val = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor; ++ HOST_WIDE_INT dist_val = tree_to_shwi (step) ++ * param_ipa_prefetch_distance_factor; + tree dist = build_int_cst (TREE_TYPE (step), dist_val); + tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist); + (*decl_map)[inc_var] = new_inc_var; ++ if (dump_file) ++ { ++ fprintf (dump_file, "New distance value: %ld, new inc var: ", dist_val); ++ print_generic_expr (dump_file, new_inc_var); ++ fprintf (dump_file, "\n"); ++ } + + /* Create other new vars. Insert new stmts. */ +- struct walk_stmt_info wi; +- stmt_set processed_stmts; +- memref_tree_map mr_new_trees; + for (memref_set::const_iterator it = used_mrs.begin (); + it != used_mrs.end (); it++) + { + memref_t *mr = *it; +- gimple *last_stmt = NULL; + if (mr == comp_mr) + continue; +- for (int i = mr->stmts.length () - 1; i >= 0 ; i--) +- { +- if (processed_stmts.count (mr->stmts[i])) +- continue; +- processed_stmts.insert (mr->stmts[i]); +- if (dump_file) +- { +- fprintf (dump_file, "Copy stmt %d from used MR (%d):\n", +- i, mr->mr_id); +- print_gimple_stmt (dump_file, mr->stmts[i], 0); +- } +- /* Create a new copy of STMT and duplicate STMT's virtual +- operands. */ +- gimple *copy = gimple_copy (mr->stmts[i]); +- gcc_checking_assert (!is_gimple_debug (copy)); +- +- /* Remap all the operands in COPY. */ +- memset (&wi, 0, sizeof (wi)); +- last_stmt = copy; +- wi.info = copy; +- walk_gimple_op (copy, remap_gimple_op_r, &wi); +- if (dump_file) +- { +- fprintf (dump_file, "Stmt %d after remap:\n",i); +- print_gimple_stmt (dump_file, copy, 0); +- } +- gimple_seq_add_stmt (&stmts, copy); +- } ++ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, ++ processed_stmts); + gcc_assert (last_stmt); +- mr_new_trees[mr] = gimple_assign_lhs (last_stmt); + if (dump_file) + { + fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); +@@ -1637,29 +1707,9 @@ optimize_function (cgraph_node *n, function *fn) + memref_t *mr = vmrs[j]; + /* Don't need to copy the last stmt, since we insert prefetch insn + instead of it. */ +- for (int i = mr->stmts.length () - 1; i >= 1 ; i--) +- { +- if (processed_stmts.count (mr->stmts[i])) +- continue; +- processed_stmts.insert (mr->stmts[i]); +- +- gimple *copy = gimple_copy (mr->stmts[i]); +- gcc_checking_assert (!is_gimple_debug (copy)); +- +- /* Remap all the operands in COPY. */ +- memset (&wi, 0, sizeof (wi)); +- wi.info = copy; +- walk_gimple_op (copy, remap_gimple_op_r, &wi); +- if (dump_file) +- { +- fprintf (dump_file, "Stmt %d after remap:\n",i); +- print_gimple_stmt (dump_file, copy, 0); +- } +- gimple_seq_add_stmt (&stmts, copy); +- } ++ gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts); + gimple *last_stmt = mr->stmts[0]; + gcc_assert (last_stmt); +- mr_new_trees[mr] = gimple_assign_lhs (last_stmt); + tree write_p = mr->is_store ? 
integer_one_node : integer_zero_node; + tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); + if (decl_map->count (addr)) +@@ -1668,6 +1718,11 @@ optimize_function (cgraph_node *n, function *fn) + 3, addr, write_p, local); + pcalls.safe_push (last_stmt); + gimple_seq_add_stmt (&stmts, last_stmt); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Insert %d prefetch stmt:\n", j); ++ print_gimple_stmt (dump_file, last_stmt, 0); ++ } + } + + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); +@@ -1677,6 +1732,7 @@ optimize_function (cgraph_node *n, function *fn) + for (unsigned i = 0; i < pcalls.length (); i++) + create_cgraph_edge (n, pcalls[i]); + ipa_update_overall_fn_summary (n); ++ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); + + return 1; + } +@@ -1806,7 +1862,7 @@ pass_ipa_prefetch::gate (function *) + /* Don't bother doing anything if the program has errors. */ + && !seen_error () + && flag_lto_partition == LTO_PARTITION_ONE +- /* Only enable struct optimizations in lto or whole_program. */ ++ /* Only enable prefetch optimizations in lto or whole_program. */ + && (in_lto_p || flag_whole_program)); + } + +diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc +index 5355cf2f4..471b3927c 100644 +--- a/gcc/ipa-sra.cc ++++ b/gcc/ipa-sra.cc +@@ -3393,6 +3393,13 @@ param_splitting_across_edge (cgraph_edge *cs) + gcc_checking_assert (from_ifs && from_ifs->m_parameters); + + isra_call_summary *csum = call_sums->get (cs); ++ /* TODO: implement better support for call edges inserted after summary ++ collection but before sra wpa invocation. */ ++ if (!csum) ++ { ++ csum = call_sums->get_create (cs); ++ csum->m_return_ignored = true; ++ } + gcc_checking_assert (csum); + unsigned args_count = csum->m_arg_flow.length (); + isra_func_summary *to_ifs = func_sums->get (callee); +diff --git a/gcc/params.opt b/gcc/params.opt +index 5c07e3986..50385dfd7 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -314,8 +314,8 @@ Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Opt + The factor represents the number of inductive variable incrementations to evaluate an indirect memory address for IPA prefetch. + + -param=ipa-prefetch-locality= +-Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization +-The flag represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep. ++Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization ++The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data. 
+ + -param=ira-loop-reserved-regs= + Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization +diff --git a/gcc/testsuite/gcc.dg/completion-1.c b/gcc/testsuite/gcc.dg/completion-1.c +index 64da64f1c..df2319c76 100644 +--- a/gcc/testsuite/gcc.dg/completion-1.c ++++ b/gcc/testsuite/gcc.dg/completion-1.c +@@ -2,6 +2,7 @@ + /* { dg-options "--completion=-fipa-ic" } */ + + /* { dg-begin-multiline-output "" } ++-fipa-ic + -fipa-icf + -fipa-icf-functions + -fipa-icf-variables +diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c +new file mode 100644 +index 000000000..bd4fb2bdc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c +@@ -0,0 +1,1843 @@ ++/* { dg-do link } */ ++/* { dg-options "-O3 -fipa-ic -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch -fdump-ipa-icp" } */ ++/* { dg-require-effective-target lto } */ ++ ++/* Based on opensource xz code. */ ++ ++#include <stdlib.h> ++#include <string.h> ++ ++typedef long int ptrdiff_t; ++typedef long unsigned int size_t; ++typedef unsigned int wchar_t; ++ ++typedef unsigned char __u_char; ++typedef unsigned short int __u_short; ++typedef unsigned int __u_int; ++typedef unsigned long int __u_long; ++ ++typedef signed char __int8_t; ++typedef unsigned char __uint8_t; ++typedef signed short int __int16_t; ++typedef unsigned short int __uint16_t; ++typedef signed int __int32_t; ++typedef unsigned int __uint32_t; ++ ++typedef signed long int __int64_t; ++typedef unsigned long int __uint64_t; ++ ++typedef __int8_t __int_least8_t; ++typedef __uint8_t __uint_least8_t; ++typedef __int16_t __int_least16_t; ++typedef __uint16_t __uint_least16_t; ++typedef __int32_t __int_least32_t; ++typedef __uint32_t __uint_least32_t; ++typedef __int64_t __int_least64_t; ++typedef __uint64_t __uint_least64_t; ++ ++typedef __int8_t int8_t; ++typedef __int16_t int16_t; ++typedef __int32_t int32_t; ++typedef __int64_t int64_t; ++ ++typedef __uint8_t uint8_t; ++typedef __uint16_t uint16_t; ++typedef __uint32_t uint32_t; ++typedef __uint64_t uint64_t; ++ ++typedef long int intptr_t; ++typedef unsigned long int uintptr_t; ++ ++static inline uint16_t ++read16ne(const uint8_t *buf) ++{ ++ uint16_t num; ++ memcpy(&num, buf, sizeof(num)); ++ return num; ++} ++ ++static inline uint32_t ++read32ne(const uint8_t *buf) ++{ ++ uint32_t num; ++ memcpy(&num, buf, sizeof(num)); ++ return num; ++} ++ ++static inline uint16_t ++aligned_read16ne(const uint8_t *buf) ++{ ++ uint16_t num; ++ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num)); ++ return num; ++} ++ ++ ++static inline uint32_t ++aligned_read32ne(const uint8_t *buf) ++{ ++ uint32_t num; ++ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num)); ++ return num; ++} ++ ++static inline uint64_t ++aligned_read64ne(const uint8_t *buf) ++{ ++ uint64_t num; ++ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num)); ++ return num; ++} ++ ++typedef unsigned char lzma_bool; ++ ++typedef enum { ++ LZMA_RESERVED_ENUM = 0 ++} lzma_reserved_enum; ++ ++typedef enum { ++ LZMA_OK = 0, ++ LZMA_STREAM_END = 1, ++ LZMA_NO_CHECK = 2, ++ LZMA_UNSUPPORTED_CHECK = 3, ++ LZMA_GET_CHECK = 4, ++ LZMA_MEM_ERROR = 5, ++ LZMA_MEMLIMIT_ERROR = 6, ++ LZMA_FORMAT_ERROR = 7, ++ LZMA_OPTIONS_ERROR = 8, ++ LZMA_DATA_ERROR = 9, ++ LZMA_BUF_ERROR = 10, ++ LZMA_PROG_ERROR = 11, ++} lzma_ret; ++ ++typedef enum { ++ LZMA_RUN = 0, ++ LZMA_SYNC_FLUSH = 1, ++ LZMA_FULL_FLUSH = 2, ++ LZMA_FULL_BARRIER = 4, ++ LZMA_FINISH = 3 ++} 
lzma_action; ++ ++typedef struct { ++ void *( *alloc)(void *opaque, size_t nmemb, size_t size); ++ ++ void ( *free)(void *opaque, void *ptr); ++ ++ void *opaque; ++} lzma_allocator; ++ ++typedef uint64_t lzma_vli; ++ ++typedef enum { ++ LZMA_CHECK_NONE = 0, ++ LZMA_CHECK_CRC32 = 1, ++ LZMA_CHECK_CRC64 = 4, ++ LZMA_CHECK_SHA256 = 10 ++} lzma_check; ++ ++typedef struct { ++ lzma_vli id; ++ void *options; ++} lzma_filter; ++ ++typedef enum { ++ LZMA_MF_HC3 = 0x03, ++ LZMA_MF_HC4 = 0x04, ++ LZMA_MF_BT2 = 0x12, ++ LZMA_MF_BT3 = 0x13, ++ LZMA_MF_BT4 = 0x14 ++} lzma_match_finder; ++ ++typedef struct lzma_next_coder_s lzma_next_coder; ++ ++typedef struct lzma_filter_info_s lzma_filter_info; ++ ++typedef lzma_ret (*lzma_init_function)( ++ lzma_next_coder *next, const lzma_allocator *allocator, ++ const lzma_filter_info *filters); ++ ++typedef lzma_ret (*lzma_code_function)( ++ void *coder, const lzma_allocator *allocator, ++ const uint8_t *restrict in, size_t *restrict in_pos, ++ size_t in_size, uint8_t *restrict out, ++ size_t *restrict out_pos, size_t out_size, ++ lzma_action action); ++ ++typedef void (*lzma_end_function)( ++ void *coder, const lzma_allocator *allocator); ++ ++struct lzma_filter_info_s { ++ lzma_vli id; ++ lzma_init_function init; ++ void *options; ++}; ++ ++struct lzma_next_coder_s { ++ void *coder; ++ lzma_vli id; ++ uintptr_t init; ++ ++ lzma_code_function code; ++ lzma_end_function end; ++ void (*get_progress)(void *coder, ++ uint64_t *progress_in, uint64_t *progress_out); ++ ++ lzma_check (*get_check)(const void *coder); ++ lzma_ret (*memconfig)(void *coder, uint64_t *memusage, ++ uint64_t *old_memlimit, uint64_t new_memlimit); ++ lzma_ret (*update)(void *coder, const lzma_allocator *allocator, ++ const lzma_filter *filters, const lzma_filter *reversed_filters); ++}; ++ ++typedef struct { ++ uint32_t len; ++ uint32_t dist; ++} lzma_match; ++ ++typedef struct lzma_mf_s lzma_mf; ++struct lzma_mf_s { ++ uint8_t *buffer; ++ uint32_t size; ++ uint32_t keep_size_before; ++ uint32_t keep_size_after; ++ uint32_t offset; ++ uint32_t read_pos; ++ uint32_t read_ahead; ++ uint32_t read_limit; ++ uint32_t write_pos; ++ uint32_t pending; ++ uint32_t (*find)(lzma_mf *mf, lzma_match *matches); ++ void (*skip)(lzma_mf *mf, uint32_t num); ++ uint32_t *hash; ++ uint32_t *son; ++ uint32_t cyclic_pos; ++ uint32_t cyclic_size; ++ uint32_t hash_mask; ++ uint32_t depth; ++ uint32_t nice_len; ++ uint32_t match_len_max; ++ lzma_action action; ++ uint32_t hash_count; ++ uint32_t sons_count; ++}; ++ ++typedef struct { ++ size_t before_size; ++ size_t dict_size; ++ size_t after_size; ++ size_t match_len_max; ++ size_t nice_len; ++ lzma_match_finder match_finder; ++ uint32_t depth; ++ const uint8_t *preset_dict; ++ uint32_t preset_dict_size; ++} lzma_lz_options; ++ ++typedef struct { ++ void *coder; ++ lzma_ret (*code)(void *coder, ++ lzma_mf *restrict mf, uint8_t *restrict out, ++ size_t *restrict out_pos, size_t out_size); ++ void (*end)(void *coder, const lzma_allocator *allocator); ++ lzma_ret (*options_update)(void *coder, const lzma_filter *filter); ++} lzma_lz_encoder; ++ ++static inline const uint8_t * ++mf_ptr(const lzma_mf *mf) ++{ ++ return mf->buffer + mf->read_pos; ++} ++ ++static inline uint32_t ++mf_avail(const lzma_mf *mf) ++{ ++ return mf->write_pos - mf->read_pos; ++} ++ ++typedef struct { ++ uint32_t state[8]; ++ uint64_t size; ++} lzma_sha256_state; ++ ++typedef struct { ++ union { ++ uint8_t u8[64]; ++ uint32_t u32[16]; ++ uint64_t u64[8]; ++ } buffer; ++ union { ++ uint32_t crc32; 
++ uint64_t crc64; ++ lzma_sha256_state sha256; ++ } state; ++} lzma_check_state; ++ ++// The table is constantly initialized in the original code. ++// Skip it in the test. ++const uint32_t lzma_crc32_table[8][256]; ++ ++static inline uint32_t __attribute__((__always_inline__)) ++lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, ++ uint32_t len, uint32_t limit) ++{ ++ while (len < limit) { ++ uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len); ++ if (x != 0) { ++ if ((x & 0xFFFF) == 0) { ++ len += 2; ++ x >>= 16; ++ } ++ ++ if ((x & 0xFF) == 0) ++ ++len; ++ ++ return ((len) < (limit) ? (len) : (limit)); ++ } ++ ++ len += 4; ++ } ++ ++ return limit; ++} ++ ++extern uint32_t ++lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) ++{ ++ const uint32_t count = mf->find(mf, matches); ++ uint32_t len_best = 0; ++ ++ if (count > 0) { ++ len_best = matches[count - 1].len; ++ if (len_best == mf->nice_len) { ++ uint32_t limit = mf_avail(mf) + 1; ++ if (limit > mf->match_len_max) ++ limit = mf->match_len_max; ++ const uint8_t *p1 = mf_ptr(mf) - 1; ++ const uint8_t *p2 = p1 - matches[count - 1].dist - 1; ++ len_best = lzma_memcmplen(p1, p2, len_best, limit); ++ } ++ } ++ ++ *count_ptr = count; ++ ++mf->read_ahead; ++ ++ return len_best; ++} ++ ++static void ++normalize(lzma_mf *mf) ++{ ++ const uint32_t subvalue = ((4294967295U) - mf->cyclic_size); ++ ++ for (uint32_t i = 0; i < mf->hash_count; ++i) { ++ if (mf->hash[i] <= subvalue) ++ mf->hash[i] = 0; ++ else ++ mf->hash[i] -= subvalue; ++ } ++ ++ for (uint32_t i = 0; i < mf->sons_count; ++i) { ++ if (mf->son[i] <= subvalue) ++ mf->son[i] = 0; ++ else ++ mf->son[i] -= subvalue; ++ } ++ ++ mf->offset -= subvalue; ++ return; ++} ++ ++static void ++move_pos(lzma_mf *mf) ++{ ++ if (++mf->cyclic_pos == mf->cyclic_size) ++ mf->cyclic_pos = 0; ++ ++mf->read_pos; ++ if (__builtin_expect(mf->read_pos + mf->offset == (4294967295U), 0 )) ++ normalize(mf); ++} ++ ++static void ++move_pending(lzma_mf *mf) ++{ ++ ++mf->read_pos; ++ ++mf->pending; ++} ++ ++static lzma_match * ++hc_find_func( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ son[cyclic_pos] = cur_match; ++ ++ while (1) { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) ++ return matches; ++ ++ const uint8_t *const pb = cur - delta; ++ cur_match = son[cyclic_pos - delta ++ + (delta > cyclic_pos ? 
cyclic_size : 0)]; ++ ++ if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { ++ uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit); ++ ++ if (len_best < len) { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) ++ return matches; ++ } ++ } ++ } ++} ++ ++extern uint32_t ++lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) ++{ ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (3)) { ++ move_pending(mf); ++ return 0; ++ } ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ uint32_t matches_count = 0; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask; ++ ++ const uint32_t delta2 = pos - mf->hash[hash_2_value]; ++ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_value] = pos; ++ ++ uint32_t len_best = 2; ++ ++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { ++ len_best = lzma_memcmplen(cur - delta2, cur, len_best, len_limit); ++ ++ matches[0].len = len_best; ++ matches[0].dist = delta2 - 1; ++ matches_count = 1; ++ ++ if (len_best == len_limit) { ++ mf->son[mf->cyclic_pos] = cur_match; ++ move_pos(mf); ++ return 1; ++ } ++ } ++ ++ matches_count = hc_find_func(len_limit, pos, cur, cur_match, mf->depth, ++ mf->son, mf->cyclic_pos, mf->cyclic_size, ++ matches + matches_count, len_best) - matches; ++ move_pos(mf); ++ return matches_count; ++} ++ ++extern void ++lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount) ++{ ++ do { ++ if (mf_avail(mf) < 3) { ++ move_pending(mf); ++ continue; ++ } ++ ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask; ++ ++ const uint32_t cur_match ++ = mf->hash[((1U << 10)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_value] = pos; ++ ++ do { mf->son[mf->cyclic_pos] = cur_match; move_pos(mf); } while (0); ++ ++ } while (--amount != 0); ++} ++ ++extern uint32_t ++lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) ++{ ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (4)) { ++ move_pending(mf); ++ return 0; ++ } ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ uint32_t matches_count = 0; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) ++ & ((1U << 16) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) ++ ^ (lzma_crc32_table[0][cur[3]] << 5)) ++ & mf->hash_mask; ++ uint32_t delta2 = pos - mf->hash[hash_2_value]; ++ const uint32_t delta3 ++ = pos - mf->hash[((1U << 10)) + hash_3_value]; ++ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; ++ ++ mf->hash[hash_2_value ] = pos; ++ mf->hash[((1U << 10)) + hash_3_value] = pos; ++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos; ++ ++ uint32_t len_best = 1; ++ ++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { ++ 
len_best = 2; ++ matches[0].len = 2; ++ matches[0].dist = delta2 - 1; ++ matches_count = 1; ++ } ++ ++ if (delta2 != delta3 && delta3 < mf->cyclic_size ++ && *(cur - delta3) == *cur) { ++ len_best = 3; ++ matches[matches_count++].dist = delta3 - 1; ++ delta2 = delta3; ++ } ++ ++ if (matches_count != 0) { ++ len_best = lzma_memcmplen(cur - delta2, cur, ++ len_best, len_limit); ++ ++ matches[matches_count - 1].len = len_best; ++ ++ if (len_best == len_limit) { ++ mf->son[mf->cyclic_pos] = cur_match; move_pos(mf); ++ return matches_count; ++ } ++ } ++ ++ if (len_best < 3) ++ len_best = 3; ++ ++ matches_count = hc_find_func(len_limit, pos, cur, cur_match, mf->depth, ++ mf->son, mf->cyclic_pos, mf->cyclic_size, ++ matches + matches_count, len_best) - matches; ++ move_pos(mf); ++ return matches_count; ++} ++ ++extern void ++lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount) ++{ ++ do { ++ if (mf_avail(mf) < 4) { ++ move_pending(mf); ++ continue; ++ } ++ ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & ((1U << 16) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) ++ ^ (lzma_crc32_table[0][cur[3]] << 5)) ++ & mf->hash_mask; ++ ++ const uint32_t cur_match ++ = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_3_value] = pos; ++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos; ++ ++ mf->son[mf->cyclic_pos] = cur_match; ++ move_pos(mf); ++ } while (--amount != 0); ++} ++ ++static lzma_match * ++bt_find_func( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (1) { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) { ++ *ptr0 = 0; ++ *ptr1 = 0; ++ return matches; ++ } ++ ++ uint32_t *const pair = son + ((cyclic_pos - delta ++ + (delta > cyclic_pos ? cyclic_size : 0)) ++ << 1); ++ ++ const uint8_t *const pb = cur - delta; ++ uint32_t len = ((len0) < (len1) ? 
(len0) : (len1)); ++ ++ if (pb[len] == cur[len]) { ++ len = lzma_memcmplen(pb, cur, len + 1, len_limit); ++ ++ if (len_best < len) { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return matches; ++ } ++ } ++ } ++ ++ if (pb[len] < cur[len]) { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } else { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++ ++static void ++bt_skip_func( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (1) { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) { ++ *ptr0 = 0; ++ *ptr1 = 0; ++ return; ++ } ++ ++ uint32_t *pair = son + ((cyclic_pos - delta ++ + (delta > cyclic_pos ? cyclic_size : 0)) ++ << 1); ++ const uint8_t *pb = cur - delta; ++ uint32_t len = ((len0) < (len1) ? (len0) : (len1)); ++ ++ if (pb[len] == cur[len]) { ++ len = lzma_memcmplen(pb, cur, len + 1, len_limit); ++ ++ if (len == len_limit) { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return; ++ } ++ } ++ ++ if (pb[len] < cur[len]) { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } else { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++extern uint32_t ++lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches) ++{ ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (2) || (mf->action == LZMA_SYNC_FLUSH)) { ++ move_pending(mf); ++ return 0; ++ } ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ uint32_t matches_count = 0; ++ const uint32_t hash_value = read16ne(cur); ++ const uint32_t cur_match = mf->hash[hash_value]; ++ mf->hash[hash_value] = pos; ++ ++ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth, ++ mf->son, mf->cyclic_pos, mf->cyclic_size, ++ matches + matches_count, 1) - matches; ++ move_pos(mf); ++ return matches_count; ++} ++ ++extern void ++lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount) ++{ ++ do { ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (2) || (mf->action == LZMA_SYNC_FLUSH)) { ++ move_pending(mf); ++ continue; ++ } ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ ++ const uint32_t hash_value = read16ne(cur); ++ const uint32_t cur_match = mf->hash[hash_value]; ++ mf->hash[hash_value] = pos; ++ ++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son, ++ mf->cyclic_pos, mf->cyclic_size); ++ move_pos(mf); ++ } while (--amount != 0); ++} ++ ++extern uint32_t ++lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) ++{ ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (3) || (1 && mf->action == LZMA_SYNC_FLUSH)) { ++ move_pending(mf); ++ return 0; ++ } ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ uint32_t matches_count = 0; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ 
const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask; ++ ++ const uint32_t delta2 = pos - mf->hash[hash_2_value]; ++ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_value] = pos; ++ ++ uint32_t len_best = 2; ++ ++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { ++ len_best = lzma_memcmplen( ++ cur, cur - delta2, len_best, len_limit); ++ ++ matches[0].len = len_best; ++ matches[0].dist = delta2 - 1; ++ matches_count = 1; ++ ++ if (len_best == len_limit) { ++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son, ++ mf->cyclic_pos, mf->cyclic_size); ++ move_pos(mf); ++ return 1; ++ } ++ } ++ ++ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth, ++ mf->son, mf->cyclic_pos, mf->cyclic_size, ++ matches + matches_count, len_best) - matches; ++ move_pos(mf); ++ return matches_count; ++} ++ ++ ++extern void ++lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount) ++{ ++ do { ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; } ++ else if (len_limit < (3) || (1 && mf->action == LZMA_SYNC_FLUSH)) { ++ move_pending(mf); ++ continue; ++ } ++ const uint8_t *cur = mf_ptr(mf); ++ const uint32_t pos = mf->read_pos + mf->offset; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask; ++ ++ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_value] = pos; ++ ++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son, ++ mf->cyclic_pos, mf->cyclic_size); ++ move_pos(mf); ++ } while (--amount != 0); ++} ++ ++extern uint32_t ++lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) ++{ ++ uint32_t len_limit = mf->write_pos - mf->read_pos; ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (4) || (mf->action == LZMA_SYNC_FLUSH)) { ++ ++mf->read_pos; ++ ++mf->pending; ++ return 0; ++ } ++ ++ const uint8_t *cur = mf->buffer + mf->read_pos; ++ const uint32_t pos = mf->read_pos + mf->offset; ++ uint32_t matches_count = 0; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & ((1U << 16) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) ++ ^ (lzma_crc32_table[0][cur[3]] << 5)) ++ & mf->hash_mask; ++ ++ uint32_t delta2 = pos - mf->hash[hash_2_value]; ++ const uint32_t delta3 = pos - mf->hash[((1U << 10)) + hash_3_value]; ++ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_3_value] = pos; ++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos; ++ ++ uint32_t len_best = 1; ++ ++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { ++ len_best = 2; ++ matches[0].len = 2; ++ matches[0].dist = delta2 - 1; ++ matches_count = 1; ++ } ++ ++ if (delta2 != delta3 && delta3 < mf->cyclic_size && *(cur - delta3) == *cur) { ++ len_best = 3; ++ matches[matches_count++].dist = delta3 - 1; ++ delta2 = delta3; ++ } ++ ++ if (matches_count != 0) { ++ len_best = lzma_memcmplen(cur, cur - delta2, len_best, len_limit); ++ ++ matches[matches_count - 1].len = 
len_best; ++ ++ if (len_best == len_limit) { ++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son, ++ mf->cyclic_pos, mf->cyclic_size); ++ move_pos(mf); ++ return matches_count; ++ } ++ } ++ ++ if (len_best < 3) ++ len_best = 3; ++ ++ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth, mf->son, ++ mf->cyclic_pos, mf->cyclic_size, ++ matches + matches_count, len_best) - matches; ++ move_pos(mf); ++ return matches_count; ++} ++ ++extern void ++lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount) ++{ ++ do { ++ uint32_t len_limit = mf_avail(mf); ++ if (mf->nice_len <= len_limit) { ++ len_limit = mf->nice_len; ++ } else if (len_limit < (4) || (mf->action == LZMA_SYNC_FLUSH)) { ++ move_pending(mf); ++ continue; ++ } ++ ++ const uint8_t *cur = mf->buffer + mf->read_pos; ++ const uint32_t pos = mf->read_pos + mf->offset; ++ ++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; ++ const uint32_t hash_2_value = temp & ((1U << 10) - 1); ++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) ++ & ((1U << 16) - 1); ++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) ++ ^ (lzma_crc32_table[0][cur[3]] << 5)) ++ & mf->hash_mask; ++ ++ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; ++ ++ mf->hash[hash_2_value] = pos; ++ mf->hash[((1U << 10)) + hash_3_value] = pos; ++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos; ++ ++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son, ++ mf->cyclic_pos, mf->cyclic_size); ++ move_pos(mf); ++ } while (--amount != 0); ++} ++ ++static inline void ++mf_skip(lzma_mf *mf, uint32_t amount) ++{ ++ if (amount != 0) { ++ mf->skip(mf, amount); ++ mf->read_ahead += amount; ++ } ++} ++ ++typedef struct lzma_lzma1_encoder_s lzma_lzma1_encoder; ++typedef uint16_t probability; ++ ++typedef struct { ++ probability choice; ++ probability choice2; ++ probability low[(1 << 4)][(1 << 3)]; ++ probability mid[(1 << 4)][(1 << 3)]; ++ probability high[(1 << 8)]; ++ uint32_t prices[(1 << 4)][((1 << 3) + (1 << 3) + (1 << 8))]; ++ uint32_t table_size; ++ uint32_t counters[(1 << 4)]; ++} lzma_length_encoder; ++ ++typedef struct { ++ uint64_t low; ++ uint64_t cache_size; ++ uint32_t range; ++ uint8_t cache; ++ size_t count; ++ size_t pos; ++ ++ enum { ++ RC_BIT_0, ++ RC_BIT_1, ++ RC_DIRECT_0, ++ RC_DIRECT_1, ++ RC_FLUSH, ++ } symbols[58]; ++ ++ probability *probs[58]; ++} lzma_range_encoder; ++ ++ ++typedef enum { ++ STATE_LIT_LIT, ++ STATE_MATCH_LIT_LIT, ++ STATE_REP_LIT_LIT, ++ STATE_SHORTREP_LIT_LIT, ++ STATE_MATCH_LIT, ++ STATE_REP_LIT, ++ STATE_SHORTREP_LIT, ++ STATE_LIT_MATCH, ++ STATE_LIT_LONGREP, ++ STATE_LIT_SHORTREP, ++ STATE_NONLIT_MATCH, ++ STATE_NONLIT_REP, ++} lzma_lzma_state; ++ ++typedef struct { ++ lzma_lzma_state state; ++ _Bool prev_1_is_literal; ++ _Bool prev_2; ++ ++ uint32_t pos_prev_2; ++ uint32_t back_prev_2; ++ ++ uint32_t price; ++ uint32_t pos_prev; ++ uint32_t back_prev; ++ ++ uint32_t backs[4]; ++} lzma_optimal; ++ ++struct lzma_lzma1_encoder_s { ++ lzma_range_encoder rc; ++ lzma_lzma_state state; ++ uint32_t reps[4]; ++ lzma_match matches[(2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1) + 1]; ++ uint32_t matches_count; ++ uint32_t longest_match_length; ++ _Bool fast_mode; ++ _Bool is_initialized; ++ _Bool is_flushed; ++ uint32_t pos_mask; ++ uint32_t literal_context_bits; ++ uint32_t literal_pos_mask; ++ ++ probability literal[(1 << 4)][0x300]; ++ probability is_match[12][(1 << 4)]; ++ probability is_rep[12]; ++ probability is_rep0[12]; ++ probability is_rep1[12]; 
++ probability is_rep2[12]; ++ probability is_rep0_long[12][(1 << 4)]; ++ probability dist_slot[4][(1 << 6)]; ++ probability dist_special[(1 << (14 / 2)) - 14]; ++ probability dist_align[(1 << 4)]; ++ ++ lzma_length_encoder match_len_encoder; ++ lzma_length_encoder rep_len_encoder; ++ ++ uint32_t dist_slot_prices[4][(1 << 6)]; ++ uint32_t dist_prices[4][(1 << (14 / 2))]; ++ uint32_t dist_table_size; ++ uint32_t match_price_count; ++ ++ uint32_t align_prices[(1 << 4)]; ++ uint32_t align_price_count; ++ uint32_t opts_end_index; ++ uint32_t opts_current_index; ++ lzma_optimal opts[(1 << 12)]; ++}; ++ ++extern void ++lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder, ++ lzma_mf *restrict mf, ++ uint32_t *restrict back_res, uint32_t *restrict len_res) ++{ ++ const uint32_t nice_len = mf->nice_len; ++ ++ uint32_t len_main; ++ uint32_t matches_count; ++ if (mf->read_ahead == 0) { ++ len_main = lzma_mf_find(mf, &matches_count, coder->matches); ++ } else { ++ len_main = coder->longest_match_length; ++ matches_count = coder->matches_count; ++ } ++ ++ const uint8_t *buf = mf_ptr(mf) - 1; ++ const uint32_t buf_avail ++ = ((mf_avail(mf) + 1) < ((2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1)) ++ ? (mf_avail(mf) + 1) : ((2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1))); ++ ++ if (buf_avail < 2) { ++ *back_res = (4294967295U); ++ *len_res = 1; ++ return; ++ } ++ ++ uint32_t rep_len = 0; ++ uint32_t rep_index = 0; ++ ++ for (uint32_t i = 0; i < 4; ++i) { ++ const uint8_t *const buf_back = buf - coder->reps[i] - 1; ++ if ((read16ne(buf) != read16ne(buf_back))) ++ continue; ++ const uint32_t len = lzma_memcmplen(buf, buf_back, 2, buf_avail); ++ if (len >= nice_len) { ++ *back_res = i; ++ *len_res = len; ++ mf_skip(mf, len - 1); ++ return; ++ } ++ if (len > rep_len) { ++ rep_index = i; ++ rep_len = len; ++ } ++ } ++ if (len_main >= nice_len) { ++ *back_res = coder->matches[matches_count - 1].dist + 4; ++ *len_res = len_main; ++ mf_skip(mf, len_main - 1); ++ return; ++ } ++ ++ uint32_t back_main = 0; ++ if (len_main >= 2) { ++ back_main = coder->matches[matches_count - 1].dist; ++ while (matches_count > 1 && len_main == ++ coder->matches[matches_count - 2].len + 1) { ++ if (!(((back_main) >> 7) > (coder->matches[ matches_count - 2].dist))) ++ break; ++ --matches_count; ++ len_main = coder->matches[matches_count - 1].len; ++ back_main = coder->matches[matches_count - 1].dist; ++ } ++ if (len_main == 2 && back_main >= 0x80) ++ len_main = 1; ++ } ++ ++ if (rep_len >= 2) { ++ if (rep_len + 1 >= len_main ++ || (rep_len + 2 >= len_main ++ && back_main > (1U << 9)) ++ || (rep_len + 3 >= len_main ++ && back_main > (1U << 15))) { ++ *back_res = rep_index; ++ *len_res = rep_len; ++ mf_skip(mf, rep_len - 1); ++ return; ++ } ++ } ++ ++ if (len_main < 2 || buf_avail <= 2) { ++ *back_res = (4294967295U); ++ *len_res = 1; ++ return; ++ } ++ ++ coder->longest_match_length = lzma_mf_find(mf, ++ &coder->matches_count, coder->matches); ++ ++ if (coder->longest_match_length >= 2) { ++ const uint32_t new_dist = coder->matches[ ++ coder->matches_count - 1].dist; ++ ++ if ((coder->longest_match_length >= len_main ++ && new_dist < back_main) ++ || (coder->longest_match_length == len_main + 1 ++ && !(((new_dist) >> 7) > (back_main))) ++ || (coder->longest_match_length > len_main + 1) ++ || (coder->longest_match_length + 1 >= len_main ++ && len_main >= 3 ++ && (((back_main) >> 7) > (new_dist)))) { ++ *back_res = (4294967295U); ++ *len_res = 1; ++ return; ++ } ++ } ++ ++buf; ++ const uint32_t limit = ((2) > (len_main - 1) ? 
(2) : (len_main - 1)); ++ for (uint32_t i = 0; i < 4; ++i) { ++ if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) { ++ *back_res = (4294967295U); ++ *len_res = 1; ++ return; ++ } ++ } ++ ++ *back_res = back_main + 4; ++ *len_res = len_main; ++ mf_skip(mf, len_main - 2); ++ return; ++} ++ ++static inline void ++rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) ++{ ++ rc->symbols[rc->count] = bit; ++ rc->probs[rc->count] = prob; ++ ++rc->count; ++} ++ ++static inline void ++rc_bittree(lzma_range_encoder *rc, probability *probs, ++ uint32_t bit_count, uint32_t symbol) ++{ ++ uint32_t model_index = 1; ++ ++ do { ++ const uint32_t bit = (symbol >> --bit_count) & 1; ++ rc_bit(rc, &probs[model_index], bit); ++ model_index = (model_index << 1) + bit; ++ } while (bit_count != 0); ++} ++ ++static _Bool ++encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) ++{ ++ if (mf->read_pos == mf->read_limit) { ++ if (mf->action == LZMA_RUN) ++ return 0; ++ } else { ++ mf_skip(mf, 1); ++ mf->read_ahead = 0; ++ rc_bit(&coder->rc, &coder->is_match[0][0], 0); ++ rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); ++ } ++ ++ coder->is_initialized = 1; ++ ++ return 1; ++} ++ ++static inline uint32_t ++mf_position(const lzma_mf *mf) ++{ ++ return mf->read_pos - mf->read_ahead; ++} ++ ++static inline _Bool ++rc_shift_low(lzma_range_encoder *rc, ++ uint8_t *out, size_t *out_pos, size_t out_size) ++{ ++ if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) ++ || (uint32_t)(rc->low >> 32) != 0) { ++ do { ++ if (*out_pos == out_size) ++ return 1; ++ ++ out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); ++ ++*out_pos; ++ rc->cache = 0xFF; ++ } while (--rc->cache_size != 0); ++ rc->cache = (rc->low >> 24) & 0xFF; ++ } ++ ++ ++rc->cache_size; ++ rc->low = (rc->low & 0x00FFFFFF) << 8; ++ return 0; ++} ++ ++static inline void ++rc_reset(lzma_range_encoder *rc) ++{ ++ rc->low = 0; ++ rc->cache_size = 1; ++ rc->range = (4294967295U); ++ rc->cache = 0; ++ rc->count = 0; ++ rc->pos = 0; ++} ++ ++static inline _Bool ++rc_encode(lzma_range_encoder *rc, ++ uint8_t *out, size_t *out_pos, size_t out_size) ++{ ++ while (rc->pos < rc->count) { ++ if (rc->range < (1U << 24)) { ++ if (rc_shift_low(rc, out, out_pos, out_size)) ++ return 1; ++ rc->range <<= 8; ++ } ++ ++ switch (rc->symbols[rc->pos]) { ++ case RC_BIT_0: { ++ probability prob = *rc->probs[rc->pos]; ++ rc->range = (rc->range >> 11) ++ * prob; ++ prob += ((1U << 11) - prob) >> 5; ++ *rc->probs[rc->pos] = prob; ++ break; ++ } ++ ++ case RC_BIT_1: { ++ probability prob = *rc->probs[rc->pos]; ++ const uint32_t bound = prob * (rc->range ++ >> 11); ++ rc->low += bound; ++ rc->range -= bound; ++ prob -= prob >> 5; ++ *rc->probs[rc->pos] = prob; ++ break; ++ } ++ ++ case RC_DIRECT_0: ++ rc->range >>= 1; ++ break; ++ ++ case RC_DIRECT_1: ++ rc->range >>= 1; ++ rc->low += rc->range; ++ break; ++ ++ case RC_FLUSH: ++ rc->range = (4294967295U); ++ do { ++ if (rc_shift_low(rc, out, out_pos, out_size)) ++ return 1; ++ } while (++rc->pos < rc->count); ++ ++ rc_reset(rc); ++ return 0; ++ ++ default: ++ break; ++ } ++ ++rc->pos; ++ } ++ ++ rc->count = 0; ++ rc->pos = 0; ++ return 0; ++} ++ ++static inline uint64_t ++rc_pending(const lzma_range_encoder *rc) ++{ ++ return rc->cache_size + 5 - 1; ++} ++ ++static inline void ++literal_matched(lzma_range_encoder *rc, probability *subcoder, ++ uint32_t match_byte, uint32_t symbol) ++{ ++ uint32_t offset = 0x100; ++ symbol += 1U << 8; ++ ++ do { ++ match_byte <<= 1; ++ const uint32_t match_bit = match_byte & offset; ++ const 
uint32_t subcoder_index ++ = offset + match_bit + (symbol >> 8); ++ const uint32_t bit = (symbol >> 7) & 1; ++ rc_bit(rc, &subcoder[subcoder_index], bit); ++ ++ symbol <<= 1; ++ offset &= ~(match_byte ^ symbol); ++ ++ } while (symbol < (1U << 16)); ++} ++ ++static inline void ++literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position) ++{ ++ const uint8_t cur_byte = mf->buffer[mf->read_pos - mf->read_ahead]; ++ probability *subcoder = ((coder->literal)[ ++ (((position) & (coder->literal_pos_mask)) ++ << (coder->literal_context_bits)) ++ + ((uint32_t)(mf->buffer[mf->read_pos - mf->read_ahead - 1]) ++ >> (8U - (coder->literal_context_bits)))]); ++ ++ if (((coder->state) < 7)) { ++ rc_bittree(&coder->rc, subcoder, 8, cur_byte); ++ } else { ++ const uint8_t match_byte ++ = mf->buffer[mf->read_pos - coder->reps[0] - 1 - mf->read_ahead]; ++ literal_matched(&coder->rc, subcoder, match_byte, cur_byte); ++ } ++ coder->state ++ = ((coder->state) <= STATE_SHORTREP_LIT_LIT ++ ? STATE_LIT_LIT : ((coder->state) <= STATE_LIT_SHORTREP ++ ? (coder->state) - 3 : (coder->state) - 6)); ++} ++ ++const uint8_t lzma_rc_prices[] = { ++ 128, 103, 91, 84, 78, 73, 69, 66, ++ 63, 61, 58, 56, 54, 52, 51, 49, ++ 48, 46, 45, 44, 43, 42, 41, 40, ++ 39, 38, 37, 36, 35, 34, 34, 33, ++ 32, 31, 31, 30, 29, 29, 28, 28, ++ 27, 26, 26, 25, 25, 24, 24, 23, ++ 23, 22, 22, 22, 21, 21, 20, 20, ++ 19, 19, 19, 18, 18, 17, 17, 17, ++ 16, 16, 16, 15, 15, 15, 14, 14, ++ 14, 13, 13, 13, 12, 12, 12, 11, ++ 11, 11, 11, 10, 10, 10, 10, 9, ++ 9, 9, 9, 8, 8, 8, 8, 7, ++ 7, 7, 7, 6, 6, 6, 6, 5, ++ 5, 5, 5, 5, 4, 4, 4, 4, ++ 3, 3, 3, 3, 3, 2, 2, 2, ++ 2, 2, 2, 1, 1, 1, 1, 1 ++}; ++ ++static inline uint32_t ++rc_bit_price(const probability prob, const uint32_t bit) ++{ ++ return lzma_rc_prices[(prob ^ ((0U - bit) ++ & ((1U << 11) - 1))) >> 4]; ++} ++ ++static inline uint32_t ++rc_bit_0_price(const probability prob) ++{ ++ return lzma_rc_prices[prob >> 4]; ++} ++ ++static inline uint32_t ++rc_bit_1_price(const probability prob) ++{ ++ return lzma_rc_prices[(prob ^ ((1U << 11) - 1)) ++ >> 4]; ++} ++ ++static inline uint32_t ++rc_bittree_price(const probability *const probs, ++ const uint32_t bit_levels, uint32_t symbol) ++{ ++ uint32_t price = 0; ++ symbol += 1U << bit_levels; ++ ++ do { ++ const uint32_t bit = symbol & 1; ++ symbol >>= 1; ++ price += rc_bit_price(probs[symbol], bit); ++ } while (symbol != 1); ++ ++ return price; ++} ++ ++static void ++length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) ++{ ++ const uint32_t table_size = lc->table_size; ++ lc->counters[pos_state] = table_size; ++ ++ const uint32_t a0 = rc_bit_0_price(lc->choice); ++ const uint32_t a1 = rc_bit_1_price(lc->choice); ++ const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); ++ const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); ++ uint32_t *const prices = lc->prices[pos_state]; ++ ++ uint32_t i; ++ for (i = 0; i < table_size && i < (1 << 3); ++i) ++ prices[i] = a0 + rc_bittree_price(lc->low[pos_state], ++ 3, i); ++ ++ for (; i < table_size && i < (1 << 3) + (1 << 3); ++i) ++ prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], ++ 3, i - (1 << 3)); ++ ++ for (; i < table_size; ++i) ++ prices[i] = b1 + rc_bittree_price(lc->high, 8, ++ i - (1 << 3) - (1 << 3)); ++ ++ return; ++} ++ ++static inline void ++length(lzma_range_encoder *rc, lzma_length_encoder *lc, ++ const uint32_t pos_state, uint32_t len, const _Bool fast_mode) ++{ ++ len -= 2; ++ ++ if (len < (1 << 3)) { ++ rc_bit(rc, &lc->choice, 0); ++ rc_bittree(rc, lc->low[pos_state], 3, 
len); ++ } else { ++ rc_bit(rc, &lc->choice, 1); ++ len -= (1 << 3); ++ ++ if (len < (1 << 3)) { ++ rc_bit(rc, &lc->choice2, 0); ++ rc_bittree(rc, lc->mid[pos_state], 3, len); ++ } else { ++ rc_bit(rc, &lc->choice2, 1); ++ len -= (1 << 3); ++ rc_bittree(rc, lc->high, 8, len); ++ } ++ } ++ ++ if (!fast_mode) ++ if (--lc->counters[pos_state] == 0) ++ length_update_prices(lc, pos_state); ++} ++ ++static inline void ++rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state, ++ const uint32_t rep, const uint32_t len) ++{ ++ if (rep == 0) { ++ rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); ++ rc_bit(&coder->rc, ++ &coder->is_rep0_long[coder->state][pos_state], ++ len != 1); ++ } else { ++ const uint32_t distance = coder->reps[rep]; ++ rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); ++ ++ if (rep == 1) { ++ rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); ++ } else { ++ rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); ++ rc_bit(&coder->rc, &coder->is_rep2[coder->state], ++ rep - 2); ++ ++ if (rep == 3) ++ coder->reps[3] = coder->reps[2]; ++ ++ coder->reps[2] = coder->reps[1]; ++ } ++ ++ coder->reps[1] = coder->reps[0]; ++ coder->reps[0] = distance; ++ } ++ ++ if (len == 1) { ++ coder->state = ((coder->state) < 7 ? STATE_LIT_SHORTREP : STATE_NONLIT_REP); ++ } else { ++ length(&coder->rc, &coder->rep_len_encoder, pos_state, len, ++ coder->fast_mode); ++ coder->state = ((coder->state) < 7 ? STATE_LIT_LONGREP : STATE_NONLIT_REP); ++ } ++} ++ ++// This array is constantly initialized in the original code. It's quite big ++// so we skip it. ++const uint8_t lzma_fastpos[1 << 13]; ++ ++static inline uint32_t ++get_dist_slot(uint32_t dist) ++{ ++ if (dist < (1U << (13 + ((0) + (0) * (13 - 1))))) ++ return lzma_fastpos[dist]; ++ ++ if (dist < (1U << (13 + ((0) + (1) * (13 - 1))))) ++ return (uint32_t)(lzma_fastpos[(dist) >> ((0) + (1) * (13 - 1))]) + 2 * ((0) + (1) * (13 - 1)); ++ ++ return (uint32_t)(lzma_fastpos[(dist) >> ((0) + (2) * (13 - 1))]) + 2 * ((0) + (2) * (13 - 1)); ++} ++ ++static inline void ++rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, ++ uint32_t bit_count, uint32_t symbol) ++{ ++ uint32_t model_index = 1; ++ do { ++ const uint32_t bit = symbol & 1; ++ symbol >>= 1; ++ rc_bit(rc, &probs[model_index], bit); ++ model_index = (model_index << 1) + bit; ++ } while (--bit_count != 0); ++} ++ ++static inline void ++rc_direct(lzma_range_encoder *rc, uint32_t value, uint32_t bit_count) ++{ ++ do { ++ rc->symbols[rc->count++] ++ = RC_DIRECT_0 + ((value >> --bit_count) & 1); ++ } while (bit_count != 0); ++} ++ ++static inline void ++match(lzma_lzma1_encoder *coder, const uint32_t pos_state, ++ const uint32_t distance, const uint32_t len) ++{ ++ coder->state = ((coder->state) < 7 ? STATE_LIT_MATCH : STATE_NONLIT_MATCH); ++ ++ length(&coder->rc, &coder->match_len_encoder, pos_state, len, ++ coder->fast_mode); ++ ++ const uint32_t dist_slot = get_dist_slot(distance); ++ const uint32_t dist_state = ((len) < 4 + 2 ? 
(len) - 2 : 4 - 1); ++ rc_bittree(&coder->rc, coder->dist_slot[dist_state], 6, dist_slot); ++ ++ if (dist_slot >= 4) { ++ const uint32_t footer_bits = (dist_slot >> 1) - 1; ++ const uint32_t base = (2 | (dist_slot & 1)) << footer_bits; ++ const uint32_t dist_reduced = distance - base; ++ ++ if (dist_slot < 14) { ++ rc_bittree_reverse(&coder->rc, coder->dist_special + base - dist_slot - 1, ++ footer_bits, dist_reduced); ++ } else { ++ rc_direct(&coder->rc, dist_reduced >> 4, ++ footer_bits - 4); ++ rc_bittree_reverse( ++ &coder->rc, coder->dist_align, ++ 4, dist_reduced & ((1 << 4) - 1)); ++ ++coder->align_price_count; ++ } ++ } ++ ++ coder->reps[3] = coder->reps[2]; ++ coder->reps[2] = coder->reps[1]; ++ coder->reps[1] = coder->reps[0]; ++ coder->reps[0] = distance; ++ ++coder->match_price_count; ++} ++ ++static void ++encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf, ++ uint32_t back, uint32_t len, uint32_t position) ++{ ++ const uint32_t pos_state = position & coder->pos_mask; ++ ++ if (back == (4294967295U)) { ++ rc_bit(&coder->rc, ++ &coder->is_match[coder->state][pos_state], 0); ++ literal(coder, mf, position); ++ } else { ++ rc_bit(&coder->rc, ++ &coder->is_match[coder->state][pos_state], 1); ++ ++ if (back < 4) { ++ rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); ++ rep_match(coder, pos_state, back, len); ++ } else { ++ rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); ++ match(coder, pos_state, back - 4, len); ++ } ++ } ++ mf->read_ahead -= len; ++} ++ ++static void ++encode_eopm(lzma_lzma1_encoder *coder, uint32_t position) ++{ ++ const uint32_t pos_state = position & coder->pos_mask; ++ rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); ++ rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); ++ match(coder, pos_state, (4294967295U), 2); ++} ++ ++static inline void ++rc_flush(lzma_range_encoder *rc) ++{ ++ for (size_t i = 0; i < 5; ++i) ++ rc->symbols[rc->count++] = RC_FLUSH; ++} ++ ++extern void exit (int __status) ++ __attribute__ ((__nothrow__ , __leaf__ , __noreturn__)); ++ ++extern lzma_ret ++lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, ++ uint8_t *restrict out, size_t *restrict out_pos, ++ size_t out_size, uint32_t limit) ++{ ++ ++ if (!coder->is_initialized && !encode_init(coder, mf)) ++ return LZMA_OK; ++ ++ uint32_t position = mf_position(mf); ++ ++ while (1) { ++ if (rc_encode(&coder->rc, out, out_pos, out_size)) { ++ return LZMA_OK; ++ } ++ ++ if (limit != (4294967295U) ++ && (mf->read_pos - mf->read_ahead >= limit ++ || *out_pos + rc_pending(&coder->rc) ++ >= (1U << 16) - ((1 << 12) + 1))) ++ break; ++ ++ if (mf->read_pos >= mf->read_limit) { ++ if (mf->action == LZMA_RUN) ++ return LZMA_OK; ++ ++ ++ if (mf->read_ahead == 0) ++ break; ++ } ++ uint32_t len; ++ uint32_t back; ++ ++ if (coder->fast_mode) ++ lzma_lzma_optimum_fast(coder, mf, &back, &len); ++ else ++ // The original code contains the call to ++ // lzma_lzma_optimum_normal(coder, mf, &back, &len, position); ++ exit (-1); ++ ++ encode_symbol(coder, mf, back, len, position); ++ ++ position += len; ++ } ++ ++ if (!coder->is_flushed) { ++ coder->is_flushed = 1; ++ if (limit == (4294967295U)) ++ encode_eopm(coder, position); ++ ++ rc_flush(&coder->rc); ++ ++ if (rc_encode(&coder->rc, out, out_pos, out_size)) { ++ return LZMA_OK; ++ } ++ } ++ ++ coder->is_flushed = 0; ++ return LZMA_STREAM_END; ++} ++ ++extern void ++lzma_free(void *ptr, const lzma_allocator *allocator) ++{ ++ if (allocator != ((void *)0) && allocator->free != ((void *)0)) ++ 
allocator->free(allocator->opaque, ptr); ++ else ++ free(ptr); ++ return; ++} ++ ++static _Bool ++lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, ++ const lzma_lz_options *lz_options) ++{ ++ if (lz_options->dict_size < 4096U ++ || lz_options->dict_size ++ > (1U << 30) + (1U << 29) ++ || lz_options->nice_len > lz_options->match_len_max) ++ return 1; ++ ++ mf->keep_size_before = lz_options->before_size + lz_options->dict_size; ++ mf->keep_size_after = lz_options->after_size ++ + lz_options->match_len_max; ++ uint32_t reserve = lz_options->dict_size / 2; ++ if (reserve > (1U << 30)) ++ reserve /= 2; ++ ++ reserve += (lz_options->before_size + lz_options->match_len_max ++ + lz_options->after_size) / 2 + (1U << 19); ++ ++ const uint32_t old_size = mf->size; ++ mf->size = mf->keep_size_before + reserve + mf->keep_size_after; ++ ++ if ((mf->buffer != ((void *)0)) && old_size != mf->size) { ++ lzma_free(mf->buffer, allocator); ++ mf->buffer = ((void *)0); ++ } ++ ++ mf->match_len_max = lz_options->match_len_max; ++ mf->nice_len = lz_options->nice_len; ++ mf->cyclic_size = lz_options->dict_size + 1; ++ ++ switch (lz_options->match_finder) { ++ case LZMA_MF_HC3: ++ mf->find = &lzma_mf_hc3_find; ++ mf->skip = &lzma_mf_hc3_skip; ++ break; ++ ++ case LZMA_MF_HC4: ++ mf->find = &lzma_mf_hc4_find; ++ mf->skip = &lzma_mf_hc4_skip; ++ break; ++ ++ case LZMA_MF_BT2: ++ mf->find = &lzma_mf_bt2_find; ++ mf->skip = &lzma_mf_bt2_skip; ++ break; ++ ++ case LZMA_MF_BT3: ++ mf->find = &lzma_mf_bt3_find; ++ mf->skip = &lzma_mf_bt3_skip; ++ break; ++ ++ case LZMA_MF_BT4: ++ mf->find = &lzma_mf_bt4_find; ++ mf->skip = &lzma_mf_bt4_skip; ++ break; ++ ++ default: ++ return 1; ++ } ++ ++ const uint32_t hash_bytes = lz_options->match_finder & 0x0F; ++ if (hash_bytes > mf->nice_len) ++ return 1; ++ ++ const _Bool is_bt = (lz_options->match_finder & 0x10) != 0; ++ uint32_t hs; ++ ++ if (hash_bytes == 2) { ++ hs = 0xFFFF; ++ } else { ++ hs = lz_options->dict_size - 1; ++ hs |= hs >> 1; ++ hs |= hs >> 2; ++ hs |= hs >> 4; ++ hs |= hs >> 8; ++ hs >>= 1; ++ hs |= 0xFFFF; ++ ++ if (hs > (1U << 24)) { ++ if (hash_bytes == 3) ++ hs = (1U << 24) - 1; ++ else ++ hs >>= 1; ++ } ++ } ++ ++ mf->hash_mask = hs; ++ ++ ++hs; ++ if (hash_bytes > 2) ++ hs += (1U << 10); ++ if (hash_bytes > 3) ++ hs += (1U << 16); ++ ++ const uint32_t old_hash_count = mf->hash_count; ++ const uint32_t old_sons_count = mf->sons_count; ++ mf->hash_count = hs; ++ mf->sons_count = mf->cyclic_size; ++ if (is_bt) ++ mf->sons_count *= 2; ++ ++ if (old_hash_count != mf->hash_count ++ || old_sons_count != mf->sons_count) { ++ lzma_free(mf->hash, allocator); ++ mf->hash = ((void *)0); ++ ++ lzma_free(mf->son, allocator); ++ mf->son = ((void *)0); ++ } ++ ++ mf->depth = lz_options->depth; ++ if (mf->depth == 0) { ++ if (is_bt) ++ mf->depth = 16 + mf->nice_len / 2; ++ else ++ mf->depth = 4 + mf->nice_len / 4; ++ } ++ ++ return 0; ++} ++ ++int ++main () ++{ ++ lzma_mf mf; ++ lzma_allocator allocator; ++ lzma_lz_options lz_options; ++ ++ void *coder; ++ uint8_t *restrict out; ++ size_t *restrict out_pos; ++ size_t out_size; ++ ++ lz_encoder_prepare(&mf, &allocator, &lz_options); ++ return (int) lzma_lzma_encode(coder, &mf, out, out_pos, out_size, (4294967295U)); ++} ++ ++ ++/* { dg-final { scan-wpa-ipa-dump "Save results of indirect call analysis." 
"icp"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "For call" 2 "icp"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert 0 prefetch stmt:" 5 "ipa_prefetch"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert 1 prefetch stmt:" 4 "ipa_prefetch"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert 2 prefetch stmt:" 2 "ipa_prefetch"} } */ +-- +2.33.0 + |