path: root/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
Diffstat (limited to '0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch')
-rw-r--r--  0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch  2216
1 file changed, 2216 insertions, 0 deletions
diff --git a/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch b/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
new file mode 100644
index 0000000..dae19fa
--- /dev/null
+++ b/0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
@@ -0,0 +1,2216 @@
+From 4c262af8e178ac7c81b32be5b159b4d09a5841c9 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 8 Mar 2024 07:07:50 +0800
+Subject: [PATCH 1/2] Port fixes for IPA prefetch to GCC 12
+
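+Guard the unsupported-rhs dump in ipa-devirt behind dump checks, create
+missing call summaries in IPA-SRA for edges inserted after summary
+collection, and fix IPA prefetch: materialize function bodies before
+analysis, skip memrefs with null candidates, non-MEM_REF memory or
+non-integer steps, factor the stmt copy-and-remap loops into helpers,
+split the dominator bb after the last use of the compared memref, and
+restrict the ipa-prefetch-locality param to the range [0, 3].  Also add
+an xz-based testcase.
+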
+---
+ gcc/ipa-devirt.cc | 9 +-
+ gcc/ipa-prefetch.cc | 174 +-
+ gcc/ipa-sra.cc | 7 +
+ gcc/params.opt | 4 +-
+ gcc/testsuite/gcc.dg/completion-1.c | 1 +
+ gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c | 1843 ++++++++++++++++++++
+ 6 files changed, 1974 insertions(+), 64 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
+
+diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
+index dd3562d56..dd000b401 100644
+--- a/gcc/ipa-devirt.cc
++++ b/gcc/ipa-devirt.cc
+@@ -5029,9 +5029,12 @@ analyze_assign_stmt (gimple *stmt)
+ }
+ else
+ {
+- fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
+- get_tree_code_name (TREE_CODE (rhs)));
+- print_gimple_stmt (dump_file, stmt, 0);
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ {
++ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
++ get_tree_code_name (TREE_CODE (rhs)));
++ print_gimple_stmt (dump_file, stmt, 0);
++ }
+ gcc_unreachable ();
+ }
+ }
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index aeea51105..9537e4835 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -167,6 +167,7 @@ analyse_cgraph ()
+ }
+
+ /* TODO: maybe remove loop info here. */
++ n->get_body ();
+ push_cfun (DECL_STRUCT_FUNCTION (n->decl));
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (LOOPS_NORMAL);
+@@ -942,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2)
+ (*mr_candidate_map)[mr] = mr2;
+ return;
+ }
++ /* Probably we shouldn't leave nulls in the map. */
++ if ((*mr_candidate_map)[mr] == NULL)
++ return;
+ /* TODO: support analysis with incrementation of different fields. */
+ if ((*mr_candidate_map)[mr]->offset != mr2->offset)
+ {
+@@ -1090,6 +1094,15 @@ analyse_loops ()
+ memref_t *mr = it->first, *mr2 = it->second;
+ if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr))
+ continue;
++      /* For now optimize only MRs whose mem is a MEM_REF.
++ TODO: support other MR types. */
++ if (TREE_CODE (mr->mem) != MEM_REF)
++ {
++ if (dump_file)
++ fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n",
++ mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem)));
++ continue;
++ }
+ if (!optimize_mrs_map->count (fn))
+ (*optimize_mrs_map)[fn] = new memref_set;
+ (*optimize_mrs_map)[fn]->insert (mr);
+@@ -1102,7 +1115,7 @@ analyse_loops ()
+ it != (*optimize_mrs_map)[fn]->end (); it++)
+ {
+ memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr];
+- fprintf (dump_file, "MRs %d,%d with incremental offset ",
++ fprintf (dump_file, "MRs %d, %d with incremental offset ",
+ mr->mr_id, mr2->mr_id);
+ print_generic_expr (dump_file, mr2->offset);
+ fprintf (dump_file, "\n");
+@@ -1435,6 +1448,52 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
+ return NULL_TREE;
+ }
+
++/* Copy stmt and remap its operands. */
++
++static gimple *
++gimple_copy_and_remap (gimple *stmt)
++{
++ gimple *copy = gimple_copy (stmt);
++ gcc_checking_assert (!is_gimple_debug (copy));
++
++ /* Remap all the operands in COPY. */
++ struct walk_stmt_info wi;
++ memset (&wi, 0, sizeof (wi));
++ wi.info = copy;
++ walk_gimple_op (copy, remap_gimple_op_r, &wi);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Stmt copy after remap:\n");
++ print_gimple_stmt (dump_file, copy, 0);
++ }
++ return copy;
++}
++
++/* Copy and remap stmts listed in MR in reverse order down to LAST_IDX,
++   skipping already processed ones.  Insert the new stmts into STMTS.  */
++
++static gimple *
++gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
++ int last_idx, stmt_set &processed)
++{
++ gimple *last_stmt = NULL;
++  for (int i = mr->stmts.length () - 1; i >= last_idx; i--)
++ {
++ if (processed.count (mr->stmts[i]))
++ continue;
++ processed.insert (mr->stmts[i]);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
++ i, mr->mr_id);
++ print_gimple_stmt (dump_file, mr->stmts[i], 0);
++ }
++ last_stmt = gimple_copy_and_remap (mr->stmts[i]);
++ gimple_seq_add_stmt (&stmts, last_stmt);
++ }
++ return last_stmt;
++}
++
+ static void
+ create_cgraph_edge (cgraph_node *n, gimple *stmt)
+ {
+@@ -1490,6 +1549,13 @@ optimize_function (cgraph_node *n, function *fn)
+ "Skip the case.\n");
+ return 0;
+ }
++ if (!tree_fits_shwi_p (inc_mr->step))
++ {
++ if (dump_file)
++ fprintf (dump_file, "Cannot represent incremental MR's step as "
++		 "an integer. Skip the case.\n");
++ return 0;
++ }
+ if (dump_file && !used_mrs.empty ())
+ print_mrs_ids (used_mrs, "Common list of used mrs:\n");
+
+@@ -1539,16 +1605,44 @@ optimize_function (cgraph_node *n, function *fn)
+ return 0;
+ }
+ else if (dump_file)
+- fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
++ {
++ fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index);
++ gimple_dump_bb (dump_file, dom_bb, 0, dump_flags);
++ fprintf (dump_file, "\n");
++ }
+
+- split_block (dom_bb, (gimple *) NULL);
++ /* Try to find comp_mr's stmt in the dominator bb. */
++ gimple *last_used = NULL;
++ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
++ gsi_prev (&si))
++ if (comp_mr->stmts[0] == gsi_stmt (si))
++ {
++ last_used = gsi_stmt (si);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Last used stmt in dominator bb:\n");
++ print_gimple_stmt (dump_file, last_used, 0);
++ }
++ break;
++ }
++
++ split_block (dom_bb, last_used);
+ gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
+
+ /* Create new inc var. Insert new_var = old_var + step * factor. */
+ decl_map = new tree_map;
+ gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
+ tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
++  /* If old_var's definition dominates the current use, just use it;
++     otherwise, evaluate it just before the new inc var's evaluation.  */
+ gimple_seq stmts = NULL;
++ stmt_set processed_stmts;
++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
++ {
++ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0,
++ processed_stmts);
++ inc_var = gimple_assign_lhs (tmp);
++ }
+ tree var_type = TREE_TYPE (inc_var);
+ enum tree_code inc_code;
+ if (TREE_CODE (var_type) == POINTER_TYPE)
+@@ -1556,52 +1650,28 @@ optimize_function (cgraph_node *n, function *fn)
+ else
+ inc_code = PLUS_EXPR;
+ tree step = inc_mr->step;
+- unsigned dist_val = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor;
++ HOST_WIDE_INT dist_val = tree_to_shwi (step)
++ * param_ipa_prefetch_distance_factor;
+ tree dist = build_int_cst (TREE_TYPE (step), dist_val);
+ tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
+ (*decl_map)[inc_var] = new_inc_var;
++ if (dump_file)
++ {
++      fprintf (dump_file, "New distance value: " HOST_WIDE_INT_PRINT_DEC
++	       ", new inc var: ", dist_val);
++ print_generic_expr (dump_file, new_inc_var);
++ fprintf (dump_file, "\n");
++ }
+
+ /* Create other new vars. Insert new stmts. */
+- struct walk_stmt_info wi;
+- stmt_set processed_stmts;
+- memref_tree_map mr_new_trees;
+ for (memref_set::const_iterator it = used_mrs.begin ();
+ it != used_mrs.end (); it++)
+ {
+ memref_t *mr = *it;
+- gimple *last_stmt = NULL;
+ if (mr == comp_mr)
+ continue;
+- for (int i = mr->stmts.length () - 1; i >= 0 ; i--)
+- {
+- if (processed_stmts.count (mr->stmts[i]))
+- continue;
+- processed_stmts.insert (mr->stmts[i]);
+- if (dump_file)
+- {
+- fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
+- i, mr->mr_id);
+- print_gimple_stmt (dump_file, mr->stmts[i], 0);
+- }
+- /* Create a new copy of STMT and duplicate STMT's virtual
+- operands. */
+- gimple *copy = gimple_copy (mr->stmts[i]);
+- gcc_checking_assert (!is_gimple_debug (copy));
+-
+- /* Remap all the operands in COPY. */
+- memset (&wi, 0, sizeof (wi));
+- last_stmt = copy;
+- wi.info = copy;
+- walk_gimple_op (copy, remap_gimple_op_r, &wi);
+- if (dump_file)
+- {
+- fprintf (dump_file, "Stmt %d after remap:\n",i);
+- print_gimple_stmt (dump_file, copy, 0);
+- }
+- gimple_seq_add_stmt (&stmts, copy);
+- }
++ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
++ processed_stmts);
+ gcc_assert (last_stmt);
+- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id);
+@@ -1637,29 +1707,9 @@ optimize_function (cgraph_node *n, function *fn)
+ memref_t *mr = vmrs[j];
+ /* Don't need to copy the last stmt, since we insert prefetch insn
+ instead of it. */
+- for (int i = mr->stmts.length () - 1; i >= 1 ; i--)
+- {
+- if (processed_stmts.count (mr->stmts[i]))
+- continue;
+- processed_stmts.insert (mr->stmts[i]);
+-
+- gimple *copy = gimple_copy (mr->stmts[i]);
+- gcc_checking_assert (!is_gimple_debug (copy));
+-
+- /* Remap all the operands in COPY. */
+- memset (&wi, 0, sizeof (wi));
+- wi.info = copy;
+- walk_gimple_op (copy, remap_gimple_op_r, &wi);
+- if (dump_file)
+- {
+- fprintf (dump_file, "Stmt %d after remap:\n",i);
+- print_gimple_stmt (dump_file, copy, 0);
+- }
+- gimple_seq_add_stmt (&stmts, copy);
+- }
++ gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts);
+ gimple *last_stmt = mr->stmts[0];
+ gcc_assert (last_stmt);
+- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
+ tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
+ tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
+ if (decl_map->count (addr))
+@@ -1668,6 +1718,11 @@ optimize_function (cgraph_node *n, function *fn)
+ 3, addr, write_p, local);
+ pcalls.safe_push (last_stmt);
+ gimple_seq_add_stmt (&stmts, last_stmt);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
++ print_gimple_stmt (dump_file, last_stmt, 0);
++ }
+ }
+
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+@@ -1677,6 +1732,7 @@ optimize_function (cgraph_node *n, function *fn)
+ for (unsigned i = 0; i < pcalls.length (); i++)
+ create_cgraph_edge (n, pcalls[i]);
+ ipa_update_overall_fn_summary (n);
++ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl));
+
+ return 1;
+ }
+@@ -1806,7 +1862,7 @@ pass_ipa_prefetch::gate (function *)
+ /* Don't bother doing anything if the program has errors. */
+ && !seen_error ()
+ && flag_lto_partition == LTO_PARTITION_ONE
+- /* Only enable struct optimizations in lto or whole_program. */
++ /* Only enable prefetch optimizations in lto or whole_program. */
+ && (in_lto_p || flag_whole_program));
+ }
+
+diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
+index 5355cf2f4..471b3927c 100644
+--- a/gcc/ipa-sra.cc
++++ b/gcc/ipa-sra.cc
+@@ -3393,6 +3393,13 @@ param_splitting_across_edge (cgraph_edge *cs)
+ gcc_checking_assert (from_ifs && from_ifs->m_parameters);
+
+ isra_call_summary *csum = call_sums->get (cs);
++ /* TODO: implement better support for call edges inserted after summary
++     collection but before the IPA-SRA WPA invocation.  */
++ if (!csum)
++ {
++ csum = call_sums->get_create (cs);
++ csum->m_return_ignored = true;
++ }
+ gcc_checking_assert (csum);
+ unsigned args_count = csum->m_arg_flow.length ();
+ isra_func_summary *to_ifs = func_sums->get (callee);
+diff --git a/gcc/params.opt b/gcc/params.opt
+index 5c07e3986..50385dfd7 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -314,8 +314,8 @@ Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Opt
+ The factor represents the number of inductive variable incrementations to evaluate an indirect memory address for IPA prefetch.
+
+ -param=ipa-prefetch-locality=
+-Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization
+-The flag represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep.
++Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization
++The flag represents a temporal locality value between 0 and 3; a higher value means higher temporal locality in the data.
+
+ -param=ira-loop-reserved-regs=
+ Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization
+diff --git a/gcc/testsuite/gcc.dg/completion-1.c b/gcc/testsuite/gcc.dg/completion-1.c
+index 64da64f1c..df2319c76 100644
+--- a/gcc/testsuite/gcc.dg/completion-1.c
++++ b/gcc/testsuite/gcc.dg/completion-1.c
+@@ -2,6 +2,7 @@
+ /* { dg-options "--completion=-fipa-ic" } */
+
+ /* { dg-begin-multiline-output "" }
++-fipa-ic
+ -fipa-icf
+ -fipa-icf-functions
+ -fipa-icf-variables
+diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
+new file mode 100644
+index 000000000..bd4fb2bdc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
+@@ -0,0 +1,1843 @@
++/* { dg-do link } */
++/* { dg-options "-O3 -fipa-ic -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch -fdump-ipa-icp" } */
++/* { dg-require-effective-target lto } */
++
++/* Based on opensource xz code. */
++
++#include <stdlib.h>
++#include <string.h>
++
++typedef long int ptrdiff_t;
++typedef long unsigned int size_t;
++typedef unsigned int wchar_t;
++
++typedef unsigned char __u_char;
++typedef unsigned short int __u_short;
++typedef unsigned int __u_int;
++typedef unsigned long int __u_long;
++
++typedef signed char __int8_t;
++typedef unsigned char __uint8_t;
++typedef signed short int __int16_t;
++typedef unsigned short int __uint16_t;
++typedef signed int __int32_t;
++typedef unsigned int __uint32_t;
++
++typedef signed long int __int64_t;
++typedef unsigned long int __uint64_t;
++
++typedef __int8_t __int_least8_t;
++typedef __uint8_t __uint_least8_t;
++typedef __int16_t __int_least16_t;
++typedef __uint16_t __uint_least16_t;
++typedef __int32_t __int_least32_t;
++typedef __uint32_t __uint_least32_t;
++typedef __int64_t __int_least64_t;
++typedef __uint64_t __uint_least64_t;
++
++typedef __int8_t int8_t;
++typedef __int16_t int16_t;
++typedef __int32_t int32_t;
++typedef __int64_t int64_t;
++
++typedef __uint8_t uint8_t;
++typedef __uint16_t uint16_t;
++typedef __uint32_t uint32_t;
++typedef __uint64_t uint64_t;
++
++typedef long int intptr_t;
++typedef unsigned long int uintptr_t;
++
++static inline uint16_t
++read16ne(const uint8_t *buf)
++{
++ uint16_t num;
++ memcpy(&num, buf, sizeof(num));
++ return num;
++}
++
++static inline uint32_t
++read32ne(const uint8_t *buf)
++{
++ uint32_t num;
++ memcpy(&num, buf, sizeof(num));
++ return num;
++}
++
++static inline uint16_t
++aligned_read16ne(const uint8_t *buf)
++{
++ uint16_t num;
++ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num));
++ return num;
++}
++
++
++static inline uint32_t
++aligned_read32ne(const uint8_t *buf)
++{
++ uint32_t num;
++ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num));
++ return num;
++}
++
++static inline uint64_t
++aligned_read64ne(const uint8_t *buf)
++{
++ uint64_t num;
++ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num));
++ return num;
++}
++
++typedef unsigned char lzma_bool;
++
++typedef enum {
++ LZMA_RESERVED_ENUM = 0
++} lzma_reserved_enum;
++
++typedef enum {
++ LZMA_OK = 0,
++ LZMA_STREAM_END = 1,
++ LZMA_NO_CHECK = 2,
++ LZMA_UNSUPPORTED_CHECK = 3,
++ LZMA_GET_CHECK = 4,
++ LZMA_MEM_ERROR = 5,
++ LZMA_MEMLIMIT_ERROR = 6,
++ LZMA_FORMAT_ERROR = 7,
++ LZMA_OPTIONS_ERROR = 8,
++ LZMA_DATA_ERROR = 9,
++ LZMA_BUF_ERROR = 10,
++ LZMA_PROG_ERROR = 11,
++} lzma_ret;
++
++typedef enum {
++ LZMA_RUN = 0,
++ LZMA_SYNC_FLUSH = 1,
++ LZMA_FULL_FLUSH = 2,
++ LZMA_FULL_BARRIER = 4,
++ LZMA_FINISH = 3
++} lzma_action;
++
++typedef struct {
++ void *( *alloc)(void *opaque, size_t nmemb, size_t size);
++
++ void ( *free)(void *opaque, void *ptr);
++
++ void *opaque;
++} lzma_allocator;
++
++typedef uint64_t lzma_vli;
++
++typedef enum {
++ LZMA_CHECK_NONE = 0,
++ LZMA_CHECK_CRC32 = 1,
++ LZMA_CHECK_CRC64 = 4,
++ LZMA_CHECK_SHA256 = 10
++} lzma_check;
++
++typedef struct {
++ lzma_vli id;
++ void *options;
++} lzma_filter;
++
++typedef enum {
++ LZMA_MF_HC3 = 0x03,
++ LZMA_MF_HC4 = 0x04,
++ LZMA_MF_BT2 = 0x12,
++ LZMA_MF_BT3 = 0x13,
++ LZMA_MF_BT4 = 0x14
++} lzma_match_finder;
++
++typedef struct lzma_next_coder_s lzma_next_coder;
++
++typedef struct lzma_filter_info_s lzma_filter_info;
++
++typedef lzma_ret (*lzma_init_function)(
++ lzma_next_coder *next, const lzma_allocator *allocator,
++ const lzma_filter_info *filters);
++
++typedef lzma_ret (*lzma_code_function)(
++ void *coder, const lzma_allocator *allocator,
++ const uint8_t *restrict in, size_t *restrict in_pos,
++ size_t in_size, uint8_t *restrict out,
++ size_t *restrict out_pos, size_t out_size,
++ lzma_action action);
++
++typedef void (*lzma_end_function)(
++ void *coder, const lzma_allocator *allocator);
++
++struct lzma_filter_info_s {
++ lzma_vli id;
++ lzma_init_function init;
++ void *options;
++};
++
++struct lzma_next_coder_s {
++ void *coder;
++ lzma_vli id;
++ uintptr_t init;
++
++ lzma_code_function code;
++ lzma_end_function end;
++ void (*get_progress)(void *coder,
++ uint64_t *progress_in, uint64_t *progress_out);
++
++ lzma_check (*get_check)(const void *coder);
++ lzma_ret (*memconfig)(void *coder, uint64_t *memusage,
++ uint64_t *old_memlimit, uint64_t new_memlimit);
++ lzma_ret (*update)(void *coder, const lzma_allocator *allocator,
++ const lzma_filter *filters, const lzma_filter *reversed_filters);
++};
++
++typedef struct {
++ uint32_t len;
++ uint32_t dist;
++} lzma_match;
++
++typedef struct lzma_mf_s lzma_mf;
++struct lzma_mf_s {
++ uint8_t *buffer;
++ uint32_t size;
++ uint32_t keep_size_before;
++ uint32_t keep_size_after;
++ uint32_t offset;
++ uint32_t read_pos;
++ uint32_t read_ahead;
++ uint32_t read_limit;
++ uint32_t write_pos;
++ uint32_t pending;
++ uint32_t (*find)(lzma_mf *mf, lzma_match *matches);
++ void (*skip)(lzma_mf *mf, uint32_t num);
++ uint32_t *hash;
++ uint32_t *son;
++ uint32_t cyclic_pos;
++ uint32_t cyclic_size;
++ uint32_t hash_mask;
++ uint32_t depth;
++ uint32_t nice_len;
++ uint32_t match_len_max;
++ lzma_action action;
++ uint32_t hash_count;
++ uint32_t sons_count;
++};
++
++typedef struct {
++ size_t before_size;
++ size_t dict_size;
++ size_t after_size;
++ size_t match_len_max;
++ size_t nice_len;
++ lzma_match_finder match_finder;
++ uint32_t depth;
++ const uint8_t *preset_dict;
++ uint32_t preset_dict_size;
++} lzma_lz_options;
++
++typedef struct {
++ void *coder;
++ lzma_ret (*code)(void *coder,
++ lzma_mf *restrict mf, uint8_t *restrict out,
++ size_t *restrict out_pos, size_t out_size);
++ void (*end)(void *coder, const lzma_allocator *allocator);
++ lzma_ret (*options_update)(void *coder, const lzma_filter *filter);
++} lzma_lz_encoder;
++
++static inline const uint8_t *
++mf_ptr(const lzma_mf *mf)
++{
++ return mf->buffer + mf->read_pos;
++}
++
++static inline uint32_t
++mf_avail(const lzma_mf *mf)
++{
++ return mf->write_pos - mf->read_pos;
++}
++
++typedef struct {
++ uint32_t state[8];
++ uint64_t size;
++} lzma_sha256_state;
++
++typedef struct {
++ union {
++ uint8_t u8[64];
++ uint32_t u32[16];
++ uint64_t u64[8];
++ } buffer;
++ union {
++ uint32_t crc32;
++ uint64_t crc64;
++ lzma_sha256_state sha256;
++ } state;
++} lzma_check_state;
++
++// In the original code the table is initialized with constant data.
++// Skip the initialization in the test.
++const uint32_t lzma_crc32_table[8][256];
++
++static inline uint32_t __attribute__((__always_inline__))
++lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
++ uint32_t len, uint32_t limit)
++{
++ while (len < limit) {
++ uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len);
++ if (x != 0) {
++ if ((x & 0xFFFF) == 0) {
++ len += 2;
++ x >>= 16;
++ }
++
++ if ((x & 0xFF) == 0)
++ ++len;
++
++ return ((len) < (limit) ? (len) : (limit));
++ }
++
++ len += 4;
++ }
++
++ return limit;
++}
++
++extern uint32_t
++lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
++{
++ const uint32_t count = mf->find(mf, matches);
++ uint32_t len_best = 0;
++
++ if (count > 0) {
++ len_best = matches[count - 1].len;
++ if (len_best == mf->nice_len) {
++ uint32_t limit = mf_avail(mf) + 1;
++ if (limit > mf->match_len_max)
++ limit = mf->match_len_max;
++ const uint8_t *p1 = mf_ptr(mf) - 1;
++ const uint8_t *p2 = p1 - matches[count - 1].dist - 1;
++ len_best = lzma_memcmplen(p1, p2, len_best, limit);
++ }
++ }
++
++ *count_ptr = count;
++ ++mf->read_ahead;
++
++ return len_best;
++}
++
++static void
++normalize(lzma_mf *mf)
++{
++ const uint32_t subvalue = ((4294967295U) - mf->cyclic_size);
++
++ for (uint32_t i = 0; i < mf->hash_count; ++i) {
++ if (mf->hash[i] <= subvalue)
++ mf->hash[i] = 0;
++ else
++ mf->hash[i] -= subvalue;
++ }
++
++ for (uint32_t i = 0; i < mf->sons_count; ++i) {
++ if (mf->son[i] <= subvalue)
++ mf->son[i] = 0;
++ else
++ mf->son[i] -= subvalue;
++ }
++
++ mf->offset -= subvalue;
++ return;
++}
++
++static void
++move_pos(lzma_mf *mf)
++{
++ if (++mf->cyclic_pos == mf->cyclic_size)
++ mf->cyclic_pos = 0;
++ ++mf->read_pos;
++ if (__builtin_expect(mf->read_pos + mf->offset == (4294967295U), 0 ))
++ normalize(mf);
++}
++
++static void
++move_pending(lzma_mf *mf)
++{
++ ++mf->read_pos;
++ ++mf->pending;
++}
++
++static lzma_match *
++hc_find_func(
++ const uint32_t len_limit,
++ const uint32_t pos,
++ const uint8_t *const cur,
++ uint32_t cur_match,
++ uint32_t depth,
++ uint32_t *const son,
++ const uint32_t cyclic_pos,
++ const uint32_t cyclic_size,
++ lzma_match *matches,
++ uint32_t len_best)
++{
++ son[cyclic_pos] = cur_match;
++
++ while (1) {
++ const uint32_t delta = pos - cur_match;
++ if (depth-- == 0 || delta >= cyclic_size)
++ return matches;
++
++ const uint8_t *const pb = cur - delta;
++ cur_match = son[cyclic_pos - delta
++ + (delta > cyclic_pos ? cyclic_size : 0)];
++
++ if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
++ uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit);
++
++ if (len_best < len) {
++ len_best = len;
++ matches->len = len;
++ matches->dist = delta - 1;
++ ++matches;
++
++ if (len == len_limit)
++ return matches;
++ }
++ }
++ }
++}
++
++extern uint32_t
++lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
++{
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (3)) {
++ move_pending(mf);
++ return 0;
++ }
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++ uint32_t matches_count = 0;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask;
++
++ const uint32_t delta2 = pos - mf->hash[hash_2_value];
++ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_value] = pos;
++
++ uint32_t len_best = 2;
++
++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
++ len_best = lzma_memcmplen(cur - delta2, cur, len_best, len_limit);
++
++ matches[0].len = len_best;
++ matches[0].dist = delta2 - 1;
++ matches_count = 1;
++
++ if (len_best == len_limit) {
++ mf->son[mf->cyclic_pos] = cur_match;
++ move_pos(mf);
++ return 1;
++ }
++ }
++
++ matches_count = hc_find_func(len_limit, pos, cur, cur_match, mf->depth,
++ mf->son, mf->cyclic_pos, mf->cyclic_size,
++ matches + matches_count, len_best) - matches;
++ move_pos(mf);
++ return matches_count;
++}
++
++extern void
++lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount)
++{
++ do {
++ if (mf_avail(mf) < 3) {
++ move_pending(mf);
++ continue;
++ }
++
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask;
++
++ const uint32_t cur_match
++ = mf->hash[((1U << 10)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_value] = pos;
++
++ do { mf->son[mf->cyclic_pos] = cur_match; move_pos(mf); } while (0);
++
++ } while (--amount != 0);
++}
++
++extern uint32_t
++lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
++{
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (4)) {
++ move_pending(mf);
++ return 0;
++ }
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++ uint32_t matches_count = 0;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8))
++ & ((1U << 16) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
++ ^ (lzma_crc32_table[0][cur[3]] << 5))
++ & mf->hash_mask;
++ uint32_t delta2 = pos - mf->hash[hash_2_value];
++ const uint32_t delta3
++ = pos - mf->hash[((1U << 10)) + hash_3_value];
++ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value];
++
++ mf->hash[hash_2_value ] = pos;
++ mf->hash[((1U << 10)) + hash_3_value] = pos;
++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
++
++ uint32_t len_best = 1;
++
++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
++ len_best = 2;
++ matches[0].len = 2;
++ matches[0].dist = delta2 - 1;
++ matches_count = 1;
++ }
++
++ if (delta2 != delta3 && delta3 < mf->cyclic_size
++ && *(cur - delta3) == *cur) {
++ len_best = 3;
++ matches[matches_count++].dist = delta3 - 1;
++ delta2 = delta3;
++ }
++
++ if (matches_count != 0) {
++ len_best = lzma_memcmplen(cur - delta2, cur,
++ len_best, len_limit);
++
++ matches[matches_count - 1].len = len_best;
++
++ if (len_best == len_limit) {
++ mf->son[mf->cyclic_pos] = cur_match; move_pos(mf);
++ return matches_count;
++ }
++ }
++
++ if (len_best < 3)
++ len_best = 3;
++
++ matches_count = hc_find_func(len_limit, pos, cur, cur_match, mf->depth,
++ mf->son, mf->cyclic_pos, mf->cyclic_size,
++ matches + matches_count, len_best) - matches;
++ move_pos(mf);
++ return matches_count;
++}
++
++extern void
++lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount)
++{
++ do {
++ if (mf_avail(mf) < 4) {
++ move_pending(mf);
++ continue;
++ }
++
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & ((1U << 16) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
++ ^ (lzma_crc32_table[0][cur[3]] << 5))
++ & mf->hash_mask;
++
++ const uint32_t cur_match
++ = mf->hash[((1U << 10) + (1U << 16)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_3_value] = pos;
++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
++
++ mf->son[mf->cyclic_pos] = cur_match;
++ move_pos(mf);
++ } while (--amount != 0);
++}
++
++static lzma_match *
++bt_find_func(
++ const uint32_t len_limit,
++ const uint32_t pos,
++ const uint8_t *const cur,
++ uint32_t cur_match,
++ uint32_t depth,
++ uint32_t *const son,
++ const uint32_t cyclic_pos,
++ const uint32_t cyclic_size,
++ lzma_match *matches,
++ uint32_t len_best)
++{
++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
++ uint32_t *ptr1 = son + (cyclic_pos << 1);
++
++ uint32_t len0 = 0;
++ uint32_t len1 = 0;
++
++ while (1) {
++ const uint32_t delta = pos - cur_match;
++ if (depth-- == 0 || delta >= cyclic_size) {
++ *ptr0 = 0;
++ *ptr1 = 0;
++ return matches;
++ }
++
++ uint32_t *const pair = son + ((cyclic_pos - delta
++ + (delta > cyclic_pos ? cyclic_size : 0))
++ << 1);
++
++ const uint8_t *const pb = cur - delta;
++ uint32_t len = ((len0) < (len1) ? (len0) : (len1));
++
++ if (pb[len] == cur[len]) {
++ len = lzma_memcmplen(pb, cur, len + 1, len_limit);
++
++ if (len_best < len) {
++ len_best = len;
++ matches->len = len;
++ matches->dist = delta - 1;
++ ++matches;
++
++ if (len == len_limit) {
++ *ptr1 = pair[0];
++ *ptr0 = pair[1];
++ return matches;
++ }
++ }
++ }
++
++ if (pb[len] < cur[len]) {
++ *ptr1 = cur_match;
++ ptr1 = pair + 1;
++ cur_match = *ptr1;
++ len1 = len;
++ } else {
++ *ptr0 = cur_match;
++ ptr0 = pair;
++ cur_match = *ptr0;
++ len0 = len;
++ }
++ }
++}
++
++
++static void
++bt_skip_func(
++ const uint32_t len_limit,
++ const uint32_t pos,
++ const uint8_t *const cur,
++ uint32_t cur_match,
++ uint32_t depth,
++ uint32_t *const son,
++ const uint32_t cyclic_pos,
++ const uint32_t cyclic_size)
++{
++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
++ uint32_t *ptr1 = son + (cyclic_pos << 1);
++
++ uint32_t len0 = 0;
++ uint32_t len1 = 0;
++
++ while (1) {
++ const uint32_t delta = pos - cur_match;
++ if (depth-- == 0 || delta >= cyclic_size) {
++ *ptr0 = 0;
++ *ptr1 = 0;
++ return;
++ }
++
++ uint32_t *pair = son + ((cyclic_pos - delta
++ + (delta > cyclic_pos ? cyclic_size : 0))
++ << 1);
++ const uint8_t *pb = cur - delta;
++ uint32_t len = ((len0) < (len1) ? (len0) : (len1));
++
++ if (pb[len] == cur[len]) {
++ len = lzma_memcmplen(pb, cur, len + 1, len_limit);
++
++ if (len == len_limit) {
++ *ptr1 = pair[0];
++ *ptr0 = pair[1];
++ return;
++ }
++ }
++
++ if (pb[len] < cur[len]) {
++ *ptr1 = cur_match;
++ ptr1 = pair + 1;
++ cur_match = *ptr1;
++ len1 = len;
++ } else {
++ *ptr0 = cur_match;
++ ptr0 = pair;
++ cur_match = *ptr0;
++ len0 = len;
++ }
++ }
++}
++
++extern uint32_t
++lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches)
++{
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (2) || (mf->action == LZMA_SYNC_FLUSH)) {
++ move_pending(mf);
++ return 0;
++ }
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++ uint32_t matches_count = 0;
++ const uint32_t hash_value = read16ne(cur);
++ const uint32_t cur_match = mf->hash[hash_value];
++ mf->hash[hash_value] = pos;
++
++ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth,
++ mf->son, mf->cyclic_pos, mf->cyclic_size,
++ matches + matches_count, 1) - matches;
++ move_pos(mf);
++ return matches_count;
++}
++
++extern void
++lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount)
++{
++ do {
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (2) || (mf->action == LZMA_SYNC_FLUSH)) {
++ move_pending(mf);
++ continue;
++ }
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++
++ const uint32_t hash_value = read16ne(cur);
++ const uint32_t cur_match = mf->hash[hash_value];
++ mf->hash[hash_value] = pos;
++
++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
++ mf->cyclic_pos, mf->cyclic_size);
++ move_pos(mf);
++ } while (--amount != 0);
++}
++
++extern uint32_t
++lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
++{
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (3) || (1 && mf->action == LZMA_SYNC_FLUSH)) {
++ move_pending(mf);
++ return 0;
++ }
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++ uint32_t matches_count = 0;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask;
++
++ const uint32_t delta2 = pos - mf->hash[hash_2_value];
++ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_value] = pos;
++
++ uint32_t len_best = 2;
++
++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
++ len_best = lzma_memcmplen(
++ cur, cur - delta2, len_best, len_limit);
++
++ matches[0].len = len_best;
++ matches[0].dist = delta2 - 1;
++ matches_count = 1;
++
++ if (len_best == len_limit) {
++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
++ mf->cyclic_pos, mf->cyclic_size);
++ move_pos(mf);
++ return 1;
++ }
++ }
++
++ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth,
++ mf->son, mf->cyclic_pos, mf->cyclic_size,
++ matches + matches_count, len_best) - matches;
++ move_pos(mf);
++ return matches_count;
++}
++
++
++extern void
++lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount)
++{
++ do {
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++			len_limit = mf->nice_len;
++		} else if (len_limit < (3) || (1 && mf->action == LZMA_SYNC_FLUSH)) {
++ move_pending(mf);
++ continue;
++ }
++ const uint8_t *cur = mf_ptr(mf);
++ const uint32_t pos = mf->read_pos + mf->offset;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask;
++
++ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_value] = pos;
++
++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
++ mf->cyclic_pos, mf->cyclic_size);
++ move_pos(mf);
++ } while (--amount != 0);
++}
++
++extern uint32_t
++lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
++{
++ uint32_t len_limit = mf->write_pos - mf->read_pos;
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (4) || (mf->action == LZMA_SYNC_FLUSH)) {
++ ++mf->read_pos;
++ ++mf->pending;
++ return 0;
++ }
++
++ const uint8_t *cur = mf->buffer + mf->read_pos;
++ const uint32_t pos = mf->read_pos + mf->offset;
++ uint32_t matches_count = 0;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & ((1U << 16) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
++ ^ (lzma_crc32_table[0][cur[3]] << 5))
++ & mf->hash_mask;
++
++ uint32_t delta2 = pos - mf->hash[hash_2_value];
++ const uint32_t delta3 = pos - mf->hash[((1U << 10)) + hash_3_value];
++ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_3_value] = pos;
++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
++
++ uint32_t len_best = 1;
++
++ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
++ len_best = 2;
++ matches[0].len = 2;
++ matches[0].dist = delta2 - 1;
++ matches_count = 1;
++ }
++
++ if (delta2 != delta3 && delta3 < mf->cyclic_size && *(cur - delta3) == *cur) {
++ len_best = 3;
++ matches[matches_count++].dist = delta3 - 1;
++ delta2 = delta3;
++ }
++
++ if (matches_count != 0) {
++ len_best = lzma_memcmplen(cur, cur - delta2, len_best, len_limit);
++
++ matches[matches_count - 1].len = len_best;
++
++ if (len_best == len_limit) {
++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
++ mf->cyclic_pos, mf->cyclic_size);
++ move_pos(mf);
++ return matches_count;
++ }
++ }
++
++ if (len_best < 3)
++ len_best = 3;
++
++ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
++ mf->cyclic_pos, mf->cyclic_size,
++ matches + matches_count, len_best) - matches;
++ move_pos(mf);
++ return matches_count;
++}
++
++extern void
++lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount)
++{
++ do {
++ uint32_t len_limit = mf_avail(mf);
++ if (mf->nice_len <= len_limit) {
++ len_limit = mf->nice_len;
++ } else if (len_limit < (4) || (mf->action == LZMA_SYNC_FLUSH)) {
++ move_pending(mf);
++ continue;
++ }
++
++ const uint8_t *cur = mf->buffer + mf->read_pos;
++ const uint32_t pos = mf->read_pos + mf->offset;
++
++ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
++ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
++ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8))
++ & ((1U << 16) - 1);
++ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
++ ^ (lzma_crc32_table[0][cur[3]] << 5))
++ & mf->hash_mask;
++
++ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value];
++
++ mf->hash[hash_2_value] = pos;
++ mf->hash[((1U << 10)) + hash_3_value] = pos;
++ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
++
++ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
++ mf->cyclic_pos, mf->cyclic_size);
++ move_pos(mf);
++ } while (--amount != 0);
++}
++
++static inline void
++mf_skip(lzma_mf *mf, uint32_t amount)
++{
++ if (amount != 0) {
++ mf->skip(mf, amount);
++ mf->read_ahead += amount;
++ }
++}
++
++typedef struct lzma_lzma1_encoder_s lzma_lzma1_encoder;
++typedef uint16_t probability;
++
++typedef struct {
++ probability choice;
++ probability choice2;
++ probability low[(1 << 4)][(1 << 3)];
++ probability mid[(1 << 4)][(1 << 3)];
++ probability high[(1 << 8)];
++ uint32_t prices[(1 << 4)][((1 << 3) + (1 << 3) + (1 << 8))];
++ uint32_t table_size;
++ uint32_t counters[(1 << 4)];
++} lzma_length_encoder;
++
++typedef struct {
++ uint64_t low;
++ uint64_t cache_size;
++ uint32_t range;
++ uint8_t cache;
++ size_t count;
++ size_t pos;
++
++ enum {
++ RC_BIT_0,
++ RC_BIT_1,
++ RC_DIRECT_0,
++ RC_DIRECT_1,
++ RC_FLUSH,
++ } symbols[58];
++
++ probability *probs[58];
++} lzma_range_encoder;
++
++
++typedef enum {
++ STATE_LIT_LIT,
++ STATE_MATCH_LIT_LIT,
++ STATE_REP_LIT_LIT,
++ STATE_SHORTREP_LIT_LIT,
++ STATE_MATCH_LIT,
++ STATE_REP_LIT,
++ STATE_SHORTREP_LIT,
++ STATE_LIT_MATCH,
++ STATE_LIT_LONGREP,
++ STATE_LIT_SHORTREP,
++ STATE_NONLIT_MATCH,
++ STATE_NONLIT_REP,
++} lzma_lzma_state;
++
++typedef struct {
++ lzma_lzma_state state;
++ _Bool prev_1_is_literal;
++ _Bool prev_2;
++
++ uint32_t pos_prev_2;
++ uint32_t back_prev_2;
++
++ uint32_t price;
++ uint32_t pos_prev;
++ uint32_t back_prev;
++
++ uint32_t backs[4];
++} lzma_optimal;
++
++struct lzma_lzma1_encoder_s {
++ lzma_range_encoder rc;
++ lzma_lzma_state state;
++ uint32_t reps[4];
++ lzma_match matches[(2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1) + 1];
++ uint32_t matches_count;
++ uint32_t longest_match_length;
++ _Bool fast_mode;
++ _Bool is_initialized;
++ _Bool is_flushed;
++ uint32_t pos_mask;
++ uint32_t literal_context_bits;
++ uint32_t literal_pos_mask;
++
++ probability literal[(1 << 4)][0x300];
++ probability is_match[12][(1 << 4)];
++ probability is_rep[12];
++ probability is_rep0[12];
++ probability is_rep1[12];
++ probability is_rep2[12];
++ probability is_rep0_long[12][(1 << 4)];
++ probability dist_slot[4][(1 << 6)];
++ probability dist_special[(1 << (14 / 2)) - 14];
++ probability dist_align[(1 << 4)];
++
++ lzma_length_encoder match_len_encoder;
++ lzma_length_encoder rep_len_encoder;
++
++ uint32_t dist_slot_prices[4][(1 << 6)];
++ uint32_t dist_prices[4][(1 << (14 / 2))];
++ uint32_t dist_table_size;
++ uint32_t match_price_count;
++
++ uint32_t align_prices[(1 << 4)];
++ uint32_t align_price_count;
++ uint32_t opts_end_index;
++ uint32_t opts_current_index;
++ lzma_optimal opts[(1 << 12)];
++};
++
++extern void
++lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder,
++ lzma_mf *restrict mf,
++ uint32_t *restrict back_res, uint32_t *restrict len_res)
++{
++ const uint32_t nice_len = mf->nice_len;
++
++ uint32_t len_main;
++ uint32_t matches_count;
++ if (mf->read_ahead == 0) {
++ len_main = lzma_mf_find(mf, &matches_count, coder->matches);
++ } else {
++ len_main = coder->longest_match_length;
++ matches_count = coder->matches_count;
++ }
++
++ const uint8_t *buf = mf_ptr(mf) - 1;
++ const uint32_t buf_avail
++ = ((mf_avail(mf) + 1) < ((2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1))
++ ? (mf_avail(mf) + 1) : ((2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1)));
++
++ if (buf_avail < 2) {
++ *back_res = (4294967295U);
++ *len_res = 1;
++ return;
++ }
++
++ uint32_t rep_len = 0;
++ uint32_t rep_index = 0;
++
++ for (uint32_t i = 0; i < 4; ++i) {
++ const uint8_t *const buf_back = buf - coder->reps[i] - 1;
++ if ((read16ne(buf) != read16ne(buf_back)))
++ continue;
++ const uint32_t len = lzma_memcmplen(buf, buf_back, 2, buf_avail);
++ if (len >= nice_len) {
++ *back_res = i;
++ *len_res = len;
++ mf_skip(mf, len - 1);
++ return;
++ }
++ if (len > rep_len) {
++ rep_index = i;
++ rep_len = len;
++ }
++ }
++ if (len_main >= nice_len) {
++ *back_res = coder->matches[matches_count - 1].dist + 4;
++ *len_res = len_main;
++ mf_skip(mf, len_main - 1);
++ return;
++ }
++
++ uint32_t back_main = 0;
++ if (len_main >= 2) {
++ back_main = coder->matches[matches_count - 1].dist;
++ while (matches_count > 1 && len_main ==
++ coder->matches[matches_count - 2].len + 1) {
++			if (!(((back_main) >> 7) > (coder->matches[matches_count - 2].dist)))
++ break;
++ --matches_count;
++ len_main = coder->matches[matches_count - 1].len;
++ back_main = coder->matches[matches_count - 1].dist;
++ }
++ if (len_main == 2 && back_main >= 0x80)
++ len_main = 1;
++ }
++
++ if (rep_len >= 2) {
++ if (rep_len + 1 >= len_main
++ || (rep_len + 2 >= len_main
++ && back_main > (1U << 9))
++ || (rep_len + 3 >= len_main
++ && back_main > (1U << 15))) {
++ *back_res = rep_index;
++ *len_res = rep_len;
++ mf_skip(mf, rep_len - 1);
++ return;
++ }
++ }
++
++ if (len_main < 2 || buf_avail <= 2) {
++ *back_res = (4294967295U);
++ *len_res = 1;
++ return;
++ }
++
++ coder->longest_match_length = lzma_mf_find(mf,
++ &coder->matches_count, coder->matches);
++
++ if (coder->longest_match_length >= 2) {
++ const uint32_t new_dist = coder->matches[
++ coder->matches_count - 1].dist;
++
++ if ((coder->longest_match_length >= len_main
++ && new_dist < back_main)
++ || (coder->longest_match_length == len_main + 1
++ && !(((new_dist) >> 7) > (back_main)))
++ || (coder->longest_match_length > len_main + 1)
++ || (coder->longest_match_length + 1 >= len_main
++ && len_main >= 3
++ && (((back_main) >> 7) > (new_dist)))) {
++ *back_res = (4294967295U);
++ *len_res = 1;
++ return;
++ }
++ }
++ ++buf;
++ const uint32_t limit = ((2) > (len_main - 1) ? (2) : (len_main - 1));
++ for (uint32_t i = 0; i < 4; ++i) {
++ if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
++ *back_res = (4294967295U);
++ *len_res = 1;
++ return;
++ }
++ }
++
++ *back_res = back_main + 4;
++ *len_res = len_main;
++ mf_skip(mf, len_main - 2);
++ return;
++}
++
++static inline void
++rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
++{
++ rc->symbols[rc->count] = bit;
++ rc->probs[rc->count] = prob;
++ ++rc->count;
++}
++
++static inline void
++rc_bittree(lzma_range_encoder *rc, probability *probs,
++ uint32_t bit_count, uint32_t symbol)
++{
++ uint32_t model_index = 1;
++
++ do {
++ const uint32_t bit = (symbol >> --bit_count) & 1;
++ rc_bit(rc, &probs[model_index], bit);
++ model_index = (model_index << 1) + bit;
++ } while (bit_count != 0);
++}
++
++static _Bool
++encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
++{
++ if (mf->read_pos == mf->read_limit) {
++ if (mf->action == LZMA_RUN)
++ return 0;
++ } else {
++ mf_skip(mf, 1);
++ mf->read_ahead = 0;
++ rc_bit(&coder->rc, &coder->is_match[0][0], 0);
++ rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
++ }
++
++ coder->is_initialized = 1;
++
++ return 1;
++}
++
++static inline uint32_t
++mf_position(const lzma_mf *mf)
++{
++ return mf->read_pos - mf->read_ahead;
++}
++
++static inline _Bool
++rc_shift_low(lzma_range_encoder *rc,
++ uint8_t *out, size_t *out_pos, size_t out_size)
++{
++ if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
++ || (uint32_t)(rc->low >> 32) != 0) {
++ do {
++ if (*out_pos == out_size)
++ return 1;
++
++ out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
++ ++*out_pos;
++ rc->cache = 0xFF;
++ } while (--rc->cache_size != 0);
++ rc->cache = (rc->low >> 24) & 0xFF;
++ }
++
++ ++rc->cache_size;
++ rc->low = (rc->low & 0x00FFFFFF) << 8;
++ return 0;
++}
++
++static inline void
++rc_reset(lzma_range_encoder *rc)
++{
++ rc->low = 0;
++ rc->cache_size = 1;
++ rc->range = (4294967295U);
++ rc->cache = 0;
++ rc->count = 0;
++ rc->pos = 0;
++}
++
++static inline _Bool
++rc_encode(lzma_range_encoder *rc,
++ uint8_t *out, size_t *out_pos, size_t out_size)
++{
++ while (rc->pos < rc->count) {
++ if (rc->range < (1U << 24)) {
++ if (rc_shift_low(rc, out, out_pos, out_size))
++ return 1;
++ rc->range <<= 8;
++ }
++
++ switch (rc->symbols[rc->pos]) {
++ case RC_BIT_0: {
++ probability prob = *rc->probs[rc->pos];
++ rc->range = (rc->range >> 11)
++ * prob;
++ prob += ((1U << 11) - prob) >> 5;
++ *rc->probs[rc->pos] = prob;
++ break;
++ }
++
++ case RC_BIT_1: {
++ probability prob = *rc->probs[rc->pos];
++ const uint32_t bound = prob * (rc->range
++ >> 11);
++ rc->low += bound;
++ rc->range -= bound;
++ prob -= prob >> 5;
++ *rc->probs[rc->pos] = prob;
++ break;
++ }
++
++ case RC_DIRECT_0:
++ rc->range >>= 1;
++ break;
++
++ case RC_DIRECT_1:
++ rc->range >>= 1;
++ rc->low += rc->range;
++ break;
++
++ case RC_FLUSH:
++ rc->range = (4294967295U);
++ do {
++ if (rc_shift_low(rc, out, out_pos, out_size))
++ return 1;
++ } while (++rc->pos < rc->count);
++
++ rc_reset(rc);
++ return 0;
++
++ default:
++ break;
++ }
++ ++rc->pos;
++ }
++
++ rc->count = 0;
++ rc->pos = 0;
++ return 0;
++}
++
++static inline uint64_t
++rc_pending(const lzma_range_encoder *rc)
++{
++ return rc->cache_size + 5 - 1;
++}
++
++static inline void
++literal_matched(lzma_range_encoder *rc, probability *subcoder,
++ uint32_t match_byte, uint32_t symbol)
++{
++ uint32_t offset = 0x100;
++ symbol += 1U << 8;
++
++ do {
++ match_byte <<= 1;
++ const uint32_t match_bit = match_byte & offset;
++ const uint32_t subcoder_index
++ = offset + match_bit + (symbol >> 8);
++ const uint32_t bit = (symbol >> 7) & 1;
++ rc_bit(rc, &subcoder[subcoder_index], bit);
++
++ symbol <<= 1;
++ offset &= ~(match_byte ^ symbol);
++
++ } while (symbol < (1U << 16));
++}
++
++static inline void
++literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position)
++{
++ const uint8_t cur_byte = mf->buffer[mf->read_pos - mf->read_ahead];
++ probability *subcoder = ((coder->literal)[
++ (((position) & (coder->literal_pos_mask))
++ << (coder->literal_context_bits))
++ + ((uint32_t)(mf->buffer[mf->read_pos - mf->read_ahead - 1])
++ >> (8U - (coder->literal_context_bits)))]);
++
++ if (((coder->state) < 7)) {
++ rc_bittree(&coder->rc, subcoder, 8, cur_byte);
++ } else {
++ const uint8_t match_byte
++ = mf->buffer[mf->read_pos - coder->reps[0] - 1 - mf->read_ahead];
++ literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
++ }
++ coder->state
++ = ((coder->state) <= STATE_SHORTREP_LIT_LIT
++ ? STATE_LIT_LIT : ((coder->state) <= STATE_LIT_SHORTREP
++ ? (coder->state) - 3 : (coder->state) - 6));
++}
++
++const uint8_t lzma_rc_prices[] = {
++ 128, 103, 91, 84, 78, 73, 69, 66,
++ 63, 61, 58, 56, 54, 52, 51, 49,
++ 48, 46, 45, 44, 43, 42, 41, 40,
++ 39, 38, 37, 36, 35, 34, 34, 33,
++ 32, 31, 31, 30, 29, 29, 28, 28,
++ 27, 26, 26, 25, 25, 24, 24, 23,
++ 23, 22, 22, 22, 21, 21, 20, 20,
++ 19, 19, 19, 18, 18, 17, 17, 17,
++ 16, 16, 16, 15, 15, 15, 14, 14,
++ 14, 13, 13, 13, 12, 12, 12, 11,
++ 11, 11, 11, 10, 10, 10, 10, 9,
++ 9, 9, 9, 8, 8, 8, 8, 7,
++ 7, 7, 7, 6, 6, 6, 6, 5,
++ 5, 5, 5, 5, 4, 4, 4, 4,
++ 3, 3, 3, 3, 3, 2, 2, 2,
++ 2, 2, 2, 1, 1, 1, 1, 1
++};
++
++static inline uint32_t
++rc_bit_price(const probability prob, const uint32_t bit)
++{
++ return lzma_rc_prices[(prob ^ ((0U - bit)
++ & ((1U << 11) - 1))) >> 4];
++}
++
++static inline uint32_t
++rc_bit_0_price(const probability prob)
++{
++ return lzma_rc_prices[prob >> 4];
++}
++
++static inline uint32_t
++rc_bit_1_price(const probability prob)
++{
++ return lzma_rc_prices[(prob ^ ((1U << 11) - 1))
++ >> 4];
++}
++
++static inline uint32_t
++rc_bittree_price(const probability *const probs,
++ const uint32_t bit_levels, uint32_t symbol)
++{
++ uint32_t price = 0;
++ symbol += 1U << bit_levels;
++
++ do {
++ const uint32_t bit = symbol & 1;
++ symbol >>= 1;
++ price += rc_bit_price(probs[symbol], bit);
++ } while (symbol != 1);
++
++ return price;
++}
++
++static void
++length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
++{
++ const uint32_t table_size = lc->table_size;
++ lc->counters[pos_state] = table_size;
++
++ const uint32_t a0 = rc_bit_0_price(lc->choice);
++ const uint32_t a1 = rc_bit_1_price(lc->choice);
++ const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2);
++ const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2);
++ uint32_t *const prices = lc->prices[pos_state];
++
++ uint32_t i;
++ for (i = 0; i < table_size && i < (1 << 3); ++i)
++ prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
++ 3, i);
++
++ for (; i < table_size && i < (1 << 3) + (1 << 3); ++i)
++ prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
++ 3, i - (1 << 3));
++
++ for (; i < table_size; ++i)
++ prices[i] = b1 + rc_bittree_price(lc->high, 8,
++ i - (1 << 3) - (1 << 3));
++
++ return;
++}
++
++static inline void
++length(lzma_range_encoder *rc, lzma_length_encoder *lc,
++ const uint32_t pos_state, uint32_t len, const _Bool fast_mode)
++{
++ len -= 2;
++
++ if (len < (1 << 3)) {
++ rc_bit(rc, &lc->choice, 0);
++ rc_bittree(rc, lc->low[pos_state], 3, len);
++ } else {
++ rc_bit(rc, &lc->choice, 1);
++ len -= (1 << 3);
++
++ if (len < (1 << 3)) {
++ rc_bit(rc, &lc->choice2, 0);
++ rc_bittree(rc, lc->mid[pos_state], 3, len);
++ } else {
++ rc_bit(rc, &lc->choice2, 1);
++ len -= (1 << 3);
++ rc_bittree(rc, lc->high, 8, len);
++ }
++ }
++
++ if (!fast_mode)
++ if (--lc->counters[pos_state] == 0)
++ length_update_prices(lc, pos_state);
++}
++
++static inline void
++rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
++ const uint32_t rep, const uint32_t len)
++{
++ if (rep == 0) {
++ rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0);
++ rc_bit(&coder->rc,
++ &coder->is_rep0_long[coder->state][pos_state],
++ len != 1);
++ } else {
++ const uint32_t distance = coder->reps[rep];
++ rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1);
++
++ if (rep == 1) {
++ rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0);
++ } else {
++ rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1);
++ rc_bit(&coder->rc, &coder->is_rep2[coder->state],
++ rep - 2);
++
++ if (rep == 3)
++ coder->reps[3] = coder->reps[2];
++
++ coder->reps[2] = coder->reps[1];
++ }
++
++ coder->reps[1] = coder->reps[0];
++ coder->reps[0] = distance;
++ }
++
++ if (len == 1) {
++ coder->state = ((coder->state) < 7 ? STATE_LIT_SHORTREP : STATE_NONLIT_REP);
++ } else {
++ length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
++ coder->fast_mode);
++ coder->state = ((coder->state) < 7 ? STATE_LIT_LONGREP : STATE_NONLIT_REP);
++ }
++}
++
++// In the original code this array is initialized with constant data.
++// It's quite big, so we skip the initialization here.
++const uint8_t lzma_fastpos[1 << 13];
++
++static inline uint32_t
++get_dist_slot(uint32_t dist)
++{
++ if (dist < (1U << (13 + ((0) + (0) * (13 - 1)))))
++ return lzma_fastpos[dist];
++
++ if (dist < (1U << (13 + ((0) + (1) * (13 - 1)))))
++ return (uint32_t)(lzma_fastpos[(dist) >> ((0) + (1) * (13 - 1))]) + 2 * ((0) + (1) * (13 - 1));
++
++ return (uint32_t)(lzma_fastpos[(dist) >> ((0) + (2) * (13 - 1))]) + 2 * ((0) + (2) * (13 - 1));
++}
++
++static inline void
++rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
++ uint32_t bit_count, uint32_t symbol)
++{
++ uint32_t model_index = 1;
++ do {
++ const uint32_t bit = symbol & 1;
++ symbol >>= 1;
++ rc_bit(rc, &probs[model_index], bit);
++ model_index = (model_index << 1) + bit;
++ } while (--bit_count != 0);
++}
++
++static inline void
++rc_direct(lzma_range_encoder *rc, uint32_t value, uint32_t bit_count)
++{
++ do {
++ rc->symbols[rc->count++]
++ = RC_DIRECT_0 + ((value >> --bit_count) & 1);
++ } while (bit_count != 0);
++}
++
++static inline void
++match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
++ const uint32_t distance, const uint32_t len)
++{
++ coder->state = ((coder->state) < 7 ? STATE_LIT_MATCH : STATE_NONLIT_MATCH);
++
++ length(&coder->rc, &coder->match_len_encoder, pos_state, len,
++ coder->fast_mode);
++
++ const uint32_t dist_slot = get_dist_slot(distance);
++ const uint32_t dist_state = ((len) < 4 + 2 ? (len) - 2 : 4 - 1);
++ rc_bittree(&coder->rc, coder->dist_slot[dist_state], 6, dist_slot);
++
++ if (dist_slot >= 4) {
++ const uint32_t footer_bits = (dist_slot >> 1) - 1;
++ const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
++ const uint32_t dist_reduced = distance - base;
++
++ if (dist_slot < 14) {
++ rc_bittree_reverse(&coder->rc, coder->dist_special + base - dist_slot - 1,
++ footer_bits, dist_reduced);
++ } else {
++ rc_direct(&coder->rc, dist_reduced >> 4,
++ footer_bits - 4);
++ rc_bittree_reverse(
++ &coder->rc, coder->dist_align,
++ 4, dist_reduced & ((1 << 4) - 1));
++ ++coder->align_price_count;
++ }
++ }
++
++ coder->reps[3] = coder->reps[2];
++ coder->reps[2] = coder->reps[1];
++ coder->reps[1] = coder->reps[0];
++ coder->reps[0] = distance;
++ ++coder->match_price_count;
++}
++
++static void
++encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf,
++ uint32_t back, uint32_t len, uint32_t position)
++{
++ const uint32_t pos_state = position & coder->pos_mask;
++
++ if (back == (4294967295U)) {
++ rc_bit(&coder->rc,
++ &coder->is_match[coder->state][pos_state], 0);
++ literal(coder, mf, position);
++ } else {
++ rc_bit(&coder->rc,
++ &coder->is_match[coder->state][pos_state], 1);
++
++ if (back < 4) {
++ rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
++ rep_match(coder, pos_state, back, len);
++ } else {
++ rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
++ match(coder, pos_state, back - 4, len);
++ }
++ }
++ mf->read_ahead -= len;
++}
++
++static void
++encode_eopm(lzma_lzma1_encoder *coder, uint32_t position)
++{
++ const uint32_t pos_state = position & coder->pos_mask;
++ rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1);
++ rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
++ match(coder, pos_state, (4294967295U), 2);
++}
++
++static inline void
++rc_flush(lzma_range_encoder *rc)
++{
++ for (size_t i = 0; i < 5; ++i)
++ rc->symbols[rc->count++] = RC_FLUSH;
++}
++
++extern void exit (int __status)
++ __attribute__ ((__nothrow__ , __leaf__ , __noreturn__));
++
++extern lzma_ret
++lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
++ uint8_t *restrict out, size_t *restrict out_pos,
++ size_t out_size, uint32_t limit)
++{
++
++ if (!coder->is_initialized && !encode_init(coder, mf))
++ return LZMA_OK;
++
++ uint32_t position = mf_position(mf);
++
++ while (1) {
++ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
++ return LZMA_OK;
++ }
++
++ if (limit != (4294967295U)
++ && (mf->read_pos - mf->read_ahead >= limit
++ || *out_pos + rc_pending(&coder->rc)
++ >= (1U << 16) - ((1 << 12) + 1)))
++ break;
++
++ if (mf->read_pos >= mf->read_limit) {
++ if (mf->action == LZMA_RUN)
++ return LZMA_OK;
++
++
++ if (mf->read_ahead == 0)
++ break;
++ }
++ uint32_t len;
++ uint32_t back;
++
++ if (coder->fast_mode)
++ lzma_lzma_optimum_fast(coder, mf, &back, &len);
++ else
++ // The original code contains the call to
++ // lzma_lzma_optimum_normal(coder, mf, &back, &len, position);
++ exit (-1);
++
++ encode_symbol(coder, mf, back, len, position);
++
++ position += len;
++ }
++
++ if (!coder->is_flushed) {
++ coder->is_flushed = 1;
++ if (limit == (4294967295U))
++ encode_eopm(coder, position);
++
++ rc_flush(&coder->rc);
++
++ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
++ return LZMA_OK;
++ }
++ }
++
++ coder->is_flushed = 0;
++ return LZMA_STREAM_END;
++}
++
++extern void
++lzma_free(void *ptr, const lzma_allocator *allocator)
++{
++ if (allocator != ((void *)0) && allocator->free != ((void *)0))
++ allocator->free(allocator->opaque, ptr);
++ else
++ free(ptr);
++ return;
++}
++
++static _Bool
++lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
++ const lzma_lz_options *lz_options)
++{
++ if (lz_options->dict_size < 4096U
++ || lz_options->dict_size
++ > (1U << 30) + (1U << 29)
++ || lz_options->nice_len > lz_options->match_len_max)
++ return 1;
++
++ mf->keep_size_before = lz_options->before_size + lz_options->dict_size;
++ mf->keep_size_after = lz_options->after_size
++ + lz_options->match_len_max;
++ uint32_t reserve = lz_options->dict_size / 2;
++ if (reserve > (1U << 30))
++ reserve /= 2;
++
++ reserve += (lz_options->before_size + lz_options->match_len_max
++ + lz_options->after_size) / 2 + (1U << 19);
++
++ const uint32_t old_size = mf->size;
++ mf->size = mf->keep_size_before + reserve + mf->keep_size_after;
++
++ if ((mf->buffer != ((void *)0)) && old_size != mf->size) {
++ lzma_free(mf->buffer, allocator);
++ mf->buffer = ((void *)0);
++ }
++
++ mf->match_len_max = lz_options->match_len_max;
++ mf->nice_len = lz_options->nice_len;
++ mf->cyclic_size = lz_options->dict_size + 1;
++
++ switch (lz_options->match_finder) {
++ case LZMA_MF_HC3:
++ mf->find = &lzma_mf_hc3_find;
++ mf->skip = &lzma_mf_hc3_skip;
++ break;
++
++ case LZMA_MF_HC4:
++ mf->find = &lzma_mf_hc4_find;
++ mf->skip = &lzma_mf_hc4_skip;
++ break;
++
++ case LZMA_MF_BT2:
++ mf->find = &lzma_mf_bt2_find;
++ mf->skip = &lzma_mf_bt2_skip;
++ break;
++
++ case LZMA_MF_BT3:
++ mf->find = &lzma_mf_bt3_find;
++ mf->skip = &lzma_mf_bt3_skip;
++ break;
++
++ case LZMA_MF_BT4:
++ mf->find = &lzma_mf_bt4_find;
++ mf->skip = &lzma_mf_bt4_skip;
++ break;
++
++ default:
++ return 1;
++ }
++
++ const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
++ if (hash_bytes > mf->nice_len)
++ return 1;
++
++ const _Bool is_bt = (lz_options->match_finder & 0x10) != 0;
++ uint32_t hs;
++
++ if (hash_bytes == 2) {
++ hs = 0xFFFF;
++ } else {
++ hs = lz_options->dict_size - 1;
++ hs |= hs >> 1;
++ hs |= hs >> 2;
++ hs |= hs >> 4;
++ hs |= hs >> 8;
++ hs >>= 1;
++ hs |= 0xFFFF;
++
++ if (hs > (1U << 24)) {
++ if (hash_bytes == 3)
++ hs = (1U << 24) - 1;
++ else
++ hs >>= 1;
++ }
++ }
++
++ mf->hash_mask = hs;
++
++ ++hs;
++ if (hash_bytes > 2)
++ hs += (1U << 10);
++ if (hash_bytes > 3)
++ hs += (1U << 16);
++
++ const uint32_t old_hash_count = mf->hash_count;
++ const uint32_t old_sons_count = mf->sons_count;
++ mf->hash_count = hs;
++ mf->sons_count = mf->cyclic_size;
++ if (is_bt)
++ mf->sons_count *= 2;
++
++ if (old_hash_count != mf->hash_count
++ || old_sons_count != mf->sons_count) {
++ lzma_free(mf->hash, allocator);
++ mf->hash = ((void *)0);
++
++ lzma_free(mf->son, allocator);
++ mf->son = ((void *)0);
++ }
++
++ mf->depth = lz_options->depth;
++ if (mf->depth == 0) {
++ if (is_bt)
++ mf->depth = 16 + mf->nice_len / 2;
++ else
++ mf->depth = 4 + mf->nice_len / 4;
++ }
++
++ return 0;
++}
++
++int
++main ()
++{
++ lzma_mf mf;
++ lzma_allocator allocator;
++ lzma_lz_options lz_options;
++
++ void *coder;
++ uint8_t *restrict out;
++ size_t *restrict out_pos;
++ size_t out_size;
++
++ lz_encoder_prepare(&mf, &allocator, &lz_options);
++ return (int) lzma_lzma_encode(coder, &mf, out, out_pos, out_size, (4294967295U));
++}
++
++
++/* { dg-final { scan-wpa-ipa-dump "Save results of indirect call analysis." "icp"} } */
++/* { dg-final { scan-wpa-ipa-dump-times "For call" 2 "icp"} } */
++/* { dg-final { scan-wpa-ipa-dump-times "Insert 0 prefetch stmt:" 5 "ipa_prefetch"} } */
++/* { dg-final { scan-wpa-ipa-dump-times "Insert 1 prefetch stmt:" 4 "ipa_prefetch"} } */
++/* { dg-final { scan-wpa-ipa-dump-times "Insert 2 prefetch stmt:" 2 "ipa_prefetch"} } */
+--
+2.33.0
+