current/SOURCES/116-Fix-memory-handling-in-strxfrm_l-BZ-16009.patch

From f9e0f439b72e0b2fb035be1bc60aaceeed7f6ed0 Mon Sep 17 00:00:00 2001
From: Leonhard Holz <leonhard.holz@web.de>
Date: Tue, 13 Jan 2015 11:33:56 +0530
Subject: [PATCH 16/18] Fix memory handling in strxfrm_l [BZ #16009]

[Modified from the original email by Siddhesh Poyarekar]

This patch solves bug #16009 by implementing an additional path in
strxfrm that does not depend on caching the weight and rule indices.

In detail the following changed:

* The old main loop was factored out of strxfrm_l into the function
do_xfrm_cached to be able to alternativly use the non-caching version
do_xfrm.

* strxfrm_l allocates a a fixed size array on the stack. If this is not
sufficiant to store the weight and rule indices, the non-caching path is
taken. As the cache size is not dependent on the input there can be no
problems with integer overflows or stack allocations greater than
__MAX_ALLOCA_CUTOFF. Note that malloc-ing is not possible because the
definition of strxfrm does not allow an oom errorhandling.

* The uncached path determines the weight and rule index for every char
and for every pass again.

* Passing all the locale data array by array resulted in very long
parameter lists, so I introduced a structure that holds them.

* Checking for zero src string has been moved a bit upwards, it is
before the locale data initialization now.

* To verify that the non-caching path works correct I added a test run
to localedata/sort-test.sh & localedata/xfrm-test.c where all strings
are patched up with spaces so that they are too large for the caching path.

(cherry picked from commit 0f9e585480edcdf1e30dc3d79e24b84aeee516fa)

Conflicts:
        ChangeLog
        NEWS
---
 ChangeLog               |  16 ++
 NEWS                    |   2 +-
 localedata/sort-test.sh |   7 +
 localedata/xfrm-test.c  |  52 +++++-
 string/strxfrm_l.c      | 488 ++++++++++++++++++++++++++++++++++++++----------
 5 files changed, 464 insertions(+), 101 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dbf7e86..7a2e6c9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2015-02-16  Leonhard Holz  <leonhard.holz@web.de>
+
+       [BZ #16009]
+       * string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for
+       weights and rules. Use do_xfrm_cached if data fits in cache,
+       do_xfrm otherwise.  Moved former main loop to...
+       * (do_xfrm_cached): New function.
+       * (do_xfrm): Non-caching version of do_xfrm_cached. Uses
+       find_idx, find_position and stack_push.
+       * (find_idx): New function.
+       * (find_position): Likewise.
+       * localedata/sort-test.sh: Added test run for do_xfrm.
+       * localedata/xfrm-test.c (main): Added command line option
+       -nocache to run the test with strings that are too large for
+       the STRXFRM cache.
+
 2015-02-16  Kostya Serebryany  <konstantin.s.serebryany@gmail.com>
            Roland McGrath  <roland@hack.frob.com>
 
diff --git a/NEWS b/NEWS
index 9bc835c..f578805 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,7 @@ Version 2.20.1
 
 * The following bugs are resolved with this release:
 
-  16617, 17266, 17370, 17371, 17460, 17485, 17555, 17625, 17630.
+  16009, 16617, 17266, 17370, 17371, 17460, 17485, 17555, 17625, 17630.
 
 * CVE-2014-7817 The wordexp function could ignore the WRDE_NOCMD flag
   under certain input conditions resulting in the execution of a shell for
diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh
index e37129a..3cb57fb 100644
--- a/localedata/sort-test.sh
+++ b/localedata/sort-test.sh
@@ -53,11 +53,18 @@ for l in $lang; do
    ${common_objpfx}localedata/xfrm-test $id < $cns.in \
    > ${common_objpfx}localedata/$cns.xout || here=1
   cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
+  ${test_program_prefix_before_env} \
+   ${run_program_env} \
+   LC_ALL=$l ${test_program_prefix_after_env} \
+   ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
+   > ${common_objpfx}localedata/$cns.nocache.xout || here=1
+  cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1
   if test $here -eq 0; then
     echo "$l xfrm-test OK"
   else
     echo "$l xfrm-test FAIL"
     diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/  /'
+    diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/  /'
     status=1
   fi
 done
diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c
index d2aba7d..5cf29f6 100644
--- a/localedata/xfrm-test.c
+++ b/localedata/xfrm-test.c
@@ -23,7 +23,10 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 
+/* Keep in sync with string/strxfrm_l.c.  */
+#define SMALL_STR_SIZE 4095
 
 struct lines
 {
@@ -37,6 +40,7 @@ int
 main (int argc, char *argv[])
 {
   int result = 0;
+  bool nocache = false;
   size_t nstrings, nstrings_max;
   struct lines *strings;
   char *line = NULL;
@@ -44,7 +48,18 @@ main (int argc, char *argv[])
   size_t n;
 
   if (argc < 2)
-    error (1, 0, "usage: %s <random seed>", argv[0]);
+    error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
+
+  if (argc == 3)
+    {
+      if (strcmp (argv[2], "-nocache") == 0)
+       nocache = true;
+      else
+       {
+         printf ("Unknown option %s!\n", argv[2]);
+         exit (1);
+       }
+    }
 
   setlocale (LC_ALL, "");
 
@@ -59,9 +74,9 @@ main (int argc, char *argv[])
 
   while (1)
     {
-      char saved, *newp;
-      int needed;
-      int l;
+      char saved, *word, *newp;
+      size_t l, line_len, needed;
+
       if (getline (&line, &len, stdin) < 0)
        break;
 
@@ -83,10 +98,35 @@ main (int argc, char *argv[])
 
       saved = line[l];
       line[l] = '\0';
-      needed = strxfrm (NULL, line, 0);
+
+      if (nocache)
+       {
+         line_len = strlen (line);
+         word = malloc (line_len + SMALL_STR_SIZE + 1);
+         if (word == NULL)
+           {
+             printf ("malloc failed: %m\n");
+             exit (1);
+           }
+         memset (word, ' ', SMALL_STR_SIZE);
+         memcpy (word + SMALL_STR_SIZE, line, line_len);
+         word[line_len + SMALL_STR_SIZE] = '\0';
+       }
+      else
+        word = line;
+
+      needed = strxfrm (NULL, word, 0);
       newp = malloc (needed + 1);
-      strxfrm (newp, line, needed + 1);
+      if (newp == NULL)
+       {
+         printf ("malloc failed: %m\n");
+         exit (1);
+       }
+      strxfrm (newp, word, needed + 1);
       strings[nstrings].xfrm = newp;
+
+      if (nocache)
+       free (word);
       line[l] = saved;
       ++nstrings;
     }
diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c
index 2d3f1bd..95ffd6f 100644
--- a/string/strxfrm_l.c
+++ b/string/strxfrm_l.c
@@ -40,9 +40,24 @@
 #define CONCAT(a,b) CONCAT1(a,b)
 #define CONCAT1(a,b) a##b
 
+/* Maximum string size that is calculated with cached indices.  Right now this
+   is an arbitrary value open to optimizations.  SMALL_STR_SIZE * 4 has to be
+   lower than __MAX_ALLOCA_CUTOFF.  Keep localedata/xfrm-test.c in sync.  */
+#define SMALL_STR_SIZE 4095
+
 #include "../locale/localeinfo.h"
 #include WEIGHT_H
 
+/* Group locale data for shorter parameter lists.  */
+typedef struct
+{
+  uint_fast32_t nrules;
+  unsigned char *rulesets;
+  USTRING_TYPE *weights;
+  int32_t *table;
+  USTRING_TYPE *extra;
+  int32_t *indirect;
+} locale_data_t;
 
 #ifndef WIDE_CHAR_VERSION
 
@@ -81,113 +96,325 @@ utf8_encode (char *buf, int val)
 }
 #endif
 
+/* Find next weight and rule index.  Inlined since called for every char.  */
+static __always_inline size_t
+find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
+         unsigned char *rule_idx, const locale_data_t *l_data, const int pass)
+{
+  int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us,
+                        -1);
+  *rule_idx = tmp >> 24;
+  int32_t idx = tmp & 0xffffff;
+  size_t len = l_data->weights[idx++];
+
+  /* Skip over indices of previous levels.  */
+  for (int i = 0; i < pass; i++)
+    {
+      idx += len;
+      len = l_data->weights[idx++];
+    }
 
-size_t
-STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+  *weight_idx = idx;
+  return len;
+}
+
+static int
+find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
+              const int pass)
 {
-  struct __locale_data *current = l->__locales[LC_COLLATE];
-  uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
-  /* We don't assign the following values right away since it might be
-     unnecessary in case there are no rules.  */
-  const unsigned char *rulesets;
-  const int32_t *table;
-  const USTRING_TYPE *weights;
-  const USTRING_TYPE *extra;
-  const int32_t *indirect;
+  int32_t weight_idx;
+  unsigned char rule_idx;
+  const USTRING_TYPE *usrc = us;
+
+  find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
+  return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
+}
+
+/* Do the transformation.  */
+static size_t
+do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
+        const locale_data_t *l_data)
+{
+  int32_t weight_idx;
+  unsigned char rule_idx;
   uint_fast32_t pass;
-  size_t needed;
+  size_t needed = 0;
   size_t last_needed;
-  const USTRING_TYPE *usrc;
-  size_t srclen = STRLEN (src);
-  int32_t *idxarr;
-  unsigned char *rulearr;
-  size_t idxmax;
-  size_t idxcnt;
-  int use_malloc;
 
-  if (nrules == 0)
+  /* Now the passes over the weights.  */
+  for (pass = 0; pass < l_data->nrules; ++pass)
     {
-      if (n != 0)
-       STPNCPY (dest, src, MIN (srclen + 1, n));
+      size_t backw_len = 0;
+      last_needed = needed;
+      const USTRING_TYPE *cur = usrc;
+      const USTRING_TYPE *backw_start = NULL;
 
-      return srclen;
-    }
+       /* We assume that if a rule has defined `position' in one section
+         this is true for all of them.  */
+      int position = find_position (cur, l_data, pass);
 
-  rulesets = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
-  table = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
-  weights = (const USTRING_TYPE *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
-  extra = (const USTRING_TYPE *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
-  indirect = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
-  use_malloc = 0;
+      if (position == 0)
+       {
+         while (*cur != L('\0'))
+           {
+             const USTRING_TYPE *pos = cur;
+             size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+                                    pass);
+             int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
 
-  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
-  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
-  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
-  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
+             if ((rule & sort_forward) != 0)
+               {
+                 /* Handle the pushed backward sequence.  */
+                 if (backw_start != NULL)
+                   {
+                     for (size_t i = backw_len; i > 0; )
+                       {
+                         int32_t weight_idx;
+                         unsigned char rule_idx;
+                         size_t len = find_idx (&backw_start, &weight_idx,
+                                                &rule_idx, l_data, pass);
+                         if (needed + i < n)
+                           for (size_t j = len; j > 0; j--)
+                             dest[needed + i - j] =
+                               l_data->weights[weight_idx++];
+
+                         i -= len;
+                       }
 
-  /* Handle an empty string as a special case.  */
-  if (srclen == 0)
-    {
-      if (n != 0)
-       *dest = L('\0');
-      return 0;
-    }
+                     needed += backw_len;
+                     backw_start = NULL;
+                     backw_len = 0;
+                   }
 
-  /* We need the elements of the string as unsigned values since they
-     are used as indeces.  */
-  usrc = (const USTRING_TYPE *) src;
-
-  /* Perform the first pass over the string and while doing this find
-     and store the weights for each character.  Since we want this to
-     be as fast as possible we are using `alloca' to store the temporary
-     values.  But since there is no limit on the length of the string
-     we have to use `malloc' if the string is too long.  We should be
-     very conservative here.  */
-  if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
-    {
-      idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
-      rulearr = (unsigned char *) &idxarr[srclen];
-
-      if (idxarr == NULL)
-       /* No memory.  Well, go with the stack then.
-
-          XXX Once this implementation is stable we will handle this
-          differently.  Instead of precomputing the indeces we will
-          do this in time.  This means, though, that this happens for
-          every pass again.  */
-       goto try_stack;
-      use_malloc = 1;
-    }
-  else
-    {
-    try_stack:
-      idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
-      rulearr = (unsigned char *) alloca (srclen + 1);
+                 /* Now handle the forward element.  */
+                 if (needed + len < n)
+                   while (len-- > 0)
+                     dest[needed++] = l_data->weights[weight_idx++];
+                 else
+                   /* No more characters fit into the buffer.  */
+                   needed += len;
+               }
+             else
+               {
+                 /* Remember start of the backward sequence & track length.  */
+                 if (backw_start == NULL)
+                   backw_start = pos;
+                 backw_len += len;
+               }
+           }
+
+
+         /* Handle the pushed backward sequence.  */
+         if (backw_start != NULL)
+           {
+             for (size_t i = backw_len; i > 0; )
+               {
+                 size_t len = find_idx (&backw_start, &weight_idx, &rule_idx,
+                                        l_data, pass);
+                 if (needed + i < n)
+                   for (size_t j = len; j > 0; j--)
+                     dest[needed + i - j] =
+                       l_data->weights[weight_idx++];
+
+                 i -= len;
+               }
+
+             needed += backw_len;
+           }
+       }
+      else
+       {
+         int val = 1;
+#ifndef WIDE_CHAR_VERSION
+         char buf[7];
+         size_t buflen;
+#endif
+         size_t i;
+
+         while (*cur != L('\0'))
+           {
+             const USTRING_TYPE *pos = cur;
+             size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+                                    pass);
+             int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
+
+             if ((rule & sort_forward) != 0)
+               {
+                 /* Handle the pushed backward sequence.  */
+                 if (backw_start != NULL)
+                   {
+                     for (size_t p = backw_len; p > 0; p--)
+                       {
+                         size_t len;
+                         int32_t weight_idx;
+                         unsigned char rule_idx;
+                         const USTRING_TYPE *backw_cur = backw_start;
+
+                         /* To prevent a warning init the used vars.  */
+                         len = find_idx (&backw_cur, &weight_idx,
+                                         &rule_idx, l_data, pass);
+
+                         for (i = 1; i < p; i++)
+                           len = find_idx (&backw_cur, &weight_idx,
+                                           &rule_idx, l_data, pass);
+
+                         if (len != 0)
+                           {
+#ifdef WIDE_CHAR_VERSION
+                             if (needed + 1 + len < n)
+                               {
+                                 dest[needed] = val;
+                                 for (i = 0; i < len; ++i)
+                                   dest[needed + 1 + i] =
+                                     l_data->weights[weight_idx + i];
+                               }
+                             needed += 1 + len;
+#else
+                             buflen = utf8_encode (buf, val);
+                             if (needed + buflen + len < n)
+                               {
+                                 for (i = 0; i < buflen; ++i)
+                                   dest[needed + i] = buf[i];
+                                 for (i = 0; i < len; ++i)
+                                   dest[needed + buflen + i] =
+                                     l_data->weights[weight_idx + i];
+                               }
+                             needed += buflen + len;
+#endif
+                             val = 1;
+                           }
+                         else
+                           ++val;
+                       }
+
+                     backw_start = NULL;
+                     backw_len = 0;
+                   }
+
+                 /* Now handle the forward element.  */
+                 if (len != 0)
+                   {
+#ifdef WIDE_CHAR_VERSION
+                     if (needed + 1 + len < n)
+                       {
+                         dest[needed] = val;
+                         for (i = 0; i < len; ++i)
+                           dest[needed + 1 + i] =
+                             l_data->weights[weight_idx + i];
+                       }
+                     needed += 1 + len;
+#else
+                     buflen = utf8_encode (buf, val);
+                     if (needed + buflen + len < n)
+                       {
+                         for (i = 0; i < buflen; ++i)
+                           dest[needed + i] = buf[i];
+                         for (i = 0; i < len; ++i)
+                           dest[needed + buflen + i] =
+                             l_data->weights[weight_idx + i];
+                       }
+                     needed += buflen + len;
+#endif
+                     val = 1;
+                   }
+                 else
+                   ++val;
+               }
+             else
+               {
+                 /* Remember start of the backward sequence & track length.  */
+                 if (backw_start == NULL)
+                   backw_start = pos;
+                 backw_len++;
+               }
+           }
+
+         /* Handle the pushed backward sequence.  */
+         if (backw_start != NULL)
+           {
+             for (size_t p = backw_len; p > 0; p--)
+               {
+                 size_t len;
+                 int32_t weight_idx;
+                 unsigned char rule_idx;
+                 const USTRING_TYPE *backw_cur = backw_start;
+
+                 /* To prevent a warning init the used vars.  */
+                 len = find_idx (&backw_cur, &weight_idx,
+                                 &rule_idx, l_data, pass);
+
+                 for (i = 1; i < p; i++)
+                   len = find_idx (&backw_cur, &weight_idx,
+                                   &rule_idx, l_data, pass);
+
+                 if (len != 0)
+                   {
+#ifdef WIDE_CHAR_VERSION
+                     if (needed + 1 + len < n)
+                       {
+                         dest[needed] = val;
+                         for (i = 0; i < len; ++i)
+                           dest[needed + 1 + i] =
+                             l_data->weights[weight_idx + i];
+                       }
+                     needed += 1 + len;
+#else
+                     buflen = utf8_encode (buf, val);
+                     if (needed + buflen + len < n)
+                       {
+                         for (i = 0; i < buflen; ++i)
+                           dest[needed + i] = buf[i];
+                         for (i = 0; i < len; ++i)
+                           dest[needed + buflen + i] =
+                             l_data->weights[weight_idx + i];
+                       }
+                     needed += buflen + len;
+#endif
+                     val = 1;
+                   }
+                 else
+                   ++val;
+               }
+           }
+       }
+
+      /* Finally store the byte to separate the passes or terminate
+        the string.  */
+      if (needed < n)
+       dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
+      ++needed;
     }
 
-  idxmax = 0;
-  do
+  /* This is a little optimization: many collation specifications have
+     a `position' rule at the end and if no non-ignored character
+     is found the last \1 byte is immediately followed by a \0 byte
+     signalling this.  We can avoid the \1 byte(s).  */
+  if (needed > 2 && needed == last_needed + 1)
     {
-      int32_t tmp = findidx (table, indirect, extra, &usrc, -1);
-      rulearr[idxmax] = tmp >> 24;
-      idxarr[idxmax] = tmp & 0xffffff;
-
-      ++idxmax;
+      /* Remove the \1 byte.  */
+      if (--needed <= n)
+       dest[needed - 1] = L('\0');
     }
-  while (*usrc != L('\0'));
 
-  /* This element is only read, the value never used but to determine
-     another value which then is ignored.  */
-  rulearr[idxmax] = '\0';
+  /* Return the number of bytes/words we need, but don't count the NUL
+     byte/word at the end.  */
+  return needed - 1;
+}
+
+/* Do the transformation using weight-index and rule cache.  */
+static size_t
+do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data,
+               size_t idxmax, int32_t *idxarr, const unsigned char *rulearr)
+{
+  uint_fast32_t nrules = l_data->nrules;
+  unsigned char *rulesets = l_data->rulesets;
+  USTRING_TYPE *weights = l_data->weights;
+  uint_fast32_t pass;
+  size_t needed = 0;
+  size_t last_needed;
+  size_t idxcnt;
 
-  /* Now the passes over the weights.  We now use the indeces we found
-     before.  */
-  needed = 0;
+  /* Now the passes over the weights.  */
   for (pass = 0; pass < nrules; ++pass)
     {
       size_t backw_stop = ~0ul;
@@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
        dest[needed - 1] = L('\0');
     }
 
-  /* Free the memory if needed.  */
-  if (use_malloc)
-    free (idxarr);
-
   /* Return the number of bytes/words we need, but don't count the NUL
      byte/word at the end.  */
   return needed - 1;
 }
+
+size_t
+STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+{
+  locale_data_t l_data;
+  struct __locale_data *current = l->__locales[LC_COLLATE];
+  l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
+
+  /* Handle byte comparison case.  */
+  if (l_data.nrules == 0)
+    {
+      size_t srclen = STRLEN (src);
+
+      if (n != 0)
+       STPNCPY (dest, src, MIN (srclen + 1, n));
+
+      return srclen;
+    }
+
+  /* Handle an empty string, code hereafter relies on strlen (src) > 0.  */
+  if (*src == L('\0'))
+    {
+      if (n != 0)
+       *dest = L('\0');
+      return 0;
+    }
+
+  /* Get the locale data.  */
+  l_data.rulesets = (unsigned char *)
+    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
+  l_data.table = (int32_t *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
+  l_data.weights = (USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
+  l_data.extra = (USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
+  l_data.indirect = (int32_t *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+
+  assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0);
+  assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0);
+  assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0);
+  assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0);
+
+  /* We need the elements of the string as unsigned values since they
+     are used as indeces.  */
+  const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
+
+  /* Allocate cache for small strings on the stack and fill it with weight and
+     rule indices.  If the cache size is not sufficient, continue with the
+     uncached xfrm version.  */
+  size_t idxmax = 0;
+  const USTRING_TYPE *cur = usrc;
+  int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
+  unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
+
+  do
+    {
+      int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur,
+                            -1);
+      rulearr[idxmax] = tmp >> 24;
+      idxarr[idxmax] = tmp & 0xffffff;
+
+      ++idxmax;
+    }
+  while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
+
+  /* This element is only read, the value never used but to determine
+     another value which then is ignored.  */
+  rulearr[idxmax] = '\0';
+
+  /* Do the transformation.  */
+  if (*cur == L('\0'))
+    return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr);
+  else
+    return do_xfrm (usrc, dest, n, &l_data);
+}
 libc_hidden_def (STRXFRM)
 
 #ifndef WIDE_CHAR_VERSION
-- 
2.3.0