1 |
From b12f34fe32821a69dc12ff9a021daca0856de238 Mon Sep 17 00:00:00 2001 |
2 |
From: Samanta Navarro <ferivoz@riseup.net> |
3 |
Date: Sat, 19 Feb 2022 23:59:25 +0000 |
4 |
Subject: [PATCH] Fix build_model regression. |
5 |
|
6 |
The iterative approach in build_model failed to fill children arrays |
7 |
correctly. A preorder traversal is not required and turned out to be the |
8 |
culprit. Use an easier algorithm: |
9 |
|
10 |
Add nodes from scaffold tree starting at index 0 (root) to the target |
11 |
array whenever children are encountered. This ensures that children |
12 |
are adjacent to each other. This complies with the recursive version. |
13 |
|
14 |
Store only the scaffold index in numchildren field to prevent a direct |
15 |
processing of these children, which would require a recursive solution. |
16 |
This allows the algorithm to iterate through the target array from start |
17 |
to end without jumping back and forth, converting on the fly. |
18 |
|
19 |
Co-authored-by: Sebastian Pipping <sebastian@pipping.org> |
20 |
--- |
21 |
expat/lib/xmlparse.c | 79 ++++++++++++++++++++++++++------------------ |
22 |
1 file changed, 47 insertions(+), 32 deletions(-) |
23 |
|
24 |
--- a/expat/lib/xmlparse.c |
25 |
+++ b/lib/xmlparse.c |
26 |
@@ -7035,39 +7035,58 @@ build_model(XML_Parser parser) { |
27 |
* |
28 |
* The iterative approach works as follows: |
29 |
* |
30 |
- * - We use space in the target array for building a temporary stack structure |
31 |
- * while that space is still unused. |
32 |
- * The stack grows from the array's end downwards and the "actual data" |
33 |
- * grows from the start upwards, sequentially. |
34 |
- * (Because stack grows downwards, pushing onto the stack is a decrement |
35 |
- * while popping off the stack is an increment.) |
36 |
- * |
37 |
- * - A stack element appears as a regular XML_Content node on the outside, |
38 |
- * but only uses a single field -- numchildren -- to store the source |
39 |
- * tree node array index. These are the breadcrumbs leading the way back |
40 |
- * during pre-order (node first) depth-first traversal. |
41 |
- * |
42 |
- * - The reason we know the stack will never grow into (or overlap with) |
43 |
- * the area with data of value at the start of the array is because |
44 |
- * the overall number of elements to process matches the size of the array, |
45 |
- * and the sum of fully processed nodes and yet-to-be processed nodes |
46 |
- * on the stack, cannot be more than the total number of nodes. |
47 |
- * It is possible for the top of the stack and the about-to-write node |
48 |
- * to meet, but that is safe because we get the source index out |
49 |
- * before doing any writes on that node. |
50 |
+ * - We have two writing pointers, both walking up the result array; one does |
51 |
+ * the work, the other creates "jobs" for its colleague to do, and leads |
52 |
+ * the way: |
53 |
+ * |
54 |
+ * - The faster one, pointer jobDest, always leads and writes "what job |
55 |
+ * to do" by the other, once they reach that place in the |
56 |
+ * array: leader "jobDest" stores the source node array index (relative |
57 |
+ * to array dtd->scaffold) in field "numchildren". |
58 |
+ * |
59 |
+ * - The slower one, pointer dest, looks at the value stored in the |
60 |
+ * "numchildren" field (which actually holds a source node array index |
61 |
+ * at that time) and puts the real data from dtd->scaffold in. |
62 |
+ * |
63 |
+ * - Before the loop starts, jobDest writes source array index 0 |
64 |
+ * (where the root node is located) so that dest will have something to do |
65 |
+ * when it starts operation. |
66 |
+ * |
67 |
+ * - Whenever nodes with children are encountered, jobDest appends |
68 |
+ * them as new jobs, in order. As a result, tree node siblings are |
69 |
+ * adjacent in the resulting array, for example: |
70 |
+ * |
71 |
+ * [0] root, has two children |
72 |
+ * [1] first child of 0, has three children |
73 |
+ * [3] first child of 1, does not have children |
74 |
+ * [4] second child of 1, does not have children |
75 |
+ * [5] third child of 1, does not have children |
76 |
+ * [2] second child of 0, does not have children |
77 |
+ * |
78 |
+ * Or (the same data) presented in flat array view: |
79 |
+ * |
80 |
+ * [0] root, has two children |
81 |
+ * |
82 |
+ * [1] first child of 0, has three children |
83 |
+ * [2] second child of 0, does not have children |
84 |
+ * |
85 |
+ * [3] first child of 1, does not have children |
86 |
+ * [4] second child of 1, does not have children |
87 |
+ * [5] third child of 1, does not have children |
88 |
+ * |
89 |
+ * - The algorithm repeats until all target array indices have been processed. |
90 |
*/ |
91 |
XML_Content *dest = ret; /* tree node writing location, moves upwards */ |
92 |
XML_Content *const destLimit = &ret[dtd->scaffCount]; |
93 |
- XML_Content *const stackBottom = &ret[dtd->scaffCount]; |
94 |
- XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */ |
95 |
+ XML_Content *jobDest = ret; /* next free writing location in target array */ |
96 |
str = (XML_Char *)&ret[dtd->scaffCount]; |
97 |
|
98 |
- /* Push source tree root node index onto the stack */ |
99 |
- (--stackTop)->numchildren = 0; |
100 |
+ /* Add the starting job, the root node (index 0) of the source tree */ |
101 |
+ (jobDest++)->numchildren = 0; |
102 |
|
103 |
for (; dest < destLimit; dest++) { |
104 |
- /* Pop source tree node index off the stack */ |
105 |
- const int src_node = (int)(stackTop++)->numchildren; |
106 |
+ /* Retrieve source tree array index from job storage */ |
107 |
+ const int src_node = (int)dest->numchildren; |
108 |
|
109 |
/* Convert item */ |
110 |
dest->type = dtd->scaffold[src_node].type; |
111 |
@@ -7089,16 +7108,12 @@ build_model(XML_Parser parser) { |
112 |
int cn; |
113 |
dest->name = NULL; |
114 |
dest->numchildren = dtd->scaffold[src_node].childcnt; |
115 |
- dest->children = &dest[1]; |
116 |
+ dest->children = jobDest; |
117 |
|
118 |
- /* Push children to the stack |
119 |
- * in a way where the first child ends up at the top of the |
120 |
- * (downwards growing) stack, in order to be processed first. */ |
121 |
- stackTop -= dest->numchildren; |
122 |
+ /* Append scaffold indices of children to array */ |
123 |
for (i = 0, cn = dtd->scaffold[src_node].firstchild; |
124 |
- i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) { |
125 |
- (stackTop + i)->numchildren = (unsigned int)cn; |
126 |
- } |
127 |
+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) |
128 |
+ (jobDest++)->numchildren = (unsigned int)cn; |
129 |
} |
130 |
} |
131 |
|