ICU 54.1  54.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
stringtriebuilder.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2012,2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: stringtriebuilder.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010dec24
12 * created by: Markus W. Scherer
13 */
14 
15 #ifndef __STRINGTRIEBUILDER_H__
16 #define __STRINGTRIEBUILDER_H__
17 
18 #include "unicode/utypes.h"
19 #include "unicode/uobject.h"
20 
26 // Forward declaration.
27 struct UHashtable;
28 typedef struct UHashtable UHashtable;
29 
51 };
52 
54 
62 public:
63 #ifndef U_HIDE_INTERNAL_API
64 
// Static callback taking an untyped node pointer (declaration only; the definition is
// not part of this header).
// NOTE(review): the declared return type is UBool although the name suggests a hash
// code is returned — confirm against the definition whether a wider integer type was intended.
65  static UBool hashNode(const void *node);
// Static callback comparing two untyped node pointers; presumably the equality
// companion of hashNode() for the `nodes` hash set — verify against the definition.
67  static UBool equalNodes(const void *left, const void *right);
68 #endif /* U_HIDE_INTERNAL_API */
69 
70 protected:
71  // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
72  // or else the compiler will create a public default constructor.
76  virtual ~StringTrieBuilder();
77 
78 #ifndef U_HIDE_INTERNAL_API
79 
// Non-virtual builder helpers; only their declarations are visible in this header.
// Presumably creates the `nodes` hash set, using sizeGuess as a capacity hint —
// TODO confirm against the definition.
80  void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
// Presumably the counterpart that releases what createCompactBuilder() set up — TODO confirm.
82  void deleteCompactBuilder();
83 
// Entry point that takes the build option and the number of elements.
// NOTE(review): how buildOption selects between the writeNode() and makeNode() paths
// is not visible here — see the definition.
85  void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
86 
// Operate on the element range [start, limit) at unit position unitIndex
// (assumed half-open, judging by the parameter names — verify).
88  int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
// Same range parameters plus a length; presumably the number of branch units — TODO confirm.
90  int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
91 #endif /* U_HIDE_INTERNAL_API */
92 
93  class Node;
94 
95 #ifndef U_HIDE_INTERNAL_API
96 
// Node-returning counterparts of writeNode()/writeBranchSubNode(): same range parameters,
// but they return a Node pointer and take a UErrorCode.
// NOTE(review): ownership of the returned Node is not stated here; presumably the result
// has been passed through registerNode() — confirm against the definition.
97  Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
99  Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
100  int32_t length, UErrorCode &errorCode);
101 #endif /* U_HIDE_INTERNAL_API */
102 
// Pure virtual hooks that a concrete builder must implement. The parameter `i` is an
// element index and `unitIndex` a position within an element's string (inferred from
// the parameter names — verify against a concrete subclass).
// Length of the string of element i.
104  virtual int32_t getElementStringLength(int32_t i) const = 0;
// Unit of element i at position unitIndex.
106  virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
// Value associated with element i.
108  virtual int32_t getElementValue(int32_t i) const = 0;
109 
110  // Finds the first unit index after this one where
111  // the first and last element have different units again.
113  virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
114 
115  // Number of different units at unitIndex.
117  virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
// Presumably returns the element index reached after skipping `count` distinct units
// at unitIndex, starting from element i — TODO confirm.
119  virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
// Presumably returns the index of the first element after i whose unit at unitIndex
// differs from `unit` — TODO confirm.
121  virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
122 
// Whether match nodes of the concrete trie format can carry values
// (meaning inferred from the name — verify).
124  virtual UBool matchNodesCanHaveValues() const = 0;
125 
// Format-specific limits supplied by the concrete builder.
127  virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
129  virtual int32_t getMinLinearMatch() const = 0;
131  virtual int32_t getMaxLinearMatchLength() const = 0;
132 
133 #ifndef U_HIDE_INTERNAL_API
134  // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
// Also the capacity of the equal/values/units arrays in ListBranchNode.
136  static const int32_t kMaxBranchLinearSubNodeLength=5;
137 
138  // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units.
139  // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
// With kMaxBranchLinearSubNodeLength=5: 65536/5 = 13107.2, log2 is about 13.68, rounded up to 14.
141  static const int32_t kMaxSplitBranchLevels=14;
142 
// Takes ownership of newNode and returns an owned node, or NULL on failure.
// Per the "C++ note" in this header: a failure errorCode makes it delete the input node,
// and a NULL input records U_MEMORY_ALLOCATION_ERROR.
153  Node *registerNode(Node *newNode, UErrorCode &errorCode);
// Same contract for a final value: returns an owned node, or NULL on failure.
164  Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
165 #endif /* U_HIDE_INTERNAL_API */
166 
167  /*
168  * C++ note:
169  * registerNode() and registerFinalValue() take ownership of their input nodes,
170  * and only return owned nodes.
171  * If they see a failure UErrorCode, they will delete the input node.
172  * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
173  * If there is a failure, they return NULL.
174  *
175  * NULL Node pointers can be safely passed into other Nodes because
176  * they call the static Node::hashCode() which checks for a NULL pointer first.
177  *
178  * Therefore, as long as builder functions register a new node,
179  * they need to check for failures only before explicitly dereferencing
180  * a Node pointer, or before setting a new UErrorCode.
181  */
182 
183  // Hash set of nodes, maps from nodes to integer 1.
185  UHashtable *nodes;
186 
187 #ifndef U_HIDE_INTERNAL_API
188 
189  class Node : public UObject {
190  public:
191  Node(int32_t initialHash) : hash(initialHash), offset(0) {}
192  inline int32_t hashCode() const { return hash; }
193  // Handles node==NULL.
194  static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
195  // Base class operator==() compares the actual class types.
196  virtual UBool operator==(const Node &other) const;
197  inline UBool operator!=(const Node &other) const { return !operator==(other); }
225  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
226  // write() must set the offset to a positive value.
227  virtual void write(StringTrieBuilder &builder) = 0;
228  // See markRightEdgesFirst.
229  inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
230  StringTrieBuilder &builder) {
231  // Note: Edge numbers are negative, lastRight<=firstRight.
232  // If offset>0 then this node and its sub-nodes have been written already
233  // and we need not write them again.
234  // If this node is part of the unwritten right branch edge,
235  // then we wait until that is written.
236  if(offset<0 && (offset<lastRight || firstRight<offset)) {
237  write(builder);
238  }
239  }
240  inline int32_t getOffset() const { return offset; }
241  protected:
242  int32_t hash;
243  int32_t offset;
244  };
245 
246  // This class should not be subclassed because
247  // registerFinalValue() compares a stack-allocated FinalValueNode
248  // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
249  // with the input node, and the
250  // Node::operator==(other) check used inside FinalValueNode::operator==(other)
251  // will fail (return false) if the typeid's are different.
253  class FinalValueNode : public Node {
254  public:
255  FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
256  virtual UBool operator==(const Node &other) const;
257  virtual void write(StringTrieBuilder &builder);
258  protected:
259  int32_t value;
260  };
261 
265  class ValueNode : public Node {
266  public:
267  ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
268  virtual UBool operator==(const Node &other) const;
269  void setValue(int32_t v) {
270  hasValue=TRUE;
271  value=v;
272  hash=hash*37+v;
273  }
274  protected:
275  UBool hasValue;
276  int32_t value;
277  };
278 
283  public:
284  IntermediateValueNode(int32_t v, Node *nextNode)
285  : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
286  virtual UBool operator==(const Node &other) const;
287  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
288  virtual void write(StringTrieBuilder &builder);
289  protected:
290  Node *next;
291  };
292 
296  class LinearMatchNode : public ValueNode {
297  public:
298  LinearMatchNode(int32_t len, Node *nextNode)
299  : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
300  length(len), next(nextNode) {}
301  virtual UBool operator==(const Node &other) const;
302  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
303  protected:
304  int32_t length;
305  Node *next;
306  };
307 
311  class BranchNode : public Node {
312  public:
313  BranchNode(int32_t initialHash) : Node(initialHash) {}
314  protected:
315  int32_t firstEdgeNumber;
316  };
317 
321  class ListBranchNode : public BranchNode {
322  public:
323  ListBranchNode() : BranchNode(0x444444), length(0) {}
324  virtual UBool operator==(const Node &other) const;
325  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
326  virtual void write(StringTrieBuilder &builder);
327  // Adds a unit with a final value.
328  void add(int32_t c, int32_t value) {
329  units[length]=(UChar)c;
330  equal[length]=NULL;
331  values[length]=value;
332  ++length;
333  hash=(hash*37+c)*37+value;
334  }
335  // Adds a unit which leads to another match node.
336  void add(int32_t c, Node *node) {
337  units[length]=(UChar)c;
338  equal[length]=node;
339  values[length]=0;
340  ++length;
341  hash=(hash*37+c)*37+hashCode(node);
342  }
343  protected:
344  Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
345  int32_t length;
346  int32_t values[kMaxBranchLinearSubNodeLength];
347  UChar units[kMaxBranchLinearSubNodeLength];
348  };
349 
353  class SplitBranchNode : public BranchNode {
354  public:
355  SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
356  : BranchNode(((0x555555*37+middleUnit)*37+
357  hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
358  unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
359  virtual UBool operator==(const Node &other) const;
360  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
361  virtual void write(StringTrieBuilder &builder);
362  protected:
363  UChar unit;
364  Node *lessThan;
365  Node *greaterOrEqual;
366  };
367 
368  // Branch head node, for writing the actual node lead unit.
370  class BranchHeadNode : public ValueNode {
371  public:
372  BranchHeadNode(int32_t len, Node *subNode)
373  : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
374  length(len), next(subNode) {}
375  virtual UBool operator==(const Node &other) const;
376  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
377  virtual void write(StringTrieBuilder &builder);
378  protected:
379  int32_t length;
380  Node *next; // A branch sub-node.
381  };
382 #endif /* U_HIDE_INTERNAL_API */
383 
// Pure virtual output hooks that a concrete builder must implement.
// Factory for a format-specific linear-match node; presumably covers `length` units of
// element i starting at unitIndex, followed by nextNode — TODO confirm in a subclass.
385  virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
386  Node *nextNode) const = 0;
387 
// Each writer below returns an int32_t.
// NOTE(review): presumably the resulting length/offset of the output written so far —
// confirm against a concrete subclass.
389  virtual int32_t write(int32_t unit) = 0;
391  virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
393  virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
395  virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
397  virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
398 };
399 
401 
402 #endif // __STRINGTRIEBUILDER_H__