StreamBase C++ API  10.6.6.1
Schema.hpp
1 // Copyright (c) 2004-2023 TIBCO Software Inc. All rights reserved.
2 
3 #ifndef STREAMBASE_SCHEMA_H
4 #define STREAMBASE_SCHEMA_H
5 
6 #if !(defined(WIN32) || defined(__APPLE__))
7 #include <limits.h>
8 #endif
9 
10 #include "StreamBase.hpp"
11 
12 #include <cassert>
13 
14 #include "Exceptions.hpp"
15 #include "SBHashMap.hpp"
16 
17 SB_INTERNAL_FWD(SchemaUtil);
18 SB_INTERNAL_FWD(Errors);
19 
20 SB_NAMESPACE_BEGIN;
21 
22 class Field;
23 class Tuple;
24 class Timestamp;
25 class JavaQBox;
26 class SchemaRep;
27 
28 /// Error parsing a tuple as a delimited string
29 STREAMBASE_EXCEPTION_TYPE(TupleStringParseException, sb_internal::Errors::NON_FATAL_ERROR);
30 
31 /// No field found with the given name or index
32 STREAMBASE_EXCEPTION_TYPE(NoSuchFieldException, sb_internal::Errors::NON_FATAL_ERROR);
33 
34 /// Field path name contained a non-leaf segment whose type was not TUPLE
35 STREAMBASE_EXCEPTION_TYPE(NonTupleFieldInPath, sb_internal::Errors::NON_FATAL_ERROR);
36 
37 #ifndef DOXYGEN_SKIP
38 ///
39 /// A hash func that knows how to work on a string
40 struct StringHashFunc
41 {
42  size_t operator()(const std::string &str) const
43  {
44  const std::string::value_type *buffer = str.data();
45  size_t hashcode = 0;
46  for (unsigned int i = 0; i < str.size(); i++)
47  {
48  // TODO: Find a better hash function or something.
49  hashcode = (37 * hashcode) + *(buffer++);
50  }
51  return hashcode;
52  }
53 };
54 #endif
55 
56 /// A type of tuple, containing zero or more fields (each encapsulated
57 /// as a Schema::Field object).
58 ///
59 /// Schemas are immutable: once constructed they may not be
60 /// changed.
61 class Schema
62 {
63 private:
64  /// The length of hashes returned by the hash() method.
65  static const int HASH_LENGTH = 16;
66 
67  /// The header of each tuple has a tuple id (an unsigned int)
68  /// which requires a fixed amount of space. In addition,
69  /// a variable size word aligned bit field containing a mask
70  /// for null fields is added to the header after the schema
71  /// is created.
72  static const int HEADER_SIZE = 4;
73 
74 public:
75  typedef sb::Field Field;
76 
77  static const Schema EMPTY_SCHEMA;
78 
79  /// Flags for creating a schema.
80  enum Flags {
81  /// @deprecated ignored
82  NO_HEADER = 1
83  };
84 
85  /// Creates an uninitialized Schema. This should NOT be used
86  /// to create a Schema with no fields; to construct such a Schema
87  /// use one of the other overloads and pass an empty list of fields.
88  Schema();
89  ~Schema();
90 
91  /// Copy constructor
92  Schema(const Schema& schema);
93 
94  /// Assignment operator
95  Schema& operator=(const Schema&);
96 
97  /// Constructs a Schema from a sequence of Field objects.
98  template <typename Iterator>
99  Schema(std::string name, Iterator begin, Iterator end, Flags flags = Flags());
100 
101  /// Constructs a Schema from a sequence of Field objects.
102  template <typename Iterator>
103  Schema(Iterator begin, Iterator end, Flags flags = Flags());
104 
105  /// Constructs a Schema from a vector of Field objects.
106  /// If the vector is empty the Schema will contain no fields.
107  Schema(std::string name, const std::vector<Field>& fields, Flags flags = Flags());
108 
109  /// Constructs a Schema from a vector of Field objects.
110  /// If the vector is empty the Schema will contain no fields.
111  Schema(const std::vector<Field>& fields, Flags flags = Flags());
112 
113  private:
114  // hash_map<> or map<>? Does the hash calc overhead overwhelm the
115  // the calc time for map to do a operator<? In some benchmarks (on linux)
116  // hash_map<> was faster than map<> in all cases. KDM 3/16/06
117  typedef QUALIFIED_SB_HASH_MAP<std::string, int, StringHashFunc> FieldIndexMap;
118  typedef QUALIFIED_SB_HASH_MAP<std::string, Field, StringHashFunc> FieldPathMap;
119 
120  public:
121  /// Returns a non-null value if the object was not created with
122  /// the no-argument constructor, i.e., this is a meaningful
123  /// Schema.
124  ///
125  /// Useful in the following manner:
126  ///
127  /// <pre>
128  /// Schema td = callSomeMethod();
129  /// if (td) {
130  /// cout << "someMethod returned a valid Schema: " << td << endl;
131  /// } else {
132  /// cout << "no such Schema" << endl;
133  /// }
134  /// </pre>
135  operator const void *() const { return _isInitialized ? _rep.get() : (const void*) NULL; }
136 
137  /// Return true if the schema has space for a header, false otherwise.
138  /// @deprecated
139  bool hasHeader() const {
140  return true;
141  }
142 
143  /// Return the size of the header.
144  size_t getHeaderSize() const {
145  return HEADER_SIZE;
146  }
147 
148 #ifndef DOXYGEN_SKIP
149  /// Return the size of the Null mask.
150  size_t getNullMaskSize() const;
151 
152  /// Return the offset of the Null mask.
153  size_t getNullMaskOffset() const {
154  if ( hasHeader() )
155  return HEADER_SIZE;
156  else
157  return 0;
158  }
159 #endif
160 
161  /// Return the Fields object description of a field in the schema.
162  ///
163  /// @param field_num the zero-relative index of the field.
164  /// @throws NoSuchFieldException if field_num is too large
165  const Field& getField(size_t field_num) const;
166 
167  /// Return the Fields object description of a field in the schema.
168  ///
169  /// @param pathName the path name of the field to fetch.
170  /// @throws NoSuchFieldException if field is not found.
171  const Field& getField(const std::string &pathName) const;
172 
173  /// Returns true if a field of a given path name is present,
174  /// otherwise it returns false.
175  ///
176  /// @param pathName the path name of the field to fetch.
177  const bool hasField(const std::string &pathName) const;
178 
179  /// Returns a reference to a vector of all fields in the schema.
180  const std::vector<Field>& getFields() const;
181 
182  /// Returns the index of the field with a particular name (note: <b>not</b>
183  /// path name), or -1 if there is no such field.
184  int indexOfField(const std::string &name) const;
185 
186  /// Returns the name of the schema or empty string at runtime.
187  const std::string &getName() const;
188 
189  /// Returns the number of fields in the schema.
190  size_t getNumFields() const;
191 
192  /// Returns a string representation of the schema.
193  std::string as_string() const;
194 
195 #ifndef DOXYGEN_SKIP
196  /// Returns an XML representation of this schema.
197  ///
198  /// @param name a name to use if the schema is anonymous (ignored
199  /// if the schema has a name)
200  /// @param aAddUUID add the UUID to the XML output
201  std::string as_xml(std::string name = std::string(), bool aAddUUID = true) const;
202 #endif
203 
204  /// Returns true if this schema has the same field types in the
205  /// same order as another Schema.
206  bool sameFieldTypes(const Schema& other) const;
207 
208  /// Returns true if this schema has the same field Names and Types
209  /// in the same order as another Schema
210  bool sameFields(const Schema& other) const;
211 
212  /// Parse and return a schema from an XML string. The root tag must be
213  /// &lt;schema&gt;.
214  static Schema fromXml(const std::string &input, Flags flags = Flags());
215 
216  bool operator == (const Schema& other) const;
217  bool operator != (const Schema& other) const {
218  return !(*this == other);
219  }
220 
221 private:
222 
223  // Add a field to the description.
224  void addField(const Field& f);
225 
226  // Computes and sets _header_size
227  // Add a sequence of fields to the description.
228  template <typename Iterator>
229  void addFields(Iterator begin, Iterator end) {
230  while (begin != end)
231  addField(*(begin++));
232  }
233 
234  void resolveFieldPathName(const std::string& pathName, std::vector<Field>& fields) const;
235 
236 private:
237  std::shared_ptr<SchemaRep> _rep;
238  bool _isInitialized;
239  friend class sb_internal::SchemaUtil;
240  friend class SchemaRep;
241 };
242 
243 SB_NAMESPACE_END;
244 
245 inline std::ostream& operator << (std::ostream& os, const sb::Schema& schema) {
246  os << schema.as_string();
247  return os;
248 }
249 
250 #endif