1 | /** |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
3 | * or more contributor license agreements. See the NOTICE file |
---|
4 | * distributed with this work for additional information |
---|
5 | * regarding copyright ownership. The ASF licenses this file |
---|
6 | * to you under the Apache License, Version 2.0 (the |
---|
7 | * "License"); you may not use this file except in compliance |
---|
8 | * with the License. You may obtain a copy of the License at |
---|
9 | * |
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
11 | * |
---|
12 | * Unless required by applicable law or agreed to in writing, software |
---|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
15 | * See the License for the specific language governing permissions and |
---|
16 | * limitations under the License. |
---|
17 | */ |
---|
18 | |
---|
19 | #ifndef XMLARCHIVE_HH_ |
---|
20 | #define XMLARCHIVE_HH_ |
---|
21 | |
---|
22 | #include <xercesc/parsers/SAXParser.hpp> |
---|
23 | #include <xercesc/util/PlatformUtils.hpp> |
---|
24 | #include <xercesc/util/BinInputStream.hpp> |
---|
25 | #include <xercesc/sax/HandlerBase.hpp> |
---|
26 | #include <xercesc/sax/InputSource.hpp> |
---|
27 | #include "recordio.hh" |
---|
28 | |
---|
29 | XERCES_CPP_NAMESPACE_USE |
---|
30 | |
---|
31 | namespace hadoop { |
---|
32 | |
---|
/**
 * A (type, text) pair. Lists of these are shared between the SAX handler
 * and the XML input archive declared later in this header.
 */
class Value {
private:
  std::string type;   // label given at construction time
  std::string value;  // character data accumulated through addChars()
public:
  /** Creates a value labelled t with no character data yet. */
  Value(const std::string& t) : type(t) {}

  /** Appends exactly len bytes starting at buf to the stored text. */
  void addChars(const char* buf, unsigned int len) {
    value.append(buf, len);
  }

  /** Returns the label passed to the constructor. */
  const std::string& getType() const { return type; }

  /** Returns everything appended so far. */
  const std::string& getValue() const { return value; }
};
---|
45 | |
---|
46 | class MySAXHandler : public HandlerBase { |
---|
47 | private: |
---|
48 | std::vector<Value>& vlist; |
---|
49 | bool charsValid; |
---|
50 | public: |
---|
51 | MySAXHandler(std::vector<Value>& list) : vlist(list) {charsValid = false;} |
---|
52 | void startElement(const XMLCh* const name, AttributeList& attr); |
---|
53 | void endElement(const XMLCh* const name); |
---|
54 | void characters(const XMLCh* const buf, unsigned int len); |
---|
55 | }; |
---|
56 | |
---|
57 | class XmlIndex : public Index { |
---|
58 | private: |
---|
59 | std::vector<Value>& vlist; |
---|
60 | unsigned int& vidx; |
---|
61 | public: |
---|
62 | XmlIndex(std::vector<Value>& list, unsigned int& idx) : vlist(list), vidx(idx) {} |
---|
63 | bool done() { |
---|
64 | Value v = vlist[vidx]; |
---|
65 | return (v.getType() == "/array") ? true : false; |
---|
66 | } |
---|
67 | void incr() {} |
---|
68 | ~XmlIndex() {} |
---|
69 | }; |
---|
70 | |
---|
71 | class MyBinInputStream : public BinInputStream { |
---|
72 | private: |
---|
73 | InStream& stream; |
---|
74 | unsigned int pos; |
---|
75 | public: |
---|
76 | MyBinInputStream(InStream& s) : stream(s) { pos = 0; } |
---|
77 | virtual unsigned int curPos() const { return pos; } |
---|
78 | virtual unsigned int readBytes(XMLByte* const toFill, |
---|
79 | const unsigned int maxToRead) { |
---|
80 | ssize_t nread = stream.read(toFill, maxToRead); |
---|
81 | if (nread < 0) { |
---|
82 | return 0; |
---|
83 | } else { |
---|
84 | pos += nread; |
---|
85 | return nread; |
---|
86 | } |
---|
87 | } |
---|
88 | }; |
---|
89 | |
---|
90 | |
---|
91 | class MyInputSource : public InputSource { |
---|
92 | private: |
---|
93 | InStream& stream; |
---|
94 | public: |
---|
95 | MyInputSource(InStream& s) : stream(s) { } |
---|
96 | virtual BinInputStream* makeStream() const { |
---|
97 | return new MyBinInputStream(stream); |
---|
98 | } |
---|
99 | virtual const XMLCh* getEncoding() const { |
---|
100 | return XMLString::transcode("UTF-8"); |
---|
101 | } |
---|
102 | virtual ~MyInputSource() {} |
---|
103 | }; |
---|
104 | |
---|
105 | class IXmlArchive : public IArchive { |
---|
106 | private: |
---|
107 | std::vector<Value> vlist; |
---|
108 | unsigned int vidx; |
---|
109 | MySAXHandler *docHandler; |
---|
110 | SAXParser *parser; |
---|
111 | MyInputSource* src; |
---|
112 | Value next() { |
---|
113 | Value v = vlist[vidx]; |
---|
114 | vidx++; |
---|
115 | return v; |
---|
116 | } |
---|
117 | public: |
---|
118 | IXmlArchive(InStream& _stream) { |
---|
119 | vidx = 0; |
---|
120 | try { |
---|
121 | XMLPlatformUtils::Initialize(); |
---|
122 | } catch (const XMLException& e) { |
---|
123 | throw new IOException("Unable to initialize XML Parser."); |
---|
124 | } |
---|
125 | parser = new SAXParser(); |
---|
126 | docHandler = new MySAXHandler(vlist); |
---|
127 | parser->setDocumentHandler(docHandler); |
---|
128 | src = new MyInputSource(_stream); |
---|
129 | try { |
---|
130 | parser->parse(*src); |
---|
131 | } catch (const XMLException& e) { |
---|
132 | throw new IOException("Unable to parse XML stream."); |
---|
133 | } catch (const SAXParseException& e) { |
---|
134 | throw new IOException("Unable to parse XML stream."); |
---|
135 | } |
---|
136 | delete parser; |
---|
137 | delete docHandler; |
---|
138 | } |
---|
139 | virtual void deserialize(int8_t& t, const char* tag); |
---|
140 | virtual void deserialize(bool& t, const char* tag); |
---|
141 | virtual void deserialize(int32_t& t, const char* tag); |
---|
142 | virtual void deserialize(int64_t& t, const char* tag); |
---|
143 | virtual void deserialize(float& t, const char* tag); |
---|
144 | virtual void deserialize(double& t, const char* tag); |
---|
145 | virtual void deserialize(std::string& t, const char* tag); |
---|
146 | virtual void deserialize(std::string& t, size_t& len, const char* tag); |
---|
147 | virtual void startRecord(Record& s, const char* tag); |
---|
148 | virtual void endRecord(Record& s, const char* tag); |
---|
149 | virtual Index* startVector(const char* tag); |
---|
150 | virtual void endVector(Index* idx, const char* tag); |
---|
151 | virtual Index* startMap(const char* tag); |
---|
152 | virtual void endMap(Index* idx, const char* tag); |
---|
153 | virtual ~IXmlArchive() { |
---|
154 | XMLPlatformUtils::Terminate(); |
---|
155 | } |
---|
156 | }; |
---|
157 | |
---|
158 | class OXmlArchive : public OArchive { |
---|
159 | private: |
---|
160 | OutStream& stream; |
---|
161 | |
---|
162 | std::vector<std::string> cstack; |
---|
163 | |
---|
164 | void insideRecord(const char* tag) { |
---|
165 | printBeginEnvelope(tag); |
---|
166 | cstack.push_back("record"); |
---|
167 | } |
---|
168 | |
---|
169 | void outsideRecord(const char* tag) { |
---|
170 | std::string s = cstack.back(); |
---|
171 | cstack.pop_back(); |
---|
172 | if (s != "record") { |
---|
173 | throw new IOException("Error deserializing record."); |
---|
174 | } |
---|
175 | printEndEnvelope(tag); |
---|
176 | } |
---|
177 | |
---|
178 | void insideVector(const char* tag) { |
---|
179 | printBeginEnvelope(tag); |
---|
180 | cstack.push_back("vector"); |
---|
181 | } |
---|
182 | |
---|
183 | void outsideVector(const char* tag) { |
---|
184 | std::string s = cstack.back(); |
---|
185 | cstack.pop_back(); |
---|
186 | if (s != "vector") { |
---|
187 | throw new IOException("Error deserializing vector."); |
---|
188 | } |
---|
189 | printEndEnvelope(tag); |
---|
190 | } |
---|
191 | |
---|
192 | void insideMap(const char* tag) { |
---|
193 | printBeginEnvelope(tag); |
---|
194 | cstack.push_back("map"); |
---|
195 | } |
---|
196 | |
---|
197 | void outsideMap(const char* tag) { |
---|
198 | std::string s = cstack.back(); |
---|
199 | cstack.pop_back(); |
---|
200 | if (s != "map") { |
---|
201 | throw new IOException("Error deserializing map."); |
---|
202 | } |
---|
203 | printEndEnvelope(tag); |
---|
204 | } |
---|
205 | |
---|
206 | void p(const char* cstr) { |
---|
207 | stream.write(cstr, strlen(cstr)); |
---|
208 | } |
---|
209 | |
---|
210 | void printBeginEnvelope(const char* tag) { |
---|
211 | if (cstack.size() != 0) { |
---|
212 | std::string s = cstack.back(); |
---|
213 | if ("record" == s) { |
---|
214 | p("<member>\n"); |
---|
215 | p("<name>"); |
---|
216 | p(tag); |
---|
217 | p("</name>\n"); |
---|
218 | p("<value>"); |
---|
219 | } else if ("vector" == s) { |
---|
220 | p("<value>"); |
---|
221 | } else if ("map" == s) { |
---|
222 | p("<value>"); |
---|
223 | } |
---|
224 | } else { |
---|
225 | p("<value>"); |
---|
226 | } |
---|
227 | } |
---|
228 | |
---|
229 | void printEndEnvelope(const char* tag) { |
---|
230 | if (cstack.size() != 0) { |
---|
231 | std::string s = cstack.back(); |
---|
232 | if ("record" == s) { |
---|
233 | p("</value>\n"); |
---|
234 | p("</member>\n"); |
---|
235 | } else if ("vector" == s) { |
---|
236 | p("</value>\n"); |
---|
237 | } else if ("map" == s) { |
---|
238 | p("</value>\n"); |
---|
239 | } |
---|
240 | } else { |
---|
241 | p("</value>\n"); |
---|
242 | } |
---|
243 | } |
---|
244 | |
---|
245 | public: |
---|
246 | OXmlArchive(OutStream& _stream) : stream(_stream) {} |
---|
247 | virtual void serialize(int8_t t, const char* tag); |
---|
248 | virtual void serialize(bool t, const char* tag); |
---|
249 | virtual void serialize(int32_t t, const char* tag); |
---|
250 | virtual void serialize(int64_t t, const char* tag); |
---|
251 | virtual void serialize(float t, const char* tag); |
---|
252 | virtual void serialize(double t, const char* tag); |
---|
253 | virtual void serialize(const std::string& t, const char* tag); |
---|
254 | virtual void serialize(const std::string& t, size_t len, const char* tag); |
---|
255 | virtual void startRecord(const Record& s, const char* tag); |
---|
256 | virtual void endRecord(const Record& s, const char* tag); |
---|
257 | virtual void startVector(size_t len, const char* tag); |
---|
258 | virtual void endVector(size_t len, const char* tag); |
---|
259 | virtual void startMap(size_t len, const char* tag); |
---|
260 | virtual void endMap(size_t len, const char* tag); |
---|
261 | virtual ~OXmlArchive(); |
---|
262 | }; |
---|
263 | |
---|
264 | } |
---|
265 | #endif /*XMLARCHIVE_HH_*/ |
---|