[120] | 1 | /** |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
| 3 | * or more contributor license agreements. See the NOTICE file |
---|
| 4 | * distributed with this work for additional information |
---|
| 5 | * regarding copyright ownership. The ASF licenses this file |
---|
| 6 | * to you under the Apache License, Version 2.0 (the |
---|
| 7 | * "License"); you may not use this file except in compliance |
---|
| 8 | * with the License. You may obtain a copy of the License at |
---|
| 9 | * |
---|
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 11 | * |
---|
| 12 | * Unless required by applicable law or agreed to in writing, software |
---|
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 15 | * See the License for the specific language governing permissions and |
---|
| 16 | * limitations under the License. |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | #ifndef XMLARCHIVE_HH_ |
---|
| 20 | #define XMLARCHIVE_HH_ |
---|
| 21 | |
---|
| 22 | #include <xercesc/parsers/SAXParser.hpp> |
---|
| 23 | #include <xercesc/util/PlatformUtils.hpp> |
---|
| 24 | #include <xercesc/util/BinInputStream.hpp> |
---|
| 25 | #include <xercesc/sax/HandlerBase.hpp> |
---|
| 26 | #include <xercesc/sax/InputSource.hpp> |
---|
| 27 | #include "recordio.hh" |
---|
| 28 | |
---|
| 29 | XERCES_CPP_NAMESPACE_USE |
---|
| 30 | |
---|
| 31 | namespace hadoop { |
---|
| 32 | |
---|
/**
 * A (type, value) pair of strings. The type is fixed at construction;
 * the value starts empty and is built up in pieces through addChars().
 */
class Value {
private:
  std::string type;
  std::string value;
public:
  /** Creates an entry whose type is t and whose value is empty. */
  Value(const std::string& t) : type(t) {}

  /** Appends exactly len bytes starting at buf to the accumulated value. */
  void addChars(const char* buf, unsigned int len) {
    value.append(buf, len);
  }

  /** Returns the type given at construction. */
  const std::string& getType() const { return type; }

  /** Returns everything appended so far. */
  const std::string& getValue() const { return value; }
};
---|
| 45 | |
---|
| 46 | class MySAXHandler : public HandlerBase { |
---|
| 47 | private: |
---|
| 48 | std::vector<Value>& vlist; |
---|
| 49 | bool charsValid; |
---|
| 50 | public: |
---|
| 51 | MySAXHandler(std::vector<Value>& list) : vlist(list) {charsValid = false;} |
---|
| 52 | void startElement(const XMLCh* const name, AttributeList& attr); |
---|
| 53 | void endElement(const XMLCh* const name); |
---|
| 54 | void characters(const XMLCh* const buf, unsigned int len); |
---|
| 55 | }; |
---|
| 56 | |
---|
| 57 | class XmlIndex : public Index { |
---|
| 58 | private: |
---|
| 59 | std::vector<Value>& vlist; |
---|
| 60 | unsigned int& vidx; |
---|
| 61 | public: |
---|
| 62 | XmlIndex(std::vector<Value>& list, unsigned int& idx) : vlist(list), vidx(idx) {} |
---|
| 63 | bool done() { |
---|
| 64 | Value v = vlist[vidx]; |
---|
| 65 | return (v.getType() == "/array") ? true : false; |
---|
| 66 | } |
---|
| 67 | void incr() {} |
---|
| 68 | ~XmlIndex() {} |
---|
| 69 | }; |
---|
| 70 | |
---|
| 71 | class MyBinInputStream : public BinInputStream { |
---|
| 72 | private: |
---|
| 73 | InStream& stream; |
---|
| 74 | unsigned int pos; |
---|
| 75 | public: |
---|
| 76 | MyBinInputStream(InStream& s) : stream(s) { pos = 0; } |
---|
| 77 | virtual unsigned int curPos() const { return pos; } |
---|
| 78 | virtual unsigned int readBytes(XMLByte* const toFill, |
---|
| 79 | const unsigned int maxToRead) { |
---|
| 80 | ssize_t nread = stream.read(toFill, maxToRead); |
---|
| 81 | if (nread < 0) { |
---|
| 82 | return 0; |
---|
| 83 | } else { |
---|
| 84 | pos += nread; |
---|
| 85 | return nread; |
---|
| 86 | } |
---|
| 87 | } |
---|
| 88 | }; |
---|
| 89 | |
---|
| 90 | |
---|
| 91 | class MyInputSource : public InputSource { |
---|
| 92 | private: |
---|
| 93 | InStream& stream; |
---|
| 94 | public: |
---|
| 95 | MyInputSource(InStream& s) : stream(s) { } |
---|
| 96 | virtual BinInputStream* makeStream() const { |
---|
| 97 | return new MyBinInputStream(stream); |
---|
| 98 | } |
---|
| 99 | virtual const XMLCh* getEncoding() const { |
---|
| 100 | return XMLString::transcode("UTF-8"); |
---|
| 101 | } |
---|
| 102 | virtual ~MyInputSource() {} |
---|
| 103 | }; |
---|
| 104 | |
---|
| 105 | class IXmlArchive : public IArchive { |
---|
| 106 | private: |
---|
| 107 | std::vector<Value> vlist; |
---|
| 108 | unsigned int vidx; |
---|
| 109 | MySAXHandler *docHandler; |
---|
| 110 | SAXParser *parser; |
---|
| 111 | MyInputSource* src; |
---|
| 112 | Value next() { |
---|
| 113 | Value v = vlist[vidx]; |
---|
| 114 | vidx++; |
---|
| 115 | return v; |
---|
| 116 | } |
---|
| 117 | public: |
---|
| 118 | IXmlArchive(InStream& _stream) { |
---|
| 119 | vidx = 0; |
---|
| 120 | try { |
---|
| 121 | XMLPlatformUtils::Initialize(); |
---|
| 122 | } catch (const XMLException& e) { |
---|
| 123 | throw new IOException("Unable to initialize XML Parser."); |
---|
| 124 | } |
---|
| 125 | parser = new SAXParser(); |
---|
| 126 | docHandler = new MySAXHandler(vlist); |
---|
| 127 | parser->setDocumentHandler(docHandler); |
---|
| 128 | src = new MyInputSource(_stream); |
---|
| 129 | try { |
---|
| 130 | parser->parse(*src); |
---|
| 131 | } catch (const XMLException& e) { |
---|
| 132 | throw new IOException("Unable to parse XML stream."); |
---|
| 133 | } catch (const SAXParseException& e) { |
---|
| 134 | throw new IOException("Unable to parse XML stream."); |
---|
| 135 | } |
---|
| 136 | delete parser; |
---|
| 137 | delete docHandler; |
---|
| 138 | } |
---|
| 139 | virtual void deserialize(int8_t& t, const char* tag); |
---|
| 140 | virtual void deserialize(bool& t, const char* tag); |
---|
| 141 | virtual void deserialize(int32_t& t, const char* tag); |
---|
| 142 | virtual void deserialize(int64_t& t, const char* tag); |
---|
| 143 | virtual void deserialize(float& t, const char* tag); |
---|
| 144 | virtual void deserialize(double& t, const char* tag); |
---|
| 145 | virtual void deserialize(std::string& t, const char* tag); |
---|
| 146 | virtual void deserialize(std::string& t, size_t& len, const char* tag); |
---|
| 147 | virtual void startRecord(Record& s, const char* tag); |
---|
| 148 | virtual void endRecord(Record& s, const char* tag); |
---|
| 149 | virtual Index* startVector(const char* tag); |
---|
| 150 | virtual void endVector(Index* idx, const char* tag); |
---|
| 151 | virtual Index* startMap(const char* tag); |
---|
| 152 | virtual void endMap(Index* idx, const char* tag); |
---|
| 153 | virtual ~IXmlArchive() { |
---|
| 154 | XMLPlatformUtils::Terminate(); |
---|
| 155 | } |
---|
| 156 | }; |
---|
| 157 | |
---|
| 158 | class OXmlArchive : public OArchive { |
---|
| 159 | private: |
---|
| 160 | OutStream& stream; |
---|
| 161 | |
---|
| 162 | std::vector<std::string> cstack; |
---|
| 163 | |
---|
| 164 | void insideRecord(const char* tag) { |
---|
| 165 | printBeginEnvelope(tag); |
---|
| 166 | cstack.push_back("record"); |
---|
| 167 | } |
---|
| 168 | |
---|
| 169 | void outsideRecord(const char* tag) { |
---|
| 170 | std::string s = cstack.back(); |
---|
| 171 | cstack.pop_back(); |
---|
| 172 | if (s != "record") { |
---|
| 173 | throw new IOException("Error deserializing record."); |
---|
| 174 | } |
---|
| 175 | printEndEnvelope(tag); |
---|
| 176 | } |
---|
| 177 | |
---|
| 178 | void insideVector(const char* tag) { |
---|
| 179 | printBeginEnvelope(tag); |
---|
| 180 | cstack.push_back("vector"); |
---|
| 181 | } |
---|
| 182 | |
---|
| 183 | void outsideVector(const char* tag) { |
---|
| 184 | std::string s = cstack.back(); |
---|
| 185 | cstack.pop_back(); |
---|
| 186 | if (s != "vector") { |
---|
| 187 | throw new IOException("Error deserializing vector."); |
---|
| 188 | } |
---|
| 189 | printEndEnvelope(tag); |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | void insideMap(const char* tag) { |
---|
| 193 | printBeginEnvelope(tag); |
---|
| 194 | cstack.push_back("map"); |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | void outsideMap(const char* tag) { |
---|
| 198 | std::string s = cstack.back(); |
---|
| 199 | cstack.pop_back(); |
---|
| 200 | if (s != "map") { |
---|
| 201 | throw new IOException("Error deserializing map."); |
---|
| 202 | } |
---|
| 203 | printEndEnvelope(tag); |
---|
| 204 | } |
---|
| 205 | |
---|
| 206 | void p(const char* cstr) { |
---|
| 207 | stream.write(cstr, strlen(cstr)); |
---|
| 208 | } |
---|
| 209 | |
---|
| 210 | void printBeginEnvelope(const char* tag) { |
---|
| 211 | if (cstack.size() != 0) { |
---|
| 212 | std::string s = cstack.back(); |
---|
| 213 | if ("record" == s) { |
---|
| 214 | p("<member>\n"); |
---|
| 215 | p("<name>"); |
---|
| 216 | p(tag); |
---|
| 217 | p("</name>\n"); |
---|
| 218 | p("<value>"); |
---|
| 219 | } else if ("vector" == s) { |
---|
| 220 | p("<value>"); |
---|
| 221 | } else if ("map" == s) { |
---|
| 222 | p("<value>"); |
---|
| 223 | } |
---|
| 224 | } else { |
---|
| 225 | p("<value>"); |
---|
| 226 | } |
---|
| 227 | } |
---|
| 228 | |
---|
| 229 | void printEndEnvelope(const char* tag) { |
---|
| 230 | if (cstack.size() != 0) { |
---|
| 231 | std::string s = cstack.back(); |
---|
| 232 | if ("record" == s) { |
---|
| 233 | p("</value>\n"); |
---|
| 234 | p("</member>\n"); |
---|
| 235 | } else if ("vector" == s) { |
---|
| 236 | p("</value>\n"); |
---|
| 237 | } else if ("map" == s) { |
---|
| 238 | p("</value>\n"); |
---|
| 239 | } |
---|
| 240 | } else { |
---|
| 241 | p("</value>\n"); |
---|
| 242 | } |
---|
| 243 | } |
---|
| 244 | |
---|
| 245 | public: |
---|
| 246 | OXmlArchive(OutStream& _stream) : stream(_stream) {} |
---|
| 247 | virtual void serialize(int8_t t, const char* tag); |
---|
| 248 | virtual void serialize(bool t, const char* tag); |
---|
| 249 | virtual void serialize(int32_t t, const char* tag); |
---|
| 250 | virtual void serialize(int64_t t, const char* tag); |
---|
| 251 | virtual void serialize(float t, const char* tag); |
---|
| 252 | virtual void serialize(double t, const char* tag); |
---|
| 253 | virtual void serialize(const std::string& t, const char* tag); |
---|
| 254 | virtual void serialize(const std::string& t, size_t len, const char* tag); |
---|
| 255 | virtual void startRecord(const Record& s, const char* tag); |
---|
| 256 | virtual void endRecord(const Record& s, const char* tag); |
---|
| 257 | virtual void startVector(size_t len, const char* tag); |
---|
| 258 | virtual void endVector(size_t len, const char* tag); |
---|
| 259 | virtual void startMap(size_t len, const char* tag); |
---|
| 260 | virtual void endMap(size_t len, const char* tag); |
---|
| 261 | virtual ~OXmlArchive(); |
---|
| 262 | }; |
---|
| 263 | |
---|
| 264 | } |
---|
| 265 | #endif /*XMLARCHIVE_HH_*/ |
---|