1 | /** |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
3 | * or more contributor license agreements. See the NOTICE file |
---|
4 | * distributed with this work for additional information |
---|
5 | * regarding copyright ownership. The ASF licenses this file |
---|
6 | * to you under the Apache License, Version 2.0 (the |
---|
7 | * "License"); you may not use this file except in compliance |
---|
8 | * with the License. You may obtain a copy of the License at |
---|
9 | * |
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
11 | * |
---|
12 | * Unless required by applicable law or agreed to in writing, software |
---|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
15 | * See the License for the specific language governing permissions and |
---|
16 | * limitations under the License. |
---|
17 | */ |
---|
18 | |
---|
#include "csvarchive.hh"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
---|
21 | |
---|
22 | using namespace hadoop; |
---|
23 | |
---|
24 | static std::string readUptoTerminator(PushBackInStream& stream) |
---|
25 | { |
---|
26 | std::string s; |
---|
27 | while (1) { |
---|
28 | char c; |
---|
29 | if (1 != stream.read(&c, 1)) { |
---|
30 | throw new IOException("Error in deserialization."); |
---|
31 | } |
---|
32 | if (c == ',' || c == '\n' || c == '}') { |
---|
33 | if (c != ',') { |
---|
34 | stream.pushBack(c); |
---|
35 | } |
---|
36 | break; |
---|
37 | } |
---|
38 | s.push_back(c); |
---|
39 | } |
---|
40 | return s; |
---|
41 | } |
---|
42 | |
---|
43 | void hadoop::ICsvArchive::deserialize(int8_t& t, const char* tag) |
---|
44 | { |
---|
45 | std::string s = readUptoTerminator(stream); |
---|
46 | t = (int8_t) strtol(s.c_str(), NULL, 10); |
---|
47 | } |
---|
48 | |
---|
49 | void hadoop::ICsvArchive::deserialize(bool& t, const char* tag) |
---|
50 | { |
---|
51 | std::string s = readUptoTerminator(stream); |
---|
52 | t = (s == "T") ? true : false; |
---|
53 | } |
---|
54 | |
---|
55 | void hadoop::ICsvArchive::deserialize(int32_t& t, const char* tag) |
---|
56 | { |
---|
57 | std::string s = readUptoTerminator(stream); |
---|
58 | t = strtol(s.c_str(), NULL, 10); |
---|
59 | } |
---|
60 | |
---|
61 | void hadoop::ICsvArchive::deserialize(int64_t& t, const char* tag) |
---|
62 | { |
---|
63 | std::string s = readUptoTerminator(stream); |
---|
64 | t = strtoll(s.c_str(), NULL, 10); |
---|
65 | } |
---|
66 | |
---|
67 | void hadoop::ICsvArchive::deserialize(float& t, const char* tag) |
---|
68 | { |
---|
69 | std::string s = readUptoTerminator(stream); |
---|
70 | t = strtof(s.c_str(), NULL); |
---|
71 | } |
---|
72 | |
---|
73 | void hadoop::ICsvArchive::deserialize(double& t, const char* tag) |
---|
74 | { |
---|
75 | std::string s = readUptoTerminator(stream); |
---|
76 | t = strtod(s.c_str(), NULL); |
---|
77 | } |
---|
78 | |
---|
79 | void hadoop::ICsvArchive::deserialize(std::string& t, const char* tag) |
---|
80 | { |
---|
81 | std::string temp = readUptoTerminator(stream); |
---|
82 | if (temp[0] != '\'') { |
---|
83 | throw new IOException("Errror deserializing string."); |
---|
84 | } |
---|
85 | t.clear(); |
---|
86 | // skip first character, replace escaped characters |
---|
87 | int len = temp.length(); |
---|
88 | for (int i = 1; i < len; i++) { |
---|
89 | char c = temp.at(i); |
---|
90 | if (c == '%') { |
---|
91 | // since we escape '%', there have to be at least two chars following a '%' |
---|
92 | char ch1 = temp.at(i+1); |
---|
93 | char ch2 = temp.at(i+2); |
---|
94 | i += 2; |
---|
95 | if (ch1 == '0' && ch2 == '0') { |
---|
96 | t.append(1, '\0'); |
---|
97 | } else if (ch1 == '0' && ch2 == 'A') { |
---|
98 | t.append(1, '\n'); |
---|
99 | } else if (ch1 == '0' && ch2 == 'D') { |
---|
100 | t.append(1, '\r'); |
---|
101 | } else if (ch1 == '2' && ch2 == 'C') { |
---|
102 | t.append(1, ','); |
---|
103 | } else if (ch1 == '7' && ch2 == 'D') { |
---|
104 | t.append(1, '}'); |
---|
105 | } else if (ch1 == '2' && ch2 == '5') { |
---|
106 | t.append(1, '%'); |
---|
107 | } else { |
---|
108 | throw new IOException("Error deserializing string."); |
---|
109 | } |
---|
110 | } |
---|
111 | else { |
---|
112 | t.append(1, c); |
---|
113 | } |
---|
114 | } |
---|
115 | } |
---|
116 | |
---|
117 | void hadoop::ICsvArchive::deserialize(std::string& t, size_t& len, const char* tag) |
---|
118 | { |
---|
119 | std::string s = readUptoTerminator(stream); |
---|
120 | if (s[0] != '#') { |
---|
121 | throw new IOException("Errror deserializing buffer."); |
---|
122 | } |
---|
123 | s.erase(0, 1); /// erase first character |
---|
124 | len = s.length(); |
---|
125 | if (len%2 == 1) { // len is guaranteed to be even |
---|
126 | throw new IOException("Errror deserializing buffer."); |
---|
127 | } |
---|
128 | len = len >> 1; |
---|
129 | for (size_t idx = 0; idx < len; idx++) { |
---|
130 | char buf[3]; |
---|
131 | buf[0] = s[2*idx]; |
---|
132 | buf[1] = s[2*idx+1]; |
---|
133 | buf[2] = '\0'; |
---|
134 | int i; |
---|
135 | if (1 != sscanf(buf, "%2x", &i)) { |
---|
136 | throw new IOException("Errror deserializing buffer."); |
---|
137 | } |
---|
138 | t.push_back((char) i); |
---|
139 | } |
---|
140 | len = t.length(); |
---|
141 | } |
---|
142 | |
---|
143 | void hadoop::ICsvArchive::startRecord(Record& s, const char* tag) |
---|
144 | { |
---|
145 | if (tag != NULL) { |
---|
146 | char mark[2]; |
---|
147 | if (2 != stream.read(mark, 2)) { |
---|
148 | throw new IOException("Error deserializing record."); |
---|
149 | } |
---|
150 | if (mark[0] != 's' || mark[1] != '{') { |
---|
151 | throw new IOException("Error deserializing record."); |
---|
152 | } |
---|
153 | } |
---|
154 | } |
---|
155 | |
---|
156 | void hadoop::ICsvArchive::endRecord(Record& s, const char* tag) |
---|
157 | { |
---|
158 | char mark; |
---|
159 | if (1 != stream.read(&mark, 1)) { |
---|
160 | throw new IOException("Error deserializing record."); |
---|
161 | } |
---|
162 | if (tag == NULL) { |
---|
163 | if (mark != '\n') { |
---|
164 | throw new IOException("Error deserializing record."); |
---|
165 | } |
---|
166 | } else if (mark != '}') { |
---|
167 | throw new IOException("Error deserializing record."); |
---|
168 | } else { |
---|
169 | readUptoTerminator(stream); |
---|
170 | } |
---|
171 | } |
---|
172 | |
---|
173 | Index* hadoop::ICsvArchive::startVector(const char* tag) |
---|
174 | { |
---|
175 | char mark[2]; |
---|
176 | if (2 != stream.read(mark, 2)) { |
---|
177 | throw new IOException("Error deserializing vector."); |
---|
178 | } |
---|
179 | if (mark[0] != 'v' || mark[1] != '{') { |
---|
180 | throw new IOException("Error deserializing vector."); |
---|
181 | } |
---|
182 | return new CsvIndex(stream); |
---|
183 | } |
---|
184 | |
---|
185 | void hadoop::ICsvArchive::endVector(Index* idx, const char* tag) |
---|
186 | { |
---|
187 | delete idx; |
---|
188 | char mark; |
---|
189 | if (1 != stream.read(&mark, 1)) { |
---|
190 | throw new IOException("Error deserializing vector."); |
---|
191 | } |
---|
192 | if (mark != '}') { |
---|
193 | throw new IOException("Error deserializing vector."); |
---|
194 | } |
---|
195 | readUptoTerminator(stream); |
---|
196 | } |
---|
197 | |
---|
198 | Index* hadoop::ICsvArchive::startMap(const char* tag) |
---|
199 | { |
---|
200 | char mark[2]; |
---|
201 | if (2 != stream.read(mark, 2)) { |
---|
202 | throw new IOException("Error deserializing map."); |
---|
203 | } |
---|
204 | if (mark[0] != 'm' || mark[1] != '{') { |
---|
205 | throw new IOException("Error deserializing map."); |
---|
206 | } |
---|
207 | |
---|
208 | return new CsvIndex(stream); |
---|
209 | } |
---|
210 | |
---|
211 | void hadoop::ICsvArchive::endMap(Index* idx, const char* tag) |
---|
212 | { |
---|
213 | delete idx; |
---|
214 | char mark; |
---|
215 | if (1 != stream.read(&mark, 1)) { |
---|
216 | throw new IOException("Error deserializing map."); |
---|
217 | } |
---|
218 | if (mark != '}') { |
---|
219 | throw new IOException("Error deserializing map."); |
---|
220 | } |
---|
221 | readUptoTerminator(stream); |
---|
222 | } |
---|
223 | |
---|
// Out-of-line empty destructor; no cleanup is needed here (the archive
// holds only the stream reference, which it does not own — presumably
// defined out-of-line to anchor the vtable; confirm in csvarchive.hh).
hadoop::ICsvArchive::~ICsvArchive()
{
}
---|
227 | |
---|
228 | void hadoop::OCsvArchive::serialize(int8_t t, const char* tag) |
---|
229 | { |
---|
230 | printCommaUnlessFirst(); |
---|
231 | char sval[5]; |
---|
232 | sprintf(sval, "%d", t); |
---|
233 | stream.write(sval, strlen(sval)); |
---|
234 | } |
---|
235 | |
---|
236 | void hadoop::OCsvArchive::serialize(bool t, const char* tag) |
---|
237 | { |
---|
238 | printCommaUnlessFirst(); |
---|
239 | const char *sval = t ? "T" : "F"; |
---|
240 | stream.write(sval,1); |
---|
241 | } |
---|
242 | |
---|
243 | void hadoop::OCsvArchive::serialize(int32_t t, const char* tag) |
---|
244 | { |
---|
245 | printCommaUnlessFirst(); |
---|
246 | char sval[128]; |
---|
247 | sprintf(sval, "%d", t); |
---|
248 | stream.write(sval, strlen(sval)); |
---|
249 | } |
---|
250 | |
---|
251 | void hadoop::OCsvArchive::serialize(int64_t t, const char* tag) |
---|
252 | { |
---|
253 | printCommaUnlessFirst(); |
---|
254 | char sval[128]; |
---|
255 | sprintf(sval, "%lld", t); |
---|
256 | stream.write(sval, strlen(sval)); |
---|
257 | } |
---|
258 | |
---|
259 | void hadoop::OCsvArchive::serialize(float t, const char* tag) |
---|
260 | { |
---|
261 | printCommaUnlessFirst(); |
---|
262 | char sval[128]; |
---|
263 | sprintf(sval, "%f", t); |
---|
264 | stream.write(sval, strlen(sval)); |
---|
265 | } |
---|
266 | |
---|
267 | void hadoop::OCsvArchive::serialize(double t, const char* tag) |
---|
268 | { |
---|
269 | printCommaUnlessFirst(); |
---|
270 | char sval[128]; |
---|
271 | sprintf(sval, "%lf", t); |
---|
272 | stream.write(sval, strlen(sval)); |
---|
273 | } |
---|
274 | |
---|
275 | void hadoop::OCsvArchive::serialize(const std::string& t, const char* tag) |
---|
276 | { |
---|
277 | printCommaUnlessFirst(); |
---|
278 | stream.write("'",1); |
---|
279 | int len = t.length(); |
---|
280 | for (int idx = 0; idx < len; idx++) { |
---|
281 | char c = t[idx]; |
---|
282 | switch(c) { |
---|
283 | case '\0': |
---|
284 | stream.write("%00",3); |
---|
285 | break; |
---|
286 | case 0x0A: |
---|
287 | stream.write("%0A",3); |
---|
288 | break; |
---|
289 | case 0x0D: |
---|
290 | stream.write("%0D",3); |
---|
291 | break; |
---|
292 | case 0x25: |
---|
293 | stream.write("%25",3); |
---|
294 | break; |
---|
295 | case 0x2C: |
---|
296 | stream.write("%2C",3); |
---|
297 | break; |
---|
298 | case 0x7D: |
---|
299 | stream.write("%7D",3); |
---|
300 | break; |
---|
301 | default: |
---|
302 | stream.write(&c,1); |
---|
303 | break; |
---|
304 | } |
---|
305 | } |
---|
306 | } |
---|
307 | |
---|
308 | void hadoop::OCsvArchive::serialize(const std::string& t, size_t len, const char* tag) |
---|
309 | { |
---|
310 | printCommaUnlessFirst(); |
---|
311 | stream.write("#",1); |
---|
312 | for(size_t idx = 0; idx < len; idx++) { |
---|
313 | uint8_t b = t[idx]; |
---|
314 | char sval[3]; |
---|
315 | sprintf(sval,"%2x",b); |
---|
316 | stream.write(sval, 2); |
---|
317 | } |
---|
318 | } |
---|
319 | |
---|
320 | void hadoop::OCsvArchive::startRecord(const Record& s, const char* tag) |
---|
321 | { |
---|
322 | printCommaUnlessFirst(); |
---|
323 | if (tag != NULL && strlen(tag) != 0) { |
---|
324 | stream.write("s{",2); |
---|
325 | } |
---|
326 | isFirst = true; |
---|
327 | } |
---|
328 | |
---|
329 | void hadoop::OCsvArchive::endRecord(const Record& s, const char* tag) |
---|
330 | { |
---|
331 | if (tag == NULL || strlen(tag) == 0) { |
---|
332 | stream.write("\n",1); |
---|
333 | isFirst = true; |
---|
334 | } else { |
---|
335 | stream.write("}",1); |
---|
336 | isFirst = false; |
---|
337 | } |
---|
338 | } |
---|
339 | |
---|
340 | void hadoop::OCsvArchive::startVector(size_t len, const char* tag) |
---|
341 | { |
---|
342 | printCommaUnlessFirst(); |
---|
343 | stream.write("v{",2); |
---|
344 | isFirst = true; |
---|
345 | } |
---|
346 | |
---|
347 | void hadoop::OCsvArchive::endVector(size_t len, const char* tag) |
---|
348 | { |
---|
349 | stream.write("}",1); |
---|
350 | isFirst = false; |
---|
351 | } |
---|
352 | |
---|
353 | void hadoop::OCsvArchive::startMap(size_t len, const char* tag) |
---|
354 | { |
---|
355 | printCommaUnlessFirst(); |
---|
356 | stream.write("m{",2); |
---|
357 | isFirst = true; |
---|
358 | } |
---|
359 | |
---|
360 | void hadoop::OCsvArchive::endMap(size_t len, const char* tag) |
---|
361 | { |
---|
362 | stream.write("}",1); |
---|
363 | isFirst = false; |
---|
364 | } |
---|
365 | |
---|
// Out-of-line empty destructor; no cleanup is needed here (the archive
// holds only the stream reference, which it does not own — presumably
// defined out-of-line to anchor the vtable; confirm in csvarchive.hh).
hadoop::OCsvArchive::~OCsvArchive()
{
}
---|