[120] | 1 | /** |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
| 3 | * or more contributor license agreements. See the NOTICE file |
---|
| 4 | * distributed with this work for additional information |
---|
| 5 | * regarding copyright ownership. The ASF licenses this file |
---|
| 6 | * to you under the Apache License, Version 2.0 (the |
---|
| 7 | * "License"); you may not use this file except in compliance |
---|
| 8 | * with the License. You may obtain a copy of the License at |
---|
| 9 | * |
---|
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 11 | * |
---|
| 12 | * Unless required by applicable law or agreed to in writing, software |
---|
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 15 | * See the License for the specific language governing permissions and |
---|
| 16 | * limitations under the License. |
---|
| 17 | */ |
---|
| 18 | #ifndef HADOOP_PIPES_HH |
---|
| 19 | #define HADOOP_PIPES_HH |
---|
| 20 | |
---|
| 21 | #ifdef SWIG |
---|
| 22 | %module (directors="1") HadoopPipes |
---|
| 23 | %include "std_string.i" |
---|
| 24 | %feature("director") Mapper; |
---|
| 25 | %feature("director") Reducer; |
---|
| 26 | %feature("director") Partitioner; |
---|
| 27 | %feature("director") RecordReader; |
---|
| 28 | %feature("director") RecordWriter; |
---|
| 29 | %feature("director") Factory; |
---|
| 30 | #else |
---|
#include <stddef.h>
#include <stdint.h>
#include <string>
---|
| 32 | #endif |
---|
| 33 | |
---|
| 34 | namespace HadoopPipes { |
---|
| 35 | |
---|
| 36 | /** |
---|
| 37 | * This header defines the interface between application code and the |
---|
| 38 | * foreign code interface to Hadoop Map/Reduce. |
---|
| 39 | */ |
---|
| 40 | |
---|
/**
 * Abstract, read-only accessor for the properties of a job.
 *
 * Every lookup is pure virtual and const and takes the property key as a
 * string. How keys are resolved and how values are converted is decided by
 * the implementing class and is not visible in this header.
 */
class JobConf {
public:
  /** Keyed query returning a bool; by its name, whether the key is present. */
  virtual bool hasKey(const std::string& key) const = 0;

  /** Keyed lookup returning the value as a reference to a string. */
  virtual const std::string& get(const std::string& key) const = 0;

  /** Keyed lookup returning the value as an int. */
  virtual int getInt(const std::string& key) const = 0;

  /** Keyed lookup returning the value as a float. */
  virtual float getFloat(const std::string& key) const = 0;

  /** Keyed lookup returning the value as a bool. */
  virtual bool getBoolean(const std::string& key) const = 0;

  /** Virtual so that an implementation can be destroyed through a JobConf*. */
  virtual ~JobConf() {
  }
};
---|
| 53 | |
---|
| 54 | /** |
---|
| 55 | * Task context provides the information about the task and job. |
---|
| 56 | */ |
---|
| 57 | class TaskContext { |
---|
| 58 | public: |
---|
| 59 | /** |
---|
| 60 | * Counter to keep track of a property and its value. |
---|
| 61 | */ |
---|
| 62 | class Counter { |
---|
| 63 | private: |
---|
| 64 | int id; |
---|
| 65 | public: |
---|
| 66 | Counter(int counterId) : id(counterId) {} |
---|
| 67 | Counter(const Counter& counter) : id(counter.id) {} |
---|
| 68 | |
---|
| 69 | int getId() const { return id; } |
---|
| 70 | }; |
---|
| 71 | |
---|
| 72 | /** |
---|
| 73 | * Get the JobConf for the current task. |
---|
| 74 | */ |
---|
| 75 | virtual const JobConf* getJobConf() = 0; |
---|
| 76 | |
---|
| 77 | /** |
---|
| 78 | * Get the current key. |
---|
| 79 | * @return the current key |
---|
| 80 | */ |
---|
| 81 | virtual const std::string& getInputKey() = 0; |
---|
| 82 | |
---|
| 83 | /** |
---|
| 84 | * Get the current value. |
---|
| 85 | * @return the current value |
---|
| 86 | */ |
---|
| 87 | virtual const std::string& getInputValue() = 0; |
---|
| 88 | |
---|
| 89 | /** |
---|
| 90 | * Generate an output record |
---|
| 91 | */ |
---|
| 92 | virtual void emit(const std::string& key, const std::string& value) = 0; |
---|
| 93 | |
---|
| 94 | /** |
---|
| 95 | * Mark your task as having made progress without changing the status |
---|
| 96 | * message. |
---|
| 97 | */ |
---|
| 98 | virtual void progress() = 0; |
---|
| 99 | |
---|
| 100 | /** |
---|
| 101 | * Set the status message and call progress. |
---|
| 102 | */ |
---|
| 103 | virtual void setStatus(const std::string& status) = 0; |
---|
| 104 | |
---|
| 105 | /** |
---|
| 106 | * Register a counter with the given group and name. |
---|
| 107 | */ |
---|
| 108 | virtual Counter* |
---|
| 109 | getCounter(const std::string& group, const std::string& name) = 0; |
---|
| 110 | |
---|
| 111 | /** |
---|
| 112 | * Increment the value of the counter with the given amount. |
---|
| 113 | */ |
---|
| 114 | virtual void incrementCounter(const Counter* counter, uint64_t amount) = 0; |
---|
| 115 | |
---|
| 116 | virtual ~TaskContext() {} |
---|
| 117 | }; |
---|
| 118 | |
---|
| 119 | class MapContext: public TaskContext { |
---|
| 120 | public: |
---|
| 121 | |
---|
| 122 | /** |
---|
| 123 | * Access the InputSplit of the mapper. |
---|
| 124 | */ |
---|
| 125 | virtual const std::string& getInputSplit() = 0; |
---|
| 126 | |
---|
| 127 | /** |
---|
| 128 | * Get the name of the key class of the input to this task. |
---|
| 129 | */ |
---|
| 130 | virtual const std::string& getInputKeyClass() = 0; |
---|
| 131 | |
---|
| 132 | /** |
---|
| 133 | * Get the name of the value class of the input to this task. |
---|
| 134 | */ |
---|
| 135 | virtual const std::string& getInputValueClass() = 0; |
---|
| 136 | |
---|
| 137 | }; |
---|
| 138 | |
---|
| 139 | class ReduceContext: public TaskContext { |
---|
| 140 | public: |
---|
| 141 | /** |
---|
| 142 | * Advance to the next value. |
---|
| 143 | */ |
---|
| 144 | virtual bool nextValue() = 0; |
---|
| 145 | }; |
---|
| 146 | |
---|
/**
 * Base class for objects that offer an optional close() hook.
 */
class Closable {
public:
  /** The default does nothing; subclasses may override it. */
  virtual void close() {
  }

  /** Virtual so that a subclass can be destroyed through a Closable*. */
  virtual ~Closable() {
  }
};
---|
| 152 | |
---|
| 153 | /** |
---|
| 154 | * The application's mapper class to do map. |
---|
| 155 | */ |
---|
| 156 | class Mapper: public Closable { |
---|
| 157 | public: |
---|
| 158 | virtual void map(MapContext& context) = 0; |
---|
| 159 | }; |
---|
| 160 | |
---|
| 161 | /** |
---|
| 162 | * The application's reducer class to do reduce. |
---|
| 163 | */ |
---|
| 164 | class Reducer: public Closable { |
---|
| 165 | public: |
---|
| 166 | virtual void reduce(ReduceContext& context) = 0; |
---|
| 167 | }; |
---|
| 168 | |
---|
/**
 * Application hook that decides where each key should be sent.
 */
class Partitioner {
public:
  /**
   * Choose the destination for a key.
   * @param key the key to place
   * @param numOfReduces by its name, the number of reduces to choose among
   * @return an int identifying the destination; the valid range is not
   *         stated in this header
   */
  virtual int partition(const std::string& key, int numOfReduces) = 0;

  /** Virtual so that an implementation can be destroyed through a Partitioner*. */
  virtual ~Partitioner() {
  }
};
---|
| 177 | |
---|
| 178 | /** |
---|
| 179 | * For applications that want to read the input directly for the map function |
---|
| 180 | * they can define RecordReaders in C++. |
---|
| 181 | */ |
---|
| 182 | class RecordReader: public Closable { |
---|
| 183 | public: |
---|
| 184 | virtual bool next(std::string& key, std::string& value) = 0; |
---|
| 185 | |
---|
| 186 | /** |
---|
| 187 | * The progress of the record reader through the split as a value between |
---|
| 188 | * 0.0 and 1.0. |
---|
| 189 | */ |
---|
| 190 | virtual float getProgress() = 0; |
---|
| 191 | }; |
---|
| 192 | |
---|
| 193 | /** |
---|
| 194 | * An object to write key/value pairs as they are emited from the reduce. |
---|
| 195 | */ |
---|
| 196 | class RecordWriter: public Closable { |
---|
| 197 | public: |
---|
| 198 | virtual void emit(const std::string& key, |
---|
| 199 | const std::string& value) = 0; |
---|
| 200 | }; |
---|
| 201 | |
---|
| 202 | /** |
---|
| 203 | * A factory to create the necessary application objects. |
---|
| 204 | */ |
---|
| 205 | class Factory { |
---|
| 206 | public: |
---|
| 207 | virtual Mapper* createMapper(MapContext& context) const = 0; |
---|
| 208 | virtual Reducer* createReducer(ReduceContext& context) const = 0; |
---|
| 209 | |
---|
| 210 | /** |
---|
| 211 | * Create a combiner, if this application has one. |
---|
| 212 | * @return the new combiner or NULL, if one is not needed |
---|
| 213 | */ |
---|
| 214 | virtual Reducer* createCombiner(MapContext& context) const { |
---|
| 215 | return NULL; |
---|
| 216 | } |
---|
| 217 | |
---|
| 218 | /** |
---|
| 219 | * Create an application partitioner object. |
---|
| 220 | * @return the new partitioner or NULL, if the default partitioner should be |
---|
| 221 | * used. |
---|
| 222 | */ |
---|
| 223 | virtual Partitioner* createPartitioner(MapContext& context) const { |
---|
| 224 | return NULL; |
---|
| 225 | } |
---|
| 226 | |
---|
| 227 | /** |
---|
| 228 | * Create an application record reader. |
---|
| 229 | * @return the new RecordReader or NULL, if the Java RecordReader should be |
---|
| 230 | * used. |
---|
| 231 | */ |
---|
| 232 | virtual RecordReader* createRecordReader(MapContext& context) const { |
---|
| 233 | return NULL; |
---|
| 234 | } |
---|
| 235 | |
---|
| 236 | /** |
---|
| 237 | * Create an application record writer. |
---|
| 238 | * @return the new RecordWriter or NULL, if the Java RecordWriter should be |
---|
| 239 | * used. |
---|
| 240 | */ |
---|
| 241 | virtual RecordWriter* createRecordWriter(ReduceContext& context) const { |
---|
| 242 | return NULL; |
---|
| 243 | } |
---|
| 244 | |
---|
| 245 | virtual ~Factory() {} |
---|
| 246 | }; |
---|
| 247 | |
---|
| 248 | /** |
---|
| 249 | * Run the assigned task in the framework. |
---|
| 250 | * The user's main function should call this with the Factory that creates |
---|
| 251 | * the application's objects (this header declares no set* functions). |
---|
| 252 | * @return true, if the task succeeded. |
---|
| 253 | */ |
---|
| 254 | bool runTask(const Factory& factory); |
---|
| 255 | |
---|
| 256 | } |
---|
| 257 | |
---|
| 258 | #endif |
---|