|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | +package com.tuplejump.calliope.hadoop; |
| 19 | + |
| 20 | +import com.google.common.collect.ImmutableList; |
| 21 | +import com.google.common.collect.Lists; |
| 22 | +import org.apache.cassandra.auth.IAuthenticator; |
| 23 | +import org.apache.cassandra.dht.IPartitioner; |
| 24 | +import org.apache.cassandra.dht.Range; |
| 25 | +import org.apache.cassandra.dht.Token; |
| 26 | +import org.apache.cassandra.thrift.*; |
| 27 | +import org.apache.commons.lang3.StringUtils; |
| 28 | +import org.apache.hadoop.conf.Configuration; |
| 29 | +import org.apache.hadoop.mapred.JobConf; |
| 30 | +import org.apache.hadoop.mapreduce.*; |
| 31 | +import org.apache.thrift.TApplicationException; |
| 32 | +import org.apache.thrift.TException; |
| 33 | +import org.apache.thrift.protocol.TBinaryProtocol; |
| 34 | +import org.apache.thrift.protocol.TProtocol; |
| 35 | +import org.apache.thrift.transport.TTransport; |
| 36 | +import org.apache.thrift.transport.TTransportException; |
| 37 | +import org.slf4j.Logger; |
| 38 | +import org.slf4j.LoggerFactory; |
| 39 | + |
| 40 | +import java.io.IOException; |
| 41 | +import java.util.*; |
| 42 | +import java.util.concurrent.*; |
| 43 | + |
| 44 | + |
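|  | +/** |
|  | + * Base class that exposes a Cassandra column family as Hadoop input, implementing both the |
|  | + * new-style (mapreduce) and old-style (mapred) InputFormat contracts. It computes input splits |
|  | + * from the cluster's token ranges; concrete subclasses supply the record reader. |
|  | + */ |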
| 45 | +public abstract class AbstractColumnFamilyInputFormat<K, Y> extends InputFormat<K, Y> implements org.apache.hadoop.mapred.InputFormat<K, Y> { |
| 46 | + private static final Logger logger = LoggerFactory.getLogger(AbstractColumnFamilyInputFormat.class); |
| 47 | + |
| 48 | + public static final String MAPRED_TASK_ID = "mapred.task.id"; |
| 49 | +    // We need this because the old Hadoop API wants us to "write" into an existing |
| 50 | +    // key and value, whereas the new API hands them to us. |
| 51 | +    // 8 KB is the default max key size (instantiated only once); it can be |
| 52 | +    // overridden in the job configuration with the setting below. |
| 53 | + public static final String CASSANDRA_HADOOP_MAX_KEY_SIZE = "cassandra.hadoop.max_key_size"; |
| 54 | + public static final int CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT = 8192; |
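|  | +    // Illustration only: a minimal sketch of raising the key-size limit described above in a |
|  | +    // job's configuration ("job" is a hypothetical org.apache.hadoop.mapreduce.Job and 16 KB an |
|  | +    // arbitrary example value): |
|  | +    // |
|  | +    //   Configuration conf = job.getConfiguration(); |
|  | +    //   conf.setInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, 16 * 1024); |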
| 55 | + |
| 56 | + private String keyspace; |
| 57 | + private String cfName; |
| 58 | + private IPartitioner partitioner; |
| 59 | + |
| 60 | + protected void validateConfiguration(Configuration conf) { |
| 61 | + if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) { |
| 62 | +            throw new UnsupportedOperationException("You must set the keyspace and column family with setInputColumnFamily()"); |
| 63 | + } |
| 64 | + if (ConfigHelper.getInputInitialAddress(conf) == null) |
| 65 | +            throw new UnsupportedOperationException("You must set the initial input address to a Cassandra node with setInputInitialAddress"); |
| 66 | + if (ConfigHelper.getInputPartitioner(conf) == null) |
| 67 | + throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner"); |
| 68 | + } |
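|  | +    // Illustration only: a configuration sketch that would satisfy the checks above, assuming the |
|  | +    // ConfigHelper setters named in the error messages exist with these shapes; keyspace, column |
|  | +    // family, address and partitioner values are placeholders. |
|  | +    // |
|  | +    //   Configuration conf = new Configuration(); |
|  | +    //   ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_column_family"); |
|  | +    //   ConfigHelper.setInputInitialAddress(conf, "127.0.0.1"); |
|  | +    //   ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner"); |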
| 69 | + |
| 70 | + public static Cassandra.Client createAuthenticatedClient(String location, int port, Configuration conf) throws Exception { |
| 71 | + logger.debug("Creating authenticated client for CF input format"); |
| 72 | + TTransport transport; |
| 73 | + try { |
| 74 | + transport = ConfigHelper.getClientTransportFactory(conf).openTransport(location, port); |
| 75 | + } catch (Exception e) { |
| 76 | + throw new TTransportException("Failed to open a transport to " + location + ":" + port + ".", e); |
| 77 | + } |
| 78 | + TProtocol binaryProtocol = new TBinaryProtocol(transport, true, true); |
| 79 | + Cassandra.Client client = new Cassandra.Client(binaryProtocol); |
| 80 | + |
| 81 | + // log in |
| 82 | + client.set_keyspace(ConfigHelper.getInputKeyspace(conf)); |
| 83 | + if ((ConfigHelper.getInputKeyspaceUserName(conf) != null) && (ConfigHelper.getInputKeyspacePassword(conf) != null)) { |
| 84 | + Map<String, String> creds = new HashMap<String, String>(); |
| 85 | + creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf)); |
| 86 | + creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf)); |
| 87 | + AuthenticationRequest authRequest = new AuthenticationRequest(creds); |
| 88 | + client.login(authRequest); |
| 89 | + } |
| 90 | + logger.debug("Authenticated client for CF input format created successfully"); |
| 91 | + return client; |
| 92 | + } |
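|  | +    // Illustration only: credentials for the login above would typically be supplied through the |
|  | +    // ConfigHelper setters matching the getters used here (assumed to exist); the user name and |
|  | +    // password values are placeholders. |
|  | +    // |
|  | +    //   ConfigHelper.setInputKeyspaceUserName(conf, "hadoop_user"); |
|  | +    //   ConfigHelper.setInputKeyspacePassword(conf, "hadoop_password"); |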
| 93 | + |
| 94 | + public List<InputSplit> getSplits(JobContext context) throws IOException { |
| 95 | + Configuration conf = HadoopCompat.getConfiguration(context); |
| 97 | + |
| 98 | + validateConfiguration(conf); |
| 99 | + |
| 100 | +        // canonical ranges and the nodes holding their replicas |
| 101 | + List<TokenRange> masterRangeNodes = getRangeMap(conf); |
| 102 | + |
| 103 | + keyspace = ConfigHelper.getInputKeyspace(conf); |
| 104 | + cfName = ConfigHelper.getInputColumnFamily(conf); |
| 105 | + partitioner = ConfigHelper.getInputPartitioner(conf); |
| 106 | + logger.debug("partitioner is " + partitioner); |
| 107 | + |
| 108 | + |
| 109 | +        // canonical ranges, split into pieces; fetch the splits in parallel |
| 110 | + ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>()); |
| 111 | + List<InputSplit> splits = new ArrayList<InputSplit>(); |
| 112 | + |
| 113 | + try { |
| 114 | + List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>(); |
| 115 | + KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf); |
| 116 | + Range<Token> jobRange = null; |
| 117 | + if (jobKeyRange != null) { |
| 118 | + if (jobKeyRange.start_key != null) { |
| 119 | + if (!partitioner.preservesOrder()) |
| 120 | +                        throw new UnsupportedOperationException("KeyRange based on keys can only be used with an order-preserving partitioner"); |
| 121 | + if (jobKeyRange.start_token != null) |
| 122 | + throw new IllegalArgumentException("only start_key supported"); |
| 123 | + if (jobKeyRange.end_token != null) |
| 124 | + throw new IllegalArgumentException("only start_key supported"); |
| 125 | + jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key), |
| 126 | + partitioner.getToken(jobKeyRange.end_key), |
| 127 | + partitioner); |
| 128 | + } else if (jobKeyRange.start_token != null) { |
| 129 | + jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token), |
| 130 | + partitioner.getTokenFactory().fromString(jobKeyRange.end_token), |
| 131 | + partitioner); |
| 132 | + } else { |
| 133 | + logger.warn("ignoring jobKeyRange specified without start_key or start_token"); |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + for (TokenRange range : masterRangeNodes) { |
| 138 | + if (jobRange == null) { |
| 139 | + // for each range, pick a live owner and ask it to compute bite-sized splits |
| 140 | + splitfutures.add(executor.submit(new SplitCallable(range, conf))); |
| 141 | + } else { |
| 142 | + Range<Token> dhtRange = new Range<Token>(partitioner.getTokenFactory().fromString(range.start_token), |
| 143 | + partitioner.getTokenFactory().fromString(range.end_token), |
| 144 | + partitioner); |
| 145 | + |
| 146 | + if (dhtRange.intersects(jobRange)) { |
| 147 | + for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) { |
| 148 | + range.start_token = partitioner.getTokenFactory().toString(intersection.left); |
| 149 | + range.end_token = partitioner.getTokenFactory().toString(intersection.right); |
| 150 | + // for each range, pick a live owner and ask it to compute bite-sized splits |
| 151 | + splitfutures.add(executor.submit(new SplitCallable(range, conf))); |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + } |
| 156 | + |
| 157 | + // wait until we have all the results back |
| 158 | + for (Future<List<InputSplit>> futureInputSplits : splitfutures) { |
| 159 | + try { |
| 160 | + splits.addAll(futureInputSplits.get()); |
| 161 | + } catch (Exception e) { |
| 162 | + throw new IOException("Could not get input splits", e); |
| 163 | + } |
| 164 | + } |
| 165 | + } finally { |
| 166 | + executor.shutdownNow(); |
| 167 | + } |
| 168 | + |
| 169 | + assert splits.size() > 0; |
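|  | +        // Shuffle so splits are not handed to mappers in ring order, which should help spread |
|  | +        // load across replicas. |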
| 170 | + Collections.shuffle(splits, new Random(System.nanoTime())); |
| 171 | + return splits; |
| 172 | + } |
| 173 | + |
| 174 | + /** |
| 175 | + * Gets a token range and splits it up according to the suggested |
| 176 | + * size into input splits that Hadoop can use. |
| 177 | + */ |
| 178 | + class SplitCallable implements Callable<List<InputSplit>> { |
| 179 | + |
| 180 | + private final TokenRange range; |
| 181 | + private final Configuration conf; |
| 182 | + |
| 183 | + public SplitCallable(TokenRange tr, Configuration conf) { |
| 184 | + this.range = tr; |
| 185 | + this.conf = conf; |
| 186 | + } |
| 187 | + |
| 188 | + public List<InputSplit> call() throws Exception { |
| 189 | + ArrayList<InputSplit> splits = new ArrayList<InputSplit>(); |
| 190 | + List<CfSplit> subSplits = getSubSplits(keyspace, cfName, range, conf); |
| 191 | + assert range.rpc_endpoints.size() == range.endpoints.size() : "rpc_endpoints size must match endpoints size"; |
| 192 | + // turn the sub-ranges into InputSplits |
| 193 | + String[] endpoints = range.endpoints.toArray(new String[range.endpoints.size()]); |
| 194 | + |
| 195 | + int endpointIndex = 0; |
| 196 | + for (String endpoint : range.rpc_endpoints) { |
| 197 | + String endpoint_address = endpoint; |
| 198 | + if (endpoint_address == null || endpoint_address.equals("0.0.0.0")) |
| 199 | + endpoint_address = range.endpoints.get(endpointIndex); |
| 200 | + endpoints[endpointIndex++] = endpoint_address; |
| 201 | + } |
| 202 | + |
| 203 | + Token.TokenFactory factory = partitioner.getTokenFactory(); |
| 204 | + for (CfSplit subSplit : subSplits) { |
| 205 | + Token left = factory.fromString(subSplit.getStart_token()); |
| 206 | + Token right = factory.fromString(subSplit.getEnd_token()); |
| 207 | +                Range<Token> dhtRange = new Range<Token>(left, right, partitioner); |
| 208 | +                List<Range<Token>> ranges = dhtRange.isWrapAround() ? dhtRange.unwrap() : ImmutableList.of(dhtRange); |
| 209 | + for (Range<Token> subrange : ranges) { |
| 210 | + ColumnFamilySplit split = |
| 211 | + new ColumnFamilySplit( |
| 212 | + factory.toString(subrange.left), |
| 213 | + factory.toString(subrange.right), |
| 214 | + subSplit.getRow_count(), |
| 215 | + endpoints); |
| 216 | + |
| 217 | + logger.debug("adding " + split); |
| 218 | + splits.add(split); |
| 219 | + } |
| 220 | + } |
| 221 | + return splits; |
| 222 | + } |
| 223 | + } |
| 224 | + |
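|  | +    // Asks each replica of the range in turn for sub-splits of roughly the configured split size, |
|  | +    // preferring the rpc_endpoints addresses and falling back to describe_splits on servers that |
|  | +    // do not implement describe_splits_ex. |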
| 225 | + private List<CfSplit> getSubSplits(String keyspace, String cfName, TokenRange range, Configuration conf) throws IOException { |
| 226 | + int splitsize = ConfigHelper.getInputSplitSize(conf); |
| 227 | + for (int i = 0; i < range.rpc_endpoints.size(); i++) { |
| 228 | + String host = range.rpc_endpoints.get(i); |
| 229 | + |
| 230 | + if (host == null || host.equals("0.0.0.0")) |
| 231 | + host = range.endpoints.get(i); |
| 232 | + |
| 233 | + try { |
| 234 | + Cassandra.Client client = ConfigHelper.createConnection(conf, host, ConfigHelper.getInputRpcPort(conf)); |
| 235 | + client.set_keyspace(keyspace); |
| 236 | + |
| 237 | + try { |
| 238 | + return client.describe_splits_ex(cfName, range.start_token, range.end_token, splitsize); |
| 239 | + } catch (TApplicationException e) { |
| 240 | + // fallback to guessing split size if talking to a server without describe_splits_ex method |
| 241 | + if (e.getType() == TApplicationException.UNKNOWN_METHOD) { |
| 242 | + List<String> splitPoints = client.describe_splits(cfName, range.start_token, range.end_token, splitsize); |
| 243 | + return tokenListToSplits(splitPoints, splitsize); |
| 244 | + } |
| 245 | + throw e; |
| 246 | + } |
| 247 | + } catch (IOException e) { |
| 248 | +                logger.debug("failed to connect to endpoint " + host, e); |
| 249 | + } catch (InvalidRequestException e) { |
| 250 | + throw new RuntimeException(e); |
| 251 | + } catch (TException e) { |
| 252 | + throw new RuntimeException(e); |
| 253 | + } |
| 254 | + } |
| 255 | + throw new IOException("failed connecting to all endpoints " + StringUtils.join(range.endpoints, ",")); |
| 256 | + } |
| 257 | + |
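|  | +    // Fallback for the old describe_splits RPC: N split points describe N - 1 contiguous |
|  | +    // sub-ranges, each assumed to hold roughly splitsize rows since the old call returns no row |
|  | +    // counts. |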
| 258 | + private List<CfSplit> tokenListToSplits(List<String> splitTokens, int splitsize) { |
| 259 | + List<CfSplit> splits = Lists.newArrayListWithExpectedSize(splitTokens.size() - 1); |
| 260 | + for (int j = 0; j < splitTokens.size() - 1; j++) |
| 261 | + splits.add(new CfSplit(splitTokens.get(j), splitTokens.get(j + 1), splitsize)); |
| 262 | + return splits; |
| 263 | + } |
| 264 | + |
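|  | +    // Fetches the token ranges (and their replica endpoints) for the input keyspace from a node in |
|  | +    // the configured address list; going by the Thrift method name, describe_local_ring restricts |
|  | +    // the description to the local datacenter. |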
| 265 | + private List<TokenRange> getRangeMap(Configuration conf) throws IOException { |
| 266 | + Cassandra.Client client = ConfigHelper.getClientFromInputAddressList(conf); |
| 267 | + |
| 268 | + List<TokenRange> map; |
| 269 | + try { |
| 270 | + map = client.describe_local_ring(ConfigHelper.getInputKeyspace(conf)); |
| 271 | + } catch (InvalidRequestException e) { |
| 272 | + throw new RuntimeException(e); |
| 273 | + } catch (TException e) { |
| 274 | + throw new RuntimeException(e); |
| 275 | + } |
| 276 | + return map; |
| 277 | + } |
| 278 | + |
| 279 | + // |
| 280 | + // Old Hadoop API |
| 281 | + // |
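|  | +    // numSplits is ignored; each new-API split is returned directly, since ColumnFamilySplit must |
|  | +    // also implement the old mapred InputSplit interface for the assignment below to compile. |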
| 282 | + public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException { |
| 283 | + TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(jobConf, new TaskAttemptID()); |
| 284 | + List<InputSplit> newInputSplits = this.getSplits(tac); |
| 285 | + org.apache.hadoop.mapred.InputSplit[] oldInputSplits = new org.apache.hadoop.mapred.InputSplit[newInputSplits.size()]; |
| 286 | + for (int i = 0; i < newInputSplits.size(); i++) |
| 287 | + oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i); |
| 288 | + return oldInputSplits; |
| 289 | + } |
| 290 | +} |