Source code

001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.mapred.nativetask;
019
020import java.io.IOException;
021import java.util.HashSet;
022import java.util.Set;
023
024import org.apache.hadoop.classification.InterfaceAudience;
025import org.apache.hadoop.classification.InterfaceStability;
026import org.apache.hadoop.mapred.JobConf;
027import org.apache.hadoop.mapred.nativetask.serde.INativeSerializer;
028import org.apache.hadoop.mapred.nativetask.serde.NativeSerialization;
029
030/**
031 * Base class for platforms. A platform is a framework running on top of
032 * MapReduce, like Hadoop, Hive, Pig, Mahout. Each framework defines its
033 * own key type and value type across a MapReduce job. For each platform,
034 * we should implement serializers so that data can be exchanged with the
035 * native side, and native comparators so that the native output collectors
036 * can sort the data and write it out. {@link HadoopPlatform} is already
037 * provided and supports all key types of Hadoop; users can implement their
038 * own custom platform.
039 */
040@InterfaceAudience.Public
041@InterfaceStability.Evolving
042public abstract class Platform {
043  private final NativeSerialization serialization; // obtained via NativeSerialization.getInstance()
044  protected Set<String> keyClassNames = new HashSet<String>(); // names added by registerKey
045
046  public Platform() {
047    this.serialization = NativeSerialization.getInstance();
048  }
049
050  /**
051   * Initializes the platform; implementations should call registerKey here.
052   */
053  public abstract void init() throws IOException;
054
055  /**
056   * @return the name of this platform, useful for logging and debugging
057   */
058  public abstract String name();
059
060
061  /**
062   * Associates a key class with its serializer and with this platform.
063   * The pair is registered with the serialization and the name is added to keyClassNames.
064   * @param keyClassName map output key class name
065   * @param key          key serializer class
066   */
067  protected void registerKey(String keyClassName, Class<?> key) throws IOException {
068    serialization.register(keyClassName, key);
069    keyClassNames.add(keyClassName);
070  }
071
072  /**
073   * Tells whether this platform supports a specific key. At least two conditions must hold:
074   *
075   * 1. the key belongs to the platform
076   * 2. the associated serializer implements the {@link INativeComparable} interface
077   *
078   *
079   * @param keyClassName map output key class name
080   * @param serializer   serializer associated with the key via registerKey
081   * @param job          job configuration
082   * @return             true if the platform has implemented native comparators of the key and
083   *                     false otherwise
084   */
085  protected abstract boolean support(String keyClassName,
086      INativeSerializer<?> serializer, JobConf job);
087
088
089  /**
090   * Tells whether it is this platform that has defined a custom Java comparator.
091   *
092   * NativeTask doesn't support custom Java comparators
093   * (set with mapreduce.job.output.key.comparator.class),
094   * but a platform (e.g. Pig) could also set that conf and implement native
095   * comparators, so we shouldn't bail out in that case.
096   * @param keyComparator comparator set with mapreduce.job.output.key.comparator.class
097   * @return whether this platform is the one that defined the given comparator
098   */
099  protected abstract boolean define(Class<?> keyComparator);
100}