/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.nativetask;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.nativetask.serde.INativeSerializer;
import org.apache.hadoop.mapred.nativetask.serde.NativeSerialization;

/**
 * Common parent of all platforms.
 *
 * <p>A platform is a framework that runs on top of MapReduce (for example
 * Hadoop, Hive, Pig or Mahout). Every such framework brings its own key and
 * value types to a MapReduce job. A platform implementation therefore has to
 * supply serializers, so that data can be exchanged with the native side, and
 * native comparators, so that the native output collectors can sort records
 * and write them out.
 *
 * <p>{@link HadoopPlatform} is provided out of the box and covers all Hadoop
 * key types; users may add their own platform by extending this class.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class Platform {

  /** Serialization obtained from {@link NativeSerialization#getInstance()}. */
  private final NativeSerialization serialization = NativeSerialization.getInstance();

  /** Names of the key classes registered through {@link #registerKey}. */
  protected Set<String> keyClassNames = new HashSet<String>();

  /**
   * Creates a platform. Keys are not registered here; that is expected to
   * happen in {@link #init()}.
   */
  public Platform() {
  }

  /**
   * Initializes the platform. Implementations should call
   * {@link #registerKey(String, Class)} from here.
   *
   * @throws IOException declared for implementations that fail to initialize
   */
  public abstract void init() throws IOException;

  /**
   * @return the name of this platform, useful for logging and debugging
   */
  public abstract String name();

  /**
   * Associates a key class with its serializer and with this platform.
   *
   * <p>The serializer is registered with the serialization first; the key
   * class name is added to {@link #keyClassNames} only after that call
   * returns, so a failed registration leaves the set unchanged.
   *
   * @param keyClassName map output key class name
   * @param key key serializer class
   * @throws IOException propagated from the serialization's register call
   */
  protected void registerKey(String keyClassName, Class<?> key) throws IOException {
    this.serialization.register(keyClassName, key);
    this.keyClassNames.add(keyClassName);
  }

  /**
   * Tells whether this platform supports a specific key. At least two
   * conditions have to hold:
   *
   * <ol>
   *   <li>the key belongs to the platform;</li>
   *   <li>the associated serializer implements the {@link INativeComparable}
   *       interface.</li>
   * </ol>
   *
   * @param keyClassName map output key class name
   * @param serializer serializer associated with the key via registerKey
   * @param job job configuration
   * @return true if the platform has implemented native comparators of the
   *         key, false otherwise
   */
  protected abstract boolean support(String keyClassName,
      INativeSerializer<?> serializer, JobConf job);

  /**
   * Tells whether it is this platform that has defined a custom Java
   * comparator.
   *
   * <p>NativeTask does not support custom Java comparators (set with
   * mapreduce.job.output.key.comparator.class). A platform (e.g. Pig) may
   * nevertheless set that configuration and implement native comparators, in
   * which case we should not bail out.
   *
   * @param keyComparator comparator set with
   *        mapreduce.job.output.key.comparator.class
   * @return whether this platform is the one that defined the comparator
   */
  protected abstract boolean define(Class<?> keyComparator);
}