001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.mapreduce.lib.partition; 020 021import org.apache.hadoop.classification.InterfaceAudience; 022import org.apache.hadoop.classification.InterfaceStability; 023import org.apache.hadoop.mapreduce.Partitioner; 024 025/** 026 * This partitioner rehashes values returned by {@link Object#hashCode()} 027 * to get smoother distribution between partitions which may improve 028 * reduce reduce time in some cases and should harm things in no cases. 029 * This partitioner is suggested with Integer and Long keys with simple 030 * patterns in their distributions. 031 * @since 2.0.3 032 */ 033@InterfaceAudience.Public 034@InterfaceStability.Stable 035public class RehashPartitioner<K, V> extends Partitioner<K, V> { 036 037 /** prime number seed for increasing hash quality */ 038 private static final int SEED = 1591267453; 039 040 /** Rehash {@link Object#hashCode()} to partition. */ 041 public int getPartition(K key, V value, int numReduceTasks) { 042 int h = SEED ^ key.hashCode(); 043 h ^= (h >>> 20) ^ (h >>> 12); 044 h = h ^ (h >>> 7) ^ (h >>> 4); 045 046 return (h & Integer.MAX_VALUE) % numReduceTasks; 047 } 048}