OutputFormat在java中怎么实现自定义-创新互联

本篇文章给大家分享的是如何在 Java(Hadoop MapReduce)中实现自定义 OutputFormat。小编觉得挺实用的,因此分享给大家学习,希望大家阅读完这篇文章后可以有所收获。话不多说,跟着小编一起来看看吧。

成都创新互联公司自成立以来,一直致力于为企业提供从网站策划、网站设计、成都网站建设、网站制作、电子商务、网站推广、网站优化到为企业提供个性化软件开发等基于互联网的全面整合营销服务。公司拥有丰富的网站建设和互联网应用系统开发管理经验、成熟的应用系统解决方案、优秀的网站开发工程师团队及专业的网站设计师团队。

Java 中自定义 OutputFormat

实例代码:

package com.ccse.hadoop.outputformat; 
 
import java.io.IOException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.util.StringTokenizer; 
 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FSDataOutputStream; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.JobContext; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.OutputCommitter; 
import org.apache.hadoop.mapreduce.OutputFormat; 
import org.apache.hadoop.mapreduce.RecordWriter; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.TaskAttemptContext; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; 
 
 
/**
 * Word-count MapReduce job that demonstrates a hand-written {@link OutputFormat}.
 *
 * <p>The mapper tokenizes each input line on whitespace and emits {@code (word, 1)};
 * the reducer sums the counts per word; {@link MyselfOutputFormat} writes every
 * {@code (word, count)} pair as a tab-separated text line into a single file at
 * {@link #OUTPUT_PATH} + {@link #OUTPUT_FILENAME}.
 */
public class MySelfOutputFormatApp {

  /** HDFS directory the job reads its input from. */
  public static final String INPUT_PATH = "hdfs://chaoren1:9000/mapinput";

  /** HDFS directory the job writes its output into; deleted at start-up. */
  public static final String OUTPUT_PATH = "hdfs://chaoren1:9000/mapoutput";

  /** Name (with leading slash) of the result file created below {@link #OUTPUT_PATH}. */
  public static final String OUTPUT_FILENAME = "/abc";

  /**
   * Configures and runs the job, blocking until it finishes.
   *
   * @param args ignored
   * @throws IOException if HDFS access fails or the job does not complete successfully
   * @throws URISyntaxException if {@link #OUTPUT_PATH} is not a valid URI
   * @throws ClassNotFoundException propagated from {@link Job#waitForCompletion(boolean)}
   * @throws InterruptedException propagated from {@link Job#waitForCompletion(boolean)}
   */
  public static void main(String[] args) throws IOException, URISyntaxException,
      ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    // Remove output left over from an earlier run: the record writer creates its
    // file with overwrite disabled and would otherwise fail.
    FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
    fileSystem.delete(new Path(OUTPUT_PATH), true);

    Job job = new Job(conf, MySelfOutputFormatApp.class.getSimpleName());
    job.setJarByClass(MySelfOutputFormatApp.class);

    FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(MyselfOutputFormat.class);

    // Surface a failed job instead of silently returning as if it had succeeded.
    if (!job.waitForCompletion(true)) {
      throw new IOException("Job " + job.getJobName() + " failed");
    }
  }

  /** Emits {@code (token, 1)} for every whitespace-separated token of an input line. */
  public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Reused across calls to avoid allocating a new Writable per token.
    private final Text word = new Text();
    private final LongWritable writable = new LongWritable(1);

    /**
     * @param key input key supplied by the input format (unused)
     * @param value one line of input text; {@code null} is ignored
     * @param context sink for the emitted {@code (word, 1)} pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      if (value == null) {
        return;
      }
      StringTokenizer tokenizer = new StringTokenizer(value.toString());
      while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        context.write(word, writable);
      }
    }

  }

  /** Sums all counts received for a word and emits {@code (word, total)}. */
  public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    /**
     * @param key the word
     * @param values the partial counts collected for {@code key}
     * @param context sink for the emitted {@code (word, total)} pair
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
      long sum = 0;
      for (LongWritable value : values) {
        sum += value.get();
      }
      context.write(key, new LongWritable(sum));
    }
  }

  /**
   * Output format that sends all records to one fixed file,
   * {@link MySelfOutputFormatApp#OUTPUT_PATH} + {@link MySelfOutputFormatApp#OUTPUT_FILENAME}.
   *
   * <p>NOTE(review): every call to {@link #getRecordWriter} targets the same path with
   * overwrite disabled, so this presumably only works with a single reduce task and
   * without retried or speculative attempts. Confirm before raising the reducer count.
   */
  public static class MyselfOutputFormat extends OutputFormat<Text, LongWritable> {

    /**
     * Creates the output file and returns a writer bound to it.
     *
     * @param context supplies the job configuration used to resolve the file system
     * @return a writer that appends tab-separated lines to the output file
     * @throws IOException if the file cannot be created or the output URI is malformed
     */
    @Override
    public RecordWriter<Text, LongWritable> getRecordWriter(
        TaskAttemptContext context) throws IOException,
        InterruptedException {
      final FileSystem fileSystem;
      try {
        fileSystem = FileSystem.get(
            new URI(MySelfOutputFormatApp.OUTPUT_PATH), context.getConfiguration());
      } catch (URISyntaxException e) {
        // Fail here with the cause attached rather than handing back a writer that
        // wraps a null stream and throws NullPointerException on first use.
        throw new IOException(
            "Invalid output URI: " + MySelfOutputFormatApp.OUTPUT_PATH, e);
      }
      // Location of the output file.
      final Path path = new Path(MySelfOutputFormatApp.OUTPUT_PATH
          + MySelfOutputFormatApp.OUTPUT_FILENAME);
      return new MySelfRecordWriter(fileSystem.create(path, false));
    }

    /** Performs no validation of the output specification. */
    @Override
    public void checkOutputSpecs(JobContext context) throws IOException,
        InterruptedException {
    }

    /**
     * @return a {@link FileOutputCommitter} rooted at
     *     {@link MySelfOutputFormatApp#OUTPUT_PATH}
     */
    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
        throws IOException, InterruptedException {
      return new FileOutputCommitter(new Path(MySelfOutputFormatApp.OUTPUT_PATH), context);
    }

  }

  /** Writes each record as {@code key<TAB>value<LF>} to the supplied stream. */
  public static class MySelfRecordWriter extends RecordWriter<Text, LongWritable> {

    private final FSDataOutputStream outputStream;

    /**
     * @param outputStream destination stream; closed by {@link #close(TaskAttemptContext)}
     */
    public MySelfRecordWriter(FSDataOutputStream outputStream) {
      this.outputStream = outputStream;
    }

    /**
     * Appends one text line for the given pair.
     *
     * @param key the word, written as its UTF-8 bytes
     * @param value the count, written in decimal
     */
    @Override
    public void write(Text key, LongWritable value) throws IOException,
        InterruptedException {
      // Text holds UTF-8 bytes; only the first getLength() bytes of the backing array
      // are valid. Writing them directly keeps non-ASCII words intact, whereas
      // writeBytes(String) would drop the high byte of every char.
      this.outputStream.write(key.getBytes(), 0, key.getLength());
      this.outputStream.writeBytes("\t");
      // Decimal text instead of writeLong's 8 raw bytes, so the file stays readable.
      this.outputStream.writeBytes(Long.toString(value.get()));
      this.outputStream.writeBytes("\n");
    }

    /** Closes the underlying stream. */
    @Override
    public void close(TaskAttemptContext context) throws IOException,
        InterruptedException {
      this.outputStream.close();
    }

  }

}


新闻标题:OutputFormat在java中怎么实现自定义-创新互联
本文URL:http://pwwzsj.com/article/djidjg.html