IDEA远程提交hadoop任务
- 新建maven项目,添加如下依赖
```xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.7.1</version>
</dependency>
```
- 编写Map处理
public static class Map extends Mapper{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); System.out.println("行值:" + line); StringTokenizer tokenizer = new StringTokenizer(line, "\n"); while (tokenizer.hasMoreTokens()) { StringTokenizer tokenizerLine = new StringTokenizer(tokenizer.nextToken()); String strName = tokenizerLine.nextToken(); String strScore = tokenizerLine.nextToken(); Text name = new Text(strName); int score = Integer.parseInt(strScore); context.write(name, new IntWritable(score)); } } }
- 编写Reduce处理
public static class Reduce extends Reducer{ @Override protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { int sum = 0; int count = 0; Iterator iterator = values.iterator(); while (iterator.hasNext()) { sum += iterator.next().get(); count++; } int average = sum / count; context.write(key, new IntWritable(average)); } }
- main函数
System.setProperty("HADOOP_USER_NAME", "wujinlei");Configuration conf = new Configuration();conf.set("fs.defaultFS", "hdfs://master:9000");conf.set("mapreduce.app-submission.cross-platform", "true");conf.set("mapred.jar", "E:\\JackManWu\\hadoo-ptest\\target\\hadoop-test-1.0-SNAPSHOT.jar");conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());Job job = Job.getInstance(conf, "student_score");job.setJarByClass(StudentScore.class);//要执行的jar中的类job.setMapperClass(Map.class);job.setCombinerClass(Reduce.class);job.setReducerClass(Reduce.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/input"));FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/output"));System.exit(job.waitForCompletion(true) ? 0 : 1);
- 准备好
home/wujinlei/work/student/input
输入文件(可参照前文"创建输入文件"部分的方法),在集群上预先准备好输入文件(ps: home/wujinlei/work/student/output
输出目录不用预先创建,系统会自动生成输出)。- 样例输入文件:
陈洲立 67
陈东伟 98
李宁 87
杨森 86
刘东奇 78
谭果 94
盖盖 83
陈洲立 68
陈东伟 96
李宁 82
杨森 85
刘东奇 72
谭果 97
盖盖 82
- 执行main函数,结合hadoop日志,在任务页面查看任务执行情况,检验最终生成的结果。