wordpress建站落后吗,电商兼职网站开发,用jq和ajax做能登陆注册的一个网站,互联网公司排名去哪里看

1、单词计数
在一定程度上反映了MapReduce设计的初衷--对日志文件进行分析。

/**
 * Mapper of the word-count job: emits {@code (word, 1)} for every token of each input line.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /**
     * Called repeatedly, once per line read from the file split. Per the original author's
     * note, the key is the position of the line and the value is the content of the line.
     *
     * @param key     position of the current line (unused)
     * @param value   content of the current line
     * @param context sink for the emitted {@code (word, 1)} pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NOTE(review): the separator literal was lost when this snippet was extracted;
        // a single space is assumed here -- confirm against the original article.
        String[] words = StringUtils.split(value.toString(), ' ');
        for (String w : words) {
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
public class WordCountReducer extends ReducerText, IntWritable, Text, IntWritable{//每组调用一次这一组数据特点key相同value可能有多个。protected void reduce(Text key, IterableIntWritable values,Context context) throws IOException, InterruptedException {int sum 0;for(IntWritable i: values){sumsumi.get();}context.write(key, new IntWritable(sum));}
}
public class RunJob {public static void main(String[] args) {Configuration config new Configuration();
// config.set(fs.defaultFS, hdfs://node1:8020);
// config.set(yarn.resourcemanager.hostname, node1);
// config.set(mapred.jar, C:\\Users\\Administrator\\Desktop\\wc.jar);try {FileSystem fs FileSystem.get(config);Job job Job.getInstance(config);job.setJarByClass(RunJob.class);job.setJobName(wc);job.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReducer.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, new Path(/usr/input/));Path outpath new Path(/usr/output/wc);if(fs.exists(outpath)){fs.delete(outpath, true);}FileOutputFormat.setOutputPath(job, outpath);boolean f job.waitForCompletion(true);if(f){System.out.println(job completed!);}} catch (Exception e) {e.printStackTrace();}}
}2、数据去重 最终目标是让原始数据中出现次数超过一次的数据在输出文件中只出现一次。
自然会想到将同一个数据的所有记录都交给一台Reduce机器,无论这个数据出现多少次,只要在最终结果中输出一次就可以了。
将单次计数程序稍加改动即可。 public class DedupMapper extends MapperLongWritable, Text, Text, NullWritable{protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {context.write(key, NullWritable.get());}
}
public class DedupReducer extends ReducerText, NullWritable, Text, NullWritable{protected void reduce(Text key, IterableIntWritable values,Context context) throws IOException, InterruptedException {context.write(key, NullWritable.get());}
}
public class RunJob {public static void main(String[] args) {Configuration config new Configuration();
// config.set(fs.defaultFS, hdfs://node1:8020);
// config.set(yarn.resourcemanager.hostname, node1);config.set(mapred.jar, C:\\Users\\Administrator\\Desktop\\wc.jar);try {FileSystem fs FileSystem.get(config);Job job Job.getInstance(config);job.setJarByClass(RunJob.class);job.setJobName(dedup);job.setMapperClass(DedupMapper.class);job.setReducerClass(DedupReducer.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(NullWritable.class);FileInputFormat.addInputPath(job, new Path(/usr/input/));Path outpath new Path(/usr/output/dedup);if(fs.exists(outpath)){fs.delete(outpath, true);}FileOutputFormat.setOutputPath(job, outpath);boolean f job.waitForCompletion(true);if(f){System.out.println(job completed!);}} catch (Exception e) {e.printStackTrace();}}
} 3、排序
对输入文件中的内容进行排序。
输入文件中的每行内容均为一个数字,即一个数据。
要求在输出中每行有两个间隔的数字,第二个数字代表原始数据,第一个数字代表原始数据的位次。
样例输入
file1:
2
32
654
32
15
765
65223
file2:
5956
22
650
92
file3:
26
54
6
样例输出
1 2
2 6
3 15
4 22
5 26
6 32
7 32
8 54
9 92
10 650
11 654
12 765
13 5956
14 65223

设计思路
可以利用MapReduce过程中默认的排序,而不需要自己再实现排序。
重点:
1、待排序数据作为Map任务的key;
2、需要重写partition类,保证整体有序。具体做法是:用输入数据的最大值除以系统partition数量的商作为分割数据的边界,即分割数据的边界为此商的1倍、2倍至numPartitions-1倍,这样就能保证执行完partition后是整体有序的;
3、Reduce获得<key, value-list>后,根据value-list中元素的个数,将输入的key作为value输出相应的次数。

package hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class Sort
{public static class SortMapper extends MapperObject, Text, IntWritable, NullWritable{private NullWritable nw NullWritable.get();Overrideprotected void map(Object key, Text value, MapperObject, Text, IntWritable, NullWritable.Context context)throws IOException, InterruptedException{context.write(new IntWritable(Integer.parseInt(value.toString().trim())), nw);}}public static class SortReducer extends ReducerIntWritable, NullWritable, IntWritable, IntWritable{private IntWritable counter new IntWritable(1);Overrideprotected void reduce(IntWritable key, IterableNullWritable values,ReducerIntWritable, NullWritable, IntWritable, IntWritable.Context context)throws IOException, InterruptedException{for(NullWritable nw : values){context.write(counter, key);counter new IntWritable(counter.get() 1);}}}public static class SortPartitioner extends PartitionerIntWritable, NullWritable{//numPartitions equals with the number of reduce tasksOverridepublic int getPartition(IntWritable key, NullWritable value, int numPartitions){int maxNumber 65223;int bound maxNumber/numPartitions;int keyNumber key.get();for (int i 0; i numPartitions; i){if (keyNumber (i1)*bound)return i;}return 0;}}public static void main(String[] args) throws Exception{Configuration conf new Configuration();Job job Job.getInstance(conf);job.setJarByClass(Sort.class);job.setJobName(sort);job.setMapperClass(SortMapper.class);job.setReducerClass(SortReducer.class);job.setOutputKeyClass(IntWritable.class);job.setOutputValueClass(IntWritable.class);job.setMapOutputKeyClass(IntWritable.class);job.setMapOutputValueClass(NullWritable.class);job.setNumReduceTasks(5);job.setPartitionerClass(SortPartitioner.class);String inputFile /home/jinzhao/dataset/input;String outputFile /home/jinzhao/dataset/output;FileInputFormat.setInputPaths(job, new Path(inputFile));Path output new Path(outputFile);FileSystem fs FileSystem.get(conf);if (fs.exists(output))fs.delete(output, true);FileOutputFormat.setOutputPath(job, 
output);job.waitForCompletion(true);}
}4、单表关联
样例输入
file:
child parent
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma
样例输出
file:
grandchild grandparent
Tom Alice
Tom Jesse
Jone Alice
Jone Jesse
Tom Mary
Tom Ben
Jone Mary
Jone Ben
Philip Alice
Philip Jesse
Mark Alice
Mark Jesse

package hadoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class stlink
{private static boolean flag true;public static class stlinkMapper extends MapperObject, Text, Text, Text{Overrideprotected void map(Object key, Text value, MapperObject, Text, Text, Text.Context context)throws IOException, InterruptedException{String[] names value.toString().trim().split(\t);if (names[0].compareTo(child) ! 0){ context.write(new Text(names[0]), new Text(parent:names[1]));context.write(new Text(names[1]), new Text(child:names[0]));}}}public static class stlinkReducer extends ReducerText, Text, Text, Text{Overrideprotected void reduce(Text key, IterableText values, ReducerText, Text, Text, Text.Context context)throws IOException, InterruptedException{if (flag){context.write(new Text(grandchild), new Text(grandparent));flag false;}ListString children new ArrayListString();ListString parents new ArrayListString();for(Text t : values){String[] kv t.toString().split(:);if (kv[0].compareTo(child) 0)children.add(kv[1]);elseparents.add(kv[1]);}for(String c : children)for(String p : parents)context.write(new Text(c), new Text(p));}}public static void main(String[] args) throws Exception{Configuration conf new Configuration();Job stlinkJob Job.getInstance(conf);stlinkJob.setJarByClass(stlink.class);stlinkJob.setJobName(single table link);stlinkJob.setMapperClass(stlinkMapper.class);stlinkJob.setReducerClass(stlinkReducer.class);stlinkJob.setOutputKeyClass(Text.class);stlinkJob.setOutputValueClass(Text.class);stlinkJob.setMapOutputKeyClass(Text.class);stlinkJob.setMapOutputValueClass(Text.class);Path input new Path(/home/jinzhao/dataset/input);Path output new Path(/home/jinzhao/dataset/output);FileInputFormat.setInputPaths(stlinkJob, input);FileSystem fs FileSystem.get(conf);if (fs.exists(output))fs.delete(output, true);FileOutputFormat.setOutputPath(stlinkJob, output);stlinkJob.waitForCompletion(true);}
}5、多表关联
样例输入
factory:
factoryname addressID
Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1
address:
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian

package hadoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class mtlink
{private static boolean flag true;public static class mtlinkMapper extends MapperObject, Text, Text, Text{Overrideprotected void map(Object key, Text value, MapperObject, Text, Text, Text.Context context)throws IOException, InterruptedException{String str value.toString();if (str.contains(factoryname) || str.contains(addressname))return;String[] infos str.trim().split( );if (infos[0].charAt(0) 0 infos[0].charAt(0) 9)context.write(new Text(infos[0]), new Text(right: strCombine(infos, right)));elsecontext.write(new Text(infos[infos.length - 1]), new Text(left: strCombine(infos, left)));}private String strCombine(String[] strs, String direction){StringBuilder sb new StringBuilder();if (direction.compareTo(right) 0)for(int i 1; i strs.length; i)sb.append(strs[i] );elsefor (int i 0; i strs.length - 1; i)sb.append(strs[i] );return sb.toString().trim();}}public static class mtlinkReducer extends ReducerText, Text, Text, Text{Overrideprotected void reduce(Text key, IterableText values, ReducerText, Text, Text, Text.Context context)throws IOException, InterruptedException{if (flag){context.write(new Text(factoryname), new Text(adressname));flag false;}ListString companies new ArrayListString();String place huoxing;for (Text t : values){String[] kv t.toString().trim().split(:);if (kv[0].compareTo(right) 0)place kv[1];elsecompanies.add(kv[1]);}for (String s : companies)context.write(new Text(s), new Text(place));}}public static void main(String[] args) throws Exception{Configuration conf new Configuration();Job mtlinkJob Job.getInstance(conf);mtlinkJob.setJarByClass(mtlink.class);mtlinkJob.setJobName(multiple tables link);mtlinkJob.setMapperClass(mtlinkMapper.class);mtlinkJob.setReducerClass(mtlinkReducer.class);mtlinkJob.setOutputKeyClass(Text.class);mtlinkJob.setOutputValueClass(Text.class);Path input new Path(/home/jinzhao/dataset/input);Path output new Path(/home/jinzhao/dataset/output);FileInputFormat.setInputPaths(mtlinkJob, input);FileSystem fs FileSystem.get(conf);if 
(fs.exists(output))fs.delete(output, true);FileOutputFormat.setOutputPath(mtlinkJob, output);mtlinkJob.waitForCompletion(true);}
}