mongodb中mapreduce实例
一、MapReduce介绍
MongoDB中MapReduce相当于group功能,使用MapReduce要实现两个函数 Map Function 和 Reduce Function
在调用mapReduce时需要用到这两个函数。
1、MapReduce定义
db.things.mapReduce(Map Function, Reduce Function, [output | option])
2、MapReduce实例
实例数据:
> db.sdk_counter.find();
{ "_id" : ObjectId("53395c10cb8d5d3a6b878f01"), "stat_date" : "2014-03-25", "show_count" : 4, "click_count" : 0 }
{ "_id" : ObjectId("53395c14cb8d5d3a6b878f02"), "stat_date" : "2014-03-23", "show_count" : 4, "click_count" : 0 }
{ "_id" : ObjectId("53395c1ccb8d5d3a6b878f03"), "stat_date" : "2014-03-24", "show_count" : 4, "click_count" : 10 }
{ "_id" : ObjectId("53395c23cb8d5d3a6b878f04"), "stat_date" : "2014-03-24", "show_count" : 3, "click_count" : 1 }
3、字段含义:
stat_date:日期
show_count:展示量
click_count:点击量
二、实现功能:分年、分月、分日期、分年和月查看展示量和点击量
// Map function: runs once per document (bound to `this`) and emits that
// document's counters under a date-derived grouping key.
var m = function () {
    // stat_date is a "YYYY-MM-DD" string in the sample data.
    var date = this.stat_date;
    var year = date.substring(0, 4);
    var month = date.substring(5, 7);
    var day = date.substring(8, 10);
    var yearAndMonth = date.substring(0, 7);
    var counters = {
        show_count: this.show_count,
        click_count: this.click_count
    };
    // Pass year / month / day / yearAndMonth as the key below to group by
    // that granularity.
    emit(yearAndMonth, counters);
};
// Reduce function: sums the show_count and click_count of every value
// emitted for one key.
//
// key    - the grouping key passed to emit() by the map function (unused here).
// values - array of { show_count, click_count } objects for that key.
// Returns an object with the same two fields as the emitted values, holding
// the totals.
var r = function (key, values) {
    var show = 0;
    var click = 0;
    // Index loop instead of for...in: for...in would also visit enumerable
    // properties inherited from Array.prototype and turn the sums into NaN.
    for (var i = 0; i < values.length; i++) {
        show += values[i].show_count;
        click += values[i].click_count;
    }
    return { "show_count": show, "click_count": click };
};
// Run the map-reduce job over sdk_counter, writing the output to the
// "sdk_counter_tmp" collection.
db.sdk_counter.mapReduce(m, r, { out: "sdk_counter_tmp" });
// List the aggregated result documents.
db.sdk_counter_tmp.find();
--按月份
> db.sdk_counter_tmp.find();
{ "_id" : "03", "value" : { "show_count" : 15, "click_count" : 11 } }
--按年份
> db.sdk_counter_tmp.find();
{ "_id" : "2014", "value" : { "show_count" : 15, "click_count" : 11 } }
--按日期
> db.sdk_counter_tmp.find();
{ "_id" : "23", "value" : { "show_count" : 4, "click_count" : 0 } }
{ "_id" : "24", "value" : { "show_count" : 7, "click_count" : 11 } }
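{ "_id" : "25", "value" : { "show_count" : 4, "click_count" : 0 } }
--按年和月
> db.sdk_counter_tmp.find();
{ "_id" : "2014-03", "value" : { "show_count" : 15, "click_count" : 11 } }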
四、算总量
// Map function for the grand total: every document is emitted under the same
// constant key (1), so all counters end up in a single group.
var m = function () {
    var counters = {
        show_count: this.show_count,
        click_count: this.click_count
    };
    emit(1, counters);
};
// Reduce function: adds up the show_count and click_count of all values
// emitted for one key.
//
// key    - the key passed to emit() by the map function (unused here).
// values - array of { show_count, click_count } objects for that key.
// Returns an object with the same two fields as the emitted values, holding
// the totals.
var r = function (key, values) {
    var show = 0;
    var click = 0;
    // Index loop instead of for...in: for...in would also visit enumerable
    // properties inherited from Array.prototype and turn the sums into NaN.
    for (var i = 0; i < values.length; i++) {
        show += values[i].show_count;
        click += values[i].click_count;
    }
    return { "show_count": show, "click_count": click };
};
// Run the map-reduce job over sdk_counter, writing the output to the
// "sdk_counter_tmp" collection.
db.sdk_counter.mapReduce(m, r, { out: "sdk_counter_tmp" });
// List the aggregated result documents.
db.sdk_counter_tmp.find();
> db.sdk_counter_tmp.find();
{ "_id" : 1, "value" : { "show_count" : 15, "click_count" : 11 } }
五、mapreduce总结
MongoDB使用MapReduce可以实现类似关系数据库中分组统计(GROUP BY)的需求,关键是找对分组key。map函数中emit的第一个参数(key)至关重要:它传入的是字段的值,而不是字段名!