Aggregating monthly sums by group in R
此处为mydata
# Sample data: ten return documents for a single shop / product pair.
# doc_date is a factor of "dd.mm.yyyy" strings; return_count is the value
# that the answers below aggregate.
mydata <- structure(
  list(
    doc_date = structure(
      c(7L, 9L, 4L, 10L, 2L, 5L, 8L, 1L, 3L, 6L),
      .Label = c(
        "01.06.2018", "06.04.2018", "08.07.2018", "14.03.2018",
        "20.04.2018", "21.09.2018", "24.01.2018", "25.05.2018",
        "28.02.2018", "28.03.2018"
      ),
      class = "factor"
    ),
    shop_id = rep(67885L, 10L),
    shop_code = structure(
      rep(1L, 10L),
      .Label = "02293НСК",
      class = "factor"
    ),
    product_id = rep(11622L, 10L),
    product_group_id = rep(5L, 10L),
    city_id = rep(9L, 10L),
    fin_centre_id = rep(15L, 10L),
    return_count = c(2L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 2L)
  ),
  .Names = c(
    "doc_date", "shop_id", "shop_code", "product_id",
    "product_group_id", "city_id", "fin_centre_id", "return_count"
  ),
  class = "data.frame",
  row.names = c(NA, -10L)
)
如何按 shop_code + product_id 分组，对 return_count 列按月求和，
并以数据透视（宽表）格式输出每个月的总和？
即，期望的输出如下：
  jan feb march apr may jun jul aug sept oct nov dec
1   2   3     2   2   3   1   3   0    2   0   0   0
本问题与“按组聚合/汇总多个变量（例如求和、均值）”并不重复，
因为我需要的是数据透视格式的结果。
问题在于同一个月份内有多个不同的日期，因此我们先在月份级别进行汇总，然后再做透视（pivot）。试试这个：
library(data.table)
library(lubridate)

# Work on a copy so the original `mydata` (and its doc_date column) survives.
returns <- as.data.table(mydata)

# Convert the "dd.mm.yyyy" strings to Date; as.character() makes the parse
# independent of doc_date being stored as a factor.
returns[, new_date := dmy(as.character(doc_date))]

# Month as a factor with all twelve levels, so that months without any
# returns still get a column in the pivot below.
returns[, month := factor(month.abb[month(new_date)], levels = month.abb)]

# Pivot up: one row per shop_code + product_id, one column per month.
# dcast() aggregates with sum() itself, so no separate group-and-summarise
# step is needed. drop = c(TRUE, FALSE) keeps only observed groups on the
# left-hand side but every month level on the right; empty cells become 0.
mydata_1 <- dcast(
  returns,
  shop_code + product_id ~ month,
  fun.aggregate = sum,
  value.var = "return_count",
  drop = c(TRUE, FALSE),
  fill = 0L
)
这是一种使用 data.table 的做法。
编辑后的结果中包含了计数为 0 的月份：
library(data.table)
library(lubridate)

setDT(mydata)

# First make a variable storing the month. The dates look like "24.01.2018",
# so the year must be parsed with %Y (four digits); %y would read only "20"
# and silently produce dates in 2020.
mydata[, month := lubridate::month(
  as.Date(as.character(doc_date), format = "%d.%m.%Y"),
  label = TRUE
)]

# Then sum return_count by product id, group id and month: one row per
# combination, so no product group is discarded.
monthly <- mydata[
  ,
  .(sum = sum(return_count)),
  by = .(product_id, product_group_id, month)
]

# Now make sure any months with 0 counts are included: build the full
# (product, group) x 12-month grid and right-join the sums onto it.
all_months <- lubridate::month(1:12, label = TRUE)
groups <- unique(monthly[, .(product_id, product_group_id)])
grid <- groups[rep(seq_len(nrow(groups)), each = length(all_months))]
grid[, month := rep(all_months, times = nrow(groups))]

mydata <- merge(
  monthly,
  grid,
  by = c("product_id", "product_group_id", "month"),
  all.y = TRUE
)
mydata[is.na(sum), sum := 0L]

## output (sample data; exact print layout may vary by data.table version)
#     product_id product_group_id month sum
#  1:      11622                5   Jan   2
#  2:      11622                5   Feb   3
#  3:      11622                5   Mar   2
#  4:      11622                5   Apr   2
#  5:      11622                5   May   3
#  6:      11622                5   Jun   1
#  7:      11622                5   Jul   3
#  8:      11622                5   Aug   0
#  9:      11622                5   Sep   2
# 10:      11622                5   Oct   0
# 11:      11622                5   Nov   0
# 12:      11622                5   Dec   0
library(tidyverse)
library(lubridate) # provides dmy(); tidyverse attaches it only from 2.0.0 on

# Full month name for every document date. months() returns names in the
# session's locale, which is why the output below shows German month names.
mydata$months <- months(dmy(mydata$doc_date))

# Total returns per month name; months without returns do not appear.
my <- mydata %>%
  group_by(months) %>%
  summarise(re_count = sum(return_count, na.rm = TRUE))
my
# # A tibble: 8 x 2
#   months    re_count
#   <chr>        <int>
# 1 April            2
# 2 Februar          3
# 3 Januar           2
# 4 Juli             3
# 5 Juni             1
# 6 Mai              3
# 7 März             2
# 8 September        2
将是我使用