Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

This code takes more than 5 days running, but, it worked perfectly on the data test.

T1=Sys.time()
# Recommendation Program
#plus proches voisins
#install.packages("reshape2") like require(reshape2)
#require(data.table)
#require(reshape2)
#install.packages("data.table")
#library(Rserve)
library(data.table)
library(reshape2)


table1 <- read.table("base_test_fil.csv", sep=";", quote="\"",  header=TRUE)
#fread(data.table)

table3=as.data.table(table1)
#aggregation + binary matrix of table1
table2 <- dcast.data.table(table3, id_customer ~ product_id,value.var="product_id", fun.aggregate=length)
table2=as.data.frame(table2)
#verification
#class(table2)
data.stat=table2

#remark: we can do directly :data.stat=table2 
options(digits=4)

# Drop any column named "id_order":technique :not in 
data.stat.ibs <- (data.stat[,!(names(data.stat) %in% c("id_customer"))])

# Create a placeholder dataframe listing item vs. item
data.stat.ibs.similarity  <- matrix(NA, nrow=ncol(data.stat.ibs),ncol=ncol(data.stat.ibs),dimnames=list(colnames(data.stat.ibs),colnames(data.stat.ibs)))

getCosine <- function(x){
   as.matrix(t(x[,c(1:ncol(x))])%*%x[,c(1:ncol(x))]/(sqrt(colSums(x^2) %*% t(colSums(x^2)))))
}

data.stat.ibs.similarity<-getCosine(as.matrix(data.stat.ibs))

# Back to dataframe
data.stat.ibs.similarity <- as.data.frame(data.stat.ibs.similarity)
#View(data.stat.ibs.similarity)

# Get the top 10 neighbours for each
data.stat.neighbours <- matrix(NA, nrow=ncol(data.stat.ibs.similarity),ncol =11,dimnames=list(colnames(data.stat.ibs.similarity)))

for(i in 1:ncol(data.stat.ibs)) 
{
    data.stat.neighbours[i,] <- (t(head(n=11,rownames(data.stat.ibs.similarity[order(data.stat.ibs.similarity[,i],decreasing=TRUE),][i]))))
}

data=data.stat.neighbours
data=data[,-1]
colnames(data)=c("best1","best2","best3","best4","best5","best6","best7","best8","best9","best10")

#Export csv file 
write.csv(data,file="export1.csv",row.names=TRUE)
#Sys.time()-T1
#####user_based_collaborative
#Lets make a helper function to calculate the scores
getScore <- function(history, similarities)
{
  x <- sum(history*similarities)/sum(similarities)
  x
}

# A placeholder matrix
holder <- matrix(NA,nrow=nrow(data.stat),ncol=ncol(data.stat)-1,dimnames=list((data.stat$id_customer),colnames(data.stat[-1])))

# Loop through the id_customers (rows)
for(i in 1:nrow(holder)) 
{
  # Loops through the products (columns)
  for(j in 1:ncol(holder)) 
  {
    # Get the id_customer's name and th product's name
    # We do this not to conform with vectors sorted differently 
    id_customer <- rownames(holder)[i]
     product <- colnames(holder)[j]

     # We do not want to recommend products you have already consumed
     #Nous ne voulons pas de recommander des produits que vous avez déjà consommés
     # If you have already consumed it, we store an empty string
     if(as.integer(data.stat[data.stat$id_customer==id_customer,product]) == 1)
     { 
       holder[i,j]<-""
     } else {
       # We first have to get a product's top 10 neighbours sorted by similarity
       topN<-((head(n=11,(data.stat.ibs.similarity[order(data.stat.ibs.similarity[,product],decreasing=TRUE),][product]))))
       topN.names <- as.character(rownames(topN))
       topN.similarities <- as.numeric(topN[,1])

       # Drop the first one because it will always be the same product
       topN.similarities<-topN.similarities[-1]
       topN.names<-topN.names[-1]

       # We then get the id_customer's purchase history for those 10 items
       topN.purchases<- data.stat[,c("id_customer",topN.names)]
       topN.id_customerPurchases<-topN.purchases[topN.purchases$id_customer==id_customer,]
       topN.id_customerPurchases <- as.numeric(topN.id_customerPurchases[!(names(topN.id_customerPurchases) %in% c("id_customer"))])

       # We then calculate the score for that product and that id_customer
       holder[i,j]<-getScore(similarities=topN.similarities,history=topN.id_customerPurchases)

    } # close else statement
  } # end product for loop   
} # end id_customer for loop

# Output the results to a file
data.stat.id_customer.scores <- holder
#write.csv(file="final-id_customer-scores.csv",data.stat.id_customer.scores)

# Lets make our recommendations pretty
data.stat.id_customer.scores.holder <- matrix(NA,   nrow=nrow(data.stat.id_customer.scores),ncol=100,dimnames=list(rownames(data.stat.id_customer.scores)))
for(i in 1:nrow(data.stat.id_customer.scores)) 
{
  data.stat.id_customer.scores.holder[i,] <- names(head(n=100,(data.stat.id_customer.scores[,order(data.stat.id_customer.scores[i,],decreasing=TRUE)])[i,]))
}

# Write output to file
write.csv(data.stat.id_customer.scores.holder,file="export3.csv",row.names=TRUE)
share|improve this question
2  
please make your title more instructive of what your code is doing –  morbidCode Aug 18 at 9:14
1  
This code doesn't run. You need to point us to base_test_fil.csv, or edit your question/code to contain its own test data set. –  Curt F. Aug 18 at 9:30
1  
@CurtF. it seems your comment is compelling others to vote to close as "broken code". It isn't obviously broken code to me, and the OP says it works for small data set but too slow for a bigger one. Closing as "broken code" is not right. To the OP: it would help greatly if you could include a sample data set, something big enough to demonstrate the slowness, so that reviewers can play with it and test suggested improvements before actually suggesting them –  janos Aug 18 at 10:25
    
@joo it would help greatly if you could include a sample data set, something big enough to demonstrate the slowness, so that reviewers can play with it and test suggested improvements before actually suggesting them –  janos Aug 18 at 11:51

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.