|
| 1 | +package com.marketing.Project_Recommend; |
| 2 | +import java.sql.Date; |
| 3 | +import java.text.DateFormat; |
| 4 | +import java.text.SimpleDateFormat; |
| 5 | +import java.util.regex.Matcher; |
| 6 | +import java.util.regex.Pattern; |
| 7 | +import java.io.BufferedReader; |
| 8 | +import java.io.File; |
| 9 | +import java.io.FileReader; |
| 10 | +import java.io.FileWriter; |
| 11 | +import java.io.IOException; |
| 12 | +import java.util.ArrayList; |
| 13 | +import java.util.Arrays; |
| 14 | +import java.util.Collection; |
| 15 | +import java.util.Collections; |
| 16 | +import java.util.Comparator; |
| 17 | +import java.util.HashMap; |
| 18 | +import java.util.Iterator; |
| 19 | +import java.util.LinkedHashMap; |
| 20 | +import java.util.List; |
| 21 | +import java.util.Map; |
| 22 | +import java.util.Map.Entry; |
| 23 | +import java.util.NavigableSet; |
| 24 | +import java.util.PriorityQueue; |
| 25 | +import java.util.Properties; |
| 26 | +import java.util.Set; |
| 27 | +import java.util.TreeMap; |
| 28 | + |
| 29 | +import org.apache.commons.collections.MapUtils; |
| 30 | +import org.apache.mahout.cf.taste.common.NoSuchUserException; |
| 31 | +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; |
| 32 | +import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; |
| 33 | +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; |
| 34 | +import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender; |
| 35 | +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; |
| 36 | +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; |
| 37 | +import org.apache.mahout.cf.taste.model.DataModel; |
| 38 | +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; |
| 39 | +import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender; |
| 40 | +import org.apache.mahout.cf.taste.recommender.RecommendedItem; |
| 41 | +import org.apache.mahout.cf.taste.recommender.Recommender; |
| 42 | +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; |
| 43 | +import org.apache.mahout.cf.taste.similarity.ItemSimilarity; |
| 44 | +import org.apache.mahout.cf.taste.similarity.UserSimilarity; |
| 45 | + |
| 46 | +import com.google.common.collect.ArrayListMultimap; |
| 47 | +import com.google.common.collect.HashBasedTable; |
| 48 | +import com.google.common.collect.Iterables; |
| 49 | +import com.google.common.collect.ListMultimap; |
| 50 | +import com.google.common.collect.Multimap; |
| 51 | +import com.google.common.collect.SortedSetMultimap; |
| 52 | +import com.google.common.collect.Table; |
| 53 | +import com.google.common.collect.TreeMultimap; |
| 54 | +import com.google.common.primitives.Ints; |
| 55 | + |
| 56 | +/** |
| 57 | + * Hello world! |
| 58 | + * |
| 59 | + */ |
| 60 | +public class App7 |
| 61 | +{ |
| 62 | + private static final int Float = 0; |
| 63 | + private static final int List = 0; |
| 64 | + |
| 65 | + public static void main( String[] args ) throws Exception |
| 66 | + { |
| 67 | + for (int i=0; i<= 11;i++){ |
| 68 | + String temp ="C:/Users/TG/Documents/Independent study/"; |
| 69 | + DataModel model = new FileDataModel(new File(temp+"ratings_train"+Integer.toString(i)+"n.csv")); |
| 70 | + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); |
| 71 | + ItemSimilarity isimilarity = new PearsonCorrelationSimilarity(model); |
| 72 | + //modified |
| 73 | + UserNeighborhood neighborhood = new NearestNUserNeighborhood(10, similarity, model); |
| 74 | + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); |
| 75 | + /* |
| 76 | + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); |
| 77 | + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); |
| 78 | + */ |
| 79 | + ItemBasedRecommender irecommender = new GenericItemBasedRecommender(model,isimilarity); |
| 80 | + //Code to retreive top movies from training set |
| 81 | + BufferedReader traindatareader = new BufferedReader(new FileReader(temp+"ratings_train"+Integer.toString(i)+"n.csv")); |
| 82 | + Multimap<Long,Object> movie_ratings = ArrayListMultimap.create(); |
| 83 | + String line1 = null; |
| 84 | + |
| 85 | + while ((line1 = traindatareader.readLine()) != null) { |
| 86 | + String[] words1 = line1.split(","); |
| 87 | + if(line1.equals("")) |
| 88 | + continue; |
| 89 | + long arg01=Long.parseLong(words1[1]); |
| 90 | + float arg11=Integer.parseInt(words1[2]); |
| 91 | + if (movie_ratings.containsKey(arg01)){ |
| 92 | + float temp_rating=(java.lang.Float)(Iterables.get(movie_ratings.get((long) arg01),0)); |
| 93 | + temp_rating=(temp_rating+arg11); |
| 94 | + int temp_count= (Integer) Iterables.get(movie_ratings.get((long) arg01), 1); |
| 95 | + temp_count=temp_count+1; |
| 96 | + movie_ratings.removeAll(arg01); |
| 97 | + movie_ratings.put(arg01, temp_rating); |
| 98 | + movie_ratings.put(arg01, temp_count); |
| 99 | + |
| 100 | + } |
| 101 | + else{ |
| 102 | + movie_ratings.put(arg01,arg11); |
| 103 | + movie_ratings.put(arg01,1); |
| 104 | + } |
| 105 | + } |
| 106 | + |
| 107 | + |
| 108 | + //Retrieve movies with minimum view ability of 5 |
| 109 | + TreeMultimap<Long,Float> filtered_movie_ratings = TreeMultimap.create(); |
| 110 | + for( Long movie:movie_ratings.keySet() ){ |
| 111 | + int temp1=(Integer) Iterables.get(movie_ratings.get((long) movie), 1); |
| 112 | + if (temp1 > 5){ |
| 113 | + float temp2=(java.lang.Float) Iterables.get(movie_ratings.get((long) movie), 0); |
| 114 | + filtered_movie_ratings.put(movie,(float)temp2/temp1); |
| 115 | + filtered_movie_ratings.put(movie, (float) temp1); |
| 116 | + } |
| 117 | + |
| 118 | + } |
| 119 | + //Pass the filtered movies for sorting |
| 120 | + List<Long> top10=topNKeys(filtered_movie_ratings, 10); |
| 121 | + //Read all the test file into a hashmap list for convinience of calculating precision and recall |
| 122 | + BufferedReader reader = new BufferedReader(new FileReader(temp+"ratings_test"+Integer.toString(i)+"n.csv")); |
| 123 | + Multimap<Long,Object> myMultimap = ArrayListMultimap.create(); |
| 124 | + Table<Long, Long, Float> testTable = HashBasedTable.create(); |
| 125 | + Multimap<Long,Object> User_Prec_rec = ArrayListMultimap.create(); |
| 126 | + long hits=0; |
| 127 | + long miss=0; |
| 128 | + String line = null; |
| 129 | + while ((line = reader.readLine()) != null) { |
| 130 | + String[] words = line.split(","); |
| 131 | + if(line.equals("")) |
| 132 | + continue; |
| 133 | + long arg0=Long.parseLong(words[0]); |
| 134 | + long arg1=Long.parseLong(words[1]); |
| 135 | + float arg2=Integer.parseInt(words[2]); |
| 136 | + testTable.put(arg0, arg1, arg2); |
| 137 | + //if (arg0==5485){ |
| 138 | + //System.out.println(arg0+" "+ arg1+" "+arg2); |
| 139 | + |
| 140 | + //} |
| 141 | + } |
| 142 | + reader.close(); |
| 143 | + //System.out.println("print "+(testTable.get((long)5485,(long)588)<3.0)); |
| 144 | + //Retreive all users in test dataset for which we have to calculate precision |
| 145 | + Set<Long> userlist=testTable.rowKeySet(); |
| 146 | + float total_relevant_items=(float) 0.0; |
| 147 | + float allhit=(float) 0.0; |
| 148 | + float totalusers_intrain=(float)0.0; |
| 149 | + float totalprec=(float)0.0; |
| 150 | + float totalrecommendations=(float) 0.0; |
| 151 | + float no_of_users_in_test=userlist.size(); |
| 152 | + for(Long user: userlist){ |
| 153 | + float prec=0; |
| 154 | + float rec=0; |
| 155 | + //Code to retrieved the actual movies that the user under consideration has seen and reviewed |
| 156 | + totalusers_intrain+=1.0; |
| 157 | + float userlen=(testTable.row((long)user).keySet().size()); //temp_user.size(); |
| 158 | + |
| 159 | + float count_hit=0; |
| 160 | + try{ |
| 161 | + //Code to retrieve recommended movies and ratings for the user under consideration , only top 10 |
| 162 | + java.util.List<RecommendedItem> recommendations = recommender.recommend(user, 10); |
| 163 | + |
| 164 | + if (recommendations.size()>0){ |
| 165 | + totalrecommendations+=recommendations.size(); |
| 166 | + total_relevant_items+=userlen; |
| 167 | + for (RecommendedItem recommendation : recommendations) { |
| 168 | + |
| 169 | + //Extract recommended item, verify if it exist in actual data for the given user |
| 170 | + long recommended_movie = recommendation.getItemID(); |
| 171 | + |
| 172 | + if (testTable.contains((long)user, (long)recommended_movie) && testTable.get((long)user, (long)recommended_movie)>3.0){ |
| 173 | + count_hit+=1; |
| 174 | + allhit+=1; |
| 175 | + } |
| 176 | + } |
| 177 | + prec=(float)(count_hit/recommendations.size()); |
| 178 | + totalprec+=prec; |
| 179 | + rec=count_hit/userlen; |
| 180 | + User_Prec_rec.put(user,(float)1.0); |
| 181 | + User_Prec_rec.put(user,prec); |
| 182 | + User_Prec_rec.put(user,rec); |
| 183 | + User_Prec_rec.put(user,count_hit); |
| 184 | + User_Prec_rec.put(user,userlen); |
| 185 | + hits+=1; |
| 186 | + } |
| 187 | + else{ |
| 188 | + Iterator<Long> topmv = top10.iterator(); |
| 189 | + totalrecommendations+=10; |
| 190 | + total_relevant_items+=userlen; |
| 191 | + while (topmv.hasNext()) { |
| 192 | + //Extract recommended item, verify if it exist in actual data for the given user |
| 193 | + long mv= topmv.next(); |
| 194 | + if (testTable.contains((long)user,(long)mv) && testTable.get((long)user,(long)mv)>3.0){ |
| 195 | + count_hit+=1; |
| 196 | + allhit+=1; |
| 197 | + } |
| 198 | + } |
| 199 | + prec=count_hit/10; |
| 200 | + totalprec+=prec; |
| 201 | + rec=count_hit/userlen; |
| 202 | + User_Prec_rec.put(user,(float)0.0); |
| 203 | + User_Prec_rec.put(user,prec); |
| 204 | + User_Prec_rec.put(user,rec); |
| 205 | + User_Prec_rec.put(user,count_hit); |
| 206 | + User_Prec_rec.put(user,userlen); |
| 207 | + miss+=1; |
| 208 | + } |
| 209 | + } |
| 210 | + catch(NoSuchUserException e){ |
| 211 | + System.out.println("test"); |
| 212 | + } |
| 213 | + } |
| 214 | + float total_precision= allhit/(totalrecommendations); |
| 215 | + //totalprec/totalusers_intrain; |
| 216 | + float total_recall=allhit/(total_relevant_items); |
| 217 | + try |
| 218 | + { |
| 219 | + String filename= "Data/precrec10_1.txt"; |
| 220 | + FileWriter fw = new FileWriter(filename,true); //the true will append the new data |
| 221 | + fw.write("epoch :"+Integer.toString(i)+'\n'); |
| 222 | + fw.write("hits: "+ Long.toString(hits)+ " miss: "+Long.toString(miss)+ " total relevant items in recommendations: "+Long.toString((long) allhit)+" total recommendations: "+Long.toString((long) totalrecommendations)+ " total relevant items: "+Long.toString((long) total_relevant_items)+'\n'); |
| 223 | + fw.write(User_Prec_rec.toString()+'\n'); |
| 224 | + fw.write("Total Precision: "+ (total_precision)+ " Total recall: "+(total_recall)+'\n'); |
| 225 | + fw.close(); |
| 226 | + } |
| 227 | + catch(IOException ioe){ |
| 228 | + System.err.println("IOException: " + ioe.getMessage()); |
| 229 | + } |
| 230 | + } |
| 231 | + } |
| 232 | + public static List<Long> topNKeys(final TreeMultimap<Long, Float> map, int n) { |
| 233 | + PriorityQueue<Long> topN = new PriorityQueue<Long>(n, new Comparator<Long>() { |
| 234 | + public int compare(Long o1, Long o2) { |
| 235 | + float o1r=(java.lang.Float) Iterables.get(map.get((long) o1), 0); |
| 236 | + float o2r=(java.lang.Float) Iterables.get(map.get((long) o2), 0); |
| 237 | + return Double.compare(o1r, o2r); |
| 238 | + } |
| 239 | + }); |
| 240 | + |
| 241 | + for(Long key:map.keySet()){ |
| 242 | + if (topN.size() < n) |
| 243 | + topN.add(key); |
| 244 | + |
| 245 | + else if (Iterables.get(map.get((long) topN.peek()), 0)< Iterables.get(map.get((long) key), 0)) { |
| 246 | + topN.poll(); |
| 247 | + topN.add(key); |
| 248 | + } |
| 249 | + } |
| 250 | + return (List) Arrays.asList(topN.toArray()); |
| 251 | + } |
| 252 | + |
| 253 | +} |
0 commit comments