try out django
[mlpccg-meta.git] / mlpccg / Clustering.py
1 from sklearn.cluster import AffinityPropagation
2 from sklearn.feature_extraction import DictVectorizer
3 from collections import defaultdict
4 import csv
5
6 from mlpccg.CardDb import CARDDB
7 from mlpccg.DeckList import DeckList
8
class Clustering:
    """Cluster deck lists with affinity propagation and rank the clusters.

    Every record's deck list is turned into a per-card count vector (see
    ``extract_features_alt``), the vectors are clustered with scikit-learn's
    ``AffinityPropagation``, and the resulting labels are used to group the
    decks and to average their tournament placements per cluster.

    Attributes set by ``__init__``:
        records:     the input records, stored as given.
        decks:       deck lists in record order.
        features:    one feature dict per deck, in the same order as ``decks``.
        placements:  maps each deck list to its placement as an ``int``.
        labels:      cluster label per deck, in the same order as ``decks``.
        clusters:    maps a cluster label to the list of decks carrying it.
        n_clusters:  number of distinct labels, not counting ``-1``.
        vectorizer:  the fitted ``DictVectorizer``.
        af:          the fitted ``AffinityPropagation`` model.
    """

    def __init__(self, records):
        """Extract features from ``records`` and fit the clustering model.

        Args:
            records: iterable of mappings, each with a ``'decklist'`` entry
                (an object exposing ``.cards``; it is also used as a dict
                key, so it must be hashable) and a ``'placement'`` entry
                convertible with ``int()``.
        """
        self.decks = []
        self.features = []
        self.placements = {}
        self.clusters = defaultdict(list)
        # Presumably the ids of all known cards; extract_features_alt uses
        # them as the fixed feature vocabulary -- verify against CardDb.
        self.all_ids = CARDDB._by_id.keys()
        self.labels = []
        self.records = records
        self.vectorizer = DictVectorizer()

        for record in self.records:
            decklist = record['decklist']
            self.features.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            # NOTE(review): placements is keyed by the deck list itself, so
            # two records whose deck lists compare equal share one entry and
            # the later placement wins -- confirm this is intended.
            self.placements[decklist] = int(record['placement'])

        X = self.vectorizer.fit_transform(self.features).toarray()
        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster of its own.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for label, deck in zip(self.labels, self.decks):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a card-count feature dict for ``deck``.

        The result has one key per entry of ``self.all_ids`` (initialised to
        0) and counts how many times each card id occurs in ``deck.cards``.
        Card ids are lower-cased before the lookup.

        Raises:
            KeyError: if a card's lower-cased id is not in ``self.all_ids``.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return aggregate colour/type/power/control features for ``deck``.

        Counts the colour of every ``Friend`` and ``Mane`` card, counts every
        card by type, sums ``control`` over the cards that have it, and stores
        the mean ``power`` per card (over all cards in the deck) under
        ``'Power'``. An empty deck yields all-zero features.

        Raises:
            KeyError: if a card's type (or a Friend/Mane card's colour) is
                not one of the predefined feature names.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
            'Power': 0.0, 'Control': 0}

        cards = deck.cards
        for card in cards:
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            # `in` replaces dict.has_key(), which no longer exists on
            # Python 3 dicts.
            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        # Guard against ZeroDivisionError for an empty deck; 'Power' simply
        # stays at 0.0 in that case.
        if cards:
            features['Power'] = features['Power'] / len(cards)

        return features

    def predict(self, decklist):
        """Return the cluster label the fitted model assigns to ``decklist``."""
        # Use transform(), not fit_transform(): the vectorizer was already
        # fitted in __init__, and re-fitting it here would replace its
        # vocabulary and could change the column layout the model was
        # trained on.
        X = self.vectorizer.transform([self.extract_features_alt(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return clusters ordered by average placement.

        Returns:
            A list of ``(average_placement, label)`` tuples, sorted by
            ascending average placement of the decks in each cluster.
        """
        cluster_placements = {}
        for label, deck in zip(self.labels, self.decks):
            stats = cluster_placements.setdefault(
                label, {'label': label, 'sum': 0, 'num': 0})
            stats['sum'] += self.placements[deck]
            stats['num'] += 1

        for stats in cluster_placements.values():
            # float() keeps this a true division on Python 2 as well.
            stats['avg'] = stats['sum'] / float(stats['num'])

        ordered = sorted(cluster_placements.values(), key=lambda x: x['avg'])
        return [(stats['avg'], stats['label']) for stats in ordered]