try out django
[mlpccg-meta.git] / mlpccg / Clustering.py
1 from sklearn.cluster import AffinityPropagation
2 from sklearn.feature_extraction import DictVectorizer
3 from collections import defaultdict
4 import csv
5
6 from mlpccg.CardDb import CARDDB
7 from mlpccg.DeckList import DeckList
8
class Clustering:
    """Group tournament decklists into clusters with affinity propagation.

    Each record that carries a decklist URL is loaded as a ``DeckList``,
    turned into a feature dict, vectorized with ``DictVectorizer`` and
    clustered with ``AffinityPropagation``.

    Attributes set by ``__init__``:
        records: the record source passed in by the caller.
        decks: the ``DeckList`` objects that were clustered, in load order.
        features / features_alt: one feature dict per deck, same order as
            ``decks``.
        placements: maps each deck to its integer tournament placement.
        clusters: maps a cluster label to the list of decks in that cluster.
        all_ids: the card ids known to ``CARDDB``.
        vectorizer, af, labels, n_clusters: the fitted vectorizer, the fitted
            model, the per-deck labels and the number of distinct clusters.
    """

    def __init__(self, records):
        """Load the decklists referenced by ``records`` and cluster them.

        ``records`` must provide ``all()`` yielding mappings with the keys
        ``decklist_url``, ``decklist_name`` and ``placement``. Records whose
        ``decklist_url`` is falsy are ignored.
        """
        self.records = records
        self.decks = []
        self.features = []
        self.features_alt = []
        self.placements = {}
        self.clusters = defaultdict(list)
        # Snapshot the ids so every feature dict is built from the same key
        # set, even if the card database is modified later.
        self.all_ids = list(CARDDB._by_id.keys())

        for record in self.records.all():
            if not record['decklist_url']:
                continue

            decklist = DeckList(name=record['decklist_name'],
                                url=record['decklist_url'])
            self.features.append(self.extract_features(decklist))
            self.features_alt.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            self.placements[decklist] = int(record['placement'])

        self.vectorizer = DictVectorizer()
        X = self.vectorizer.fit_transform(self.features).toarray()

        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster of its own.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for label, deck in zip(self.labels, self.decks):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a card-count feature dict for ``deck``.

        The result has one key per id in ``self.all_ids`` (0 for cards the
        deck does not contain). Card ids from the deck are lower-cased before
        counting, so the ids in ``self.all_ids`` are presumably lower-case --
        verify against ``CARDDB``.

        Raises:
            KeyError: if the deck contains a card whose lower-cased id is not
                in ``self.all_ids``.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return the feature dict used for clustering and prediction.

        Currently this is the per-card count vector produced by
        ``extract_features_alt``. The hand-crafted summary features that used
        to sit (unreachably) below the ``return`` are kept in
        ``_extract_summary_features``.
        """
        return self.extract_features_alt(deck)

    def _extract_summary_features(self, deck):
        """Return aggregate colour/type/power/control features for ``deck``.

        Not used by the clustering at the moment; preserved as an alternative
        to the card-count features.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0,
            'Problem': 0,
            'Power': 0.0, 'Control': 0}

        for card in deck.cards:
            # Only Friend and Mane cards contribute to the colour counts.
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        # Power is averaged over the deck; an empty deck keeps 0.0 instead of
        # dividing by zero.
        if deck.cards:
            features['Power'] = features['Power'] / len(deck.cards)

        return features

    def predict(self, decklist):
        """Return the cluster label the fitted model assigns to ``decklist``."""
        # Use transform(), not fit_transform(): refitting here would replace
        # the vocabulary learned in __init__ and could change the column
        # layout the model was trained on.
        X = self.vectorizer.transform([self.extract_features(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``(average placement, cluster label)`` tuples.

        The list is sorted by ascending average placement of the decks in
        each cluster.
        """
        stats_by_label = {}
        for label, deck in zip(self.labels, self.decks):
            stats = stats_by_label.setdefault(
                label, {'label': label, 'sum': 0, 'num': 0})
            stats['sum'] += self.placements[deck]
            stats['num'] += 1

        ranking = [(stats['sum'] / float(stats['num']), stats['label'])
                   for stats in stats_by_label.values()]
        # Sort on the average only, so clusters with equal averages are never
        # compared by label.
        ranking.sort(key=lambda entry: entry[0])
        return ranking