# Please enter the commit message for your changes. Lines starting
[mlpccg-meta.git] / mlpccg / Clustering.py
1 from sklearn.cluster import AffinityPropagation
2 from sklearn.feature_extraction import DictVectorizer
3 from collections import defaultdict
4 import csv
5
6 import mlpccg
7
class Clustering:
    """Group tournament decklists into clusters with affinity propagation.

    Construction reads every record that has a decklist, turns each deck
    into a feature dict, vectorizes those dicts with ``DictVectorizer``,
    fits ``AffinityPropagation`` on the result and indexes the decks by the
    cluster label they were assigned.
    """

    def __init__(self, records):
        """Build the feature vectors from *records* and fit the clustering.

        records -- object whose ``all()`` yields mappings; only the keys
            'Name', 'Decklist' and 'Placement' are read here.  Records with
            a falsy 'Decklist' are skipped.
        """
        self.records = records
        self.decks = []
        self.features = []
        self.features_alt = []
        self.placements = {}
        self.clusters = defaultdict(list)
        self.all_ids = mlpccg.CardDb.CARDDB._by_id.keys()

        for record in self.records.all():
            if not record['Decklist']:
                continue

            decklist = mlpccg.DeckList(name=record['Name'],
                                       url=record['Decklist'])
            self.features.append(self.extract_features(decklist))
            self.features_alt.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            self.placements[decklist] = int(record['Placement'])

        self.vectorizer = DictVectorizer()
        X = self.vectorizer.fit_transform(self.features).toarray()

        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster.
        # NOTE(review): presumably the value scikit-learn assigns when the
        # fit does not converge -- confirm against the installed version.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for label, deck in zip(self.labels, self.decks):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a dict mapping every id in ``self.all_ids`` to its count.

        Card ids are lower-cased before counting.  A card whose lower-cased
        id is not in ``self.all_ids`` raises KeyError.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return the feature dict used for clustering and prediction.

        Currently this is the per-card count vector.  The colour/type
        summary that used to sit (unreachable) below the return statement
        is kept as ``_extract_summary_features``.
        """
        return self.extract_features_alt(deck)

    def _extract_summary_features(self, deck):
        """Return colour/type counts plus average power and total control.

        Not used by the clustering at the moment.
        NOTE(review): assumes every card has a 'type' among the keys below
        and that Friend/Mane cards have a 'color' among the colour keys --
        anything else raises KeyError; confirm against the card database.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
            'Power': 0.0, 'Control': 0}

        for card in deck.cards:
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        # 'Power' is reported as an average per card; an empty deck keeps
        # the initial 0.0 instead of dividing by zero.
        card_count = len(deck.cards)
        if card_count:
            features['Power'] = features['Power'] / card_count

        return features

    def predict(self, decklist):
        """Return the label the fitted model assigns to *decklist*.

        The deck is vectorized with ``transform`` so that the vectorizer
        fitted in ``__init__`` is reused as-is; re-fitting it on the single
        query deck would replace the feature mapping the model was trained
        with.
        """
        X = self.vectorizer.transform([self.extract_features(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``(average placement, label)`` pairs, lowest average first.

        Clusters with equal averages keep the order in which their label
        first appears in ``self.labels``.
        """
        placement_sums = {}
        deck_counts = {}
        first_seen = []  # labels in order of first appearance

        for label, deck in zip(self.labels, self.decks):
            if label not in placement_sums:
                first_seen.append(label)
                placement_sums[label] = 0
                deck_counts[label] = 0

            placement_sums[label] += self.placements[deck]
            deck_counts[label] += 1

        ranking = [(placement_sums[label] / float(deck_counts[label]), label)
                   for label in first_seen]
        # Sort on the average only; list.sort is stable, so ties stay in
        # first-seen order.
        ranking.sort(key=lambda pair: pair[0])

        return ranking
91
if __name__ == '__main__':
    # Cluster every recorded tournament deck, then print one line per
    # cluster, ordered from lowest to highest average placement.
    tournament_records = mlpccg.TournamentRecords()
    clustering = Clustering(tournament_records)

    for placement_avg, cluster_label in clustering.ranking():
        # Collapse duplicate deck names so each appears once per cluster.
        deck_names = set(deck.name for deck in clustering.clusters[cluster_label])
        print('%.1f - %s' % (placement_avg, '; '.join(deck_names)))