reworking of file structure
[mlpccg-meta.git] / mlpccg / Clustering.py
1 from sklearn.cluster import AffinityPropagation
2 from sklearn.feature_extraction import DictVectorizer
3 from collections import defaultdict
4
5 from mlpccg.CardDb import CARDDB
6 from mlpccg.DeckList import DeckList
7
class Clustering:
    """Group decklists into clusters with affinity propagation.

    Every record's decklist is turned into a per-card-id count vector
    (see ``extract_features_alt``), vectorized with a ``DictVectorizer`` and
    fed to ``AffinityPropagation``.

    Attributes set by ``__init__``:
        records:    the records passed in.
        decks:      ``record['decklist']`` for every record, in input order.
        features:   the feature dict extracted for each deck (same order).
        placements: maps each decklist to ``int(record['placement'])``.
        all_ids:    the keys of ``CARDDB._by_id``.
        vectorizer: the fitted ``DictVectorizer``.
        af:         the fitted ``AffinityPropagation`` model.
        labels:     cluster label per deck (``af.labels_``).
        n_clusters: number of distinct labels, not counting ``-1``.
        clusters:   maps a cluster label to the list of decks carrying it.
    """

    def __init__(self, records):
        """Extract features from ``records`` and fit the clustering model.

        Args:
            records: iterable of mappings providing a ``'decklist'`` entry
                (used as a dictionary key, so it must be hashable, and must
                expose ``.cards``) and a ``'placement'`` entry convertible
                with ``int()``.
        """
        self.decks = []
        self.features = []
        self.placements = {}
        self.clusters = defaultdict(list)
        self.all_ids = CARDDB._by_id.keys()
        self.labels = []
        self.records = records
        self.vectorizer = DictVectorizer()

        for record in self.records:
            deck = record['decklist']
            self.features.append(self.extract_features_alt(deck))
            self.decks.append(deck)
            self.placements[deck] = int(record['placement'])

        X = self.vectorizer.fit_transform(self.features).toarray()
        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster of its own.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for label, deck in zip(self.labels, self.decks):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a ``{card_id: copies}`` dict covering every id in ``all_ids``.

        Card ids are lower-cased before counting. Ids missing from
        ``self.all_ids`` raise ``KeyError``, because only known ids are
        pre-seeded with a zero count.

        Args:
            deck: object whose ``.cards`` is an iterable of mappings with an
                ``'id'`` string entry.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return aggregate colour / type / power / control features of a deck.

        * colour counters are incremented only for ``Friend`` and ``Mane`` cards;
        * type counters are incremented for every card (an unlisted type
          raises ``KeyError``);
        * ``'Power'`` is the summed ``'power'`` divided by the total number of
          cards in the deck (cards without power still count in the divisor);
        * ``'Control'`` is the plain sum of ``'control'`` values.

        An empty deck yields all-zero features instead of dividing by zero.

        Args:
            deck: object whose ``.cards`` is a sized iterable of mappings.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
            'Power': 0.0, 'Control': 0}

        for card in deck.cards:
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            # ``in`` instead of dict.has_key(), which no longer exists on
            # Python 3.
            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        card_count = len(deck.cards)
        if card_count:
            # 'Power' starts as a float, so this is true division everywhere.
            features['Power'] = features['Power'] / card_count

        return features

    def predict(self, decklist):
        """Return the cluster label the fitted model assigns to ``decklist``.

        The already-fitted vectorizer is applied with ``transform`` (not
        ``fit_transform``) so that the feature columns stay the ones the
        model was trained on and the vectorizer is not refitted as a side
        effect of predicting.
        """
        X = self.vectorizer.transform([self.extract_features_alt(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``[(average_placement, label), ...]`` sorted by average.

        The average is taken over the placements of all decks carrying the
        label; the list is ordered by ascending average.
        """
        # label -> [sum of placements, number of decks]
        totals = defaultdict(lambda: [0, 0])
        for label, deck in zip(self.labels, self.decks):
            entry = totals[label]
            entry[0] += self.placements[deck]
            entry[1] += 1

        ranking = [(placement_sum / float(count), label)
                   for label, (placement_sum, count) in totals.items()]
        # Sort on the average only, so ties keep their encounter order.
        ranking.sort(key=lambda pair: pair[0])

        return ranking
83
84 return ranking