--- /dev/null
+from sklearn.cluster import AffinityPropagation
+from sklearn.feature_extraction import DictVectorizer
+from collections import defaultdict
+import csv
+
+import mlpccg
+
class Clustering(object):
    """Cluster tournament decklists by card composition and rank the clusters.

    On construction every record that has a decklist is turned into a
    per-card count dictionary, vectorized with ``DictVectorizer`` and
    clustered with ``AffinityPropagation``.

    Attributes set by ``__init__``:
        records       -- the records object passed in.
        decks         -- ``mlpccg.DeckList`` objects, in record order.
        features      -- one feature dict per deck (``extract_features``).
        features_alt  -- one feature dict per deck (``extract_features_alt``).
        placements    -- maps each deck to its integer placement.
        vectorizer    -- the fitted ``DictVectorizer``.
        af            -- the fitted ``AffinityPropagation`` model.
        labels        -- cluster label of each deck, parallel to ``decks``.
        n_clusters    -- number of distinct labels, not counting ``-1``.
        clusters      -- maps a cluster label to the list of its decks.
    """

    def __init__(self, records):
        """Load decklists from *records*, vectorize them and fit the model.

        records: object whose ``all()`` yields mappings with ``'Name'``,
            ``'Decklist'`` and ``'Placement'`` entries. Rows whose
            ``'Decklist'`` is falsy are skipped.
        """
        self.records = records
        self.decks = []
        self.features = []
        self.features_alt = []
        self.placements = {}
        self.clusters = defaultdict(list)
        # Snapshot the ids into a list: on Python 3 ``keys()`` is a live view,
        # and the feature set must stay fixed once the vectorizer is fitted.
        self.all_ids = list(mlpccg.CardDb.CARDDB._by_id.keys())

        for record in self.records.all():
            if not record['Decklist']:
                continue
            decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist'])
            self.features.append(self.extract_features(decklist))
            self.features_alt.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            self.placements[decklist] = int(record['Placement'])

        self.vectorizer = DictVectorizer()
        X = self.vectorizer.fit_transform(self.features).toarray()

        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster of its own.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for deck, label in zip(self.decks, self.labels):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a dict mapping every known card id to its count in *deck*.

        Every id in ``self.all_ids`` is present in the result (0 when the
        card is absent). Card ids are lower-cased before counting.

        Raises KeyError if a card's lower-cased id is not in ``self.all_ids``.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return the feature dict used for clustering *deck*.

        Currently this is the per-card count vector produced by
        ``extract_features_alt``.
        """
        return self.extract_features_alt(deck)

    def _extract_summary_features(self, deck):
        """Return aggregate features for *deck* (alternative, currently unused).

        Counts Friend/Mane cards per color and all cards per type, sums
        ``'control'`` and averages ``'power'`` over the number of cards in
        the deck. An empty deck yields an average power of 0.0 instead of
        raising ZeroDivisionError.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0,
            'Problem': 0,
            'Power': 0.0, 'Control': 0}

        for card in deck.cards:
            # Only Friend and Mane cards contribute to the color counts.
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        if deck.cards:
            features['Power'] = features['Power'] / len(deck.cards)

        return features

    def predict(self, decklist):
        """Return the cluster label the fitted model assigns to *decklist*."""
        # Use transform(), not fit_transform(): the new deck must be encoded
        # with the feature mapping learned in __init__, not a refitted one.
        X = self.vectorizer.transform([self.extract_features(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``(average placement, cluster label)`` pairs, lowest average first."""
        totals = {}        # label -> [sum of placements, number of decks]
        first_seen = []    # labels in order of first appearance
        for deck, label in zip(self.decks, self.labels):
            if label not in totals:
                totals[label] = [0, 0]
                first_seen.append(label)
            entry = totals[label]
            entry[0] += self.placements[deck]
            entry[1] += 1

        # float() keeps the division true division under Python 2 as well.
        ranking = [(totals[label][0] / float(totals[label][1]), label)
                   for label in first_seen]
        ranking.sort(key=lambda pair: pair[0])

        return ranking
+
if __name__ == '__main__':
    # Script entry point: cluster all recorded tournament decks and print one
    # line per cluster, ordered by ascending average placement.
    tournament_records = mlpccg.TournamentRecords()
    clustering = Clustering(tournament_records)

    for placement_avg, cluster_label in clustering.ranking():
        # De-duplicate deck names and sort them so the output is reproducible.
        deck_names = sorted(set(deck.name for deck in clustering.clusters[cluster_label]))
        # A single parenthesized argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%.1f - %s' % (placement_avg, '; '.join(deck_names)))
+++ /dev/null
-from sklearn.cluster import AffinityPropagation
-from sklearn.feature_extraction import DictVectorizer
-from collections import defaultdict
-import csv
-
-import mlpccg
-
class Clustering(object):
    """Cluster tournament decklists by card composition and rank the clusters.

    On construction every record that has a decklist is turned into a
    per-card count dictionary, vectorized with ``DictVectorizer`` and
    clustered with ``AffinityPropagation``.

    Attributes set by ``__init__``:
        records       -- the records object passed in.
        decks         -- ``mlpccg.DeckList`` objects, in record order.
        features      -- one feature dict per deck (``extract_features``).
        features_alt  -- one feature dict per deck (``extract_features_alt``).
        placements    -- maps each deck to its integer placement.
        vectorizer    -- the fitted ``DictVectorizer``.
        af            -- the fitted ``AffinityPropagation`` model.
        labels        -- cluster label of each deck, parallel to ``decks``.
        n_clusters    -- number of distinct labels, not counting ``-1``.
        clusters      -- maps a cluster label to the list of its decks.
    """

    def __init__(self, records):
        """Load decklists from *records*, vectorize them and fit the model.

        records: object whose ``all()`` yields mappings with ``'Name'``,
            ``'Decklist'`` and ``'Placement'`` entries. Rows whose
            ``'Decklist'`` is falsy are skipped.
        """
        self.records = records
        self.decks = []
        self.features = []
        self.features_alt = []
        self.placements = {}
        self.clusters = defaultdict(list)
        # Snapshot the ids into a list: on Python 3 ``keys()`` is a live view,
        # and the feature set must stay fixed once the vectorizer is fitted.
        self.all_ids = list(mlpccg.CardDb.CARDDB._by_id.keys())

        for record in self.records.all():
            if not record['Decklist']:
                continue
            decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist'])
            self.features.append(self.extract_features(decklist))
            self.features_alt.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            self.placements[decklist] = int(record['Placement'])

        self.vectorizer = DictVectorizer()
        X = self.vectorizer.fit_transform(self.features).toarray()

        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster of its own.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for deck, label in zip(self.decks, self.labels):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a dict mapping every known card id to its count in *deck*.

        Every id in ``self.all_ids`` is present in the result (0 when the
        card is absent). Card ids are lower-cased before counting.

        Raises KeyError if a card's lower-cased id is not in ``self.all_ids``.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return the feature dict used for clustering *deck*.

        Currently this is the per-card count vector produced by
        ``extract_features_alt``.
        """
        return self.extract_features_alt(deck)

    def _extract_summary_features(self, deck):
        """Return aggregate features for *deck* (alternative, currently unused).

        Counts Friend/Mane cards per color and all cards per type, sums
        ``'control'`` and averages ``'power'`` over the number of cards in
        the deck. An empty deck yields an average power of 0.0 instead of
        raising ZeroDivisionError.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0,
            'Problem': 0,
            'Power': 0.0, 'Control': 0}

        for card in deck.cards:
            # Only Friend and Mane cards contribute to the color counts.
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        if deck.cards:
            features['Power'] = features['Power'] / len(deck.cards)

        return features

    def predict(self, decklist):
        """Return the cluster label the fitted model assigns to *decklist*."""
        # Use transform(), not fit_transform(): the new deck must be encoded
        # with the feature mapping learned in __init__, not a refitted one.
        X = self.vectorizer.transform([self.extract_features(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``(average placement, cluster label)`` pairs, lowest average first."""
        totals = {}        # label -> [sum of placements, number of decks]
        first_seen = []    # labels in order of first appearance
        for deck, label in zip(self.decks, self.labels):
            if label not in totals:
                totals[label] = [0, 0]
                first_seen.append(label)
            entry = totals[label]
            entry[0] += self.placements[deck]
            entry[1] += 1

        # float() keeps the division true division under Python 2 as well.
        ranking = [(totals[label][0] / float(totals[label][1]), label)
                   for label in first_seen]
        ranking.sort(key=lambda pair: pair[0])

        return ranking
-
if __name__ == '__main__':
    # Script entry point: cluster all recorded tournament decks and print one
    # line per cluster, ordered by ascending average placement.
    tournament_records = mlpccg.TournamentRecords()
    clustering = Clustering(tournament_records)

    for placement_avg, cluster_label in clustering.ranking():
        # De-duplicate deck names and sort them so the output is reproducible.
        deck_names = sorted(set(deck.name for deck in clustering.clusters[cluster_label]))
        # A single parenthesized argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%.1f - %s' % (placement_avg, '; '.join(deck_names)))