+++ /dev/null
-from sklearn.cluster import AffinityPropagation
-from sklearn.feature_extraction import DictVectorizer
-from collections import defaultdict
-import csv
-
-import mlpccg
-
class Clustering(object):
    """Cluster tournament decklists by their card composition.

    Construction walks every record that has a decklist, turns each deck into
    a feature dictionary, vectorizes the dictionaries with a
    ``DictVectorizer`` and fits ``AffinityPropagation`` on the result.

    Attributes set by ``__init__``:
        records: the record source passed in (must provide ``all()``).
        decks: one ``mlpccg.DeckList`` per record that has a decklist.
        features, features_alt: feature dicts, parallel to ``decks``.
        placements: maps each deck to its integer placement.
        clusters: maps a cluster label to the list of decks carrying it.
        all_ids: the keys of ``mlpccg.CardDb.CARDDB._by_id``.
        vectorizer, af: the fitted vectorizer and clustering model.
        labels: cluster label per deck, parallel to ``decks``.
        n_clusters: number of distinct labels, not counting ``-1``.
    """

    def __init__(self, records):
        """Load the decklists from *records* and fit the clustering model.

        Args:
            records: object whose ``all()`` yields mappings with the keys
                ``'Decklist'``, ``'Name'`` and ``'Placement'``. Records with
                a falsy ``'Decklist'`` are skipped.
        """
        self.records = records
        self.decks = []
        self.features = []
        self.features_alt = []
        self.placements = {}
        self.clusters = defaultdict(list)
        self.all_ids = mlpccg.CardDb.CARDDB._by_id.keys()

        for record in self.records.all():
            if not record['Decklist']:
                continue

            decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist'])
            self.features.append(self.extract_features(decklist))
            self.features_alt.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            self.placements[decklist] = int(record['Placement'])

        self.vectorizer = DictVectorizer()
        X = self.vectorizer.fit_transform(self.features).toarray()

        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster of its own.
        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

        for label, deck in zip(self.labels, self.decks):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a ``{card id: copies in deck}`` dict covering every known id.

        Every id in ``self.all_ids`` is present (0 when the deck does not
        contain the card). Card ids from the deck are lower-cased before the
        lookup; an id that is not in ``self.all_ids`` raises ``KeyError``.
        """
        features = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            features[card['id'].lower()] += 1

        return features

    def extract_features(self, deck):
        """Return the feature dict used for clustering and prediction.

        Currently this is the per-card count from ``extract_features_alt``.
        The aggregate alternative lives in ``_extract_summary_features``.
        """
        return self.extract_features_alt(deck)

    def _extract_summary_features(self, deck):
        """Return aggregate colour/type/power/control features for *deck*.

        Not used by the clustering at present; kept as an alternative to the
        per-card counts. Colours are counted only for 'Friend' and 'Mane'
        cards, 'Power' is the mean power per card and 'Control' is the total
        control. A card type or colour outside the known keys raises
        ``KeyError``.
        """
        features = {
            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
            'Power': 0.0, 'Control': 0}

        for card in deck.cards:
            if card['type'] in ('Friend', 'Mane'):
                features[card['color']] += 1

            features[card['type']] += 1

            if 'power' in card:
                features['Power'] += card['power']

            if 'control' in card:
                features['Control'] += card['control']

        # Guard the average so an empty deck yields 0.0 instead of dividing by zero.
        if deck.cards:
            features['Power'] = features['Power'] / len(deck.cards)

        return features

    def predict(self, decklist):
        """Return the cluster label the fitted model assigns to *decklist*.

        The already-fitted vectorizer is reused via ``transform``; refitting
        it on the single new sample would replace the vocabulary learned
        during construction.
        """
        X = self.vectorizer.transform([self.extract_features(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``(average placement, cluster label)`` pairs, lowest average first."""
        # label -> [sum of placements, number of decks]
        totals = defaultdict(lambda: [0, 0])
        for label, deck in zip(self.labels, self.decks):
            entry = totals[label]
            entry[0] += self.placements[deck]
            entry[1] += 1

        averages = [(placement_sum / float(count), label)
                    for label, (placement_sum, count) in totals.items()]

        # Sort on the average only, so labels never take part in comparisons.
        return sorted(averages, key=lambda pair: pair[0])
-
def main():
    """Cluster all tournament records and print one line per cluster.

    Lines are printed in ascending order of average placement, each showing
    the average followed by the distinct deck names in that cluster.
    """
    tournament_records = mlpccg.TournamentRecords()
    clustering = Clustering(tournament_records)

    for placement_avg, cluster_label in clustering.ranking():
        deck_names = set(deck.name for deck in clustering.clusters[cluster_label])
        # Parenthesised single-argument form works as a print statement on
        # Python 2 and as a function call on Python 3.
        print('%.1f - %s' % (placement_avg, '; '.join(deck_names)))


if __name__ == '__main__':
    main()