From: Jess Date: Fri, 22 Aug 2014 13:44:38 +0000 (+0200) Subject: x# Please enter the commit message for your changes. Lines starting X-Git-Url: https://git.yukkurigames.com/?p=mlpccg-meta.git;a=commitdiff_plain;h=8fa58d46994cbd5d4f70204d2b1b57ba4cc4a516;hp=a7a7ac1456426456a5a33ebeac28693804693716;ds=sidebyside x# Please enter the commit message for your changes. Lines starting --- diff --git a/mlpccg/Clustering.py b/mlpccg/Clustering.py new file mode 100644 index 0000000..e1fc959 --- /dev/null +++ b/mlpccg/Clustering.py @@ -0,0 +1,97 @@ +from sklearn.cluster import AffinityPropagation +from sklearn.feature_extraction import DictVectorizer +from collections import defaultdict +import csv + +import mlpccg + +class Clustering: + def __init__(self, records): + self.records = records + self.decks = [] + self.features = [] + self.features_alt = [] + self.placements = {} + self.clusters = defaultdict(list) + self.all_ids = mlpccg.CardDb.CARDDB._by_id.keys() + + for record in self.records.all(): + if record['Decklist']: + decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist']) + self.features += [self.extract_features(decklist)] + self.features_alt += [self.extract_features_alt(decklist)] + self.decks += [decklist] + self.placements[decklist] = int(record['Placement']) + + self.vectorizer = DictVectorizer() + X = self.vectorizer.fit_transform(self.features).toarray() + + self.af = AffinityPropagation().fit(X) + self.labels = self.af.labels_ + + self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0) + + for i, deck in enumerate(self.decks): + self.clusters[self.labels[i]] += [deck] + + def extract_features_alt(self, deck): + features = dict(zip(self.all_ids, [0] * len(self.all_ids))) + for card in deck.cards: + features[card['id'].lower()] += 1 + + return features + + def extract_features(self, deck): + return self.extract_features_alt(deck) + + features = { + 'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0, + 'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0, + 'Power': 0.0, 'Control': 0} + + for card in deck.cards: + if card['type'] == 'Friend' or card['type'] == 'Mane': + features[card['color']] += 1 + + features[card['type']] += 1 + + if card.has_key('power'): + features['Power'] += card['power'] + + if card.has_key('control'): + features['Control'] += card['control'] + + features['Power'] = features['Power'] / len(deck.cards) + + return features + + def predict(self, decklist): + X = self.vectorizer.fit_transform([self.extract_features(decklist)]).toarray() + return self.af.predict(X)[0] + + def ranking(self): + ranking = [] + cluster_placements = defaultdict(dict) + for i, deck in enumerate(self.decks): + stats = cluster_placements[self.labels[i]] + try: + stats['sum'] += self.placements[deck] + stats['num'] += 1 + except KeyError: + stats['label'] = self.labels[i] + stats['sum'] = self.placements[deck] + stats['num'] = 1 + + stats['avg'] = stats['sum'] / float(stats['num']) + + for stats in sorted(cluster_placements.values(), key=lambda x: x['avg']): + ranking += [(stats['avg'], stats['label'])] + + return ranking + +if __name__ == '__main__': + tournament_records = mlpccg.TournamentRecords() + clustering = Clustering(tournament_records) + + for placement_avg, cluster_label in clustering.ranking(): + print '%.1f - %s' % (placement_avg, '; '.join(list(set(map(lambda x: x.name, clustering.clusters[cluster_label]))))) diff --git a/mlpccg/clustering.py b/mlpccg/clustering.py deleted file mode 100644 index e1fc959..0000000 --- a/mlpccg/clustering.py +++ /dev/null @@ -1,97 +0,0 @@ -from sklearn.cluster import AffinityPropagation -from sklearn.feature_extraction import DictVectorizer -from collections import defaultdict -import csv - -import mlpccg - -class Clustering: - def __init__(self, records): - self.records = records - self.decks = [] - self.features = [] - self.features_alt = [] - self.placements = {} - self.clusters = defaultdict(list) - self.all_ids = mlpccg.CardDb.CARDDB._by_id.keys() - - for record in self.records.all(): - if record['Decklist']: - decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist']) - self.features += [self.extract_features(decklist)] - self.features_alt += [self.extract_features_alt(decklist)] - self.decks += [decklist] - self.placements[decklist] = int(record['Placement']) - - self.vectorizer = DictVectorizer() - X = self.vectorizer.fit_transform(self.features).toarray() - - self.af = AffinityPropagation().fit(X) - self.labels = self.af.labels_ - - self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0) - - for i, deck in enumerate(self.decks): - self.clusters[self.labels[i]] += [deck] - - def extract_features_alt(self, deck): - features = dict(zip(self.all_ids, [0] * len(self.all_ids))) - for card in deck.cards: - features[card['id'].lower()] += 1 - - return features - - def extract_features(self, deck): - return self.extract_features_alt(deck) - - features = { - 'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0, - 'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0, - 'Power': 0.0, 'Control': 0} - - for card in deck.cards: - if card['type'] == 'Friend' or card['type'] == 'Mane': - features[card['color']] += 1 - - features[card['type']] += 1 - - if card.has_key('power'): - features['Power'] += card['power'] - - if card.has_key('control'): - features['Control'] += card['control'] - - features['Power'] = features['Power'] / len(deck.cards) - - return features - - def predict(self, decklist): - X = self.vectorizer.fit_transform([self.extract_features(decklist)]).toarray() - return self.af.predict(X)[0] - - def ranking(self): - ranking = [] - cluster_placements = defaultdict(dict) - for i, deck in enumerate(self.decks): - stats = cluster_placements[self.labels[i]] - try: - stats['sum'] += self.placements[deck] - stats['num'] += 1 - except KeyError: - stats['label'] = self.labels[i] - stats['sum'] = self.placements[deck] - stats['num'] = 1 - - stats['avg'] = stats['sum'] / float(stats['num']) - - for stats in sorted(cluster_placements.values(), key=lambda x: x['avg']): - ranking += [(stats['avg'], stats['label'])] - - return ranking - -if __name__ == '__main__': - tournament_records = mlpccg.TournamentRecords() - clustering = Clustering(tournament_records) - - for placement_avg, cluster_label in clustering.ranking(): - print '%.1f - %s' % (placement_avg, '; '.join(list(set(map(lambda x: x.name, clustering.clusters[cluster_label])))))