# Recovered from a gitweb "blobdiff_plain" page whose diff deletes
# mlpccg/clustering.py (index e1fc959..0000000) in the mlpccg-meta repository.
# The leading "-" diff markers and the collapsed whitespace have been undone.
"""Cluster tournament decklists by card composition and rank the clusters."""

from collections import defaultdict
import csv  # not referenced in this module; kept from the original import list

from sklearn.cluster import AffinityPropagation
from sklearn.feature_extraction import DictVectorizer

import mlpccg


class Clustering(object):
    """Group the decklists referenced by tournament records into clusters.

    All of the work happens at construction time: every record that has a
    decklist is loaded, turned into a feature dict, vectorized and fed to
    ``AffinityPropagation``.

    Attributes set by ``__init__``:
        records: the object passed in; must provide ``all()``.
        decks: the loaded ``mlpccg.DeckList`` objects, in record order.
        features: one feature dict per deck (from ``extract_features``).
        features_alt: one feature dict per deck (from ``extract_features_alt``).
        placements: maps each deck to its integer placement.
        vectorizer: the ``DictVectorizer`` fitted on ``features``.
        af: the fitted ``AffinityPropagation`` estimator.
        labels: cluster label of each deck, parallel to ``decks``.
        n_clusters: number of distinct labels, not counting ``-1``.
        clusters: maps a cluster label to the list of decks carrying it.
    """

    def __init__(self, records):
        """Load the decklists from *records* and cluster them.

        Args:
            records: object whose ``all()`` yields mappings with the keys
                ``'Decklist'``, ``'Name'`` and ``'Placement'``.  Records with
                a falsy ``'Decklist'`` are skipped.
        """
        self.records = records
        self.decks = []
        self.features = []
        self.features_alt = []
        self.placements = {}
        self.clusters = defaultdict(list)
        # Materialised once so it can be reused for every deck.
        self.all_ids = list(mlpccg.CardDb.CARDDB._by_id.keys())

        for record in self.records.all():
            if not record['Decklist']:
                continue
            decklist = mlpccg.DeckList(name=record['Name'],
                                       url=record['Decklist'])
            self.features.append(self.extract_features(decklist))
            self.features_alt.append(self.extract_features_alt(decklist))
            self.decks.append(decklist)
            self.placements[decklist] = int(record['Placement'])

        self.vectorizer = DictVectorizer()
        X = self.vectorizer.fit_transform(self.features).toarray()

        self.af = AffinityPropagation().fit(X)
        self.labels = self.af.labels_

        # A label of -1 is not counted as a cluster.
        # NOTE(review): presumably the marker scikit-learn assigns when no
        # cluster could be determined -- confirm against the installed version.
        self.n_clusters = (len(set(self.labels))
                           - (1 if -1 in self.labels else 0))

        for label, deck in zip(self.labels, self.decks):
            self.clusters[label].append(deck)

    def extract_features_alt(self, deck):
        """Return a dict mapping every known card id to its count in *deck*.

        Every id in ``self.all_ids`` is present in the result, with 0 for
        cards the deck does not contain.

        Raises:
            KeyError: if a card's lower-cased ``'id'`` is not in
                ``self.all_ids``.
        """
        counts = dict.fromkeys(self.all_ids, 0)
        for card in deck.cards:
            counts[card['id'].lower()] += 1
        return counts

    def extract_features(self, deck):
        """Return the feature dict used for clustering and prediction.

        This is the per-card-id count produced by ``extract_features_alt``.
        """
        return self.extract_features_alt(deck)

    def _extract_summary_features(self, deck):
        """Return aggregate colour/type/power/control features for *deck*.

        Not used by the clustering pipeline; it is the alternative feature
        set that sat, unreachable, after the ``return`` in
        ``extract_features``.

        The result counts cards per type, counts colours for 'Friend' and
        'Mane' cards only, stores the mean of the cards' ``'power'`` values
        over all cards under ``'Power'`` and the sum of their ``'control'``
        values under ``'Control'``.  ``'Power'`` stays 0.0 for an empty deck.

        Raises:
            KeyError: if a card's type (or, for 'Friend'/'Mane' cards, its
                colour) is not one of the predefined feature names.
        """
        summary = dict.fromkeys(
            ('Blue', 'Yellow', 'Purple', 'White', 'Orange', 'Pink',
             'Friend', 'Event', 'Resource', 'Troublemaker', 'Mane',
             'Problem'),
            0)
        summary['Power'] = 0.0
        summary['Control'] = 0

        for card in deck.cards:
            card_type = card['type']
            if card_type in ('Friend', 'Mane'):
                summary[card['color']] += 1
            summary[card_type] += 1
            if 'power' in card:
                summary['Power'] += card['power']
            if 'control' in card:
                summary['Control'] += card['control']

        card_count = len(deck.cards)
        if card_count:
            # 'Power' starts as a float, so this is true division.
            summary['Power'] = summary['Power'] / card_count

        return summary

    def predict(self, decklist):
        """Return the cluster label predicted for *decklist*.

        The features are encoded with the vectorizer fitted in ``__init__``.
        ``transform`` is used rather than ``fit_transform`` so the fitted
        feature mapping is reused and left untouched.
        """
        X = self.vectorizer.transform(
            [self.extract_features(decklist)]).toarray()
        return self.af.predict(X)[0]

    def ranking(self):
        """Return ``(average placement, cluster label)`` pairs.

        The pairs are sorted by ascending average placement; each average is
        a float taken over the decks assigned to that cluster.
        """
        # label -> [sum of placements, number of decks]
        totals = defaultdict(lambda: [0, 0])
        for label, deck in zip(self.labels, self.decks):
            entry = totals[label]
            entry[0] += self.placements[deck]
            entry[1] += 1

        averages = [(placement_sum / float(deck_count), label)
                    for label, (placement_sum, deck_count) in totals.items()]
        # Sort on the average only, so labels never take part in comparisons.
        return sorted(averages, key=lambda pair: pair[0])


def main():
    """Print one line per cluster: average placement and member deck names.

    Clusters are listed in ascending order of average placement; the
    distinct deck names of each cluster are printed in sorted order.
    """
    clustering = Clustering(mlpccg.TournamentRecords())

    for placement_avg, cluster_label in clustering.ranking():
        names = sorted(set(deck.name
                           for deck in clustering.clusters[cluster_label]))
        # Single parenthesised argument: valid on both Python 2 and 3.
        print('%.1f - %s' % (placement_avg, '; '.join(names)))


if __name__ == '__main__':
    main()