x# Please enter the commit message for your changes. Lines starting
author Jess <jessicatz.fairymeadow@gmail.com>
Fri, 22 Aug 2014 13:44:38 +0000 (15:44 +0200)
committer Jess <jessicatz.fairymeadow@gmail.com>
Fri, 22 Aug 2014 13:44:38 +0000 (15:44 +0200)
mlpccg/Clustering.py [new file with mode: 0644]
mlpccg/clustering.py [deleted file]

diff --git a/mlpccg/Clustering.py b/mlpccg/Clustering.py
new file mode 100644 (file)
index 0000000..e1fc959
--- /dev/null
+++ b/mlpccg/Clustering.py
@@ -0,0 +1,97 @@
+from sklearn.cluster import AffinityPropagation
+from sklearn.feature_extraction import DictVectorizer
+from collections import defaultdict
+import csv
+
+import mlpccg
+
+class Clustering:
+    def __init__(self, records):
+        self.records = records
+        self.decks = []
+        self.features = []
+        self.features_alt = []
+        self.placements = {}
+        self.clusters = defaultdict(list)
+        self.all_ids = mlpccg.CardDb.CARDDB._by_id.keys()
+
+        for record in self.records.all():
+            if record['Decklist']:
+                decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist'])
+                self.features += [self.extract_features(decklist)]
+                self.features_alt += [self.extract_features_alt(decklist)]
+                self.decks += [decklist]
+                self.placements[decklist] = int(record['Placement'])
+
+        self.vectorizer = DictVectorizer()
+        X = self.vectorizer.fit_transform(self.features).toarray()
+
+        self.af = AffinityPropagation().fit(X)
+        self.labels = self.af.labels_
+
+        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)
+
+        for i, deck in enumerate(self.decks):
+            self.clusters[self.labels[i]] += [deck]
+
+    def extract_features_alt(self, deck):
+        features = dict(zip(self.all_ids, [0] * len(self.all_ids)))
+        for card in deck.cards:
+            features[card['id'].lower()] += 1
+
+        return features
+
+    def extract_features(self, deck):
+        return self.extract_features_alt(deck)
+
+        features = {
+            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
+            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
+            'Power': 0.0, 'Control': 0}
+
+        for card in deck.cards:
+            if card['type'] == 'Friend' or card['type'] == 'Mane':
+                features[card['color']] += 1
+
+            features[card['type']] += 1
+
+            if card.has_key('power'):
+                features['Power'] += card['power']
+
+            if card.has_key('control'):
+                features['Control'] += card['control']
+
+            features['Power'] = features['Power'] / len(deck.cards)
+
+        return features
+
+    def predict(self, decklist):
+        X = self.vectorizer.fit_transform([self.extract_features(decklist)]).toarray()
+        return self.af.predict(X)[0]
+
+    def ranking(self):
+        ranking = []
+        cluster_placements = defaultdict(dict)
+        for i, deck in enumerate(self.decks):
+            stats = cluster_placements[self.labels[i]]
+            try:
+                stats['sum'] += self.placements[deck]
+                stats['num'] += 1
+            except KeyError:
+                stats['label'] = self.labels[i]
+                stats['sum'] = self.placements[deck]
+                stats['num'] = 1
+
+            stats['avg'] = stats['sum'] / float(stats['num'])
+
+        for stats in sorted(cluster_placements.values(), key=lambda x: x['avg']):
+            ranking += [(stats['avg'], stats['label'])]
+
+        return ranking
+
+if __name__ == '__main__':
+    tournament_records = mlpccg.TournamentRecords()
+    clustering = Clustering(tournament_records)
+
+    for placement_avg, cluster_label in clustering.ranking():
+        print '%.1f - %s' % (placement_avg, '; '.join(list(set(map(lambda x: x.name, clustering.clusters[cluster_label])))))
diff --git a/mlpccg/clustering.py b/mlpccg/clustering.py
deleted file mode 100644 (file)
index e1fc959..0000000
--- a/mlpccg/clustering.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from sklearn.cluster import AffinityPropagation
-from sklearn.feature_extraction import DictVectorizer
-from collections import defaultdict
-import csv
-
-import mlpccg
-
-class Clustering:
-    def __init__(self, records):
-        self.records = records
-        self.decks = []
-        self.features = []
-        self.features_alt = []
-        self.placements = {}
-        self.clusters = defaultdict(list)
-        self.all_ids = mlpccg.CardDb.CARDDB._by_id.keys()
-
-        for record in self.records.all():
-            if record['Decklist']:
-                decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist'])
-                self.features += [self.extract_features(decklist)]
-                self.features_alt += [self.extract_features_alt(decklist)]
-                self.decks += [decklist]
-                self.placements[decklist] = int(record['Placement'])
-
-        self.vectorizer = DictVectorizer()
-        X = self.vectorizer.fit_transform(self.features).toarray()
-
-        self.af = AffinityPropagation().fit(X)
-        self.labels = self.af.labels_
-
-        self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)
-
-        for i, deck in enumerate(self.decks):
-            self.clusters[self.labels[i]] += [deck]
-
-    def extract_features_alt(self, deck):
-        features = dict(zip(self.all_ids, [0] * len(self.all_ids)))
-        for card in deck.cards:
-            features[card['id'].lower()] += 1
-
-        return features
-
-    def extract_features(self, deck):
-        return self.extract_features_alt(deck)
-
-        features = {
-            'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
-            'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
-            'Power': 0.0, 'Control': 0}
-
-        for card in deck.cards:
-            if card['type'] == 'Friend' or card['type'] == 'Mane':
-                features[card['color']] += 1
-
-            features[card['type']] += 1
-
-            if card.has_key('power'):
-                features['Power'] += card['power']
-
-            if card.has_key('control'):
-                features['Control'] += card['control']
-
-            features['Power'] = features['Power'] / len(deck.cards)
-
-        return features
-
-    def predict(self, decklist):
-        X = self.vectorizer.fit_transform([self.extract_features(decklist)]).toarray()
-        return self.af.predict(X)[0]
-
-    def ranking(self):
-        ranking = []
-        cluster_placements = defaultdict(dict)
-        for i, deck in enumerate(self.decks):
-            stats = cluster_placements[self.labels[i]]
-            try:
-                stats['sum'] += self.placements[deck]
-                stats['num'] += 1
-            except KeyError:
-                stats['label'] = self.labels[i]
-                stats['sum'] = self.placements[deck]
-                stats['num'] = 1
-
-            stats['avg'] = stats['sum'] / float(stats['num'])
-
-        for stats in sorted(cluster_placements.values(), key=lambda x: x['avg']):
-            ranking += [(stats['avg'], stats['label'])]
-
-        return ranking
-
-if __name__ == '__main__':
-    tournament_records = mlpccg.TournamentRecords()
-    clustering = Clustering(tournament_records)
-
-    for placement_avg, cluster_label in clustering.ranking():
-        print '%.1f - %s' % (placement_avg, '; '.join(list(set(map(lambda x: x.name, clustering.clusters[cluster_label])))))