1 from sklearn
.cluster
import AffinityPropagation
2 from sklearn
.feature_extraction
import DictVectorizer
3 from collections
import defaultdict
6 from mlpccg
.CardDb
import CARDDB
7 from mlpccg
.DeckList
import DeckList
def __init__(self, records):
    """Load deck lists from *records* and cluster them by extracted features.

    Args:
        records: Object exposing ``all()``, which yields mapping-like rows
            with the keys ``'decklist_url'``, ``'decklist_name'`` and
            ``'placement'``. Rows with a falsy ``'decklist_url'`` are
            skipped.

    After construction the instance exposes ``features``, ``features_alt``,
    ``decks``, ``placements`` (deck -> int placement), ``vectorizer``,
    ``af`` (the fitted AffinityPropagation model), ``labels``,
    ``n_clusters`` and ``clusters`` (label -> list of decks).
    """
    self.records = records

    # Every accumulator must exist before the loading loop appends to it.
    self.features = []
    self.features_alt = []
    self.decks = []
    self.placements = {}
    self.clusters = defaultdict(list)
    self.all_ids = CARDDB._by_id.keys()

    for record in self.records.all():
        if not record['decklist_url']:
            continue
        decklist = DeckList(name=record['decklist_name'],
                            url=record['decklist_url'])
        self.features.append(self.extract_features(decklist))
        self.features_alt.append(self.extract_features_alt(decklist))
        self.decks.append(decklist)
        self.placements[decklist] = int(record['placement'])

    # Turn the per-deck feature dicts into a dense matrix for clustering.
    self.vectorizer = DictVectorizer()
    X = self.vectorizer.fit_transform(self.features).toarray()

    self.af = AffinityPropagation().fit(X)
    self.labels = self.af.labels_

    # A label of -1, if present, is not counted as a cluster.
    self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

    # labels[i] belongs to decks[i]; group the decks by their label.
    for label, deck in zip(self.labels, self.decks):
        self.clusters[label].append(deck)
def extract_features_alt(self, deck):
    """Return a per-card-id count vector for *deck*.

    Args:
        deck: Object whose ``cards`` attribute is an iterable of mappings,
            each with an ``'id'`` string.

    Returns:
        dict mapping every id in ``self.all_ids`` to the number of times
        it occurs in ``deck.cards`` (0 for cards not in the deck). Card ids
        are lower-cased before counting.

    Raises:
        KeyError: if a lower-cased card id is not in ``self.all_ids``.
    """
    features = dict.fromkeys(self.all_ids, 0)
    for card in deck.cards:
        features[card['id'].lower()] += 1
    # Hand the counts back; callers pass this dict to the vectorizer.
    return features
def extract_features(self, deck):
    """Return the feature dict used for clustering *deck*.

    Delegates to ``extract_features_alt`` (per-card-id counts). The
    colour/type summary is available separately through
    ``_extract_summary_features`` and is not used here.
    """
    return self.extract_features_alt(deck)


def _extract_summary_features(self, deck):
    """Return colour/type counts plus power and control totals for *deck*.

    NOTE(review): not called by ``extract_features``; kept as an
    alternative feature set. The loop nesting here is assumed - confirm
    before switching to it.

    Args:
        deck: Object whose ``cards`` attribute is a sequence of dict-like
            cards with ``'type'`` and ``'color'`` keys and optional
            ``'power'`` / ``'control'`` keys.

    Returns:
        dict with one counter per colour and per card type, ``'Power'``
        as the mean power per card (0.0 for an empty deck) and
        ``'Control'`` as the summed control.
    """
    features = {
        'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
        'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
        'Power': 0.0, 'Control': 0}

    for card in deck.cards:
        # Colour is only tallied for Friend and Mane cards.
        if card['type'] == 'Friend' or card['type'] == 'Mane':
            features[card['color']] += 1

        features[card['type']] += 1

        # `in` replaces dict.has_key, which does not exist on Python 3.
        if 'power' in card:
            features['Power'] += card['power']

        if 'control' in card:
            features['Control'] += card['control']

    # Average power per card; skip the division for an empty deck.
    if deck.cards:
        features['Power'] = features['Power'] / len(deck.cards)

    return features
def predict(self, decklist):
    """Return the cluster label predicted for *decklist*.

    The deck's features are encoded with the vectorizer that was already
    fitted, using ``transform`` rather than ``fit_transform``. Refitting
    on this single sample would overwrite the fitted vocabulary and could
    change the column layout the clustering model was trained on.

    Args:
        decklist: Deck accepted by ``extract_features``.

    Returns:
        The first (only) element of ``self.af.predict`` for the encoded
        deck.
    """
    features = self.extract_features(decklist)
    X = self.vectorizer.transform([features]).toarray()
    return self.af.predict(X)[0]
75 cluster_placements
= defaultdict(dict)
76 for i
, deck
in enumerate(self
.decks
):
77 stats
= cluster_placements
[self
.labels
[i
]]
79 stats
['sum'] += self
.placements
[deck
]
82 stats
['label'] = self
.labels
[i
]
83 stats
['sum'] = self
.placements
[deck
]
86 stats
['avg'] = stats
['sum'] / float(stats
['num'])
88 for stats
in sorted(cluster_placements
.values(), key
=lambda x
: x
['avg']):
89 ranking
+= [(stats
['avg'], stats
['label'])]