Skip to content

Commit

Permalink
optimise collapsed_contacts by using split-apply-combine rather than index iteration.
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmac committed Sep 27, 2017
1 parent a8fe3c4 commit 2b4890f
Showing 1 changed file with 4 additions and 10 deletions.
14 changes: 4 additions & 10 deletions prointvar/arpeggio.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,16 +500,10 @@ def collapsed_contacts(data, col_method='full'):
col_names = list(col_min)
table = table.drop(excluded, axis=1)
# aggregate results
int_types = []
for ix in table.index:
try:
agg = [k for k in col_names if k in table if bool(table.loc[ix, k])]
except ValueError:
# checking on a pre-aggregated entry (i.e. agg_method=='unique')
agg = [k for k in col_names if k in table if bool(table.loc[ix, k].any())]
int_types.append(agg)
assert len(table) == len(int_types)
table['Int_Types'] = int_types
melted = pd.melt(table.reset_index(), id_vars=['index'], value_vars=col_names, var_name='Int_Types')
melted.query('value == 1', inplace=True)
aggregated = melted.groupby(['index'])['Int_Types'].aggregate(lambda x: set(x))
table = table.join(aggregated)
# finally remove all the columns that are not needed anymore
table = table.drop(col_names, axis=1)
return table
Expand Down

0 comments on commit 2b4890f

Please sign in to comment.