1+ from typing import Any , Callable , Dict , List , Optional , Tuple
12import pandas as pd
23import numpy as np
34from functools import reduce
# Module-level side effect: lift pandas' display limits so printed frames are
# never truncated. One call per option keeps each setting explicit.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
910
1011
def _assign_dte(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``data`` with a ``dte`` (days to expiration) column.

    ``dte`` is the whole-day part of ``expiration - quote_date``. Both columns
    must be datetime-like so that their difference exposes ``.dt.days``.
    The input frame is not modified.
    """
    days_to_expiry = (data["expiration"] - data["quote_date"]).dt.days
    return data.assign(dte=days_to_expiry)
1315
1416
def _trim(data: pd.DataFrame, col: str, lower: float, upper: float) -> pd.DataFrame:
    """Keep the rows of ``data`` whose ``col`` value lies in ``[lower, upper]``.

    Both bounds are inclusive. Rows where ``col`` is NaN are dropped because
    the comparison evaluates to False for them.
    """
    return data.loc[data[col].between(lower, upper)]
1720
1821
def _ltrim(data: pd.DataFrame, col: str, lower: float) -> pd.DataFrame:
    """Keep the rows of ``data`` whose ``col`` value is ``>= lower``."""
    at_or_above = data[col] >= lower
    return data.loc[at_or_above]
2125
2226
def _rtrim(data: pd.DataFrame, col: str, upper: float) -> pd.DataFrame:
    """Keep the rows of ``data`` whose ``col`` value is ``<= upper``."""
    at_or_below = data[col] <= upper
    return data.loc[at_or_below]
2530
2631
def _get(data: pd.DataFrame, col: str, val: Any) -> pd.DataFrame:
    """Keep the rows of ``data`` whose ``col`` value equals ``val``."""
    matches = data[col] == val
    return data.loc[matches]
2935
3036
def _remove_min_bid_ask(data: pd.DataFrame, min_bid_ask: float) -> pd.DataFrame:
    """Keep only rows whose ``bid`` and ``ask`` both exceed ``min_bid_ask``.

    The comparison is strict: a quote exactly at the threshold is removed as
    well, as is any row where either side is NaN.
    """
    bid_ok = data["bid"] > min_bid_ask
    ask_ok = data["ask"] > min_bid_ask
    return data.loc[bid_ok & ask_ok]
3340
3441
def _remove_invalid_evaluated_options(data: pd.DataFrame) -> pd.DataFrame:
    """Keep evaluated options whose exit DTE is strictly below the entry DTE.

    Rows where ``dte_exit`` is greater than or equal to ``dte_entry`` are
    dropped, as are rows where either value is NaN.
    """
    # ``exit <= entry`` combined with ``entry != exit`` is simply ``exit < entry``.
    return data.loc[data["dte_exit"] < data["dte_entry"]]
4048
4149
def _cut_options_by_dte(
    data: pd.DataFrame, dte_interval: int, max_entry_dte: int
) -> pd.DataFrame:
    """Return a copy of ``data`` with ``dte_entry`` binned into ``dte_range``.

    Bin edges are ``0, dte_interval, 2 * dte_interval, ...`` and stop before
    ``max_entry_dte``. ``pd.cut`` builds right-closed intervals, so a value of
    0 or any value above the last edge gets NaN in ``dte_range``.

    The result is a new frame; the caller's frame is left untouched. Writing
    the column in place would modify the input and can trigger pandas'
    SettingWithCopyWarning when ``data`` is a filtered slice.
    """
    bin_edges = list(range(0, max_entry_dte, dte_interval))
    return data.assign(dte_range=pd.cut(data["dte_entry"], bin_edges))
4657
4758
48- def _cut_options_by_otm (data , otm_pct_interval , max_otm_pct_interval ):
59+ def _cut_options_by_otm (
60+ data : pd .DataFrame , otm_pct_interval : float , max_otm_pct_interval : float
61+ ) -> pd .DataFrame :
62+ """Categorize options into out-of-the-money percentage intervals."""
4963 # consider using np.linspace in future
5064 otm_pct_intervals = [
5165 round (i , 2 )
@@ -61,7 +75,10 @@ def _cut_options_by_otm(data, otm_pct_interval, max_otm_pct_interval):
6175 return data
6276
6377
64- def _group_by_intervals (data , cols , drop_na ):
78+ def _group_by_intervals (
79+ data : pd .DataFrame , cols : List [str ], drop_na : bool
80+ ) -> pd .DataFrame :
81+ """Group options by intervals and calculate descriptive statistics."""
6582 # this is a bottleneck, try to optimize
6683 grouped_dataset = data .groupby (cols )["pct_change" ].describe ()
6784
@@ -73,8 +90,17 @@ def _group_by_intervals(data, cols, drop_na):
7390 return grouped_dataset
7491
7592
76- def _evaluate_options (data , ** kwargs ):
93+ def _evaluate_options (data : pd .DataFrame , ** kwargs : Any ) -> pd .DataFrame :
94+ """
95+ Evaluate options by filtering, merging entry and exit data, and calculating costs.
7796
97+ Args:
98+ data: DataFrame containing option chain data
99+ **kwargs: Configuration parameters including max_otm_pct, min_bid_ask, exit_dte
100+
101+ Returns:
102+ DataFrame with evaluated options including entry and exit prices
103+ """
78104 # trim option chains with strikes too far out from current price
79105 data = data .pipe (_calculate_otm_pct ).pipe (
80106 _trim ,
@@ -103,7 +129,17 @@ def _evaluate_options(data, **kwargs):
103129 )[evaluated_cols ]
104130
105131
106- def _evaluate_all_options (data , ** kwargs ):
132+ def _evaluate_all_options (data : pd .DataFrame , ** kwargs : Any ) -> pd .DataFrame :
133+ """
134+ Complete pipeline to evaluate all options with DTE and OTM percentage categorization.
135+
136+ Args:
137+ data: DataFrame containing option chain data
138+ **kwargs: Configuration parameters for evaluation and categorization
139+
140+ Returns:
141+ DataFrame with evaluated and categorized options
142+ """
107143 return (
108144 data .pipe (_assign_dte )
109145 .pipe (_trim , "dte" , kwargs ["exit_dte" ], kwargs ["max_entry_dte" ])
@@ -117,21 +153,25 @@ def _evaluate_all_options(data, **kwargs):
117153 )
118154
119155
def _calls(data: pd.DataFrame) -> pd.DataFrame:
    """Keep the rows whose ``option_type`` starts with "c", ignoring case."""
    is_call = data["option_type"].str.lower().str.startswith("c")
    return data[is_call]
122159
123160
def _puts(data: pd.DataFrame) -> pd.DataFrame:
    """Keep the rows whose ``option_type`` starts with "p", ignoring case."""
    is_put = data["option_type"].str.lower().str.startswith("p")
    return data[is_put]
126164
127165
def _calculate_otm_pct(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``data`` with an ``otm_pct`` column.

    ``otm_pct`` is ``(strike - underlying_price) / strike`` rounded to two
    decimals: positive when the strike is above the underlying price and
    negative when it is below. The input frame is not modified.
    """
    distance = data["strike"] - data["underlying_price"]
    return data.assign(otm_pct=(distance / data["strike"]).round(2))
132171
133172
134- def _apply_ratios (data , leg_def ):
173+ def _apply_ratios (data : pd .DataFrame , leg_def : List [Tuple ]) -> pd .DataFrame :
174+ """Apply position ratios (long/short multipliers) to entry and exit prices."""
135175 for idx in range (1 , len (leg_def ) + 1 ):
136176 entry_col = f"entry_leg{ idx } "
137177 exit_col = f"exit_leg{ idx } "
@@ -142,7 +182,10 @@ def _apply_ratios(data, leg_def):
142182 return data
143183
144184
145- def _assign_profit (data , leg_def , suffixes ):
185+ def _assign_profit (
186+ data : pd .DataFrame , leg_def : List [Tuple ], suffixes : List [str ]
187+ ) -> pd .DataFrame :
188+ """Calculate total profit/loss and percentage change for multi-leg strategies."""
146189 data = _apply_ratios (data , leg_def )
147190
148191 # determine all entry and exit columns
@@ -155,29 +198,48 @@ def _assign_profit(data, leg_def, suffixes):
155198
156199 data ["pct_change" ] = np .where (
157200 data ["total_entry_cost" ].abs () > 0 ,
158- (data ["total_exit_proceeds" ] - data ["total_entry_cost" ]) / data ["total_entry_cost" ].abs (),
159- np .nan
201+ (data ["total_exit_proceeds" ] - data ["total_entry_cost" ])
202+ / data ["total_entry_cost" ].abs (),
203+ np .nan ,
160204 )
161205
162206 return data
163207
164208
165- def _strategy_engine (data , leg_def , join_on = None , rules = None ):
209+ def _strategy_engine (
210+ data : pd .DataFrame ,
211+ leg_def : List [Tuple ],
212+ join_on : Optional [List [str ]] = None ,
213+ rules : Optional [Callable ] = None ,
214+ ) -> pd .DataFrame :
215+ """
216+ Core strategy execution engine that constructs single or multi-leg option strategies.
217+
218+ Args:
219+ data: DataFrame containing evaluated option data
220+ leg_def: List of tuples defining strategy legs (side, filter_function)
221+ join_on: Columns to join on for multi-leg strategies
222+ rules: Optional filtering rules to apply after joining legs
223+
224+ Returns:
225+ DataFrame with constructed strategy and calculated profit/loss
226+ """
166227 if len (leg_def ) == 1 :
167228 data ["pct_change" ] = np .where (
168229 data ["entry" ].abs () > 0 ,
169230 (data ["exit" ] - data ["entry" ]) / data ["entry" ].abs (),
170- np .nan
231+ np .nan ,
171232 )
172233 return leg_def [0 ][1 ](data )
173234
174- def _rule_func (d , r , ld ):
235+ def _rule_func (
236+ d : pd .DataFrame , r : Optional [Callable ], ld : List [Tuple ]
237+ ) -> pd .DataFrame :
175238 return d if r is None else r (d , ld )
176239
177240 partials = [leg [1 ](data ) for leg in leg_def ]
178241 suffixes = [f"_leg{ idx } " for idx in range (1 , len (leg_def ) + 1 )]
179242
180- # noinspection PyTypeChecker
181243 return (
182244 reduce (
183245 lambda left , right : pd .merge (
@@ -190,7 +252,17 @@ def _rule_func(d, r, ld):
190252 )
191253
192254
193- def _process_strategy (data , ** context ):
255+ def _process_strategy (data : pd .DataFrame , ** context : Any ) -> pd .DataFrame :
256+ """
257+ Main entry point for processing option strategies.
258+
259+ Args:
260+ data: DataFrame containing raw option chain data
261+ **context: Dictionary containing strategy parameters, leg definitions, and formatting options
262+
263+ Returns:
264+ DataFrame with processed strategy results
265+ """
194266 _run_checks (context ["params" ], data )
195267 return (
196268 _evaluate_all_options (
@@ -217,7 +289,24 @@ def _process_strategy(data, **context):
217289 )
218290
219291
220- def _format_output (data , params , internal_cols , external_cols ):
292+ def _format_output (
293+ data : pd .DataFrame ,
294+ params : Dict [str , Any ],
295+ internal_cols : List [str ],
296+ external_cols : List [str ],
297+ ) -> pd .DataFrame :
298+ """
299+ Format strategy output as either raw data or grouped statistics.
300+
301+ Args:
302+ data: DataFrame with strategy results
303+ params: Parameters including 'raw' and 'drop_nan' flags
304+ internal_cols: Columns to include in raw output
305+ external_cols: Columns to group by for statistics output
306+
307+ Returns:
308+ Formatted DataFrame with either raw data or descriptive statistics
309+ """
221310 if params ["raw" ]:
222311 return data [internal_cols ].reset_index (drop = True )
223312
0 commit comments