Predicting Short-Term Crypto Returns with Market Microstructure¶

¶

There's a market microstructure metric that predicts short-term crypto returns surprisingly well. Most quants in this space know which one.

This notebook names it, walks through the methodology, and lets you run the same analysis to surface similar signals yourself. Rolling percentile rank is a simple but effective way to transform an arbitrary time series into a (close to) uniformly distributed signal, and can easily work as-is for simplistic backtests as well.

In [1]:
from __future__ import annotations

import datetime
import os
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from aperiodic import get_derivative_metrics, get_metrics, get_ohlcv
from utils._aperiodic_demo import run_position_backtest

try:
    from dotenv import load_dotenv
except ModuleNotFoundError:  # pragma: no cover
    load_dotenv = None

# Instrument / data-source configuration for the whole notebook.
SYMBOL = "perpetual-BTC-USDT:USDT"  # BTC-USDT perpetual future
EXCHANGE = "binance-futures"
INTERVAL = "5m"  # bar size; forward returns below are therefore 5-minute returns
TIMESTAMP = "exchange"  # local timestamp or "true"

# One year of history ending today (UTC).
START_DATE = datetime.datetime.now(tz=datetime.UTC).date() - timedelta(days=365)
END_DATE = datetime.datetime.now(tz=datetime.UTC).date()

# For demonstration purposes, we'll use only the l2_imbalance metric category.
# Each entry is (metric_name, kind); "derivative" metrics use a different
# fetcher than "regular" ones (see get_numeric_metric_frame).
# Uncomment additional lines to sweep more metric categories.
METRICS = [
    # ("basis", "derivative"),
    # ("funding", "derivative"),
    # ("open_interest", "derivative"),
    # ("flow", "regular"),
    # ("impact", "regular"),
    # ("l1_imbalance", "regular"),
    # ("l1_liquidity", "regular"),
    ("l2_imbalance", "regular"),
    # ("l2_liquidity", "regular"),
    # ("returns", "regular"),
    # ("slippage", "regular"),
    # ("trade_size", "regular"),
    # ("updownticks", "regular"),
    # ("run_structure", "regular"),
    # ("vtwap", "regular"),
    # ("range", "regular"),
]

# Rolling-rank window lengths (in bars) to sweep for each feature.
RANK_WINDOWS = [100, 300, 600, 1200]
# One-way transaction cost in basis points fed to the backtest (0.0 = frictionless).
COST_BPS = 0.0


# Load credentials from a local .env file if python-dotenv is available.
if load_dotenv is not None:
    load_dotenv(".env")

# API key resolution: the "..." placeholder may be replaced inline by the
# user; otherwise fall back to the APERIODIC_API_KEY environment variable.
API_KEY = "..."  # Set via APERIODIC_API_KEY env var or .env file
if API_KEY == "...":
    API_KEY = os.getenv("APERIODIC_API_KEY", "...")
if API_KEY == "...":
    raise RuntimeError("Set APERIODIC_API_KEY in the environment or in .env.")


def get_numeric_metric_frame(metric: str, kind: str) -> pd.DataFrame | None:
    """Fetch one metric for the configured symbol/interval and return its
    numeric columns keyed by ``time``, or ``None`` if nothing usable came back.

    ``kind == "derivative"`` routes through ``get_derivative_metrics``;
    everything else uses ``get_metrics``.
    """
    fetch = get_derivative_metrics if kind == "derivative" else get_metrics
    response = fetch(
        api_key=API_KEY,
        metric=metric,
        timestamp=TIMESTAMP,
        interval=INTERVAL,
        exchange=EXCHANGE,
        symbol=SYMBOL,
        start_date=START_DATE,
        end_date=END_DATE,
        show_progress=True,
    )

    # Normalize to pandas (the client may return a frame exposing to_pandas()).
    if hasattr(response, "to_pandas"):
        frame = response.to_pandas()
    else:
        frame = pd.DataFrame(response)

    if frame.empty or "time" not in frame.columns:
        return None

    # Keep only numeric value columns; "time" itself may be numeric, drop it.
    value_cols = [
        col
        for col in frame.select_dtypes(include=[np.number]).columns
        if col != "time"
    ]
    if not value_cols:
        return None

    # One row per timestamp, latest observation wins.
    deduped = frame.sort_values("time").drop_duplicates(subset=["time"], keep="last")
    return deduped[["time", *value_cols]]


def build_panel() -> tuple[pd.DataFrame, list[str]]:
    """Assemble the analysis panel.

    Fetches OHLCV for the configured symbol, left-joins every configured
    metric frame on ``time``, and attaches the one-bar forward return
    (``fwd_ret``). Returns the panel together with the list of numeric
    feature column names.
    """
    ohlcv = get_ohlcv(
        api_key=API_KEY,
        timestamp=TIMESTAMP,
        interval=INTERVAL,
        exchange=EXCHANGE,
        symbol=SYMBOL,
        start_date=START_DATE,
        end_date=END_DATE,
        show_progress=True,
    )

    # Normalize to pandas and keep only what we need from the OHLCV feed.
    panel = ohlcv.to_pandas() if hasattr(ohlcv, "to_pandas") else pd.DataFrame(ohlcv)
    panel = panel.sort_values("time")[["time", "close"]]

    for metric, kind in METRICS:
        metric_frame = get_numeric_metric_frame(metric, kind)
        if metric_frame is None:
            continue
        panel = panel.merge(metric_frame, on="time", how="left")

    panel = panel.sort_values("time")
    # Next-bar close-to-close return, aligned to the row whose features are
    # known at that time (features use data up to and including the bar).
    panel["fwd_ret"] = panel["close"].pct_change().shift(-1)
    panel = panel.dropna(subset=["fwd_ret"])

    # Every numeric column except the bookkeeping ones is a candidate feature.
    feature_cols = [
        name
        for name in panel.columns
        if name not in {"time", "close", "fwd_ret"}
        and pd.api.types.is_numeric_dtype(panel[name])
    ]

    print(f"Panel built: {len(panel)} rows. Found {len(feature_cols)} features.")
    return panel, feature_cols


def make_signal(panel_df: pd.DataFrame, feature: str, window: int) -> np.ndarray:
    """Map a feature onto [-1, 1] via its rolling percentile rank.

    The current value's rank (ties averaged) within the trailing ``window``
    observations is rescaled linearly so the lowest rank maps to -1 and the
    highest to +1. The first ``window - 1`` entries are NaN (incomplete
    window), as are positions where the feature itself is NaN.

    Args:
        panel_df: Panel containing the feature column.
        feature: Column name to transform.
        window: Rolling window length in bars; must be at least 2.

    Returns:
        float64 array aligned with ``panel_df``'s rows.

    Raises:
        ValueError: If ``window`` < 2 — the rescaling would otherwise divide
            by zero and silently emit NaN/inf instead of failing loudly.
    """
    if window < 2:
        raise ValueError(f"window must be >= 2, got {window}")
    rank = panel_df[feature].rolling(window).rank(method="average")
    signal = ((rank - 1.0) / (window - 1)) * 2.0 - 1.0
    return signal.to_numpy().astype(np.float64)


# Build the panel once; `panel`, `feature_cols`, and the fwd_ret column are
# reused by every later cell.
panel, feature_cols = build_panel()

print(f"Rows: {len(panel):,}")
print(f"Features: {feature_cols}")
print(panel.head())
Panel built: 96188 rows. Found 24 features.
Rows: 96,188
Features: ['imbalance_5', 'imbalance_ratio_5', 'bid_ask_ratio_5', 'imbalance_5_avg', 'imbalance_ratio_5_avg', 'bid_ask_ratio_5_avg', 'imbalance_10', 'imbalance_ratio_10', 'bid_ask_ratio_10', 'imbalance_10_avg', 'imbalance_ratio_10_avg', 'bid_ask_ratio_10_avg', 'imbalance_20', 'imbalance_ratio_20', 'bid_ask_ratio_20', 'imbalance_20_avg', 'imbalance_ratio_20_avg', 'bid_ask_ratio_20_avg', 'imbalance_25', 'imbalance_ratio_25', 'bid_ask_ratio_25', 'imbalance_25_avg', 'imbalance_ratio_25_avg', 'bid_ask_ratio_25_avg']
                 time    close  imbalance_5  imbalance_ratio_5  \
0 2025-04-01 00:00:00  82587.1    20.623001           0.943542   
1 2025-04-01 00:05:00  82626.3     3.709000           0.417633   
2 2025-04-01 00:10:00  82437.9    17.852999           0.552058   
3 2025-04-01 00:15:00  82555.5    24.536001           0.994165   
4 2025-04-01 00:20:00  82606.0     2.914000           0.237839   

   bid_ask_ratio_5  imbalance_5_avg  imbalance_ratio_5_avg  \
0        34.424637         2.802031               0.148939   
1         2.434262        -0.286676              -0.027503   
2         3.464862        -3.064070              -0.162171   
3       341.777802         0.582902               0.062767   
4         1.624117        -1.388703              -0.066280   

   bid_ask_ratio_5_avg  imbalance_10  imbalance_ratio_10  ...  \
0            20.895187     17.103001            0.650923  ...   
1            18.488403      4.125000            0.439063  ...   
2             6.851656     18.625999            0.556133  ...   
3            23.850851     24.980001            0.992057  ...   
4            16.122465      2.699000            0.210121  ...   

   imbalance_20_avg  imbalance_ratio_20_avg  bid_ask_ratio_20_avg  \
0          2.864821                0.109483              5.351739   
1         -1.104728               -0.059669              4.767213   
2         -2.917493               -0.120507              3.166406   
3          0.693538                0.049723              5.741375   
4         -1.912992               -0.101480              3.812000   

   imbalance_25  imbalance_ratio_25  bid_ask_ratio_25  imbalance_25_avg  \
0     20.678001            0.650088          4.715724          2.519539   
1      2.983000            0.186846          1.459559         -1.429622   
2     21.452000            0.565300          3.600873         -2.667508   
3     25.752001            0.948648         37.946918          0.722933   
4      3.520000            0.228869          1.593592         -2.188586   

   imbalance_ratio_25_avg  bid_ask_ratio_25_avg   fwd_ret  
0                0.081644              4.240677  0.000475  
1               -0.065429              3.330457 -0.002280  
2               -0.099687              2.324384  0.001427  
3                0.045970              3.948175  0.000612  
4               -0.112413              2.803426 -0.000182  

[5 rows x 27 columns]
In [2]:
# Sweep every (feature, rank window) combination: measure the in-sample
# information coefficient, orient the signal to its sign, backtest the
# resulting position, and collect summary statistics.
forward_returns = panel["fwd_ret"].to_numpy().astype(np.float64)
results = []

for feature in feature_cols:
    for window in RANK_WINDOWS:
        raw_signal = make_signal(panel, feature, window)
        valid = np.isfinite(raw_signal) & np.isfinite(forward_returns)

        print(f"Testing {feature} | window {window}: {valid.sum()} valid observations")

        # In-sample IC decides the trading direction (note: this is fit on
        # the full sample, so the sign choice itself is in-sample).
        ic = float(np.corrcoef(raw_signal[valid], forward_returns[valid])[0, 1])
        sign = 1 if ic >= 0 else -1

        oriented_position = np.nan_to_num(
            np.clip(raw_signal[valid] * sign, -1.0, 1.0), nan=0.0
        )
        bt_frame, bt_summary = run_position_backtest(
            timestamps=panel.loc[valid, "time"],
            position=oriented_position,
            forward_return=forward_returns[valid],
            cost_bps_one_way=COST_BPS,
        )

        results.append(
            {
                "feature": feature,
                "window": window,
                "direction": sign,
                "fit_corr": ic,
                "sharpe": bt_summary["annualized_sharpe"],
                "total_return": bt_summary["net_return_pct"],
                "max_drawdown": bt_summary["max_drawdown_pct"],
            }
        )

# Best Sharpe first; `results_df` is consumed by the next cell.
results_df = pd.DataFrame(results).sort_values("sharpe", ascending=False)
print(results_df)
Testing imbalance_5 | window 100: 96089 valid observations
Testing imbalance_5 | window 300: 95889 valid observations
Testing imbalance_5 | window 600: 95589 valid observations
Testing imbalance_5 | window 1200: 94989 valid observations
Testing imbalance_ratio_5 | window 100: 96089 valid observations
Testing imbalance_ratio_5 | window 300: 95889 valid observations
Testing imbalance_ratio_5 | window 600: 95589 valid observations
Testing imbalance_ratio_5 | window 1200: 94989 valid observations
Testing bid_ask_ratio_5 | window 100: 96089 valid observations
Testing bid_ask_ratio_5 | window 300: 95889 valid observations
Testing bid_ask_ratio_5 | window 600: 95589 valid observations
Testing bid_ask_ratio_5 | window 1200: 94989 valid observations
Testing imbalance_5_avg | window 100: 96089 valid observations
Testing imbalance_5_avg | window 300: 95889 valid observations
Testing imbalance_5_avg | window 600: 95589 valid observations
Testing imbalance_5_avg | window 1200: 94989 valid observations
Testing imbalance_ratio_5_avg | window 100: 96089 valid observations
Testing imbalance_ratio_5_avg | window 300: 95889 valid observations
Testing imbalance_ratio_5_avg | window 600: 95589 valid observations
Testing imbalance_ratio_5_avg | window 1200: 94989 valid observations
Testing bid_ask_ratio_5_avg | window 100: 96089 valid observations
Testing bid_ask_ratio_5_avg | window 300: 95889 valid observations
Testing bid_ask_ratio_5_avg | window 600: 95589 valid observations
Testing bid_ask_ratio_5_avg | window 1200: 94989 valid observations
Testing imbalance_10 | window 100: 96089 valid observations
Testing imbalance_10 | window 300: 95889 valid observations
Testing imbalance_10 | window 600: 95589 valid observations
Testing imbalance_10 | window 1200: 94989 valid observations
Testing imbalance_ratio_10 | window 100: 96089 valid observations
Testing imbalance_ratio_10 | window 300: 95889 valid observations
Testing imbalance_ratio_10 | window 600: 95589 valid observations
Testing imbalance_ratio_10 | window 1200: 94989 valid observations
Testing bid_ask_ratio_10 | window 100: 96089 valid observations
Testing bid_ask_ratio_10 | window 300: 95889 valid observations
Testing bid_ask_ratio_10 | window 600: 95589 valid observations
Testing bid_ask_ratio_10 | window 1200: 94989 valid observations
Testing imbalance_10_avg | window 100: 96089 valid observations
Testing imbalance_10_avg | window 300: 95889 valid observations
Testing imbalance_10_avg | window 600: 95589 valid observations
Testing imbalance_10_avg | window 1200: 94989 valid observations
Testing imbalance_ratio_10_avg | window 100: 96089 valid observations
Testing imbalance_ratio_10_avg | window 300: 95889 valid observations
Testing imbalance_ratio_10_avg | window 600: 95589 valid observations
Testing imbalance_ratio_10_avg | window 1200: 94989 valid observations
Testing bid_ask_ratio_10_avg | window 100: 96089 valid observations
Testing bid_ask_ratio_10_avg | window 300: 95889 valid observations
Testing bid_ask_ratio_10_avg | window 600: 95589 valid observations
Testing bid_ask_ratio_10_avg | window 1200: 94989 valid observations
Testing imbalance_20 | window 100: 96089 valid observations
Testing imbalance_20 | window 300: 95889 valid observations
Testing imbalance_20 | window 600: 95589 valid observations
Testing imbalance_20 | window 1200: 94989 valid observations
Testing imbalance_ratio_20 | window 100: 96089 valid observations
Testing imbalance_ratio_20 | window 300: 95889 valid observations
Testing imbalance_ratio_20 | window 600: 95589 valid observations
Testing imbalance_ratio_20 | window 1200: 94989 valid observations
Testing bid_ask_ratio_20 | window 100: 96089 valid observations
Testing bid_ask_ratio_20 | window 300: 95889 valid observations
Testing bid_ask_ratio_20 | window 600: 95589 valid observations
Testing bid_ask_ratio_20 | window 1200: 94989 valid observations
Testing imbalance_20_avg | window 100: 96089 valid observations
Testing imbalance_20_avg | window 300: 95889 valid observations
Testing imbalance_20_avg | window 600: 95589 valid observations
Testing imbalance_20_avg | window 1200: 94989 valid observations
Testing imbalance_ratio_20_avg | window 100: 96089 valid observations
Testing imbalance_ratio_20_avg | window 300: 95889 valid observations
Testing imbalance_ratio_20_avg | window 600: 95589 valid observations
Testing imbalance_ratio_20_avg | window 1200: 94989 valid observations
Testing bid_ask_ratio_20_avg | window 100: 96089 valid observations
Testing bid_ask_ratio_20_avg | window 300: 95889 valid observations
Testing bid_ask_ratio_20_avg | window 600: 95589 valid observations
Testing bid_ask_ratio_20_avg | window 1200: 94989 valid observations
Testing imbalance_25 | window 100: 96089 valid observations
Testing imbalance_25 | window 300: 95889 valid observations
Testing imbalance_25 | window 600: 95589 valid observations
Testing imbalance_25 | window 1200: 94989 valid observations
Testing imbalance_ratio_25 | window 100: 96089 valid observations
Testing imbalance_ratio_25 | window 300: 95889 valid observations
Testing imbalance_ratio_25 | window 600: 95589 valid observations
Testing imbalance_ratio_25 | window 1200: 94989 valid observations
Testing bid_ask_ratio_25 | window 100: 96089 valid observations
Testing bid_ask_ratio_25 | window 300: 95889 valid observations
Testing bid_ask_ratio_25 | window 600: 95589 valid observations
Testing bid_ask_ratio_25 | window 1200: 94989 valid observations
Testing imbalance_25_avg | window 100: 96089 valid observations
Testing imbalance_25_avg | window 300: 95889 valid observations
Testing imbalance_25_avg | window 600: 95589 valid observations
Testing imbalance_25_avg | window 1200: 94989 valid observations
Testing imbalance_ratio_25_avg | window 100: 96089 valid observations
Testing imbalance_ratio_25_avg | window 300: 95889 valid observations
Testing imbalance_ratio_25_avg | window 600: 95589 valid observations
Testing imbalance_ratio_25_avg | window 1200: 94989 valid observations
Testing bid_ask_ratio_25_avg | window 100: 96089 valid observations
Testing bid_ask_ratio_25_avg | window 300: 95889 valid observations
Testing bid_ask_ratio_25_avg | window 600: 95589 valid observations
Testing bid_ask_ratio_25_avg | window 1200: 94989 valid observations
                   feature  window  direction  fit_corr     sharpe  \
5        imbalance_ratio_5     300          1  0.033982  10.904485   
9          bid_ask_ratio_5     300          1  0.033982  10.904475   
6        imbalance_ratio_5     600          1  0.033749  10.776432   
10         bid_ask_ratio_5     600          1  0.033749  10.776427   
33        bid_ask_ratio_10     300          1  0.033457  10.738846   
..                     ...     ...        ...       ...        ...   
12         imbalance_5_avg     100          1  0.000150   0.049464   
66  imbalance_ratio_20_avg     600          1  0.000069   0.025342   
44    bid_ask_ratio_10_avg     100          1  0.000070   0.024895   
36        imbalance_10_avg     100         -1 -0.000041   0.014495   
89  imbalance_ratio_25_avg     300         -1 -0.000010   0.003945   

    total_return  max_drawdown  
5    1203.812910     -9.084661  
9    1203.809792     -9.084661  
6    1157.813790     -9.385400  
10   1157.812289     -9.385400  
33   1152.855678    -10.068121  
..           ...           ...  
12     -1.762956    -26.714597  
66     -1.746596    -25.787486  
44     -3.011929    -23.036869  
36     -2.601620    -22.727269  
89     -2.347363    -18.694687  

[96 rows x 7 columns]

Microstructure Takeaways¶

Note that all of the backtests here are net of the configured transaction cost (COST_BPS, set to 0.0 in this demo — raise it to see net-of-fee results).

  • The metric presented here has high turnover and is most predictive on lower timeframes.
  • Market microstructure metrics can be used to enhance directional strategies and extend existing signals.
  • They can also act as regime filters to help decide when broader trend-following ideas are more or less effective.
In [3]:
# Replay the single best (feature, window) combination from the sweep and
# visualize its signal alongside the resulting equity curve.
best = results_df.iloc[0].to_dict()
best_feature = str(best["feature"])
best_window = int(best["window"])
best_direction = int(best["direction"])

# `signal` and `mask` are reused by the decile analysis in the next cell.
signal = best_direction * make_signal(panel, best_feature, best_window)
mask = np.isfinite(signal) & np.isfinite(forward_returns)
bt_frame, bt_summary = run_position_backtest(
    timestamps=panel.loc[mask, "time"],
    position=np.nan_to_num(np.clip(signal[mask], -1.0, 1.0), nan=0.0),
    forward_return=forward_returns[mask],
    cost_bps_one_way=COST_BPS,
)

print("Best Strategy found:")
print(best)

fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True)
signal_ax, equity_ax = axes

signal_ax.plot(panel["time"], signal, linewidth=0.9, color="tab:red")
signal_ax.axhline(0.0, color="black", linewidth=0.7, alpha=0.5)
signal_ax.set_title(
    f"Signal: {best_feature} | window={best_window} | dir={best_direction}"
)
signal_ax.grid(alpha=0.2)

equity_ax.plot(
    bt_frame["timestamp"], bt_frame["equity_curve"], linewidth=1.1, color="tab:green"
)
equity_ax.set_title(
    f"Equity | Sharpe={best['sharpe']:.3f} | TotalRet={best['total_return']:.3f}"
)
equity_ax.grid(alpha=0.2)

fig.tight_layout()
plt.show()
Best Strategy found:
{'feature': 'imbalance_ratio_5', 'window': 300, 'direction': 1, 'fit_corr': 0.033982103815005577, 'sharpe': 10.904485476515879, 'total_return': 1203.8129097285264, 'max_drawdown': -9.084660665845405}
No description has been provided for this image
In [4]:
# Monotonicity check: bucket the signal into deciles and inspect the mean
# next-bar return per bucket — a well-behaved signal should step up roughly
# monotonically from decile 1 to 10.
mask = np.isfinite(signal) & np.isfinite(forward_returns)
sig_valid = signal[mask]
ret_valid = forward_returns[mask]
n_obs = sig_valid.shape[0]

# Sorted position -> decile label 1..10 (ties broken by argsort order).
order = np.argsort(sig_valid)
bucket = np.empty(n_obs, dtype=np.int64)
bucket[order] = (np.arange(n_obs) * 10 // n_obs) + 1

rows = []
for decile in range(1, 11):
    members = bucket == decile
    rows.append(
        {
            "decile": decile,
            "count": int(members.sum()),
            "mean_signal": float(np.nanmean(sig_valid[members])),
            "mean_fwd_ret": float(np.nanmean(ret_valid[members])),
        }
    )
deciles_df = pd.DataFrame(rows)

fig, ax = plt.subplots(figsize=(10, 4))
ax.bar(deciles_df["decile"], deciles_df["mean_fwd_ret"], color="tab:blue", alpha=0.85)
ax.axhline(0.0, color="black", linewidth=0.7, alpha=0.5)
ax.set_title("Mean forward return by signal decile")
ax.set_xlabel("Decile")
ax.set_ylabel("Mean next-bar return")
ax.grid(alpha=0.2, axis="y")
fig.tight_layout()
plt.show()

print(deciles_df)
No description has been provided for this image
   decile  count  mean_signal  mean_fwd_ret
0       1   9589    -0.903381     -0.000086
1       2   9589    -0.702198     -0.000064
2       3   9589    -0.501570     -0.000014
3       4   9589    -0.300450     -0.000011
4       5   9589    -0.100676     -0.000016
5       6   9589     0.100329     -0.000005
6       7   9589     0.302215      0.000013
7       8   9589     0.502918      0.000023
8       9   9589     0.703126      0.000055
9      10   9588     0.903708      0.000090

Next Steps¶

Note that all of the backtests here are net of the configured transaction cost (COST_BPS, set to 0.0 in this demo — raise it to see net-of-fee results).

  • The metric presented here has high turnover and is most predictive on lower timeframes.
  • Market microstructure metrics can be used to enhance directional strategies and extend existing signals.
  • They can also act as regime filters to help decide when broader trend-following ideas are more or less effective.

Register at Aperiodic.io to run an interactive version of this notebook with access to all available market microstructure metrics.