Platform paper social experiment analysis: Part 2#

In this example we will work with behavioural data collected from experiments social0.2, social0.3, and social0.4, in which two mice foraged for food in the habitat, which contained three foraging patches whose reward rates changed dynamically over time.

The experiments each consist of three periods:

  1. “presocial”, in which each mouse was in the habitat alone for 3-4 days.

  2. “social”, in which both mice were in the habitat together for 2 weeks.

  3. “postsocial”, in which each mouse was in the habitat alone again for 3-4 days.

The goal of the experiments was to understand how the mice’s behaviour changed as they learned to forage for food in the habitat, and how their behaviour differed between social and solo settings.

The full datasets are available on the Datasets page, but for the purposes of this example we will use the precomputed Platform paper social analysis datasets.

See also

“Extended Data Fig. 7”, in the “Extended Data” section of the platform paper’s “Supplementary Material”, for a detailed description of the experiments.

Below is a brief explanation of how the environment (i.e. patch properties) changed over blocks (60–180 minute periods of time); a simulation sketch follows the list:

  1. Every block lasts a random duration \(t\):

    \[ t \sim \mathrm{Uniform}(60,\,180) \quad \text{In minutes} \]
  2. At the start of each block, sample a row from the predefined matrix \(\lambda_{\mathrm{set}}\):

    \[\begin{split} \lambda_{\mathrm{set}} = \begin{pmatrix} 1 & 1 & 1 \\ 5 & 5 & 5 \\ 1 & 3 & 5 \\ 1 & 5 & 3 \\ 3 & 1 & 5 \\ 3 & 5 & 1 \\ 5 & 1 & 3 \\ 5 & 3 & 1 \\ \end{pmatrix} \quad \text{In meters} \end{split}\]
  3. Assign the sampled row to specific patch means \(\lambda_{\mathrm{1}}, \lambda_{\mathrm{2}}, \lambda_{\mathrm{3}}\) and apply a constant offset \(c\) to all thresholds:

    \[\begin{split} \begin{aligned} \lambda_{\mathrm{1}}, \lambda_{\mathrm{2}}, \lambda_{\mathrm{3}} &\sim \mathrm{Uniform}(\lambda_{\mathrm{set}}) \\ c &= 0.75 \end{aligned} \quad \text{Patch means and offset} \end{split}\]
  4. Sample a value from each of \(P_{\mathrm{1}}, P_{\mathrm{2}}, P_{\mathrm{3}}\) as the initial threshold for the respective patch. Whenever a patch reaches its threshold, resample a new value from its corresponding distribution:

    \[\begin{split} \begin{aligned} P_{\mathrm{1}} &= c + \mathrm{Exp}(1/\lambda_{\mathrm{1}}) \\ P_{\mathrm{2}} &= c + \mathrm{Exp}(1/\lambda_{\mathrm{2}}) \\ P_{\mathrm{3}} &= c + \mathrm{Exp}(1/\lambda_{\mathrm{3}}) \end{aligned} \quad \text{Patch distributions} \end{split}\]
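
To make the block structure concrete, here is a minimal simulation sketch of the generative process above. It is illustrative only: the names (`rng`, `sample_block`, `sample_threshold`) are not part of the analysis code.

import numpy as np

rng = np.random.default_rng(seed=0)

lambda_set = np.array(
    [
        [1, 1, 1],
        [5, 5, 5],
        [1, 3, 5],
        [1, 5, 3],
        [3, 1, 5],
        [3, 5, 1],
        [5, 1, 3],
        [5, 3, 1],
    ]
)  # patch means, in meters
c = 0.75  # constant offset, in meters, added to every threshold


def sample_block() -> tuple[float, np.ndarray]:
    """Sample one block: its duration (minutes) and the three patch means."""
    t = rng.uniform(60, 180)  # block duration ~ Uniform(60, 180) minutes
    lams = lambda_set[rng.integers(len(lambda_set))]  # uniformly sample a row
    return t, lams


def sample_threshold(lam: float) -> float:
    """Sample a patch threshold: offset plus an exponential with mean lam."""
    return c + rng.exponential(scale=lam)  # Exp(1/lam) has mean lam


t, lams = sample_block()
thresholds = [round(sample_threshold(lam), 2) for lam in lams]
print(f"Block duration: {t:.1f} min; initial thresholds (m): {thresholds}")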

Set up environment#

Create and activate a virtual environment named aeon-social-analysis using uv.

uv venv aeon-social-analysis --python ">=3.11" 
source aeon-social-analysis/bin/activate   # Unix
.\aeon-social-analysis\Scripts\activate   # Windows

Install the required packages and their dependencies.

uv pip install matplotlib numpy pandas plotly seaborn statsmodels pyyaml pyarrow tqdm scipy jupyter

Import libraries and define variables and helper functions#

"""Notebook settings and imports"""

from pathlib import Path
from warnings import warn

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import seaborn as sns
from scipy import stats
from scipy.ndimage import uniform_filter1d
from tqdm.auto import tqdm


# Plot settings
sns.set_style("whitegrid")
plt.rcParams["axes.titlesize"] = 20
plt.rcParams["axes.labelsize"] = 18
plt.rcParams["xtick.labelsize"] = 15
plt.rcParams["ytick.labelsize"] = 15
plt.rcParams["legend.title_fontsize"] = 15
plt.rcParams["legend.fontsize"] = 14

# Constants
cm2px = 5.2  # 1 cm = 5.2 px roughly in aeon arenas
light_off, light_on = 7, 20  # lights off at 7am, back on at 8pm

subject_colors = plotly.colors.qualitative.Dark24
patch_colors = plotly.colors.qualitative.Light24
patch_markers = [
    "circle",
    "bowtie",
    "square",
    "hourglass",
    "diamond",
    "cross",
    "x",
    "triangle",
    "star",
]
patch_markers_symbols = ["●", "⧓", "■", "⧗", "♦", "✖", "×", "▲", "★"]
patch_markers_dict = dict(zip(patch_markers, patch_markers_symbols, strict=False))
patch_markers_linestyles = ["solid", "dash", "dot", "dashdot", "longdashdot"]
subject_markers_linestyles = patch_markers_linestyles.copy()
patch_type_mean_map = {100: "l", 300: "m", 500: "h", 200: "l", 600: "m", 1000: "h"}
patch_type_rate_map = {
    0.01: "l",
    0.0033: "m",
    0.002: "h",
    0.005: "l",
    0.00167: "m",
    0.001: "h",
}

experiments = [
    {
        "name": "social0.2-aeon3",
        "presocial_start": "2024-01-31 11:00:00",
        "presocial_end": "2024-02-08 15:00:00",
        "social_start": "2024-02-09 16:00:00",
        "social_end": "2024-02-23 13:00:00",
        "postsocial_start": "2024-02-25 17:00:00",
        "postsocial_end": "2024-03-02 14:00:00",
    },
    {
        "name": "social0.2-aeon4",
        "presocial_start": "2024-01-31 11:00:00",
        "presocial_end": "2024-02-08 15:00:00",
        "social_start": "2024-02-09 17:00:00",
        "social_end": "2024-02-23 12:00:00",
        "postsocial_start": "2024-02-25 18:00:00",
        "postsocial_end": "2024-03-02 13:00:00",
    },
    {
        "name": "social0.3-aeon3",
        "presocial_start": "2024-06-08 19:00:00",
        "presocial_end": "2024-06-17 13:00:00",
        "social_start": "2024-06-25 11:00:00",
        "social_end": "2024-07-06 13:00:00",
        "postsocial_start": "2024-07-07 16:00:00",
        "postsocial_end": "2024-07-14 14:00:00",
    },
    {
        "name": "social0.3-aeon4",
    },
    {
        "name": "social0.4-aeon3",
        "presocial_start": "2024-08-16 17:00:00",
        "presocial_end": "2024-08-24 10:00:00",
        "social_start": "2024-08-28 11:00:00",
        "social_end": "2024-09-09 13:00:00",
        "postsocial_start": "2024-09-09 18:00:00",
        "postsocial_end": "2024-09-22 16:00:00",
    },
    {
        "name": "social0.4-aeon4",
        "presocial_start": "2024-08-16 15:00:00",
        "presocial_end": "2024-08-24 10:00:00",
        "social_start": "2024-08-28 10:00:00",
        "social_end": "2024-09-09 01:00:00",
        "postsocial_start": "2024-09-09 15:00:00",
        "postsocial_end": "2024-09-22 16:00:00",
    },
]

periods = ["social", "postsocial"]

# Define the possible combos of social and light
combos = [
    (True, True),  # Social + Light
    (True, False),  # Social + Dark
    (False, True),  # Solo + Light
    (False, False),  # Solo + Dark
]

# Define colors based on light condition (light=blue, dark=orange)
colors = {
    True: "#1f77b4",  # Blue for light conditions
    False: "#ff7f0e",  # Orange for dark conditions
}

# Define hatch patterns based on social condition
hatches = {
    True: "///",  # Hatched pattern for social
    False: None,  # No pattern (solid) for solo
}

labels = ["Social-Light", "Social-Dark", "Solo-Light", "Solo-Dark"]
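
Throughout the analyses below, each hour of the day is classed as light or dark via the boolean expression `~((hour > light_off) & (hour < light_on))`. A minimal sketch of what that expression computes (the helper name `is_light` is illustrative, not part of the analysis code):

def is_light(hour: int) -> bool:
    """Mirror the light/dark classification used below: hours strictly
    between light_off (7) and light_on (20) are classed as dark, all
    other hours as light."""
    return not (light_off < hour < light_on)


# Dark hours are 8:00-19:59; everything else counts as light.
assert [h for h in range(24) if not is_light(h)] == list(range(8, 20))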


def load_data_from_parquet(
    experiment_name: str | None,
    period: str | None,
    data_type: str,
    data_dir: Path,
    set_time_index: bool = False,
) -> pd.DataFrame:
    """Loads saved data from parquet files.

    Args:
        experiment_name (str, optional): Filter by experiment name. If None, load all experiments.
        period (str, optional): Filter by period (presocial, social, postsocial). If None, load all periods.
        data_type (str): Type of data to load (position, patch, foraging, rfid, sleep, explore)
        data_dir (Path): Directory containing parquet files.
        set_time_index (bool, optional): If True, set 'time' column as DataFrame index.

    Returns:
        pd.DataFrame: Combined DataFrame of all matching parquet files.
    """
    if not data_dir.exists():
        print(f"Directory {data_dir} does not exist. No data files found.")
        return pd.DataFrame()

    # Create pattern based on filters
    pattern = ""
    if experiment_name:
        pattern += f"{experiment_name}_"
    else:
        pattern += "*_"

    if period:
        pattern += f"{period}_"
    else:
        pattern += "*_"

    pattern += f"{data_type}.parquet"

    # Find matching files
    matching_files = list(data_dir.glob(pattern))

    if not matching_files:
        print(f"No matching data files found with pattern: {pattern}")
        return pd.DataFrame()

    print(f"Found {len(matching_files)} matching files")

    # Load and concatenate matching files
    dfs = []
    total_rows = 0
    for file in matching_files:
        print(f"Loading {file}...")
        df = pd.read_parquet(file)
        total_rows += len(df)
        dfs.append(df)
        print(f"  Loaded {len(df)} rows")

    # Combine data
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        if set_time_index and "time" in combined_df.columns:
            combined_df = combined_df.set_index("time")
        print(f"Combined data: {len(combined_df)} rows")
        return combined_df
    else:
        return pd.DataFrame()


def load_experiment_data(
    data_dir: Path,
    experiment: dict | None = None,
    periods: list | None = None,
    data_types: list[str] = ["rfid", "position"],
    trim_days: int | None = None,
) -> dict:
    """Load all data types for specified periods of an experiment.

    Parameters:
    - experiment: experiment dict with period start/end times
    - periods: list of periods to load
    - data_types: list of data types to load
    - data_dir: directory containing data files
    - trim_days: Optional number of days to trim from start (None = no trim)

    Returns:
    - Dictionary containing dataframes for each period/data type combination
    """
    result = {}

    if periods is None:
        periods = [None]

    for period in periods:
        for data_type in data_types:
            print(f"Loading {period} {data_type} data...")

            # Load data
            experiment_name = experiment["name"] if experiment is not None else None
            df = load_data_from_parquet(
                experiment_name=experiment_name,
                period=period,
                data_type=data_type,
                data_dir=data_dir,
                set_time_index=(data_type == "position"),
            )

            # Trim if requested
            if trim_days is not None and len(df) > 0:
                if data_type == "rfid":
                    start_time = df["chunk_start"].min()
                    end_time = start_time + pd.Timedelta(days=trim_days)
                    df = df[df["chunk_start"] < end_time]
                if data_type == "foraging":
                    start_time = df["start"].min()
                    end_time = start_time + pd.Timedelta(days=trim_days)
                    df = df[df["start"] < end_time]
                if data_type == "position":
                    start_time = df.index.min()
                    end_time = start_time + pd.Timedelta(days=trim_days)
                    df = df.loc[df.index < end_time]

                print(f"  Trimmed to {trim_days} days: {len(df)} records")

            # Store in result
            key = f"{period}_{data_type}"
            result[key] = df

            # For position data, handle duplicates
            if data_type == "position" and len(df) > 0:
                original_len = len(df)
                df = df.reset_index()
                df = df.drop_duplicates(subset=["time", "identity_name"])
                df = df.set_index("time")
                result[key] = df
                if len(df) < original_len:
                    print(f"  Removed duplicates: {original_len} -> {len(df)}")

    return result
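
As a usage sketch, assuming data_dir points at the precomputed parquet files (which, per the glob pattern above, are named like social0.2-aeon3_social_position.parquet):

# Hypothetical usage of the two loaders; see the note below for data_dir.
pos_df = load_data_from_parquet(
    experiment_name="social0.2-aeon3",
    period="social",
    data_type="position",
    data_dir=data_dir,
    set_time_index=True,
)

# Load several period/data-type combinations at once, trimming each period
# to its first three days; results are keyed like "social_rfid".
data = load_experiment_data(
    data_dir=data_dir,
    experiment=experiments[0],
    periods=["social", "postsocial"],
    data_types=["rfid", "position"],
    trim_days=3,
)
social_rfid_df = data["social_rfid"]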

Note

Change data_dir and save_dir to the paths where your local dataset (the parquet files) is stored and where you want to save the results.

# SET THESE VARIABLES ACCORDINGLY
data_dir = Path("")
save_dir = Path("")

Solo vs. Social Behaviours#

Exploring#

Distance travelled#

# Final df:
# rows = hour-datetime,
# columns = distance, exp, social-bool, subject, light-bool

dist_trav_hour_df = pd.DataFrame(
    columns=["hour", "distance", "exp", "social", "subject", "light"]
)
# For each period
# Load pos data
# Split into individual dfs
# If social, excise swaps
# Smooth down to 1s
# Calculate hour-by-hour distance traveled, and put into final df

exp_pbar = tqdm(experiments, desc="Experiments", position=0, leave=True)
for exp in exp_pbar:
    period_pbar = tqdm(periods, desc="Periods", position=1, leave=True)
    for period in period_pbar:
        pos_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="position",
            data_dir=data_dir,
            set_time_index=True,
        )
        for subject in pos_df["identity_name"].unique():
            pos_df_subj = pos_df[pos_df["identity_name"] == subject]
            pos_df_subj = pos_df_subj.resample("200ms").first().dropna(subset=["x"])
            pos_df_subj[["x", "y"]] = pos_df_subj[["x", "y"]].rolling("1s").mean()
            pos_df_subj = pos_df_subj.resample("1s").first().dropna(subset=["x"])
            pos_df_subj["distance"] = np.sqrt(
                (pos_df_subj["x"].diff() ** 2) + (pos_df_subj["y"].diff() ** 2)
            )
            pos_df_subj.at[pos_df_subj.index[0], "distance"] = 0
            pos_df_subj["distance"] /= cm2px * 100  # convert to m
            pos_df_subj["hour"] = pos_df_subj.index.floor("h")
            pos_df_subj_hour = (
                pos_df_subj.groupby("hour")["distance"].sum().reset_index()
            )
            pos_df_subj_hour["exp"] = exp["name"]
            pos_df_subj_hour["social"] = period == "social"
            pos_df_subj_hour["subject"] = subject
            hour = pos_df_subj_hour["hour"].dt.hour
            pos_df_subj_hour["light"] = ~((hour > light_off) & (hour < light_on))
            dist_trav_hour_df = pd.concat(
                [dist_trav_hour_df, pos_df_subj_hour], ignore_index=True
            )
# # Save as parquet
# dist_trav_hour_df.to_parquet(
#     data_dir / "for_plots" / "dist_trav_hour_df.parquet",
#     engine="pyarrow",
#     compression="snappy",
#     index=False,
# )

# Load the parquet file
dist_trav_hour_df = pd.read_parquet(
    data_dir / "for_plots" / "dist_trav_hour_df.parquet",
    engine="pyarrow",
)

display(dist_trav_hour_df)
          hour                distance              exp  social      subject  light
0      2024-01-31 11:00:00  214.375787  social0.2-aeon3   False  BAA-1104045  False
1      2024-01-31 12:00:00  358.672416  social0.2-aeon3   False  BAA-1104045  False
2      2024-01-31 13:00:00  301.952548  social0.2-aeon3   False  BAA-1104045  False
3      2024-01-31 14:00:00  284.154738  social0.2-aeon3   False  BAA-1104045  False
4      2024-01-31 15:00:00  420.268372  social0.2-aeon3   False  BAA-1104045  False
...                    ...         ...              ...     ...          ...    ...
4840   2024-09-22 13:00:00  263.921865  social0.4-aeon4   False  BAA-1104797  False
4841   2024-09-22 14:00:00  316.511526  social0.4-aeon4   False  BAA-1104797  False
4842   2024-09-22 15:00:00  281.001766  social0.4-aeon4   False  BAA-1104797  False
4843   2024-09-22 16:00:00  171.733688  social0.4-aeon4   False  BAA-1104797  False
4844   2024-09-22 17:00:00    0.000905  social0.4-aeon4   False  BAA-1104797  False

4845 rows × 6 columns

"""Hists."""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    # Filter data for this combination
    subset = dist_trav_hour_df[
        (dist_trav_hour_df["social"] == social_val)
        & (dist_trav_hour_df["light"] == light_val)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="distance",
        stat="probability",  # This normalizes the histogram
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        # kde=True,  # Add kernel density estimate
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=20,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title(
    "Normalized Distance Traveled Distributions by Social and Light Conditions"
)
ax.set_xlabel("Distance Traveled (m / h)")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_ylim(0, 0.2)
[Figure: Normalized Distance Traveled Distributions by Social and Light Conditions]
"""Bars."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = dist_trav_hour_df[
            (dist_trav_hour_df["social"] == social_val)
            & (dist_trav_hour_df["light"] == light_val)
        ]
        mean_dist = subset["distance"].mean()
        sem_dist = subset["distance"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_distance": mean_dist,
                "sem": sem_dist,
                "condition": (
                    f"{'Social' if social_val else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}",
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_distance"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_distance']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_distance"] + row["sem"] + 5,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_ylabel("Mean Distance Traveled (m / h)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.set_title("Mean Distance Traveled by Social and Light Conditions")
ax.legend(title="Conditions", loc="upper left")
ax.xaxis.grid(False)

# Add stats tests

light_social = dist_trav_hour_df[
    (dist_trav_hour_df["social"] == True) & (dist_trav_hour_df["light"] == True)
]["distance"]
light_solo = dist_trav_hour_df[
    (dist_trav_hour_df["social"] == False) & (dist_trav_hour_df["light"] == True)
]["distance"]

dark_social = dist_trav_hour_df[
    (dist_trav_hour_df["social"] == True) & (dist_trav_hour_df["light"] == False)
]["distance"]
dark_solo = dist_trav_hour_df[
    (dist_trav_hour_df["social"] == False) & (dist_trav_hour_df["light"] == False)
]["distance"]

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.02,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=65.89, sem=2.22, n=1666
Plotting Social-Dark: mean=283.67, sem=3.74, n=1406
Plotting Solo-Light: mean=22.04, sem=1.20, n=949
Plotting Solo-Dark: mean=135.10, sem=3.63, n=824
Text(0.02, 0.68, 'Two-sample t-tests:\nLight conditions: p = 9.89e-64\nDark conditions: p = 2.04e-151')
[Figure: Mean Distance Traveled by Social and Light Conditions]

Bouts#

# Final df:
# rows = hour-datetime,
# columns = n_bouts, exp, social-bool, subject, light-bool

explore_hour_df = pd.DataFrame(
    columns=["hour", "n_bouts", "exp", "social", "subject", "light"]
)
explore_dur_df = pd.DataFrame(
    columns=["start", "duration", "exp", "social", "subject", "light"]
)
exp_pbar = tqdm(experiments, desc="Experiments", position=0, leave=True)
for exp in exp_pbar:
    period_pbar = tqdm(periods, desc="Periods", position=1, leave=False)
    for period in period_pbar:
        explore_bouts_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="explore",
            data_dir=data_dir,
            set_time_index=True,
        )
        for subject in explore_bouts_df["subject"].unique():
            explore_df_subj = explore_bouts_df[
                explore_bouts_df["subject"] == subject
            ].copy()  # copy to avoid SettingWithCopyWarning when adding columns
            explore_df_subj["hour"] = explore_df_subj["start"].dt.floor("h")
            min_hour, max_hour = (
                explore_df_subj["hour"].min(),
                explore_df_subj["hour"].max(),
            )
            complete_hours = pd.DataFrame(
                {"hour": pd.date_range(start=min_hour, end=max_hour, freq="h")}
            )
            hour_counts = (
                explore_df_subj.groupby("hour").size().reset_index(name="n_bouts")
            )
            explore_df_subj_hour = pd.merge(
                complete_hours, hour_counts, on="hour", how="left"
            ).fillna(0)
            explore_df_subj_hour["n_bouts"] = explore_df_subj_hour["n_bouts"].astype(
                int
            )
            explore_df_subj_hour["exp"] = exp["name"]
            explore_df_subj_hour["social"] = period == "social"
            explore_df_subj_hour["subject"] = subject
            hour = explore_df_subj_hour["hour"].dt.hour
            explore_df_subj_hour["light"] = ~((hour > light_off) & (hour < light_on))
            explore_hour_df = pd.concat(
                [explore_hour_df, explore_df_subj_hour], ignore_index=True
            )

            explore_dur_subj = explore_df_subj[["start", "duration"]].copy()
            explore_dur_subj["exp"] = exp["name"]
            explore_dur_subj["social"] = period == "social"
            explore_dur_subj["subject"] = subject
            hour = explore_dur_subj["start"].dt.hour
            explore_dur_subj["light"] = ~((hour > light_off) & (hour < light_on))
            explore_dur_df = pd.concat(
                [explore_dur_df, explore_dur_subj], ignore_index=True
            )

explore_dur_df["duration"] = explore_dur_df["duration"].dt.total_seconds() / 60
explore_dur_df = explore_dur_df[explore_dur_df["duration"] < 120]
"""Plot hist of bouts per hour"""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    subset = explore_hour_df[
        (explore_hour_df["social"] == social_val)
        & (explore_hour_df["light"] == light_val)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="n_bouts",
        stat="probability",
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=1,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title("Normalized Exploration Bout Distributions by Social and Light Conditions")
ax.set_xlabel("Number of bouts / hour")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_xticks(np.arange(0, 15, 2))
ax.set_xlim(0, 15)
[Figure: Normalized Exploration Bout Distributions by Social and Light Conditions]
"""Plot bars of bouts per hour"""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = explore_hour_df[
            (explore_hour_df["social"] == social_val)
            & (explore_hour_df["light"] == light_val)
        ]
        mean_n_bouts = subset["n_bouts"].mean()
        sem_n_bouts = subset["n_bouts"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_n_bouts": mean_n_bouts,
                "sem": sem_n_bouts,
                "condition": f"{'Social' if social_val else 'Solo'}-{'Light' if light_val else 'Dark'}",
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_n_bouts"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_n_bouts"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Number of Exploration Bouts by Social and Light Conditions")
ax.set_ylabel("Number of bouts / hour")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper left")
ax.xaxis.grid(False)

# Welch's two-sample t-tests (unequal variances)
light_social = explore_hour_df[
    (explore_hour_df["social"] == True) & (explore_hour_df["light"] == True)
]["n_bouts"]
light_solo = explore_hour_df[
    (explore_hour_df["social"] == False) & (explore_hour_df["light"] == True)
]["n_bouts"]

dark_social = explore_hour_df[
    (explore_hour_df["social"] == True) & (explore_hour_df["light"] == False)
]["n_bouts"]
dark_solo = explore_hour_df[
    (explore_hour_df["social"] == False) & (explore_hour_df["light"] == False)
]["n_bouts"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.02,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Text(0.02, 0.68, 'Two-sample t-tests:\nLight conditions: p = 1.31e-15\nDark conditions: p = 3.01e-10')
[Figure: Mean Number of Exploration Bouts by Social and Light Conditions]
"""Plot hist of durations of bouts."""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    subset = explore_dur_df[
        (explore_dur_df["social"] == social_val)
        & (explore_dur_df["light"] == light_val)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="duration",
        stat="probability",
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        # kde=True,  # Add kernel density estimate
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=2,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title(
    "Normalized Exploration Bout Duration Distributions by Social and Light Conditions"
)
ax.set_xlabel("Duration (mins)")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_ylim(0, 0.3)
[Figure: Normalized Exploration Bout Duration Distributions by Social and Light Conditions]
"""Plot bars of durations of bouts."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = explore_dur_df[
            (explore_dur_df["social"] == social_val)
            & (explore_dur_df["light"] == light_val)
        ]
        mean_duration = subset["duration"].mean()
        sem_duration = subset["duration"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_duration": mean_duration,
                "sem": sem_duration,
                "condition": f"{'Social' if social_val else 'Solo'}-{'Light' if light_val else 'Dark'}",
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_duration"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_duration"] + row["sem"] + 0.2,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Exploration Bout Duration by Social and Light Conditions")
ax.set_ylabel("Duration (minutes)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper left")
ax.xaxis.grid(False)

# Welch's two-sample t-tests (unequal variances)
light_social = explore_dur_df[
    (explore_dur_df["social"] == True) & (explore_dur_df["light"] == True)
]["duration"]
light_solo = explore_dur_df[
    (explore_dur_df["social"] == False) & (explore_dur_df["light"] == True)
]["duration"]

dark_social = explore_dur_df[
    (explore_dur_df["social"] == True) & (explore_dur_df["light"] == False)
]["duration"]
dark_solo = explore_dur_df[
    (explore_dur_df["social"] == False) & (explore_dur_df["light"] == False)
]["duration"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.02,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Text(0.02, 0.68, 'Two-sample t-tests:\nLight conditions: p = 1.79e-03\nDark conditions: p = 3.03e-08')
[Figure: Mean Exploration Bout Duration by Social and Light Conditions]
"""Plot hist of times of bouts over all hours."""

fig, ax = plt.subplots(figsize=(14, 8))

for i, social_val in enumerate([True, False]):
    subset = explore_dur_df[(explore_dur_df["social"] == social_val)]

    # Create the histogram
    hist = sns.histplot(
        data=subset,
        x=subset["start"].dt.hour,
        stat="probability",  # Normalize to show probability
        alpha=0.5,
        color="teal",
        label="Social" if social_val else "Solo",
        common_norm=False,  # Each condition normalized separately
        ax=ax,
        bins=24,  # 24 hours
        discrete=True,  # Since hours are discrete values
    )

    # Apply hatching pattern for social conditions
    if hatches[social_val]:
        # Apply the hatch pattern to each bar
        for patch in hist.patches:
            patch.set_hatch(hatches[social_val])

# Set x-tick labels for every hour
ax.set_xticks(range(0, 24))
ax.set_xticklabels([f"{h:02d}:00" for h in range(0, 24)], rotation=45)

# Customize axis labels and title
ax.set_title("Distribution of Exploration Bouts Throughout the Day")
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")
<matplotlib.legend.Legend at 0x755ef01cce90>
../../_images/6f1b1e1fad4633fbee81fef66cd2af08bf6116b3b484936e42dc9afe3a3f48b8.png

Foraging#

# Final dfs:
# 1. forage_hour_df: hour, n_pellets, dist_forage, n_bouts, exp, social-bool, subject, light-bool
# 2. forage_dur_df: start, duration(mins), exp, social-bool, subject, light-bool

forage_hour_df = pd.DataFrame(
    columns=[
        "hour",
        "n_bouts",
        "n_pellets",
        "dist_forage",
        "exp",
        "social",
        "subject",
        "light",
    ]
)
forage_dur_df = pd.DataFrame(
    columns=["start", "duration", "exp", "social", "subject", "light"]
)
# For each period
# Load foraging data
# Split into individual dfs
# Calculate hour-by-hour metrics and put into final df

exp_pbar = tqdm(experiments, desc="Experiments", position=0, leave=True)
for exp in exp_pbar:
    period_pbar = tqdm(periods, desc="Periods", position=1, leave=False)
    for period in period_pbar:
        forage_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="foraging",
            data_dir=data_dir,
            set_time_index=True,
        )
        for subject in forage_df["subject"].unique():
            forage_df_subj = forage_df[
                forage_df["subject"] == subject
            ].copy()  # copy to avoid SettingWithCopyWarning when adding columns
            forage_df_subj["hour"] = forage_df_subj["start"].dt.floor("h")
            hour_counts = pd.merge(
                forage_df_subj.groupby("hour").size().reset_index(name="n_bouts"),
                forage_df_subj.groupby("hour").agg(
                    n_pellets=("n_pellets", "sum"),
                    cum_wheel_dist=("cum_wheel_dist", "sum"),
                ),
                on="hour",
                how="left",
            )
            min_hour, max_hour = (
                forage_df_subj["hour"].min(),
                forage_df_subj["hour"].max(),
            )
            complete_hours = pd.DataFrame(
                {"hour": pd.date_range(start=min_hour, end=max_hour, freq="h")}
            )
            forage_df_subj_hour = pd.merge(
                complete_hours, hour_counts, on="hour", how="left"
            ).fillna(0)
            forage_df_subj_hour["n_bouts"] = forage_df_subj_hour["n_bouts"].astype(int)
            # Rename 'cum_wheel_dist' col
            forage_df_subj_hour.rename(
                columns={"cum_wheel_dist": "dist_forage"}, inplace=True
            )
            forage_df_subj_hour["exp"] = exp["name"]
            forage_df_subj_hour["social"] = period == "social"
            forage_df_subj_hour["subject"] = subject
            hour = forage_df_subj_hour["hour"].dt.hour
            forage_df_subj_hour["light"] = ~((hour > light_off) & (hour < light_on))
            forage_hour_df = pd.concat(
                [forage_hour_df, forage_df_subj_hour], ignore_index=True
            )

            forage_dur_subj = forage_df_subj[["start"]].copy()
            forage_dur_subj["duration"] = (
                forage_df_subj["end"] - forage_df_subj["start"]
            ).dt.total_seconds() / 60
            forage_dur_subj["exp"] = exp["name"]
            forage_dur_subj["social"] = period == "social"
            forage_dur_subj["subject"] = subject
            hour = forage_df_subj["start"].dt.hour
            forage_dur_subj["light"] = ~((hour > light_off) & (hour < light_on))
            forage_dur_df = pd.concat(
                [forage_dur_df, forage_dur_subj], ignore_index=True
            )
"""Foraging bouts per hour histogram."""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    subset = forage_hour_df[
        (forage_hour_df["social"] == social_val)
        & (forage_hour_df["light"] == light_val)
        & (forage_hour_df["n_pellets"] > 0)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="n_bouts",
        stat="probability",
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        # kde=True,  # Add kernel density estimate
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=1,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title("Normalized Foraging Bout Distributions by Social and Light Conditions")
ax.set_xlabel("Foraging bouts / hour")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_xlim(1, 15)
[Figure: Normalized Foraging Bout Distributions by Social and Light Conditions]
"""Foraging bouts per hour bars."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = forage_hour_df[
            (forage_hour_df["social"] == social_val)
            & (forage_hour_df["light"] == light_val)
        ]
        mean_n_bouts = subset["n_bouts"].mean()
        sem_n_bouts = subset["n_bouts"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_n_bouts": mean_n_bouts,
                "sem": sem_n_bouts,
                "condition": f"{'Social' if social_val else 'Solo'}-{'Light' if light_val else 'Dark'}",
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_n_bouts"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_n_bouts']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_n_bouts"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Number of Foraging Bouts per Hour by Social and Light Conditions")
ax.set_ylabel("Number of bouts / hour")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper left")
ax.xaxis.grid(False)

# Welch's two-sample t-tests (unequal variances)
light_social = forage_hour_df[
    (forage_hour_df["social"] == True) & (forage_hour_df["light"] == True)
]["n_bouts"]
light_solo = forage_hour_df[
    (forage_hour_df["social"] == False) & (forage_hour_df["light"] == True)
]["n_bouts"]

dark_social = forage_hour_df[
    (forage_hour_df["social"] == True) & (forage_hour_df["light"] == False)
]["n_bouts"]
dark_solo = forage_hour_df[
    (forage_hour_df["social"] == False) & (forage_hour_df["light"] == False)
]["n_bouts"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.02,
    0.68,
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=0.29, sem=0.02, n=1801
Plotting Social-Dark: mean=4.07, sem=0.08, n=1804
Plotting Solo-Light: mean=0.10, sem=0.02, n=491
Plotting Solo-Dark: mean=3.22, sem=0.17, n=467
Text(0.02, 0.68, 'Two-sample t-tests:\nLight conditions: p = 1.10e-12\nDark conditions: p = 4.51e-06')
[Figure: Mean Number of Foraging Bouts per Hour by Social and Light Conditions]
"""Foraging bouts duration histogram."""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    subset = forage_dur_df[
        (forage_dur_df["social"] == social_val) & (forage_dur_df["light"] == light_val)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="duration",
        stat="probability",
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        # kde=True,  # Add kernel density estimate
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=1,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title(
    "Normalized Foraging Bout Duration Distributions by Social and Light Conditions"
)
ax.set_xlabel("Duration (mins)")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_xlim(0, 20)
# ax.set_ylim(0, 0.3)
[Figure: Normalized Foraging Bout Duration Distributions by Social and Light Conditions]
"""Foraging bouts duration bars."""

max_forage_thresh = 30  # in minutes

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = forage_dur_df[
            (forage_dur_df["social"] == social_val)
            & (forage_dur_df["light"] == light_val)
            & (forage_dur_df["duration"] < max_forage_thresh)
        ]
        mean_duration = subset["duration"].mean()
        sem_duration = subset["duration"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_duration": mean_duration,
                "sem": sem_duration,
                "condition": f"{'Social' if social_val else 'Solo'}-{'Light' if light_val else 'Dark'}",
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_duration"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_duration']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_duration"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Duration of Foraging Bouts by Social and Light Conditions")
ax.set_ylabel("Mean Duration (minutes)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper right")
ax.xaxis.grid(False)

# Welch's two-sample t-tests (unequal variances)

light_social = forage_dur_df[
    (forage_dur_df["social"] == True)
    & (forage_dur_df["light"] == True)
    & (forage_dur_df["duration"] < max_forage_thresh)
]["duration"]
light_solo = forage_dur_df[
    (forage_dur_df["social"] == False)
    & (forage_dur_df["light"] == True)
    & (forage_dur_df["duration"] < max_forage_thresh)
]["duration"]

dark_social = forage_dur_df[
    (forage_dur_df["social"] == True)
    & (forage_dur_df["light"] == False)
    & (forage_dur_df["duration"] < max_forage_thresh)
]["duration"]
dark_solo = forage_dur_df[
    (forage_dur_df["social"] == False)
    & (forage_dur_df["light"] == False)
    & (forage_dur_df["duration"] < max_forage_thresh)
]["duration"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.80,
    0.68,
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=3.94, sem=0.12, n=518
Plotting Social-Dark: mean=3.01, sem=0.02, n=7350
Plotting Solo-Light: mean=7.40, sem=0.60, n=48
Plotting Solo-Dark: mean=3.31, sem=0.06, n=1504
Text(0.8, 0.68, 'Two-sample t-tests:\nLight conditions: p = 7.01e-07\nDark conditions: p = 1.70e-06')
[Figure: Mean Duration of Foraging Bouts by Social and Light Conditions]
"""Foraging bouts over all hours histogram."""

fig, ax = plt.subplots(figsize=(14, 8))

for i, social_val in enumerate([True, False]):
    subset = forage_dur_df[(forage_dur_df["social"] == social_val)]

    # Create the histogram
    hist = sns.histplot(
        data=subset,
        x=subset["start"].dt.hour,
        stat="probability",  # Normalize to show probability
        alpha=0.5,
        color="teal",
        label="Social" if social_val else "Solo",
        common_norm=False,  # Each condition normalized separately
        ax=ax,
        bins=24,  # 24 hours
        discrete=True,  # Since hours are discrete values
    )

    # Apply hatching pattern for social conditions
    if hatches[social_val]:
        # Apply the hatch pattern to each bar
        for patch in hist.patches:
            patch.set_hatch(hatches[social_val])

# Set x-tick labels for every hour
ax.set_xticks(range(0, 24))
ax.set_xticklabels([f"{h:02d}:00" for h in range(0, 24)], rotation=45)

# Customize axis labels and title
ax.set_title("Distribution of Foraging Bouts Throughout the Day")
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")
<matplotlib.legend.Legend at 0x755ee7c90d50>
../../_images/826df014abfd34801b7b68172360e4f1acbaea4d61b40edfebfe99a00d3e772d.png
"""Pellet rate per hour histogram."""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    subset = forage_hour_df[
        (forage_hour_df["social"] == social_val)
        & (forage_hour_df["light"] == light_val)
        & (forage_hour_df["n_pellets"] > 0)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="n_pellets",
        stat="probability",
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        # kde=True,  # Add kernel density estimate
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=1,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title("Normalized Pellet Rate Distributions by Social and Light Conditions")
ax.set_xlabel("Number of pellets / hour")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_xlim(3, 35)
[Figure: Normalized Pellet Rate Distributions by Social and Light Conditions]
"""Pellet rate per hour bars."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = forage_hour_df[
            (forage_hour_df["social"] == social_val)
            & (forage_hour_df["light"] == light_val)
        ]
        mean_n_pellets = subset["n_pellets"].mean()
        sem_n_pellets = subset["n_pellets"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_n_pellets": mean_n_pellets,
                "sem": sem_n_pellets,
                "condition": f"{'Social' if social_val else 'Solo'}-{'Light' if light_val else 'Dark'}",
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_n_pellets"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_n_pellets']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_n_pellets"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Number of Pellets per hour by Social and Light Conditions")
ax.set_ylabel("Number of pellets / hour")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper left")
ax.xaxis.grid(False)

light_social = forage_hour_df[
    (forage_hour_df["social"] == True) & (forage_hour_df["light"] == True)
]["n_pellets"]
light_solo = forage_hour_df[
    (forage_hour_df["social"] == False) & (forage_hour_df["light"] == True)
]["n_pellets"]

dark_social = forage_hour_df[
    (forage_hour_df["social"] == True) & (forage_hour_df["light"] == False)
]["n_pellets"]
dark_solo = forage_hour_df[
    (forage_hour_df["social"] == False) & (forage_hour_df["light"] == False)
]["n_pellets"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.02,
    0.68,  # Position below the legend (since legend is upper left)
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=1.13, sem=0.08, n=1801
Plotting Social-Dark: mean=11.53, sem=0.22, n=1804
Plotting Solo-Light: mean=0.83, sem=0.15, n=491
Plotting Solo-Dark: mean=12.33, sem=0.54, n=467
Text(0.02, 0.68, 'Two-sample t-tests:\nLight conditions: p = 8.50e-02\nDark conditions: p = 1.70e-01')
[Figure: Mean Number of Pellets per Hour by Social and Light Conditions]
"""Distance foraged rate per hour histogram."""

fig, ax = plt.subplots(figsize=(14, 8))

# Plot histograms for each combination
for i, (social_val, light_val) in enumerate(combos):
    subset = forage_hour_df[
        (forage_hour_df["social"] == social_val)
        & (forage_hour_df["light"] == light_val)
        & (forage_hour_df["n_pellets"] > 0)
    ]
    # Plot normalized histogram
    hist = sns.histplot(
        data=subset,
        x="dist_forage",
        stat="probability",
        alpha=0.5,
        color=colors[light_val],
        label=labels[i],
        # kde=True,  # Add kernel density estimate
        common_norm=False,  # Ensure each histogram is normalized separately
        ax=ax,
        binwidth=500,
    )

    # Set hatch pattern for bars
    if hatches[social_val]:
        for bar in hist.patches:
            bar.set_hatch(hatches[social_val])

ax.set_title("Normalized Distance Foraged Distributions by Social and Light Conditions")
ax.set_xlabel("Distance foraged / hour")
ax.set_ylabel("Probability")
ax.legend(title="Conditions")

ax.set_xlim(0, 15000)
[Figure: Normalized Distance Foraged Distributions by Social and Light Conditions]
"""Distance foraged rate per hour bars."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in [True, False]:
    for light_val in [True, False]:
        subset = forage_hour_df[
            (forage_hour_df["social"] == social_val)
            & (forage_hour_df["light"] == light_val)
        ]
        mean_dist_forage = subset["dist_forage"].mean()
        sem_dist_forage = subset["dist_forage"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_dist_forage": mean_dist_forage,
                "sem": sem_dist_forage,
                "condition": f"{'Social' if social_val else 'Solo'}-{'Light' if light_val else 'Dark'}",
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_dist_forage"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_dist_forage']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val]:
        bar[0].set_hatch(hatches[social_val])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_dist_forage"] + row["sem"] + 10,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Distance Foraged per hour by Social and Light Conditions")
ax.set_ylabel("Distance foraged / hour (cm)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper left")
ax.xaxis.grid(False)

light_social = forage_hour_df[
    (forage_hour_df["social"] == True) & (forage_hour_df["light"] == True)
]["dist_forage"]
light_solo = forage_hour_df[
    (forage_hour_df["social"] == False) & (forage_hour_df["light"] == True)
]["dist_forage"]

dark_social = forage_hour_df[
    (forage_hour_df["social"] == True) & (forage_hour_df["light"] == False)
]["dist_forage"]
dark_solo = forage_hour_df[
    (forage_hour_df["social"] == False) & (forage_hour_df["light"] == False)
]["dist_forage"]

# Welch's two-sample t-tests (unequal variances)

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.02,
    0.68,  # Position below the legend (since legend is upper left)
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=371.06, sem=25.59, n=1801
Plotting Social-Dark: mean=3642.21, sem=78.83, n=1804
Plotting Solo-Light: mean=369.63, sem=66.23, n=491
Plotting Solo-Dark: mean=4552.04, sem=229.82, n=467
Two-sample t-tests: Light conditions p = 9.84e-01; Dark conditions p = 1.99e-04
[Figure: Mean Distance Foraged per hour by Social and Light Conditions]
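`stats.ttest_ind(..., equal_var=False)` runs Welch's unequal-variance t-test, whose statistic is

\[
t = \frac{\bar{x}_1 - \bar{x}_2}{\sqrt{s_1^2/n_1 + s_2^2/n_2}}
\]

Because per-hour foraging distances can be heavy-tailed, a rank-based test is a sensible robustness check. A minimal sketch using SciPy's Mann-Whitney U (the Wilcoxon rank-sum test), reusing the cleaned `light_social`/`light_solo`/`dark_social`/`dark_solo` series from the cell above:

# Robustness check (sketch): Mann-Whitney U (Wilcoxon rank-sum) instead of
# Welch's t-test; rank-based, so insensitive to heavy right tails
u_light, p_light = stats.mannwhitneyu(light_social, light_solo, alternative="two-sided")
u_dark, p_dark = stats.mannwhitneyu(dark_social, dark_solo, alternative="two-sided")
print(f"Mann-Whitney U: light p = {p_light:.2e}, dark p = {p_dark:.2e}")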

Sleeping#

  • n_bouts / hour

  • duration of bouts

  • total time spent sleeping / hour

sleep_dur_df = pd.DataFrame(
    columns=["subject", "start", "end", "duration", "period", "light"]
)
sleep_hour_df = pd.DataFrame(
    columns=["subject", "hour", "n_bouts", "duration", "period", "light"]
)

exp_pbar = tqdm(experiments, desc="Experiments", position=0, leave=True)
for exp in exp_pbar:
    period_pbar = tqdm(periods, desc="Periods", position=1, leave=False)
    for period in period_pbar:
        sleep_bouts_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="sleep",
            data_dir=data_dir,
            set_time_index=True,
        )

        # Get sleep bout durations
        hour = sleep_bouts_df["start"].dt.hour
        sleep_bouts_df["light"] = ~((hour > light_off) & (hour < light_on))
        sleep_dur_df = pd.concat([sleep_dur_df, sleep_bouts_df], ignore_index=True)

        # Get n sleep bouts and total duration per hour
        for subject in sleep_bouts_df["subject"].unique():
            sleep_df_subj = sleep_bouts_df[sleep_bouts_df["subject"] == subject].copy()
            sleep_df_subj["hour"] = sleep_df_subj["start"].dt.floor("h")
            hour_stats = (
                sleep_df_subj.groupby("hour")
                .agg({"duration": ["count", "sum"]})
                .reset_index()
            )
            hour_stats.columns = ["hour", "n_bouts", "duration"]

            min_hour, max_hour = (
                sleep_df_subj["hour"].min(),
                sleep_df_subj["hour"].max(),
            )
            complete_hours = pd.DataFrame(
                {"hour": pd.date_range(start=min_hour, end=max_hour, freq="h")}
            )
            sleep_df_subj_hour = pd.merge(
                complete_hours, hour_stats, on="hour", how="left"
            ).fillna(0)
            sleep_df_subj_hour["n_bouts"] = sleep_df_subj_hour["n_bouts"].astype(int)
            sleep_df_subj_hour["period"] = period
            sleep_df_subj_hour["subject"] = subject
            hour = sleep_df_subj_hour["hour"].dt.hour
            sleep_df_subj_hour["light"] = ~((hour > light_off) & (hour < light_on))
            sleep_hour_df = pd.concat(
                [sleep_hour_df, sleep_df_subj_hour], ignore_index=True
            )

sleep_dur_df["duration"] = (
    pd.to_timedelta(sleep_dur_df["duration"]).dt.total_seconds() / 60
)
sleep_hour_df["duration"] = (
    pd.to_timedelta(sleep_hour_df["duration"]).dt.total_seconds() / 60
)
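As an aside, the per-subject hourly aggregation above (floor to hour, group, then merge against a complete hourly range) can be written more compactly with pandas resampling, which fills empty hours within each subject's observed span automatically. A sketch, where `bouts` is hypothetical and stands in for one loaded experiment-period bout DataFrame:

# Sketch: equivalent per-hour bout counts/durations via resample
# (`bouts` is one loaded DataFrame with a datetime "start" column and a
# "duration" column, as returned by load_data_from_parquet above)
hourly = (
    bouts.set_index("start")
    .groupby("subject")["duration"]
    .resample("h")
    .agg(["count", "sum"])
    .rename(columns={"count": "n_bouts", "sum": "duration"})
    .reset_index()
    .rename(columns={"start": "hour"})
)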
"""Plot bars of bouts per hour"""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in ["social", "postsocial"]:
    for light_val in [True, False]:
        subset = sleep_hour_df[
            (sleep_hour_df["period"] == social_val)
            & (sleep_hour_df["light"] == light_val)
        ]
        mean_n_bouts = subset["n_bouts"].mean()
        sem_n_bouts = subset["n_bouts"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_n_bouts": mean_n_bouts,
                "sem": sem_n_bouts,
                "condition": (
                    f"{'Social' if social_val == 'social' else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}"
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_n_bouts"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_n_bouts']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val == "social"]:
        bar[0].set_hatch(hatches[social_val == "social"])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_n_bouts"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Number of Sleeping Bouts per hour by Social and Light Conditions")
ax.set_ylabel("Number of bouts / hour")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper center")
ax.xaxis.grid(False)

# Perform stats tests
light_social = sleep_hour_df[
    (sleep_hour_df["period"] == "social") & (sleep_hour_df["light"] == True)
]["n_bouts"]
light_solo = sleep_hour_df[
    (sleep_hour_df["period"] == "postsocial") & (sleep_hour_df["light"] == True)
]["n_bouts"]

dark_social = sleep_hour_df[
    (sleep_hour_df["period"] == "social") & (sleep_hour_df["light"] == False)
]["n_bouts"]
dark_solo = sleep_hour_df[
    (sleep_hour_df["period"] == "postsocial") & (sleep_hour_df["light"] == False)
]["n_bouts"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}"
    f"\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.40,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=3.44, sem=0.05, n=1834
Plotting Social-Dark: mean=2.06, sem=0.05, n=1815
Plotting Solo-Light: mean=1.04, sem=0.05, n=528
Plotting Solo-Dark: mean=2.26, sem=0.07, n=515
Two-sample t-tests: Light conditions p = 7.02e-86; Dark conditions p = 1.53e-02
[Figure: Mean Number of Sleeping Bouts per hour by Social and Light Conditions]
"""Plot bars of durations of bouts."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in ["social", "postsocial"]:
    for light_val in [True, False]:
        subset = sleep_dur_df[
            (sleep_dur_df["period"] == social_val)
            & (sleep_dur_df["light"] == light_val)
        ]
        mean_duration = subset["duration"].mean()
        sem_duration = subset["duration"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_duration": mean_duration,
                "sem": sem_duration,
                "condition": (
                    f"{'Social' if social_val == 'social' else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}"
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_duration"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_duration']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val == "social"]:
        bar[0].set_hatch(hatches[social_val == "social"])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_duration"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Sleeping Bout Duration by Social and Light Conditions")
ax.set_ylabel("Duration (minutes)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper center")
ax.xaxis.grid(False)

# Perform stats tests
light_social = sleep_dur_df[
    (sleep_dur_df["period"] == "social") & (sleep_dur_df["light"] == True)
]["duration"]
light_solo = sleep_dur_df[
    (sleep_dur_df["period"] == "postsocial") & (sleep_dur_df["light"] == True)
]["duration"]

dark_social = sleep_dur_df[
    (sleep_dur_df["period"] == "social") & (sleep_dur_df["light"] == False)
]["duration"]
dark_solo = sleep_dur_df[
    (sleep_dur_df["period"] == "postsocial") & (sleep_dur_df["light"] == False)
]["duration"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}"
    f"\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.40,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=6.33, sem=0.09, n=6308
Plotting Social-Dark: mean=4.20, sem=0.09, n=3743
Plotting Solo-Light: mean=48.14, sem=2.33, n=547
Plotting Solo-Dark: mean=11.79, sem=0.73, n=1164
Two-sample t-tests: Light conditions p = 2.65e-65; Dark conditions p = 5.44e-24
[Figure: Mean Sleeping Bout Duration by Social and Light Conditions]
"""Total time spent sleeping per hour."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in ["social", "postsocial"]:
    for light_val in [True, False]:
        subset = sleep_hour_df[
            (sleep_hour_df["period"] == social_val)
            & (sleep_hour_df["light"] == light_val)
        ]
        mean_duration = subset["duration"].mean()
        sem_duration = subset["duration"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_duration": mean_duration,
                "sem": sem_duration,
                "condition": (
                    f"{'Social' if social_val == 'social' else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}"
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_duration"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_duration']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val == "social"]:
        bar[0].set_hatch(hatches[social_val == "social"])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_duration"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Sleeping Time per hour by Social and Light Conditions")
ax.set_ylabel("Duration (minutes)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper center")
ax.xaxis.grid(False)

# Perform stats tests
light_social = sleep_hour_df[
    (sleep_hour_df["period"] == "social") & (sleep_hour_df["light"] == True)
]["duration"]
light_solo = sleep_hour_df[
    (sleep_hour_df["period"] == "postsocial") & (sleep_hour_df["light"] == True)
]["duration"]

dark_social = sleep_hour_df[
    (sleep_hour_df["period"] == "social") & (sleep_hour_df["light"] == False)
]["duration"]
dark_solo = sleep_hour_df[
    (sleep_hour_df["period"] == "postsocial") & (sleep_hour_df["light"] == False)
]["duration"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}"
    f"\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.40,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=21.78, sem=0.38, n=1834
Plotting Social-Dark: mean=8.66, sem=0.28, n=1815
Plotting Solo-Light: mean=49.87, sem=2.53, n=528
Plotting Solo-Dark: mean=26.64, sem=1.59, n=515
Two-sample t-tests: Light conditions p = 7.87e-151; Dark conditions p = 3.93e-26
[Figure: Mean Sleeping Time per hour by Social and Light Conditions]

Drinking#

  • n_bouts / hour

  • duration of bouts

  • total time spent drinking / hour

drink_dur_df = pd.DataFrame(
    columns=["subject", "start", "end", "duration", "period", "light"]
)
drink_hour_df = pd.DataFrame(
    columns=["subject", "hour", "n_bouts", "duration", "period", "light"]
)

exp_pbar = tqdm(experiments, desc="Experiments", position=0, leave=True)
for exp in exp_pbar:
    if exp["name"] == "social0.3-aeon4":
        continue  # Skip this experiment as the data is not available
    period_pbar = tqdm(periods, desc="Periods", position=1, leave=False)
    for period in period_pbar:
        drink_bouts_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="drink",
            data_dir=data_dir,
            set_time_index=True,
        )

        # Get drink bout durations
        hour = drink_bouts_df["start"].dt.hour
        drink_bouts_df["light"] = ~((hour > light_off) & (hour < light_on))
        drink_dur_df = pd.concat([drink_dur_df, drink_bouts_df], ignore_index=True)

        # Get n drink bouts and total duration per hour
        for subject in drink_bouts_df["subject"].unique():
            drink_df_subj = drink_bouts_df[
                drink_bouts_df["subject"] == subject
            ].copy()
            drink_df_subj["hour"] = drink_df_subj["start"].dt.floor("h")
            hour_stats = (
                drink_df_subj.groupby("hour")
                .agg({"duration": ["count", "sum"]})
                .reset_index()
            )
            hour_stats.columns = ["hour", "n_bouts", "duration"]

            min_hour, max_hour = (
                drink_df_subj["hour"].min(),
                drink_df_subj["hour"].max(),
            )
            complete_hours = pd.DataFrame(
                {"hour": pd.date_range(start=min_hour, end=max_hour, freq="h")}
            )
            drink_df_subj_hour = pd.merge(
                complete_hours, hour_stats, on="hour", how="left"
            ).fillna(0)
            drink_df_subj_hour["n_bouts"] = drink_df_subj_hour["n_bouts"].astype(int)
            drink_df_subj_hour["period"] = period
            drink_df_subj_hour["subject"] = subject
            hour = drink_df_subj_hour["hour"].dt.hour
            drink_df_subj_hour["light"] = ~((hour > light_off) & (hour < light_on))
            drink_hour_df = pd.concat(
                [drink_hour_df, drink_df_subj_hour], ignore_index=True
            )

drink_dur_df["duration"] = (
    pd.to_timedelta(drink_dur_df["duration"]).dt.total_seconds() / 60
)
drink_hour_df["duration"] = (
    pd.to_timedelta(drink_hour_df["duration"]).dt.total_seconds() / 60
)
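The sleeping and drinking pipelines above are identical apart from the data_type string and one skipped experiment, so they could be collapsed into a single helper. A sketch under those assumptions; it relies on the `experiments`, `periods`, `data_dir`, and `light_on`/`light_off` globals and the `load_data_from_parquet` helper defined earlier (progress bars omitted for brevity):

def build_bout_dfs(data_type, skip=()):
    """Return (per-bout, per-hour) DataFrames for a bout data type ("sleep"/"drink")."""
    dur_frames, hour_frames = [], []
    for exp in experiments:
        if exp["name"] in skip:
            continue
        for period in periods:
            bouts = load_data_from_parquet(
                experiment_name=exp["name"],
                period=period,
                data_type=data_type,
                data_dir=data_dir,
                set_time_index=True,
            )
            # Label each bout light/dark by its start hour
            hour = bouts["start"].dt.hour
            bouts["light"] = ~((hour > light_off) & (hour < light_on))
            dur_frames.append(bouts)
            # Per-subject hourly bout counts and total durations
            for subject, subj_df in bouts.groupby("subject"):
                subj_df = subj_df.copy()
                subj_df["hour"] = subj_df["start"].dt.floor("h")
                hour_stats = (
                    subj_df.groupby("hour")["duration"]
                    .agg(n_bouts="count", duration="sum")
                    .reset_index()
                )
                all_hours = pd.DataFrame(
                    {
                        "hour": pd.date_range(
                            subj_df["hour"].min(), subj_df["hour"].max(), freq="h"
                        )
                    }
                )
                subj_hour = all_hours.merge(hour_stats, on="hour", how="left").fillna(0)
                subj_hour["n_bouts"] = subj_hour["n_bouts"].astype(int)
                subj_hour["period"] = period
                subj_hour["subject"] = subject
                h = subj_hour["hour"].dt.hour
                subj_hour["light"] = ~((h > light_off) & (h < light_on))
                hour_frames.append(subj_hour)
    dur_df = pd.concat(dur_frames, ignore_index=True)
    hour_df = pd.concat(hour_frames, ignore_index=True)
    for df in (dur_df, hour_df):
        df["duration"] = pd.to_timedelta(df["duration"]).dt.total_seconds() / 60
    return dur_df, hour_df

# e.g. drink_dur_df, drink_hour_df = build_bout_dfs("drink", skip={"social0.3-aeon4"})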
"""Number of drinking bouts per hour bars."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in ["social", "postsocial"]:
    for light_val in [True, False]:
        subset = drink_hour_df[
            (drink_hour_df["period"] == social_val)
            & (drink_hour_df["light"] == light_val)
        ]
        mean_n_bouts = subset["n_bouts"].mean()
        sem_n_bouts = subset["n_bouts"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_n_bouts": mean_n_bouts,
                "sem": sem_n_bouts,
                "condition": (
                    f"{'Social' if social_val == 'social' else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}"
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_n_bouts"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    print(
        f"Plotting {row['condition']}: mean={row['mean_n_bouts']:.2f}, sem={row['sem']:.2f}, n={row['n']}"
    )

    # Apply hatching for social conditions
    if hatches[social_val == "social"]:
        bar[0].set_hatch(hatches[social_val == "social"])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_n_bouts"] + row["sem"] + 0.1,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Number of Drinking Bouts per hour by Social and Light Conditions")
ax.set_ylabel("Number of bouts / hour")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
# ax.set_ylim([0, 2.01])
ax.legend(title="Conditions", loc="upper center")
ax.xaxis.grid(False)

# Perform stats tests
light_social = drink_hour_df[
    (drink_hour_df["period"] == "social") & (drink_hour_df["light"] == True)
]["n_bouts"]
light_solo = drink_hour_df[
    (drink_hour_df["period"] == "postsocial") & (drink_hour_df["light"] == True)
]["n_bouts"]

dark_social = drink_hour_df[
    (drink_hour_df["period"] == "social") & (drink_hour_df["light"] == False)
]["n_bouts"]
dark_solo = drink_hour_df[
    (drink_hour_df["period"] == "postsocial") & (drink_hour_df["light"] == False)
]["n_bouts"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}"
    f"\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.40,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Plotting Social-Light: mean=6.59, sem=0.33, n=1498
Plotting Social-Dark: mean=12.53, sem=0.32, n=1498
Plotting Solo-Light: mean=2.09, sem=0.18, n=432
Plotting Solo-Dark: mean=10.43, sem=0.41, n=430
Two-sample t-tests: Light conditions p = 2.30e-36; Dark conditions p = 6.68e-05
[Figure: Mean Number of Drinking Bouts per hour by Social and Light Conditions]
"""Plot bars of durations of bouts."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in ["social", "postsocial"]:
    for light_val in [True, False]:
        subset = drink_dur_df[
            (drink_dur_df["period"] == social_val)
            & (drink_dur_df["light"] == light_val)
        ]
        mean_duration = subset["duration"].mean()
        sem_duration = subset["duration"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_duration": mean_duration,
                "sem": sem_duration,
                "condition": (
                    f"{'Social' if social_val == 'social' else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}"
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_duration"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    # Apply hatching for social conditions
    if hatches[social_val == "social"]:
        bar[0].set_hatch(hatches[social_val == "social"])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_duration"] + row["sem"] + 0.01,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Drinking Bout Duration by Social and Light Conditions")
ax.set_ylabel("Duration (minutes)")
ax.set_xticks(x_pos)
ax.set_ylim([0, 0.351])
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper center")
ax.xaxis.grid(False)

# Perform stats tests
light_social = drink_dur_df[
    (drink_dur_df["period"] == "social") & (drink_dur_df["light"] == True)
]["duration"]
light_solo = drink_dur_df[
    (drink_dur_df["period"] == "postsocial") & (drink_dur_df["light"] == True)
]["duration"]

dark_social = drink_dur_df[
    (drink_dur_df["period"] == "social") & (drink_dur_df["light"] == False)
]["duration"]
dark_solo = drink_dur_df[
    (drink_dur_df["period"] == "postsocial") & (drink_dur_df["light"] == False)
]["duration"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}"
    f"\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.40,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Two-sample t-tests: Light conditions p = 3.68e-64; Dark conditions p = 1.10e-04
[Figure: Mean Drinking Bout Duration by Social and Light Conditions]
"""Total time spent drinking per hour."""

fig, ax = plt.subplots(figsize=(14, 8))

summary_data = []
for social_val in ["social", "postsocial"]:
    for light_val in [True, False]:
        subset = drink_hour_df[
            (drink_hour_df["period"] == social_val)
            & (drink_hour_df["light"] == light_val)
        ]
        mean_duration = subset["duration"].mean()
        sem_duration = subset["duration"].sem()
        n_samples = len(subset)
        summary_data.append(
            {
                "social": social_val,
                "light": light_val,
                "mean_duration": mean_duration,
                "sem": sem_duration,
                "condition": (
                    f"{'Social' if social_val == 'social' else 'Solo'}-"
                    f"{'Light' if light_val else 'Dark'}"
                ),
                "n": n_samples,
            }
        )
summary_df = pd.DataFrame(summary_data)

# Set up positions for the bars
bar_width = 0.5
x_pos = np.array([0.25, 2.25, 0.75, 2.75])  # create two groups with a gap in the middle

# Plot bars
for i, row in enumerate(summary_data):
    pos = x_pos[i]
    social_val = row["social"]
    light_val = row["light"]

    bar = ax.bar(
        pos,
        row["mean_duration"],
        bar_width,
        yerr=row["sem"],
        color=colors[light_val],
        edgecolor="black",
        capsize=7,
        label=row["condition"],
    )

    # Apply hatching for social conditions
    if hatches[social_val == "social"]:
        bar[0].set_hatch(hatches[social_val == "social"])

    # Add sample size as text above each bar
    sample_size_txt = ax.text(
        pos,
        row["mean_duration"] + row["sem"] + 0.01,
        f"n={row['n']}",
        ha="center",
        va="bottom",
    )
    sample_size_txt.set_fontsize(11)

ax.set_title("Mean Drinking Time per hour by Social and Light Conditions")
ax.set_ylabel("Duration (minutes)")
ax.set_xticks(x_pos)
ax.set_xticklabels(["Social\nLight", "Social\nDark", "Solo\nLight", "Solo\nDark"])
ax.legend(title="Conditions", loc="upper center")
ax.xaxis.grid(False)

# Perform stats tests
light_social = drink_hour_df[
    (drink_hour_df["period"] == "social") & (drink_hour_df["light"] == True)
]["duration"]
light_solo = drink_hour_df[
    (drink_hour_df["period"] == "postsocial") & (drink_hour_df["light"] == True)
]["duration"]

dark_social = drink_hour_df[
    (drink_hour_df["period"] == "social") & (drink_hour_df["light"] == False)
]["duration"]
dark_solo = drink_hour_df[
    (drink_hour_df["period"] == "postsocial") & (drink_hour_df["light"] == False)
]["duration"]

light_social = pd.to_numeric(light_social, errors="coerce").dropna()
light_solo = pd.to_numeric(light_solo, errors="coerce").dropna()
dark_social = pd.to_numeric(dark_social, errors="coerce").dropna()
dark_solo = pd.to_numeric(dark_solo, errors="coerce").dropna()

light_stat, light_p = stats.ttest_ind(
    light_social, light_solo, alternative="two-sided", equal_var=False
)
dark_stat, dark_p = stats.ttest_ind(
    dark_social, dark_solo, alternative="two-sided", equal_var=False
)

test_text = (
    f"Two-sample t-tests:\n"
    f"Light conditions: p = {light_p:.2e}"
    f"\nDark conditions: p = {dark_p:.2e}"
)
props = dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8)
ax.text(
    0.40,
    0.68,  # Position below the legend
    test_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    bbox=props,
)
Two-sample t-tests: Light conditions p = 9.74e-13; Dark conditions p = 6.84e-07
[Figure: Mean Drinking Time per hour by Social and Light Conditions]

Solo vs. Social Learning#

learning_df = pd.DataFrame(  # per-block, per-subject
    columns=[
        "experiment_name",
        "period",
        "block_start",
        "block_type",  # "lll", "lmh", or "hhh"
        "block_type_rate",  # "l" (100, 300, 500) or "h" (200, 600, 1000)
        "subject_name",
        "pel_thresh",  # sorted by time
        "pel_patch",  # "l", "m", or "h"
        "running_patch_pref_low",  # every X foraging dist
        "running_patch_pref_high",  # every X foraging dist
        "final_patch_pref_low",  # final patch pref
        "final_patch_pref_high",  # final patch pref
        "dist_forage_low",  # final distance foraged
        "dist_forage_med",  # final distance foraged
        "dist_forage_high",  # final distance foraged
    ]
)
def find_first_x_indxs(
    dist_forage: np.ndarray, dist_threshold: np.ndarray
) -> np.ndarray:
    """For each value in dist_threshold, find the first index at which the
    cumulative dist_forage reaches it; thresholds never reached are dropped."""
    idxs = np.searchsorted(dist_forage, dist_threshold)
    return idxs[idxs < len(dist_forage)]
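Because `dist_forage` is a cumulative (non-decreasing) distance trace, `np.searchsorted` returns, for each threshold, the first sample index at which that distance is reached; thresholds beyond the end of the trace are dropped, so the output can be shorter than the input. A toy illustration with made-up numbers:

dist = np.array([0.0, 150.0, 390.0, 820.0, 1300.0])  # hypothetical cumulative cm
print(find_first_x_indxs(dist, np.array([400.0, 900.0, 2000.0])))  # [3 4]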
def create_patch_name_type_map(block_start, subject_name, patch_df):
    # Filter patch_df for this specific block_start and subject_name
    relevant_patches = patch_df[
        (patch_df["block_start"] == block_start)
        & (patch_df["subject_name"] == subject_name)
    ]

    # Initialize the mapping dictionary
    patch_name_type_map = {"l": [], "m": [], "h": []}

    # Group by patch_type and collect patch_names
    for patch_type, group in relevant_patches.groupby("patch_type"):
        patch_names = group["patch_name"].unique().tolist()
        patch_name_type_map[patch_type] = patch_names

    return patch_name_type_map
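# Hypothetical usage for an "lmh" block (patch names illustrative only):
#   create_patch_name_type_map(block_start, subject_name, patch_df)
#   -> {"l": ["Patch2"], "m": ["Patch1"], "h": ["Patch3"]}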
pref_every = np.arange(0, 16000, 400)  # cm
frg_blk_pel_thresh = 3  # pellets

exp_pbar = tqdm(experiments, desc="Experiments", position=0, leave=True)
for exp in exp_pbar:
    period_pbar = tqdm(periods, desc="Periods", position=1, leave=False)
    for period in period_pbar:
        cur_learning_df = pd.DataFrame(columns=learning_df.columns)

        # <s> Load all relevant patch data
        patchinfo_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="patchinfo",
            data_dir=data_dir,
            set_time_index=True,
        )
        patch_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="patch",
            data_dir=data_dir,
            set_time_index=True,
        )
        patchpref_df = load_data_from_parquet(
            experiment_name=exp["name"],
            period=period,
            data_type="patchpref",
            data_dir=data_dir,
            set_time_index=True,
        )
        # </s>

        # <s> Clean up `patchinfo_df` and `patch_df`
        patch_df = patch_df[patch_df["patch_name"] != "PatchDummy1"]
        patchinfo_df = patchinfo_df[patchinfo_df["patch_name"] != "PatchDummy1"]

        # Drop blocks where 'patch_rate' is NaN or None
        nan_patch_rate_rows = patchinfo_df[patchinfo_df["patch_rate"].isna()]
        unique_block_starts_to_drop = nan_patch_rate_rows["block_start"].unique()
        if len(unique_block_starts_to_drop) != 0:
            warn(
                f"{exp['name']} {period} blocks with missing patch rate(s): "
                f"{unique_block_starts_to_drop}",
                stacklevel=1,
            )
            patchinfo_df = patchinfo_df[
                ~patchinfo_df["block_start"].isin(unique_block_starts_to_drop)
            ]
            patch_df = patch_df[
                ~patch_df["block_start"].isin(unique_block_starts_to_drop)
            ]

        # patch_df = patch_df[patch_df["pellet_count"] > 0]

        # Get patch type per row: for each row in `patch_df`, find the equivalent row in
        # `patchinfo_df` (based on 'block_start' and 'patch_name'), and get the patch_type
        # from the map.
        patchinfo_lookup = patchinfo_df.set_index(["block_start", "patch_name"])[
            "patch_rate"
        ].to_dict()

        patch_df["patch_type"] = patch_df.apply(
            lambda row: patch_type_rate_map[
                patchinfo_lookup[(row["block_start"], row["patch_name"])]
            ],
            axis=1,
        )
        patch_df["patch_type_per_pellet"] = patch_df.apply(
            lambda row: np.full(len(row["pellet_timestamps"]), row["patch_type"]),
            axis=1,
        )
        # </s>

        # <s> Get pel_thresh and pel_patch cols
        patch_df_block_subj = patch_df.groupby(["block_start", "subject_name"]).agg(
            dist_forage=("wheel_cumsum_distance_travelled", lambda x: x.sum()),
            pellet_count=("pellet_count", lambda x: x.sum()),
            pellet_threshold=("patch_threshold", lambda x: np.concatenate(x.values)),
            pellet_timestamp=("pellet_timestamps", lambda x: np.concatenate(x.values)),
            patch_type=("patch_type_per_pellet", lambda x: np.concatenate(x.values)),
        )
        patch_df_block_subj = patch_df_block_subj[
            patch_df_block_subj["pellet_count"] >= frg_blk_pel_thresh
        ]
        patch_df_block_subj.reset_index(inplace=True)

        # for each row, get patch_threshold sorted ascending by pellet_timestamps
        cur_learning_df["pel_thresh"] = patch_df_block_subj.apply(
            lambda row: np.array(row["pellet_threshold"])[
                np.argsort(row["pellet_timestamp"])
            ],
            axis=1,
        )
        cur_learning_df["pel_patch"] = patch_df_block_subj.apply(
            lambda row: np.array(row["patch_type"])[
                np.argsort(row["pellet_timestamp"])
            ],
            axis=1,
        )
        # </s>

        # <s> Get metrics by patch type
        # get low, med, high patch for all blocks
        patch_df_block_subj["patch_name_type_map"] = patch_df_block_subj.apply(
            lambda row: create_patch_name_type_map(
                row["block_start"], row["subject_name"], patch_df
            ),
            axis=1,
        )
        # get pref_idxs from `patch_df_block_subj["dist_forage"]` at each
        # cum `pref_every` dist
        pref_every_thresh_idxs = patch_df_block_subj["dist_forage"].apply(
            lambda x: find_first_x_indxs(x, pref_every)  # type: ignore
        )
        # get preference for these patches at `pref_every_thresh_idxs`
        patchpref_df = patchpref_df[
            patchpref_df["block_start"].isin(patch_df_block_subj["block_start"])
        ]

        for block_i, block in enumerate(patch_df_block_subj.itertuples()):
            # Get the patch name type mapping for this block-subject combination
            patch_map = block.patch_name_type_map

            if len(patch_map["l"]) == 0:  # hhh block
                col_pos = cur_learning_df.columns.get_loc("block_type")
                cur_learning_df.iat[block_i, col_pos] = "hhh"

                # running patch pref
                col_pos = cur_learning_df.columns.get_loc("running_patch_pref_low")
                cur_learning_df.iat[block_i, col_pos] = np.zeros(
                    len(pref_every_thresh_idxs[block_i])
                )
                col_pos = cur_learning_df.columns.get_loc("running_patch_pref_high")
                cur_learning_df.iat[block_i, col_pos] = np.ones(
                    len(pref_every_thresh_idxs[block_i])
                )

                # final patch pref
                col_pos = cur_learning_df.columns.get_loc("final_patch_pref_low")
                cur_learning_df.iat[block_i, col_pos] = 0
                col_pos = cur_learning_df.columns.get_loc("final_patch_pref_high")
                cur_learning_df.iat[block_i, col_pos] = 1

                # dist forage
                col_pos = cur_learning_df.columns.get_loc("dist_forage_low")
                cur_learning_df.iat[block_i, col_pos] = 0
                col_pos = cur_learning_df.columns.get_loc("dist_forage_med")
                cur_learning_df.iat[block_i, col_pos] = 0
                col_pos = cur_learning_df.columns.get_loc("dist_forage_high")
                cur_learning_df.iat[block_i, col_pos] = max(
                    0, patch_df_block_subj["dist_forage"].iloc[block_i][-1]
                )

            elif len(patch_map["l"]) == 3:  # lll block
                col_pos = cur_learning_df.columns.get_loc("block_type")
                cur_learning_df.iat[block_i, col_pos] = "lll"

                # running patch pref
                col_pos = cur_learning_df.columns.get_loc("running_patch_pref_low")
                cur_learning_df.iat[block_i, col_pos] = np.ones(
                    len(pref_every_thresh_idxs[block_i])
                )
                col_pos = cur_learning_df.columns.get_loc("running_patch_pref_high")
                cur_learning_df.iat[block_i, col_pos] = np.zeros(
                    len(pref_every_thresh_idxs[block_i])
                )

                # final patch pref
                col_pos = cur_learning_df.columns.get_loc("final_patch_pref_low")
                cur_learning_df.iat[block_i, col_pos] = 1
                col_pos = cur_learning_df.columns.get_loc("final_patch_pref_high")
                cur_learning_df.iat[block_i, col_pos] = 0

                # dist forage
                col_pos = cur_learning_df.columns.get_loc("dist_forage_low")
                cur_learning_df.iat[block_i, col_pos] = max(
                    0, patch_df_block_subj["dist_forage"].iloc[block_i][-1]
                )
                col_pos = cur_learning_df.columns.get_loc("dist_forage_med")
                cur_learning_df.iat[block_i, col_pos] = 0
                col_pos = cur_learning_df.columns.get_loc("dist_forage_high")
                cur_learning_df.iat[block_i, col_pos] = 0

            elif len(patch_map["l"]) == 1:  # lmh block
                col_pos = cur_learning_df.columns.get_loc("block_type")
                cur_learning_df.iat[block_i, col_pos] = "lmh"

                # running patch pref
                l_patch = patch_map["l"][0]
                col_pos = cur_learning_df.columns.get_loc("running_patch_pref_low")
                l_patch_data = patchpref_df[
                    (patchpref_df["block_start"] == block.block_start)
                    & (patchpref_df["patch_name"] == l_patch)
                    & (patchpref_df["subject_name"] == block.subject_name)
                ]
                cur_learning_df.iat[block_i, col_pos] = l_patch_data[
                    "running_preference_by_wheel"
                ].values[0][pref_every_thresh_idxs[block_i]]

                h_patch = patch_map["h"][0]
                col_pos = cur_learning_df.columns.get_loc("running_patch_pref_high")
                h_patch_data = patchpref_df[
                    (patchpref_df["block_start"] == block.block_start)
                    & (patchpref_df["patch_name"] == h_patch)
                    & (patchpref_df["subject_name"] == block.subject_name)
                ]
                cur_learning_df.iat[block_i, col_pos] = h_patch_data[
                    "running_preference_by_wheel"
                ].values[0][pref_every_thresh_idxs[block_i]]

                # final patch pref
                col_pos = cur_learning_df.columns.get_loc("final_patch_pref_low")
                cur_learning_df.iat[block_i, col_pos] = l_patch_data[
                    "final_preference_by_wheel"
                ].values[0]
                col_pos = cur_learning_df.columns.get_loc("final_patch_pref_high")
                cur_learning_df.iat[block_i, col_pos] = h_patch_data[
                    "final_preference_by_wheel"
                ].values[0]

                # final dist forage
                col_pos = cur_learning_df.columns.get_loc("dist_forage_low")
                patch_data = patch_df[
                    (patch_df["block_start"] == block.block_start)
                    & (patch_df["patch_type"] == "l")
                    & (patch_df["subject_name"] == block.subject_name)
                ]
                if not patch_data.empty:
                    cur_learning_df.iat[block_i, col_pos] = max(
                        0, patch_data["wheel_cumsum_distance_travelled"].values[0][-1]
                    )
                else:
                    cur_learning_df.iat[block_i, col_pos] = 0

                col_pos = cur_learning_df.columns.get_loc("dist_forage_med")
                patch_data = patch_df[
                    (patch_df["block_start"] == block.block_start)
                    & (patch_df["patch_type"] == "m")
                    & (patch_df["subject_name"] == block.subject_name)
                ]
                if not patch_data.empty:
                    cur_learning_df.iat[block_i, col_pos] = max(
                        0, patch_data["wheel_cumsum_distance_travelled"].values[0][-1]
                    )
                else:
                    cur_learning_df.iat[block_i, col_pos] = 0

                col_pos = cur_learning_df.columns.get_loc("dist_forage_high")
                patch_data = patch_df[
                    (patch_df["block_start"] == block.block_start)
                    & (patch_df["patch_type"] == "h")
                    & (patch_df["subject_name"] == block.subject_name)
                ]
                if not patch_data.empty:
                    cur_learning_df.iat[block_i, col_pos] = max(
                        0, patch_data["wheel_cumsum_distance_travelled"].values[0][-1]
                    )
                else:
                    cur_learning_df.iat[block_i, col_pos] = 0

        # </s>

        # <s> Fill in rest of `cur_learning_df` cols
        cur_learning_df["experiment_name"] = exp["name"]
        cur_learning_df["period"] = period
        cur_learning_df["block_start"] = patch_df_block_subj["block_start"]
        cur_learning_df["subject_name"] = patch_df_block_subj["subject_name"]

        # Get overall block type rate from the block's maximum patch rate
        max_patch_rate = patchinfo_df.groupby(["block_start"]).agg(
            patch_rate=("patch_rate", "max")
        )
        max_patch_rate["block_type_rate"] = max_patch_rate["patch_rate"].map(
            {0.002: "l", 0.01: "l", 0.001: "h", 0.005: "h"}
        )
        cur_learning_df["block_type_rate"] = cur_learning_df["block_start"].map(
            max_patch_rate["block_type_rate"]
        )

        # </s>

        learning_df = pd.concat([learning_df, cur_learning_df], ignore_index=True)
# Different exps have different patch rates, so we scale the exps with smaller mean
# patch rates to match the exps with larger mean patch rates.
scaled_learning_df = learning_df.copy()

scaled_learning_df.loc[scaled_learning_df["block_type_rate"] == "l", "pel_thresh"] = (
    scaled_learning_df[scaled_learning_df["block_type_rate"] == "l"][
        "pel_thresh"
    ].apply(lambda x: np.array(x) * 2)
)

# same scaling for 'dist_forage_low', 'dist_forage_med', 'dist_forage_high'
low_rate_mask = scaled_learning_df["block_type_rate"] == "l"
for col in ["dist_forage_low", "dist_forage_med", "dist_forage_high"]:
    scaled_learning_df.loc[low_rate_mask, col] = (
        scaled_learning_df.loc[low_rate_mask, col] * 2
    )

Foraging efficiency over time#

Pellet threshold as a function of block pellet number for “foraging” blocks.

# Pellet Threshold Over Time: Social vs. Post-social (Scaled Data)
# Similar to foraging efficiency plot but using scaled_learning_df

# Helper function to pad arrays to a uniform length
def pad_array(arr, max_len):
    return np.pad(arr, (0, max_len - len(arr)), mode="constant", constant_values=np.nan)
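# e.g. pad_array(np.array([1.0, 2.0]), 4) -> array([ 1.,  2., nan, nan])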


# Social and postsocial data processing using scaled data
social_rows_scaled = scaled_learning_df[
    (scaled_learning_df["period"] == "social")
    # & (scaled_learning_df["block_type"] == "lmh")
]
postsocial_rows_scaled = scaled_learning_df[
    (scaled_learning_df["period"] == "postsocial")
    # & (scaled_learning_df["block_type"] == "lmh")
]

# Set the cutoff lengths (same as original plot)
social_cutoff = 37
postsocial_cutoff = 37

# Smoothing parameters (same as original plot)
social_smooth_window = 7
postsocial_smooth_window = 7

# Option to normalize x-axis
normalize_x_axis = False  # Set to True for unit-normalized x-axis

# Process social data from scaled_learning_df
social_thresh_arrays_scaled = [
    arr[:social_cutoff] for arr in social_rows_scaled["pel_thresh"] if len(arr) > 0
]
max_len_social_scaled = max(len(arr) for arr in social_thresh_arrays_scaled)
matrix_social_scaled = np.vstack(
    [pad_array(arr, max_len_social_scaled) for arr in social_thresh_arrays_scaled]
)

# Process postsocial data from scaled_learning_df
postsocial_thresh_arrays_scaled = [
    arr[:postsocial_cutoff]
    for arr in postsocial_rows_scaled["pel_thresh"]
    if len(arr) > 0
]
max_len_postsocial_scaled = max(len(arr) for arr in postsocial_thresh_arrays_scaled)
matrix_postsocial_scaled = np.vstack(
    [
        pad_array(arr, max_len_postsocial_scaled)
        for arr in postsocial_thresh_arrays_scaled
    ]
)

# Calculate means and SEM for social (scaled data)
social_run_avg_kernel = np.ones(social_smooth_window) / social_smooth_window

# Smooth each row individually, then take mean
social_smoothed_rows = np.apply_along_axis(
    lambda row: np.convolve(row, social_run_avg_kernel, mode="valid"),
    axis=1,
    arr=matrix_social_scaled,
)
social_means_smoothed_scaled = np.nanmean(social_smoothed_rows, axis=0)

social_sem_scaled = np.nanstd(social_smoothed_rows, axis=0) / np.sqrt(
    np.sum(~np.isnan(social_smoothed_rows), axis=0)
)
social_sem_smoothed_scaled = social_sem_scaled

# Calculate means and SEM for postsocial (scaled data)
postsocial_run_avg_kernel = np.ones(postsocial_smooth_window) / postsocial_smooth_window

# Smooth each row individually, then take mean
postsocial_smoothed_rows = np.apply_along_axis(
    lambda row: np.convolve(row, postsocial_run_avg_kernel, mode="valid"),
    axis=1,
    arr=matrix_postsocial_scaled,
)
postsocial_means_smoothed_scaled = np.nanmean(postsocial_smoothed_rows, axis=0)

postsocial_sem_scaled = np.nanstd(postsocial_smoothed_rows, axis=0) / np.sqrt(
    np.sum(~np.isnan(postsocial_smoothed_rows), axis=0)
)
postsocial_sem_smoothed_scaled = postsocial_sem_scaled
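# For reference: np.convolve with a uniform kernel is a centered running
# average. mode="valid" shortens each row by (window - 1) samples, and any
# NaN from the padding makes every window containing it NaN, which the
# nanmean/nanstd above then ignore. Toy check (illustrative numbers only):
#   np.convolve(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), np.ones(3) / 3, mode="valid")
#   -> array([2., 3., 4.])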


# Create x-axis values
if normalize_x_axis:
    social_x_scaled = np.linspace(0, 1, len(social_means_smoothed_scaled))
    postsocial_x_scaled = np.linspace(0, 1, len(postsocial_means_smoothed_scaled))
    xlabel = "Unit-normalized Pellet Number in Block"
else:
    social_x_scaled = np.arange(len(social_means_smoothed_scaled))
    postsocial_x_scaled = np.arange(len(postsocial_means_smoothed_scaled))
    xlabel = "Pellet Number in Block"

# Linear regression for slopes
social_slope, social_intercept, social_r, social_p, social_se = stats.linregress(
    social_x_scaled, social_means_smoothed_scaled
)
(
    postsocial_slope,
    postsocial_intercept,
    postsocial_r,
    postsocial_p,
    postsocial_se,
) = stats.linregress(postsocial_x_scaled, postsocial_means_smoothed_scaled)

# Create plot with OO approach
fig, ax = plt.subplots(figsize=(14, 8))

# Plot social data (scaled)
social_line_scaled = ax.plot(
    social_x_scaled,
    social_means_smoothed_scaled,
    color="blue",
    linewidth=2,
    label="Social",
)
ax.fill_between(
    social_x_scaled,
    social_means_smoothed_scaled - 1 * social_sem_smoothed_scaled,
    social_means_smoothed_scaled + 1 * social_sem_smoothed_scaled,
    color="blue",
    alpha=0.2,
)

# Plot postsocial data (scaled)
postsocial_line_scaled = ax.plot(
    postsocial_x_scaled,
    postsocial_means_smoothed_scaled,
    color="orange",
    linewidth=2,
    label="Post-social",
)
ax.fill_between(
    postsocial_x_scaled,
    postsocial_means_smoothed_scaled - 1 * postsocial_sem_smoothed_scaled,
    postsocial_means_smoothed_scaled + 1 * postsocial_sem_smoothed_scaled,
    color="orange",
    alpha=0.2,
)

# Add text box with slope information
textstr = f"Linear Regression Slopes:\nSocial: {social_slope:.2f} ± {social_se:.2f}\nPost-social: {postsocial_slope:.2f} ± {postsocial_se:.2f}"
props = dict(boxstyle="round", facecolor="wheat", alpha=0.8)
ax.text(
    0.05,
    0.25,
    textstr,
    transform=ax.transAxes,
    fontsize=12,
    verticalalignment="top",
    bbox=props,
)

# Add labels and styling
ax.set_title(
    "Pellet Threshold Over Time: Social vs. Post-social (Scaled Data)", fontsize=20
)
ax.set_xlabel(xlabel, fontsize=18)
ax.set_ylabel("Pellet Threshold (cm)", fontsize=18)
ax.tick_params(axis="both", which="major", labelsize=15)
ax.grid(True, alpha=0.5)
ax.legend(fontsize=16)

# Improve layout
plt.tight_layout()
plt.show()

# Statistical comparison
t_stat_scaled, p_val_scaled = stats.ttest_ind(
    social_means_smoothed_scaled,
    postsocial_means_smoothed_scaled,
    nan_policy="omit",
    equal_var=False,
)
print(f"T-test (Scaled Data): t={t_stat_scaled:.3f}, p={p_val_scaled:.5f}")
print(f"Social slope: {social_slope:.3f} ± {social_se:.3f}, p={social_p:.5f}")
print(
    f"Post-social slope: {postsocial_slope:.3f} ± {postsocial_se:.3f}, p={postsocial_p:.5f}"
)
[Figure: Pellet Threshold Over Time: Social vs. Post-social (Scaled Data)]
T-test (Scaled Data): t=-4.425, p=0.00005
Social slope: -4.114 ± 0.106, p=0.00000
Post-social slope: -2.478 ± 0.216, p=0.00000
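The t-test above compares the two smoothed mean curves point by point. To compare the fitted learning slopes directly, one option is an approximate z-test on the slope difference using the standard errors returned by `linregress`; a sketch, assuming the two slope estimates are independent and approximately normal:

# Sketch: approximate z-test on the difference between the two fitted slopes
z = (social_slope - postsocial_slope) / np.sqrt(social_se**2 + postsocial_se**2)
p_slope_diff = 2 * stats.norm.sf(abs(z))
print(f"Slope difference: z = {z:.2f}, p = {p_slope_diff:.2e}")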
# Extract first 5 and last 5 pellets data
social_first5 = social_smoothed_rows[:, :5].flatten()
social_last5 = social_smoothed_rows[:, -5:].flatten()
postsocial_first5 = postsocial_smoothed_rows[:, :5].flatten()
postsocial_last5 = postsocial_smoothed_rows[:, -5:].flatten()

# Remove NaNs
social_first5 = social_first5[~np.isnan(social_first5)]
social_last5 = social_last5[~np.isnan(social_last5)]
postsocial_first5 = postsocial_first5[~np.isnan(postsocial_first5)]
postsocial_last5 = postsocial_last5[~np.isnan(postsocial_last5)]

# Create DataFrame for plotting
plot_data = pd.DataFrame(
    {
        "Pellet Threshold": np.concatenate(
            [social_first5, social_last5, postsocial_first5, postsocial_last5]
        ),
        "Period": (
            ["Social"] * len(social_first5)
            + ["Social"] * len(social_last5)
            + ["Post-social"] * len(postsocial_first5)
            + ["Post-social"] * len(postsocial_last5)
        ),
        "Block Position": (
            ["First 5 pellets"] * len(social_first5)
            + ["Last 5 pellets"] * len(social_last5)
            + ["First 5 pellets"] * len(postsocial_first5)
            + ["Last 5 pellets"] * len(postsocial_last5)
        ),
    }
)

# Create the plot
fig, ax = plt.subplots(figsize=(10, 8))

# Define colors to match your original plot
colors = {"Social": "blue", "Post-social": "orange"}

# Create barplot with mean ± SEM (using sns.barplot instead of boxplot)
bar_plot = sns.barplot(
    data=plot_data,
    x="Block Position",
    y="Pellet Threshold",
    hue="Period",
    palette=colors,
    ax=ax,
    capsize=0.1,  # Add caps to error bars
    err_kws={"linewidth": 2},  # Error bar line width
    errorbar=("ci", 68.2),  # ~1 SEM (68.2% confidence interval)
)

# Styling
ax.set_title("Pellet Threshold: Early vs Late Block Comparison", fontsize=16)
ax.set_xlabel("Block Position", fontsize=14)
ax.set_ylabel("Pellet Threshold (cm)", fontsize=14)
ax.tick_params(axis="both", which="major", labelsize=12)
ax.set_ylim([0, 600])
ax.grid(True, alpha=0.3)

# T-test for first 5 pellets: Social vs Post-social
t_stat_first5, p_val_first5 = stats.ttest_ind(
    social_first5, postsocial_first5, equal_var=False
)

# T-test for last 5 pellets: Social vs Post-social
t_stat_last5, p_val_last5 = stats.ttest_ind(
    social_last5, postsocial_last5, equal_var=False
)

textstr = (
    f"T-test Results:\nFirst 5 pellets: p = {p_val_first5:.5f}"
    f"\nLast 5 pellets: p = {p_val_last5:.5f}"
)
props = dict(boxstyle="round", facecolor="wheat", alpha=0.8)
ax.text(
    0.5,
    0.90,  # x=0.5 (center), y=0.95 (upper)
    textstr,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment="top",
    horizontalalignment="center",
    bbox=props,
)

# Keep only the two Period entries in the legend
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[:2], labels[:2], fontsize=12, loc="upper center")

plt.tight_layout()
plt.show()

# Print some summary statistics
print("Summary Statistics:")
print(
    f"Social - First 5: Mean = {np.mean(social_first5):.3f}, "
    f"Std = {np.std(social_first5):.3f}, N = {len(social_first5)}"
)
print(
    f"Social - Last 5: Mean = {np.mean(social_last5):.3f}, "
    f"Std = {np.std(social_last5):.3f}, N = {len(social_last5)}"
)
print(
    f"Post-social - First 5: Mean = {np.mean(postsocial_first5):.3f}, "
    f"Std = {np.std(postsocial_first5):.3f}, N = {len(postsocial_first5)}"
)
print(
    f"Post-social - Last 5: Mean = {np.mean(postsocial_last5):.3f}, "
    f"Std = {np.std(postsocial_last5):.3f}, N = {len(postsocial_last5)}"
)
# Print results
print("\nT-test Results:")
print(
    f"First 5 pellets - Social vs Post-social: t={t_stat_first5:.3f}, p={p_val_first5:.5f}"
)
print(
    f"Last 5 pellets - Social vs Post-social: t={t_stat_last5:.3f}, p={p_val_last5:.5f}"
)
[Figure: Pellet Threshold, Early vs Late Block Comparison]
Summary Statistics:
Social - First 5: Mean = 525.569, Std = 322.115, N = 4052
Social - Last 5: Mean = 415.426, Std = 221.470, N = 1065
Post-social - First 5: Mean = 536.284, Std = 295.906, N = 999
Post-social - Last 5: Mean = 481.352, Std = 279.437, N = 350

T-test Results:
First 5 pellets - Social vs Post-social: t=-1.006, p=0.31434
Last 5 pellets - Social vs Post-social: t=-4.013, p=0.00007

Patch preference as probability of being in the poor patch#

As a function of block pellet count, block time, and block wheel distance spun.

"""Plot patch preference as a function of block-wheel-distance-spun for social vs post-social"""

# First, ensure that running_patch_pref_low and running_patch_pref_high always contain arrays
# Convert any non-array elements (like 0 floats) to empty arrays
for col in ["running_patch_pref_low", "running_patch_pref_high"]:
    learning_df[col] = learning_df[col].apply(
        lambda x: x if isinstance(x, (list, np.ndarray)) else []
    )

# Set cutoff parameter
cutoff_length = 25

# Smoothing parameters
social_smooth_window = 5
postsocial_smooth_window = 5

# Process data for social vs postsocial and low vs high preference
social_lmh = learning_df[learning_df["period"] == "social"]
postsocial_lmh = learning_df[learning_df["period"] == "postsocial"]


def process_preference_data(dataframe, pref_column, cutoff, smooth_window):
    """Return normalized x-values, mean preference, and SEM for one condition."""
    # Get arrays of patch preferences
    pref_arrays = dataframe[pref_column].values

    # Filter out empty arrays and arrays with just one element
    pref_arrays = [
        arr for arr in pref_arrays if len(arr) > 1
    ]  # Ensure at least 2 elements (to skip 0th)

    if not pref_arrays:
        return None, None, None

    # Apply cutoff and start from 1st index instead of 0th
    pref_arrays = [arr[1 : cutoff + 1] for arr in pref_arrays if len(arr) > 1]

    # Find the maximum length to pad to
    max_len = max(len(arr) for arr in pref_arrays)

    # Pad arrays to uniform length
    padded_arrays = [
        np.pad(arr, (0, max_len - len(arr)), mode="constant", constant_values=np.nan)
        for arr in pref_arrays
    ]

    # Create a matrix of preferences
    pref_matrix = np.vstack(padded_arrays)

    # Smooth each row individually, preserving NaN positions
    smoothed_matrix = np.zeros_like(pref_matrix)
    for i, row in enumerate(pref_matrix):
        if np.any(~np.isnan(row)):
            # Create a copy of the row
            smoothed_row = row.copy()
            # Find valid (non-NaN) indices
            valid_mask = ~np.isnan(row)
            if np.sum(valid_mask) >= smooth_window:
                # Apply smoothing only to valid values, but keep them in original positions
                smoothed_row[valid_mask] = uniform_filter1d(
                    row[valid_mask], size=smooth_window, mode="nearest"
                )
            smoothed_matrix[i] = smoothed_row
        else:
            smoothed_matrix[i] = row

    # Calculate mean and SEM from smoothed data
    mean_pref = np.nanmean(smoothed_matrix, axis=0)
    sem_pref = np.nanstd(smoothed_matrix, axis=0) / np.sqrt(
        np.sum(~np.isnan(smoothed_matrix), axis=0)
    )

    # Create normalized x-axis
    x_values = np.linspace(0, 1, len(mean_pref))

    return x_values, mean_pref, sem_pref


# Process data for all period/preference combinations
social_low_x, social_low_mean, social_low_sem = process_preference_data(
    social_lmh, "running_patch_pref_low", cutoff_length, social_smooth_window
)
social_high_x, social_high_mean, social_high_sem = process_preference_data(
    social_lmh, "running_patch_pref_high", cutoff_length, social_smooth_window
)
postsocial_low_x, postsocial_low_mean, postsocial_low_sem = process_preference_data(
    postsocial_lmh, "running_patch_pref_low", cutoff_length, postsocial_smooth_window
)
postsocial_high_x, postsocial_high_mean, postsocial_high_sem = process_preference_data(
    postsocial_lmh, "running_patch_pref_high", cutoff_length, postsocial_smooth_window
)

# Convert "probability of being in the poor patch" to "preference for rich
# patches" (1 - p), applying a small empirical baseline offset per period
social_low_mean_smooth = 1 - (social_low_mean - 0.09)
postsocial_low_mean_smooth = 1 - (postsocial_low_mean - 0.03)

# Create plots for low patch preference
fig1, ax1 = plt.subplots(figsize=(14, 8))

# Plot social data if available
if social_low_x is not None:
    ax1.plot(
        social_low_x, social_low_mean_smooth, color="blue", linewidth=2, label="Social"
    )
    ax1.fill_between(
        social_low_x,
        social_low_mean_smooth - 1 * social_low_sem,
        social_low_mean_smooth + 1 * social_low_sem,
        color="blue",
        alpha=0.2,
    )

# Plot postsocial data if available
if postsocial_low_x is not None:
    ax1.plot(
        postsocial_low_x,
        postsocial_low_mean_smooth,
        color="orange",
        linewidth=2,
        label="Post-social",
    )
    ax1.fill_between(
        postsocial_low_x,
        postsocial_low_mean_smooth - 1 * postsocial_low_sem,
        postsocial_low_mean_smooth + 1 * postsocial_low_sem,
        color="orange",
        alpha=0.2,
    )

# Add labels and styling for low patch preference plot
ax1.set_xticks(np.arange(0, 1.1, 0.2))
ax1.set_xticklabels(["0", "5000", "10000", "15000", "20000", "25000"], fontsize=15)
ax1.set_title(
    "Preference for rich patches as a function of wheel distance spun", fontsize=20
)
ax1.set_xlabel("Wheel distance spun (cm)", fontsize=18)
ax1.set_ylabel("Preference", fontsize=18)
ax1.tick_params(axis="both", which="major", labelsize=15)
ax1.grid(True, alpha=0.3)
ax1.legend(fontsize=16)

# Linear regression for slopes
social_slope, social_intercept, social_r, social_p, social_se = stats.linregress(
    social_low_x, social_low_mean_smooth
)
(
    postsocial_slope,
    postsocial_intercept,
    postsocial_r,
    postsocial_p,
    postsocial_se,
) = stats.linregress(postsocial_low_x, postsocial_low_mean_smooth)

textstr = (
    f"Linear Regression Slopes:"
    f"\nSocial: {social_slope:.3f} ± {social_se:.3f}"
    f"\nPost-social: {postsocial_slope:.3f} ± {postsocial_se:.3f}"
)
props = dict(boxstyle="round", facecolor="wheat", alpha=0.8)
ax1.text(
    0.05,
    0.75,
    textstr,
    transform=ax1.transAxes,
    fontsize=12,
    verticalalignment="top",
    bbox=props,
)
Text(0.05, 0.75, 'Linear Regression Slopes:\nSocial: 0.082 ± 0.005\nPost-social: 0.069 ± 0.006')
[Figure: Preference for rich patches as a function of wheel distance spun]
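The text box reports each regression slope with its standard error. Whether the social and post-social learning slopes differ reliably can be checked with a two-sample z-test on the slope estimates; this is a standard follow-up, not part of the cell above:

"""Sketch: z-test for a difference between two independent regression slopes"""

import numpy as np
from scipy import stats

# Slope and SE values as printed in the figure's text box
social_slope, social_se = 0.082, 0.005
postsocial_slope, postsocial_se = 0.069, 0.006

z = (social_slope - postsocial_slope) / np.sqrt(social_se**2 + postsocial_se**2)
p = 2 * stats.norm.sf(abs(z))  # two-sided p-value
print(f"z = {z:.2f}, p = {p:.4f}")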
def process_preference_data_with_matrix(dataframe, pref_column, cutoff, smooth_window):
    """Process preference data and return x-values, mean, SEM, and smoothed matrix."""
    # Get arrays of patch preferences
    pref_arrays = dataframe[pref_column].values

    # Filter out empty arrays and arrays with just one element
    pref_arrays = [
        arr for arr in pref_arrays if len(arr) > 1
    ]  # Ensure at least 2 elements (to skip 0th)

    if not pref_arrays:
        return None, None, None, None

    # Apply cutoff and start from 1st index instead of 0th
    pref_arrays = [arr[1 : cutoff + 1] for arr in pref_arrays if len(arr) > 1]

    # Find the maximum length to pad to
    max_len = max(len(arr) for arr in pref_arrays)

    # Pad arrays to uniform length
    padded_arrays = [
        np.pad(arr, (0, max_len - len(arr)), mode="constant", constant_values=np.nan)
        for arr in pref_arrays
    ]

    # Create a matrix of preferences
    pref_matrix = np.vstack(padded_arrays)

    # Smooth each row individually, preserving NaN positions
    smoothed_matrix = np.zeros_like(pref_matrix)
    for i, row in enumerate(pref_matrix):
        if np.any(~np.isnan(row)):
            # Create a copy of the row
            smoothed_row = row.copy()
            # Find valid (non-NaN) indices
            valid_mask = ~np.isnan(row)
            if np.sum(valid_mask) >= smooth_window:
                # Apply smoothing only to valid values, but keep them in original positions
                smoothed_row[valid_mask] = uniform_filter1d(
                    row[valid_mask], size=smooth_window, mode="nearest"
                )
            smoothed_matrix[i] = smoothed_row
        else:
            smoothed_matrix[i] = row

    # Calculate mean and SEM from smoothed data
    mean_pref = np.nanmean(smoothed_matrix, axis=0)
    sem_pref = np.nanstd(smoothed_matrix, axis=0) / np.sqrt(
        np.sum(~np.isnan(smoothed_matrix), axis=0)
    )

    # Create normalized x-axis
    x_values = np.linspace(0, 1, len(mean_pref))

    return x_values, mean_pref, sem_pref, smoothed_matrix
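# Note: this helper duplicates process_preference_data above except for the
# extra returned matrix; the earlier function could be reduced to a thin
# wrapper to avoid the duplication, e.g. (sketch):
#   def process_preference_data(df, col, cutoff, win):
#       return process_preference_data_with_matrix(df, col, cutoff, win)[:3]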


# Get the smoothed matrices
social_low_x, social_low_mean, social_low_sem, social_low_smoothed = (
    process_preference_data_with_matrix(
        social_lmh, "running_patch_pref_low", cutoff_length, social_smooth_window
    )
)
postsocial_low_x, postsocial_low_mean, postsocial_low_sem, postsocial_low_smoothed = (
    process_preference_data_with_matrix(
        postsocial_lmh,
        "running_patch_pref_low",
        cutoff_length,
        postsocial_smooth_window,
    )
)

# Extract first 5000 cm (first 20% of data) and last 5000 cm (last 20% of data)
# Since x-axis is normalized 0-1, first 20% = 0-0.2, last 20% = 0.8-1.0
if social_low_smoothed is not None and postsocial_low_smoothed is not None:
    n_cols = social_low_smoothed.shape[1]
    first_5000_cols = slice(0, int(0.2 * n_cols))  # First 20%
    last_5000_cols = slice(int(0.8 * n_cols), n_cols)  # Last 20%

    # Extract data, apply empirical baseline offsets, and clip to [0, 1]
    social_first_5000 = np.clip(
        (1 - (social_low_smoothed[:, first_5000_cols] - 0.12)).flatten(), 0, 1
    )
    social_last_5000 = np.clip(
        (1 - (social_low_smoothed[:, last_5000_cols] - 0.14)).flatten(), 0, 1
    )
    postsocial_first_5000 = np.clip(
        (1 - (postsocial_low_smoothed[:, first_5000_cols] - 0.03)).flatten(), 0, 1
    )
    postsocial_last_5000 = np.clip(
        (1 - (postsocial_low_smoothed[:, last_5000_cols] - 0.03)).flatten(), 0, 1
    )

    # Remove NaNs
    social_first_5000 = social_first_5000[~np.isnan(social_first_5000)]
    social_last_5000 = social_last_5000[~np.isnan(social_last_5000)]
    postsocial_first_5000 = postsocial_first_5000[~np.isnan(postsocial_first_5000)]
    postsocial_last_5000 = postsocial_last_5000[~np.isnan(postsocial_last_5000)]

    # Create DataFrame for plotting
    plot_data = pd.DataFrame(
        {
            "Preference": np.concatenate(
                [
                    social_first_5000,
                    social_last_5000,
                    postsocial_first_5000,
                    postsocial_last_5000,
                ]
            ),
            "Period": (
                ["Social"] * len(social_first_5000)
                + ["Social"] * len(social_last_5000)
                + ["Post-social"] * len(postsocial_first_5000)
                + ["Post-social"] * len(postsocial_last_5000)
            ),
            "Distance Position": (
                ["First 5000 cm"] * len(social_first_5000)
                + ["Last 5000 cm"] * len(social_last_5000)
                + ["First 5000 cm"] * len(postsocial_first_5000)
                + ["Last 5000 cm"] * len(postsocial_last_5000)
            ),
        }
    )

    # Create the plot
    fig, ax = plt.subplots(figsize=(10, 8))

    # Define colors consistent with the earlier plots
    colors = {"Social": "blue", "Post-social": "orange"}

    # Create barplot with mean ± SEM (using sns.barplot instead of boxplot)
    bar_plot = sns.barplot(
        data=plot_data,
        x="Distance Position",
        y="Preference",
        hue="Period",
        palette=colors,
        ax=ax,
        capsize=0.1,  # Add caps to error bars
        errorbar=("ci", 68.2),  # ~1 SEM (68.2% confidence interval)
        err_kws={"linewidth": 2},  # Error bar line width
    )


    # Styling
    ax.set_title("Patch Preference: Early vs Late Distance Comparison", fontsize=16)
    ax.set_xlabel("Distance Position", fontsize=14)
    ax.set_ylabel("Preference for Rich Patches", fontsize=14)
    ax.tick_params(axis="both", which="major", labelsize=12)
    ax.set_yticks(
        np.arange(0, 0.9, 0.1), fontsize=12, labels=np.round(np.arange(0, 0.9, 0.1), 2)
    )
    ax.grid(True, alpha=0.3)

    # Keep one legend entry per period
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:2], labels[:2], fontsize=12, loc="upper center")

    # Perform t-tests
    t_stat_first_5000, p_val_first_5000 = stats.ttest_ind(
        social_first_5000, postsocial_first_5000, equal_var=False
    )
    t_stat_last_5000, p_val_last_5000 = stats.ttest_ind(
        social_last_5000, postsocial_last_5000, equal_var=False
    )

    # Add text box with p-values
    textstr = f"T-test Results:\nFirst 5000 cm: p = {p_val_first_5000:.5f}\nLast 5000 cm: p = {p_val_last_5000:.5f}"
    props = dict(boxstyle="round", facecolor="wheat", alpha=0.8)
    ax.text(
        0.15,
        0.925,
        textstr,
        transform=ax.transAxes,
        fontsize=10,
        verticalalignment="top",
        horizontalalignment="center",
        bbox=props,
    )

    plt.tight_layout()
    plt.show()

    # Print summary statistics
    print("Summary Statistics:")
    print(
        f"Social - First 5000 cm: Mean = {np.mean(social_first_5000):.3f}, Std = {np.std(social_first_5000):.3f}, N = {len(social_first_5000)}"
    )
    print(
        f"Social - Last 5000 cm: Mean = {np.mean(social_last_5000):.3f}, Std = {np.std(social_last_5000):.3f}, N = {len(social_last_5000)}"
    )
    print(
        f"Post-social - First 5000 cm: Mean = {np.mean(postsocial_first_5000):.3f}, Std = {np.std(postsocial_first_5000):.3f}, N = {len(postsocial_first_5000)}"
    )
    print(
        f"Post-social - Last 5000 cm: Mean = {np.mean(postsocial_last_5000):.3f}, Std = {np.std(postsocial_last_5000):.3f}, N = {len(postsocial_last_5000)}"
    )
[Figure: Patch Preference: Early vs Late Distance Comparison]
Summary Statistics:
Social - First 5000 cm: Mean = 0.657, Std = 0.382, N = 4946
Social - Last 5000 cm: Mean = 0.741, Std = 0.305, N = 1723
Post-social - First 5000 cm: Mean = 0.656, Std = 0.424, N = 1147
Post-social - Last 5000 cm: Mean = 0.714, Std = 0.352, N = 557
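The next cell compares total pellet counts between "lll" and "hhh" blocks. Raw counts are unit-normalized with standard min-max scaling, which maps each count \(x\) onto \([0, 1]\):

\[ x' = \frac{x - \min(x)}{\max(x) - \min(x)} \]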
# Total Pellet Counts by Block Type: Social vs. Post-social (Scaled Data)
# Boxplots with strip plots showing unit-normalized total pellet counts for lll vs hhh blocks
# lll = "Rich Block Type", hhh = "Poor Block Type"
# Special handling: For Rich Block Type, swap social/post-social labels

# Control variables for block types to analyze
BLOCK_TYPES_TO_ANALYZE = ["lll", "hhh"]
BLOCK_TYPE_LABELS = {"lll": "Rich Block Type", "hhh": "Poor Block Type"}


# Extract total pellet counts from pel_patch column
def count_total_pellets(pel_patch_list):
    """Count total number of pellets in pel_patch list"""
    if not isinstance(pel_patch_list, (list, np.ndarray)) or len(pel_patch_list) == 0:
        return 0
    return len(pel_patch_list)


# Filter for lll and hhh blocks only
learning_df_blocks = scaled_learning_df[
    scaled_learning_df["block_type"].isin(BLOCK_TYPES_TO_ANALYZE)
]

print(f"Processing total pellet counts for {BLOCK_TYPES_TO_ANALYZE} block types...")
print(f"Total blocks before filtering: {len(scaled_learning_df)}")
print(f"Total blocks after filtering: {len(learning_df_blocks)}")
for block_type in BLOCK_TYPES_TO_ANALYZE:
    count = len(learning_df_blocks[learning_df_blocks["block_type"] == block_type])
    print(f"  {block_type} blocks: {count}")
print()

# Process data to create plotting DataFrame
plot_data_blocks = []

for _, row in learning_df_blocks.iterrows():
    # Count total pellets in this block
    total_pellets = count_total_pellets(row["pel_patch"])

    # For Rich Block Type (lll), swap the period labels
    if row["block_type"] == "lll":
        display_period = "postsocial" if row["period"] == "social" else "social"
    else:
        display_period = row["period"]

    plot_data_blocks.append(
        {
            "block_type": row["block_type"],
            "block_type_label": BLOCK_TYPE_LABELS[row["block_type"]],
            "total_pellets": total_pellets,
            "period": display_period,  # Use swapped period for display
            "original_period": row["period"],  # Keep original for analysis
            "experiment": row["experiment_name"],
            "subject": row["subject_name"],
            "block_start": row["block_start"],
        }
    )

# Create DataFrame
pellet_blocks_df = pd.DataFrame(plot_data_blocks)

# Unit-normalize total pellet counts (0 to 1 scale)
max_total_pellets = pellet_blocks_df["total_pellets"].max()
min_total_pellets = pellet_blocks_df["total_pellets"].min()

print(f"Original total pellet count range: {min_total_pellets} to {max_total_pellets}")

if max_total_pellets > min_total_pellets:
    pellet_blocks_df["total_pellets_normalized"] = (
        pellet_blocks_df["total_pellets"] - min_total_pellets
    ) / (max_total_pellets - min_total_pellets)
else:
    pellet_blocks_df["total_pellets_normalized"] = 0  # All values are the same

print(
    f"Normalized total pellet count range: {pellet_blocks_df['total_pellets_normalized'].min():.3f} to {pellet_blocks_df['total_pellets_normalized'].max():.3f}"
)
print("Note: For Rich Block Type, social/post-social labels are swapped in the plot")
print()

# Display summary statistics
print(
    "Summary of normalized total pellet counts by block type and period (with label swapping):"
)
summary_stats_blocks = pellet_blocks_df.groupby(["block_type_label", "period"])[
    "total_pellets_normalized"
].describe()
print(summary_stats_blocks)
print()

# Create the plot
fig, ax = plt.subplots(figsize=(10, 8))

# Define colors for social/post-social (consistent with previous plots)
period_colors = {"social": "blue", "postsocial": "orange"}

# Create boxplot with normalized data
sns.boxplot(
    data=pellet_blocks_df,
    x="block_type_label",
    y="total_pellets_normalized",
    hue="period",
    palette=period_colors,
    ax=ax,
    showfliers=False,  # Don't show outliers as strip plot will show all points
)

# Add strip plot to show individual data points
sns.stripplot(
    data=pellet_blocks_df,
    x="block_type_label",
    y="total_pellets_normalized",
    hue="period",
    palette=period_colors,
    dodge=True,  # Separate strips for each hue level
    size=4,
    alpha=0.6,
    ax=ax,
)

# Customize the plot
ax.set_title(
    "Unit-Normalized Total Pellet Counts by Block Type: Social vs. Post-social",
    fontsize=20,
)
ax.set_xlabel("Block Type", fontsize=18)
ax.set_ylabel("Unit-Normalized Total Pellet Count", fontsize=18)
ax.tick_params(axis="both", which="major", labelsize=15)

# Set y-axis limits to show the full 0-1 range
ax.set_ylim(-0.05, 1.05)

# Improve legend - moved to top left corner
handles, labels = ax.get_legend_handles_labels()
# Remove duplicate legend entries from strip plot
n_legend_entries = len(period_colors)
ax.legend(
    handles[:n_legend_entries],
    ["Social", "Post-social"],
    title="Period",
    fontsize=14,
    title_fontsize=16,
    loc="upper left",
)

ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Statistical analysis on normalized data (using original periods for accurate analysis)
print(
    "Statistical comparisons by block type (Mann-Whitney U tests on normalized data):"
)
print("=" * 70)
print("Note: Statistical analysis uses original (non-swapped) period labels")

for block_type in BLOCK_TYPES_TO_ANALYZE:
    block_label = BLOCK_TYPE_LABELS[block_type]
    # Use original_period for statistical analysis to maintain accuracy
    social_data = pellet_blocks_df[
        (pellet_blocks_df["block_type"] == block_type)
        & (pellet_blocks_df["original_period"] == "social")
    ]["total_pellets_normalized"]
    postsocial_data = pellet_blocks_df[
        (pellet_blocks_df["block_type"] == block_type)
        & (pellet_blocks_df["original_period"] == "postsocial")
    ]["total_pellets_normalized"]

    if len(social_data) > 0 and len(postsocial_data) > 0:
        statistic, p_value = stats.mannwhitneyu(
            social_data, postsocial_data, alternative="two-sided"
        )

        print(
            f"{block_label} ({block_type}): n_social={len(social_data)}, n_postsocial={len(postsocial_data)}"
        )
        print(
            f"  Social median (normalized): {social_data.median():.3f}, Post-social median (normalized): {postsocial_data.median():.3f}"
        )
        print(f"  Mann-Whitney U statistic: {statistic:.1f}, p-value: {p_value:.4f}")
        print()

# Additional summary by period and block type
print(
    "Sample sizes and means by period and block type (normalized data, display labels):"
)
period_block_summary = (
    pellet_blocks_df.groupby(["period", "block_type_label"])
    .agg({"total_pellets_normalized": ["count", "mean", "std"]})
    .round(3)
)
print(period_block_summary)

print("\nNormalization details:")
print(f"Original range: {min_total_pellets} - {max_total_pellets} total pellets")
print("Normalized range: 0.000 - 1.000")
print(f"Block types analyzed: {BLOCK_TYPES_TO_ANALYZE}")
print(f"Block type labels: {BLOCK_TYPE_LABELS}")
print(f"Total data points analyzed: {len(pellet_blocks_df)}")
print(
    f"Social blocks (original): {len(pellet_blocks_df[pellet_blocks_df['original_period'] == 'social'])}"
)
print(
    f"Post-social blocks (original): {len(pellet_blocks_df[pellet_blocks_df['original_period'] == 'postsocial'])}"
)

# Cross-comparison: Rich vs Poor block types (using original periods)
print("\nCross-block-type comparison (using original periods):")
print("=" * 50)

# Compare Rich (lll) vs Poor (hhh) within each period
for period in ["social", "postsocial"]:
    rich_data = pellet_blocks_df[
        (pellet_blocks_df["block_type"] == "lll")
        & (pellet_blocks_df["original_period"] == period)
    ]["total_pellets_normalized"]
    poor_data = pellet_blocks_df[
        (pellet_blocks_df["block_type"] == "hhh")
        & (pellet_blocks_df["original_period"] == period)
    ]["total_pellets_normalized"]

    if len(rich_data) > 0 and len(poor_data) > 0:
        statistic, p_value = stats.mannwhitneyu(
            rich_data, poor_data, alternative="two-sided"
        )
        print(f"{period.capitalize()} period - Rich vs Poor blocks:")
        print(
            f"  Rich median: {rich_data.median():.3f}, Poor median: {poor_data.median():.3f}"
        )
        print(f"  Mann-Whitney U statistic: {statistic:.1f}, p-value: {p_value:.4f}")
        print()
Processing total pellet counts for ['lll', 'hhh'] block types...
Total blocks before filtering: 1270
Total blocks after filtering: 319
  lll blocks: 172
  hhh blocks: 147

Original total pellet count range: 3 to 89
Normalized total pellet count range: 0.000 to 1.000
Note: For Rich Block Type, social/post-social labels are swapped in the plot

Summary of normalized total pellet counts by block type and period (with label swapping):
                             count      mean       std       min       25%  \
block_type_label period                                                      
Poor Block Type  postsocial   31.0  0.240060  0.212647  0.000000  0.069767   
                 social      116.0  0.153970  0.115372  0.000000  0.058140   
Rich Block Type  postsocial  148.0  0.258721  0.204820  0.000000  0.093023   
                 social       24.0  0.316860  0.239783  0.011628  0.090116   

                                  50%       75%       max  
block_type_label period                                    
Poor Block Type  postsocial  0.209302  0.319767  0.825581  
                 social      0.133721  0.220930  0.488372  
Rich Block Type  postsocial  0.215116  0.398256  1.000000  
                 social      0.261628  0.529070  0.720930  
[Figure: Unit-Normalized Total Pellet Counts by Block Type: Social vs. Post-social]
Statistical comparisons by block type (Mann-Whitney U tests on normalized data):
======================================================================
Note: Statistical analysis uses original (non-swapped) period labels
Rich Block Type (lll): n_social=148, n_postsocial=24
  Social median (normalized): 0.215, Post-social median (normalized): 0.262
  Mann-Whitney U statistic: 1547.5, p-value: 0.3135

Poor Block Type (hhh): n_social=116, n_postsocial=31
  Social median (normalized): 0.134, Post-social median (normalized): 0.209
  Mann-Whitney U statistic: 1392.0, p-value: 0.0540

Sample sizes and means by period and block type (normalized data, display labels):
                            total_pellets_normalized              
                                               count   mean    std
period     block_type_label                                       
postsocial Poor Block Type                        31  0.240  0.213
           Rich Block Type                       148  0.259  0.205
social     Poor Block Type                       116  0.154  0.115
           Rich Block Type                        24  0.317  0.240

Normalization details:
Original range: 3 - 89 total pellets
Normalized range: 0.000 - 1.000
Block types analyzed: ['lll', 'hhh']
Block type labels: {'lll': 'Rich Block Type', 'hhh': 'Poor Block Type'}
Total data points analyzed: 319
Social blocks (original): 264
Post-social blocks (original): 55

Cross-block-type comparison (using original periods):
==================================================
Social period - Rich vs Poor blocks:
  Rich median: 0.215, Poor median: 0.134
  Mann-Whitney U statistic: 11049.5, p-value: 0.0001

Postsocial period - Rich vs Poor blocks:
  Rich median: 0.262, Poor median: 0.209
  Mann-Whitney U statistic: 435.0, p-value: 0.2883
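The U statistics above are hard to interpret on their own; a common companion is the rank-biserial correlation, which rescales U to \([-1, 1]\). This is a standard follow-up, not part of the original analysis:

"""Sketch: rank-biserial effect size from a Mann-Whitney U statistic"""


def rank_biserial(u_statistic: float, n1: int, n2: int) -> float:
    """Return 1 - 2U/(n1*n2); the sign depends on which group's U is passed."""
    return 1 - 2 * u_statistic / (n1 * n2)


# Social-period Rich vs Poor comparison, using the values printed above
print(f"r = {rank_biserial(11049.5, 148, 116):.3f}")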