Skip to content

Utils Module

Overview

The smolsat.utils module provides high-level convenience functions for common molecular dynamics analysis tasks. These functions offer simplified interfaces to the underlying C++ functionality with sensible defaults.

Trajectory Creation

create_example_trajectory(num_particles=100, num_frames=50, box_size=(10.0, 10.0, 10.0), time_step=0.001)

Creates an example trajectory with random walk particle motion for testing and demonstration purposes.

Parameters: - num_particles (int): Number of particles (default: 100) - num_frames (int): Number of time frames (default: 50) - box_size (Tuple[float, float, float]): Simulation box dimensions (default: (10.0, 10.0, 10.0)) - time_step (float): Time step between frames (default: 0.001)

Returns: Trajectory object with random walk data

Example:

import smolsat

# Create small test trajectory
test_traj = smolsat.create_example_trajectory(
    num_particles=50,
    num_frames=100,
    box_size=(5.0, 5.0, 5.0),
    time_step=0.002
)

print(f"Created trajectory: {test_traj.num_particles()} particles, {test_traj.num_frames()} frames")

# Create large trajectory for analysis
large_traj = smolsat.create_example_trajectory(
    num_particles=1000,
    num_frames=5000
)

Quick Analysis Functions

quick_msd(trajectory, max_lag_time=None, time_step=None, particle_indices=None)

Performs rapid Mean Square Displacement analysis with minimal setup.

Parameters: - trajectory (Trajectory): Input trajectory data - max_lag_time (float, optional): Maximum lag time for analysis - time_step (float, optional): Time step (auto-detected if None) - particle_indices (List[int], optional): Specific particles to analyze (all if None)

Returns: Tuple of (lag_times, msd_values) as NumPy arrays

Example:

import smolsat
import matplotlib.pyplot as plt

# Load or create trajectory
trajectory = smolsat.create_example_trajectory(num_particles=200, num_frames=2000)

# Quick MSD analysis
lag_times, msd_values = smolsat.quick_msd(trajectory)

# Plot results
plt.figure(figsize=(8, 6))
plt.loglog(lag_times, msd_values, 'b-', linewidth=2)
plt.xlabel('Lag Time')
plt.ylabel('Mean Square Displacement')
plt.title('MSD Analysis')
plt.grid(True, alpha=0.3)
plt.show()

# Analyze specific particles only
selected_particles = list(range(0, 50))  # First 50 particles
lag_times_sel, msd_sel = smolsat.quick_msd(trajectory, particle_indices=selected_particles)

quick_rg(trajectory, particle_indices=None, molecular=False)

Performs rapid Radius of Gyration analysis.

Parameters: - trajectory (Trajectory): Input trajectory data - particle_indices (List[int], optional): Specific particles to analyze (all if None) - molecular (bool): Whether to analyze molecules instead of individual particles (default: False)

Returns: Tuple of (times, rg_values) as NumPy arrays

Example:

import smolsat
import numpy as np

# Create trajectory
trajectory = smolsat.create_example_trajectory(num_particles=100, num_frames=1000)

# Quick radius of gyration analysis
times, rg_values = smolsat.quick_rg(trajectory)

# Calculate statistics
mean_rg = np.mean(rg_values)
std_rg = np.std(rg_values)
print(f"Average Rg: {mean_rg:.3f} ± {std_rg:.3f}")

# Plot time evolution
plt.figure(figsize=(10, 6))
plt.plot(times, rg_values, 'r-', alpha=0.7)
plt.axhline(mean_rg, color='k', linestyle='--', label=f'Mean: {mean_rg:.3f}')
plt.xlabel('Time')
plt.ylabel('Radius of Gyration')
plt.legend()
plt.show()

Data Conversion Functions

trajectory_to_numpy(trajectory, include_velocities=False, include_unwrapped=False)

Converts trajectory data to NumPy arrays for advanced analysis.

Parameters: - trajectory (Trajectory): Input trajectory - include_velocities (bool): Include velocity data (default: False) - include_unwrapped (bool): Include unwrapped positions (default: False)

Returns: Dictionary containing NumPy arrays: - 'positions': Shape (n_frames, n_particles, 3) - 'times': Shape (n_frames,) - 'box_sizes': Shape (n_frames, 3) - 'velocities': Shape (n_frames, n_particles, 3) (if requested) - 'unwrapped_positions': Shape (n_frames, n_particles, 3) (if requested)

Example:

import smolsat
import numpy as np

# Create trajectory
trajectory = smolsat.create_example_trajectory(num_particles=100, num_frames=500)

# Convert to NumPy arrays
data = smolsat.trajectory_to_numpy(trajectory)

positions = data['positions']  # (500, 100, 3)
times = data['times']          # (500,)
box_sizes = data['box_sizes']  # (500, 3)

print(f"Position array shape: {positions.shape}")
print(f"Time range: {times[0]:.3f} to {times[-1]:.3f}")

# Calculate center of mass for each frame
com = np.mean(positions, axis=1)  # (500, 3)

# Calculate distances from COM
distances = np.linalg.norm(positions - com[:, np.newaxis, :], axis=2)
mean_distance = np.mean(distances, axis=1)

plt.plot(times, mean_distance)
plt.xlabel('Time')
plt.ylabel('Mean Distance from COM')
plt.show()

numpy_to_trajectory(positions, times=None, box_sizes=None, particle_types=None, masses=None)

Creates a trajectory from NumPy arrays.

Parameters: - positions (np.ndarray): Position data, shape (n_frames, n_particles, 3) - times (np.ndarray, optional): Time data, shape (n_frames,) - box_sizes (np.ndarray, optional): Box dimensions, shape (n_frames, 3) or (3,) - particle_types (np.ndarray, optional): Particle types, shape (n_particles,) - masses (np.ndarray, optional): Particle masses, shape (n_particles,)

Returns: New Trajectory object

Example:

import smolsat
import numpy as np

# Generate synthetic trajectory data
n_frames, n_particles = 1000, 50
positions = np.random.randn(n_frames, n_particles, 3).cumsum(axis=0) * 0.1
times = np.arange(n_frames) * 0.001
box_sizes = np.full((n_frames, 3), 10.0)

# Create trajectory
trajectory = smolsat.numpy_to_trajectory(
    positions=positions,
    times=times,
    box_sizes=box_sizes,
    particle_types=np.ones(n_particles, dtype=int),
    masses=np.ones(n_particles)
)

print(f"Created trajectory: {trajectory.num_particles()} particles, {trajectory.num_frames()} frames")

# Can now use for analysis
lag_times, msd = smolsat.quick_msd(trajectory)

Analysis Utilities

calculate_diffusion_coefficient(lag_times, msd_values, fit_range=(0.1, 0.9))

Calculates diffusion coefficient from MSD data using linear fitting.

Parameters: - lag_times (np.ndarray): Lag times from MSD analysis - msd_values (np.ndarray): MSD values - fit_range (Tuple[float, float]): Fraction of data to use for fitting (default: (0.1, 0.9))

Returns: Dictionary containing: - 'D': Diffusion coefficient - 'slope': Fitted slope - 'intercept': Fitted intercept - 'r_squared': Coefficient of determination

Example:

import smolsat

# Perform MSD analysis
trajectory = smolsat.create_example_trajectory(num_particles=100, num_frames=2000)
lag_times, msd_values = smolsat.quick_msd(trajectory)

# Calculate diffusion coefficient
diff_results = smolsat.calculate_diffusion_coefficient(lag_times, msd_values)

print(f"Diffusion coefficient: {diff_results['D']:.6f}")
print(f"R-squared: {diff_results['r_squared']:.4f}")

# Custom fit range (use middle 60% of data)
diff_results_custom = smolsat.calculate_diffusion_coefficient(
    lag_times, msd_values, fit_range=(0.2, 0.8)
)

analyze_trajectory(trajectory, analyses=None, output_dir="analysis_results")

Performs comprehensive analysis of a trajectory with multiple methods.

Parameters: - trajectory (Trajectory): Input trajectory - analyses (List[str], optional): List of analyses to perform (default: ['msd', 'rg']) - output_dir (str): Directory for output files (default: "analysis_results")

Returns: Dictionary containing analysis results

Example:

import smolsat
import os

# Create trajectory
trajectory = smolsat.create_example_trajectory(num_particles=200, num_frames=3000)

# Comprehensive analysis
results = smolsat.analyze_trajectory(
    trajectory,
    analyses=['msd', 'rg', 'diffusion'],
    output_dir="my_analysis"
)

print("Analysis completed:")
print(f"  MSD data points: {len(results['msd']['lag_times'])}")
print(f"  Rg data points: {len(results['rg']['times'])}")
print(f"  Diffusion coefficient: {results['diffusion']['D']:.6f}")

# Check output files
if os.path.exists("my_analysis"):
    files = os.listdir("my_analysis")
    print(f"Output files: {files}")

System Information

get_system_info(trajectory)

Extracts comprehensive information about a trajectory system.

Parameters: - trajectory (Trajectory): Input trajectory

Returns: Dictionary containing system information

Example:

import smolsat

# Load trajectory
trajectory = smolsat.create_example_trajectory(num_particles=150, num_frames=1000)

# Get system information
info = smolsat.get_system_info(trajectory)

print("System Information:")
print(f"  Particles: {info['num_particles']}")
print(f"  Frames: {info['num_frames']}")
print(f"  Time range: {info['time_range']}")
print(f"  Box volume: {info['average_box_volume']:.2f}")
print(f"  Density: {info['density']:.4f}")
print(f"  Particle types: {info['particle_types']}")

Utility Functions

validate_trajectory(trajectory, verbose=True)

Validates trajectory data integrity and consistency.

Parameters: - trajectory (Trajectory): Trajectory to validate - verbose (bool): Print detailed validation results (default: True)

Returns: Dictionary with validation results

Example:

import smolsat

# Create trajectory
trajectory = smolsat.create_example_trajectory(num_particles=100, num_frames=500)

# Validate trajectory
validation = smolsat.validate_trajectory(trajectory)

if validation['valid']:
    print("Trajectory is valid!")
else:
    print("Issues found:")
    for issue in validation['issues']:
        print(f"  - {issue}")

merge_trajectories(trajectories, time_offset='auto')

Merges multiple trajectories into a single trajectory.

Parameters: - trajectories (List[Trajectory]): List of trajectories to merge - time_offset (str or float): How to handle time offsets ('auto', 'continuous', or specific offset)

Returns: New merged Trajectory object

Example:

import smolsat

# Create multiple trajectory segments
traj1 = smolsat.create_example_trajectory(num_particles=50, num_frames=500)
traj2 = smolsat.create_example_trajectory(num_particles=50, num_frames=500)
traj3 = smolsat.create_example_trajectory(num_particles=50, num_frames=500)

# Merge trajectories
merged_traj = smolsat.merge_trajectories([traj1, traj2, traj3], time_offset='continuous')

print(f"Merged trajectory: {merged_traj.num_frames()} frames")
print(f"Time range: {merged_traj.time(0):.3f} to {merged_traj.time(merged_traj.num_frames()-1):.3f}")

Complete Usage Example

import smolsat
import numpy as np
import matplotlib.pyplot as plt

# Create a comprehensive analysis workflow
def complete_analysis_workflow():
    """Demonstrates a complete analysis workflow using utils functions."""

    print("=== SMolSAT Complete Analysis Workflow ===")

    # 1. Create example trajectory
    print("\n1. Creating example trajectory...")
    trajectory = smolsat.create_example_trajectory(
        num_particles=200,
        num_frames=2000,
        box_size=(12.0, 12.0, 12.0),
        time_step=0.002
    )

    # 2. Validate trajectory
    print("\n2. Validating trajectory...")
    validation = smolsat.validate_trajectory(trajectory, verbose=False)
    print(f"   Trajectory valid: {validation['valid']}")

    # 3. Get system information
    print("\n3. System information:")
    info = smolsat.get_system_info(trajectory)
    for key, value in info.items():
        print(f"   {key}: {value}")

    # 4. Quick analyses
    print("\n4. Performing quick analyses...")

    # MSD analysis
    lag_times, msd_values = smolsat.quick_msd(trajectory)
    diff_results = smolsat.calculate_diffusion_coefficient(lag_times, msd_values)
    print(f"   Diffusion coefficient: {diff_results['D']:.6f}")

    # Radius of gyration
    times, rg_values = smolsat.quick_rg(trajectory)
    print(f"   Average Rg: {np.mean(rg_values):.3f} ± {np.std(rg_values):.3f}")

    # 5. Data conversion and custom analysis
    print("\n5. Custom analysis using NumPy conversion...")
    data = smolsat.trajectory_to_numpy(trajectory)
    positions = data['positions']

    # Calculate mean displacement per frame
    displacements = np.diff(positions, axis=0)
    mean_displacement = np.mean(np.linalg.norm(displacements, axis=2), axis=1)
    print(f"   Mean displacement per frame: {np.mean(mean_displacement):.4f}")

    # 6. Comprehensive analysis
    print("\n6. Running comprehensive analysis...")
    results = smolsat.analyze_trajectory(
        trajectory,
        analyses=['msd', 'rg', 'diffusion'],
        output_dir="complete_analysis"
    )

    print("   Analysis complete! Results saved to 'complete_analysis' directory.")

    return trajectory, results

# Run the complete workflow
if __name__ == "__main__":
    trajectory, results = complete_analysis_workflow()

See Also