# Load Extensions
# cpu line profiler
%load_ext line_profiler
# memory profiler
%load_ext memory_profiler
# Cython support
%load_ext Cython
# Reload modules before executing user code
%load_ext autoreload
%autoreload 2
# Set up the inline backend for matplotlib plots
%matplotlib inline
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Audio
# Apply seaborn's default plot theme to the matplotlib figures drawn below
sns.set()
def generateNoise(num_samples):
    """Draw one random noise value per sample.

    The values come from the standard normal distribution (mean 0,
    variance 1) rather than from a uniform distribution, using NumPy's
    global random state.

    Args:
        num_samples (int): number of samples to draw
    Returns:
        ndarray: 1-D array of ``num_samples`` floats from the standard
            normal distribution
    """
    noise = np.random.standard_normal(size=num_samples)
    return noise
def generateSignal(time_samples,frequency,addNoise=True):
    """Generate sinusoidal data, optionally with additive noise.

    Evaluates ``sin(2 * pi * frequency * t)`` at every time sample and,
    when requested, adds one noise value from ``generateNoise`` to each
    point.

    Args:
        time_samples (ndarray): time samples; one signal data point is
            generated for every entry
        frequency (float): signal frequency, in cycles per unit of
            ``time_samples``
        addNoise (bool): if true, add noise to the pure signal
    Returns:
        ndarray: either the pure signal or the pure signal with noise
    """
    angular_frequency = 2*np.pi*frequency
    waveform = np.sin(angular_frequency*time_samples)
    if not addNoise:
        return waveform
    # one noise value per time sample
    return waveform + generateNoise(len(time_samples))
def generateTimeSamples(duration,rate):
    """Generate evenly spaced sample times covering ``[0, duration)``.

    The series has ``duration * rate`` entries (rounded up when that
    product is not a whole number). The sample count is computed
    explicitly instead of relying on ``np.arange`` with a fractional
    step, whose length can be off by one because of floating-point
    rounding (e.g. ``duration=1, rate=49`` used to yield 50 samples).

    Args:
        duration (int or float): signal duration in seconds
        rate (int or float): sampling rate of the signal, i.e. how many
            points per second
    Returns:
        ndarray: float64 time series ``k / rate`` for
            ``k = 0, 1, ..., n - 1``; empty when no sample falls in range
    Raises:
        ZeroDivisionError: if ``rate`` is zero
    """
    if rate == 0:
        # keep the exception type the former ``1/rate`` expression raised
        raise ZeroDivisionError("rate must be non-zero")
    # Round away floating-point noise in the product first (0.1 * 44100 is
    # 4410.000000000001), then round up so that a trailing partial sample
    # period still contributes its starting sample.
    num_samples = int(np.ceil(round(duration * rate, 9)))
    # Dividing the integer index by the rate keeps every time stamp as
    # accurate as a single floating-point division allows.
    return np.arange(num_samples, dtype=np.float64) / rate
def processTimeSeriesData(duration,rate,frequency,addNoise=True):
    """Build a DataFrame holding a sampled sine wave.

    Generates the time samples for the requested duration and sampling
    rate, evaluates the (optionally noisy) signal at those times, and
    returns both as columns of one DataFrame.

    Args:
        duration: signal duration, forwarded to ``generateTimeSamples``
        rate: sampling rate, forwarded to ``generateTimeSamples``
        frequency: signal frequency, forwarded to ``generateSignal``
        addNoise (bool): if true, noise is added to the pure signal
    Returns:
        DataFrame: columns ``time`` and ``signal``, one row per sample
    """
    # time axis first, then the signal evaluated on it
    time_axis = generateTimeSamples(duration, rate)
    waveform = generateSignal(time_axis, frequency, addNoise)
    return pd.DataFrame({'time': time_axis, 'signal': waveform})
# sampling rate: number of samples per second
sample_rate = 44100
# oscillations per second; Middle C, since it sounds nice
sample_frequency = 261.63
# total length of the sample in seconds (two hours)
sample_duration = 2 * 60 * 60
# fix the random seed so the generated noise is reproducible
np.random.seed(1)
# Build the noisy signal (addNoise=True) while line-profiling processTimeSeriesData;
# the resulting DataFrame is bound to mainDF_with_noise
%lprun -f processTimeSeriesData mainDF_with_noise = processTimeSeriesData(sample_duration,sample_rate,sample_frequency,True)
# filter and plot the first second of data (rows with time <= 1)
ax = mainDF_with_noise[mainDF_with_noise.time <= 1].plot(kind='line',x='time',y='signal',ylim=(-6,6),grid=True)
# filter and plot the first 0.1 second of data (rows with time <= 0.1)
ax = mainDF_with_noise[mainDF_with_noise.time <= 0.1].plot(kind='line',x='time',y='signal',ylim=(-6,6),grid=True)
# Let's see how the signal looks in the frequency domain
fqDF_noise = mainDF_with_noise[(mainDF_with_noise.time <= 5)] # look at only 5 seconds
# two stacked axes: waveform over time on top, spectrogram below
fig, (ax1, ax2) = plt.subplots(nrows=2)
ax1.plot(fqDF_noise['time'], fqDF_noise['signal'])
# Fs is set to the sampling rate used to generate the signal
ax2.specgram(fqDF_noise['signal'], NFFT=2048, Fs=sample_rate, noverlap=500)
plt.show()
# Audio object for the same 5-second excerpt, at the generation sampling rate
# NOTE(review): a bare expression is presumably only rendered when it is the
# last statement of a notebook cell - confirm the original cell boundaries
Audio(fqDF_noise['signal'],rate=sample_rate)
# Build the pure signal (addNoise=False) while line-profiling processTimeSeriesData;
# the resulting DataFrame is bound to mainDF
%lprun -f processTimeSeriesData mainDF = processTimeSeriesData(sample_duration,sample_rate,sample_frequency,False)
# filter and plot the first second of data (rows with time <= 1)
ax = mainDF[mainDF.time <= 1].plot(kind='line',x='time',y='signal',ylim=(-6,6),grid=True)
# filter and plot the first 0.1 second of data (rows with time <= 0.1)
ax = mainDF[mainDF.time <= 0.1].plot(kind='line',x='time',y='signal',ylim=(-6,6),grid=True)
# Let's see how the signal looks in the frequency domain
fqDF = mainDF[(mainDF.time <= 5)] # look at only 5 seconds
# two stacked axes: waveform over time on top, spectrogram below
fig, (ax1, ax2) = plt.subplots(nrows=2)
ax1.plot(fqDF['time'], fqDF['signal'])
# Fs is set to the sampling rate used to generate the signal
ax2.specgram(fqDF['signal'], NFFT=2048, Fs=sample_rate, noverlap=500)
plt.show()