Mid- and high-level API to load `timeseries` data into a `DataLoaders`
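This walkthrough builds the same NATOPS `DataLoaders` four ways: with the mid-level `Datasets` API (`dls1`), with a `DataBlock` driven by `get_ts_items` (`dls2`), with a `DataBlock` fed pre-loaded items (`dls3`), and with the high-level `TSDataLoaders.from_files` factory (`dls4`). It then trains an InceptionTime model and closes with round-trip tests for the `Normalize` and `Standardize` batch transforms.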
# path_data = Config().data
# path_data, path_data.ls()
dsname = 'NATOPS'  # other options: 'LSST', 'Wine', 'Epilepsy', 'HandMovementDirection'
# url = 'http://www.timeseriesclassification.com/Downloads/NATOPS.zip'
path = unzip_data(URLs_TS.NATOPS)
path
path.ls()
fname_train = f'{dsname}_TRAIN.arff'
fname_test = f'{dsname}_TEST.arff'
fnames = [path/fname_train, path/fname_test]
fnames
data = TSData.from_arff(fnames)
print(data)
# get_items() returns a list of (x, y) tuples
items = data.get_items()
idx = 0
items[idx], type(items[idx][0]), type(items[idx][1])
seed = 42
splits = RandomSplitter(seed=seed)(range_of(items))
splits
# len(splits[0]), len(splits[1])
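For reference, `RandomSplitter` simply returns two index lists, `(train_idxs, valid_idxs)`. A minimal sketch of the same idea in plain Python (the 20% validation fraction matches fastai's default; the helper name is mine):
import random

def random_splitter(n_items, valid_pct=0.2, seed=42):
    # Shuffle all indices deterministically, then split off a validation tail
    idxs = list(range(n_items))
    random.Random(seed).shuffle(idxs)
    n_valid = int(n_items * valid_pct)
    return idxs[n_valid:], idxs[:n_valid]  # (train_idxs, valid_idxs)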
# Two transform pipelines: the first builds the input (x), the second the target (y)
tfms = [[ItemGetter(0), ToTensorTS()], [ItemGetter(1), Categorize()]]
# Create a dataset
ds = Datasets(items, tfms, splits=splits)
ds.vocab
ds[0]
ax = show_at(ds, 2, figsize=(1,1))
bs = 128
# Normalize (applied at batch time) scales each value into scale_range using min and max
batch_tfms = [Normalize(scale_subtype='per_sample_per_channel', scale_range=(0, 1))]
# Alternative: standardize to zero mean and unit variance
# batch_tfms = [Standardize(scale_subtype='per_sample_per_channel')]
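Roughly what the `'per_sample_per_channel'` min-max scaling computes: min and max are taken over the time axis, independently for every sample and every channel. A sketch in plain PyTorch, not the package's implementation (the epsilon guard is my addition):
import torch

def minmax_per_sample_per_channel(x, scale_range=(0, 1)):
    # x: (batch, channels, time)
    lo, hi = scale_range
    mn = x.min(dim=-1, keepdim=True).values  # one min per sample and channel
    mx = x.max(dim=-1, keepdim=True).values
    return lo + (hi - lo) * (x - mn) / (mx - mn + 1e-8)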
dls1 = ds.dataloaders(bs=bs, val_bs=bs * 2, after_batch=batch_tfms, num_workers=0, device=default_device())
dls1.show_batch(max_n=9, chs=range(0, 12, 3))  # plot channels 0, 3, 6 and 9
getters = [ItemGetter(0), ItemGetter(1)]
tsdb = DataBlock(blocks=(TSBlock, CategoryBlock),
                 get_items=get_ts_items,
                 getters=getters,
                 splitter=RandomSplitter(seed=seed),
                 batch_tfms=batch_tfms)
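`get_ts_items` loads the `(x, y)` tuples straight from the ARFF file names, so the `DataBlock` can be handed `fnames` directly (equivalent to `data.get_items()`, as shown further below).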
fnames
tsdb.datasets(fnames, verbose=True)
# Check that the DataBlock assembled the expected transform pipelines
test_eq(tsdb.type_tfms[0].map(type), [ToTensorTS])
test_eq(tsdb.type_tfms[1].map(type), [Categorize])
test_eq(tsdb.default_item_tfms.map(type), [ToTensor])
test_eq(tsdb.batch_tfms.map(type), [Normalize])
tsdb.summary(fnames)
dls2 = tsdb.dataloaders(fnames, num_workers=0, device=default_device())
dls2.show_batch(ctxs=None, max_n=9, chs=range(0,12,3))
getters = [ItemGetter(0), ItemGetter(1)]
tsdb = DataBlock(blocks=(TSBlock, CategoryBlock),
                 getters=getters,
                 splitter=RandomSplitter(seed=seed))
# Either pass data.get_items() directly:
# dls3 = tsdb.dataloaders(data.get_items(), batch_tfms=batch_tfms, num_workers=0, device=default_device())
# ... or use get_ts_items([fname_train, fname_test]):
dls3 = tsdb.dataloaders(get_ts_items(fnames), batch_tfms=batch_tfms, num_workers=0, device=default_device())
dls3.show_batch(ctxs=None, max_n=9, chs=range(0,12,3))
Define TSDataLoaders
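`TSDataLoaders.from_files` is the highest-level entry point: loading, splitting, and batching are bundled into a single factory call on the ARFF file names.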
# fnames = [path/fname_train, path/fname_test]
dls4 = TSDataLoaders.from_files(fnames=fnames, path=path, batch_tfms=batch_tfms, num_workers=0, device=default_device())
dls4.path
dls4.show_batch(ctxs=None, max_n=9, chs=range(0,12,3))
# Number of channels (called "dimensions" in ARFF/TS file jargon)
c_in = get_n_channels(dls2.train)  # same as data.n_channels
# Number of classes
c_out = dls1.c
c_in, c_out  # for NATOPS this should be (24, 6)
model = inception_time(c_in, c_out).to(device=default_device())  # InceptionTime (Ismail Fawaz et al., 2019)
model
# model[5]
# opt_func = partial(Adam, lr=3e-3, wd=0.01)
# Or use Ranger, i.e. RAdam wrapped in Lookahead
def opt_func(p, lr=slice(3e-3)): return Lookahead(RAdam(p, lr=lr, mom=0.95, wd=0.01))
# Learner
loss_func = LabelSmoothingCrossEntropy()
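Label smoothing blends the usual cross-entropy with a uniform prior over the classes, so the model is penalized for over-confident predictions. A minimal sketch of what the loss computes, assuming fastai's default ε = 0.1 (not the library's exact code):
import torch.nn.functional as F

def label_smoothing_ce(logits, target, eps=0.1):
    n_classes = logits.size(-1)
    log_preds = F.log_softmax(logits, dim=-1)
    uniform = -log_preds.sum(dim=-1).mean() / n_classes  # cross-entropy vs. the uniform distribution
    nll = F.nll_loss(log_preds, target)
    return (1 - eps) * nll + eps * uniform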
learn = Learner(dls2, model, opt_func=opt_func, loss_func=loss_func, metrics=accuracy)
print(learn.summary())
lr_min, lr_steep = learn.lr_find()
lr_min, lr_steep
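`lr_find` returns two suggestions: `lr_min` (the learning rate at the loss minimum, divided by 10) and `lr_steep` (the point of steepest descent); `lr_steep` is used as `lr_max` below.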
# For reference, fastai's Learner signature:
# class Learner():
# def __init__(self, dls, model, loss_func=None, opt_func=Adam, lr=defaults.lr, splitter=trainable_params, cbs=None,
# metrics=None, path=None, model_dir='models', wd=defaults.wd, wd_bn_bias=False, train_bn=True,
# moms=(0.95,0.85,0.95)):
# learn.fit_one_cycle(20, lr_max=lr_steep)
# lr_max = 1e-3  # alternative: a fixed learning rate instead of lr_steep
epochs = 30; lr_max = lr_steep; pct_start = .7; moms = (0.95, 0.85, 0.95); wd = 1e-2
learn.fit_one_cycle(epochs, lr_max=lr_max, pct_start=pct_start, moms=moms, wd=wd)  # pct_start=.7: 70% of the schedule ramps the lr up
learn.recorder.plot_loss()
learn.show_results(max_n=9, chs=range(0,12,3))
scale_subtype = 'all_samples'  # or 'all_samples_per_channel'
mean, std = get_mean_std(data.x, scale_subtype=scale_subtype)
mean.shape, std.shape, mean, std
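Roughly what the two `scale_subtype` options compute, sketched in plain PyTorch (helper names are mine, not the package's): `'all_samples'` pools a single global mean/std, `'all_samples_per_channel'` keeps one statistic per channel.
import torch

def mean_std_all_samples(x):
    # x: (n_samples, channels, time) -> scalar mean and std over everything
    return x.mean(), x.std()

def mean_std_all_samples_per_channel(x):
    # one mean/std per channel, pooled over samples and time steps
    return x.mean(dim=(0, 2)), x.std(dim=(0, 2))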
idx = 3
item = items[idx]
item
# Apply the x-pipeline by hand to a single raw item
pipe = Pipeline([ItemGetter(0), ToTensorTS()])
t = pipe(item)
label = ItemGetter(1)(item)
t, t.shape, t.show(title=label)
scale_subtype = 'all_samples_per_channel'  # or 'all_samples'
ts_min, ts_max = get_min_max(data.x, scale_subtype=scale_subtype)  # renamed to avoid shadowing the builtins min/max
ts_min.shape, ts_max.shape, ts_min, ts_max
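The tests below verify that `decodes` inverts each transform exactly. For min-max scaling into `(lo, hi)` the round trip is, in sketch form (helper names are mine):
def minmax_encode(x, mn, mx, lo=0, hi=1): return lo + (hi - lo) * (x - mn) / (mx - mn)
def minmax_decode(y, mn, mx, lo=0, hi=1): return mn + (y - lo) * (mx - mn) / (hi - lo)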
tfm_norm = Normalize(min=ts_min, max=ts_max, scale_subtype='all_samples', scale_range=(0, 1), cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
tfm_norm = Normalize(min=ts_min, max=ts_max, scale_subtype='all_samples_per_channel', scale_range=(0, 1), cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
# The per-sample subtypes compute their statistics on the fly, so no precomputed min/max is needed
tfm_norm = Normalize(scale_subtype='per_sample', scale_range=(0, 1), cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
tfm_norm = Normalize(scale_subtype='per_sample_per_channel', scale_range=(0, 1), cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
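The same round-trip property holds for `Standardize`; in sketch form:
def standardize_encode(x, mean, std): return (x - mean) / std
def standardize_decode(y, mean, std): return y * std + mean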
tfm_norm = Standardize(mean=mean, std=std, scale_subtype='all_samples', cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
tfm_norm = Standardize(mean=mean, std=std, scale_subtype='all_samples_per_channel', cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
tfm_norm = Standardize(scale_subtype='per_sample', cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
tfm_norm = Standardize(scale_subtype='per_sample_per_channel', cuda=False)
item_norm = tfm_norm(t)
test_eq_tensor(t, tfm_norm.decodes(item_norm))
# #hide
# from nbdev.export2html import _notebook2html
# # notebook2script()
# _notebook2html(fname='81_timeseries_core.ipynb')