Detail: Batch: Constructor#

Overview: Batch: Constructor

Batch.__init__(items, /, *, name=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Default constructor of a Batch.

Parameters:

name – A hashable object to label the container.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt
<Batch max_workers=None>
>>> bt.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_frames(frames, /, *, name=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Return a Batch from an iterable of Frame; labels will be drawn from Frame.name.

>>> bt = sf.Batch.from_frames((sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x'), sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v')))
>>> bt
<Batch max_workers=None>
>>> bt.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
x                p     0       1
x                q     2       3
x                r     4       5
v                p     40      41
v                q     42      43
v                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_sqlite(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to an SQLite Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_sqlite('/tmp/f.sqlite')
>>> bt2 = sf.Batch.from_sqlite('/tmp/f.sqlite', config=sf.StoreConfig(index_depth=1))
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_xlsx(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to an XLSX Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_xlsx('/tmp/f.xlsx')
>>> bt2 = sf.Batch.from_xlsx('/tmp/f.xlsx', config=sf.StoreConfig(index_depth=1))
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_zip_csv(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to a zipped CSV Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_zip_csv('/tmp/f.zip')
>>> bt2 = sf.Batch.from_zip_csv('/tmp/f.zip', config=sf.StoreConfig(index_depth=1))
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_zip_npy(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to a zipped NPY Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_zip_npy('/tmp/f.zip')
>>> bt2 = sf.Batch.from_zip_npy('/tmp/f.zip')
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_zip_npz(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to a zipped NPZ Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_zip_npz('/tmp/f.zip')
>>> bt2 = sf.Batch.from_zip_npz('/tmp/f.zip')
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_zip_parquet(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to a zipped parquet Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_zip_parquet('/tmp/f.zip')
>>> bt2 = sf.Batch.from_zip_parquet('/tmp/f.zip', config=sf.StoreConfig(index_depth=1))
>>> bt2.to_frame()
<Frame>
<Index>                   a       b       <<U1>
<IndexHierarchy>
i                p        0       1
i                q        2       3
i                r        4       5
j                p        40      41
j                q        42      43
j                r        44      45
<<U1>            <object> <int64> <int64>

classmethod Batch.from_zip_pickle(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to a zipped pickle Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_zip_pickle('/tmp/f.zip')
>>> bt2 = sf.Batch.from_zip_pickle('/tmp/f.zip')
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>

classmethod Batch.from_zip_tsv(fp, /, *, config=None, max_workers=None, chunksize=1, use_threads=False, mp_context=None)[source]#

Given a file path to a zipped TSV Batch store, return a Batch instance.

Parameters:

fp – A string file path or Path instance.
config – A StoreConfig, or a mapping of label to StoreConfig.
max_workers – Number of parallel executors, as passed to the Thread- or ProcessPoolExecutor; None uses the executor's default, which is derived from the number of processors on the machine.
chunksize – Units of work per executor, as passed to the Thread- or ProcessPoolExecutor.
use_threads – Use the ThreadPoolExecutor instead of the ProcessPoolExecutor.

>>> bt1 = sf.Batch((('i', sf.Frame(np.arange(6).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='x')), ('j', sf.Frame(np.arange(40, 46).reshape(3,2), index=('p', 'q', 'r'), columns=('a', 'b'), name='v'))))
>>> bt1.to_zip_tsv('/tmp/f.zip')
>>> bt2 = sf.Batch.from_zip_tsv('/tmp/f.zip', config=sf.StoreConfig(index_depth=1))
>>> bt2.to_frame()
<Frame>
<Index>                a       b       <<U1>
<IndexHierarchy>
i                p     0       1
i                q     2       3
i                r     4       5
j                p     40      41
j                q     42      43
j                r     44      45
<<U1>            <<U1> <int64> <int64>