Implemented shift operation and limit option

Summary: This diff implements the shift operation, which removes (efficiently) the first (oldest) N records from the datasource. Also implemented a `limit` option to truncate automatically and limit memory usage

Reviewed By: nikoant

Differential Revision: D26883673

fbshipit-source-id: c5ebaf2a327d56cbbe38280c6376c833bcf68b8c
This commit is contained in:
Michel Weststrate
2021-03-16 14:54:53 -07:00
committed by Facebook GitHub Bot
parent 564d440b4a
commit 2a3458aff8
2 changed files with 279 additions and 39 deletions

View File

@@ -14,6 +14,10 @@ import {
sortBy as lodashSort,
} from 'lodash';
// If the dataSource becomes to large, after how many records will we start to drop items?
const dropFactor = 0.1;
const defaultLimit = 200 * 1000;
// TODO: support better minification
// TODO: separate views from datasource to be able to support multiple transformation simultanously
// TODO: expose interface with public members only
@@ -43,8 +47,17 @@ type RemoveEvent<T> = {
entry: Entry<T>;
index: number;
};
type ShiftEvent<T> = {
type: 'shift';
entries: Entry<T>[];
amount: number;
};
type DataEvent<T> = AppendEvent<T> | UpdateEvent<T> | RemoveEvent<T>;
type DataEvent<T> =
| AppendEvent<T>
| UpdateEvent<T>
| RemoveEvent<T>
| ShiftEvent<T>;
type Entry<T> = {
value: T;
@@ -87,6 +100,9 @@ export class DataSource<
private _recordsById: Map<KEY_TYPE, T> = new Map();
private keyAttribute: undefined | keyof T;
private idToIndex: Map<KEY_TYPE, number> = new Map();
// if we shift the window, we increase shiftOffset, rather than remapping all values
private shiftOffset = 0;
limit = defaultLimit;
private sortBy: undefined | ((a: T) => Primitive);
@@ -190,17 +206,27 @@ export class DataSource<
*/
indexOfKey(key: KEY_TYPE): number {
this.assertKeySet();
return this.idToIndex.get(key) ?? -1;
const stored = this.idToIndex.get(key);
return stored === undefined ? -1 : stored + this.shiftOffset;
}
private storeIndexOfKey(key: KEY_TYPE, index: number) {
// de-normalize the index, so that on later look ups its corrected again
this.idToIndex.set(key, index - this.shiftOffset);
}
append(value: T) {
if (this._records.length >= this.limit) {
// we're full! let's free up some space
this.shift(Math.ceil(this.limit * dropFactor));
}
if (this.keyAttribute) {
const key = this.getKey(value);
if (this._recordsById.has(key)) {
throw new Error(`Duplicate key: '${key}'`);
}
this._recordsById.set(key, value);
this.idToIndex.set(key, this._records.length);
this.storeIndexOfKey(key, this._records.length);
}
const entry = {
value,
@@ -223,8 +249,7 @@ export class DataSource<
this.assertKeySet();
const key = this.getKey(value);
if (this.idToIndex.has(key)) {
const idx = this.idToIndex.get(key)!;
this.update(idx, value);
this.update(this.indexOfKey(key), value);
return true;
} else {
this.append(value);
@@ -253,7 +278,7 @@ export class DataSource<
this.idToIndex.delete(currentKey);
}
this._recordsById.set(key, value);
this.idToIndex.set(key, index);
this.storeIndexOfKey(key, index);
}
this.emitDataEvent({
type: 'update',
@@ -278,10 +303,16 @@ export class DataSource<
const key = this.getKey(entry.value);
this._recordsById.delete(key);
this.idToIndex.delete(key);
// Optimization: this is O(n)! Should be done as an async job
this.idToIndex.forEach((keyIndex, key) => {
if (keyIndex > index) this.idToIndex.set(key, keyIndex - 1);
});
if (index === 0) {
// lucky happy case, this is more efficient
this.shiftOffset -= 1;
} else {
// Optimization: this is O(n)! Should be done as an async job
this.idToIndex.forEach((keyIndex, key) => {
if (keyIndex + this.shiftOffset > index)
this.storeIndexOfKey(key, keyIndex - 1);
});
}
}
this.emitDataEvent({
type: 'remove',
@@ -298,8 +329,8 @@ export class DataSource<
*/
removeByKey(keyValue: KEY_TYPE): boolean {
this.assertKeySet();
const index = this.idToIndex.get(keyValue);
if (index === undefined) {
const index = this.indexOfKey(keyValue);
if (index === -1) {
return false;
}
this.remove(index);
@@ -310,10 +341,29 @@ export class DataSource<
* Removes the first N entries.
* @param amount
*/
shift(_amount: number) {
shift(amount: number) {
amount = Math.min(amount, this._records.length);
if (amount === this._records.length) {
this.clear();
return;
}
// increase an offset variable with amount, and correct idToIndex reads / writes with that
this.shiftOffset -= amount;
// removes the affected records for _records, _recordsById and idToIndex
throw new Error('Not Implemented');
const removed = this._records.splice(0, amount);
if (this.keyAttribute) {
removed.forEach((entry) => {
const key = this.getKey(entry.value);
this._recordsById.delete(key);
this.idToIndex.delete(key);
});
}
this.emitDataEvent({
type: 'shift',
entries: removed,
amount,
});
}
setWindow(start: number, end: number) {
@@ -368,6 +418,7 @@ export class DataSource<
this.windowEnd = 0;
this._records = [];
this._recordsById = new Map();
this.shiftOffset = 0;
this.idToIndex = new Map();
this.dataUpdateQueue = [];
this.output = [];
@@ -463,11 +514,10 @@ export class DataSource<
}
private processEvent = (event: DataEvent<T>) => {
const {entry} = event;
const {output, sortBy, filter} = this;
switch (event.type) {
case 'append': {
// TODO: increase total counter
const {entry} = event;
if (!entry.visible) {
// not in filter? skip this entry
return;
@@ -483,6 +533,7 @@ export class DataSource<
break;
}
case 'update': {
const {entry} = event;
// short circuit; no view active so update straight away
if (!filter && !sortBy) {
output[event.index].approxIndex = event.index;
@@ -523,19 +574,28 @@ export class DataSource<
break;
}
case 'remove': {
// filter active, and not visible? short circuilt
if (!entry.visible) {
return;
}
// no sorting, no filter?
if (!sortBy && !filter) {
output.splice(event.index, 1);
this.notifyItemShift(event.index, -1);
this.processRemoveEvent(event.index, event.entry);
break;
}
case 'shift': {
// no sorting? then all items are removed from the start so optimize for that
if (!sortBy) {
let amount = 0;
if (!filter) {
amount = event.amount;
} else {
// if there is a filter, count the visibles and shift those
for (let i = 0; i < event.entries.length; i++)
if (event.entries[i].visible) amount++;
}
output.splice(0, amount);
this.notifyItemShift(0, -amount);
} else {
// sorting or filter is active, find the actual location
const existingIndex = this.getSortedIndex(entry, event.entry.value);
output.splice(existingIndex, 1);
this.notifyItemShift(existingIndex, -1);
// we have sorting, so we need to remove item by item
// we do this backward, so that approxIndex is more likely to be correct
for (let i = event.entries.length - 1; i >= 0; i--) {
this.processRemoveEvent(i, event.entries[i]);
}
}
break;
}
@@ -544,6 +604,25 @@ export class DataSource<
}
};
private processRemoveEvent(index: number, entry: Entry<T>) {
const {output, sortBy, filter} = this;
// filter active, and not visible? short circuilt
if (!entry.visible) {
return;
}
// no sorting, no filter?
if (!sortBy && !filter) {
output.splice(index, 1);
this.notifyItemShift(index, -1);
} else {
// sorting or filter is active, find the actual location
const existingIndex = this.getSortedIndex(entry, entry.value);
output.splice(existingIndex, 1);
this.notifyItemShift(existingIndex, -1);
}
}
private rebuildOutput() {
const {sortBy, filter, sortHelper} = this;
// copy base array or run filter (with side effecty update of visible)
@@ -614,18 +693,26 @@ export class DataSource<
}
}
type CreateDataSourceOptions<T, K extends keyof T> = {
key?: K;
limit?: number;
};
export function createDataSource<T, KEY extends keyof T = any>(
initialSet: T[],
keyAttribute: KEY,
options: CreateDataSourceOptions<T, KEY>,
): DataSource<T, KEY, ExtractKeyType<T, KEY>>;
export function createDataSource<T>(
initialSet?: T[],
): DataSource<T, never, never>;
export function createDataSource<T, KEY extends keyof T>(
initialSet: T[] = [],
keyAttribute?: KEY | undefined,
options?: CreateDataSourceOptions<T, KEY>,
): DataSource<T, any, any> {
const ds = new DataSource<T, KEY>(keyAttribute);
const ds = new DataSource<T, KEY>(options?.key);
if (options?.limit !== undefined) {
ds.limit = options.limit;
}
initialSet.forEach((value) => ds.append(value));
return ds;
}

View File

@@ -51,7 +51,7 @@ test('can create a datasource', () => {
});
test('can create a keyed datasource', () => {
const ds = createDataSource<Todo>([eatCookie], 'id');
const ds = createDataSource<Todo>([eatCookie], {key: 'id'});
expect(ds.records).toEqual([eatCookie]);
ds.append(drinkCoffee);
@@ -101,7 +101,7 @@ test('can create a keyed datasource', () => {
});
test('throws on invalid keys', () => {
const ds = createDataSource<Todo>([eatCookie], 'id');
const ds = createDataSource<Todo>([eatCookie], {key: 'id'});
expect(() => {
ds.append({id: '', title: 'test'});
}).toThrow(`Invalid key value: ''`);
@@ -111,7 +111,7 @@ test('throws on invalid keys', () => {
});
test('removing invalid keys', () => {
const ds = createDataSource<Todo>([eatCookie], 'id');
const ds = createDataSource<Todo>([eatCookie], {key: 'id'});
expect(ds.removeByKey('trash')).toBe(false);
expect(() => {
ds.remove(1);
@@ -255,7 +255,9 @@ test('filter + sort', () => {
});
test('filter + sort + index', () => {
const ds = createDataSource<Todo>([eatCookie, drinkCoffee, submitBug], 'id');
const ds = createDataSource<Todo>([eatCookie, drinkCoffee, submitBug], {
key: 'id',
});
ds.setFilter((t) => t.title.indexOf('c') === -1);
ds.setSortBy('title');
@@ -305,7 +307,9 @@ test('filter + sort + index', () => {
});
test('filter', () => {
const ds = createDataSource<Todo>([eatCookie, drinkCoffee, submitBug], 'id');
const ds = createDataSource<Todo>([eatCookie, drinkCoffee, submitBug], {
key: 'id',
});
ds.setFilter((t) => t.title.indexOf('c') === -1);
expect(unwrap(ds.output)).toEqual([submitBug]);
@@ -436,7 +440,9 @@ test('reverse with sorting', () => {
});
test('reset', () => {
const ds = createDataSource<Todo>([submitBug, drinkCoffee, eatCookie], 'id');
const ds = createDataSource<Todo>([submitBug, drinkCoffee, eatCookie], {
key: 'id',
});
ds.setSortBy('title');
ds.setFilter((v) => v.id !== 'cookie');
expect(unwrap(ds.output)).toEqual([drinkCoffee, submitBug]);
@@ -448,7 +454,9 @@ test('reset', () => {
});
test('clear', () => {
const ds = createDataSource<Todo>([submitBug, drinkCoffee, eatCookie], 'id');
const ds = createDataSource<Todo>([submitBug, drinkCoffee, eatCookie], {
key: 'id',
});
ds.setSortBy('title');
ds.setFilter((v) => v.id !== 'cookie');
expect(unwrap(ds.output)).toEqual([drinkCoffee, submitBug]);
@@ -471,7 +479,7 @@ function testEvents<T>(
op: (ds: DataSource<T, any, any>) => void,
key?: keyof T,
): any[] {
const ds = createDataSource<T>(initial, key);
const ds = createDataSource<T>(initial, {key});
const events: any[] = [];
ds.setOutputChangeListener((e) => events.push(e));
op(ds);
@@ -638,3 +646,148 @@ test('basic remove', () => {
},
]);
});
test('basic shift', () => {
const completedBug = {id: 'bug', title: 'fixed bug', done: true};
expect(
testEvents(
[drinkCoffee, eatCookie, submitBug],
(ds) => {
ds.setWindow(0, 100);
ds.shift(2);
expect(ds.getOutput()).toEqual([submitBug]);
expect(ds.recordsById.get('bug')).toBe(submitBug);
expect(ds.recordsById.get('coffee')).toBeUndefined();
expect(ds.indexOfKey('bug')).toBe(0);
expect(ds.indexOfKey('coffee')).toBe(-1);
ds.upsert(completedBug);
expect(ds.getOutput()).toEqual([completedBug]);
expect(ds.recordsById.get('bug')).toBe(completedBug);
},
'id',
),
).toEqual([
{
type: 'shift',
newCount: 1,
location: 'in',
index: 0,
delta: -2,
},
{
type: 'update',
index: 0,
},
]);
});
test('sorted shift', () => {
expect(
testEvents(['c', 'b', 'a', 'e', 'd'], (ds) => {
ds.setWindow(0, 100);
ds.setSortBy((v) => v);
expect(ds.getOutput()).toEqual(['a', 'b', 'c', 'd', 'e']);
ds.shift(4);
expect(ds.getOutput()).toEqual(['d']);
ds.shift(1); // optimizes to reset
}),
).toEqual([
{newCount: 5, type: 'reset'}, // sort
{delta: -1, index: 4, location: 'in', newCount: 4, type: 'shift'}, // e
{delta: -1, index: 0, location: 'in', newCount: 3, type: 'shift'}, // a
{delta: -1, index: 0, location: 'in', newCount: 2, type: 'shift'}, // b
{delta: -1, index: 0, location: 'in', newCount: 1, type: 'shift'}, // c
{newCount: 0, type: 'reset'}, // shift that clears
]);
});
test('filtered shift', () => {
expect(
testEvents(['c', 'b', 'a', 'e', 'd'], (ds) => {
ds.setWindow(0, 100);
ds.setFilter((v) => v !== 'b' && v !== 'e');
expect(ds.getOutput()).toEqual(['c', 'a', 'd']);
ds.shift(4);
expect(ds.getOutput()).toEqual(['d']);
}),
).toEqual([
{newCount: 3, type: 'reset'}, // filter
{type: 'shift', location: 'in', newCount: 1, index: 0, delta: -2}, // optimized shift
]);
});
test('remove after shift works correctly', () => {
const a: Todo = {id: 'a', title: 'a', done: false};
const b: Todo = {id: 'b', title: 'b', done: false};
expect(
testEvents(
[eatCookie, drinkCoffee, submitBug, a, b],
(ds) => {
ds.setWindow(0, 100);
ds.shift(2);
ds.removeByKey('b');
ds.removeByKey('bug');
expect(ds.getOutput()).toEqual([a]);
expect(ds.indexOfKey('cookie')).toBe(-1);
expect(ds.indexOfKey('coffee')).toBe(-1);
expect(ds.indexOfKey('bug')).toBe(-1);
expect(ds.indexOfKey('a')).toBe(0);
expect(ds.indexOfKey('b')).toBe(-1);
},
'id',
),
).toEqual([
{
type: 'shift',
newCount: 3,
location: 'in',
index: 0,
delta: -2,
},
{
type: 'shift',
newCount: 2,
location: 'in',
index: 2,
delta: -1,
},
{
type: 'shift',
newCount: 1,
location: 'in',
index: 0,
delta: -1,
},
]);
});
test('respects limit', () => {
const grab = (): [length: number, first: number, last: number] => {
const output = ds.getOutput();
return [output.length, output[0], output[output.length - 1]];
};
const ds = createDataSource(
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
{limit: 20},
);
ds.setWindow(0, 100);
ds.append(19);
ds.append(20);
expect(grab()).toEqual([20, 1, 20]);
ds.append(21);
expect(grab()).toEqual([19, 3, 21]);
ds.append(22);
expect(grab()).toEqual([20, 3, 22]);
ds.remove(0);
expect(grab()).toEqual([19, 4, 22]);
ds.append(23);
expect(grab()).toEqual([20, 4, 23]);
ds.append(24);
expect(grab()).toEqual([19, 6, 24]);
});