Specifications:
- Methods named setX (e.g. setOrder) work on the data by reference, to avoid copying data and minimise memory usage.
- Consider whether options should be passed as positional arguments or as an options object; this affects whether a function can be extended in the future.
- Static methods are used for operations that do not modify the original data.
import { DataTable, Row } from '@dereckmezquita/datatable';
// TODO: consider data structure for storing data internally
// The constructor handles both row-oriented and column-oriented data.

// Row-oriented input: an array of records, one object per row.
const dt1 = new DataTable([
  { id: 1, name: 'A', sex: 'F', district: 'X', score: 10 },
  { id: 2, name: 'B', sex: 'F', district: 'Y', score: 20 },
  { id: 3, name: 'C', sex: 'M', district: 'X', score: 30 },
  { id: 4, name: 'D', sex: 'F', district: 'Y', score: 40 },
  { id: 5, name: 'E', sex: 'M', district: 'Z', score: 50 }
]);

// Column-oriented input: an object mapping each column name to its array of values.
const dt2 = new DataTable({
  id: [3, 4, 5, 6, 7],
  name: ['C', 'D', 'E', 'F', 'G'],
  sex: ['M', 'M', 'M', 'F', 'M'],
  district: ['X', 'Y', 'Z', 'X', 'Y'],
  score: [30, 40, 50, 600, 700]
});

// The key has to name a column that exists in the data, so the identifier
// column is called `id` in both tables (it was previously declared as `idx`
// while the key and all lookups used 'id').
dt1.setKey('id');
dt2.setKey('id');
// Array helper functions: each call produces a boolean array.

// fin: result is a boolean array equal in length to dt1.
fin(dt1.col('id'), dt2.col('id'));
// fnin: flags which elements are NOT in the second array.
fnin(dt1.col('id'), dt2.col('id'));

// The same helpers applied to plain arrays, with the expected output.
const arr1 = [1, 2, 3];
const arr2 = [1, 3, 4, 5];
console.log(fin(arr1, arr2)); // [true, false, true]
console.log(fnin(arr1, arr2)); // [false, true, false]

// compareArrays: compares two arrays through a user-supplied callback and
// returns a boolean array as long as the longer of the two inputs.
compareArrays([1, 2, 3], [2, 3, 4, 5], (a, b) => a > b);
// subsetting
// subset by index, column name or condition
// NOTE(review): dt1 has 5 rows, so row index 5 is only valid if row indices
// are 1-based — TODO confirm the indexing convention.
const dtSub = dt1.subset({ rows: [1, 3, 5], cols: ['id', 'name'] });
// subset by condition: `rows` is a predicate called with a row; only the
// listed `cols` are requested.
const dtSub2 = dt1.subset({
  rows: (row) => row.score > 30,
  cols: ['id', 'name']
});
// TODO: grouping
// select
const col: number[] = dt1.col('score'); // by column name
// By index. Column 2 is `name` or `sex` (depending on whether column indices
// are 1- or 0-based); both hold strings, so the result is string[], not number[].
const col2: string[] = dt1.col(2);
const row: Row = dt1.row(1); // by index
// TODO: reconsider this where method
const idx = dt1.where((row) => row.score > 30);
// joins
// The instance joins below pass no `by` column — presumably they match on the
// keys set with setKey; TODO confirm.
const dtInner = dt1.join(dt2, { type: 'inner' });
const dtLeft = dt1.join(dt2, { type: 'left' });
// NOTE(review): receiver and argument are swapped here (dt2 joined with dt1)
// relative to the two joins above.
const dtRight = dt2.join(dt1, { type: 'right' });
// Static form: takes both tables and names the join column explicitly via `by`.
const dtFull = DataTable.merge(dt1, dt2, { by: 'id', type: 'full' });
// TODO: c++ crossjoin: https://stackoverflow.com/questions/10600060/how-to-do-cross-join-in-r#answer-31893090
// crossJoin takes an options object:
// sorted: boolean = true - whether setKey should be called on all the columns, in the order they were passed to crossJoin
// unique: boolean = true - only the unique values of each vector are used (automatically).
const dtCross = DataTable.crossJoin([dt1, dt2], { sorted: true, unique: true });
// non-equi join
// TODO: not specified yet — the intended semantics still need to be understood
// reshaping data
// melting to long format
// The second argument (['id']) is presumably the list of identifier columns,
// as with id.vars in R's data.table melt — TODO confirm. `measureVars` names
// the columns passed as measure variables.
const dtMelt = DataTable.melt(dt1, ['id'], { measureVars: ['name', 'score'] });
// casting to wide format
// The options name the id, variable and value columns of the long table;
// presumably 'variable' and 'value' are the column names melt produces — TODO confirm.
const dtCast = DataTable.dcast(dtMelt, { id: 'id', variable: 'variable', value: 'value' });
// New column supplied as an array (built here with dt1.map).
dt1.setCols({
  percent: dt1.map((r) => r.score / 100)
});

// New column supplied as a callback that receives a row.
dt1.setCols({
  pass: (r) => (r.score > 50 ? 'pass' : 'fail')
});

// Static variant: adds the columns by copy, with no modification to the original.
const dt3 = DataTable.addCols(dt1, {
  percent: dt1.map((r) => r.score / 100),
  pass: (r) => (r.score > 50 ? 'pass' : 'fail')
});
// Rolling functions over the `score` column (the 3 is presumably the window size).
// TODO: should be part of datatable package or statscript?

// rolling mean
const rollAvg: number[] = dt1.frollmean('score', 3);

// rolling sum
const rollSum: number[] = dt1.frollsum('score', 3);

// rolling apply: the callback receives a number[] and here returns its sum
// converted to a string, hence the string[] result.
const rollApply: string[] = dt1.frollapply('score', 3, (values: number[]) =>
  String(values.reduce((total, value) => total + value, 0))
);
// read/write from file
// fread is a static method producing a DataTable; fwrite is called on an instance.
// NOTE(review): this example writes back to the same path it read from.
const dt: DataTable = DataTable.fread('./path/to/file.csv');
dt.fwrite('./path/to/file.csv');
// helper methods
// The method names follow R / data.table; return values are not used in this sketch.
dt.head(5);
dt.tail(5);
dt.str(); // prints structure of data
dt.summary(); // statistical summary of data
dt.nrow();
dt.ncol();
dt.dim();
dt.colnames(); // by R data.table convention rownames are not supported
dt.copy(); // TODO: implement C++ copy method
dt.print();
dt.keys = ['id']; // getter/setter for keys - used for indexing and joining etc.
// Single-row table used by the binding examples below. It has its own name
// because `dt2` is already declared with `const` earlier in this script and a
// block-scoped variable cannot be redeclared.
const dtExtra = new DataTable({ id: [4], name: ['Dereck'], age: [31] });
// rbind and cbind
dt.rbind(dtExtra); // must have same columns
dt.rbind(dtExtra, true); // fill = true, fills missing columns with undefined/NAs
DataTable.rbind([dt, dtExtra, dtExtra, dtExtra], true); // static method
// Here the new column is passed as a plain object of arrays; per the original
// note cbind also accepts a DataTable with the same number of rows.
dt.cbind({ gender: ['M', 'F', 'M'] });
dt.cbind({ gender: ['M', 'F', 'M'] }, true); // fill = true, fills missing rows with undefined/NAs
DataTable.cbind([dt, dtExtra, dtExtra, dtExtra], true); // static method