@dereckmezquita/datatable

0.0.33 • Public • Published

datatable

Specifications:

  1. Functions named setX (e.g. setOrder) modify the data by reference, to avoid copying data and minimise memory usage.
  2. Consider whether functions should take positional arguments or an options object; this affects how easily a function can be extended in the future.
  3. Static methods are used for operations that do not modify the original data.
import { DataTable, Row } from '@dereckmezquita/datatable';

// Example usage of the DataTable API.
// NOTE(review): fin, fnin and compareArrays are used below but do not appear in
// the import statement above - confirm where they are exported from.

// TODO: consider data structure for storing data internally
// the constructor handles both row-wise and column-wise data
// row-wise: an array of row objects
const dt1 = new DataTable([
    { id: 1, name: 'A', sex: 'F', district: 'X', score: 10 },
    { id: 2, name: 'B', sex: 'F', district: 'Y', score: 20 },
    { id: 3, name: 'C', sex: 'M', district: 'X', score: 30 },
    { id: 4, name: 'D', sex: 'F', district: 'Y', score: 40 },
    { id: 5, name: 'E', sex: 'M', district: 'Z', score: 50 }
]);

// column-wise: an object of equally long column arrays
const dt2 = new DataTable({
    id: [3, 4, 5, 6, 7],
    name: ['C', 'D', 'E', 'F', 'G'],
    sex: ['M', 'M', 'M', 'F', 'M'],
    district: ['X', 'Y', 'Z', 'X', 'Y'],
    score: [30, 40, 50, 600, 700]
});

// key both tables on the `id` column; the column is named `id` in the data
// above so that it matches every `id` reference in this example
dt1.setKey('id');
dt2.setKey('id');

// array helper functions; create boolean arrays
fin(dt1.col('id'), dt2.col('id')); // returns boolean array equal in length to dt1
fnin(dt1.col('id'), dt2.col('id')); // which elements are NOT in the second array

const arr1 = [1, 2, 3];
const arr2 = [1, 3, 4, 5];
console.log(fin(arr1, arr2));
// [true, false, true]
console.log(fnin(arr1, arr2));
// [false, true, false]

// element-wise comparison of two arrays using a callback;
// returns a boolean array with the length of the longer array
compareArrays([1, 2, 3], [2, 3, 4, 5], (a, b) => {
    return a > b;
});

// subsetting
// subset by index, column name or condition
const dtSub = dt1.subset({ rows: [1, 3, 5], cols: ['id', 'name'] });

// subset by condition;
const dtSub2 = dt1.subset({
    rows: (row) => {
        return row.score > 30;
    },
    cols: ['id', 'name']
});

// TODO: grouping

// select
const col: number[] = dt1.col('score'); // by column name or index
// by index; the column at index 2 holds strings ('sex' if 0-based, 'name' if 1-based)
const col2: string[] = dt1.col(2);
const row: Row = dt1.row(1); // by index

// TODO: reconsider this where method
const idx = dt1.where((row) => row.score > 30);

// joins
const dtInner = dt1.join(dt2, { type: 'inner' });
const dtLeft = dt1.join(dt2, { type: 'left' });
const dtRight = dt2.join(dt1, { type: 'right' });
const dtFull = DataTable.merge(dt1, dt2, { by: 'id', type: 'full' });
// TODO: c++ crossjoin: https://stackoverflow.com/questions/10600060/how-to-do-cross-join-in-r#answer-31893090
// allows for options:
// sorted: boolean = true - should setKey be called on all the columns in the order they were passed to crossJoin
// unique: boolean = true - only unique values of each vector are used (automatically).
const dtCross = DataTable.crossJoin([dt1, dt2], { sorted: true, unique: true });

// non-equi join
// TODO: did not understand

// reshaping data
// melting to long format
const dtMelt = DataTable.melt(dt1, ['id'], { measureVars: ['name', 'score'] });

// casting to wide format
const dtCast = DataTable.dcast(dtMelt, { id: 'id', variable: 'variable', value: 'value' });

// new column with an array
dt1.setCols({ percent: dt1.map((row) => row.score / 100) });
// new column with a callback
dt1.setCols({
    pass: (row) => {
        return row.score > 50 ? 'pass' : 'fail';
    }
});

// add columns by copy; no modification to the original
const dt3 = DataTable.addCols(dt1, {
    percent: dt1.map((row) => row.score / 100),
    pass: (row) => {
        return row.score > 50 ? 'pass' : 'fail';
    }
});

// rolling averages
// TODO: should be part of datatable package or statscript?
// mean
const rollAvg: number[] = dt1.frollmean('score', 3);
// sum
const rollSum: number[] = dt1.frollsum('score', 3);
// apply
const rollApply: string[] = dt1.frollapply('score', 3, (arr: number[]) => {
    return '' + arr.reduce((a, b) => a + b, 0);
});

// read/write from file
const dt: DataTable = DataTable.fread('./path/to/file.csv');
dt.fwrite('./path/to/file.csv');

// helper methods
dt.head(5);
dt.tail(5);
dt.str(); // prints structure of data
dt.summary(); // statistical summary of data
dt.nrow();
dt.ncol();
dt.dim();
dt.colnames(); // by R data.table convention rownames are not supported
dt.copy(); // TODO: implement C++ copy method
dt.print();
dt.keys = ['id']; // getter/setter for keys - used for indexing and joining etc.

// a separate table to bind onto dt; named dt4 because dt2 and dt3 are
// already declared above and a const cannot be redeclared
const dt4 = new DataTable({ id: [4], name: ['Dereck'], age: [31] });

// rbind and cbind
dt.rbind(dt4); // must have same columns
dt.rbind(dt4, true); // fill = true, fills missing columns with undefined/NAs
DataTable.rbind([dt, dt4, dt4, dt4], true); // static method
dt.cbind({ gender: ['M', 'F', 'M'] }); // cbind accepts DataTable with same number of rows
dt.cbind({ gender: ['M', 'F', 'M'] }, true); // fill = true, fills missing rows with undefined/NAs
DataTable.cbind([dt, dt4, dt4, dt4], true); // static method

Readme

Keywords

none

Package Sidebar

Install

npm i @dereckmezquita/datatable

Weekly Downloads

0

Version

0.0.33

License

MIT

Unpacked Size

23 kB

Total Files

23

Last publish

Collaborators

  • dereckmezquita