Skip to content

Commit c8e35fd

Browse files
authored
Merge pull request #27 from AlphaQuantJS/dev
feat: Implement DataFrame filtering methods and tests
2 parents 4ee497e + 223f018 commit c8e35fd

36 files changed

Lines changed: 4141 additions & 0 deletions
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/* -------------------------------------------------------------- *
2+
| DataFrame → filtering · at() |
3+
* -------------------------------------------------------------- */
4+
5+
/**
 * Returns the row stored at a given position of a DataFrame.
 * `df.at(5)` → the row object at index 5.
 *
 * The index is validated before the DataFrame is materialised, so invalid
 * arguments never trigger a `toArray()` call.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df - Source DataFrame
 * @param {number} index - Zero-based row index to select
 * @returns {Object} The element of `df.toArray()` at position `index`
 * @throws {Error} If `index` is not an integer, is negative, or is not
 *   smaller than the number of rows (including when the DataFrame is empty)
 */
export function at(df, index) {
  if (!Number.isInteger(index)) {
    // Report the value for numbers (e.g. 1.5, NaN) and the type for everything else.
    const received = typeof index === 'number' ? index : typeof index;
    throw new Error(`Index must be an integer, got ${received}`);
  }

  if (index < 0) {
    throw new Error(`Index out of bounds: ${index} is negative`);
  }

  const rows = df.toArray();
  const rowCount = rows.length;

  // An empty frame gets its own message; any index would be out of range here.
  if (rowCount === 0) {
    throw new Error('Index out of bounds: DataFrame is empty');
  }

  if (index >= rowCount) {
    throw new Error(`Index out of bounds: ${index} >= ${rowCount}`);
  }

  return rows[index];
}
43+
44+
/* -------------------------------------------------------------- *
45+
| Pool for extendDataFrame |
46+
* -------------------------------------------------------------- */
47+
export default { at };
48+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/* -------------------------------------------------------------- *
2+
| DataFrame → filtering · drop() |
3+
* -------------------------------------------------------------- */
4+
5+
/**
 * Removes the given columns from a DataFrame and returns a new DataFrame.
 * `df.drop(['age', 'name'])` → new DataFrame without `age` and `name`.
 * A single column name may be passed as a plain string.
 *
 * Passing an empty array returns a copy built from the row records of `df`.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df - Source DataFrame
 * @param {string|string[]} columns - Column name(s) to remove
 * @returns {DataFrame} New DataFrame without the dropped columns
 * @throws {Error} If any named column does not exist, or if every column
 *   would be dropped
 */
export function drop(df, columns) {
  const columnsArray = Array.isArray(columns) ? columns : [columns];
  const DataFrameClass = df.constructor;

  // Nothing to drop: return a copy rebuilt from the row records.
  if (columnsArray.length === 0) {
    const records = df.toArray();
    // Call fromRecords as a method of the class so that a factory relying on
    // `this` (e.g. `new this(...)`) keeps working; a detached reference loses it.
    return typeof DataFrameClass.fromRecords === 'function'
      ? DataFrameClass.fromRecords(records)
      : new DataFrameClass(records);
  }

  const allColumns = df.columns;

  // Fail on the first requested column that is not present.
  for (const col of columnsArray) {
    if (!allColumns.includes(col)) {
      throw new Error(`Column not found: '${col}'`);
    }
  }

  // Set lookup keeps the keep-filter linear in the number of columns.
  const toDrop = new Set(columnsArray);
  const columnsToKeep = allColumns.filter((col) => !toDrop.has(col));

  if (columnsToKeep.length === 0) {
    throw new Error('Cannot drop all columns');
  }

  // Column-wise copy through the public API; rows are never materialised here.
  const result = Object.fromEntries(
    columnsToKeep.map((col) => [col, df.col(col).toArray()]),
  );

  return new DataFrameClass(result, df._options);
}
61+
62+
/* -------------------------------------------------------------- *
63+
| Pool for extendDataFrame |
64+
* -------------------------------------------------------------- */
65+
export default { drop };
66+
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/**
2+
* Filtering method: expr$
3+
*
4+
* This file provides the expr$ method, which filters DataFrame rows with a tagged-template expression.
5+
* This provides a more intuitive syntax for filtering
6+
*
7+
* @module methods/dataframe/filtering/expr$
8+
*/
9+
10+
import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
11+
12+
/**
 * Filters the rows of a DataFrame with a tagged-template expression.
 *
 * The template is flattened to a single source string: interpolated values
 * are spliced in as text (they are NOT quoted or escaped), so strings must be
 * quoted inside the template, e.g. `city_includes("${name}")`.
 * The shorthands `col_includes(x)`, `col_startsWith(x)`, `col_endsWith(x)` and
 * `col_match(x)` are rewritten to the corresponding method calls on `col`.
 * The resulting expression is compiled by `createPredicate` and evaluated
 * once per row, with the row's keys in scope as variables.
 *
 * The result keeps the column order of `df`; a column whose backing array is
 * a typed array gets a typed array of the same constructor, also when no row
 * matches.
 *
 * @param {Object} df - DataFrame instance
 * @param {TemplateStringsArray} strings - Template strings array
 * @param {...any} values - Values to interpolate into the template
 * @returns {Object} - New DataFrame with the rows for which the expression is truthy
 *
 * @example
 * // Filter rows where age > 30 and city includes "York"
 * df.expr$`age > 30 && city_includes("York")`
 */
export function expr$(df, strings, ...values) {
  // `strings` is used as the "raw" list, so this is a plain concatenation of
  // the template chunks with the stringified values in between.
  const expression = String.raw({ raw: strings }, ...values);

  // Rewrite `col_method(arg)` → `col.method(arg)`, one method at a time and in
  // a fixed order, so that nested shorthands are rewritten inside-out.
  let processedExpr = expression;
  for (const method of ['includes', 'startsWith', 'endsWith', 'match']) {
    const shorthand = new RegExp(`([a-zA-Z0-9_]+)_${method}\\(([^)]+)\\)`, 'g');
    processedExpr = processedExpr.replace(shorthand, `$1.${method}($2)`);
  }

  const predicate = createPredicate(processedExpr);

  const rows = df.toArray();
  const allColumns = df.columns;
  const filteredRows = rows.filter((row) => predicate(row));

  // Same options as the source frame; columns are attached one by one below.
  const result = new df.constructor({}, df._options);

  for (const col of allColumns) {
    // The original backing array decides whether the new column is typed.
    const originalArray = df._columns[col].vector.__data;
    const columnValues = filteredRows.map((row) => row[col]);

    const isTypedArray =
      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);

    let columnData = columnValues;
    if (isTypedArray) {
      // Zero matches yields an empty typed array of the same constructor.
      columnData = new originalArray.constructor(columnValues.length);
      columnData.set(columnValues);
    }

    result._columns[col] = createTypedSeries(columnData, col, df);

    if (!result._order.includes(col)) {
      result._order.push(col);
    }
  }

  return result;
}
108+
109+
/**
 * Compiles an expression string into a row predicate.
 *
 * The returned function evaluates the expression with the properties of
 * `row` in scope (via `with`). Any error raised while evaluating a row
 * (for example a reference to an unknown column) makes the predicate return
 * `false` for that row instead of throwing.
 *
 * SECURITY: the expression is compiled with the `Function` constructor and can
 * run arbitrary code, exactly like `eval`. Only pass expressions from trusted
 * sources.
 *
 * @param {string} expr - Expression to evaluate
 * @returns {Function} - Predicate function `(row) => any`
 * @throws {Error} If the expression is empty or is not valid JavaScript
 * @private
 */
function createPredicate(expr) {
  const source = String(expr);

  // An empty body would compile to `return;` and silently reject every row.
  if (source.trim() === '') {
    throw new Error(`Invalid expression: ${source}. Error: expression is empty`);
  }

  try {
    // Function bodies built this way are non-strict, which is what allows
    // `with`. The parentheses stop automatic semicolon insertion from
    // turning `return <newline> expr` into a bare `return;` for multi-line
    // templates; the closing parenthesis sits on its own line so a trailing
    // `//` comment in the expression cannot swallow it.
    return new Function(
      'row',
      `
      try {
        with (row) {
          return (
            ${source}
          );
        }
      } catch (e) {
        return false;
      }
    `,
    );
  } catch (e) {
    throw new Error(`Invalid expression: ${expr}. Error: ${e.message}`, { cause: e });
  }
}

// expr$ is already exported by its `export function` declaration; repeating it
// in an `export { expr$ }` list is a duplicate-export SyntaxError.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*-------------------------------------------------------------------------*
2+
| DataFrame › filtering · filter() |
3+
| |
4+
| df.filter(row => row.age > 30) → new DataFrame with matching rows |
5+
|  Accepts a predicate function only; any other argument throws.          |
6+
*-------------------------------------------------------------------------*/
7+
8+
import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
9+
10+
/**
 * Filters the rows of a DataFrame with a predicate function.
 * `df.filter(row => row.age > 30)` → new DataFrame with the matching rows.
 *
 * The predicate is handed to `Array.prototype.filter` over `df.toArray()`,
 * so it is called as `(row, index, rows)`. The result keeps the column order
 * of `df`; a column whose backing array is a typed array gets a typed array of
 * the same constructor, also when no row matches.
 *
 * @param {Object} df - DataFrame instance
 * @param {Function} predicate - Function to apply to each row
 * @returns {Object} - New DataFrame with the rows for which `predicate` is truthy
 * @throws {Error} If `predicate` is not a function
 */
export function filter(df, predicate) {
  if (typeof predicate !== 'function') {
    throw new Error('Predicate must be a function');
  }

  const rows = df.toArray();
  const allColumns = df.columns;
  const filteredRows = rows.filter(predicate);

  // Same options as the source frame; columns are attached one by one below.
  const result = new df.constructor({}, df._options);

  for (const col of allColumns) {
    // The original backing array decides whether the new column is typed.
    const originalArray = df._columns[col].vector.__data;
    const columnValues = filteredRows.map((row) => row[col]);

    const isTypedArray =
      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);

    let columnData = columnValues;
    if (isTypedArray) {
      // Zero matches yields an empty typed array of the same constructor.
      columnData = new originalArray.constructor(columnValues.length);
      columnData.set(columnValues);
    }

    result._columns[col] = createTypedSeries(columnData, col, df);

    if (!result._order.includes(col)) {
      result._order.push(col);
    }
  }

  return result;
}

// filter is already exported by its `export function` declaration; repeating it
// in an `export { filter }` list is a duplicate-export SyntaxError.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/* -------------------------------------------------------------- *
2+
| DataFrame → filtering · head() |
3+
* -------------------------------------------------------------- */
4+
5+
/**
 * Returns the first n rows of a DataFrame.
 * `df.head(5)` → new DataFrame with the first 5 rows.
 * Similar to pandas' head() function. When the DataFrame has fewer than `n`
 * rows, all of them are returned.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df - Source DataFrame
 * @param {number} [n=5] - Number of rows to return
 * @param {Object} [options] - Additional options
 * @param {boolean} [options.print=false] - Option for compatibility with other libraries
 *   (accepted but not read by this function)
 * @returns {DataFrame} - New DataFrame with the first n rows
 * @throws {Error} If n is not a positive integer
 */
export function head(df, n = 5, options = { print: false }) {
  // The sign is checked before integrality so the existing messages keep
  // their meaning: -1.5 reports "positive", 1.5 reports "integer".
  if (n <= 0) {
    throw new Error('Number of rows must be a positive integer');
  }
  if (!Number.isInteger(n)) {
    throw new Error('Number of rows must be an integer');
  }

  const selectedRows = df.toArray().slice(0, n);

  // Call fromRecords as a method of the class so that a factory relying on
  // `this` (e.g. `new this(...)`) keeps working; a detached reference loses it.
  const DataFrameClass = df.constructor;
  return typeof DataFrameClass.fromRecords === 'function'
    ? DataFrameClass.fromRecords(selectedRows)
    : new DataFrameClass(selectedRows);
}
40+
41+
/* -------------------------------------------------------------- *
42+
| Pool for extendDataFrame |
43+
* -------------------------------------------------------------- */
44+
export default { head };
45+

0 commit comments

Comments
 (0)