CSVReader

Source

Processing.js
/**
 * Tally how often each distinct value occurs in a categorical array.
 *
 * Values are used as object property keys, so they are compared by their
 * string form and the unique values are returned as strings. The result
 * follows `Object.keys` ordering: integer-like keys in ascending order first,
 * then all other keys in the order they were first seen.
 *
 * @param {Array} categorical - Values to tally.
 * @returns {Array} A pair `[uniques, counts]` of parallel arrays: the distinct
 *   values and the number of occurrences of each.
 */
function uniqueValues(categorical) {
  // A prototype-less object keeps values such as "constructor", "toString" or
  // "__proto__" from colliding with members inherited from Object.prototype,
  // which would otherwise corrupt or drop their counts.
  const countMap = Object.create(null);
  categorical.forEach((val) => {
    countMap[val] = (countMap[val] || 0) + 1;
  });
  const uniques = Object.keys(countMap);
  const counts = uniques.map((uniqueVal) => countMap[uniqueVal]);
  return [uniques, counts];
}

/**
 * Average a numeric array within each category of a parallel categorical array.
 *
 * Each `numeric[i]` is parsed with `parseFloat` and added to the running sum
 * of category `categorical[i]`; a value that does not parse makes that
 * category's average `NaN`. The divisor for a category is the number of times
 * it occurs in `categorical`, including any trailing entries that have no
 * counterpart in a shorter `numeric` array.
 *
 * Categories are used as object property keys, so they are returned as
 * strings in `Object.keys` order (integer-like keys ascending, then the rest
 * in first-seen order).
 *
 * @param {Array} categorical - Category label for each row.
 * @param {Array} numeric - Numeric value (number or numeric string) for each row.
 * @returns {Array} A pair `[uniques, averages]` of parallel arrays.
 * @throws {TypeError} If `numeric` has more entries than `categorical`, since
 *   the surplus values cannot be assigned to any category.
 */
function averageValues(categorical, numeric) {
  if (numeric.length > categorical.length) {
    throw new TypeError(
      `numeric has ${numeric.length} entries but categorical has only ${categorical.length}.`,
    );
  }

  // Prototype-less so that categories such as "constructor" or "__proto__"
  // are stored as ordinary keys instead of touching Object.prototype members.
  const sumMap = Object.create(null);
  for (let i = 0; i < categorical.length; ++i) {
    const category = categorical[i];
    if (!(category in sumMap)) {
      sumMap[category] = { sum: 0, count: 0 };
    }
    const entry = sumMap[category];
    entry.count += 1;
    if (i < numeric.length) {
      entry.sum += parseFloat(numeric[i]);
    }
  }

  const u = Object.keys(sumMap);
  const a = u.map((uniqueKey) => sumMap[uniqueKey].sum / sumMap[uniqueKey].count);
  return [u, a];
}

export { uniqueValues, averageValues };
index.js
/**
 * Parse simple comma-separated text into column-oriented data.
 *
 * The first line supplies the column names. Every later non-blank line is a
 * data row. Each cell has whitespace characters and double quotes replaced by
 * spaces and is then trimmed. Fields are split on every comma, so quoted
 * fields that contain commas are NOT supported.
 *
 * Blank lines (including the one produced by a trailing newline) are skipped.
 * Rows with fewer cells than there are columns are padded with '' so that all
 * columns stay the same length and row indices line up across columns; cells
 * beyond the last named column are ignored.
 *
 * @param {string} csvText - Raw CSV content with '\n' (or '\r\n') line endings.
 * @returns {{numRows: number, numCols: number, colNames: string[], columns: string[][]}}
 *   Row/column counts, cleaned column names, and one array of cell strings per column.
 */
function parseCsv(csvText) {
  const cleanCell = (cell) => cell.replace(/[\s"]/g, ' ').trim();

  const lines = csvText.split('\n');
  const columnNames = lines[0].split(',').map(cleanCell);
  const columns = columnNames.map(() => []);
  let numRows = 0;

  for (let rowIdx = 1; rowIdx < lines.length; ++rowIdx) {
    const line = lines[rowIdx];
    // Skip empty lines, most commonly the one after the file's final newline.
    if (line.trim() === '') {
      continue;
    }
    const cells = line.split(',');
    // Iterate over the declared columns (not the cells) so that every column
    // receives exactly one value per row.
    for (let colIdx = 0; colIdx < columns.length; ++colIdx) {
      columns[colIdx].push(colIdx < cells.length ? cleanCell(cells[colIdx]) : '');
    }
    numRows += 1;
  }

  return {
    numRows,
    numCols: columnNames.length,
    colNames: columnNames,
    columns,
  };
}

/**
 * In-memory, column-oriented view of CSV text.
 *
 * The text is parsed once by `parseCsv` and stored on `this.data`; the
 * accessors below read from that parsed structure.
 */
export default class CSVReader {
  /**
   * @param {string} csvContent - Raw CSV text to parse.
   */
  constructor(csvContent) {
    this.setData(csvContent);
  }

  /**
   * Replace the current dataset by parsing new CSV text.
   *
   * @param {string} csvContent - Raw CSV text to parse.
   */
  setData(csvContent) {
    this.data = parseCsv(csvContent);
  }

  /** @returns {number} Number of columns in the dataset. */
  getNumberOfColumns() {
    return this.data.numCols;
  }

  /** @returns {number} Number of data rows (the header line is not counted). */
  getNumberOfRows() {
    return this.data.numRows;
  }

  /** @returns {string[]} The column names; this is the stored array, not a copy. */
  getColumnNames() {
    return this.data.colNames;
  }

  /**
   * Look up a column by position.
   *
   * @param {number} colIdx - Zero-based column index.
   * @returns {string[]} The stored array of cell values for that column.
   * @throws {Error} If `colIdx` is not within `[0, getNumberOfColumns())`.
   */
  getColumnByIndex(colIdx) {
    if (colIdx >= 0 && colIdx < this.getNumberOfColumns()) {
      return this.data.columns[colIdx];
    }
    throw new Error(`${colIdx} is outside the column range for this dataset.`);
  }

  /**
   * Look up a column by name.
   *
   * @param {string} colName - Column name exactly as returned by `getColumnNames()`.
   * @returns {string[]} The stored array of cell values for that column.
   * @throws {Error} If no column has that name.
   */
  getColumn(colName) {
    const colIdx = this.data.colNames.indexOf(colName);
    if (colIdx < 0) {
      throw new Error(`${colName}: No such column found.`);
    }
    return this.getColumnByIndex(colIdx);
  }

  /**
   * Collect the values of one data row across all columns.
   *
   * @param {number} rowIdx - Zero-based data row index.
   * @returns {string[]} A new array holding the row's cell in each column.
   * @throws {Error} If `rowIdx` is not within `[0, getNumberOfRows())`.
   */
  getRow(rowIdx) {
    // Mirror getColumnByIndex: reject an invalid index instead of silently
    // returning an array full of undefined.
    if (!(rowIdx >= 0 && rowIdx < this.getNumberOfRows())) {
      throw new Error(`${rowIdx} is outside the row range for this dataset.`);
    }

    const row = [];
    for (let i = 0; i < this.getNumberOfColumns(); ++i) {
      row.push(this.getColumnByIndex(i)[rowIdx]);
    }
    return row;
  }
}