123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286 |
//
// CSVParser.js
// Mr-Data-Converter
//
// Parses CSV or tab-delimited input into a data-grid JavaScript object.
//
// CSV parsing function from Ben Nadel, http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
// Matches an optionally signed decimal number whose separator may be either a
// comma or a dot (e.g. "12", "-3,5", "+.75"); surrounding whitespace is allowed.
var isDecimal_re = /^\s*(\+|-)?((\d+([,\.]\d+)?)|([,\.]\d+))\s*$/;

var CSVParser = {

  //---------------------------------------
  // UTILS
  //---------------------------------------

  // Returns true when `string` coerces to a number other than NaN.
  // null and undefined are rejected. Note that "" and whitespace-only strings
  // coerce to 0 and therefore count as numbers.
  isNumber: function(string) {
    if ((string == null) || isNaN(Number(string))) {
      return false;
    }
    return true;
  },

  //---------------------------------------
  // PARSE
  //---------------------------------------

  // Parses delimited text into a grid of strings plus per-column metadata.
  //
  //   input            the raw CSV / tab-delimited text
  //   headersIncluded  truthy when the first row holds the column names;
  //                    otherwise names "val0", "val1", ... are generated
  //   delimiterType    "comma" or "tab" forces that delimiter; any other value
  //                    auto-detects (tab wins only if tabs outnumber commas)
  //   downcaseHeaders  truthy to lower-case the header names
  //   upcaseHeaders    truthy to upper-case the header names (lower-casing is
  //                    applied afterwards, so it wins when both are set)
  //   decimalSign      "comma" rewrites values such as "1,5" to "1.5" before
  //                    type detection; any other value leaves cells untouched
  //
  // Returns { dataGrid, headerNames, headerTypes, errors } where dataGrid is
  // an array of rows (header row removed), headerTypes holds "int", "float" or
  // "string" per column, and errors is the text produced by getLog().
  //
  // The error log is NOT cleared here; call resetLog() first if messages from
  // an earlier parse should not show up in `errors`.
  parse: function (input, headersIncluded, delimiterType, downcaseHeaders, upcaseHeaders, decimalSign) {
    var i, j, r;

    // Guess the delimiter by counting commas and tabs, then let an explicit
    // delimiterType override the guess.
    var numCommas = input.replace(/[^,]/g, "").length;
    var numTabs = input.replace(/[^\t]/g, "").length;
    var columnDelimiter = (numTabs > numCommas) ? "\t" : ",";
    if (delimiterType === "comma") {
      columnDelimiter = ",";
    } else if (delimiterType === "tab") {
      columnDelimiter = "\t";
    }

    // Kill leading/trailing empty lines. Carriage returns are stripped too so
    // that CRLF input does not leave a dangling "\r" that would parse as an
    // extra one-column row.
    input = input.replace(/^[\r\n]+/, "").replace(/[\r\n]+$/, "");

    var dataArray = this.CSVToArray(input, columnDelimiter);

    // Escape every tab / newline / carriage return that survived inside a
    // (quoted) cell so each cell is a single-line string.
    for (i = 0; i < dataArray.length; i++) {
      for (j = 0; j < dataArray[i].length; j++) {
        dataArray[i][j] = dataArray[i][j]
          .replace(/\t/g, "\\t")
          .replace(/\n/g, "\\n")
          .replace(/\r/g, "\\r");
      }
    }

    var headerNames = [];
    var headerTypes = [];
    // The first row (header or data) defines the expected column count.
    var numColumns = dataArray[0].length;

    if (headersIncluded) {
      // Remove the header row from the data.
      headerNames = dataArray.splice(0, 1)[0];
    } else {
      // No header row: create generic property names.
      for (i = 0; i < numColumns; i++) {
        headerNames.push("val" + String(i));
      }
    }

    if (upcaseHeaders) {
      for (i = 0; i < headerNames.length; i++) {
        headerNames[i] = headerNames[i].toUpperCase();
      }
    }
    if (downcaseHeaders) {
      for (i = 0; i < headerNames.length; i++) {
        headerNames[i] = headerNames[i].toLowerCase();
      }
    }

    // Test all the rows for the proper number of columns.
    for (i = 0; i < dataArray.length; i++) {
      if (dataArray[i].length !== numColumns) {
        this.log("Error parsing row " + String(i) + ". Wrong number of columns.");
      }
    }

    // Test columns for a numeric data type: a column is numeric when more than
    // `threshold` of its cells are numbers, and "float" when any of those
    // numeric cells contains a dot.
    var numRowsToTest = dataArray.length;
    var threshold = 0.9;
    for (i = 0; i < headerNames.length; i++) {
      var numFloats = 0;
      var numInts = 0;
      for (r = 0; r < numRowsToTest; r++) {
        var row = dataArray[r];
        if (!row) {
          continue;
        }
        // Replace comma with dot only if comma is the decimal separator.
        if (decimalSign === "comma" && isDecimal_re.test(row[i])) {
          row[i] = row[i].replace(",", ".");
        }
        if (CSVParser.isNumber(row[i])) {
          numInts++;
          // A dot anywhere (including a leading one, ".5") marks a float.
          if (String(row[i]).indexOf(".") !== -1) {
            numFloats++;
          }
        }
      }

      if ((numInts / numRowsToTest) > threshold) {
        headerTypes[i] = (numFloats > 0) ? "float" : "int";
      } else {
        headerTypes[i] = "string";
      }
    }

    return {'dataGrid': dataArray, 'headerNames': headerNames, 'headerTypes': headerTypes, 'errors': this.getLog()};
  },

  //---------------------------------------
  // ERROR LOGGING
  //---------------------------------------

  // Messages collected by log() since the last resetLog().
  errorLog: [],

  // Discards all collected messages.
  resetLog: function() {
    this.errorLog = [];
  },

  // Appends one message to the log.
  log: function(l) {
    this.errorLog.push(l);
  },

  // Renders the log as text: one "!!message!!" line per entry followed by a
  // blank line, or "" when nothing was logged.
  getLog: function() {
    var out = "";
    if (this.errorLog.length > 0) {
      for (var i = 0; i < this.errorLog.length; i++) {
        out += ("!!" + this.errorLog[i] + "!!\n");
      }
      out += "\n";
    }
    return out;
  },

  //---------------------------------------
  // UTIL
  //---------------------------------------

  // Based on a function from Ben Nadel, http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
  //
  // Parses a delimited string into an array of arrays of strings. The default
  // delimiter is the comma, but this can be overridden in the second argument.
  // Fields may be wrapped in double quotes, in which case they may contain the
  // delimiter and line breaks, and "" stands for a literal double quote.
  // Every cell in the result is a string; empty input yields [[""]].
  CSVToArray: function( strData, strDelimiter ){
    // Check to see if the delimiter is defined. If not, default to comma.
    strDelimiter = (strDelimiter || ",");

    // Each match is one field together with the delimiter that precedes it.
    var objPattern = new RegExp(
      (
        // Delimiters (or start of input for the very first field).
        "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
        // Quoted fields.
        "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
        // Standard fields.
        "([^\"\\" + strDelimiter + "\\r\\n]*))"
      ),
      "gi"
    );

    // Holds the parsed data; starts with an empty first row.
    var arrData = [[]];
    var arrMatches = null;

    // Keep looping over the matches until no further match is found.
    while ((arrMatches = objPattern.exec( strData ))) {
      // The delimiter that was found ("" for the start-of-input case).
      var strMatchedDelimiter = arrMatches[ 1 ];

      // The pattern tries real delimiters before "^", so input that starts
      // with a delimiter would otherwise lose the empty field in front of it.
      if (arrMatches.index === 0 && strMatchedDelimiter.length) {
        arrData[ 0 ].push( "" );
      }

      // A non-empty delimiter that is not the field delimiter is a row
      // delimiter, so start a new row.
      if (
        strMatchedDelimiter.length &&
        (strMatchedDelimiter !== strDelimiter)
      ){
        arrData.push( [] );
      }

      var strMatchedValue;
      if (arrMatches[ 2 ]){
        // Non-empty quoted value: unescape doubled double quotes.
        strMatchedValue = arrMatches[ 2 ].replace( /""/g, "\"" );
      } else {
        // Either an unquoted value (group 3) or an empty quoted value `""`,
        // where group 2 is "" and group 3 did not participate in the match.
        strMatchedValue = arrMatches[ 3 ] || "";
      }

      // Add the value to the current (last) row.
      arrData[ arrData.length - 1 ].push( strMatchedValue );

      // exec() does not move past a zero-length match on its own; without
      // this, empty input (or an unterminated quote at the very start) would
      // match at index 0 forever.
      if (arrMatches[ 0 ].length === 0) {
        objPattern.lastIndex++;
      }
    }

    // Return the parsed data.
    return( arrData );
  }
};
|