CSVParser.js 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. //
  2. // CSVParser.js
  3. // Mr-Data-Converter
  4. //
  5. // Input CSV or Tab-delimited data and this will parse it into a Data Grid Javascript object
  6. //
  7. // CSV Parsing Function from Ben Nadel, http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
  8. var isDecimal_re = /^\s*(\+|-)?((\d+([,\.]\d+)?)|([,\.]\d+))\s*$/;
  9. var CSVParser = {
  10. //---------------------------------------
  11. // UTILS
  12. //---------------------------------------
  13. isNumber: function(string) {
  14. if( (string == null) || isNaN( new Number(string) ) ) {
  15. return false;
  16. }
  17. return true;
  18. },
  19. //---------------------------------------
  20. // PARSE
  21. //---------------------------------------
  22. //var parseOutput = CSVParser.parse(this.inputText, this.headersProvided, this.delimiter, this.downcaseHeaders, this.upcaseHeaders);
  23. parse: function (input, headersIncluded, delimiterType, downcaseHeaders, upcaseHeaders, decimalSign) {
  24. var dataArray = [];
  25. var errors = [];
  26. //test for delimiter
  27. //count the number of commas
  28. var RE = new RegExp("[^,]", "gi");
  29. var numCommas = input.replace(RE, "").length;
  30. //count the number of tabs
  31. RE = new RegExp("[^\t]", "gi");
  32. var numTabs = input.replace(RE, "").length;
  33. var rowDelimiter = "\n";
  34. //set delimiter
  35. var columnDelimiter = ",";
  36. if (numTabs > numCommas) {
  37. columnDelimiter = "\t"
  38. };
  39. if (delimiterType === "comma") {
  40. columnDelimiter = ","
  41. } else if (delimiterType === "tab") {
  42. columnDelimiter = "\t"
  43. }
  44. // kill extra empty lines
  45. RE = new RegExp("^" + rowDelimiter + "+", "gi");
  46. input = input.replace(RE, "");
  47. RE = new RegExp(rowDelimiter + "+$", "gi");
  48. input = input.replace(RE, "");
  49. // var arr = input.split(rowDelimiter);
  50. //
  51. // for (var i=0; i < arr.length; i++) {
  52. // dataArray.push(arr[i].split(columnDelimiter));
  53. // };
  54. // dataArray = jQuery.csv(columnDelimiter)(input);
  55. dataArray = this.CSVToArray(input, columnDelimiter);
  56. //escape out any tabs or returns or new lines
  57. for (var i = dataArray.length - 1; i >= 0; i--){
  58. for (var j = dataArray[i].length - 1; j >= 0; j--){
  59. dataArray[i][j] = dataArray[i][j].replace("\t", "\\t");
  60. dataArray[i][j] = dataArray[i][j].replace("\n", "\\n");
  61. dataArray[i][j] = dataArray[i][j].replace("\r", "\\r");
  62. };
  63. };
  64. var headerNames = [];
  65. var headerTypes = [];
  66. var numColumns = dataArray[0].length;
  67. var numRows = dataArray.length;
  68. if (headersIncluded) {
  69. //remove header row
  70. headerNames = dataArray.splice(0,1)[0];
  71. numRows = dataArray.length;
  72. } else { //if no headerNames provided
  73. //create generic property names
  74. for (var i=0; i < numColumns; i++) {
  75. headerNames.push("val"+String(i));
  76. headerTypes.push("");
  77. };
  78. }
  79. if (upcaseHeaders) {
  80. for (var i = headerNames.length - 1; i >= 0; i--){
  81. headerNames[i] = headerNames[i].toUpperCase();
  82. };
  83. };
  84. if (downcaseHeaders) {
  85. for (var i = headerNames.length - 1; i >= 0; i--){
  86. headerNames[i] = headerNames[i].toLowerCase();
  87. };
  88. };
  89. //test all the rows for proper number of columns.
  90. for (var i=0; i < dataArray.length; i++) {
  91. var numValues = dataArray[i].length;
  92. if (numValues != numColumns) {this.log("Error parsing row "+String(i)+". Wrong number of columns.")};
  93. };
  94. //test columns for number data type
  95. var numRowsToTest = dataArray.length;
  96. var threshold = 0.9;
  97. for (var i=0; i < headerNames.length; i++) {
  98. var numFloats = 0;
  99. var numInts = 0;
  100. for (var r=0; r < numRowsToTest; r++) {
  101. if (dataArray[r]) {
  102. //replace comma with dot if comma is decimal separator
  103. if(decimalSign='comma' && isDecimal_re.test(dataArray[r][i])){
  104. dataArray[r][i] = dataArray[r][i].replace(",", ".");
  105. }
  106. if (CSVParser.isNumber(dataArray[r][i])) {
  107. numInts++
  108. if (String(dataArray[r][i]).indexOf(".") > 0) {
  109. numFloats++
  110. }
  111. };
  112. };
  113. };
  114. if ((numInts / numRowsToTest) > threshold){
  115. if (numFloats > 0) {
  116. headerTypes[i] = "float"
  117. } else {
  118. headerTypes[i] = "int"
  119. }
  120. } else {
  121. headerTypes[i] = "string"
  122. }
  123. }
  124. return {'dataGrid':dataArray, 'headerNames':headerNames, 'headerTypes':headerTypes, 'errors':this.getLog()}
  125. },
  126. //---------------------------------------
  127. // ERROR LOGGING
  128. //---------------------------------------
  129. errorLog:[],
  130. resetLog: function() {
  131. this.errorLog = [];
  132. },
  133. log: function(l) {
  134. this.errorLog.push(l);
  135. },
  136. getLog: function() {
  137. var out = "";
  138. if (this.errorLog.length > 0) {
  139. for (var i=0; i < this.errorLog.length; i++) {
  140. out += ("!!"+this.errorLog[i] + "!!\n");
  141. };
  142. out += "\n"
  143. };
  144. return out;
  145. },
  146. //---------------------------------------
  147. // UTIL
  148. //---------------------------------------
  149. // This Function from Ben Nadel, http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
  150. // This will parse a delimited string into an array of
  151. // arrays. The default delimiter is the comma, but this
  152. // can be overriden in the second argument.
  153. CSVToArray: function( strData, strDelimiter ){
  154. // Check to see if the delimiter is defined. If not,
  155. // then default to comma.
  156. strDelimiter = (strDelimiter || ",");
  157. // Create a regular expression to parse the CSV values.
  158. var objPattern = new RegExp(
  159. (
  160. // Delimiters.
  161. "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
  162. // Quoted fields.
  163. "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
  164. // Standard fields.
  165. "([^\"\\" + strDelimiter + "\\r\\n]*))"
  166. ),
  167. "gi"
  168. );
  169. // Create an array to hold our data. Give the array
  170. // a default empty first row.
  171. var arrData = [[]];
  172. // Create an array to hold our individual pattern
  173. // matching groups.
  174. var arrMatches = null;
  175. // Keep looping over the regular expression matches
  176. // until we can no longer find a match.
  177. while (arrMatches = objPattern.exec( strData )){
  178. // Get the delimiter that was found.
  179. var strMatchedDelimiter = arrMatches[ 1 ];
  180. // Check to see if the given delimiter has a length
  181. // (is not the start of string) and if it matches
  182. // field delimiter. If id does not, then we know
  183. // that this delimiter is a row delimiter.
  184. if (
  185. strMatchedDelimiter.length &&
  186. (strMatchedDelimiter != strDelimiter)
  187. ){
  188. // Since we have reached a new row of data,
  189. // add an empty row to our data array.
  190. arrData.push( [] );
  191. }
  192. // Now that we have our delimiter out of the way,
  193. // let's check to see which kind of value we
  194. // captured (quoted or unquoted).
  195. if (arrMatches[ 2 ]){
  196. // We found a quoted value. When we capture
  197. // this value, unescape any double quotes.
  198. var strMatchedValue = arrMatches[ 2 ].replace(
  199. new RegExp( "\"\"", "g" ),
  200. "\""
  201. );
  202. } else {
  203. // We found a non-quoted value.
  204. var strMatchedValue = arrMatches[ 3 ];
  205. }
  206. // Now that we have our value string, let's add
  207. // it to the data array.
  208. arrData[ arrData.length - 1 ].push( strMatchedValue );
  209. }
  210. // Return the parsed data.
  211. return( arrData );
  212. }
  213. }