cron-filescanner.php 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. <?php
  2. include 'phpexcel/PHPExcel.php';
  3. include 'db.php';
  4. ini_set('memory_limit', '256M');
  5. //for reference!
  6. $columns_to_fields = array(
  7. 'Division' => 'division',
  8. 'Consignee' => 'consignee',
  9. 'Called In' => 'date_called_in',
  10. 'Dispatch #' => 'dispatch_number',
  11. 'Shipper' => 'shipper',
  12. 'Ready' => 'date_ready',
  13. 'Address 1' => 'address_1',
  14. 'Address 2' => 'address_2',
  15. 'City' => 'city',
  16. 'ST' => 'state',
  17. 'Zip' => 'zip',
  18. 'Phone' => 'phone',
  19. 'Ready Time' => 'time_ready',
  20. 'Close Time' => 'time_close',
  21. 'Pickup Time' => 'time_pickup',
  22. 'Depart Time' => 'time_depart',
  23. 'BOL Delivered' => 'bol_delivered',
  24. 'Dispatch Refere' => 'dispatch_reference',
  25. 'Ctns' => 'ctns',
  26. 'Weight' => 'weight',
  27. 'Cube' => 'cube'
  28. );
  29. $fields_by_col = array();
  30. $path = '../data';
  31. //scan respective directories for new files
  32. $companies = getCompanyDirectories();
  33. for($c = 0; $c < count($companies); $c++)
  34. {
  35. $company = $companies[$c];
  36. $id_company = $company['id_company'];
  37. $name = $company['name'];
  38. $ftp_directory = $company['ftp_directory'];
  39. $ftp_full_path = "$path/$ftp_directory";
  40. echo "scanning #$id_company $name\t$ftp_full_path:\t";
  41. //check if the directory exists
  42. if(!file_exists($ftp_full_path) || !is_dir($ftp_full_path))
  43. {
  44. echo "directory not found\n";
  45. continue;
  46. }
  47. //find all spreadsheets
  48. $files = glob("$ftp_full_path/*.xls");
  49. echo 'found ' . count($files) . ' excel spreadsheet(s)' . PHP_EOL;
  50. //go through each one
  51. foreach($files as $filename)
  52. {
  53. processFile($filename, $id_company);
  54. echo PHP_EOL;
  55. }
  56. }
  57. function processFile($filename, $id_company)
  58. {
  59. global $dbh;
  60. if(!file_exists($filename))
  61. {
  62. echo "file doesn't exist" . PHP_EOL;
  63. return;
  64. }
  65. $file_size = filesize($filename);
  66. $file_hash = md5_file($filename);
  67. $params = array(
  68. ':id_company' => $id_company,
  69. ':filename' => $filename,
  70. ':file_size' => $file_size,
  71. ':file_hash' => $file_hash
  72. );
  73. //first try to find if the file exists
  74. $stmt_file = $dbh->prepare("SELECT id_file
  75. FROM file
  76. WHERE id_company = :id_company
  77. AND filename = :filename
  78. AND md5_hash = :file_hash
  79. AND size = :file_size");
  80. $stmt_file->execute($params);
  81. $rows = $stmt_file->fetchAll(PDO::FETCH_ASSOC);
  82. if(count($rows))
  83. {
  84. $id_file = $rows[0]['id_file'];
  85. echo "re-processing file #$id_file" . PHP_EOL;
  86. }
  87. else
  88. {
  89. //report file processed
  90. $stmt_file = $dbh->prepare("INSERT INTO file (id_company, filename, md5_hash, size)
  91. VALUES (:id_company, :filename, :file_hash, :file_size)");
  92. if(!$stmt_file->execute($params))
  93. {
  94. echo "PDO::errorInfo():" . PHP_EOL;
  95. print_r($dbh->errorInfo());
  96. return;
  97. }
  98. $id_file = $dbh->lastInsertId();
  99. echo "processing new file #$id_file" . PHP_EOL;
  100. }
  101. //TODO: check if we already processed this file
  102. $stmt_row = $dbh->prepare("INSERT INTO file_row (
  103. id_file, row_number,
  104. division, consignee, date_called_in, dispatch_number, shipper, date_ready,
  105. address_1, address_2, city, state, zip, phone,
  106. time_ready, time_close, time_pickup, time_depart, bol_delivered,
  107. dispatch_reference,
  108. ctns, weight, cube
  109. ) VALUES (
  110. :id_file, :row_number,
  111. :division, :consignee, STR_TO_DATE(:date_called_in, '%m/%d/%Y'), :dispatch_number, :shipper, STR_TO_DATE(:date_ready, '%m/%d/%Y'),
  112. :address_1, :address_2, :city, :state, :zip, :phone,
  113. :time_ready, :time_close, :time_pickup, :time_depart, :bol_delivered,
  114. :dispatch_reference,
  115. :ctns, :weight, :cube
  116. )
  117. ON DUPLICATE KEY UPDATE
  118. division = :division, consignee = :consignee, date_called_in = STR_TO_DATE(:date_called_in, '%m/%d/%Y'), dispatch_number = :dispatch_number, shipper = :shipper, date_ready = STR_TO_DATE(:date_ready, '%m/%d/%Y'),
  119. address_1 = :address_1, address_2 = :address_2, city = :city, state = :state, zip = :zip, phone = :phone,
  120. time_ready = :time_ready, time_close = :time_close, time_pickup = :time_pickup, time_depart = :time_depart, bol_delivered = :bol_delivered,
  121. dispatch_reference = :dispatch_reference,
  122. ctns = :ctns, weight = :weight, cube = :cube");
  123. if(!$stmt_row)
  124. {
  125. echo "PDO::errorInfo():" . PHP_EOL;
  126. print_r($dbh->errorInfo());
  127. return;
  128. }
  129. $num_rows = 0;
  130. try
  131. {
  132. //this is necessary to dramatically reduce the memory footprint
  133. $cacheMethod = PHPExcel_CachedObjectStorageFactory::cache_to_phpTemp;
  134. $cacheSettings = array( ' memoryCacheSize ' => '8MB');
  135. PHPExcel_Settings::setCacheStorageMethod($cacheMethod, $cacheSettings);
  136. // figure out which type it is
  137. $type = PHPExcel_IOFactory::identify($filename);
  138. // create an excel reader object
  139. $reader = PHPExcel_IOFactory::createReader($type);
  140. $reader->setReadDataOnly(false);
  141. echo "xls detected as: $type" . PHP_EOL;
  142. // load the file
  143. $excel = $reader->load($filename);
  144. // pull the 'call details' worksheet, which should be the only one
  145. $sheet = $excel->getSheet(0);
  146. // count the number of rows and columns
  147. $num_rows = $sheet->getHighestRow();
  148. $last_column = $sheet->getHighestColumn();
  149. // let's keep track of how many succeeded and failed
  150. $num_ok = 0;
  151. $num_fail = 0;
  152. $num_skip = 0;
  153. // pull column names
  154. $column_names = $sheet->rangeToArray('A1:' . $last_column . '1', NULL, TRUE, FALSE);
  155. $columns_by_names = array();
  156. //flip the column names to reflect the indexes
  157. foreach($column_names[0] as $index => $name)
  158. if($name && strlen(trim($name)))
  159. $columns_by_names[$name] = $index;
  160. // go through and read each row
  161. for($r = 2; $r <= $num_rows; $r++)
  162. {
  163. $range = 'A' . $r . ':' . $last_column . $r;
  164. echo "processing row #$r...";
  165. // read row of data into array
  166. $row_array = $sheet->rangeToArray($range, NULL, TRUE, TRUE);
  167. if(!count($row_array))
  168. {
  169. echo "no data\n";
  170. continue;
  171. }
  172. //pull the row out and format the nulls as needed
  173. $row = $row_array[0];
  174. for($c = 0; $c < count($row); $c++)
  175. $row[$c] = $row[$c] && strlen(trim($row[$c])) ? trim($row[$c]) : null;
  176. //skip if no data for first three columns
  177. if(!$row[0] && !$row[1] && !$row[2])
  178. {
  179. echo "SKIP\n";
  180. $num_skip++;
  181. continue;
  182. }
  183. //bind appropriate values
  184. $params = array();
  185. $params[':id_file'] = $id_file;
  186. $params[':row_number'] = $r;
  187. $params[':division'] = $row[0];
  188. $params[':consignee'] = $row[1];
  189. $params[':date_called_in'] = $row[2];
  190. $params[':dispatch_number'] = $row[3];
  191. $params[':shipper'] = $row[4];
  192. $params[':date_ready'] = $row[5];
  193. $params[':address_1'] = $row[6];
  194. $params[':address_2'] = $row[7];
  195. $params[':city'] = $row[8];
  196. $params[':state'] = $row[9];
  197. $params[':zip'] = $row[10];
  198. $params[':phone'] = $row[11];
  199. $params[':time_ready'] = $row[14] ? DateTime::createFromFormat('H:i A', $row[14])->format('H:i:s') : null;
  200. $params[':time_close'] = $row[15] ? DateTime::createFromFormat('H:i A', $row[15])->format('H:i:s') : null;
  201. $params[':time_pickup'] = $row[16] ? DateTime::createFromFormat('H:i A', $row[16])->format('H:i:s') : null;
  202. $params[':time_depart'] = $row[17] ? DateTime::createFromFormat('H:i A', $row[17])->format('H:i:s') : null;
  203. $params[':bol_delivered'] = $row[18];
  204. $params[':dispatch_reference'] = $row[35];
  205. $params[':ctns'] = $row[44];
  206. $params[':weight'] = $row[45];
  207. $params[':cube'] = $row[46];
  208. try
  209. {
  210. $stmt_row->execute($params);
  211. echo "OK\n";
  212. $num_ok++;
  213. }
  214. catch(Exception $e)
  215. {
  216. echo 'FAIL: ' . $e->getMessage() . "\n";
  217. $num_fail++;
  218. }
  219. }
  220. }
  221. catch(Exception $e)
  222. {
  223. echo "Uncaught fatal error while attempting to process the downloaded report: " . $e->getMessage() . "\n";
  224. }
  225. //update the final status
  226. $stmt_file_status = $dbh->prepare("UPDATE file SET
  227. num_rows = :num_rows,
  228. num_ok = :num_ok,
  229. num_fail = :num_fail,
  230. num_skip = :num_skip
  231. WHERE id_file = :id_file");
  232. $params = array(
  233. ':id_file' => $id_file,
  234. ':num_rows' => $num_rows-1, //subtract one because of the column header
  235. ':num_ok' => $num_ok,
  236. ':num_fail' => $num_fail,
  237. ':num_skip' => $num_skip
  238. );
  239. $stmt_file_status->execute($params);
  240. }
  241. ?>