filescanner.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. <?php
  2. include 'phpexcel/PHPExcel.php';
  3. include '../db.php';
  4. ini_set('memory_limit', '256M');
  5. //for reference!
  6. $columns_to_fields = array(
  7. 'Division' => 'division',
  8. 'Consignee' => 'consignee',
  9. 'Called In' => 'date_called_in',
  10. 'Dispatch #' => 'dispatch_number',
  11. 'Shipper' => 'shipper',
  12. 'Ready' => 'date_ready',
  13. 'Address 1' => 'address_1',
  14. 'Address 2' => 'address_2',
  15. 'City' => 'city',
  16. 'ST' => 'state',
  17. 'Zip' => 'zip',
  18. 'Phone' => 'phone',
  19. 'Ready Time' => 'time_ready',
  20. 'Close Time' => 'time_close',
  21. 'Pickup Time' => 'time_pickup',
  22. 'Depart Time' => 'time_depart',
  23. 'BOL Delivered' => 'bol_delivered',
  24. 'Dispatch Refere' => 'dispatch_reference',
  25. 'Ctns' => 'ctns',
  26. 'Weight' => 'weight',
  27. 'Cube' => 'cube'
  28. );
  29. $fields_by_col = array();
  30. $path = '../data';
  31. //scan respective directories for new files
  32. $companies = getCompanyDirectories();
  33. for($c = 0; $c < count($companies); $c++)
  34. {
  35. $company = $companies[$c];
  36. $id_company = $company['id_company'];
  37. $name = $company['name'];
  38. $ftp_directory = $company['ftp_directory'];
  39. $ftp_full_path = "$path/$ftp_directory";
  40. echo "scanning #$id_company $name\t$ftp_full_path:\t";
  41. //check if the directory exists
  42. if(!file_exists($ftp_full_path) || !is_dir($ftp_full_path))
  43. {
  44. echo "directory not found\n";
  45. continue;
  46. }
  47. //find all spreadsheets
  48. $files = glob("$ftp_full_path/*.xls");
  49. echo 'found ' . count($files) . ' excel spreadsheet(s)' . PHP_EOL;
  50. //go through each one
  51. foreach($files as $full_path)
  52. {
  53. processFile($full_path, $id_company);
  54. echo PHP_EOL;
  55. }
  56. }
  57. function processFile($full_path, $id_company)
  58. {
  59. global $dbh;
  60. if(!file_exists($full_path))
  61. {
  62. echo "file doesn't exist" . PHP_EOL;
  63. return false;
  64. }
  65. $dir = dirname($full_path);
  66. $filename = basename($full_path);
  67. $file_size = filesize($full_path);
  68. $file_hash = md5_file($full_path);
  69. //first try to find if the file exists
  70. $stmt_file = $dbh->prepare(
  71. "SELECT id_file
  72. FROM file
  73. WHERE filename = :filename
  74. AND md5_hash = :file_hash
  75. AND size = :file_size");
  76. $params = array(
  77. ':filename' => $filename,
  78. ':file_hash' => $file_hash,
  79. ':file_size' => $file_size
  80. );
  81. $stmt_file->execute($params);
  82. $rows = $stmt_file->fetchAll(PDO::FETCH_ASSOC);
  83. //figure out if we are reprocessing
  84. if(count($rows))
  85. {
  86. $id_file = $rows[0]['id_file'];
  87. echo "re-processing file #$id_file" . PHP_EOL;
  88. }
  89. else
  90. {
  91. //report file processed
  92. $stmt_file = $dbh->prepare("INSERT INTO file (id_company, dir, filename, md5_hash, size)
  93. VALUES (:id_company, :dir, :filename, :file_hash, :file_size)");
  94. $params = array(
  95. ':id_company' => $id_company,
  96. ':dir' => dirname($full_path),
  97. ':filename' => basename($filename),
  98. ':file_size' => $file_size,
  99. ':file_hash' => $file_hash
  100. );
  101. try
  102. {
  103. $stmt_file->execute($params);
  104. }
  105. catch(Exception $e)
  106. {
  107. echo "PDO::errorInfo():" . PHP_EOL;
  108. print_r($dbh->errorInfo());
  109. return false;
  110. }
  111. $id_file = $dbh->lastInsertId();
  112. echo "processing new file #$id_file" . PHP_EOL;
  113. }
  114. //insert row
  115. $stmt_row = $dbh->prepare("INSERT INTO file_row (
  116. id_file, row_number,
  117. division, consignee, date_called_in, dispatch_number, shipper, date_ready,
  118. address_1, address_2, city, state, zip, phone,
  119. time_ready, time_close, time_pickup, time_depart, bol_delivered,
  120. dispatch_reference,
  121. ctns, weight, cube
  122. ) VALUES (
  123. :id_file, :row_number,
  124. :division, :consignee, STR_TO_DATE(:date_called_in, '%m/%d/%Y'), :dispatch_number, :shipper, STR_TO_DATE(:date_ready, '%m/%d/%Y'),
  125. :address_1, :address_2, :city, :state, :zip, :phone,
  126. :time_ready, :time_close, :time_pickup, :time_depart, :bol_delivered,
  127. :dispatch_reference,
  128. :ctns, :weight, :cube
  129. )
  130. ON DUPLICATE KEY UPDATE
  131. division = :division, consignee = :consignee, date_called_in = STR_TO_DATE(:date_called_in, '%m/%d/%Y'), dispatch_number = :dispatch_number, shipper = :shipper, date_ready = STR_TO_DATE(:date_ready, '%m/%d/%Y'),
  132. address_1 = :address_1, address_2 = :address_2, city = :city, state = :state, zip = :zip, phone = :phone,
  133. time_ready = :time_ready, time_close = :time_close, time_pickup = :time_pickup, time_depart = :time_depart, bol_delivered = :bol_delivered,
  134. dispatch_reference = :dispatch_reference,
  135. ctns = :ctns, weight = :weight, cube = :cube");
  136. if(!$stmt_row)
  137. {
  138. echo "PDO::errorInfo():" . PHP_EOL;
  139. print_r($dbh->errorInfo());
  140. return false;
  141. }
  142. $num_rows = 0;
  143. try
  144. {
  145. //this is necessary to dramatically reduce the memory footprint
  146. $cacheMethod = PHPExcel_CachedObjectStorageFactory::cache_to_phpTemp;
  147. $cacheSettings = array( ' memoryCacheSize ' => '8MB');
  148. PHPExcel_Settings::setCacheStorageMethod($cacheMethod, $cacheSettings);
  149. // figure out which type it is
  150. $type = PHPExcel_IOFactory::identify($full_path);
  151. // create an excel reader object
  152. $reader = PHPExcel_IOFactory::createReader($type);
  153. $reader->setReadDataOnly(false);
  154. echo "xls detected as: $type" . PHP_EOL;
  155. // load the file
  156. $excel = $reader->load($full_path);
  157. // pull the 'call details' worksheet, which should be the only one
  158. $sheet = $excel->getSheet(0);
  159. // count the number of rows and columns
  160. $num_rows = $sheet->getHighestRow();
  161. $last_column = $sheet->getHighestColumn();
  162. // let's keep track of how many succeeded and failed
  163. $num_ok = 0;
  164. $num_fail = 0;
  165. $num_skip = 0;
  166. // pull column names
  167. $column_names = $sheet->rangeToArray('A1:' . $last_column . '1', NULL, TRUE, FALSE);
  168. $columns_by_names = array();
  169. //flip the column names to reflect the indexes
  170. foreach($column_names[0] as $index => $name)
  171. if($name && strlen(trim($name)))
  172. $columns_by_names[$name] = $index;
  173. // go through and read each row
  174. for($r = 2; $r <= $num_rows; $r++)
  175. {
  176. $range = 'A' . $r . ':' . $last_column . $r;
  177. echo "processing row #$r...";
  178. // read row of data into array
  179. $row_array = $sheet->rangeToArray($range, NULL, TRUE, TRUE);
  180. if(!count($row_array))
  181. {
  182. echo "no data\n";
  183. continue;
  184. }
  185. //pull the row out and format the nulls as needed
  186. $row = $row_array[0];
  187. for($c = 0; $c < count($row); $c++)
  188. $row[$c] = $row[$c] && strlen(trim($row[$c])) ? trim($row[$c]) : null;
  189. //skip if no data for first three columns
  190. if(!$row[0] && !$row[1] && !$row[2])
  191. {
  192. echo "SKIP\n";
  193. $num_skip++;
  194. continue;
  195. }
  196. //bind appropriate values
  197. $params = array();
  198. $params[':id_file'] = $id_file;
  199. $params[':row_number'] = $r;
  200. $params[':division'] = $row[0];
  201. $params[':consignee'] = $row[1];
  202. $params[':date_called_in'] = $row[2];
  203. $params[':dispatch_number'] = $row[3];
  204. $params[':shipper'] = $row[4];
  205. $params[':date_ready'] = $row[5];
  206. $params[':address_1'] = $row[6];
  207. $params[':address_2'] = $row[7];
  208. $params[':city'] = $row[8];
  209. $params[':state'] = $row[9];
  210. $params[':zip'] = $row[10];
  211. $params[':phone'] = $row[11];
  212. $params[':time_ready'] = $row[14] ? DateTime::createFromFormat('H:i A', $row[14])->format('H:i:s') : null;
  213. $params[':time_close'] = $row[15] ? DateTime::createFromFormat('H:i A', $row[15])->format('H:i:s') : null;
  214. $params[':time_pickup'] = $row[16] ? DateTime::createFromFormat('H:i A', $row[16])->format('H:i:s') : null;
  215. $params[':time_depart'] = $row[17] ? DateTime::createFromFormat('H:i A', $row[17])->format('H:i:s') : null;
  216. $params[':bol_delivered'] = $row[18];
  217. $params[':dispatch_reference'] = $row[35];
  218. //remove commas for doubleval processing
  219. $row[44] = str_replace(',', '', $row[44]);
  220. $row[45] = str_replace(',', '', $row[45]);
  221. $row[46] = str_replace(',', '', $row[46]);
  222. $params[':ctns'] = doubleval($row[44]);
  223. $params[':weight'] = doubleval($row[45]);
  224. $params[':cube'] = doubleval($row[46]);
  225. try
  226. {
  227. $stmt_row->execute($params);
  228. echo "OK\n";
  229. $num_ok++;
  230. }
  231. catch(Exception $e)
  232. {
  233. echo 'FAIL: ' . $e->getMessage() . "\n";
  234. $num_fail++;
  235. }
  236. }
  237. }
  238. catch(Exception $e)
  239. {
  240. echo "Uncaught fatal error while attempting to process the downloaded report: " . $e->getMessage() . "\n";
  241. }
  242. //update the final status
  243. $stmt_file_status = $dbh->prepare("UPDATE file SET
  244. num_rows = :num_rows,
  245. num_ok = :num_ok,
  246. num_fail = :num_fail,
  247. num_skip = :num_skip
  248. WHERE id_file = :id_file");
  249. $params = array(
  250. ':id_file' => $id_file,
  251. ':num_rows' => $num_rows-1, //subtract one because of the column header
  252. ':num_ok' => $num_ok,
  253. ':num_fail' => $num_fail,
  254. ':num_skip' => $num_skip
  255. );
  256. $stmt_file_status->execute($params);
  257. return true;
  258. }
  259. ?>