我在代码中实现了@diogoriba算法,结果如下:
- 查找前两个字符串的公共前缀,然后将其与从第3个字符串开始的所有后续字符串进行比较,如果没有发现任何共同之处,则对公共字符串进行修整,在前缀中存在更多共同点而不是不同之处的情况下会获胜。
- 但是bumperbox的原始算法(错误修正除外)会获胜,因为字符串的前缀共同点要少于不同点。 在代码注释中有详细信息!
我实现了另一个想法:
首先检查数组中最短的字符串,并将其用于比较而不是简单地比较第一个字符串。 在代码中,这是通过自定义编写函数arrayStrLenMin()实现的。
- 可以显着降低迭代次数,但是函数arrayStrLenMin()本身可能会(或多或少)引起迭代。
- 仅从数组中第一个字符串的长度开始似乎很笨拙,但如果arrayStrLenMin()需要多次迭代,则可能会很有效。
代码+广泛测试+备注:
function arrayStrLenMin ($arr, $strictMode = false, $forLoop = false) { $errArrZeroLength = -1; // Return value for error: Array is empty $errOtherType = -2; // Return value for error: Found other type (than string in array) $errStrNone = -3; // Return value for error: No strings found (in array) $arrLength = count($arr); if ($arrLength <= 0 ) { return $errArrZeroLength; } $cur = 0; foreach ($arr as $key => $val) { if (is_string($val)) { $min = strlen($val); $strFirstFound = $key; // echo("KeytLength / Notification / Errorn"); // echo("$keytFound first string member at key with length: $min!n"); break; } else if ($strictMode) { return $errOtherType; } // At least 1 type other than string was found. } if (! isset($min)) { return $errStrNone; } // No string was found in array. // SpeedRatio of foreach/for is approximately 2/1 as dicussed at: // http://juliusbeckmann.de/blog/php-foreach-vs-while-vs-for-the-loop-battle.html // If $strFirstFound is found within the first 1/SpeedRatio (=0.5) of the array, "foreach" is faster! if (! $forLoop) { foreach ($arr as $key => $val) { if (is_string($val)) { $cur = strlen($val); // echo("$keyt$curn"); if ($cur == 0) { return $cur; } // 0 is the shortest possible string, so we can abort here. if ($cur < $min) { $min = $cur; } } // else { echo("$keytNo string!n"); } } } // If $strFirstFound is found after the first 1/SpeedRatio (=0.5) of the array, "for" is faster! else { for ($i = $strFirstFound + 1; $i < $arrLength; $i++) { if (is_string($arr[$i])) { $cur = strlen($arr[$i]); // echo("$it$curn"); if ($cur == 0) { return $cur; } // 0 is the shortest possible string, so we can abort here. if ($cur < $min) { $min = $cur; } } // else { echo("$itNo string!n"); } } } return $min;}function strCommonPrefixByStr($arr, $strFindShortestFirst = false) { $arrLength = count($arr); if ($arrLength < 2) { return false; } // Determine loop length /// Find shortest string in array: Can bring down iterations dramatically, but the function arrayStrLenMin() itself can cause ( more or less) iterations. if ($strFindShortestFirst) { $end = arrayStrLenMin($arr, true); } /// Simply start with length of first string in array: Seems quite clumsy, but may turn out effective, if arrayStrLenMin() needs many iterations. else { $end = strlen($arr[0]); } for ($i = 1; $i <= $end + 1; $i++) { // Grab the part from 0 up to $i $commonStrMax = substr($arr[0], 0, $i); echo("Match: $it$commonStrMaxn"); // Loop through all the values in array, and compare if they match foreach ($arr as $key => $str) { echo(" Str: $keyt$strn"); // Didn't match, return the part that did match if ($commonStrMax != substr($str, 0, $i)) { return substr($commonStrMax, 0, $i-1); } } } // Special case: No mismatch (hence no return) happened until loop end! return $commonStrMax; // Thus entire first common string is the common prefix!}function strCommonPrefixByChar($arr, $strFindShortestFirst = false) { $arrLength = count($arr); if ($arrLength < 2) { return false; } // Determine loop length /// Find shortest string in array: Can bring down iterations dramatically, but the function arrayStrLenMin() itself can cause ( more or less) iterations. if ($strFindShortestFirst) { $end = arrayStrLenMin($arr, true); } /// Simply start with length of first string in array: Seems quite clumsy, but may turn out effective, if arrayStrLenMin() needs many iterations. else { $end = strlen($arr[0]); } for ($i = 0 ; $i <= $end + 1; $i++) { // Grab char $i $char = substr($arr[0], $i, 1); echo("Match: $it"); echo(str_pad($char, $i+1, " ", STR_PAD_LEFT)); echo("n"); // Loop through all the values in array, and compare if they match foreach ($arr as $key => $str) { echo(" Str: $keyt$strn"); // Didn't match, return the part that did match if ($char != $str[$i]) { // Same functionality as ($char != substr($str, $i, 1)). Same efficiency? return substr($arr[0], 0, $i); } } } // Special case: No mismatch (hence no return) happened until loop end! return substr($arr[0], 0, $end); // Thus entire first common string is the common prefix!}function strCommonPrefixByNeighbour($arr) { $arrLength = count($arr); if ($arrLength < 2) { return false; } /// Get the common string prefix of the first 2 strings $strCommonMax = strCommonPrefixByChar(array($arr[0], $arr[1])); if ($strCommonMax === false) { return false; } if ($strCommonMax == "") { return ""; } $strCommonMaxLength = strlen($strCommonMax); /// Now start looping from the 3rd string echo("-----n"); for ($i = 2; ($i < $arrLength) && ($strCommonMaxLength >= 1); $i++ ) { echo(" STR: $it{$arr[$i]}n"); /// Compare the maximum common string with the next neighbour //// Compare by string for ($ii = $strCommonMaxLength; $ii > 0; $ii--) { echo("MATCH: $iit$strCommonMaxn"); if (substr($arr[$i],0,$ii) == $strCommonMax) { break; } else { $strCommonMax = substr($strCommonMax,0,$ii - 1); $strCommonMaxLength--; } } } return substr($arr[0], 0, $strCommonMaxLength);}// Tests for finding the common prefix/// Scenarios$filesLeastInCommon = array ("/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a/1","/Vol/2/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a/2","/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/b/1","/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/b/2","/Vol/2/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/b/c/1","/Vol/2/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a/1",);$filesLessInCommon = array ("/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a/1","/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a/2","/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/b/1","/Vol/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/b/2","/Vol/2/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/b/c/1","/Vol/2/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a/1",);$filesMoreInCommon = array ("/Voluuuuuuuuuuuuuumes/1/a/a/1","/Voluuuuuuuuuuuuuumes/1/a/a/2","/Voluuuuuuuuuuuuuumes/1/a/b/1","/Voluuuuuuuuuuuuuumes/1/a/b/2","/Voluuuuuuuuuuuuuumes/2/a/b/c/1","/Voluuuuuuuuuuuuuumes/2/a/a/1",);$sameDir = array ("/Volumes/1/a/a/","/Volumes/1/a/a/aaaaa/2",);$sameFile = array ("/Volumes/1/a/a/1","/Volumes/1/a/a/1",);$noCommonPrefix = array ("/Volumes/1/a/a/","/Volumes/1/a/a/aaaaa/2","Net/1/a/a/aaaaa/2",);$longestLast = array ("/Volumes/1/a/a/1","/Volumes/1/a/a/aaaaa/2",);$longestFirst = array ("/Volumes/1/a/a/aaaaa/1","/Volumes/1/a/a/2",);$one = array ("/Volumes/1/a/a/aaaaa/1");$empty = array ( );// Test Results for finding the common prefix// Test Results Table// Used Functions | Iteration amount | Remarks// $result = (strCommonPrefixByStr($filesLessInCommon)); // 35// $result = (strCommonPrefixByChar($filesLessInCommon)); // 35 // Same amount of iterations, but much fewer characters compared because ByChar instead of ByString!// $result = (strCommonPrefixByNeighbour($filesLessInCommon)); // 88 + 42 = 130 // Loses in this category!// $result = (strCommonPrefixByStr($filesMoreInCommon)); // 137// $result = (strCommonPrefixByChar($filesMoreInCommon)); // 137 // Same amount of iterations, but much fewer characters compared because ByChar instead of ByString!// $result = (strCommonPrefixByNeighbour($filesLeastInCommon)); // 12 + 4 = 16 // Far the winner in this category!echo("Common prefix of all members:n");var_dump($result);// Tests for finding the shortest string in array/// Arrays// $empty = array ();// $noStrings = array (0,1,2,3.0001,4,false,true,77);// $stringsonly = array ("one","two","three","four");// $mixed = array (0,1,2,3.0001,"four",false,true,"seven", 8888);/// Scenarios// I list them from fewest to most iterations, which is not necessarily equivalent to slowest to fastest!// For speed consider the remarks in the pre considering the Speed ratio of foreach/for!//// Fewest iterations (immediate abort on "Found other type", use "for" loop)// foreach( array($empty, $noStrings, $stringsOnly, $mixed) as $arr) {// echo("NEW ANALYSIS:n");// echo("Result: " . arrayStrLenMin($arr, true, true) . "nn");// }//// Fewer iterations (immediate abort on "Found other type", use "foreach" loop)// foreach( array($empty, $noStrings, $stringsOnly, $mixed) as $arr) {// echo("NEW ANALYSIS:n");// echo("Result: " . arrayStrLenMin($arr, true, false) . "nn");// }//// More iterations (No immediate abort on "Found other type", use "for" loop)// foreach( array($empty, $noStrings, $stringsOnly, $mixed) as $arr) {// echo("NEW ANALYSIS:n");// echo("Result: " . arrayStrLenMin($arr, false, true) . "nn");// }//// Most iterations (No immediate abort on "Found other type", use "foreach" loop)// foreach( array($empty, $noStrings, $stringsOnly, $mixed) as $arr) {// echo("NEW ANALYSIS:n");// echo("Result: " . arrayStrLenMin($arr, false, false) . "nn");// }
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)