0) { switch ($argv[0]) { case "-N": $permutations = $argv[1]; array_shift($argv); break; case "--all": $allStatsDumpFile = $argv[1]; array_shift($argv); break; case "--skiphead": $skip_head = true; break; case "--reportIN": $reportInput = true; break; default: ExitOnError("Unknown arg '" . $argv[0] . "'\n" . $USAGE); } array_shift($argv); } // read data file into an array where each element is a test case // test cases are maps of keys 'A', 'B', 'G' where 'A' and 'B' // point to the classifications provided by the two classifiers // and 'G' is the gold standard classification echo "Reading Data File\n"; $cases = array(); $lines = file($csvFilename) or ExitOnError("Error opening file!"); // remove the first line if necessary if ($skip_head) { unset($lines[0]); } foreach ($lines as $line) { preg_match('/^.*([01])\s*,\s*([01])\s*,\s*([01])$/', trim($line), $matches) or ExitOnError("The format of the CSV file is invalid!"); if (count($matches) == 0) { ExitOnError("The format of the CSV file is invalid!"); } $cases[] = array('G' => $matches[1], 'A' => $matches[2], 'B' => $matches[3]); } $origData = GenPerm($cases); // used for column headers $permKeys = implode(",", array_keys($origData)); $origStats = CalcStats($origData); echo "Original Stats\n"; print_r($origData); print_r($origStats); echo "\n"; if (isset($reportInput)) { exit(0); } // the maximum number of permutations based on the number of cases $maxPerms = pow(2,count($cases)); echo "Max Permutations: $maxPerms\n"; // output the p-value level for each statistic if (isset($permutations)) { echo "Limited to $permutations permutations\n"; } else { echo "Performing exhaustive analysis of all $maxPerms permutations\n"; } // create permutations $perms = array(); if (isset($permutations)) { echo "Generating perm --/$permutations"; for ($i = 0; $i < $permutations; $i++) { // clear the last number if ($i % 1000 == 0) { for ($k = 0; $k < strlen($i - 1) + strlen($permutations) + 1; $k++) { echo BS; } echo "$i/$permutations"; } // create a random number to base the permutation on $perms[] = GenPerm($cases, true); } if ($i % 1000 == 0) { for ($k = 0; $k < strlen($i - 1) + strlen($permutations) + 1; $k++) { echo BS; } echo "$i/$permutations"; } } else { echo "Generating perm --/$maxPerms"; // create an array of binary numbers of length 2^(# cases) for ($currentPerm = 0; $currentPerm < $maxPerms; $currentPerm++) { if ($currentPerm % 1000 == 0) { for ($k = 0; $k < strlen($currentPerm - 1) + strlen($maxPerms) + 1; $k++) { printf(chr(8)); } echo "$currentPerm/$maxPerms"; } $perms[] = GenPerm($cases, true, decbin($currentPerm)); } if ($currentPerm % 1000 == 0) { for ($k = 0; $k < strlen($currentPerm - 1) + strlen($maxPerms) + 1; $k++) { printf(chr(8)); } echo "$currentPerm/$maxPerms"; } } echo "\nPermutations Generated!\n"; // output the number of stats that have a difference equal to or greater than the original differences if (isset($allStatsDumpFile)) { echo "Dumping all stats to '$allStatsDumpFile'\n"; $outHandle = fopen($allStatsDumpFile, "w") or ExitOnError("Unable to open '$outfile' for output!"); fwrite($outHandle, "$permKeys,A_precision,A_sensitivity_recall,A_specificity,A_F-measure,B_precision,B_sens_recall,B_specificity,B_F-measure"); } // the number of differences that are greater than or equal to the original difference $diffCount = array( "prec" => 0, "sens_recall" => 0, "spec" => 0, "F" => 0); echo "Aggregating stats...\n"; $lastProgressStr = ""; $k = 0; foreach ($perms as $perm) { if ($k % 1000 == 0) { for ($i = 0; $i < strlen($lastProgressStr); $i++) { printf(BS); } $lastProgressStr = "$k/" . count($perms); echo $lastProgressStr; } ++$k; // calculate stats $stat = CalcStats($perm); if (isset($allStatsDumpFile)) { fwrite($outHandle, implode(",", $perm) . "," . implode(",", $stat) . "\n"); } if (abs($stat['diff_spec']) >= abs($origStats['diff_spec'])) { $diffCount['spec']++; } if (abs($stat['diff_sens_recall']) >= abs($origStats['diff_sens_recall'])) { $diffCount['sens_recall']++; } if (abs($stat['diff_prec']) >= abs($origStats['diff_prec'])) { $diffCount['prec']++; } if (abs($stat['diff_F']) >= abs($origStats['diff_F'])) { $diffCount['F']++; } } for ($i = 0; $i < strlen($lastProgressStr); $i++) { printf(BS); } echo "$k/" . count($perms) . "\nDone Aggregating\n"; if (isset($allStatsDumpFile)) { fclose($outHandle); } printf("\nstat orig_diff\tN>=orig\tP-value (2-sided) prec %f\t\t%d\t\t%f sens_recall %f\t\t%d\t\t%f spec %f\t\t%d\t\t%f F %f\t\t%d\t\t%f \n", $origStats['diff_prec'], $diffCount['prec'], $diffCount['prec'] / count($perms), $origStats['diff_sens_recall'], $diffCount['sens_recall'], $diffCount['sens_recall'] / count($perms), $origStats['diff_spec'], $diffCount['spec'], $diffCount['spec'] / count($perms), $origStats['diff_F'], $diffCount['F'], $diffCount['F'] / count($perms)); // calculate sensitivity, specificity, recall, precision, F-measure for the data in $permData // $permData should be the output of GenPerm... in other words a map with keys // Atp, Atn, Afp, Afn, Btp, Btn, Bfp, Bfn corresponding to the 2x2 contingency matices for the two classifiers // returns a map containing kAprec, Asens_recall, Aspec, AF, Bprec, Bsens_recall, Bspec, BF // for the precision, sensitivity/recall, specificity and F-measure for the two classifiers function CalcStats($permData) { $Aprec = ($permData['Atp'] + $permData['Afp']) != 0 ? $permData['Atp'] / ($permData['Atp'] + $permData['Afp']) : "-"; $Asens_recall = ($permData['Atp'] + $permData['Afn']) != 0 ? $permData['Atp'] / ($permData['Atp'] + $permData['Afn']) : "-"; $Aspec = ($permData['Atn'] + $permData['Afp']) != 0 ? $permData['Atn'] / ($permData['Atn'] + $permData['Afp']) : "-"; $AF = 2 * ($Aprec * $Asens_recall) / ($Aprec + $Asens_recall); $Bprec = ($permData['Btp'] + $permData['Bfp']) != 0 ? $permData['Btp'] / ($permData['Btp'] + $permData['Bfp']) : "-"; $Bsens_recall = ($permData['Btp'] + $permData['Bfn']) != 0 ? $permData['Btp'] / ($permData['Btp'] + $permData['Bfn']) : "-"; $Bspec = ($permData['Btn'] + $permData['Bfp']) != 0 ? $permData['Btn'] / ($permData['Btn'] + $permData['Bfp']) : "-"; $BF = 2 * ($Bprec * $Bsens_recall) / ($Bprec + $Bsens_recall); $ret = array( 'Aprec'=>$Aprec, 'Asens_recall'=>$Asens_recall, 'Aspec'=>$Aspec, 'AF'=>$AF, 'Bprec'=>$Bprec, 'Bsens_recall'=>$Bsens_recall, 'Bspec'=>$Bspec, 'BF'=>$BF, 'diff_prec'=>$Aprec - $Bprec, 'diff_sens_recall'=>$Asens_recall - $Bsens_recall, 'diff_spec'=>$Aspec - $Bspec, 'diff_F'=>$AF - $BF); return $ret; } // generate a permutation either randomly if $rand is true, or using the $permStr. // returns a map with keys Atp, Atn, Afp, Afn, Btp, Btn, Bfp, Bfn // corresponding to the 2x2 contingency matices for the two classifiers function GenPerm($cases, $rand = false, $permStr = null) { // 4x4 stats for the two classifiers $Atp = 0; $Afp = 0; $Atn = 0; $Afn = 0; $Btp = 0; $Bfp = 0; $Btn = 0; $Bfn = 0; // iterate through all cases for ($caseNum = 0; $caseNum < count($cases); $caseNum++) { $case = $cases[$caseNum]; // check if there is a bit in the permNumber's binary rep for this case num // indicating that this case should have its responses swapped if ($rand) { // reassignment is possible if ($permStr != null) { // reassignment based on the input bin string if ($permStr[$caseNum] == 1) { $B = $case['A']; $A = $case['B']; } else { $A = $case['A']; $B = $case['B']; } } else { // reassignment based on random number if (mt_rand(0,1) == 1) { $B = $case['A']; $A = $case['B']; } else { $A = $case['A']; $B = $case['B']; } } } else { // no random assignment, just go with what we already have $A = $case['A']; $B = $case['B']; } // handle the negative cases if ($case['G'] == 0) { if ($A == 0) { $Atn++; } else { $Afp++; } if ($B == 0) { $Btn++; } else { $Bfp++; } } else { // handle the positive cases if ($A == 0) { $Afn++; } else { $Atp++; } if ($B == 0) { $Bfn++; } else { $Btp++; } } } $ret = array( 'Atn' => $Atn, 'Afn' => $Afn, 'Atp' => $Atp, 'Afp' => $Afp, 'Btn' => $Btn, 'Bfn' => $Bfn, 'Btp' => $Btp, 'Bfp' => $Bfp); return $ret; } function ExitOnError($msg, $exit_when_done = true) { fwrite(STDERR, $msg . "\n"); // var_dump(debug_backtrace()); if ($exit_when_done) { exit(1); } } ?>bug_backtrace()); if ($exit_when_done) { exit(1); } } ?>