Hey,
I've committed a new diff implementation that I've been working on for the past month. It produces exactly the same results as the previous php implementation but is theoretically of a lower complexity. I have tested its performance on a long list of articles out of the wikipedia top10 in size. I have seen cases where this implementation is slower by an order of magnitude, cases where it is faster and cases where the old diff ran out of memory where this implementation didn't.
I can't draw any final conclusions with respect to performance because I don't have a site of considerable size. I'd appreciate it if somebody could test this implementation and compare it to the current php installation.
An added feature is that for extremely complex diffs, the new implementation will switch to a greedy heuristic that is faster but produces diffs that are slightly suboptimal.
You enable it by setting $wgExternalDiffEngine = 'wikidiff3'
Cheers,
Guy
2008/8/5 guyvdb@svn.wikimedia.org:
Revision: 38653 Author: guyvdb Date: 2008-08-05 19:40:29 +0000 (Tue, 05 Aug 2008)
Log Message:
Alternative experimental diff implementation. Enable by setting = 'wikidiff3'.
Modified Paths:
trunk/phase3/includes/DifferenceEngine.php
Added Paths:
trunk/phase3/includes/Diff.php
Added: trunk/phase3/includes/Diff.php
--- trunk/phase3/includes/Diff.php (rev 0) +++ trunk/phase3/includes/Diff.php 2008-08-05 19:40:29 UTC (rev 38653) @@ -0,0 +1,472 @@ +<?php +/* Copyright (C) 2008 Guy Van den Broeck guy@guyvdb.eu
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- or see http://www.gnu.org/
- */
+/**
- Implementation of the Diff algorithm.
- DO NOT USE ON PRODUCTION SYSTEMS
- The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
- and extended with my own ideas.
- @return array($from_removed, $to_added)
TRUE if the token was removed or added.
- @author Guy Van den Broeck
- */
+function wikidiff3_diff(array $from, array $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
wfProfileIn( __METHOD__ );
$m = sizeof($from);
$n = sizeof($to);
$result_from = array_fill(0,$m,TRUE);
$result_to = array_fill(0,$n,TRUE);
//reduce the complexity for the next step (intentionally done twice)
//remove common tokens at the start
$i=0;
while($i<$m && $i<$n && $from[$i]===$to[$i]){
$result_from[$i] = $result_to[$i] = FALSE;
unset($from[$i],$to[$i]);
++$i;
}
//remove common tokens at the end
$j=1;
while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
$result_from[$m-$j] = $result_to[$n-$j] = FALSE;
unset($from[$m-$j],$to[$n-$j]);
++$j;
}
$newFrom = $newFromIndex = $newTo = $newToIndex = array();
//remove tokens not in both sequences
$shared = array_fill_keys($from,FALSE);
foreach($to as $index => $el){
if(array_key_exists($el,$shared)){
//keep it
$shared[$el] = TRUE;
$newTo[] = $el;
$newToIndex[] = $index;
}
}
foreach($from as $index => $el){
if($shared[$el]){
//keep it
$newFrom[] = $el;
$newFromIndex[] = $index;
}
}
unset($from, $to, $shared);
$m2 = sizeof($newFrom);
$n2 = sizeof($newTo);
$offsetx = $offsety = 0;
$from_inLcs = new InLcs($m2);
$to_inLcs = new InLcs($n2);
wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
foreach($from_inLcs->inLcs as $key => $in){
if($in){
$result_from[$newFromIndex[$key]] = FALSE;
}
}
foreach($to_inLcs->inLcs as $key => $in){
if($in){
$result_to[$newToIndex[$key]] = FALSE;
}
}
wfProfileOut( __METHOD__ );
return array($result_from, $result_to);
+}
+function wikidiff3_diffPart(array $a, array $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
if($bestKnownLcs==0 || $m==0 || $n==0){
return;
}
if($m>$n){
return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
}
$a_inLcs_sym = &$a_inLcs->inLcs;
$b_inLcs_sym = &$b_inLcs->inLcs;
//remove common tokens at the start
while($m>0 && $n>0 && $a[0]===$b[0]){
$a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
++$offsetx; ++$offsety;
--$m; --$n;
--$bestKnownLcs;
array_shift($a);
array_shift($b);
}
//remove common tokens at the end
while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
$a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
--$m; --$n;
--$bestKnownLcs;
array_pop($a);
array_pop($b);
}
if($bestKnownLcs==0 || $m==0 || $n==0){
return;
}
if($m>$n){
return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
}
$delta = $n-$m;
$delta_plus_1 = $delta+1;
$delta_min_1 = $delta-1;
$fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
$lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
$fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
$overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
$bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
if($boundRunningTime){
$maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
if($maxp_before_bound>=$m){
$boundRunningTime = false;
unset($maxp_before_bound);
}
}
$p=-1;
$m_min_1 = $m-1;
$maxp=$m_min_1-$bestLcsLength;
while($p<$maxp){
if($boundRunningTime && $p>$maxp_before_bound){
// bound the running time by stopping early
if($bestLcsLength>=0){
// accept the best LCS thusfar
break;
}else{
$bestLcsProgressForw = $bestkForw = 0;
foreach($lcsSizeForw as $k => $localLcsProgress){
if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
$bestLcsProgressForw = $localLcsProgress;
$bestkForw = $k;
}
}
$bestLcsProgressBack = $bestkBack = 0;
foreach($lcsSizeBack as $k => $localLcsProgress){
if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
$bestLcsProgressBack = $localLcsProgress;
$bestkBack = $k;
}
}
if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
// This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
}else{
// lets do this
$topSnakeStart = $snakeBeginForw[$bestkForw];
$topSnakeEnd = $snakeEndForw[$bestkForw];
$topSnakek = $snakekForw[$bestkForw];
$bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
$bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
$bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
$bottomSnakek = $snakekBack[$bestkBack];
$bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
// cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
// also diff the middle now
if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
// do the middle diff between the snakes
wfDebug(" Limiting diff run time from both sides, middle between snakes\n");
$m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
$n_t = $bottomSnakeStart-$topSnakeEnd;
$a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
$b_t = array_slice($b, $topSnakeEnd, $n_t);
$offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
$offsety_t = $offsety+$topSnakeEnd;
}else{
// the snake is too early in the sequence, do the middle diff between endpoints of progress
wfDebug(" Limiting diff run time from both sides, middle between endpoints\n");
$m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
$n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
$a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
$b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
$offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
$offsety_t = $offsety+$fpForw[$bestkForw];
}
wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
}else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
// the bottom snake is more interesting
wfDebug("Limiting diff run time from bottom side\n");
$topSnakeStart = $bottomSnakeStart;
$topSnakeEnd = $bottomSnakeEnd;
$topSnakek = $bottomSnakek;
$bestLcsLengthTop = $topSnakeEnd-$topSnakek;
$bottomSnakeStart = $bottomSnakeEnd;
}else{
// the top snake is more interesting
wfDebug(" Limiting diff run time from top side\n");
$bottomSnakeStart = $topSnakeEnd;
$bottomSnakeEnd = $topSnakeEnd;
$bottomSnakek = $topSnakek;
$bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek);
}
break;
}
}
}
++$p;
$overlap[-$p] = $overlap[$delta+$p] = FALSE;
$min_p_min_1 = -$p-1;
$delta_plus_1_plus_p = $delta_plus_1+$p;
// forward
$fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
$snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
$lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
$k=-$p;
do {
$k_plus_1 = $k+1;
$k_min_1 = $k-1;
$fpBelow = $fpForw[$k_min_1]+1;
$fpAbove = $fpForw[$k_plus_1];
$y = &$fpForw[$k];
if($fpBelow>$fpAbove){
$oldy = $y = $fpBelow;
$lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
$snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
$snakeEndForw[$k] = $snakeEndForw[$k_min_1];
$snakekForw[$k] = $snakekForw[$k_min_1];
}else{
$oldy = $y = $fpAbove;
$lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
$snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
$snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
$snakekForw[$k] = $snakekForw[$k_plus_1];
}
$x = $y-$k;
while($x < $m && $y < $n && $a[$x]===$b[$y]){
++$x;
++$y;
}
if($y-$oldy>0){
$lcsSizeForw[$k] += $y-$oldy;
$snakeBeginForw[$k] = $oldy;
$snakeEndForw[$k]= $y;
$snakekForw[$k] = $k;
}
if($y>=$fpBack[$k] && !$overlap[$k]){
// there is overlap
$overlap[$k]=TRUE;
$lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
if($y>$fpBack[$k]+1){
$snakeoverlap = $y-$fpBack[$k]-1;
$lcsLength -= $snakeoverlap;
}
if($lcsLength>$bestLcsLength){
// a better sequence found!
$bestLcsLength = $lcsLength;
$topSnakeStart = $snakeBeginForw[$k];
$topSnakeEnd = $snakeEndForw[$k];
$topSnakek = $snakekForw[$k];
// aligned snakes bite (\ )
// ( \ \)
$bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
$bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
$bottomSnakek = $snakekBack[$k];
if($bottomSnakeEnd<$y){
$bottomSnakeStart = $y;
$bottomSnakeEnd = $y;
$bottomSnakek = $k;
}
$bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
$bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
if($bestKnownLcs==$lcsLength){
$fpForw[$k]=$y;
break 2;
}
$maxp=$m_min_1-$bestLcsLength;
}
}
if($k<$delta_min_1){
++$k;
}else if($k>$delta){
--$k;
}else if($k==$delta_min_1){
$k = $delta+$p;
}else{
break;
}
} while(TRUE);
// backward
$fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
$fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
$lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
$k=$delta+$p;
do {
$k_plus_1 = $k+1;
$k_min_1 = $k-1;
$fpBelow = $fpBack[$k_min_1];
$fpAbove = $fpBack[$k_plus_1]-1;
$y = &$fpBack[$k];
if($fpBelow<$fpAbove){
$oldy = $y = $fpBelow;
$lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
$snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
$snakeEndBack[$k] = $snakeEndBack[$k_min_1];
$snakekBack[$k] = $snakekBack[$k_min_1];
}else{
$oldy = $y = $fpAbove;
$lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
$snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
$snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
$snakekBack[$k] = $snakekBack[$k_plus_1];
}
$x = $y-$k;
while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
--$x;
--$y;
}
if($oldy-$y>0){
$lcsSizeBack[$k] += $oldy-$y;
$snakeBeginBack[$k] = $oldy;
$snakeEndBack[$k] = $y;
$snakekBack[$k] = $k;
}
if($fpForw[$k]>=$y && !$overlap[$k]){
// there is overlap
$overlap[$k]=TRUE;
$lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
if($fpForw[$k]>$y+1){
$snakeoverlap = $fpForw[$k]-$y-1;
$lcsLength -= $snakeoverlap;
}
if($lcsLength>$bestLcsLength){
// a better sequence found!
$bestLcsLength = $lcsLength;
$topSnakeStart = $snakeBeginForw[$k];
$topSnakeEnd = $snakeEndForw[$k];
$topSnakek = $snakekForw[$k];
// aligned snakes bite (\ )
// ( \ \)
$bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
$bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
$bottomSnakek = $snakekBack[$k];
if($bottomSnakeEnd<$fpForw[$k]){
$bottomSnakeStart = $fpForw[$k];
$bottomSnakeEnd = $fpForw[$k];
$bottomSnakek = $k;
}
$bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
$bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
if($bestKnownLcs==$lcsLength){
$fpBack[$k] = $y;
break 2;
}
$maxp=$m_min_1-$bestLcsLength;
}
}
if($k>1){
--$k;
}else if($k<0){
++$k;
}else if($k==1){
$k = -$p;
}else{
break;
}
} while(TRUE);
}
unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
unset($overlap);
// Mark snakes as in LCS
$maxi = $offsetx+$topSnakeEnd-$topSnakek;
for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
$a_inLcs_sym[$i] = TRUE;
}
$maxi = $offsety+$topSnakeEnd;
for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
$b_inLcs_sym[$i] = TRUE;
}
$maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
$a_inLcs_sym[$i] = TRUE;
}
$maxi = $offsety+$bottomSnakeEnd;
for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
$b_inLcs_sym[$i] = TRUE;
}
$m_t = $topSnakeStart-$topSnakek;
$a_t = array_slice($a, 0, $m_t);
$b_t = array_slice($b, 0, $topSnakeStart);
$m_b = $m-($bottomSnakeEnd-$bottomSnakek);
$n_b = $n-$bottomSnakeEnd;
$a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
$b_b = array_slice($b, $bottomSnakeEnd, $n_b);
wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
+}
+class InLcs {
public $inLcs;
function __construct($length){
$this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
}
/**
* Get the length of the Longest Common Subsequence (computed)
*/
public function getLcsLength(){
return array_sum($this->inLcs);
}
+} +?> \ No newline at end of file
Modified: trunk/phase3/includes/DifferenceEngine.php
--- trunk/phase3/includes/DifferenceEngine.php 2008-08-05 19:04:27 UTC (rev 38652) +++ trunk/phase3/includes/DifferenceEngine.php 2008-08-05 19:40:29 UTC (rev 38653) @@ -3,6 +3,8 @@
- @defgroup DifferenceEngine DifferenceEngine
*/
+require_once( 'Diff.php' );
/**
- Constant to indicate diff cache compatibility.
- Bump this when changing the diff formatting in a way that
@@ -77,7 +79,7 @@ global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol; wfProfileIn( __METHOD__ );
# If external diffs are enabled both globally and for the user,
# If external diffs are enabled both globally and for the user, # we'll use the application/x-external-editor interface to call # an external diff tool like kompare, kdiff3, etc. if($wgUseExternalEditor && $wgUser->getOption('externaldiff')) {
@@ -88,19 +90,19 @@ $url2=$this->mTitle->getFullURL("action=raw&oldid=".$this->mNewid); $special=$wgLang->getNsText(NS_SPECIAL); $control=<<<CONTROL -[Process] -Type=Diff text -Engine=MediaWiki -Script={$wgServer}{$wgScript} -Special namespace={$special}
[Process]
Type=Diff text
Engine=MediaWiki
Script={$wgServer}{$wgScript}
Special namespace={$special}
-[File] -Extension=wiki -URL=$url1
[File]
Extension=wiki
URL=$url1
-[File 2] -Extension=wiki -URL=$url2
[File 2]
Extension=wiki
URL=$url2
CONTROL; echo($control); return; @@ -170,15 +172,15 @@ // Look for an unpatrolled change corresponding to this diff $db = wfGetDB( DB_SLAVE ); $change = RecentChange::newFromConds(
array(
// Add redundant user,timestamp condition so we can use the existing index
array(
// Add redundant user,timestamp condition so we can use the existing index 'rc_user_text' => $this->mNewRev->getRawUserText(), 'rc_timestamp' => $db->timestamp( $this->mNewRev->getTimestamp() ), 'rc_this_oldid' => $this->mNewid, 'rc_last_oldid' => $this->mOldid, 'rc_patrolled' => 0
),
__METHOD__
),
__METHOD__ ); if( $change instanceof RecentChange ) { $rcid = $change->mAttribs['rc_id'];
@@ -190,8 +192,8 @@ // Build the link if( $rcid ) { $patrol = ' <span class="patrollink">[' . $sk->makeKnownLinkObj(
$this->mTitle,
wfMsgHtml( 'markaspatrolleddiff' ),
$this->mTitle,
wfMsgHtml( 'markaspatrolleddiff' ), "action=markpatrolled&rcid={$rcid}" ) . ']</span>'; } else {
@@ -225,33 +227,33 @@ if( $wgUser->isAllowed( 'deleterevision' ) ) { $revdel = SpecialPage::getTitleFor( 'Revisiondelete' ); if( !$this->mOldRev->userCan( Revision::DELETED_RESTRICTED ) ) {
// If revision was hidden from sysops
// If revision was hidden from sysops $ldel = wfMsgHtml('rev-delundel'); } else { $ldel = $sk->makeKnownLinkObj( $revdel,
wfMsgHtml('rev-delundel'),
wfMsgHtml('rev-delundel'), 'target=' . urlencode( $this->mOldRev->mTitle->getPrefixedDbkey() ) . '&oldid=' . urlencode( $this->mOldRev->getId() ) ); // Bolden oversighted content if( $this->mOldRev->isDeleted( Revision::DELETED_RESTRICTED ) )
$ldel = "<strong>$ldel</strong>";
$ldel = "<strong>$ldel</strong>"; } $ldel = " <tt>(<small>$ldel</small>)</tt> "; // We don't currently handle well changing the top revision's settings if( $this->mNewRev->isCurrent() ) {
// If revision was hidden from sysops
// If revision was hidden from sysops $rdel = wfMsgHtml('rev-delundel'); } else if( !$this->mNewRev->userCan( Revision::DELETED_RESTRICTED ) ) {
// If revision was hidden from sysops
// If revision was hidden from sysops $rdel = wfMsgHtml('rev-delundel'); } else { $rdel = $sk->makeKnownLinkObj( $revdel,
wfMsgHtml('rev-delundel'),
wfMsgHtml('rev-delundel'), 'target=' . urlencode( $this->mNewRev->mTitle->getPrefixedDbkey() ) . '&oldid=' . urlencode( $this->mNewRev->getId() ) ); // Bolden oversighted content if( $this->mNewRev->isDeleted( Revision::DELETED_RESTRICTED ) )
$rdel = "<strong>$rdel</strong>";
$rdel = "<strong>$rdel</strong>"; } $rdel = " <tt>(<small>$rdel</small>)</tt> "; }
@@ -268,7 +270,7 @@ $this->showDiff( $oldHeader, $newHeader );
if ( !$diffOnly )
$this->renderNewRevision();
$this->renderNewRevision(); wfProfileOut( __METHOD__ ); }
@@ -283,9 +285,9 @@ $wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" ); #add deleted rev tag if needed if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
$wgOut->addWikiMsg( 'rev-deleted-text-permission' );
$wgOut->addWikiMsg( 'rev-deleted-text-permission' ); } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
$wgOut->addWikiMsg( 'rev-deleted-text-view' );
$wgOut->addWikiMsg( 'rev-deleted-text-view' ); } if( !$this->mNewRev->isCurrent() ) {
@@ -310,7 +312,7 @@ $wgOut->addHtml( "\n</pre>\n" ); } } else
$wgOut->addWikiTextTidy( $this->mNewtext );
$wgOut->addWikiTextTidy( $this->mNewtext ); if( !$this->mNewRev->isCurrent() ) { $wgOut->parserOptions()->setEditSection( $oldEditSectionSetting );
@@ -356,9 +358,9 @@
$nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ), 'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' ); $header = "<div class=\"firstrevisionheader\" style=\"text-align: center\"><strong>{$this->mOldtitle}</strong><br />" .
$sk->revUserTools( $this->mNewRev ) . "<br />" .
$sk->revComment( $this->mNewRev ) . "<br />" .
$nextlink . "</div>\n";
$sk->revUserTools( $this->mNewRev ) . "<br />" .
$sk->revComment( $this->mNewRev ) . "<br />" .
$nextlink . "</div>\n"; $wgOut->addHTML( $header );
@@ -423,9 +425,9 @@ wfProfileIn( __METHOD__ ); // Check if the diff should be hidden from this user if ( $this->mOldRev && !$this->mOldRev->userCan(Revision::DELETED_TEXT) ) {
return '';
return ''; } else if ( $this->mNewRev && !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
return '';
return ''; } // Cacheable? $key = false;
@@ -486,7 +488,7 @@ dl('php_wikidiff.so'); } return $wgContLang->unsegementForDiff( wikidiff_do_diff( $otext, $ntext, 2 ) ) .
$this->debug( 'wikidiff1' );
$this->debug( 'wikidiff1' ); } if ( $wgExternalDiffEngine == 'wikidiff2' ) {
@@ -505,7 +507,7 @@ return $text; } }
if ( $wgExternalDiffEngine !== false ) {
if ( $wgExternalDiffEngine != 'wikidiff3' && $wgExternalDiffEngine !== false ) { # Diff via the shell global $wgTmpDirectory; $tempName1 = tempnam( $wgTmpDirectory, 'diff_' );
@@ -541,9 +543,9 @@ $diffs = new Diff( $ota, $nta ); $formatter = new TableDiffFormatter(); return $wgContLang->unsegmentForDiff( $formatter->format( $diffs ) ) .
$this->debug();
$this->debug(); }
/** * Generate a debug comment indicating diff generating time, * server node, and generator backend.
@@ -556,10 +558,10 @@ } $data[] = wfTimestamp( TS_DB ); return "<!-- diff generator: " .
implode( " ",
array_map(
implode( " ",
array_map( "htmlspecialchars",
$data ) ) .
$data ) ) . " -->\n"; }
@@ -568,7 +570,7 @@ */ function localiseLineNumbers( $text ) { return preg_replace_callback( '/<!--LINE (\d+)-->/',
array( &$this, 'localiseLineNumbersCb' ), $text );
array( &$this, 'localiseLineNumbersCb' ), $text ); } function localiseLineNumbersCb( $matches ) {
@@ -582,7 +584,7 @@ */ function getMultiNotice() { if ( !is_object($this->mOldRev) || !is_object($this->mNewRev) )
return '';
return ''; if( !$this->mOldPage->equals( $this->mNewPage ) ) { // Comparing two different pages? Count would be meaningless.
@@ -597,7 +599,7 @@
$n = $this->mTitle->countRevisionsBetween( $oldid, $newid ); if ( !$n )
return '';
return ''; return wfMsgExt( 'diff-multi', array( 'parseinline' ), $n ); }
@@ -610,19 +612,19 @@ global $wgOut;
$header = "
<table class='diff'>
<col class='diff-marker' />
<col class='diff-content' />
<col class='diff-marker' />
<col class='diff-content' />
<tr valign='top'>
<td colspan='2' class='diff-otitle'>{$otitle}</td>
<td colspan='2' class='diff-ntitle'>{$ntitle}</td>
</tr>
<table class='diff'>
<col class='diff-marker' />
<col class='diff-content' />
<col class='diff-marker' />
<col class='diff-content' />
<tr valign='top'>
<td colspan='2' class='diff-otitle'>{$otitle}</td>
<td colspan='2' class='diff-ntitle'>{$ntitle}</td>
</tr> "; if ( $multi != '' )
$header .= "<tr><td colspan='4' align='center' class='diff-multi'>{$multi}</td></tr>";
$header .= "<tr><td colspan='4' align='center' class='diff-multi'>{$multi}</td></tr>"; return $header . $diff . "</table>"; }
@@ -657,10 +659,10 @@
// Load the new revision object $this->mNewRev = $this->mNewid
? Revision::newFromId( $this->mNewid )
: Revision::newFromTitle( $this->mTitle );
? Revision::newFromId( $this->mNewid )
: Revision::newFromTitle( $this->mTitle ); if( !$this->mNewRev instanceof Revision )
return false;
return false; // Update the new revision ID in case it was 0 (makes life easier doing UI stuff) $this->mNewid = $this->mNewRev->getId();
@@ -688,9 +690,9 @@ $this->mNewtitle .= " (<a href='$newEdit'>" . wfMsgHtml( $editable ? 'editold' : 'viewsourceold' ) . "</a>)"; } if ( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
$this->mNewtitle = "<span class='history-deleted'>{$this->mPagetitle}</span>";
$this->mNewtitle = "<span class='history-deleted'>{$this->mPagetitle}</span>"; } else if ( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
$this->mNewtitle = '<span class="history-deleted">'.$this->mNewtitle.'</span>';
$this->mNewtitle = '<span class="history-deleted">'.$this->mNewtitle.'</span>'; } // Load the old revision object
@@ -722,7 +724,7 @@ $this->mOldPagetitle = htmlspecialchars( wfMsg( 'revisionasof', $t ) );
$this->mOldtitle = "<a href='$oldLink'>{$this->mOldPagetitle}</a>"
. " (<a href='$oldEdit'>" . wfMsgHtml( $editable ? 'editold' : 'viewsourceold' ) . "</a>)";
. " (<a href='$oldEdit'>" . wfMsgHtml( $editable ? 'editold' : 'viewsourceold' ) . "</a>)"; // Add an "undo" link $newUndo = $this->mNewPage->escapeLocalUrl( 'action=edit&undoafter=' . $this->mOldid . '&undo=' . $this->mNewid); if( $editable && !$this->mOldRev->isDeleted( Revision::DELETED_TEXT ) && !$this->mNewRev->isDeleted( Revision::DELETED_TEXT ) ) {
@@ -730,9 +732,9 @@ }
if( !$this->mOldRev->userCan( Revision::DELETED_TEXT ) ) {
$this->mOldtitle = '<span class="history-deleted">' . $this->mOldPagetitle . '</span>';
$this->mOldtitle = '<span class="history-deleted">' . $this->mOldPagetitle . '</span>'; } else if( $this->mOldRev->isDeleted( Revision::DELETED_TEXT ) ) {
$this->mOldtitle = '<span class="history-deleted">' . $this->mOldtitle . '</span>';
$this->mOldtitle = '<span class="history-deleted">' . $this->mOldtitle . '</span>'; } }
@@ -828,7 +830,7 @@
function _DiffOp_Copy ($orig, $closing = false) { if (!is_array($closing))
$closing = $orig;
$closing = $orig; $this->orig = $orig; $this->closing = $closing; }
@@ -892,7 +894,6 @@ } }
/**
- Class used internally by Diff to actually compute the diffs.
@@ -911,65 +912,77 @@
- are my own.
- Line length limits for robustness added by Tim Starling, 2005-08-31
- Alternative implementation added by Guy Van den Broeck, 2008-07-30
- @author Geoffrey T. Dairiki, Tim Starling
- @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
- @private
- @ingroup DifferenceEngine
*/ class _DiffEngine {
const MAX_XREF_LENGTH = 10000; function diff ($from_lines, $to_lines) { wfProfileIn( __METHOD__ );
global $wgExternalDiffEngine;
$n_from = sizeof($from_lines); $n_to = sizeof($to_lines);
$this->xchanged = $this->ychanged = array();
$this->xv = $this->yv = array();
$this->xind = $this->yind = array();
unset($this->seq);
unset($this->in_seq);
unset($this->lcs);
if($wgExternalDiffEngine == 'wikidiff3'){
// wikidiff3
list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000);
}else{
// old diff
wfProfileIn( __METHOD__ ." - basicdiff");
$this->xchanged = $this->ychanged = array();
$this->xv = $this->yv = array();
$this->xind = $this->yind = array();
unset($this->seq);
unset($this->in_seq);
unset($this->lcs);
// Skip leading common lines.
for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
if ($from_lines[$skip] !== $to_lines[$skip])
// Skip leading common lines.
for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
if ($from_lines[$skip] !== $to_lines[$skip]) break;
$this->xchanged[$skip] = $this->ychanged[$skip] = false;
}
// Skip trailing common lines.
$xi = $n_from; $yi = $n_to;
for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
if ($from_lines[$xi] !== $to_lines[$yi])
$this->xchanged[$skip] = $this->ychanged[$skip] = false;
}
// Skip trailing common lines.
$xi = $n_from; $yi = $n_to;
for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
if ($from_lines[$xi] !== $to_lines[$yi]) break;
$this->xchanged[$xi] = $this->ychanged[$yi] = false;
}
$this->xchanged[$xi] = $this->ychanged[$yi] = false;
}
// Ignore lines which do not exist in both files.
for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
$xhash[$this->_line_hash($from_lines[$xi])] = 1;
}
// Ignore lines which do not exist in both files.
for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
$xhash[$this->_line_hash($from_lines[$xi])] = 1;
}
for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
$line = $to_lines[$yi];
if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
$line = $to_lines[$yi];
if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) ) continue;
$yhash[$this->_line_hash($line)] = 1;
$this->yv[] = $line;
$this->yind[] = $yi;
}
for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
$line = $from_lines[$xi];
if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
$yhash[$this->_line_hash($line)] = 1;
$this->yv[] = $line;
$this->yind[] = $yi;
}
for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
$line = $from_lines[$xi];
if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) ) continue;
$this->xv[] = $line;
$this->xind[] = $xi;
$this->xv[] = $line;
$this->xind[] = $xi;
}
// Find the LCS.
$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
wfProfileOut( __METHOD__ ." - basicdiff"); }
// Find the LCS.
$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
// Merge edits when possible $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
@@ -984,28 +997,28 @@ // Skip matching "snake". $copy = array(); while ( $xi < $n_from && $yi < $n_to
&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) { $copy[] = $from_lines[$xi++]; ++$yi; } if ($copy)
$edits[] = new _DiffOp_Copy($copy);
$edits[] = new _DiffOp_Copy($copy); // Find deletes & adds. $delete = array(); while ($xi < $n_from && $this->xchanged[$xi])
$delete[] = $from_lines[$xi++];
$delete[] = $from_lines[$xi++]; $add = array(); while ($yi < $n_to && $this->ychanged[$yi])
$add[] = $to_lines[$yi++];
$add[] = $to_lines[$yi++]; if ($delete && $add)
$edits[] = new _DiffOp_Change($delete, $add);
$edits[] = new _DiffOp_Change($delete, $add); elseif ($delete)
$edits[] = new _DiffOp_Delete($delete);
$edits[] = new _DiffOp_Delete($delete); elseif ($add)
$edits[] = new _DiffOp_Add($add);
$edits[] = new _DiffOp_Add($add); } wfProfileOut( __METHOD__ ); return $edits;
@@ -1022,7 +1035,6 @@ } }
/* Divide the Largest Common Subsequence (LCS) of the sequences * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally * sized segments.
@@ -1040,23 +1052,22 @@ * of the portions it is going to specify. */ function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
wfProfileIn( __METHOD__ ); $flip = false; if ($xlim - $xoff > $ylim - $yoff) { // Things seems faster (I'm not sure I understand why)
// when the shortest sequence in X.
$flip = true;
// when the shortest sequence in X.
$flip = true; list ($xoff, $xlim, $yoff, $ylim) = array( $yoff, $ylim, $xoff, $xlim); } if ($flip)
for ($i = $ylim - 1; $i >= $yoff; $i--)
$ymatches[$this->xv[$i]][] = $i;
for ($i = $ylim - 1; $i >= $yoff; $i--)
$ymatches[$this->xv[$i]][] = $i; else
for ($i = $ylim - 1; $i >= $yoff; $i--)
$ymatches[$this->yv[$i]][] = $i;
for ($i = $ylim - 1; $i >= $yoff; $i--)
$ymatches[$this->yv[$i]][] = $i; $this->lcs = 0; $this->seq[0]= $yoff - 1;
@@ -1068,23 +1079,23 @@ for ($chunk = 0; $chunk < $nchunks; $chunk++) { wfProfileIn( __METHOD__ . "-chunk" ); if ($chunk > 0)
for ($i = 0; $i <= $this->lcs; $i++)
$ymids[$i][$chunk-1] = $this->seq[$i];
for ($i = 0; $i <= $this->lcs; $i++)
$ymids[$i][$chunk-1] = $this->seq[$i]; $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); for ( ; $x < $x1; $x++) { $line = $flip ? $this->yv[$x] : $this->xv[$x];
if (empty($ymatches[$line]))
continue;
if (empty($ymatches[$line]))
continue; $matches = $ymatches[$line]; reset($matches); while (list ($junk, $y) = each($matches))
if (empty($this->in_seq[$y])) {
$k = $this->_lcs_pos($y);
USE_ASSERTS && assert($k > 0);
$ymids[$k] = $ymids[$k-1];
break;
}
if (empty($this->in_seq[$y])) {
$k = $this->_lcs_pos($y);
USE_ASSERTS && assert($k > 0);
$ymids[$k] = $ymids[$k-1];
break;
} while (list ( /* $junk */, $y) = each($matches)) { if ($y > $this->seq[$k-1]) { USE_ASSERTS && assert($y < $this->seq[$k]);
@@ -1100,7 +1111,6 @@ } } }
wfProfileOut( __METHOD__ . "-chunk" ); } $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
@@ -1112,13 +1122,10 @@ } $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
wfProfileOut( __METHOD__ ); return array($this->lcs, $seps); } function _lcs_pos ($ypos) {
wfProfileIn( __METHOD__ );
$end = $this->lcs; if ($end == 0 || $ypos > $this->seq[$end]) { $this->seq[++$this->lcs] = $ypos;
@@ -1131,9 +1138,9 @@ while ($beg < $end) { $mid = (int)(($beg + $end) / 2); if ( $ypos > $this->seq[$mid] )
$beg = $mid + 1;
$beg = $mid + 1; else
$end = $mid;
$end = $mid; } USE_ASSERTS && assert($ypos != $this->seq[$end]);
@@ -1141,7 +1148,6 @@ $this->in_seq[$this->seq[$end]] = false; $this->seq[$end] = $ypos; $this->in_seq[$ypos] = 1;
wfProfileOut( __METHOD__ ); return $end; }
@@ -1157,24 +1163,22 @@ * All line numbers are origin-0 and discarded lines are not counted. */ function _compareseq ($xoff, $xlim, $yoff, $ylim) {
wfProfileIn( __METHOD__ );
// Slide down the bottom initial diagonal. while ($xoff < $xlim && $yoff < $ylim
&& $this->xv[$xoff] == $this->yv[$yoff]) {
&& $this->xv[$xoff] == $this->yv[$yoff]) { ++$xoff; ++$yoff; } // Slide up the top initial diagonal. while ($xlim > $xoff && $ylim > $yoff
&& $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
&& $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { --$xlim; --$ylim; } if ($xoff == $xlim || $yoff == $ylim)
$lcs = 0;
$lcs = 0; else { // This is ad hoc but seems to work well. //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
@@ -1188,9 +1192,9 @@ // X and Y sequences have no common subsequence: // mark all changed. while ($yoff < $ylim)
$this->ychanged[$this->yind[$yoff++]] = 1;
$this->ychanged[$this->yind[$yoff++]] = 1; while ($xoff < $xlim)
$this->xchanged[$this->xind[$xoff++]] = 1;
$this->xchanged[$this->xind[$xoff++]] = 1; } else { // Use the partitions to split this problem into subproblems. reset($seps);
@@ -1200,7 +1204,6 @@ $pt1 = $pt2; } }
wfProfileOut( __METHOD__ ); } /* Adjust inserts/deletes of identical lines to join changes
@@ -1237,23 +1240,23 @@ * $other_changed[$j] == false. */ while ($j < $other_len && $other_changed[$j])
$j++;
$j++; while ($i < $len && ! $changed[$i]) { USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); $i++; $j++; while ($j < $other_len && $other_changed[$j])
$j++;
$j++; } if ($i == $len)
break;
break; $start = $i; // Find the end of this run of changes. while (++$i < $len && $changed[$i])
continue;
continue; do { /*
@@ -1271,10 +1274,10 @@ $changed[--$start] = 1; $changed[--$i] = false; while ($start > 0 && $changed[$start - 1])
$start--;
$start--; USE_ASSERTS && assert('$j > 0'); while ($other_changed[--$j])
continue;
continue; USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); }
@@ -1296,14 +1299,14 @@ $changed[$start++] = false; $changed[$i++] = 1; while ($i < $len && $changed[$i])
$i++;
$i++; USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); $j++; if ($j < $other_len && $other_changed[$j]) { $corresponding = $i; while ($j < $other_len && $other_changed[$j])
$j++;
$j++; } } } while ($runlength != $i - $start);
@@ -1317,7 +1320,7 @@ $changed[--$i] = 0; USE_ASSERTS && assert('$j > 0'); while ($other_changed[--$j])
continue;
continue; USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); } }
@@ -1376,7 +1379,7 @@ function isEmpty () { foreach ($this->edits as $edit) { if ($edit->type != 'copy')
return false;
return false; } return true; }
@@ -1392,7 +1395,7 @@ $lcs = 0; foreach ($this->edits as $edit) { if ($edit->type == 'copy')
$lcs += sizeof($edit->orig);
$lcs += sizeof($edit->orig); } return $lcs; }
@@ -1410,7 +1413,7 @@
foreach ($this->edits as $edit) { if ($edit->orig)
array_splice($lines, sizeof($lines), 0, $edit->orig);
array_splice($lines, sizeof($lines), 0, $edit->orig); } return $lines; }
@@ -1428,7 +1431,7 @@
foreach ($this->edits as $edit) { if ($edit->closing)
array_splice($lines, sizeof($lines), 0, $edit->closing);
array_splice($lines, sizeof($lines), 0, $edit->closing); } return $lines; }
@@ -1441,21 +1444,21 @@ function _check ($from_lines, $to_lines) { wfProfileIn( __METHOD__ ); if (serialize($from_lines) != serialize($this->orig()))
trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
trigger_error("Reconstructed original doesn't match", E_USER_ERROR); if (serialize($to_lines) != serialize($this->closing()))
trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
trigger_error("Reconstructed closing doesn't match", E_USER_ERROR); $rev = $this->reverse(); if (serialize($to_lines) != serialize($rev->orig()))
trigger_error("Reversed original doesn't match", E_USER_ERROR);
trigger_error("Reversed original doesn't match", E_USER_ERROR); if (serialize($from_lines) != serialize($rev->closing()))
trigger_error("Reversed closing doesn't match", E_USER_ERROR);
trigger_error("Reversed closing doesn't match", E_USER_ERROR); $prevtype = 'none'; foreach ($this->edits as $edit) { if ( $prevtype == $edit->type )
trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
trigger_error("Edit sequence is non-optimal", E_USER_ERROR); $prevtype = $edit->type; }
@@ -1496,7 +1499,7 @@ * have the same number of elements as $to_lines. */ function MappedDiff($from_lines, $to_lines,
$mapped_from_lines, $mapped_to_lines) {
$mapped_from_lines, $mapped_to_lines) { wfProfileIn( __METHOD__ ); assert(sizeof($from_lines) == sizeof($mapped_from_lines));
@@ -1579,8 +1582,8 @@ $block[] = new _DiffOp_Copy($context); } $this->_block($x0, $ntrail + $xi - $x0,
$y0, $ntrail + $yi - $y0,
$block);
$y0, $ntrail + $yi - $y0,
$block); $block = false; } }
@@ -1593,21 +1596,21 @@ $y0 = $yi - sizeof($context); $block = array(); if ($context)
$block[] = new _DiffOp_Copy($context);
$block[] = new _DiffOp_Copy($context); } $block[] = $edit; } if ($edit->orig)
$xi += sizeof($edit->orig);
$xi += sizeof($edit->orig); if ($edit->closing)
$yi += sizeof($edit->closing);
$yi += sizeof($edit->closing); } if (is_array($block))
$this->_block($x0, $xi - $x0,
$y0, $yi - $y0,
$block);
$this->_block($x0, $xi - $x0,
$y0, $yi - $y0,
$block); $end = $this->_end_diff(); wfProfileOut( __METHOD__ );
@@ -1619,15 +1622,15 @@ $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen)); foreach ($edits as $edit) { if ($edit->type == 'copy')
$this->_context($edit->orig);
$this->_context($edit->orig); elseif ($edit->type == 'add')
$this->_added($edit->closing);
$this->_added($edit->closing); elseif ($edit->type == 'delete')
$this->_deleted($edit->orig);
$this->_deleted($edit->orig); elseif ($edit->type == 'change')
$this->_changed($edit->orig, $edit->closing);
$this->_changed($edit->orig, $edit->closing); else
trigger_error('Unknown edit type', E_USER_ERROR);
trigger_error('Unknown edit type', E_USER_ERROR); } $this->_end_block(); wfProfileOut( __METHOD__ );
@@ -1645,9 +1648,9 @@
function _block_header($xbeg, $xlen, $ybeg, $ylen) { if ($xlen > 1)
$xbeg .= "," . ($xbeg + $xlen - 1);
$xbeg .= "," . ($xbeg + $xlen - 1); if ($ylen > 1)
$ybeg .= "," . ($ybeg + $ylen - 1);
$ybeg .= "," . ($ybeg + $ylen - 1); return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg; }
@@ -1661,7 +1664,7 @@
function _lines($lines, $prefix = ' ') { foreach ($lines as $line)
echo "$prefix $line\n";
echo "$prefix $line\n"; } function _context($lines) {
@@ -1716,40 +1719,40 @@ $newline = 1; $retval = array(); foreach($diff->edits as $edit)
switch($edit->type) {
case 'add':
foreach($edit->closing as $l) {
$retval[] = array(
switch($edit->type) {
case 'add':
foreach($edit->closing as $l) {
$retval[] = array( 'action' => 'add', 'new'=> $l, 'newline' => $newline++
);
}
break;
case 'delete':
foreach($edit->orig as $l) {
$retval[] = array(
);
}
break;
case 'delete':
foreach($edit->orig as $l) {
$retval[] = array( 'action' => 'delete', 'old' => $l, 'oldline' => $oldline++,
);
}
break;
case 'change':
foreach($edit->orig as $i => $l) {
$retval[] = array(
);
}
break;
case 'change':
foreach($edit->orig as $i => $l) {
$retval[] = array( 'action' => 'change', 'old' => $l, 'new' => @$edit->closing[$i], 'oldline' => $oldline++, 'newline' => $newline++,
);
}
break;
case 'copy':
$oldline += count($edit->orig);
$newline += count($edit->orig);
}
);
}
break;
case 'copy':
$oldline += count($edit->orig);
$newline += count($edit->orig);
} return $retval; }
} @@ -1777,13 +1780,13 @@ function _flushGroup ($new_tag) { if ($this->_group !== '') { if ($this->_tag == 'ins')
$this->_line .= '<ins class="diffchange diffchange-inline">' .
htmlspecialchars ( $this->_group ) . '</ins>';
$this->_line .= '<ins class="diffchange diffchange-inline">' .
htmlspecialchars ( $this->_group ) . '</ins>'; elseif ($this->_tag == 'del')
$this->_line .= '<del class="diffchange diffchange-inline">' .
htmlspecialchars ( $this->_group ) . '</del>';
$this->_line .= '<del class="diffchange diffchange-inline">' .
htmlspecialchars ( $this->_group ) . '</del>'; else
$this->_line .= htmlspecialchars ( $this->_group );
$this->_line .= htmlspecialchars ( $this->_group ); } $this->_group = ''; $this->_tag = $new_tag;
@@ -1792,21 +1795,21 @@ function _flushLine ($new_tag) { $this->_flushGroup($new_tag); if ($this->_line != '')
array_push ( $this->_lines, $this->_line );
array_push ( $this->_lines, $this->_line ); else
# make empty lines visible by inserting an NBSP
array_push ( $this->_lines, NBSP );
# make empty lines visible by inserting an NBSP
array_push ( $this->_lines, NBSP ); $this->_line = ''; } function addWords ($words, $tag = '') { if ($tag != $this->_tag)
$this->_flushGroup($tag);
$this->_flushGroup($tag); foreach ($words as $word) { // new-line should only come as first char of word. if ($word == '')
continue;
continue; if ($word[0] == "\n") { $this->_flushLine($tag); $word = substr($word, 1);
@@ -1837,7 +1840,7 @@ list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
$this->MappedDiff($orig_words, $closing_words,
$orig_stripped, $closing_stripped);
$orig_stripped, $closing_stripped); wfProfileOut( __METHOD__ ); }
@@ -1862,7 +1865,7 @@ } else { $m = array(); if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
$line, $m))
$line, $m)) { $words = array_merge( $words, $m[0] ); $stripped = array_merge( $stripped, $m[1] );
@@ -1879,9 +1882,9 @@
foreach ($this->edits as $edit) { if ($edit->type == 'copy')
$orig->addWords($edit->orig);
$orig->addWords($edit->orig); elseif ($edit->orig)
$orig->addWords($edit->orig, 'del');
$orig->addWords($edit->orig, 'del'); } $lines = $orig->getLines(); wfProfileOut( __METHOD__ );
@@ -1894,9 +1897,9 @@
foreach ($this->edits as $edit) { if ($edit->type == 'copy')
$closing->addWords($edit->closing);
$closing->addWords($edit->closing); elseif ($edit->closing)
$closing->addWords($edit->closing, 'ins');
$closing->addWords($edit->closing, 'ins'); } $lines = $closing->getLines(); wfProfileOut( __METHOD__ );
@@ -1969,24 +1972,24 @@ function _added( $lines ) { foreach ($lines as $line) { echo '<tr>' . $this->emptyLine() .
$this->addedLine( '<ins class="diffchange">' .
htmlspecialchars ( $line ) . '</ins>' ) . "</tr>\n";
$this->addedLine( '<ins class="diffchange">' .
htmlspecialchars ( $line ) . '</ins>' ) . "</tr>\n"; } } function _deleted($lines) { foreach ($lines as $line) { echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
htmlspecialchars ( $line ) . '</del>' ) .
$this->emptyLine() . "</tr>\n";
htmlspecialchars ( $line ) . '</del>' ) .
$this->emptyLine() . "</tr>\n"; } } function _context( $lines ) { foreach ($lines as $line) { echo '<tr>' .
$this->contextLine( htmlspecialchars ( $line ) ) .
$this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
$this->contextLine( htmlspecialchars ( $line ) ) .
$this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n"; } }
@@ -2003,11 +2006,11 @@ while ( $line = array_shift( $del ) ) { $aline = array_shift( $add ); echo '<tr>' . $this->deletedLine( $line ) .
$this->addedLine( $aline ) . "</tr>\n";
$this->addedLine( $aline ) . "</tr>\n"; } foreach ($add as $line) { # If any leftovers echo '<tr>' . $this->emptyLine() .
$this->addedLine( $line ) . "</tr>\n";
$this->addedLine( $line ) . "</tr>\n"; } wfProfileOut( __METHOD__ ); }
MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs