Jump to content

User:PerfektesChaos/WikidiffLX/coding/Line.cpp

From mediawiki.org

Declarations in Line.h

Basic idea stolen from wikidiff2/Word.h but extended by specific flags:

member type meaning
trailingEnd Iterator point after last char of last trailing invisible line, maybe equal to suffixEnd
number size_t line (paragraph) number in original counting of \n (if not NO_LINE_NUMBERS)
lineHard bool this line is terminated by \n or last line at all
lineBlack bool this change line has no difference compared to the corresponding match except trailing whitespace
op int DiffOp
match Line* pointer to the corresponding line for copy, NULL for del and add, and one of them for change.


#include "Line.h"

// A small class to accommodate lines with hard and virtual termination
// Basically, the pointers and a marker indicating the termination type.
// Only the bodies are compared on operator==.
// For presentation the length of the suffix (not the invisible content)
// can be retrieved, and suffixes may be compared internally.
// Also the number of trailing lines and each length (not the invisible content)
// can be retrieved, and trailing lines may be compared internally.
//
// This class stores iterators pointing to the text string, this is to avoid
// excessive allocation calls. To avoid invalidation, the source string should
// not be changed or destroyed.

   /**
     * The body is the character sequence [bs, be)
     * The space suffix is the character sequence [be, se), none if se=be
     * A \n break is indicated by hard;  false for virtual break
     * Trailing empty lines after hard break is [se, te), none if te=se
     * The suffix is at least a "\n" for hard breaks
     *     and one space if virtual break detected by ". "
     *     suffix is empty at end of text.
     * The number is the line number in original \n counting
     *
     * Introduced in WikidiffLX by PerfektesChaos@de.wikipedia 2011
     */
// Constructors.
// Two flavours exist, each in a variant with and without line number
// (selected by NO_LINE_NUMBERS):
//  - hard-break line:  (bs, be, se, te [, n])  -> lineHard = true
//  - virtual line:     (bs, be, se [, n])      -> lineHard = false
// Every constructor now leaves lineBlack == false and match == NULL, so that
// equals_body() and get_counterPart() never read an indeterminate member.
// The diff code 'op' is not assigned here; it is written by set_diff().
#ifdef NO_LINE_NUMBERS
Line::Line(Iterator bs, Iterator be, Iterator se, Iterator te)
      : bodyStart(bs), bodyEnd(be), suffixEnd(se), trailingEnd(te)
   {
      // \n break or end of text
      lineHard = true;
      lineBlack = false;   // was left unset in this variant
      match = NULL;
   }
Line::Line(Iterator bs, Iterator be, Iterator se)
      : bodyStart(bs), bodyEnd(be), suffixEnd(se)
#else
Line::Line(Iterator bs, Iterator be, Iterator se, Iterator te, size_t n)
      : bodyStart(bs), bodyEnd(be), suffixEnd(se), trailingEnd(te), number(n)
   {
      // \n break or end of text
      lineHard = true;
      lineBlack = false;
      match = NULL;
   }
Line::Line(Iterator bs, Iterator be, Iterator se, size_t n)
      : bodyStart(bs), bodyEnd(be), suffixEnd(se), number(n)
#endif
   {
      // virtual line (shared body of both virtual-line constructors)
      lineHard = false;
      // NOTE(review): the class comment says "none if te=se", yet the end of
      // the BODY is stored here, not the end of the suffix -- confirm intent.
      trailingEnd = be;
      lineBlack = false;
      match = NULL;
   }

bool Line::operator==(const Line &o) const
   {
      // Only the bodies [bodyStart, bodyEnd) take part in the comparison.
      // Lengths are checked first, so std::equal is only reached when both
      // bodies have the same number of characters.
      if ((bodyEnd - bodyStart) != (o.bodyEnd - o.bodyStart)) {
         return false;
      }
      return std::equal(bodyStart, bodyEnd, o.bodyStart);
   }   // operator==
bool Line::operator!=(const Line &o) const
   {
      // Exact negation of operator==, i.e. the bodies differ
      return !(*this == o);
   }   // operator!=
bool Line::operator<(const Line &o) const
   {
      // Lexicographical ordering of the two bodies; suffix and trailing
      // lines are not considered.
      const bool precedes = std::lexicographical_compare(bodyStart,
                                                         bodyEnd,
                                                         o.bodyStart,
                                                         o.bodyEnd);
      return precedes;
   }   // operator<
//fade out? diffEngine
Line::operator String() const
   {
      // Conversion yields body plus suffix: [bodyStart, suffixEnd)
      String text(bodyStart, suffixEnd);
      return text;
   }   // operator String

bool Line::is_Change() const
   {
      return (op == DiffOp<Line>::change);
   }   // is_Change()
bool Line::is_Copy() const
   {
      // true iff the stored diff code is DiffOp<Line>::copy
      const bool copied = (op == DiffOp<Line>::copy);
      return copied;
   }   // is_Copy()

bool Line::is_HardBreak() const
   {
      // Reports the termination type recorded by the constructor:
      //    true   line ended by \n (or end of text)
      //    false  virtual break
      return lineHard;
   }   // is_HardBreak()

bool Line::equals_body() const
   {
      // Returns the lineBlack flag, which set_diffCopyChange() and
      // whitespaceOnly() raise on a line and its counterpart.
      return lineBlack;
   }   // equals_body()
bool Line::equals_suffix(const Line * o) const
   {
      // True iff the suffixes [bodyEnd, suffixEnd) of this line and of the
      // other line o are literally equal.
      // o must not be NULL; it is dereferenced unconditionally.
      if (suffixEnd == bodyEnd) {
         // An empty suffix only matches another empty suffix
         return (o->suffixEnd == o->bodyEnd);
      }
      // Logical && (not bitwise &): std::equal must only run when both
      // suffixes have the same length, otherwise it would read beyond the
      // end of the shorter suffix of o.
      return (suffixEnd - bodyEnd == o->suffixEnd - o->bodyEnd)
              && std::equal(bodyEnd, suffixEnd, o->bodyEnd);
   }   // equals_suffix()
bool Line::equals_trailing(const Line * o, size_t i) const
   {
      // True iff the range delimited by point2break(i) and point2break(i+1)
      // is literally equal in this line and in the other line o.
      // o must not be NULL; it is dereferenced unconditionally.
      Iterator pB = point2break(i);
      Iterator pB2 = o->point2break(i);
      Iterator pE = point2break(i+1);
      Iterator pE2 = o->point2break(i+1);
      if (pE == pB) {
         // An empty range only matches another empty range
         return (pE2 == pB2);
      }
      // Logical && (not bitwise &): std::equal must only run when both
      // ranges have the same length, otherwise it would read beyond pE2.
      return (pE - pB == pE2 - pB2)
              && std::equal(pB, pE, pB2);
   }   // equals_trailing(Line, size_t)

Line::String Line::get_body() const
   {
      // Copy of the visible body [bodyStart, bodyEnd), without suffix
      String body(bodyStart, bodyEnd);
      return body;
   }   // get_body()
Line::Iterator Line::get_bodyBegin() const
   {
      // Iterator to the first character of the body
      return bodyStart;
   }   // get_bodyBegin()
Line::Iterator Line::get_bodyEnd() const
   {
      // Iterator one past the last character of the body
      return bodyEnd;
   }   // get_bodyEnd()
Line * Line::get_counterPart() const
   {
      // Pointer stored by set_diff(); NULL after set_diff(int)
      Line * counterPart = match;
      return counterPart;
   }   // get_counterPart()
int Line::get_diffCode() const
   {
      // Diff code as last stored by set_diff() or one of the
      // copy->change promotions
      const int code = op;
      return code;
   }   // get_diffCode()
#ifndef NO_LINE_NUMBERS
size_t Line::get_lineNumber() const
   {
      // Line number handed to the constructor;
      // only available when line numbers are compiled in
      const size_t n = number;
      return n;
   }   // get_lineNumber()
#endif
size_t Line::get_suffixLength() const
   {
      Iterator p = bodyEnd;
      return count4invisible(&p, suffixEnd);
   }   // get_suffixLength()
size_t Line::get_trailingCount() const
   {
      // Retrieve number of empty trailing lines
      size_t k = 0;
      if (trailingEnd > suffixEnd) {
         Iterator p = suffixEnd + 1;   // '\n' at suffixEnd
         k = 0;
         while (p < trailingEnd) {
            p = point2break(p);
            k++;
         }   // while
      }   // trailing exists
      return k;
   }   // get_trailingCount()
size_t Line::get_trailingLength(const size_t i) const
   {
      // Retrieve length of empty trailing line #i
      // i counts from 0 to get_trailingCount-1
      Iterator p = point2break(i);
      return count4invisible(&p, trailingEnd);
   }   // get_trailingLength()

void Line::set_diff(const int k)
   {
      // Store diff code k for a line without counterpart:
      // the match pointer is cleared.
      match = NULL;
      op = k;
   }   // set_diff(int)
void Line::set_diff(const int k, Line * p)
   {
      // Store diff code k together with the counterpart line p
      match = p;
      op = k;
   }   // set_diff(int, Line*)
void Line::set_diffCopyChange()
   {
      // Turn this line into a change.
      // If it was a copy so far, it is additionally flagged as lineBlack,
      // and its counterpart (if any) is turned into a lineBlack change too.
      //
      // The test must be a comparison (==); the former assignment (=)
      // overwrote op and made the branch depend on the numeric value of
      // DiffOp<Line>::copy instead of on the state of this line.
      const bool wasCopy = (op == DiffOp<Line>::copy);
      op = DiffOp<Line>::change;
      if (wasCopy) {
         lineBlack = true;
         // set_diff(int) leaves match NULL; do not dereference it then
         if (match != NULL) {
            match->op = DiffOp<Line>::change;
            match->lineBlack = true;
         }
      }
   }   // set_diffCopyChange()
void Line::whitespaceOnly()
   {
      // For a pair of lines both marked as copy: compare everything behind
      // the body, [bodyEnd, trailingEnd), of this line and its counterpart.
      // If these tails differ, both lines are turned into a change and
      // flagged as lineBlack.
      if (op != DiffOp<Line>::copy) {
         return;
      }
      // set_diff(int) leaves match NULL; nothing to compare against then
      if (match == NULL || match->op != DiffOp<Line>::copy) {
         return;
      }
      // leap: the tails differ. Default covers tails of different length.
      bool leap = true;
      if (trailingEnd == bodyEnd) {
         // No tail here: differs iff the counterpart has one
         leap = (match->trailingEnd > match->bodyEnd);
      } else if (trailingEnd - bodyEnd ==
                 match->trailingEnd - match->bodyEnd) {
         // Same length: differs iff the content is NOT equal.
         // (Formerly the result of std::equal was used without negation,
         // which promoted identical tails and kept differing ones.)
         leap = !std::equal(bodyEnd, trailingEnd, match->bodyEnd);
      }
      if (leap) {
         lineBlack = true;
         op = DiffOp<Line>::change;
         match->op = DiffOp<Line>::change;
         match->lineBlack = true;
      }
   }   // whitespaceOnly()

Line::Iterator Line::point2break(const Iterator point) const
   {
      // Scan forward from point, never beyond trailingEnd.
      // Returns the position right behind the first 0x0A (\n) found,
      // or trailingEnd if there is none (or point is not before trailingEnd).
      for (Iterator cursor = point; cursor < trailingEnd; ++cursor) {
         const unsigned char octet = (unsigned char)*cursor;
         if (octet == 0x0A) {
            return cursor + 1;
         }
      }   // for
      return trailingEnd;
   }   // point2break(Iterator)
Line::Iterator Line::point2break(const size_t i) const
   {
      // Start right behind suffixEnd ('\n' at suffixEnd) and apply
      // point2break(Iterator) repeatedly: i+1 times, or fewer if
      // trailingEnd is reached before. Returns the position arrived at.
      Iterator cursor = suffixEnd + 1;
      for (size_t step = 0; cursor < trailingEnd; ++step) {
         cursor = point2break(cursor);
         if (step == i) {
            break;   // for
         }
      }   // for
      return cursor;
   }   // point2break(size_t)
size_t Line::count4invisible(Iterator *p, const Iterator pEnd) const
   {
      // Measure the invisible characters starting at *p.
      //    p     in/out: start position; advanced over everything counted
      //    pEnd  limit, never exceeded
      // Counting stops at pEnd or at the first 0x0A (\n), which is neither
      // counted nor skipped. Returns 1 per octet below 0xE0 and 3 per
      // sequence starting with an octet >= 0xE0.
      //
      // The position is advanced on every turn now; formerly *p was never
      // changed, so any character other than \n caused an endless loop.
      size_t n = 0;
      unsigned char b;
      while (*p < pEnd) {
         size_t step = 1;
         b = (unsigned char)**p;
         if (b == 0x0A) {
            return n;
         } else if (b >= 0xE0) {
            //UTF8 whitespace: U+2002...U+200A
            //Characters in range U+0800 to U+FFFF represented by 3 bytes
            //first octet is 1110 = 14 = xE
            n += 2;
            // skip the two continuation octets together with the lead octet
            step = 3;
         }
         // ASCII whitespace <= 0x20
         n++;
         // Clamp to pEnd in case of a truncated multi-octet sequence
         if ((size_t)(pEnd - *p) < step) {
            *p = pEnd;
         } else {
            *p += step;
         }
      }   // while
      return n;
   }   // count4invisible()