aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'cvs2svn_lib/checkout_internal.py')
-rw-r--r--cvs2svn_lib/checkout_internal.py778
1 files changed, 778 insertions, 0 deletions
diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py
new file mode 100644
index 0000000..fe28e0c
--- /dev/null
+++ b/cvs2svn_lib/checkout_internal.py
@@ -0,0 +1,778 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes that implement the --use-internal-co option.
+
+The idea is to patch up the revisions' contents incrementally, thus
+avoiding the huge number of process spawns and the O(n^2) overhead of
+using 'co' and 'cvs'.
+
+InternalRevisionRecorder saves the RCS deltas and RCS revision trees
+to databases. Notably, deltas from the trunk need to be reversed, as
+CVS stores them so they apply from HEAD backwards.
+
+InternalRevisionExcluder copies the revision trees to a new database,
+omitting excluded branches.
+
+InternalRevisionReader produces the revisions' contents on demand. To
+generate the text for a typical revision, we need the revision's delta
+text plus the fulltext of the previous revision. Therefore, we
+maintain a checkout database containing a copy of the fulltext of any
+revision for which subsequent revisions still need to be retrieved.
+It is crucial to remove text from this database as soon as it is no
+longer needed, to prevent it from growing enormous.
+
+There are two reasons that the text from a revision can be needed: (1)
+because the revision itself still needs to be output to a dumpfile;
+(2) because another revision needs it as the base of its delta. We
+maintain a reference count for each revision, which includes *both*
+possibilities. The first time a revision's text is needed, it is
+generated by applying the revision's deltatext to the previous
+revision's fulltext, and the resulting fulltext is stored in the
+checkout database. Each time a revision's fulltext is retrieved, its
+reference count is decremented. When the reference count goes to
+zero, then the fulltext is deleted from the checkout database.
+
+The administrative data for managing this consists of one TextRecord
+entry for each revision. Each TextRecord has an id, which is the same
+id as used for the corresponding CVSRevision instance. It also
+maintains a count of the times it is expected to be retrieved.
+TextRecords come in several varieties:
+
+FullTextRecord -- Used for revisions whose fulltext is contained
+ directly in the RCS file, and therefore available during
+ CollectRevsPass (i.e., typically revision 1.1 of each file).
+
+DeltaTextRecord -- Used for revisions that are defined via a delta
+ relative to some other TextRecord. These records record the id of
+ the TextRecord that holds the base text against which the delta is
+ defined. When the text for a DeltaTextRecord is retrieved, the
+ DeltaTextRecord instance is deleted and a CheckedOutTextRecord
+ instance is created to take its place.
+
+CheckedOutTextRecord -- Used during OutputPass for a revision that
+ started out as a DeltaTextRecord, but has already been retrieved
+ (and therefore its fulltext is stored in the checkout database).
+
+While a file is being processed during CollectRevsPass, the fulltext
+and deltas are stored to the delta database, and TextRecord instances
+are created to keep track of things. The reference counts are all
+initialized to zero.
+
+After CollectRevsPass has done any preliminary tree mangling, its
+_FileDataCollector.parse_completed(), method calls
+RevisionRecorder.finish_file(), passing it the CVSFileItems instance
+that describes the revisions in the file. At this point the reference
+counts for the file's TextRecords are updated: each record referred to
+by a delta has its refcount incremented, and each record that
+corresponds to a non-delete CVSRevision is incremented. After that,
+any records with refcount==0 are removed. When one record is removed,
+that can cause another record's reference count to go to zero and be
+removed too, recursively. When a TextRecord is deleted at this stage,
+its deltatext is also deleted from the delta database.
+
+In FilterSymbolsPass, the exact same procedure (described in the
+previous paragraph) is repeated, but this time using the CVSFileItems
+after it has been updated for excluded symbols, symbol
+preferred-parent grafting, etc."""
+
+
+import cStringIO
+import re
+import time
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.cvs_item import CVSRevisionModification
+from cvs2svn_lib.database import Database
+from cvs2svn_lib.database import IndexedDatabase
+from cvs2svn_lib.rcs_stream import RCSStream
+from cvs2svn_lib.rcs_stream import MalformedDeltaException
+from cvs2svn_lib.revision_manager import RevisionRecorder
+from cvs2svn_lib.revision_manager import RevisionExcluder
+from cvs2svn_lib.revision_manager import RevisionReader
+from cvs2svn_lib.serializer import MarshalSerializer
+from cvs2svn_lib.serializer import CompressingSerializer
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+
+
+class TextRecord(object):
+ """Bookkeeping data for the text of a single CVSRevision."""
+
+ __slots__ = ['id', 'refcount']
+
+ def __init__(self, id):
+ # The cvs_rev_id of the revision whose text this is.
+ self.id = id
+
+ # The number of times that the text of this revision will be
+ # retrieved.
+ self.refcount = 0
+
+ def __getstate__(self):
+ return (self.id, self.refcount,)
+
+ def __setstate__(self, state):
+ (self.id, self.refcount,) = state
+
+ def increment_dependency_refcounts(self, text_record_db):
+ """Increment the refcounts of any records that this one depends on."""
+
+ pass
+
+ def decrement_refcount(self, text_record_db):
+ """Decrement the number of times our text still has to be checked out.
+
+ If the reference count goes to zero, call discard()."""
+
+ self.refcount -= 1
+ if self.refcount == 0:
+ text_record_db.discard(self.id)
+
+ def checkout(self, text_record_db):
+ """Workhorse of the checkout process.
+
+ Return the text for this revision, decrement our reference count,
+ and update the databases depending on whether there will be future
+ checkouts."""
+
+ raise NotImplementedError()
+
+ def free(self, text_record_db):
+ """This instance will never again be checked out; free it.
+
+ Also free any associated resources and decrement the refcounts of
+ any other TextRecords that this one depends on."""
+
+ raise NotImplementedError()
+
+
+class FullTextRecord(TextRecord):
+ __slots__ = []
+
+ def __getstate__(self):
+ return (self.id, self.refcount,)
+
+ def __setstate__(self, state):
+ (self.id, self.refcount,) = state
+
+ def checkout(self, text_record_db):
+ text = text_record_db.delta_db[self.id]
+ self.decrement_refcount(text_record_db)
+ return text
+
+ def free(self, text_record_db):
+ del text_record_db.delta_db[self.id]
+
+ def __str__(self):
+ return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
+
+
+class DeltaTextRecord(TextRecord):
+ __slots__ = ['pred_id']
+
+ def __init__(self, id, pred_id):
+ TextRecord.__init__(self, id)
+
+ # The cvs_rev_id of the revision relative to which this delta is
+ # defined.
+ self.pred_id = pred_id
+
+ def __getstate__(self):
+ return (self.id, self.refcount, self.pred_id,)
+
+ def __setstate__(self, state):
+ (self.id, self.refcount, self.pred_id,) = state
+
+ def increment_dependency_refcounts(self, text_record_db):
+ text_record_db[self.pred_id].refcount += 1
+
+ def checkout(self, text_record_db):
+ base_text = text_record_db[self.pred_id].checkout(text_record_db)
+ co = RCSStream(base_text)
+ delta_text = text_record_db.delta_db[self.id]
+ co.apply_diff(delta_text)
+ text = co.get_text()
+ del co
+ self.refcount -= 1
+ if self.refcount == 0:
+ # This text will never be needed again; just delete ourselves
+ # without ever having stored the fulltext to the checkout
+ # database:
+ del text_record_db[self.id]
+ else:
+ # Store a new CheckedOutTextRecord in place of ourselves:
+ text_record_db.checkout_db['%x' % self.id] = text
+ new_text_record = CheckedOutTextRecord(self.id)
+ new_text_record.refcount = self.refcount
+ text_record_db.replace(new_text_record)
+ return text
+
+ def free(self, text_record_db):
+ del text_record_db.delta_db[self.id]
+ text_record_db[self.pred_id].decrement_refcount(text_record_db)
+
+ def __str__(self):
+ return 'DeltaTextRecord(%x -> %x, %d)' \
+ % (self.pred_id, self.id, self.refcount,)
+
+
+class CheckedOutTextRecord(TextRecord):
+ __slots__ = []
+
+ def __getstate__(self):
+ return (self.id, self.refcount,)
+
+ def __setstate__(self, state):
+ (self.id, self.refcount,) = state
+
+ def checkout(self, text_record_db):
+ text = text_record_db.checkout_db['%x' % self.id]
+ self.decrement_refcount(text_record_db)
+ return text
+
+ def free(self, text_record_db):
+ del text_record_db.checkout_db['%x' % self.id]
+
+ def __str__(self):
+ return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
+
+
+class NullDatabase(object):
+ """A do-nothing database that can be used with TextRecordDatabase.
+
+ Use this when you don't actually want to allow anything to be
+ deleted."""
+
+ def __delitem__(self, id):
+ pass
+
+
+class TextRecordDatabase:
+ """Holds the TextRecord instances that are currently live.
+
+ During CollectRevsPass and FilterSymbolsPass, files are processed
+ one by one and a new TextRecordDatabase instance is used for each
+ file. During OutputPass, a single TextRecordDatabase instance is
+ used for the duration of OutputPass; individual records are added
+ and removed when they are active."""
+
+ def __init__(self, delta_db, checkout_db):
+ # A map { cvs_rev_id -> TextRecord }.
+ self.text_records = {}
+
+ # A database-like object using cvs_rev_ids as keys and containing
+ # fulltext/deltatext strings as values. Its __getitem__() method
+ # is used to retrieve deltas when they are needed, and its
+ # __delitem__() method is used to delete deltas when they can be
+ # freed. The modifiability of the delta database varies from pass
+ # to pass, so the object stored here varies as well:
+ #
+ # CollectRevsPass: a fully-functional IndexedDatabase. This
+ # allows deltas that will not be needed to be deleted.
+ #
+ # FilterSymbolsPass: a NullDatabase. The delta database cannot be
+ # modified during this pass, and we have no need to retrieve
+ # deltas, so we just use a dummy object here.
+ #
+ # OutputPass: a disabled IndexedDatabase. During this pass we
+ # need to retrieve deltas, but we are not allowed to modify the
+ # delta database. So we use an IndexedDatabase whose __del__()
+ # method has been disabled to do nothing.
+ self.delta_db = delta_db
+
+ # A database-like object using cvs_rev_ids as keys and containing
+ # fulltext strings as values. This database is only set during
+ # OutputPass.
+ self.checkout_db = checkout_db
+
+ # If this is set to a list, then the list holds the ids of
+ # text_records that have to be deleted; when discard() is called,
+ # it adds the requested id to the list but does not delete it. If
+ # this member is set to None, then text_records are deleted
+ # immediately when discard() is called.
+ self.deferred_deletes = None
+
+ def __getstate__(self):
+ return (self.text_records.values(),)
+
+ def __setstate__(self, state):
+ (text_records,) = state
+ self.text_records = {}
+ for text_record in text_records:
+ self.add(text_record)
+ self.delta_db = NullDatabase()
+ self.checkout_db = NullDatabase()
+ self.deferred_deletes = None
+
+ def add(self, text_record):
+ """Add TEXT_RECORD to our database.
+
+ There must not already be a record with the same id."""
+
+ assert not self.text_records.has_key(text_record.id)
+
+ self.text_records[text_record.id] = text_record
+
+ def __getitem__(self, id):
+ return self.text_records[id]
+
+ def __delitem__(self, id):
+ """Free the record with the specified ID."""
+
+ del self.text_records[id]
+
+ def replace(self, text_record):
+ """Store TEXT_RECORD in place of the existing record with the same id.
+
+ Do not do anything with the old record."""
+
+ assert self.text_records.has_key(text_record.id)
+ self.text_records[text_record.id] = text_record
+
+ def discard(self, *ids):
+ """The text records with IDS are no longer needed; discard them.
+
+ This involves calling their free() methods and also removing them
+ from SELF.
+
+ If SELF.deferred_deletes is not None, then the ids to be deleted
+ are added to the list instead of deleted immediately. This
+ mechanism is to prevent a stack overflow from the avalanche of
+ deletes that can result from deleting a long chain of revisions."""
+
+ if self.deferred_deletes is None:
+ # This is an outer-level delete.
+ self.deferred_deletes = list(ids)
+ while self.deferred_deletes:
+ id = self.deferred_deletes.pop()
+ text_record = self[id]
+ if text_record.refcount != 0:
+ raise InternalError(
+ 'TextRecordDatabase.discard(%s) called with refcount = %d'
+ % (text_record, text_record.refcount,)
+ )
+ # This call might cause other text_record ids to be added to
+ # self.deferred_deletes:
+ text_record.free(self)
+ del self[id]
+ self.deferred_deletes = None
+ else:
+ self.deferred_deletes.extend(ids)
+
+ def itervalues(self):
+ return self.text_records.itervalues()
+
+ def recompute_refcounts(self, cvs_file_items):
+ """Recompute the refcounts of the contained TextRecords.
+
+ Use CVS_FILE_ITEMS to determine which records will be needed by
+ cvs2svn."""
+
+ # First clear all of the refcounts:
+ for text_record in self.itervalues():
+ text_record.refcount = 0
+
+ # Now increment the reference count of records that are needed as
+ # the source of another record's deltas:
+ for text_record in self.itervalues():
+ text_record.increment_dependency_refcounts(self.text_records)
+
+ # Now increment the reference count of records that will be needed
+ # by cvs2svn:
+ for lod_items in cvs_file_items.iter_lods():
+ for cvs_rev in lod_items.cvs_revisions:
+ if isinstance(cvs_rev, CVSRevisionModification):
+ self[cvs_rev.id].refcount += 1
+
+ def free_unused(self):
+ """Free any TextRecords whose reference counts are zero."""
+
+ # The deletion of some of these text records might cause others to
+ # be unused, in which case they will be deleted automatically.
+ # But since the initially-unused records are not referred to by
+ # any others, we don't have to be afraid that they will be deleted
+ # before we get to them. But it *is* crucial that we create the
+ # whole unused list before starting the loop.
+
+ unused = [
+ text_record.id
+ for text_record in self.itervalues()
+ if text_record.refcount == 0
+ ]
+
+ self.discard(*unused)
+
+ def log_leftovers(self):
+ """If any TextRecords still exist, log them."""
+
+ if self.text_records:
+ Log().warn(
+ "%s: internal problem: leftover revisions in the checkout cache:"
+ % warning_prefix)
+ for text_record in self.itervalues():
+ Log().warn(' %s' % (text_record,))
+
+ def __repr__(self):
+ """Debugging output of the current contents of the TextRecordDatabase."""
+
+ retval = ['TextRecordDatabase:']
+ for text_record in self.itervalues():
+ retval.append(' %s' % (text_record,))
+ return '\n'.join(retval)
+
+
+class InternalRevisionRecorder(RevisionRecorder):
+ """A RevisionRecorder that reconstructs the fulltext internally."""
+
+ def __init__(self, compress):
+ RevisionRecorder.__init__(self)
+ self._compress = compress
+
+ def register_artifacts(self, which_pass):
+ artifact_manager.register_temp_file(
+ config.RCS_DELTAS_INDEX_TABLE, which_pass
+ )
+ artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
+ artifact_manager.register_temp_file(
+ config.RCS_TREES_INDEX_TABLE, which_pass
+ )
+ artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
+
+ def start(self):
+ ser = MarshalSerializer()
+ if self._compress:
+ ser = CompressingSerializer(ser)
+ self._rcs_deltas = IndexedDatabase(
+ artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
+ artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
+ DB_OPEN_NEW, ser)
+ primer = (FullTextRecord, DeltaTextRecord)
+ self._rcs_trees = IndexedDatabase(
+ artifact_manager.get_temp_file(config.RCS_TREES_STORE),
+ artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
+ DB_OPEN_NEW, PrimedPickleSerializer(primer))
+
+ def start_file(self, cvs_file_items):
+ self._cvs_file_items = cvs_file_items
+
+ # A map from cvs_rev_id to TextRecord instance:
+ self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
+
+ def record_text(self, cvs_rev, log, text):
+ if isinstance(cvs_rev.lod, Trunk):
+ # On trunk, revisions are encountered in reverse order (1.<N>
+ # ... 1.1) and deltas are inverted. The first text that we see
+ # is the fulltext for the HEAD revision. After that, the text
+ # corresponding to revision 1.N is the delta (1.<N+1> ->
+ # 1.<N>)). We have to invert the deltas here so that we can
+ # read the revisions out in dependency order; that is, for
+ # revision 1.1 we want the fulltext, and for revision 1.<N> we
+ # want the delta (1.<N-1> -> 1.<N>). This means that we can't
+ # compute the delta for a revision until we see its logical
+ # parent. When we finally see revision 1.1 (which is recognized
+ # because it doesn't have a parent), we can record the diff (1.1
+ # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
+
+ if cvs_rev.next_id is None:
+ # This is HEAD, as fulltext. Initialize the RCSStream so
+ # that we can compute deltas backwards in time.
+ self._stream = RCSStream(text)
+ else:
+ # Any other trunk revision is a backward delta. Apply the
+ # delta to the RCSStream to mutate it to the contents of this
+ # revision, and also to get the reverse delta, which we store
+ # as the forward delta of our child revision.
+ try:
+ text = self._stream.invert_diff(text)
+ except MalformedDeltaException, (msg):
+ Log().error('Malformed RCS delta in %s, revision %s: %s'
+ % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
+ msg))
+ raise RuntimeError
+ text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
+ self._writeout(text_record, text)
+
+ if cvs_rev.prev_id is None:
+ # This is revision 1.1. Write its fulltext:
+ text_record = FullTextRecord(cvs_rev.id)
+ self._writeout(text_record, self._stream.get_text())
+
+ # There will be no more trunk revisions delivered, so free the
+ # RCSStream.
+ del self._stream
+
+ else:
+ # On branches, revisions are encountered in logical order
+ # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
+ # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
+ # <BRANCH>.<N>). That's what we need, so just store it.
+
+ # FIXME: It would be nice to avoid writing out branch deltas
+ # when --trunk-only. (They will be deleted when finish_file()
+ # is called, but if the delta db is in an IndexedDatabase the
+ # deletions won't actually recover any disk space.)
+ text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
+ self._writeout(text_record, text)
+
+ return None
+
+ def _writeout(self, text_record, text):
+ self.text_record_db.add(text_record)
+ self._rcs_deltas[text_record.id] = text
+
+ def finish_file(self, cvs_file_items):
+ """Finish processing of the current file.
+
+ Compute the initial text record refcounts, discard any records
+ that are unneeded, and store the text records for the file to the
+ _rcs_trees database."""
+
+ # Delete our copy of the preliminary CVSFileItems:
+ del self._cvs_file_items
+
+ self.text_record_db.recompute_refcounts(cvs_file_items)
+ self.text_record_db.free_unused()
+ self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
+ del self.text_record_db
+
+ def finish(self):
+ self._rcs_deltas.close()
+ self._rcs_trees.close()
+
+
+class InternalRevisionExcluder(RevisionExcluder):
+ """The RevisionExcluder used by InternalRevisionReader."""
+
+ def register_artifacts(self, which_pass):
+ artifact_manager.register_temp_file_needed(
+ config.RCS_TREES_STORE, which_pass
+ )
+ artifact_manager.register_temp_file_needed(
+ config.RCS_TREES_INDEX_TABLE, which_pass
+ )
+ artifact_manager.register_temp_file(
+ config.RCS_TREES_FILTERED_STORE, which_pass
+ )
+ artifact_manager.register_temp_file(
+ config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
+ )
+
+ def start(self):
+ self._tree_db = IndexedDatabase(
+ artifact_manager.get_temp_file(config.RCS_TREES_STORE),
+ artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
+ DB_OPEN_READ)
+ primer = (FullTextRecord, DeltaTextRecord)
+ self._new_tree_db = IndexedDatabase(
+ artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
+ artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
+ DB_OPEN_NEW, PrimedPickleSerializer(primer))
+
+ def process_file(self, cvs_file_items):
+ text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
+ text_record_db.recompute_refcounts(cvs_file_items)
+ text_record_db.free_unused()
+ self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db
+
+ def finish(self):
+ self._tree_db.close()
+ self._new_tree_db.close()
+
+
+class _KeywordExpander:
+ """A class whose instances provide substitutions for CVS keywords.
+
+ This class is used via its __call__() method, which should be called
+ with a match object representing a match for a CVS keyword string.
+ The method returns the replacement for the matched text.
+
+ The __call__() method works by calling the method with the same name
+ as that of the CVS keyword (converted to lower case).
+
+ Instances of this class can be passed as the REPL argument to
+ re.sub()."""
+
+ date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
+ date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12
+
+ date_fmt = date_fmt_new
+
+ @classmethod
+ def use_old_date_format(klass):
+ """Class method to ensure exact compatibility with CVS 1.11
+ output. Use this if you want to verify your conversion and you're
+ using CVS 1.11."""
+ klass.date_fmt = klass.date_fmt_old
+
+ def __init__(self, cvs_rev):
+ self.cvs_rev = cvs_rev
+
+ def __call__(self, match):
+ return '$%s: %s $' % \
+ (match.group(1), getattr(self, match.group(1).lower())(),)
+
+ def author(self):
+ return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
+
+ def date(self):
+ return time.strftime(self.date_fmt,
+ time.gmtime(self.cvs_rev.timestamp))
+
+ def header(self):
+ return '%s %s %s %s Exp' % \
+ (self.source(), self.cvs_rev.rev, self.date(), self.author())
+
+ def id(self):
+ return '%s %s %s %s Exp' % \
+ (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
+
+ def locker(self):
+ # Handle kvl like kv, as a converted repo is supposed to have no
+ # locks.
+ return ''
+
+ def log(self):
+ # Would need some special handling.
+ return 'not supported by cvs2svn'
+
+ def name(self):
+ # Cannot work, as just creating a new symbol does not check out
+ # the revision again.
+ return 'not supported by cvs2svn'
+
+ def rcsfile(self):
+ return self.cvs_rev.cvs_file.basename + ",v"
+
+ def revision(self):
+ return self.cvs_rev.rev
+
+ def source(self):
+ project = self.cvs_rev.cvs_file.project
+ return project.cvs_repository_root + '/' + project.cvs_module + \
+ self.cvs_rev.cvs_file.cvs_path + ",v"
+
+ def state(self):
+ # We check out only live revisions.
+ return 'Exp'
+
+
+class InternalRevisionReader(RevisionReader):
+ """A RevisionReader that reads the contents from an own delta store."""
+
+ _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
+ _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
+ _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
+
+ def __init__(self, compress):
+ self._compress = compress
+
+ def register_artifacts(self, which_pass):
+ artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
+ artifact_manager.register_temp_file_needed(
+ config.RCS_DELTAS_STORE, which_pass
+ )
+ artifact_manager.register_temp_file_needed(
+ config.RCS_DELTAS_INDEX_TABLE, which_pass
+ )
+ artifact_manager.register_temp_file_needed(
+ config.RCS_TREES_FILTERED_STORE, which_pass
+ )
+ artifact_manager.register_temp_file_needed(
+ config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
+ )
+
+ def start(self):
+ self._delta_db = IndexedDatabase(
+ artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
+ artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
+ DB_OPEN_READ)
+ self._delta_db.__delitem__ = lambda id: None
+ self._tree_db = IndexedDatabase(
+ artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
+ artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
+ DB_OPEN_READ)
+ ser = MarshalSerializer()
+ if self._compress:
+ ser = CompressingSerializer(ser)
+ self._co_db = Database(
+ artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
+ ser)
+
+ # The set of CVSFile instances whose TextRecords have already been
+ # read:
+ self._loaded_files = set()
+
+ # A map { CVSFILE : _FileTree } for files that currently have live
+ # revisions:
+ self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
+
+ def _get_text_record(self, cvs_rev):
+ """Return the TextRecord instance for CVS_REV.
+
+ If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
+ do so now."""
+
+ if cvs_rev.cvs_file not in self._loaded_files:
+ for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
+ self._text_record_db.add(text_record)
+ self._loaded_files.add(cvs_rev.cvs_file)
+
+ return self._text_record_db[cvs_rev.id]
+
+ def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
+ """Check out the text for revision C_REV from the repository.
+
+ Return the text wrapped in a readable file object. If
+ SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
+ _un_expanded prior to returning the file content. Note that $Log$
+ never actually generates a log (which makes test 'requires_cvs()'
+ fail).
+
+ Revisions may be requested in any order, but if they are not
+ requested in dependency order the checkout database will become
+ very large. Revisions may be skipped. Each revision may be
+ requested only once."""
+
+ try:
+ text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
+ except MalformedDeltaException, (msg):
+ raise FatalError('Malformed RCS delta in %s, revision %s: %s'
+ % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
+ if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
+ if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
+ text = self._kw_re.sub(r'$\1$', text)
+ else:
+ text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
+
+ return cStringIO.StringIO(text)
+
+ def finish(self):
+ self._text_record_db.log_leftovers()
+
+ del self._text_record_db
+ self._delta_db.close()
+ self._tree_db.close()
+ self._co_db.close()
+