diff options
Diffstat (limited to 'cvs2svn_lib/checkout_internal.py')
-rw-r--r-- | cvs2svn_lib/checkout_internal.py | 778 |
1 files changed, 778 insertions, 0 deletions
diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py new file mode 100644 index 0000000..fe28e0c --- /dev/null +++ b/cvs2svn_lib/checkout_internal.py @@ -0,0 +1,778 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2007-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains classes that implement the --use-internal-co option. + +The idea is to patch up the revisions' contents incrementally, thus +avoiding the huge number of process spawns and the O(n^2) overhead of +using 'co' and 'cvs'. + +InternalRevisionRecorder saves the RCS deltas and RCS revision trees +to databases. Notably, deltas from the trunk need to be reversed, as +CVS stores them so they apply from HEAD backwards. + +InternalRevisionExcluder copies the revision trees to a new database, +omitting excluded branches. + +InternalRevisionReader produces the revisions' contents on demand. To +generate the text for a typical revision, we need the revision's delta +text plus the fulltext of the previous revision. Therefore, we +maintain a checkout database containing a copy of the fulltext of any +revision for which subsequent revisions still need to be retrieved. +It is crucial to remove text from this database as soon as it is no +longer needed, to prevent it from growing enormous. + +There are two reasons that the text from a revision can be needed: (1) +because the revision itself still needs to be output to a dumpfile; +(2) because another revision needs it as the base of its delta. We +maintain a reference count for each revision, which includes *both* +possibilities. The first time a revision's text is needed, it is +generated by applying the revision's deltatext to the previous +revision's fulltext, and the resulting fulltext is stored in the +checkout database. Each time a revision's fulltext is retrieved, its +reference count is decremented. When the reference count goes to +zero, then the fulltext is deleted from the checkout database. + +The administrative data for managing this consists of one TextRecord +entry for each revision. Each TextRecord has an id, which is the same +id as used for the corresponding CVSRevision instance. It also +maintains a count of the times it is expected to be retrieved. +TextRecords come in several varieties: + +FullTextRecord -- Used for revisions whose fulltext is contained + directly in the RCS file, and therefore available during + CollectRevsPass (i.e., typically revision 1.1 of each file). + +DeltaTextRecord -- Used for revisions that are defined via a delta + relative to some other TextRecord. These records record the id of + the TextRecord that holds the base text against which the delta is + defined. When the text for a DeltaTextRecord is retrieved, the + DeltaTextRecord instance is deleted and a CheckedOutTextRecord + instance is created to take its place. + +CheckedOutTextRecord -- Used during OutputPass for a revision that + started out as a DeltaTextRecord, but has already been retrieved + (and therefore its fulltext is stored in the checkout database). + +While a file is being processed during CollectRevsPass, the fulltext +and deltas are stored to the delta database, and TextRecord instances +are created to keep track of things. The reference counts are all +initialized to zero. + +After CollectRevsPass has done any preliminary tree mangling, its +_FileDataCollector.parse_completed(), method calls +RevisionRecorder.finish_file(), passing it the CVSFileItems instance +that describes the revisions in the file. At this point the reference +counts for the file's TextRecords are updated: each record referred to +by a delta has its refcount incremented, and each record that +corresponds to a non-delete CVSRevision is incremented. After that, +any records with refcount==0 are removed. When one record is removed, +that can cause another record's reference count to go to zero and be +removed too, recursively. When a TextRecord is deleted at this stage, +its deltatext is also deleted from the delta database. + +In FilterSymbolsPass, the exact same procedure (described in the +previous paragraph) is repeated, but this time using the CVSFileItems +after it has been updated for excluded symbols, symbol +preferred-parent grafting, etc.""" + + +import cStringIO +import re +import time + +from cvs2svn_lib import config +from cvs2svn_lib.common import DB_OPEN_NEW +from cvs2svn_lib.common import DB_OPEN_READ +from cvs2svn_lib.common import warning_prefix +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.log import Log +from cvs2svn_lib.artifact_manager import artifact_manager +from cvs2svn_lib.symbol import Trunk +from cvs2svn_lib.cvs_item import CVSRevisionModification +from cvs2svn_lib.database import Database +from cvs2svn_lib.database import IndexedDatabase +from cvs2svn_lib.rcs_stream import RCSStream +from cvs2svn_lib.rcs_stream import MalformedDeltaException +from cvs2svn_lib.revision_manager import RevisionRecorder +from cvs2svn_lib.revision_manager import RevisionExcluder +from cvs2svn_lib.revision_manager import RevisionReader +from cvs2svn_lib.serializer import MarshalSerializer +from cvs2svn_lib.serializer import CompressingSerializer +from cvs2svn_lib.serializer import PrimedPickleSerializer + + +class TextRecord(object): + """Bookkeeping data for the text of a single CVSRevision.""" + + __slots__ = ['id', 'refcount'] + + def __init__(self, id): + # The cvs_rev_id of the revision whose text this is. + self.id = id + + # The number of times that the text of this revision will be + # retrieved. + self.refcount = 0 + + def __getstate__(self): + return (self.id, self.refcount,) + + def __setstate__(self, state): + (self.id, self.refcount,) = state + + def increment_dependency_refcounts(self, text_record_db): + """Increment the refcounts of any records that this one depends on.""" + + pass + + def decrement_refcount(self, text_record_db): + """Decrement the number of times our text still has to be checked out. + + If the reference count goes to zero, call discard().""" + + self.refcount -= 1 + if self.refcount == 0: + text_record_db.discard(self.id) + + def checkout(self, text_record_db): + """Workhorse of the checkout process. + + Return the text for this revision, decrement our reference count, + and update the databases depending on whether there will be future + checkouts.""" + + raise NotImplementedError() + + def free(self, text_record_db): + """This instance will never again be checked out; free it. + + Also free any associated resources and decrement the refcounts of + any other TextRecords that this one depends on.""" + + raise NotImplementedError() + + +class FullTextRecord(TextRecord): + __slots__ = [] + + def __getstate__(self): + return (self.id, self.refcount,) + + def __setstate__(self, state): + (self.id, self.refcount,) = state + + def checkout(self, text_record_db): + text = text_record_db.delta_db[self.id] + self.decrement_refcount(text_record_db) + return text + + def free(self, text_record_db): + del text_record_db.delta_db[self.id] + + def __str__(self): + return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,) + + +class DeltaTextRecord(TextRecord): + __slots__ = ['pred_id'] + + def __init__(self, id, pred_id): + TextRecord.__init__(self, id) + + # The cvs_rev_id of the revision relative to which this delta is + # defined. + self.pred_id = pred_id + + def __getstate__(self): + return (self.id, self.refcount, self.pred_id,) + + def __setstate__(self, state): + (self.id, self.refcount, self.pred_id,) = state + + def increment_dependency_refcounts(self, text_record_db): + text_record_db[self.pred_id].refcount += 1 + + def checkout(self, text_record_db): + base_text = text_record_db[self.pred_id].checkout(text_record_db) + co = RCSStream(base_text) + delta_text = text_record_db.delta_db[self.id] + co.apply_diff(delta_text) + text = co.get_text() + del co + self.refcount -= 1 + if self.refcount == 0: + # This text will never be needed again; just delete ourselves + # without ever having stored the fulltext to the checkout + # database: + del text_record_db[self.id] + else: + # Store a new CheckedOutTextRecord in place of ourselves: + text_record_db.checkout_db['%x' % self.id] = text + new_text_record = CheckedOutTextRecord(self.id) + new_text_record.refcount = self.refcount + text_record_db.replace(new_text_record) + return text + + def free(self, text_record_db): + del text_record_db.delta_db[self.id] + text_record_db[self.pred_id].decrement_refcount(text_record_db) + + def __str__(self): + return 'DeltaTextRecord(%x -> %x, %d)' \ + % (self.pred_id, self.id, self.refcount,) + + +class CheckedOutTextRecord(TextRecord): + __slots__ = [] + + def __getstate__(self): + return (self.id, self.refcount,) + + def __setstate__(self, state): + (self.id, self.refcount,) = state + + def checkout(self, text_record_db): + text = text_record_db.checkout_db['%x' % self.id] + self.decrement_refcount(text_record_db) + return text + + def free(self, text_record_db): + del text_record_db.checkout_db['%x' % self.id] + + def __str__(self): + return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,) + + +class NullDatabase(object): + """A do-nothing database that can be used with TextRecordDatabase. + + Use this when you don't actually want to allow anything to be + deleted.""" + + def __delitem__(self, id): + pass + + +class TextRecordDatabase: + """Holds the TextRecord instances that are currently live. + + During CollectRevsPass and FilterSymbolsPass, files are processed + one by one and a new TextRecordDatabase instance is used for each + file. During OutputPass, a single TextRecordDatabase instance is + used for the duration of OutputPass; individual records are added + and removed when they are active.""" + + def __init__(self, delta_db, checkout_db): + # A map { cvs_rev_id -> TextRecord }. + self.text_records = {} + + # A database-like object using cvs_rev_ids as keys and containing + # fulltext/deltatext strings as values. Its __getitem__() method + # is used to retrieve deltas when they are needed, and its + # __delitem__() method is used to delete deltas when they can be + # freed. The modifiability of the delta database varies from pass + # to pass, so the object stored here varies as well: + # + # CollectRevsPass: a fully-functional IndexedDatabase. This + # allows deltas that will not be needed to be deleted. + # + # FilterSymbolsPass: a NullDatabase. The delta database cannot be + # modified during this pass, and we have no need to retrieve + # deltas, so we just use a dummy object here. + # + # OutputPass: a disabled IndexedDatabase. During this pass we + # need to retrieve deltas, but we are not allowed to modify the + # delta database. So we use an IndexedDatabase whose __del__() + # method has been disabled to do nothing. + self.delta_db = delta_db + + # A database-like object using cvs_rev_ids as keys and containing + # fulltext strings as values. This database is only set during + # OutputPass. + self.checkout_db = checkout_db + + # If this is set to a list, then the list holds the ids of + # text_records that have to be deleted; when discard() is called, + # it adds the requested id to the list but does not delete it. If + # this member is set to None, then text_records are deleted + # immediately when discard() is called. + self.deferred_deletes = None + + def __getstate__(self): + return (self.text_records.values(),) + + def __setstate__(self, state): + (text_records,) = state + self.text_records = {} + for text_record in text_records: + self.add(text_record) + self.delta_db = NullDatabase() + self.checkout_db = NullDatabase() + self.deferred_deletes = None + + def add(self, text_record): + """Add TEXT_RECORD to our database. + + There must not already be a record with the same id.""" + + assert not self.text_records.has_key(text_record.id) + + self.text_records[text_record.id] = text_record + + def __getitem__(self, id): + return self.text_records[id] + + def __delitem__(self, id): + """Free the record with the specified ID.""" + + del self.text_records[id] + + def replace(self, text_record): + """Store TEXT_RECORD in place of the existing record with the same id. + + Do not do anything with the old record.""" + + assert self.text_records.has_key(text_record.id) + self.text_records[text_record.id] = text_record + + def discard(self, *ids): + """The text records with IDS are no longer needed; discard them. + + This involves calling their free() methods and also removing them + from SELF. + + If SELF.deferred_deletes is not None, then the ids to be deleted + are added to the list instead of deleted immediately. This + mechanism is to prevent a stack overflow from the avalanche of + deletes that can result from deleting a long chain of revisions.""" + + if self.deferred_deletes is None: + # This is an outer-level delete. + self.deferred_deletes = list(ids) + while self.deferred_deletes: + id = self.deferred_deletes.pop() + text_record = self[id] + if text_record.refcount != 0: + raise InternalError( + 'TextRecordDatabase.discard(%s) called with refcount = %d' + % (text_record, text_record.refcount,) + ) + # This call might cause other text_record ids to be added to + # self.deferred_deletes: + text_record.free(self) + del self[id] + self.deferred_deletes = None + else: + self.deferred_deletes.extend(ids) + + def itervalues(self): + return self.text_records.itervalues() + + def recompute_refcounts(self, cvs_file_items): + """Recompute the refcounts of the contained TextRecords. + + Use CVS_FILE_ITEMS to determine which records will be needed by + cvs2svn.""" + + # First clear all of the refcounts: + for text_record in self.itervalues(): + text_record.refcount = 0 + + # Now increment the reference count of records that are needed as + # the source of another record's deltas: + for text_record in self.itervalues(): + text_record.increment_dependency_refcounts(self.text_records) + + # Now increment the reference count of records that will be needed + # by cvs2svn: + for lod_items in cvs_file_items.iter_lods(): + for cvs_rev in lod_items.cvs_revisions: + if isinstance(cvs_rev, CVSRevisionModification): + self[cvs_rev.id].refcount += 1 + + def free_unused(self): + """Free any TextRecords whose reference counts are zero.""" + + # The deletion of some of these text records might cause others to + # be unused, in which case they will be deleted automatically. + # But since the initially-unused records are not referred to by + # any others, we don't have to be afraid that they will be deleted + # before we get to them. But it *is* crucial that we create the + # whole unused list before starting the loop. + + unused = [ + text_record.id + for text_record in self.itervalues() + if text_record.refcount == 0 + ] + + self.discard(*unused) + + def log_leftovers(self): + """If any TextRecords still exist, log them.""" + + if self.text_records: + Log().warn( + "%s: internal problem: leftover revisions in the checkout cache:" + % warning_prefix) + for text_record in self.itervalues(): + Log().warn(' %s' % (text_record,)) + + def __repr__(self): + """Debugging output of the current contents of the TextRecordDatabase.""" + + retval = ['TextRecordDatabase:'] + for text_record in self.itervalues(): + retval.append(' %s' % (text_record,)) + return '\n'.join(retval) + + +class InternalRevisionRecorder(RevisionRecorder): + """A RevisionRecorder that reconstructs the fulltext internally.""" + + def __init__(self, compress): + RevisionRecorder.__init__(self) + self._compress = compress + + def register_artifacts(self, which_pass): + artifact_manager.register_temp_file( + config.RCS_DELTAS_INDEX_TABLE, which_pass + ) + artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass) + artifact_manager.register_temp_file( + config.RCS_TREES_INDEX_TABLE, which_pass + ) + artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass) + + def start(self): + ser = MarshalSerializer() + if self._compress: + ser = CompressingSerializer(ser) + self._rcs_deltas = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), + artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), + DB_OPEN_NEW, ser) + primer = (FullTextRecord, DeltaTextRecord) + self._rcs_trees = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_TREES_STORE), + artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), + DB_OPEN_NEW, PrimedPickleSerializer(primer)) + + def start_file(self, cvs_file_items): + self._cvs_file_items = cvs_file_items + + # A map from cvs_rev_id to TextRecord instance: + self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase()) + + def record_text(self, cvs_rev, log, text): + if isinstance(cvs_rev.lod, Trunk): + # On trunk, revisions are encountered in reverse order (1.<N> + # ... 1.1) and deltas are inverted. The first text that we see + # is the fulltext for the HEAD revision. After that, the text + # corresponding to revision 1.N is the delta (1.<N+1> -> + # 1.<N>)). We have to invert the deltas here so that we can + # read the revisions out in dependency order; that is, for + # revision 1.1 we want the fulltext, and for revision 1.<N> we + # want the delta (1.<N-1> -> 1.<N>). This means that we can't + # compute the delta for a revision until we see its logical + # parent. When we finally see revision 1.1 (which is recognized + # because it doesn't have a parent), we can record the diff (1.1 + # -> 1.2) for revision 1.2, and also the fulltext for 1.1. + + if cvs_rev.next_id is None: + # This is HEAD, as fulltext. Initialize the RCSStream so + # that we can compute deltas backwards in time. + self._stream = RCSStream(text) + else: + # Any other trunk revision is a backward delta. Apply the + # delta to the RCSStream to mutate it to the contents of this + # revision, and also to get the reverse delta, which we store + # as the forward delta of our child revision. + try: + text = self._stream.invert_diff(text) + except MalformedDeltaException, (msg): + Log().error('Malformed RCS delta in %s, revision %s: %s' + % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, + msg)) + raise RuntimeError + text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) + self._writeout(text_record, text) + + if cvs_rev.prev_id is None: + # This is revision 1.1. Write its fulltext: + text_record = FullTextRecord(cvs_rev.id) + self._writeout(text_record, self._stream.get_text()) + + # There will be no more trunk revisions delivered, so free the + # RCSStream. + del self._stream + + else: + # On branches, revisions are encountered in logical order + # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to + # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> -> + # <BRANCH>.<N>). That's what we need, so just store it. + + # FIXME: It would be nice to avoid writing out branch deltas + # when --trunk-only. (They will be deleted when finish_file() + # is called, but if the delta db is in an IndexedDatabase the + # deletions won't actually recover any disk space.) + text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id) + self._writeout(text_record, text) + + return None + + def _writeout(self, text_record, text): + self.text_record_db.add(text_record) + self._rcs_deltas[text_record.id] = text + + def finish_file(self, cvs_file_items): + """Finish processing of the current file. + + Compute the initial text record refcounts, discard any records + that are unneeded, and store the text records for the file to the + _rcs_trees database.""" + + # Delete our copy of the preliminary CVSFileItems: + del self._cvs_file_items + + self.text_record_db.recompute_refcounts(cvs_file_items) + self.text_record_db.free_unused() + self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db + del self.text_record_db + + def finish(self): + self._rcs_deltas.close() + self._rcs_trees.close() + + +class InternalRevisionExcluder(RevisionExcluder): + """The RevisionExcluder used by InternalRevisionReader.""" + + def register_artifacts(self, which_pass): + artifact_manager.register_temp_file_needed( + config.RCS_TREES_STORE, which_pass + ) + artifact_manager.register_temp_file_needed( + config.RCS_TREES_INDEX_TABLE, which_pass + ) + artifact_manager.register_temp_file( + config.RCS_TREES_FILTERED_STORE, which_pass + ) + artifact_manager.register_temp_file( + config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass + ) + + def start(self): + self._tree_db = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_TREES_STORE), + artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), + DB_OPEN_READ) + primer = (FullTextRecord, DeltaTextRecord) + self._new_tree_db = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE), + artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE), + DB_OPEN_NEW, PrimedPickleSerializer(primer)) + + def process_file(self, cvs_file_items): + text_record_db = self._tree_db[cvs_file_items.cvs_file.id] + text_record_db.recompute_refcounts(cvs_file_items) + text_record_db.free_unused() + self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db + + def finish(self): + self._tree_db.close() + self._new_tree_db.close() + + +class _KeywordExpander: + """A class whose instances provide substitutions for CVS keywords. + + This class is used via its __call__() method, which should be called + with a match object representing a match for a CVS keyword string. + The method returns the replacement for the matched text. + + The __call__() method works by calling the method with the same name + as that of the CVS keyword (converted to lower case). + + Instances of this class can be passed as the REPL argument to + re.sub().""" + + date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs + date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12 + + date_fmt = date_fmt_new + + @classmethod + def use_old_date_format(klass): + """Class method to ensure exact compatibility with CVS 1.11 + output. Use this if you want to verify your conversion and you're + using CVS 1.11.""" + klass.date_fmt = klass.date_fmt_old + + def __init__(self, cvs_rev): + self.cvs_rev = cvs_rev + + def __call__(self, match): + return '$%s: %s $' % \ + (match.group(1), getattr(self, match.group(1).lower())(),) + + def author(self): + return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author + + def date(self): + return time.strftime(self.date_fmt, + time.gmtime(self.cvs_rev.timestamp)) + + def header(self): + return '%s %s %s %s Exp' % \ + (self.source(), self.cvs_rev.rev, self.date(), self.author()) + + def id(self): + return '%s %s %s %s Exp' % \ + (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author()) + + def locker(self): + # Handle kvl like kv, as a converted repo is supposed to have no + # locks. + return '' + + def log(self): + # Would need some special handling. + return 'not supported by cvs2svn' + + def name(self): + # Cannot work, as just creating a new symbol does not check out + # the revision again. + return 'not supported by cvs2svn' + + def rcsfile(self): + return self.cvs_rev.cvs_file.basename + ",v" + + def revision(self): + return self.cvs_rev.rev + + def source(self): + project = self.cvs_rev.cvs_file.project + return project.cvs_repository_root + '/' + project.cvs_module + \ + self.cvs_rev.cvs_file.cvs_path + ",v" + + def state(self): + # We check out only live revisions. + return 'Exp' + + +class InternalRevisionReader(RevisionReader): + """A RevisionReader that reads the contents from an own delta store.""" + + _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State' + _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$') + _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$') + + def __init__(self, compress): + self._compress = compress + + def register_artifacts(self, which_pass): + artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass) + artifact_manager.register_temp_file_needed( + config.RCS_DELTAS_STORE, which_pass + ) + artifact_manager.register_temp_file_needed( + config.RCS_DELTAS_INDEX_TABLE, which_pass + ) + artifact_manager.register_temp_file_needed( + config.RCS_TREES_FILTERED_STORE, which_pass + ) + artifact_manager.register_temp_file_needed( + config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass + ) + + def start(self): + self._delta_db = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), + artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), + DB_OPEN_READ) + self._delta_db.__delitem__ = lambda id: None + self._tree_db = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE), + artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE), + DB_OPEN_READ) + ser = MarshalSerializer() + if self._compress: + ser = CompressingSerializer(ser) + self._co_db = Database( + artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW, + ser) + + # The set of CVSFile instances whose TextRecords have already been + # read: + self._loaded_files = set() + + # A map { CVSFILE : _FileTree } for files that currently have live + # revisions: + self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db) + + def _get_text_record(self, cvs_rev): + """Return the TextRecord instance for CVS_REV. + + If the TextRecords for CVS_REV.cvs_file haven't been loaded yet, + do so now.""" + + if cvs_rev.cvs_file not in self._loaded_files: + for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues(): + self._text_record_db.add(text_record) + self._loaded_files.add(cvs_rev.cvs_file) + + return self._text_record_db[cvs_rev.id] + + def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): + """Check out the text for revision C_REV from the repository. + + Return the text wrapped in a readable file object. If + SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be + _un_expanded prior to returning the file content. Note that $Log$ + never actually generates a log (which makes test 'requires_cvs()' + fail). + + Revisions may be requested in any order, but if they are not + requested in dependency order the checkout database will become + very large. Revisions may be skipped. Each revision may be + requested only once.""" + + try: + text = self._get_text_record(cvs_rev).checkout(self._text_record_db) + except MalformedDeltaException, (msg): + raise FatalError('Malformed RCS delta in %s, revision %s: %s' + % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)) + if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o': + if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k': + text = self._kw_re.sub(r'$\1$', text) + else: + text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text) + + return cStringIO.StringIO(text) + + def finish(self): + self._text_record_db.log_leftovers() + + del self._text_record_db + self._delta_db.close() + self._tree_db.close() + self._co_db.close() + |